From: rkinyon Date: Fri, 9 Nov 2007 15:42:30 +0000 (+0000) Subject: r6209@rob-kinyons-computer-2 (orig r9991): rkinyon | 2007-09-24 21:18:27 -0400 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=4c76b4dd69d421625033f95869a95d2db565684e;p=dbsrgits%2FDBM-Deep.git r6209@rob-kinyons-computer-2 (orig r9991): rkinyon | 2007-09-24 21:18:27 -0400 r6205@rob-kinyons-computer-2 (orig r9987): rkinyon | 2007-09-24 14:24:05 -0400 Added references and a fix for 29583 r6206@rob-kinyons-computer-2 (orig r9988): rkinyon | 2007-09-24 15:41:06 -0400 Fixed pod coverage test and added a line to Changes making clear that the file format is incompatible r6207@rob-kinyons-computer-2 (orig r9989): rkinyon | 2007-09-24 20:21:26 -0400 Changed everything around so that we're releasing 1.0003, not 1.0009_01. Plus, updated all documentation and cleaned up the distinction between the main docco and the cookbook r6208@rob-kinyons-computer-2 (orig r9990): rkinyon | 2007-09-24 21:17:47 -0400 Final POD fix r6210@rob-kinyons-computer-2 (orig r9992): rkinyon | 2007-09-24 21:19:22 -0400 Fixed MANIFEST r8199@rob-kinyons-computer-2 (orig r10013): rkinyon | 2007-09-28 12:05:34 -0400 r6222@h460878c2 (orig r10003): rkinyon | 2007-09-26 21:30:53 -0400 Added _dump_file and improved how arrays/hashes clean up after themselves r8192@h460878c2 (orig r10004): rkinyon | 2007-09-26 22:25:04 -0400 Added test that breaks the dumper r8193@h460878c2 (orig r10005): rkinyon | 2007-09-27 15:16:18 -0400 Fixed the bug revealed by making bucketlists properly clean up after themselves r8194@h460878c2 (orig r10006): rkinyon | 2007-09-27 15:19:45 -0400 Fixed hardcoded 0 staleness for HEAD in inc_txn_staleness_counter r8195@h460878c2 (orig r10008): rkinyon | 2007-09-27 23:06:25 -0400 The refcount functions have been refactored a bit r8196@h460878c2 (orig r10011): rkinyon | 2007-09-28 09:35:35 -0400 Added a test for dump_file within the core tests and got all subs to be called at least once in the core tests. r8198@h460878c2 (orig r10012): rkinyon | 2007-09-28 11:29:08 -0400 A raft of minor improvements r8200@rob-kinyons-computer-2 (orig r10014): rkinyon | 2007-09-28 12:10:04 -0400 Updated Changes file r8208@rob-kinyons-computer-2 (orig r10033): rkinyon | 2007-10-01 11:17:40 -0400 r8204@rob-kinyons-computer-2 (orig r10021): rkinyon | 2007-09-28 20:00:36 -0400 Have a 98% solution to making references work. r8205@rob-kinyons-computer-2 (orig r10027): rkinyon | 2007-09-30 13:59:07 -0400 cached singletons for most cases. The external reference issue is starting to come into larger focus r8206@rob-kinyons-computer-2 (orig r10031): rkinyon | 2007-10-01 11:15:50 -0400 Added coverage report and tests that were wrong r8207@rob-kinyons-computer-2 (orig r10032): rkinyon | 2007-10-01 11:16:12 -0400 Fixed date on release of 1.0005 r8223@rob-kinyons-computer-2 (orig r10043): rkinyon | 2007-10-01 23:11:14 -0400 r8215@rob-kinyons-computer-2 (orig r10039): rkinyon | 2007-10-01 21:25:29 -0400 Removed usage of Clone from the code, replacing it with a hand-rolled datawalk r8222@rob-kinyons-computer-2 (orig r10042): rkinyon | 2007-10-01 23:10:50 -0400 Final prep for 1.0006 release --- diff --git a/Build.PL b/Build.PL index 595b201..abcf310 100644 --- a/Build.PL +++ b/Build.PL @@ -7,11 +7,10 @@ my $build = Module::Build->new( license => 'perl', requires => { 'perl' => '5.006_000', - 'Clone' => '0.01', - 'Digest::MD5' => '1.00', 'Fcntl' => '0.01', - 'FileHandle::Fmode' => '0.05', 'Scalar::Util' => '1.14', + 'Digest::MD5' => '1.00', + 'FileHandle::Fmode' => '0.05', }, optional => { }, @@ -26,7 +25,7 @@ my $build = Module::Build->new( }, create_makefile_pl => 'traditional', add_to_cleanup => [ - 'META.yml', '*.bak', '*.gz', 'Makefile.PL', 't/test*.db', 'cover_db', + 'META.yml', '*.bak', '*.gz', 'Makefile.PL', 'cover_db', ], test_files => 't/??_*.t', ); diff --git a/Changes b/Changes index 7211f45..22535e0 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,43 @@ Revision history for DBM::Deep. +1.0006 Oct 01 23:15:00 2007 EDT + - (This version is compatible with 1.0005) + - Removed Clone and replaced it with a hand-written datastructure walker. + - This greatly reduces the footprint of a large import + - This bypasses a failure of Clone under Perl 5.9.5 + - Moved t/37_delete_edge_cases.t to t_attic because it wasn't really used + - import() has a stricter API now. This is a potentially incompatible API + change. Only HASH and ARRAY refs are now allowed and they must match the type + of the object being imported into. + +1.0005 Oct 01 11:15:00 2007 EDT + - (This version is compatible with 1.0004) + - Added proper singleton support. This means that the following now works: + $db->{foo} = [ 1 .. 3]; + my $x = $db->{foo}; + my $y = $db->{foo}; + is( $x, $y ); # Now passes + - This means that Data::Dumper now properly reports when $db->{foo} = $db->{bar} + +1.0004 Sep 28 12:15:00 2007 EDT + - (This version is compatible with 1.0003) + - Fixed the Changes file (wrong version was displayed for 1.0003) + - Added filter sugar methods to be more API-compatible with other DBMs + - This was added to support a patch provided to IO::All so it can + use DBM::Deep as a DBM provider. + - Implemented _dump_file in order to display the file structure. As a + result, the following bugs were fixed: + - Arrays and hashes now clean up after themselves better. + - Bucketlists now clean up after themselves better. + - Reindexing properly clears the old bucketlist before freeing it. + +1.0003 Sep 24 14:00:00 2007 EDT + - THIS VERSION IS INCOMPATIBLE WITH FILES FROM ALL OTHER PRIOR VERSIONS. + - Further fixes for unshift/shift/splice and references (RT# 29583) + - To fix that, I had to put support for real references in. + - the 16 and 22 tests are now re-enabled. + - Yes, this means that real references work. See t/45_references.t + 1.0002 Sep 20 22:00:00 2007 EDT - (This version is compatible with 1.0001) - Expanded _throw_error() so that it provides better information. diff --git a/MANIFEST b/MANIFEST index ad92bcd..a0bbd13 100644 --- a/MANIFEST +++ b/MANIFEST @@ -14,6 +14,7 @@ lib/DBM/Deep/Hash.pm lib/DBM/Deep/Internals.pod utils/upgrade_db.pl utils/lib/DBM/Deep/09830.pm +utils/lib/DBM/Deep/10002.pm t/01_basic.t t/02_hash.t t/03_bighash.t @@ -49,7 +50,6 @@ t/32_dash_ell.t t/33_transactions.t t/34_transaction_arrays.t t/35_transaction_multiple.t -t/37_delete_edge_cases.t t/38_data_sector_size.t t/39_singletons.t t/40_freespace.t @@ -57,9 +57,12 @@ t/41_transaction_multilevel.t t/42_transaction_indexsector.t t/43_transaction_maximum.t t/44_upgrade_db.t +t/45_references.t +t/97_dump_file.t t/98_pod.t t/99_pod_coverage.t t/common.pm t/etc/db-0-983 t/etc/db-0-99_04 t/etc/db-1-0000 +t/etc/db-1-0003 diff --git a/lib/DBM/Deep.pm b/lib/DBM/Deep.pm index f5ecd68..d34e675 100644 --- a/lib/DBM/Deep.pm +++ b/lib/DBM/Deep.pm @@ -5,11 +5,10 @@ use 5.006_000; use strict; use warnings; -our $VERSION = q(1.0002); +our $VERSION = q(1.0006); use Fcntl qw( :flock ); -use Clone (); use Digest::MD5 (); use FileHandle::Fmode (); use Scalar::Util (); @@ -17,6 +16,10 @@ use Scalar::Util (); use DBM::Deep::Engine; use DBM::Deep::File; +use overload + '""' => sub { overload::StrVal( $_[0] ) }, + fallback => 1; + ## # Setup constants for users to pass to new() ## @@ -198,29 +201,85 @@ sub export { return $temp; } +sub _check_legality { + my $self = shift; + my ($val) = @_; + + my $r = Scalar::Util::reftype( $val ); + + return $r if !defined $r || '' eq $r; + return $r if 'HASH' eq $r; + return $r if 'ARRAY' eq $r; + + DBM::Deep->_throw_error( + "Storage of references of type '$r' is not supported." + ); +} + sub import { - ## - # Recursively import Perl hash/array structure - ## - if (!ref($_[0])) { return; } # Perl calls import() on use -- ignore + # Perl calls import() on use -- ignore + return if !ref $_[0]; my $self = shift->_get_self; my ($struct) = @_; - # struct is not a reference, so just import based on our type - if (!ref($struct)) { - $struct = $self->_repr( @_ ); + my $type = $self->_check_legality( $struct ); + if ( !$type ) { + DBM::Deep->_throw_error( "Cannot import a scalar" ); } - #XXX This isn't the best solution. Better would be to use Data::Walker, - #XXX but that's a lot more thinking than I want to do right now. - eval { - local $SIG{'__DIE__'}; - $self->_import( Clone::clone( $struct ) ); - }; if ( my $e = $@ ) { - die $e; + if ( substr( $type, 0, 1 ) ne $self->_type ) { + DBM::Deep->_throw_error( + "Cannot import " . ('HASH' eq $type ? 'a hash' : 'an array') + . " into " . ('HASH' eq $type ? 'an array' : 'a hash') + ); } + my %seen; + my $recurse; + $recurse = sub { + my ($db, $val) = @_; + + my $obj = 'HASH' eq Scalar::Util::reftype( $db ) ? tied(%$db) : tied(@$db); + $obj ||= $db; + + my $r = $self->_check_legality( $val ); + if ( 'HASH' eq $r ) { + while ( my ($k, $v) = each %$val ) { + my $r = $self->_check_legality( $v ); + if ( $r ) { + my $temp = 'HASH' eq $r ? {} : []; + if ( my $c = Scalar::Util::blessed( $v ) ) { + bless $temp, $c; + } + $obj->put( $k, $temp ); + $recurse->( $temp, $v ); + } + else { + $obj->put( $k, $v ); + } + } + } + elsif ( 'ARRAY' eq $r ) { + foreach my $k ( 0 .. $#$val ) { + my $v = $val->[$k]; + my $r = $self->_check_legality( $v ); + if ( $r ) { + my $temp = 'HASH' eq $r ? {} : []; + if ( my $c = Scalar::Util::blessed( $v ) ) { + bless $temp, $c; + } + $obj->put( $k, $temp ); + $recurse->( $temp, $v ); + } + else { + $obj->put( $k, $v ); + } + } + } + }; + $recurse->( $self, $struct ); + return 1; } @@ -240,17 +299,19 @@ sub optimize { #XXX Do we have to lock the tempfile? + #XXX Should we use tempfile() here instead of a hard-coded name? my $db_temp = DBM::Deep->new( file => $self->_storage->{file} . '.tmp', type => $self->_type, # Bring over all the parameters that we need to bring over - num_txns => $self->_engine->num_txns, - byte_size => $self->_engine->byte_size, - max_buckets => $self->_engine->max_buckets, + ( map { $_ => $self->_engine->$_ } qw( + byte_size max_buckets data_sector_size num_txns + )), ); $self->lock(); + $self->_engine->clear_cache; $self->_copy_node( $db_temp ); undef $db_temp; @@ -319,9 +380,6 @@ sub clone { ); sub set_filter { - ## - # Setup filter function for storing or fetching the key or value - ## my $self = shift->_get_self; my $type = lc shift; my $func = shift; @@ -333,6 +391,11 @@ sub clone { return; } + + sub filter_store_key { $_[0]->set_filter( store_key => $_[1] ); } + sub filter_store_value { $_[0]->set_filter( store_value => $_[1] ); } + sub filter_fetch_key { $_[0]->set_filter( fetch_key => $_[1] ); } + sub filter_fetch_value { $_[0]->set_filter( fetch_value => $_[1] ); } } sub begin_work { @@ -389,14 +452,12 @@ sub _fh { ## sub _throw_error { - die "DBM::Deep: $_[1]\n"; my $n = 0; while( 1 ) { my @caller = caller( ++$n ); next if $caller[0] =~ m/^DBM::Deep/; die "DBM::Deep: $_[1] at $0 line $caller[2]\n"; - last; } } @@ -552,5 +613,7 @@ sub delete { (shift)->DELETE( @_ ) } sub exists { (shift)->EXISTS( @_ ) } sub clear { (shift)->CLEAR( @_ ) } +sub _dump_file {shift->_get_self->_engine->_dump_file;} + 1; __END__ diff --git a/lib/DBM/Deep.pod b/lib/DBM/Deep.pod index 622130c..8100fec 100644 --- a/lib/DBM/Deep.pod +++ b/lib/DBM/Deep.pod @@ -387,16 +387,26 @@ value. =item * lock() / unlock() -q.v. Locking. +q.v. L for more info. =item * optimize() -Recover lost disk space. This is important to do, especially if you use -transactions. +This will compress the datafile so that it takes up as little space as possible. +There is a freespace manager so that when space is freed up, it is used before +extending the size of the datafile. But, that freespace just sits in the datafile +unless C is called. -=item * import() / export() +=item * import() -Data going in and out. +Unlike simple assignment, C does not tie the right-hand side. Instead, +a copy of your data is put into the DB. C takes either an arrayref (if +your DB is an array) or a hashref (if your DB is a hash). C will die +if anything else is passed in. + +=item * export() + +This returns a complete copy of the data structure at the point you do the export. +This copy is in RAM, not on disk like the DB is. =item * begin_work() / commit() / rollback() @@ -529,7 +539,7 @@ Here are some examples of using arrays: Enable or disable automatic file locking by passing a boolean value to the C parameter when constructing your DBM::Deep object (see L - above). +above). my $db = DBM::Deep->new( file => "foo.db", @@ -647,7 +657,12 @@ way to extend the engine, and implement things like real-time compression or encryption. Filtering applies to the base DB level, and all child hashes / arrays. Filter hooks can be specified when your DBM::Deep object is first constructed, or by calling the C method at any time. There are -four available filter hooks, described below: +four available filter hooks. + +=head2 set_filter() + +This method takes two paramters - the filter type and the filter subreference. +The four types are: =over @@ -699,31 +714,6 @@ remove a filter, set the function reference to C: Please read L for examples of filters. -=head2 set_filter() - -This method takes two paramters - the filter type and the filter subreference. -The four types are: - -=over 4 - -=item * filter_store_key - -This subreference is called when a key is stored in a hash. - -=item * filter_store_value - -This subreference is called when a value is stored. - -=item * filter_fetch_key - -This subreference is called when a key is retrieved fram a hash. - -=item * filter_fetch_value - -This subreference is called when a key is retrieved. - -=back - =head1 ERROR HANDLING Most DBM::Deep methods return a true value for success, and call die() on @@ -759,7 +749,7 @@ the file's header and cannot be changed for the life of the file. These parameters are per-file, meaning you can access 32-bit and 64-bit files, as you choose. -B We have not personally tested files larger than 4 GB -- all my +B We have not personally tested files larger than 4 GB -- all our systems have only a 32-bit Perl. However, we have received user reports that this does indeed work. @@ -785,12 +775,7 @@ any child hash or array. =head1 CIRCULAR REFERENCES -B: DBM::Deep 1.0000 has turned off circular references pending -evaluation of some edge cases. I hope to be able to re-enable circular -references in a future version after 1.0000. This means that circular references -are B available. - -DBM::Deep has B support for circular references. Meaning you +DBM::Deep has full support for circular references. Meaning you can have a nested hash key or array element that points to a parent object. This relationship is stored in the DB file, and is preserved between sessions. Here is an example: @@ -803,14 +788,32 @@ Here is an example: print $db->{foo} . "\n"; # prints "bar" print $db->{circle}->{foo} . "\n"; # prints "bar" again +This also works as expected with array and hash references. So, the following +works as expected: + + $db->{foo} = [ 1 .. 3 ]; + $db->{bar} = $db->{foo}; + + push @{$db->{foo}}, 42; + is( $db->{bar}[-1], 42 ); # Passes + +This, however, does I extend to assignments from one DB file to another. +So, the following will throw an error: + + my $db1 = DBM::Deep->new( "foo.db" ); + my $db2 = DBM::Deep->new( "bar.db" ); + + $db1->{foo} = []; + $db2->{foo} = $db1->{foo}; # dies + B: Passing the object to a function that recursively walks the object tree (such as I or even the built-in C or C methods) will result in an infinite loop. This will be fixed in -a future release. +a future release by adding singleton support. =head1 TRANSACTIONS -New in 1.0000 is ACID transactions. Every DBM::Deep object is completely +As of 1.0000, DBM::Deep hass ACID transactions. Every DBM::Deep object is completely transaction-ready - it is not an option you have to turn on. You do have to specify how many transactions may run simultaneously (q.v. L). @@ -837,48 +840,6 @@ the transaction. Transactions in DBM::Deep are done using a variant of the MVCC method, the same method used by the InnoDB MySQL engine. -=head1 PERFORMANCE - -Because DBM::Deep is a conncurrent datastore, every change is flushed to disk -immediately and every read goes to disk. This means that DBM::Deep functions -at the speed of disk (generally 10-20ms) vs. the speed of RAM (generally -50-70ns), or at least 150-200x slower than the comparable in-memory -datastructure in Perl. - -There are several techniques you can use to speed up how DBM::Deep functions. - -=over 4 - -=item * Put it on a ramdisk - -The easiest and quickest mechanism to making DBM::Deep run faster is to create -a ramdisk and locate the DBM::Deep file there. Doing this as an option may -become a feature of DBM::Deep, assuming there is a good ramdisk wrapper on CPAN. - -=item * Work at the tightest level possible - -It is much faster to assign the level of your db that you are working with to -an intermediate variable than to re-look it up every time. Thus - - # BAD - while ( my ($k, $v) = each %{$db->{foo}{bar}{baz}} ) { - ... - } - - # GOOD - my $x = $db->{foo}{bar}{baz}; - while ( my ($k, $v) = each %$x ) { - ... - } - -=item * Make your file as tight as possible - -If you know that you are not going to use more than 65K in your database, -consider using the C 'small'> option. This will instruct -DBM::Deep to use 16bit addresses, meaning that the seek times will be less. - -=back - =head1 MIGRATION As of 1.0000, the file format has changed. Furthermore, DBM::Deep is now @@ -994,6 +955,14 @@ the reference. Again, this would generally be considered a feature. =back +=head2 External references and transactions + +If you do C{foo};>, then start a transaction, $x will be +referencing the database from outside the transaction. A fix for this (and other +issues with how external references into the database) is being looked into. This +is the skipped set of tests in t/39_singletons.t and a related issue is the focus +of t/37_delete_edge_cases.t + =head2 File corruption The current level of error handling in DBM::Deep is minimal. Files I checked @@ -1065,16 +1034,16 @@ reference to be imported in order to explicitly leave it untied. B is used to test the code coverage of the tests. Below is the B report on this distribution's test suite. - ----------------------------------- ------ ------ ------ ------ ------ ------ - File stmt bran cond sub time total - ----------------------------------- ------ ------ ------ ------ ------ ------ - blib/lib/DBM/Deep.pm 94.4 85.0 90.5 100.0 5.0 93.4 - blib/lib/DBM/Deep/Array.pm 100.0 94.6 100.0 100.0 4.7 98.8 - blib/lib/DBM/Deep/Engine.pm 97.2 85.8 82.4 100.0 51.3 93.8 - blib/lib/DBM/Deep/File.pm 97.2 81.6 66.7 100.0 36.5 91.9 - blib/lib/DBM/Deep/Hash.pm 100.0 100.0 100.0 100.0 2.5 100.0 - Total 97.2 87.4 83.9 100.0 100.0 94.6 - ----------------------------------- ------ ------ ------ ------ ------ ------ + ------------------------------------------ ------ ------ ------ ------ ------ + File stmt bran cond sub total + ------------------------------------------ ------ ------ ------ ------ ------ + blib/lib/DBM/Deep.pm 97.2 90.9 83.3 100.0 95.4 + blib/lib/DBM/Deep/Array.pm 100.0 95.7 100.0 100.0 99.0 + blib/lib/DBM/Deep/Engine.pm 95.6 84.7 81.6 98.4 92.5 + blib/lib/DBM/Deep/File.pm 97.2 81.6 66.7 100.0 91.9 + blib/lib/DBM/Deep/Hash.pm 100.0 100.0 100.0 100.0 100.0 + Total 96.7 87.5 82.2 99.2 94.1 + ------------------------------------------ ------ ------ ------ ------ ------ =head1 MORE INFORMATION diff --git a/lib/DBM/Deep/Array.pm b/lib/DBM/Deep/Array.pm index db84214..6f78c0d 100644 --- a/lib/DBM/Deep/Array.pm +++ b/lib/DBM/Deep/Array.pm @@ -5,7 +5,7 @@ use 5.006_000; use strict; use warnings; -our $VERSION = q(1.0002); +our $VERSION = q(1.0006); # This is to allow DBM::Deep::Array to handle negative indices on # its own. Otherwise, Perl would intercept the call to negative @@ -20,16 +20,7 @@ sub _get_self { eval { local $SIG{'__DIE__'}; tied( @{$_[0]} ) } || $_[0] } -sub _repr { shift;[ @_ ] } - -sub _import { - my $self = shift; - my ($struct) = @_; - - $self->push( @$struct ); - - return 1; -} +sub _repr { [] } sub TIEARRAY { my $class = shift; @@ -47,6 +38,7 @@ sub FETCH { $self->lock( $self->LOCK_SH ); if ( !defined $key ) { + $self->unlock; DBM::Deep->_throw_error( "Cannot use an undefined array index." ); } elsif ( $key =~ /^-?\d+$/ ) { @@ -79,6 +71,7 @@ sub STORE { my $size; my $idx_is_numeric; if ( !defined $key ) { + $self->unlock; DBM::Deep->_throw_error( "Cannot use an undefined array index." ); } elsif ( $key =~ /^-?\d+$/ ) { @@ -117,6 +110,7 @@ sub EXISTS { $self->lock( $self->LOCK_SH ); if ( !defined $key ) { + $self->unlock; DBM::Deep->_throw_error( "Cannot use an undefined array index." ); } elsif ( $key =~ /^-?\d+$/ ) { @@ -148,6 +142,7 @@ sub DELETE { my $size = $self->FETCHSIZE; if ( !defined $key ) { + $self->unlock; DBM::Deep->_throw_error( "Cannot use an undefined array index." ); } elsif ( $key =~ /^-?\d+$/ ) { @@ -257,16 +252,7 @@ sub _move_value { my $self = shift; my ($old_key, $new_key) = @_; - my $val = $self->FETCH( $old_key ); - if ( eval { local $SIG{'__DIE__'}; $val->isa( 'DBM::Deep::Hash' ) } ) { - $self->STORE( $new_key, { %$val } ); - } - elsif ( eval { local $SIG{'__DIE__'}; $val->isa( 'DBM::Deep::Array' ) } ) { - $self->STORE( $new_key, [ @$val ] ); - } - else { - $self->STORE( $new_key, $val ); - } + return $self->_engine->make_reference( $self, $old_key, $new_key ); } sub SHIFT { @@ -276,22 +262,21 @@ sub SHIFT { my $length = $self->FETCHSIZE(); - if ($length) { - my $content = $self->FETCH( 0 ); - - for (my $i = 0; $i < $length - 1; $i++) { - $self->_move_value( $i+1, $i ); - } - $self->DELETE( $length - 1 ); - - $self->unlock; - - return $content; - } - else { + if ( !$length ) { $self->unlock; return; } + + my $content = $self->FETCH( 0 ); + + for (my $i = 0; $i < $length - 1; $i++) { + $self->_move_value( $i+1, $i ); + } + $self->DELETE( $length - 1 ); + + $self->unlock; + + return $content; } sub UNSHIFT { @@ -307,6 +292,8 @@ sub UNSHIFT { for (my $i = $length - 1; $i >= 0; $i--) { $self->_move_value( $i, $i+$new_size ); } + + $self->STORESIZE( $length + $new_size ); } for (my $i = 0; $i < $new_size; $i++) { @@ -355,6 +342,7 @@ sub SPLICE { for (my $i = $length - 1; $i >= $offset + $splice_length; $i--) { $self->_move_value( $i, $i + ($new_size - $splice_length) ); } + $self->STORESIZE( $length + $new_size - $splice_length ); } else { for (my $i = $offset + $splice_length; $i < $length; $i++) { diff --git a/lib/DBM/Deep/Cookbook.pod b/lib/DBM/Deep/Cookbook.pod index b82ad8a..7c62c16 100644 --- a/lib/DBM/Deep/Cookbook.pod +++ b/lib/DBM/Deep/Cookbook.pod @@ -28,6 +28,10 @@ L will do these things for you. =head2 Real-time Encryption Example +B: This is just an example of how to write a filter. This most +definitely should B be taken as a proper way to write a filter that does +encryption. + Here is a working example that uses the I module to do real-time encryption / decryption of keys & values with DBM::Deep Filters. Please visit L for more @@ -150,4 +154,46 @@ behavior will occur otherwise. B If you do choose to use a custom digest algorithm, you must set it every time you access this file. Otherwise, the default (MD5) will be used. +=head1 PERFORMANCE + +Because DBM::Deep is a conncurrent datastore, every change is flushed to disk +immediately and every read goes to disk. This means that DBM::Deep functions +at the speed of disk (generally 10-20ms) vs. the speed of RAM (generally +50-70ns), or at least 150-200x slower than the comparable in-memory +datastructure in Perl. + +There are several techniques you can use to speed up how DBM::Deep functions. + +=over 4 + +=item * Put it on a ramdisk + +The easiest and quickest mechanism to making DBM::Deep run faster is to create +a ramdisk and locate the DBM::Deep file there. Doing this as an option may +become a feature of DBM::Deep, assuming there is a good ramdisk wrapper on CPAN. + +=item * Work at the tightest level possible + +It is much faster to assign the level of your db that you are working with to +an intermediate variable than to re-look it up every time. Thus + + # BAD + while ( my ($k, $v) = each %{$db->{foo}{bar}{baz}} ) { + ... + } + + # GOOD + my $x = $db->{foo}{bar}{baz}; + while ( my ($k, $v) = each %$x ) { + ... + } + +=item * Make your file as tight as possible + +If you know that you are not going to use more than 65K in your database, +consider using the C 'small'> option. This will instruct +DBM::Deep to use 16bit addresses, meaning that the seek times will be less. + +=back + =cut diff --git a/lib/DBM/Deep/Engine.pm b/lib/DBM/Deep/Engine.pm index 4efd777..720e7e3 100644 --- a/lib/DBM/Deep/Engine.pm +++ b/lib/DBM/Deep/Engine.pm @@ -5,7 +5,7 @@ use 5.006_000; use strict; use warnings; -our $VERSION = q(1.0002); +our $VERSION = q(1.0006); use Scalar::Util (); @@ -164,6 +164,55 @@ sub get_classname { return $sector->get_classname; } +sub make_reference { + my $self = shift; + my ($obj, $old_key, $new_key) = @_; + + # This will be a Reference sector + my $sector = $self->_load_sector( $obj->_base_offset ) + or DBM::Deep->_throw_error( "How did get_classname fail (no sector for '$obj')?!" ); + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + my $old_md5 = $self->_apply_digest( $old_key ); + + my $value_sector = $sector->get_data_for({ + key_md5 => $old_md5, + allow_head => 1, + }); + + unless ( $value_sector ) { + $value_sector = DBM::Deep::Engine::Sector::Null->new({ + engine => $self, + data => undef, + }); + + $sector->write_data({ + key_md5 => $old_md5, + key => $old_key, + value => $value_sector, + }); + } + + if ( $value_sector->isa( 'DBM::Deep::Engine::Sector::Reference' ) ) { + $sector->write_data({ + key => $new_key, + key_md5 => $self->_apply_digest( $new_key ), + value => $value_sector, + }); + $value_sector->increment_refcount; + } + else { + $sector->write_data({ + key => $new_key, + key_md5 => $self->_apply_digest( $new_key ), + value => $value_sector->clone, + }); + } +} + sub key_exists { my $self = shift; my ($obj, $key) = @_; @@ -217,11 +266,34 @@ sub write_value { ); } + # This will be a Reference sector + my $sector = $self->_load_sector( $obj->_base_offset ) + or DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep." ); + + if ( $sector->staleness != $obj->_staleness ) { + DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep.n" ); + } + my ($class, $type); if ( !defined $value ) { $class = 'DBM::Deep::Engine::Sector::Null'; } elsif ( $r eq 'ARRAY' || $r eq 'HASH' ) { + my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ); }; + if ( $is_dbm_deep ) { + if ( $value->_engine->storage == $self->storage ) { + my $value_sector = $self->_load_sector( $value->_base_offset ); + $sector->write_data({ + key => $key, + key_md5 => $self->_apply_digest( $key ), + value => $value_sector, + }); + $value_sector->increment_refcount; + return 1; + } + + DBM::Deep->_throw_error( "Cannot store values across DBM::Deep files. Please use export() instead." ); + } if ( $r eq 'ARRAY' && tied(@$value) ) { DBM::Deep->_throw_error( "Cannot store something that is tied." ); } @@ -232,17 +304,12 @@ sub write_value { $type = substr( $r, 0, 1 ); } else { + if ( tied($value) ) { + DBM::Deep->_throw_error( "Cannot store something that is tied." ); + } $class = 'DBM::Deep::Engine::Sector::Scalar'; } - # This will be a Reference sector - my $sector = $self->_load_sector( $obj->_base_offset ) - or DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep." ); - - if ( $sector->staleness != $obj->_staleness ) { - DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep.n" ); - } - # Create this after loading the reference sector in case something bad happens. # This way, we won't allocate value sector(s) needlessly. my $value_sector = $class->new({ @@ -497,8 +564,8 @@ sub get_txn_staleness_counter { return unpack( $StP{$STALE_SIZE}, $self->storage->read_at( - $self->trans_loc + 4 + $STALE_SIZE * ($trans_id - 1), - 4, + $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1), + $STALE_SIZE, ) ); } @@ -508,10 +575,10 @@ sub inc_txn_staleness_counter { my ($trans_id) = @_; # Hardcode staleness of 0 for the HEAD - return unless $trans_id; + return 0 unless $trans_id; $self->storage->print_at( - $self->trans_loc + 4 + $STALE_SIZE * ($trans_id - 1), + $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1), pack( $StP{$STALE_SIZE}, $self->get_txn_staleness_counter( $trans_id ) + 1 ), ); } @@ -556,7 +623,7 @@ sub clear_entries { { my $header_fixed = length( SIG_FILE ) + 1 + 4 + 4; - my $this_file_version = 2; + my $this_file_version = 3; sub _write_file_header { my $self = shift; @@ -658,7 +725,7 @@ sub _load_sector { my ($offset) = @_; # Add a catch for offset of 0 or 1 - return if $offset <= 1; + return if !$offset || $offset <= 1; my $type = $self->storage->read_at( $offset, 1 ); return if $type eq chr(0); @@ -808,6 +875,113 @@ sub set_trans_loc { $_[0]{trans_loc} = $_[1] } sub chains_loc { $_[0]{chains_loc} } sub set_chains_loc { $_[0]{chains_loc} = $_[1] } +sub cache { $_[0]{cache} ||= {} } +sub clear_cache { %{$_[0]->cache} = () } + +sub _dump_file { + my $self = shift; + + # Read the header + my $spot = $self->_read_file_header(); + + my %types = ( + 0 => 'B', + 1 => 'D', + 2 => 'I', + ); + + my %sizes = ( + 'D' => $self->data_sector_size, + 'B' => DBM::Deep::Engine::Sector::BucketList->new({engine=>$self,offset=>1})->size, + 'I' => DBM::Deep::Engine::Sector::Index->new({engine=>$self,offset=>1})->size, + ); + + my $return = ""; + + # Header values + $return .= "NumTxns: " . $self->num_txns . $/; + + # Read the free sector chains + my %sectors; + foreach my $multiple ( 0 .. 2 ) { + $return .= "Chains($types{$multiple}):"; + my $old_loc = $self->chains_loc + $multiple * $self->byte_size; + while ( 1 ) { + my $loc = unpack( + $StP{$self->byte_size}, + $self->storage->read_at( $old_loc, $self->byte_size ), + ); + + # We're now out of free sectors of this kind. + unless ( $loc ) { + last; + } + + $sectors{ $types{$multiple} }{ $loc } = undef; + $old_loc = $loc + SIG_SIZE + $STALE_SIZE; + $return .= " $loc"; + } + $return .= $/; + } + + SECTOR: + while ( $spot < $self->storage->{end} ) { + # Read each sector in order. + my $sector = $self->_load_sector( $spot ); + if ( !$sector ) { + # Find it in the free-sectors that were found already + foreach my $type ( keys %sectors ) { + if ( exists $sectors{$type}{$spot} ) { + my $size = $sizes{$type}; + $return .= sprintf "%08d: %s %04d\n", $spot, 'F' . $type, $size; + $spot += $size; + next SECTOR; + } + } + + die "********\n$return\nDidn't find free sector for $spot in chains\n********\n"; + } + else { + $return .= sprintf "%08d: %s %04d", $spot, $sector->type, $sector->size; + if ( $sector->type eq 'D' ) { + $return .= ' ' . $sector->data; + } + elsif ( $sector->type eq 'A' || $sector->type eq 'H' ) { + $return .= ' REF: ' . $sector->get_refcount; + } + elsif ( $sector->type eq 'B' ) { + foreach my $bucket ( $sector->chopped_up ) { + $return .= "\n "; + $return .= sprintf "%08d", unpack($StP{$self->byte_size}, + substr( $bucket->[-1], $self->hash_size, $self->byte_size), + ); + my $l = unpack( $StP{$self->byte_size}, + substr( $bucket->[-1], + $self->hash_size + $self->byte_size, + $self->byte_size, + ), + ); + $return .= sprintf " %08d", $l; + foreach my $txn ( 0 .. $self->num_txns - 2 ) { + my $l = unpack( $StP{$self->byte_size}, + substr( $bucket->[-1], + $self->hash_size + 2 * $self->byte_size + $txn * ($self->byte_size + $STALE_SIZE), + $self->byte_size, + ), + ); + $return .= sprintf " %08d", $l; + } + } + } + $return .= $/; + + $spot += $sector->size; + } + } + + return $return; +} + ################################################################################ package DBM::Deep::Iterator; @@ -1181,7 +1355,7 @@ sub _init { unless ( $self->offset ) { my $classname = Scalar::Util::blessed( delete $self->{data} ); - my $leftover = $self->size - $self->base_size - 2 * $e->byte_size; + my $leftover = $self->size - $self->base_size - 3 * $e->byte_size; my $class_offset = 0; if ( defined $classname ) { @@ -1198,7 +1372,7 @@ sub _init { $e->storage->print_at( $self->offset + $self->base_size, pack( $StP{$e->byte_size}, 0 ), # Index/BList loc pack( $StP{$e->byte_size}, $class_offset ), # Classname loc - # XXX Add the recounting location here. We can take $e->byte_size bytes. + pack( $StP{$e->byte_size}, 1 ), # Initial refcount chr(0) x $leftover, # Zero-fill the rest ); } @@ -1214,18 +1388,6 @@ sub _init { return; } -sub free { - my $self = shift; - - my $blist_loc = $self->get_blist_loc; - $self->engine->_load_sector( $blist_loc )->free if $blist_loc; - - my $class_loc = $self->get_class_offset; - $self->engine->_load_sector( $class_loc )->free if $class_loc; - - $self->SUPER::free(); -} - sub staleness { $_[0]{staleness} } sub get_data_for { @@ -1335,6 +1497,8 @@ sub delete_key { my @trans_ids = $self->engine->get_running_txn_ids; + # If we're the HEAD and there are running txns, then we need to clone this value to the other + # transactions to preserve Isolation. if ( $self->engine->trans_id == 0 ) { if ( @trans_ids ) { foreach my $other_trans_id ( @trans_ids ) { @@ -1493,6 +1657,7 @@ sub get_bucket_list { ); } + $sector->clear; $sector->free; $sector = $blist_cache{ ord( substr( $args->{key_md5}, $i, 1 ) ) }; @@ -1524,26 +1689,98 @@ sub get_classname { return $self->engine->_load_sector( $class_offset )->data; } -#XXX Add singleton handling here sub data { my $self = shift; - my $new_obj = DBM::Deep->new({ - type => $self->type, - base_offset => $self->offset, - staleness => $self->staleness, - storage => $self->engine->storage, - engine => $self->engine, - }); + unless ( $self->engine->cache->{ $self->offset } ) { + my $new_obj = DBM::Deep->new({ + type => $self->type, + base_offset => $self->offset, + staleness => $self->staleness, + storage => $self->engine->storage, + engine => $self->engine, + }); - if ( $self->engine->storage->{autobless} ) { - my $classname = $self->get_classname; - if ( defined $classname ) { - bless $new_obj, $classname; + if ( $self->engine->storage->{autobless} ) { + my $classname = $self->get_classname; + if ( defined $classname ) { + bless $new_obj, $classname; + } } + + $self->engine->cache->{$self->offset} = $new_obj; } + return $self->engine->cache->{$self->offset}; +} + +sub free { + my $self = shift; + + # We're not ready to be removed yet. + if ( $self->decrement_refcount > 0 ) { + return; + } + + # Rebless the object into DBM::Deep::Null. + eval { %{ $self->engine->cache->{ $self->offset } } = (); }; + eval { @{ $self->engine->cache->{ $self->offset } } = (); }; + bless $self->engine->cache->{ $self->offset }, 'DBM::Deep::Null'; + delete $self->engine->cache->{ $self->offset }; + + my $blist_loc = $self->get_blist_loc; + $self->engine->_load_sector( $blist_loc )->free if $blist_loc; + + my $class_loc = $self->get_class_offset; + $self->engine->_load_sector( $class_loc )->free if $class_loc; + + $self->SUPER::free(); +} + +sub increment_refcount { + my $self = shift; + + my $refcount = $self->get_refcount; + + $refcount++; + + $self->write_refcount( $refcount ); - return $new_obj; + return $refcount; +} + +sub decrement_refcount { + my $self = shift; + + my $refcount = $self->get_refcount; + + $refcount--; + + $self->write_refcount( $refcount ); + + return $refcount; +} + +sub get_refcount { + my $self = shift; + + my $e = $self->engine; + return unpack( + $StP{$e->byte_size}, + $e->storage->read_at( + $self->offset + $self->base_size + 2 * $e->byte_size, $e->byte_size, + ), + ); +} + +sub write_refcount { + my $self = shift; + my ($num) = @_; + + my $e = $self->engine; + $e->storage->print_at( + $self->offset + $self->base_size + 2 * $e->byte_size, + pack( $StP{$e->byte_size}, $num ), + ); } package DBM::Deep::Engine::Sector::BucketList; @@ -1573,6 +1810,13 @@ sub _init { return $self; } +sub clear { + my $self = shift; + $self->engine->storage->print_at( $self->offset + $self->base_size, + chr(0) x ($self->size - $self->base_size), # Zero-fill the data + ); +} + sub size { my $self = shift; unless ( $self->{size} ) { @@ -1585,6 +1829,40 @@ sub size { sub free_meth { return '_add_free_blist_sector' } +sub free { + my $self = shift; + + my $e = $self->engine; + foreach my $bucket ( $self->chopped_up ) { + my $rest = $bucket->[-1]; + + # Delete the keysector + my $l = unpack( $StP{$e->byte_size}, substr( $rest, $e->hash_size, $e->byte_size ) ); + my $s = $e->_load_sector( $l ); $s->free if $s; + + # Delete the HEAD sector + $l = unpack( $StP{$e->byte_size}, + substr( $rest, + $e->hash_size + $e->byte_size, + $e->byte_size, + ), + ); + $s = $e->_load_sector( $l ); $s->free if $s; + + foreach my $txn ( 0 .. $e->num_txns - 2 ) { + my $l = unpack( $StP{$e->byte_size}, + substr( $rest, + $e->hash_size + 2 * $e->byte_size + $txn * ($e->byte_size + $STALE_SIZE), + $e->byte_size, + ), + ); + my $s = $e->_load_sector( $l ); $s->free if $s; + } + } + + $self->SUPER::free(); +} + sub bucket_size { my $self = shift; unless ( $self->{bucket_size} ) { @@ -1947,5 +2225,18 @@ sub set_entry { ); } +# This was copied from MARCEL's Class::Null. However, I couldn't use it because +# I need an undef value, not an implementation of the Null Class pattern. +package DBM::Deep::Null; + +use overload + 'bool' => sub { undef }, + '""' => sub { undef }, + '0+' => sub { undef }, + fallback => 1, + nomethod => 'AUTOLOAD'; + +sub AUTOLOAD { return; } + 1; __END__ diff --git a/lib/DBM/Deep/File.pm b/lib/DBM/Deep/File.pm index 3f8511e..83835d9 100644 --- a/lib/DBM/Deep/File.pm +++ b/lib/DBM/Deep/File.pm @@ -5,7 +5,7 @@ use 5.006_000; use strict; use warnings; -our $VERSION = q(1.0002); +our $VERSION = q(1.0006); use Fcntl qw( :DEFAULT :flock :seek ); diff --git a/lib/DBM/Deep/Hash.pm b/lib/DBM/Deep/Hash.pm index 3602a90..7bca7ce 100644 --- a/lib/DBM/Deep/Hash.pm +++ b/lib/DBM/Deep/Hash.pm @@ -5,7 +5,7 @@ use 5.006_000; use strict; use warnings; -our $VERSION = q(1.0002); +our $VERSION = q(1.0006); use base 'DBM::Deep'; @@ -13,19 +13,7 @@ sub _get_self { eval { local $SIG{'__DIE__'}; tied( %{$_[0]} ) } || $_[0] } -#XXX Need to add a check here for @_ % 2 -sub _repr { shift;return { @_ } } - -sub _import { - my $self = shift; - my ($struct) = @_; - - foreach my $key (keys %$struct) { - $self->put($key, $struct->{$key}); - } - - return 1; -} +sub _repr { return {} } sub TIEHASH { ## @@ -52,7 +40,7 @@ sub FETCH { sub STORE { my $self = shift->_get_self; DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0]; - my $key = ($self->_storage->{filter_store_key}) + my $key = ($self->_storage->{filter_store_key}) ? $self->_storage->{filter_store_key}->($_[0]) : $_[0]; my $value = $_[1]; @@ -63,7 +51,7 @@ sub STORE { sub EXISTS { my $self = shift->_get_self; DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0]; - my $key = ($self->_storage->{filter_store_key}) + my $key = ($self->_storage->{filter_store_key}) ? $self->_storage->{filter_store_key}->($_[0]) : $_[0]; @@ -73,7 +61,7 @@ sub EXISTS { sub DELETE { my $self = shift->_get_self; DBM::Deep->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0]; - my $key = ($self->_storage->{filter_store_key}) + my $key = ($self->_storage->{filter_store_key}) ? $self->_storage->{filter_store_key}->($_[0]) : $_[0]; @@ -81,45 +69,45 @@ sub DELETE { } sub FIRSTKEY { - ## - # Locate and return first key (in no particular order) - ## + ## + # Locate and return first key (in no particular order) + ## my $self = shift->_get_self; - ## - # Request shared lock for reading - ## - $self->lock( $self->LOCK_SH ); - - my $result = $self->_engine->get_next_key( $self ); - - $self->unlock(); - - return ($result && $self->_storage->{filter_fetch_key}) + ## + # Request shared lock for reading + ## + $self->lock( $self->LOCK_SH ); + + my $result = $self->_engine->get_next_key( $self ); + + $self->unlock(); + + return ($result && $self->_storage->{filter_fetch_key}) ? $self->_storage->{filter_fetch_key}->($result) : $result; } sub NEXTKEY { - ## - # Return next key (in no particular order), given previous one - ## + ## + # Return next key (in no particular order), given previous one + ## my $self = shift->_get_self; - my $prev_key = ($self->_storage->{filter_store_key}) + my $prev_key = ($self->_storage->{filter_store_key}) ? $self->_storage->{filter_store_key}->($_[0]) : $_[0]; - ## - # Request shared lock for reading - ## - $self->lock( $self->LOCK_SH ); - - my $result = $self->_engine->get_next_key( $self, $prev_key ); - - $self->unlock(); - - return ($result && $self->_storage->{filter_fetch_key}) + ## + # Request shared lock for reading + ## + $self->lock( $self->LOCK_SH ); + + my $result = $self->_engine->get_next_key( $self, $prev_key ); + + $self->unlock(); + + return ($result && $self->_storage->{filter_fetch_key}) ? $self->_storage->{filter_fetch_key}->($result) : $result; } diff --git a/lib/DBM/Deep/Internals.pod b/lib/DBM/Deep/Internals.pod index b5b0ff2..cc851ed 100644 --- a/lib/DBM/Deep/Internals.pod +++ b/lib/DBM/Deep/Internals.pod @@ -4,6 +4,10 @@ DBM::Deep::Internals =head1 DESCRIPTION +B: This document is out-of-date. It describes an intermediate file +format used during the development from 0.983 to 1.0000. It will be rewritten +soon. + This is a document describing the internal workings of L. It is not necessary to read this document if you only intend to be a user. This document is intended for people who either want a deeper understanding of diff --git a/t/04_array.t b/t/04_array.t index cc2b2b9..e4616ee 100644 --- a/t/04_array.t +++ b/t/04_array.t @@ -2,7 +2,7 @@ # DBM::Deep Test ## use strict; -use Test::More tests => 124; +use Test::More tests => 128; use Test::Exception; use t::common qw( new_fh ); @@ -77,6 +77,7 @@ is( $db->length, 3, "... and we have three after shifting" ); is( $db->[0], 'elem1', "0th element still there after shifting" ); is( $db->[1], 'elem2', "1st element still there after shifting" ); is( $db->[2], 'elem3', "2nd element still there after shifting" ); +is( $db->[3], undef, "There is no third element now" ); is( $shifted, 'elem0', "Shifted value is correct" ); ## @@ -196,6 +197,11 @@ is($db->[0], "elem first"); is($db->[1], "elem last"); is($returned[0], "middle ABC"); +@returned = $db->splice; +is( $db->length, 0 ); +is( $returned[0], "elem first" ); +is( $returned[1], "elem last" ); + $db->[0] = [ 1 .. 3 ]; $db->[1] = { a => 'foo' }; is( $db->[0]->length, 3, "Reuse of same space with array successful" ); @@ -240,6 +246,7 @@ throws_ok { } qr/Cannot use an undefined array index/, "EXISTS fails on an undefined key"; # Bug reported by Mike Schilli +# Also, RT #29583 reported by HANENKAMP { my ($fh, $filename) = new_fh(); my $db = DBM::Deep->new( @@ -247,23 +254,23 @@ throws_ok { type => DBM::Deep->TYPE_ARRAY ); - push @{$db}, 1, { foo => 1 }; + push @{$db}, 3, { foo => 1 }; lives_ok { shift @{$db}; } "Shift doesn't die moving references around"; is( $db->[0]{foo}, 1, "Right hashref there" ); lives_ok { - unshift @{$db}, [ 1 .. 3 ]; + unshift @{$db}, [ 1 .. 3, [ 1 .. 3 ] ]; unshift @{$db}, 1; } "Unshift doesn't die moving references around"; - is( $db->[1][1], 2, "Right arrayref there" ); + is( $db->[1][3][1], 2, "Right arrayref there" ); is( $db->[2]{foo}, 1, "Right hashref there" ); # Add test for splice moving references around lives_ok { splice @{$db}, 0, 0, 1 .. 3; } "Splice doesn't die moving references around"; - is( $db->[4][1], 2, "Right arrayref there" ); + is( $db->[4][3][1], 2, "Right arrayref there" ); is( $db->[5]{foo}, 1, "Right hashref there" ); } diff --git a/t/06_error.t b/t/06_error.t index c8775e8..75af309 100644 --- a/t/06_error.t +++ b/t/06_error.t @@ -133,5 +133,5 @@ use_ok( 'DBM::Deep' ); { throws_ok { DBM::Deep->new( 't/etc/db-0-99_04' ); - } qr/DBM::Deep: Wrong file version found - 1 - expected 2/, "Fail if opening a file version 1"; + } qr/DBM::Deep: Wrong file version found - 1 - expected 3/, "Fail if opening a file version 1"; } diff --git a/t/14_filter.t b/t/14_filter.t index 240e96d..fbff9b1 100644 --- a/t/14_filter.t +++ b/t/14_filter.t @@ -53,10 +53,10 @@ ok( exists $db->{key2}, "Key2 exists" ); ## # Now clear all filters, and make sure all is unfiltered ## -ok( $db->set_filter( 'store_key', undef ), "Unset store_key filter" ); -ok( $db->set_filter( 'store_value', undef ), "Unset store_value filter" ); -ok( $db->set_filter( 'fetch_key', undef ), "Unset fetch_key filter" ); -ok( $db->set_filter( 'fetch_value', undef ), "Unset fetch_value filter" ); +ok( $db->filter_store_key( undef ), "Unset store_key filter" ); +ok( $db->filter_store_value( undef ), "Unset store_value filter" ); +ok( $db->filter_fetch_key( undef ), "Unset fetch_key filter" ); +ok( $db->filter_fetch_value( undef ), "Unset fetch_value filter" ); is( $db->{MYFILTERkey2}, "MYFILTERvalue2", "We get the right unfiltered value" ); diff --git a/t/16_circular.t b/t/16_circular.t index 61ec238..501435d 100644 --- a/t/16_circular.t +++ b/t/16_circular.t @@ -2,8 +2,7 @@ # DBM::Deep Test ## use strict; -use Test::More skip_all => "Internal references are not supported right now"; -#use Test::More tests => 32; +use Test::More tests => 32; use t::common qw( new_fh ); use_ok( 'DBM::Deep' ); diff --git a/t/17_import.t b/t/17_import.t index b4ff262..c5e034e 100644 --- a/t/17_import.t +++ b/t/17_import.t @@ -2,12 +2,49 @@ # DBM::Deep Test ## use strict; -use Test::More tests => 11; +use Test::More tests => 17; use Test::Deep; +use Test::Exception; use t::common qw( new_fh ); use_ok( 'DBM::Deep' ); +# Failure cases to make sure that things are caught right. +foreach my $type ( DBM::Deep->TYPE_HASH, DBM::Deep->TYPE_ARRAY ) { + my ($fh, $filename) = new_fh(); + my $db = DBM::Deep->new({ + file => $filename, + type => $type, + }); + + # Load a scalar + throws_ok { + $db->import( 'foo' ); + } qr/Cannot import a scalar/, "Importing a scalar to type '$type' fails"; + + # Load a ref of the wrong type + # Load something with bad stuff in it + my $x = 3; + if ( $type eq 'A' ) { + throws_ok { + $db->import( { foo => 'bar' } ); + } qr/Cannot import a hash into an array/, "Wrong type fails"; + + throws_ok { + $db->import( [ \$x ] ); + } qr/Storage of references of type 'SCALAR' is not supported/, "Bad stuff fails"; + } + else { + throws_ok { + $db->import( [ 1 .. 3 ] ); + } qr/Cannot import an array into a hash/, "Wrong type fails"; + + throws_ok { + $db->import( { foo => \$x } ); + } qr/Storage of references of type 'SCALAR' is not supported/, "Bad stuff fails"; + } +} + { my ($fh, $filename) = new_fh(); my $db = DBM::Deep->new({ @@ -25,7 +62,7 @@ use_ok( 'DBM::Deep' ); hash1 => { subkey1 => "subvalue1", subkey2 => "subvalue2", - subkey3 => bless( {}, 'Foo' ), + subkey3 => bless( { a => 'b' }, 'Foo' ), } }; @@ -40,7 +77,7 @@ use_ok( 'DBM::Deep' ); hash1 => { subkey1 => "subvalue1", subkey2 => "subvalue2", - subkey3 => useclass( bless {}, 'Foo' ), + subkey3 => useclass( bless { a => 'b' }, 'Foo' ), }, }), "Everything matches", @@ -56,9 +93,6 @@ use_ok( 'DBM::Deep' ); } { - diag "\nThere seems to be a bug in Clone on Perl 5.9+ that is causing\nthese tests to fail." - if $] >= 5.009; - my ($fh, $filename) = new_fh(); my $db = DBM::Deep->new({ file => $filename, diff --git a/t/18_export.t b/t/18_export.t index 9cca868..949697a 100644 --- a/t/18_export.t +++ b/t/18_export.t @@ -33,7 +33,7 @@ my $db = DBM::Deep->new({ ## # Create structure in DB ## -$db->import( %struct ); +$db->import( \%struct ); ## # Export entire thing diff --git a/t/19_crossref.t b/t/19_crossref.t index fcd48eb..67a3589 100644 --- a/t/19_crossref.t +++ b/t/19_crossref.t @@ -2,7 +2,7 @@ # DBM::Deep Test ## use strict; -use Test::More tests => 6; +use Test::More tests => 9; use Test::Exception; use t::common qw( new_fh ); @@ -11,6 +11,28 @@ use_ok( 'DBM::Deep' ); my ($fh2, $filename2) = new_fh(); my $db2 = DBM::Deep->new( $filename2 ); +SKIP: { + skip "Apparently, we cannot detect a tied scalar?", 1; + tie my $foo, 'Tied::Scalar'; + throws_ok { + $db2->{failure} = $foo; + } qr/Cannot store something that is tied\./, "tied scalar storage fails"; +} + +{ + tie my @foo, 'Tied::Array'; + throws_ok { + $db2->{failure} = \@foo; + } qr/Cannot store something that is tied\./, "tied array storage fails"; +} + +{ + tie my %foo, 'Tied::Hash'; + throws_ok { + $db2->{failure} = \%foo; + } qr/Cannot store something that is tied\./, "tied hash storage fails"; +} + { my ($fh, $filename) = new_fh(); my $db = DBM::Deep->new( $filename ); @@ -18,19 +40,19 @@ my $db2 = DBM::Deep->new( $filename2 ); ## # Create structure in $db ## - $db->import( + $db->import({ hash1 => { subkey1 => "subvalue1", subkey2 => "subvalue2", } - ); + }); is( $db->{hash1}{subkey1}, 'subvalue1', "Value imported correctly" ); is( $db->{hash1}{subkey2}, 'subvalue2', "Value imported correctly" ); # Test cross-ref nested hash accross DB objects throws_ok { $db2->{copy} = $db->{hash1}; - } qr/Cannot store something that is tied\./, "cross-ref fails"; + } qr/Cannot store values across DBM::Deep files\. Please use export\(\) instead\./, "cross-ref fails"; # This error text is for when internal cross-refs are implemented #} qr/Cannot cross-reference\. Use export\(\) instead\./, "cross-ref fails"; @@ -43,3 +65,13 @@ my $db2 = DBM::Deep->new( $filename2 ); ## is( $db2->{copy}{subkey1}, 'subvalue1', "Value copied correctly" ); is( $db2->{copy}{subkey2}, 'subvalue2', "Value copied correctly" ); + +package Tied::Scalar; +sub TIESCALAR { bless {}, $_[0]; } +sub FETCH{} + +package Tied::Array; +sub TIEARRAY { bless {}, $_[0]; } + +package Tied::Hash; +sub TIEHASH { bless {}, $_[0]; } diff --git a/t/22_internal_copy.t b/t/22_internal_copy.t index edd2531..f1a51a5 100644 --- a/t/22_internal_copy.t +++ b/t/22_internal_copy.t @@ -2,8 +2,7 @@ # DBM::Deep Test ## use strict; -use Test::More skip_all => "Internal references are not supported right now"; -#use Test::More tests => 13; +use Test::More tests => 13; use t::common qw( new_fh ); use_ok( 'DBM::Deep' ); @@ -14,7 +13,7 @@ my $db = DBM::Deep->new( $filename ); ## # Create structure in $db ## -$db->import( +$db->import({ hash1 => { subkey1 => "subvalue1", subkey2 => "subvalue2", @@ -22,7 +21,7 @@ $db->import( hash2 => { subkey3 => 'subvalue3', }, -); +}); is( $db->{hash1}{subkey1}, 'subvalue1', "Value imported correctly" ); is( $db->{hash1}{subkey2}, 'subvalue2', "Value imported correctly" ); diff --git a/t/27_filehandle.t b/t/27_filehandle.t index 810154d..11f9eca 100644 --- a/t/27_filehandle.t +++ b/t/27_filehandle.t @@ -20,16 +20,14 @@ use_ok( 'DBM::Deep' ); { open(my $fh, '<', $filename) || die("Can't open '$filename' for reading: $!\n"); - my $db; - # test if we can open and read a db using its filehandle - ok(($db = DBM::Deep->new(fh => $fh)), "open db in filehandle"); - ok($db->{hash}->{foo}->[1] eq 'b', "and get at stuff in the database"); + my $db; + ok( ($db = DBM::Deep->new( fh => $fh )), "open db in filehandle" ); + ok( $db->{hash}{foo}[1] eq 'b', "and get at stuff in the database" ); throws_ok { $db->{foo} = 1; - } qr/Cannot write to a readonly filehandle/, - "Can't write to a read-only filehandle"; + } qr/Cannot write to a readonly filehandle/, "Can't write to a read-only filehandle"; ok( !$db->exists( 'foo' ), "foo doesn't exist" ); my $db_obj = $db->_get_self; diff --git a/t/31_references.t b/t/31_references.t index ebeb811..af9bc30 100644 --- a/t/31_references.t +++ b/t/31_references.t @@ -1,8 +1,7 @@ -## -# DBM::Deep Test -## use strict; + use Test::More tests => 16; +use Test::Deep; use Test::Exception; use t::common qw( new_fh ); @@ -21,8 +20,8 @@ $db->{hash} = \%hash; isa_ok( tied(%hash), 'DBM::Deep::Hash' ); is( $db->{hash}{foo}, 1 ); -is_deeply( $db->{hash}{bar}, [ 1 .. 3 ] ); -is_deeply( $db->{hash}{baz}, { a => 42 } ); +cmp_deeply( $db->{hash}{bar}, noclass([ 1 .. 3 ]) ); +cmp_deeply( $db->{hash}{baz}, noclass({ a => 42 }) ); $hash{foo} = 2; is( $db->{hash}{foo}, 2 ); @@ -41,8 +40,8 @@ $db->{array} = \@array; isa_ok( tied(@array), 'DBM::Deep::Array' ); is( $db->{array}[0], 1 ); -is_deeply( $db->{array}[1], [ 1 .. 3 ] ); -is_deeply( $db->{array}[2], { a => 42 } ); +cmp_deeply( $db->{array}[1], noclass([ 1 .. 3 ]) ); +cmp_deeply( $db->{array}[2], noclass({ a => 42 }) ); $array[0] = 2; is( $db->{array}[0], 2 ); @@ -55,9 +54,6 @@ is( $db->{array}[2]{b}, 'floober' ); my %hash2 = ( abc => [ 1 .. 3 ] ); $array[3] = \%hash2; -SKIP: { - skip "Internal references are not supported right now", 1; - $hash2{ def } = \%hash; - is( $array[3]{def}{foo}, 2 ); -} +$hash2{ def } = \%hash; +is( $array[3]{def}{foo}, 2 ); diff --git a/t/33_transactions.t b/t/33_transactions.t index cdf18ad..1edd082 100644 --- a/t/33_transactions.t +++ b/t/33_transactions.t @@ -233,7 +233,3 @@ SKIP: { } __END__ - -Tests to add: -* Two transactions running at the same time -* Doing a clear on the head while a transaction is running diff --git a/t/39_singletons.t b/t/39_singletons.t index f9ff2e1..45afc60 100644 --- a/t/39_singletons.t +++ b/t/39_singletons.t @@ -1,24 +1,64 @@ use strict; -use Test::More tests => 2; +use Test::More tests => 11; use Test::Deep; use t::common qw( new_fh ); use_ok( 'DBM::Deep' ); -my ($fh, $filename) = new_fh(); -my $db = DBM::Deep->new( - file => $filename, - locking => 1, - autoflush => 1, -); +{ + my ($fh, $filename) = new_fh(); + my $db = DBM::Deep->new( + file => $filename, + locking => 1, + autoflush => 1, + ); -$db->{foo} = { a => 'b' }; -my $x = $db->{foo}; -my $y = $db->{foo}; + $db->{a} = 1; + $db->{foo} = { a => 'b' }; + my $x = $db->{foo}; + my $y = $db->{foo}; -print "$x -> $y\n"; + is( $x, $y, "The references are the same" ); -TODO: { - local $TODO = "Singletons aren't working yet"; -is( $x, $y, "The references are the same" ); + delete $db->{foo}; + is( $x, undef ); + is( $y, undef ); + is( $x + 0, undef ); + is( $y + 0, undef ); + is( $db->{foo}, undef ); + + # These shenanigans work to get another hashref + # into the same data location as $db->{foo} was. + $db->{foo} = {}; + delete $db->{foo}; + $db->{foo} = {}; + $db->{bar} = {}; + + is( $x, undef ); + is( $y, undef ); +} + +SKIP: { + skip "What do we do with external references and txns?", 2; + my ($fh, $filename) = new_fh(); + my $db = DBM::Deep->new( + file => $filename, + locking => 1, + autoflush => 1, + num_txns => 2, + ); + + $db->{foo} = { a => 'b' }; + my $x = $db->{foo}; + + $db->begin_work; + + $db->{foo} = { c => 'd' }; + my $y = $db->{foo}; + + # XXX What should happen here with $x and $y? + is( $x, $y ); + is( $x->{c}, 'd' ); + + $db->rollback; } diff --git a/t/41_transaction_multilevel.t b/t/41_transaction_multilevel.t index aa2a959..3351e98 100644 --- a/t/41_transaction_multilevel.t +++ b/t/41_transaction_multilevel.t @@ -10,14 +10,14 @@ my $db1 = DBM::Deep->new( file => $filename, locking => 1, autoflush => 1, - num_txns => 16, + num_txns => 2, ); my $db2 = DBM::Deep->new( file => $filename, locking => 1, autoflush => 1, - num_txns => 16, + num_txns => 2, ); $db1->{x} = { foo => 'y' }; diff --git a/t/44_upgrade_db.t b/t/44_upgrade_db.t index a737591..f72ef70 100644 --- a/t/44_upgrade_db.t +++ b/t/44_upgrade_db.t @@ -13,7 +13,7 @@ BEGIN { } } -plan tests => 116; +plan tests => 222; use t::common qw( new_fh ); use File::Spec; @@ -55,13 +55,15 @@ my @input_files = ( '0-983', '0-99_04', '1-0000', + '1-0003', ); my @output_versions = ( '0.91', '0.92', '0.93', '0.94', '0.95', '0.96', '0.97', '0.98', '0.981', '0.982', '0.983', '0.99_01', '0.99_02', '0.99_03', '0.99_04', - '1.00', '1.000', '1.0000', + '1.00', '1.000', '1.0000', '1.0001', '1.0002', + '1.0003', '1.0004', '1.0005', '1.0006', ); foreach my $input_filename ( @@ -82,6 +84,15 @@ foreach my $input_filename ( "-version $v", ); + # Clone was removed as a requirement in 1.0006 + if ( $output =~ /Can\'t locate Clone\.pm in \@INC/ ) { + ok( 1 ); + unless ( $input_filename =~ /_/ || $v =~ /_/ ) { + ok( 1 ); ok( 1 ); + } + next; + } + if ( $input_filename =~ /_/ ) { is( $output, "'$input_filename' is a dev release and not supported.\n$short", @@ -110,7 +121,12 @@ foreach my $input_filename ( eval "use DBM::Deep::09830"; $db = DBM::Deep::09830->new( $output_filename ); } - elsif ( $v =~ /^1/ ) { + elsif ( $v =~ /^1\.000?[0-2]?/ ) { + push @INC, File::Spec->catdir( 'utils', 'lib' ); + eval "use DBM::Deep::10002"; + $db = DBM::Deep::10002->new( $output_filename ); + } + elsif ( $v =~ /^1\.000[3-6]/ ) { push @INC, 'lib'; eval "use DBM::Deep"; $db = DBM::Deep->new( $output_filename ); diff --git a/t/45_references.t b/t/45_references.t new file mode 100644 index 0000000..d39ba0a --- /dev/null +++ b/t/45_references.t @@ -0,0 +1,83 @@ +## +# DBM::Deep Test +## +use strict; +use Test::More tests => 15; +use Test::Exception; +use t::common qw( new_fh ); + +use_ok( 'DBM::Deep' ); + +my ($fh, $filename) = new_fh(); +my $db = DBM::Deep->new( + file => $filename, + locking => 1, + autoflush => 1, + num_txns => 16, +); + +my $db2 = DBM::Deep->new( + file => $filename, + locking => 1, + autoflush => 1, + num_txns => 16, +); + +$db->{foo} = 5; +$db->{bar} = $db->{foo}; + +is( $db->{foo}, 5, "Foo is still 5" ); +is( $db->{bar}, 5, "Bar is now 5" ); + +$db->{foo} = 6; + +is( $db->{foo}, 6, "Foo is now 6" ); +is( $db->{bar}, 5, "Bar is still 5" ); + +$db->{foo} = [ 1 .. 3 ]; +$db->{bar} = $db->{foo}; + +is( $db->{foo}[1], 2, "Foo[1] is still 2" ); +is( $db->{bar}[1], 2, "Bar[1] is now 2" ); + +$db->{foo}[3] = 42; + +is( $db->{foo}[3], 42, "Foo[3] is now 42" ); +is( $db->{bar}[3], 42, "Bar[3] is also 42" ); + +delete $db->{foo}; +is( $db->{bar}[3], 42, "After delete Foo, Bar[3] is still 42" ); + +$db->{foo} = $db->{bar}; +$db2->begin_work; + + delete $db2->{bar}; + delete $db2->{foo}; + + is( $db2->{bar}, undef, "It's deleted in the transaction" ); + is( $db->{bar}[3], 42, "... but not in the main" ); + +$db2->rollback; + +# Why hasn't this failed!? Is it because stuff isn't getting deleted as expected? +# I need a test that walks the sectors +is( $db->{bar}[3], 42, "After delete Foo, Bar[3] is still 42" ); +is( $db2->{bar}[3], 42, "After delete Foo, Bar[3] is still 42" ); + +delete $db->{foo}; + +is( $db->{bar}[3], 42, "After delete Foo, Bar[3] is still 42" ); + +__END__ +warn "-2\n"; +$db2->begin_work; + +warn "-1\n"; + delete $db2->{bar}; + +warn "0\n"; +$db2->commit; + +warn "1\n"; +ok( !exists $db->{bar}, "After commit, bar is gone" ); +warn "2\n"; diff --git a/t/97_dump_file.t b/t/97_dump_file.t new file mode 100644 index 0000000..1445517 --- /dev/null +++ b/t/97_dump_file.t @@ -0,0 +1,34 @@ +use strict; +use Test::More tests => 3; +use Test::Deep; +use t::common qw( new_fh ); + +use_ok( 'DBM::Deep' ); + +my ($fh, $filename) = new_fh(); +my $db = DBM::Deep->new( + file => $filename, +); + +is( $db->_dump_file, <<"__END_DUMP__", "Dump of initial file correct" ); +NumTxns: 1 +Chains(B): +Chains(D): +Chains(I): +00000030: H 0064 REF: 1 +__END_DUMP__ + +$db->{foo} = 'bar'; + +is( $db->_dump_file, <<"__END_DUMP__", "Dump of initial file correct" ); +NumTxns: 1 +Chains(B): +Chains(D): +Chains(I): +00000030: H 0064 REF: 1 +00000094: D 0064 bar +00000158: B 0387 + 00000545 00000094 +00000545: D 0064 foo +__END_DUMP__ + diff --git a/t/99_pod_coverage.t b/t/99_pod_coverage.t index 0fd3457..12009e4 100644 --- a/t/99_pod_coverage.t +++ b/t/99_pod_coverage.t @@ -3,11 +3,13 @@ use strict; -use Test::More tests => 1; +use Test::More; eval "use Test::Pod::Coverage 1.04"; plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@; +plan tests => 1; + # I don't know why TYPE_ARRAY isn't being caught and TYPE_HASH is. my @private_methods = qw( TYPE_ARRAY diff --git a/t/etc/db-1-0003 b/t/etc/db-1-0003 new file mode 100644 index 0000000..242ffb8 Binary files /dev/null and b/t/etc/db-1-0003 differ diff --git a/t/37_delete_edge_cases.t b/t_attic/37_delete_edge_cases.t similarity index 100% rename from t/37_delete_edge_cases.t rename to t_attic/37_delete_edge_cases.t diff --git a/utils/lib/DBM/Deep/10002.pm b/utils/lib/DBM/Deep/10002.pm new file mode 100644 index 0000000..86581ad --- /dev/null +++ b/utils/lib/DBM/Deep/10002.pm @@ -0,0 +1,3294 @@ +package DBM::Deep::10002; + +use 5.006_000; + +use strict; +use warnings; + +our $VERSION = q(1.0002); + +use Fcntl qw( :flock ); + +use Clone (); +use Digest::MD5 (); +use FileHandle::Fmode (); +use Scalar::Util (); + +#use DBM::Deep::10002::Engine; +#use DBM::Deep::10002::File; + +## +# Setup constants for users to pass to new() +## +sub TYPE_HASH () { DBM::Deep::10002::Engine->SIG_HASH } +sub TYPE_ARRAY () { DBM::Deep::10002::Engine->SIG_ARRAY } + +# This is used in all the children of this class in their TIE methods. +sub _get_args { + my $proto = shift; + + my $args; + if (scalar(@_) > 1) { + if ( @_ % 2 ) { + $proto->_throw_error( "Odd number of parameters to " . (caller(1))[2] ); + } + $args = {@_}; + } + elsif ( ref $_[0] ) { + unless ( eval { local $SIG{'__DIE__'}; %{$_[0]} || 1 } ) { + $proto->_throw_error( "Not a hashref in args to " . (caller(1))[2] ); + } + $args = $_[0]; + } + else { + $args = { file => shift }; + } + + return $args; +} + +sub new { + ## + # Class constructor method for Perl OO interface. + # Calls tie() and returns blessed reference to tied hash or array, + # providing a hybrid OO/tie interface. + ## + my $class = shift; + my $args = $class->_get_args( @_ ); + + ## + # Check if we want a tied hash or array. + ## + my $self; + if (defined($args->{type}) && $args->{type} eq TYPE_ARRAY) { + $class = 'DBM::Deep::10002::Array'; + #require DBM::Deep::10002::Array; + tie @$self, $class, %$args; + } + else { + $class = 'DBM::Deep::10002::Hash'; + #require DBM::Deep::10002::Hash; + tie %$self, $class, %$args; + } + + return bless $self, $class; +} + +# This initializer is called from the various TIE* methods. new() calls tie(), +# which allows for a single point of entry. +sub _init { + my $class = shift; + my ($args) = @_; + + $args->{storage} = DBM::Deep::10002::File->new( $args ) + unless exists $args->{storage}; + + # locking implicitly enables autoflush + if ($args->{locking}) { $args->{autoflush} = 1; } + + # These are the defaults to be optionally overridden below + my $self = bless { + type => TYPE_HASH, + base_offset => undef, + staleness => undef, + + storage => undef, + engine => undef, + }, $class; + + $args->{engine} = DBM::Deep::10002::Engine->new( { %{$args}, obj => $self } ) + unless exists $args->{engine}; + + # Grab the parameters we want to use + foreach my $param ( keys %$self ) { + next unless exists $args->{$param}; + $self->{$param} = $args->{$param}; + } + + eval { + local $SIG{'__DIE__'}; + + $self->lock; + $self->_engine->setup_fh( $self ); + $self->_storage->set_inode; + $self->unlock; + }; if ( $@ ) { + my $e = $@; + eval { local $SIG{'__DIE__'}; $self->unlock; }; + die $e; + } + + return $self; +} + +sub TIEHASH { + shift; + #require DBM::Deep::10002::Hash; + return DBM::Deep::10002::Hash->TIEHASH( @_ ); +} + +sub TIEARRAY { + shift; + #require DBM::Deep::10002::Array; + return DBM::Deep::10002::Array->TIEARRAY( @_ ); +} + +sub lock { + my $self = shift->_get_self; + return $self->_storage->lock( $self, @_ ); +} + +sub unlock { + my $self = shift->_get_self; + return $self->_storage->unlock( $self, @_ ); +} + +sub _copy_value { + my $self = shift->_get_self; + my ($spot, $value) = @_; + + if ( !ref $value ) { + ${$spot} = $value; + } + elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep::10002' ) } ) { + ${$spot} = $value->_repr; + $value->_copy_node( ${$spot} ); + } + else { + my $r = Scalar::Util::reftype( $value ); + my $c = Scalar::Util::blessed( $value ); + if ( $r eq 'ARRAY' ) { + ${$spot} = [ @{$value} ]; + } + else { + ${$spot} = { %{$value} }; + } + ${$spot} = bless ${$spot}, $c + if defined $c; + } + + return 1; +} + +#sub _copy_node { +# die "Must be implemented in a child class\n"; +#} +# +#sub _repr { +# die "Must be implemented in a child class\n"; +#} + +sub export { + ## + # Recursively export into standard Perl hashes and arrays. + ## + my $self = shift->_get_self; + + my $temp = $self->_repr; + + $self->lock(); + $self->_copy_node( $temp ); + $self->unlock(); + + my $classname = $self->_engine->get_classname( $self ); + if ( defined $classname ) { + bless $temp, $classname; + } + + return $temp; +} + +sub import { + ## + # Recursively import Perl hash/array structure + ## + if (!ref($_[0])) { return; } # Perl calls import() on use -- ignore + + my $self = shift->_get_self; + my ($struct) = @_; + + # struct is not a reference, so just import based on our type + if (!ref($struct)) { + $struct = $self->_repr( @_ ); + } + + #XXX This isn't the best solution. Better would be to use Data::Walker, + #XXX but that's a lot more thinking than I want to do right now. + eval { + local $SIG{'__DIE__'}; + $self->_import( Clone::clone( $struct ) ); + }; if ( my $e = $@ ) { + die $e; + } + + return 1; +} + +#XXX Need to keep track of who has a fh to this file in order to +#XXX close them all prior to optimize on Win32/cygwin +sub optimize { + ## + # Rebuild entire database into new file, then move + # it back on top of original. + ## + my $self = shift->_get_self; + +#XXX Need to create a new test for this +# if ($self->_storage->{links} > 1) { +# $self->_throw_error("Cannot optimize: reference count is greater than 1"); +# } + + #XXX Do we have to lock the tempfile? + + my $db_temp = DBM::Deep::10002->new( + file => $self->_storage->{file} . '.tmp', + type => $self->_type, + + # Bring over all the parameters that we need to bring over + num_txns => $self->_engine->num_txns, + byte_size => $self->_engine->byte_size, + max_buckets => $self->_engine->max_buckets, + ); + + $self->lock(); + $self->_copy_node( $db_temp ); + undef $db_temp; + + ## + # Attempt to copy user, group and permissions over to new file + ## + my @stats = stat($self->_fh); + my $perms = $stats[2] & 07777; + my $uid = $stats[4]; + my $gid = $stats[5]; + chown( $uid, $gid, $self->_storage->{file} . '.tmp' ); + chmod( $perms, $self->_storage->{file} . '.tmp' ); + + # q.v. perlport for more information on this variable + if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) { + ## + # Potential race condition when optmizing on Win32 with locking. + # The Windows filesystem requires that the filehandle be closed + # before it is overwritten with rename(). This could be redone + # with a soft copy. + ## + $self->unlock(); + $self->_storage->close; + } + + if (!rename $self->_storage->{file} . '.tmp', $self->_storage->{file}) { + unlink $self->_storage->{file} . '.tmp'; + $self->unlock(); + $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!"); + } + + $self->unlock(); + $self->_storage->close; + + $self->_storage->open; + $self->lock(); + $self->_engine->setup_fh( $self ); + $self->unlock(); + + return 1; +} + +sub clone { + ## + # Make copy of object and return + ## + my $self = shift->_get_self; + + return DBM::Deep::10002->new( + type => $self->_type, + base_offset => $self->_base_offset, + staleness => $self->_staleness, + storage => $self->_storage, + engine => $self->_engine, + ); +} + +#XXX Migrate this to the engine, where it really belongs and go through some +# API - stop poking in the innards of someone else.. +{ + my %is_legal_filter = map { + $_ => ~~1, + } qw( + store_key store_value + fetch_key fetch_value + ); + + sub set_filter { + ## + # Setup filter function for storing or fetching the key or value + ## + my $self = shift->_get_self; + my $type = lc shift; + my $func = shift; + + if ( $is_legal_filter{$type} ) { + $self->_storage->{"filter_$type"} = $func; + return 1; + } + + return; + } +} + +sub begin_work { + my $self = shift->_get_self; + return $self->_engine->begin_work( $self, @_ ); +} + +sub rollback { + my $self = shift->_get_self; + return $self->_engine->rollback( $self, @_ ); +} + +sub commit { + my $self = shift->_get_self; + return $self->_engine->commit( $self, @_ ); +} + +## +# Accessor methods +## + +sub _engine { + my $self = $_[0]->_get_self; + return $self->{engine}; +} + +sub _storage { + my $self = $_[0]->_get_self; + return $self->{storage}; +} + +sub _type { + my $self = $_[0]->_get_self; + return $self->{type}; +} + +sub _base_offset { + my $self = $_[0]->_get_self; + return $self->{base_offset}; +} + +sub _staleness { + my $self = $_[0]->_get_self; + return $self->{staleness}; +} + +sub _fh { + my $self = $_[0]->_get_self; + return $self->_storage->{fh}; +} + +## +# Utility methods +## + +sub _throw_error { + die "DBM::Deep::10002: $_[1]\n"; + my $n = 0; + while( 1 ) { + my @caller = caller( ++$n ); + next if $caller[0] =~ m/^DBM::Deep::10002/; + + die "DBM::Deep::10002: $_[1] at $0 line $caller[2]\n"; + last; + } +} + +sub STORE { + ## + # Store single hash key/value or array element in database. + ## + my $self = shift->_get_self; + my ($key, $value) = @_; + + if ( !FileHandle::Fmode::is_W( $self->_fh ) ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } + + ## + # Request exclusive lock for writing + ## + $self->lock( LOCK_EX ); + + # User may be storing a complex value, in which case we do not want it run + # through the filtering system. + if ( !ref($value) && $self->_storage->{filter_store_value} ) { + $value = $self->_storage->{filter_store_value}->( $value ); + } + + $self->_engine->write_value( $self, $key, $value); + + $self->unlock(); + + return 1; +} + +sub FETCH { + ## + # Fetch single value or element given plain key or array index + ## + my $self = shift->_get_self; + my ($key) = @_; + + ## + # Request shared lock for reading + ## + $self->lock( LOCK_SH ); + + my $result = $self->_engine->read_value( $self, $key); + + $self->unlock(); + + # Filters only apply to scalar values, so the ref check is making + # sure the fetched bucket is a scalar, not a child hash or array. + return ($result && !ref($result) && $self->_storage->{filter_fetch_value}) + ? $self->_storage->{filter_fetch_value}->($result) + : $result; +} + +sub DELETE { + ## + # Delete single key/value pair or element given plain key or array index + ## + my $self = shift->_get_self; + my ($key) = @_; + + if ( !FileHandle::Fmode::is_W( $self->_fh ) ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } + + ## + # Request exclusive lock for writing + ## + $self->lock( LOCK_EX ); + + ## + # Delete bucket + ## + my $value = $self->_engine->delete_key( $self, $key); + + if (defined $value && !ref($value) && $self->_storage->{filter_fetch_value}) { + $value = $self->_storage->{filter_fetch_value}->($value); + } + + $self->unlock(); + + return $value; +} + +sub EXISTS { + ## + # Check if a single key or element exists given plain key or array index + ## + my $self = shift->_get_self; + my ($key) = @_; + + ## + # Request shared lock for reading + ## + $self->lock( LOCK_SH ); + + my $result = $self->_engine->key_exists( $self, $key ); + + $self->unlock(); + + return $result; +} + +sub CLEAR { + ## + # Clear all keys from hash, or all elements from array. + ## + my $self = shift->_get_self; + + if ( !FileHandle::Fmode::is_W( $self->_fh ) ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } + + ## + # Request exclusive lock for writing + ## + $self->lock( LOCK_EX ); + + #XXX Rewrite this dreck to do it in the engine as a tight loop vs. + # iterating over keys - such a WASTE - is this required for transactional + # clearning?! Surely that can be detected in the engine ... + if ( $self->_type eq TYPE_HASH ) { + my $key = $self->first_key; + while ( $key ) { + # Retrieve the key before deleting because we depend on next_key + my $next_key = $self->next_key( $key ); + $self->_engine->delete_key( $self, $key, $key ); + $key = $next_key; + } + } + else { + my $size = $self->FETCHSIZE; + for my $key ( 0 .. $size - 1 ) { + $self->_engine->delete_key( $self, $key, $key ); + } + $self->STORESIZE( 0 ); + } + + $self->unlock(); + + return 1; +} + +## +# Public method aliases +## +sub put { (shift)->STORE( @_ ) } +sub store { (shift)->STORE( @_ ) } +sub get { (shift)->FETCH( @_ ) } +sub fetch { (shift)->FETCH( @_ ) } +sub delete { (shift)->DELETE( @_ ) } +sub exists { (shift)->EXISTS( @_ ) } +sub clear { (shift)->CLEAR( @_ ) } + +package DBM::Deep::10002::Array; + +use 5.006_000; + +use strict; +use warnings; + +our $VERSION = q(1.0002); + +# This is to allow DBM::Deep::10002::Array to handle negative indices on +# its own. Otherwise, Perl would intercept the call to negative +# indices for us. This was causing bugs for negative index handling. +our $NEGATIVE_INDICES = 1; + +use base 'DBM::Deep::10002'; + +use Scalar::Util (); + +sub _get_self { + eval { local $SIG{'__DIE__'}; tied( @{$_[0]} ) } || $_[0] +} + +sub _repr { shift;[ @_ ] } + +sub _import { + my $self = shift; + my ($struct) = @_; + + $self->push( @$struct ); + + return 1; +} + +sub TIEARRAY { + my $class = shift; + my $args = $class->_get_args( @_ ); + + $args->{type} = $class->TYPE_ARRAY; + + return $class->_init($args); +} + +sub FETCH { + my $self = shift->_get_self; + my ($key) = @_; + + $self->lock( $self->LOCK_SH ); + + if ( !defined $key ) { + DBM::Deep::10002->_throw_error( "Cannot use an undefined array index." ); + } + elsif ( $key =~ /^-?\d+$/ ) { + if ( $key < 0 ) { + $key += $self->FETCHSIZE; + unless ( $key >= 0 ) { + $self->unlock; + return; + } + } + } + elsif ( $key ne 'length' ) { + $self->unlock; + DBM::Deep::10002->_throw_error( "Cannot use '$key' as an array index." ); + } + + my $rv = $self->SUPER::FETCH( $key ); + + $self->unlock; + + return $rv; +} + +sub STORE { + my $self = shift->_get_self; + my ($key, $value) = @_; + + $self->lock( $self->LOCK_EX ); + + my $size; + my $idx_is_numeric; + if ( !defined $key ) { + DBM::Deep::10002->_throw_error( "Cannot use an undefined array index." ); + } + elsif ( $key =~ /^-?\d+$/ ) { + $idx_is_numeric = 1; + if ( $key < 0 ) { + $size = $self->FETCHSIZE; + if ( $key + $size < 0 ) { + die( "Modification of non-creatable array value attempted, subscript $key" ); + } + $key += $size + } + } + elsif ( $key ne 'length' ) { + $self->unlock; + DBM::Deep::10002->_throw_error( "Cannot use '$key' as an array index." ); + } + + my $rv = $self->SUPER::STORE( $key, $value ); + + if ( $idx_is_numeric ) { + $size = $self->FETCHSIZE unless defined $size; + if ( $key >= $size ) { + $self->STORESIZE( $key + 1 ); + } + } + + $self->unlock; + + return $rv; +} + +sub EXISTS { + my $self = shift->_get_self; + my ($key) = @_; + + $self->lock( $self->LOCK_SH ); + + if ( !defined $key ) { + DBM::Deep::10002->_throw_error( "Cannot use an undefined array index." ); + } + elsif ( $key =~ /^-?\d+$/ ) { + if ( $key < 0 ) { + $key += $self->FETCHSIZE; + unless ( $key >= 0 ) { + $self->unlock; + return; + } + } + } + elsif ( $key ne 'length' ) { + $self->unlock; + DBM::Deep::10002->_throw_error( "Cannot use '$key' as an array index." ); + } + + my $rv = $self->SUPER::EXISTS( $key ); + + $self->unlock; + + return $rv; +} + +sub DELETE { + my $self = shift->_get_self; + my ($key) = @_; + + $self->lock( $self->LOCK_EX ); + + my $size = $self->FETCHSIZE; + if ( !defined $key ) { + DBM::Deep::10002->_throw_error( "Cannot use an undefined array index." ); + } + elsif ( $key =~ /^-?\d+$/ ) { + if ( $key < 0 ) { + $key += $size; + unless ( $key >= 0 ) { + $self->unlock; + return; + } + } + } + elsif ( $key ne 'length' ) { + $self->unlock; + DBM::Deep::10002->_throw_error( "Cannot use '$key' as an array index." ); + } + + my $rv = $self->SUPER::DELETE( $key ); + + if ($rv && $key == $size - 1) { + $self->STORESIZE( $key ); + } + + $self->unlock; + + return $rv; +} + +# Now that we have a real Reference sector, we should store arrayzize there. However, +# arraysize needs to be transactionally-aware, so a simple location to store it isn't +# going to work. +sub FETCHSIZE { + my $self = shift->_get_self; + + $self->lock( $self->LOCK_SH ); + + my $SAVE_FILTER = $self->_storage->{filter_fetch_value}; + $self->_storage->{filter_fetch_value} = undef; + + my $size = $self->FETCH('length') || 0; + + $self->_storage->{filter_fetch_value} = $SAVE_FILTER; + + $self->unlock; + + return $size; +} + +sub STORESIZE { + my $self = shift->_get_self; + my ($new_length) = @_; + + $self->lock( $self->LOCK_EX ); + + my $SAVE_FILTER = $self->_storage->{filter_store_value}; + $self->_storage->{filter_store_value} = undef; + + my $result = $self->STORE('length', $new_length, 'length'); + + $self->_storage->{filter_store_value} = $SAVE_FILTER; + + $self->unlock; + + return $result; +} + +sub POP { + my $self = shift->_get_self; + + $self->lock( $self->LOCK_EX ); + + my $length = $self->FETCHSIZE(); + + if ($length) { + my $content = $self->FETCH( $length - 1 ); + $self->DELETE( $length - 1 ); + + $self->unlock; + + return $content; + } + else { + $self->unlock; + return; + } +} + +sub PUSH { + my $self = shift->_get_self; + + $self->lock( $self->LOCK_EX ); + + my $length = $self->FETCHSIZE(); + + while (my $content = shift @_) { + $self->STORE( $length, $content ); + $length++; + } + + $self->unlock; + + return $length; +} + +# XXX This really needs to be something more direct within the file, not a +# fetch and re-store. -RobK, 2007-09-20 +sub _move_value { + my $self = shift; + my ($old_key, $new_key) = @_; + + my $val = $self->FETCH( $old_key ); + if ( eval { local $SIG{'__DIE__'}; $val->isa( 'DBM::Deep::10002::Hash' ) } ) { + $self->STORE( $new_key, { %$val } ); + } + elsif ( eval { local $SIG{'__DIE__'}; $val->isa( 'DBM::Deep::10002::Array' ) } ) { + $self->STORE( $new_key, [ @$val ] ); + } + else { + $self->STORE( $new_key, $val ); + } +} + +sub SHIFT { + my $self = shift->_get_self; + + $self->lock( $self->LOCK_EX ); + + my $length = $self->FETCHSIZE(); + + if ($length) { + my $content = $self->FETCH( 0 ); + + for (my $i = 0; $i < $length - 1; $i++) { + $self->_move_value( $i+1, $i ); + } + $self->DELETE( $length - 1 ); + + $self->unlock; + + return $content; + } + else { + $self->unlock; + return; + } +} + +sub UNSHIFT { + my $self = shift->_get_self; + my @new_elements = @_; + + $self->lock( $self->LOCK_EX ); + + my $length = $self->FETCHSIZE(); + my $new_size = scalar @new_elements; + + if ($length) { + for (my $i = $length - 1; $i >= 0; $i--) { + $self->_move_value( $i, $i+$new_size ); + } + } + + for (my $i = 0; $i < $new_size; $i++) { + $self->STORE( $i, $new_elements[$i] ); + } + + $self->unlock; + + return $length + $new_size; +} + +sub SPLICE { + my $self = shift->_get_self; + + $self->lock( $self->LOCK_EX ); + + my $length = $self->FETCHSIZE(); + + ## + # Calculate offset and length of splice + ## + my $offset = shift; + $offset = 0 unless defined $offset; + if ($offset < 0) { $offset += $length; } + + my $splice_length; + if (scalar @_) { $splice_length = shift; } + else { $splice_length = $length - $offset; } + if ($splice_length < 0) { $splice_length += ($length - $offset); } + + ## + # Setup array with new elements, and copy out old elements for return + ## + my @new_elements = @_; + my $new_size = scalar @new_elements; + + my @old_elements = map { + $self->FETCH( $_ ) + } $offset .. ($offset + $splice_length - 1); + + ## + # Adjust array length, and shift elements to accomodate new section. + ## + if ( $new_size != $splice_length ) { + if ($new_size > $splice_length) { + for (my $i = $length - 1; $i >= $offset + $splice_length; $i--) { + $self->_move_value( $i, $i + ($new_size - $splice_length) ); + } + } + else { + for (my $i = $offset + $splice_length; $i < $length; $i++) { + $self->_move_value( $i, $i + ($new_size - $splice_length) ); + } + for (my $i = 0; $i < $splice_length - $new_size; $i++) { + $self->DELETE( $length - 1 ); + $length--; + } + } + } + + ## + # Insert new elements into array + ## + for (my $i = $offset; $i < $offset + $new_size; $i++) { + $self->STORE( $i, shift @new_elements ); + } + + $self->unlock; + + ## + # Return deleted section, or last element in scalar context. + ## + return wantarray ? @old_elements : $old_elements[-1]; +} + +# We don't need to populate it, yet. +# It will be useful, though, when we split out HASH and ARRAY +sub EXTEND { + ## + # Perl will call EXTEND() when the array is likely to grow. + # We don't care, but include it because it gets called at times. + ## +} + +sub _copy_node { + my $self = shift; + my ($db_temp) = @_; + + my $length = $self->length(); + for (my $index = 0; $index < $length; $index++) { + my $value = $self->get($index); + $self->_copy_value( \$db_temp->[$index], $value ); + } + + return 1; +} + +## +# Public method aliases +## +sub length { (shift)->FETCHSIZE(@_) } +sub pop { (shift)->POP(@_) } +sub push { (shift)->PUSH(@_) } +sub unshift { (shift)->UNSHIFT(@_) } +sub splice { (shift)->SPLICE(@_) } + +# This must be last otherwise we have to qualify all other calls to shift +# as calls to CORE::shift +sub shift { (CORE::shift)->SHIFT(@_) } + +package DBM::Deep::10002::Hash; + +use 5.006_000; + +use strict; +use warnings; + +our $VERSION = q(1.0002); + +use base 'DBM::Deep::10002'; + +sub _get_self { + eval { local $SIG{'__DIE__'}; tied( %{$_[0]} ) } || $_[0] +} + +#XXX Need to add a check here for @_ % 2 +sub _repr { shift;return { @_ } } + +sub _import { + my $self = shift; + my ($struct) = @_; + + foreach my $key (keys %$struct) { + $self->put($key, $struct->{$key}); + } + + return 1; +} + +sub TIEHASH { + ## + # Tied hash constructor method, called by Perl's tie() function. + ## + my $class = shift; + my $args = $class->_get_args( @_ ); + + $args->{type} = $class->TYPE_HASH; + + return $class->_init($args); +} + +sub FETCH { + my $self = shift->_get_self; + DBM::Deep::10002->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0]; + my $key = ($self->_storage->{filter_store_key}) + ? $self->_storage->{filter_store_key}->($_[0]) + : $_[0]; + + return $self->SUPER::FETCH( $key, $_[0] ); +} + +sub STORE { + my $self = shift->_get_self; + DBM::Deep::10002->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0]; + my $key = ($self->_storage->{filter_store_key}) + ? $self->_storage->{filter_store_key}->($_[0]) + : $_[0]; + my $value = $_[1]; + + return $self->SUPER::STORE( $key, $value, $_[0] ); +} + +sub EXISTS { + my $self = shift->_get_self; + DBM::Deep::10002->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0]; + my $key = ($self->_storage->{filter_store_key}) + ? $self->_storage->{filter_store_key}->($_[0]) + : $_[0]; + + return $self->SUPER::EXISTS( $key ); +} + +sub DELETE { + my $self = shift->_get_self; + DBM::Deep::10002->_throw_error( "Cannot use an undefined hash key." ) unless defined $_[0]; + my $key = ($self->_storage->{filter_store_key}) + ? $self->_storage->{filter_store_key}->($_[0]) + : $_[0]; + + return $self->SUPER::DELETE( $key, $_[0] ); +} + +sub FIRSTKEY { + ## + # Locate and return first key (in no particular order) + ## + my $self = shift->_get_self; + + ## + # Request shared lock for reading + ## + $self->lock( $self->LOCK_SH ); + + my $result = $self->_engine->get_next_key( $self ); + + $self->unlock(); + + return ($result && $self->_storage->{filter_fetch_key}) + ? $self->_storage->{filter_fetch_key}->($result) + : $result; +} + +sub NEXTKEY { + ## + # Return next key (in no particular order), given previous one + ## + my $self = shift->_get_self; + + my $prev_key = ($self->_storage->{filter_store_key}) + ? $self->_storage->{filter_store_key}->($_[0]) + : $_[0]; + + ## + # Request shared lock for reading + ## + $self->lock( $self->LOCK_SH ); + + my $result = $self->_engine->get_next_key( $self, $prev_key ); + + $self->unlock(); + + return ($result && $self->_storage->{filter_fetch_key}) + ? $self->_storage->{filter_fetch_key}->($result) + : $result; +} + +## +# Public method aliases +## +sub first_key { (shift)->FIRSTKEY(@_) } +sub next_key { (shift)->NEXTKEY(@_) } + +sub _copy_node { + my $self = shift; + my ($db_temp) = @_; + + my $key = $self->first_key(); + while ($key) { + my $value = $self->get($key); + $self->_copy_value( \$db_temp->{$key}, $value ); + $key = $self->next_key($key); + } + + return 1; +} + +package DBM::Deep::10002::File; + +use 5.006_000; + +use strict; +use warnings; + +our $VERSION = q(1.0002); + +use Fcntl qw( :DEFAULT :flock :seek ); + +sub new { + my $class = shift; + my ($args) = @_; + + my $self = bless { + autobless => 1, + autoflush => 1, + end => 0, + fh => undef, + file => undef, + file_offset => 0, + locking => 1, + locked => 0, +#XXX Migrate this to the engine, where it really belongs. + filter_store_key => undef, + filter_store_value => undef, + filter_fetch_key => undef, + filter_fetch_value => undef, + }, $class; + + # Grab the parameters we want to use + foreach my $param ( keys %$self ) { + next unless exists $args->{$param}; + $self->{$param} = $args->{$param}; + } + + if ( $self->{fh} && !$self->{file_offset} ) { + $self->{file_offset} = tell( $self->{fh} ); + } + + $self->open unless $self->{fh}; + + return $self; +} + +sub open { + my $self = shift; + + # Adding O_BINARY should remove the need for the binmode below. However, + # I'm not going to remove it because I don't have the Win32 chops to be + # absolutely certain everything will be ok. + my $flags = O_CREAT | O_BINARY; + + if ( !-e $self->{file} || -w _ ) { + $flags |= O_RDWR; + } + else { + $flags |= O_RDONLY; + } + + my $fh; + sysopen( $fh, $self->{file}, $flags ) + or die "DBM::Deep::10002: Cannot sysopen file '$self->{file}': $!\n"; + $self->{fh} = $fh; + + # Even though we use O_BINARY, better be safe than sorry. + binmode $fh; + + if ($self->{autoflush}) { + my $old = select $fh; + $|=1; + select $old; + } + + return 1; +} + +sub close { + my $self = shift; + + if ( $self->{fh} ) { + close $self->{fh}; + $self->{fh} = undef; + } + + return 1; +} + +sub set_inode { + my $self = shift; + + unless ( defined $self->{inode} ) { + my @stats = stat($self->{fh}); + $self->{inode} = $stats[1]; + $self->{end} = $stats[7]; + } + + return 1; +} + +sub print_at { + my $self = shift; + my $loc = shift; + + local ($/,$\); + + my $fh = $self->{fh}; + if ( defined $loc ) { + seek( $fh, $loc + $self->{file_offset}, SEEK_SET ); + } + + print( $fh @_ ); + + return 1; +} + +sub read_at { + my $self = shift; + my ($loc, $size) = @_; + + local ($/,$\); + + my $fh = $self->{fh}; + if ( defined $loc ) { + seek( $fh, $loc + $self->{file_offset}, SEEK_SET ); + } + + my $buffer; + read( $fh, $buffer, $size); + + return $buffer; +} + +sub DESTROY { + my $self = shift; + return unless $self; + + $self->close; + + return; +} + +sub request_space { + my $self = shift; + my ($size) = @_; + + #XXX Do I need to reset $self->{end} here? I need a testcase + my $loc = $self->{end}; + $self->{end} += $size; + + return $loc; +} + +## +# If db locking is set, flock() the db file. If called multiple +# times before unlock(), then the same number of unlocks() must +# be called before the lock is released. +## +sub lock { + my $self = shift; + my ($obj, $type) = @_; + + $type = LOCK_EX unless defined $type; + + if (!defined($self->{fh})) { return; } + + if ($self->{locking}) { + if (!$self->{locked}) { + flock($self->{fh}, $type); + + # refresh end counter in case file has changed size + my @stats = stat($self->{fh}); + $self->{end} = $stats[7]; + + # double-check file inode, in case another process + # has optimize()d our file while we were waiting. + if (defined($self->{inode}) && $stats[1] != $self->{inode}) { + $self->close; + $self->open; + + #XXX This needs work + $obj->{engine}->setup_fh( $obj ); + + flock($self->{fh}, $type); # re-lock + + # This may not be necessary after re-opening + $self->{end} = (stat($self->{fh}))[7]; # re-end + } + } + $self->{locked}++; + + return 1; + } + + return; +} + +## +# If db locking is set, unlock the db file. See note in lock() +# regarding calling lock() multiple times. +## +sub unlock { + my $self = shift; + + if (!defined($self->{fh})) { return; } + + if ($self->{locking} && $self->{locked} > 0) { + $self->{locked}--; + if (!$self->{locked}) { flock($self->{fh}, LOCK_UN); } + + return 1; + } + + return; +} + +sub flush { + my $self = shift; + + # Flush the filehandle + my $old_fh = select $self->{fh}; + my $old_af = $|; $| = 1; $| = $old_af; + select $old_fh; + + return 1; +} + +package DBM::Deep::10002::Engine; + +use 5.006_000; + +use strict; +use warnings; + +our $VERSION = q(1.0002); + +use Scalar::Util (); + +# File-wide notes: +# * Every method in here assumes that the storage has been appropriately +# safeguarded. This can be anything from flock() to some sort of manual +# mutex. But, it's the caller's responsability to make sure that this has +# been done. + +# Setup file and tag signatures. These should never change. +sub SIG_FILE () { 'DPDB' } +sub SIG_HEADER () { 'h' } +sub SIG_HASH () { 'H' } +sub SIG_ARRAY () { 'A' } +sub SIG_NULL () { 'N' } +sub SIG_DATA () { 'D' } +sub SIG_INDEX () { 'I' } +sub SIG_BLIST () { 'B' } +sub SIG_FREE () { 'F' } +sub SIG_SIZE () { 1 } + +my $STALE_SIZE = 2; + +# Please refer to the pack() documentation for further information +my %StP = ( + 1 => 'C', # Unsigned char value (no order needed as it's just one byte) + 2 => 'n', # Unsigned short in "network" (big-endian) order + 4 => 'N', # Unsigned long in "network" (big-endian) order + 8 => 'Q', # Usigned quad (no order specified, presumably machine-dependent) +); + +################################################################################ + +sub new { + my $class = shift; + my ($args) = @_; + + my $self = bless { + byte_size => 4, + + digest => undef, + hash_size => 16, # In bytes + hash_chars => 256, # Number of chars the algorithm uses per byte + max_buckets => 16, + num_txns => 1, # The HEAD + trans_id => 0, # Default to the HEAD + + data_sector_size => 64, # Size in bytes of each data sector + + entries => {}, # This is the list of entries for transactions + storage => undef, + }, $class; + + # Never allow byte_size to be set directly. + delete $args->{byte_size}; + if ( defined $args->{pack_size} ) { + if ( lc $args->{pack_size} eq 'small' ) { + $args->{byte_size} = 2; + } + elsif ( lc $args->{pack_size} eq 'medium' ) { + $args->{byte_size} = 4; + } + elsif ( lc $args->{pack_size} eq 'large' ) { + $args->{byte_size} = 8; + } + else { + DBM::Deep::10002->_throw_error( "Unknown pack_size value: '$args->{pack_size}'" ); + } + } + + # Grab the parameters we want to use + foreach my $param ( keys %$self ) { + next unless exists $args->{$param}; + $self->{$param} = $args->{$param}; + } + + my %validations = ( + max_buckets => { floor => 16, ceil => 256 }, + num_txns => { floor => 1, ceil => 255 }, + data_sector_size => { floor => 32, ceil => 256 }, + ); + + while ( my ($attr, $c) = each %validations ) { + if ( !defined $self->{$attr} + || !length $self->{$attr} + || $self->{$attr} =~ /\D/ + || $self->{$attr} < $c->{floor} + ) { + $self->{$attr} = '(undef)' if !defined $self->{$attr}; + warn "Floor of $attr is $c->{floor}. Setting it to $c->{floor} from '$self->{$attr}'\n"; + $self->{$attr} = $c->{floor}; + } + elsif ( $self->{$attr} > $c->{ceil} ) { + warn "Ceiling of $attr is $c->{ceil}. Setting it to $c->{ceil} from '$self->{$attr}'\n"; + $self->{$attr} = $c->{ceil}; + } + } + + if ( !$self->{digest} ) { + require Digest::MD5; + $self->{digest} = \&Digest::MD5::md5; + } + + return $self; +} + +################################################################################ + +sub read_value { + my $self = shift; + my ($obj, $key) = @_; + + # This will be a Reference sector + my $sector = $self->_load_sector( $obj->_base_offset ) + or return; + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + my $key_md5 = $self->_apply_digest( $key ); + + my $value_sector = $sector->get_data_for({ + key_md5 => $key_md5, + allow_head => 1, + }); + + unless ( $value_sector ) { + $value_sector = DBM::Deep::10002::Engine::Sector::Null->new({ + engine => $self, + data => undef, + }); + + $sector->write_data({ + key_md5 => $key_md5, + key => $key, + value => $value_sector, + }); + } + + return $value_sector->data; +} + +sub get_classname { + my $self = shift; + my ($obj) = @_; + + # This will be a Reference sector + my $sector = $self->_load_sector( $obj->_base_offset ) + or DBM::Deep::10002->_throw_error( "How did get_classname fail (no sector for '$obj')?!" ); + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + return $sector->get_classname; +} + +sub key_exists { + my $self = shift; + my ($obj, $key) = @_; + + # This will be a Reference sector + my $sector = $self->_load_sector( $obj->_base_offset ) + or return ''; + + if ( $sector->staleness != $obj->_staleness ) { + return ''; + } + + my $data = $sector->get_data_for({ + key_md5 => $self->_apply_digest( $key ), + allow_head => 1, + }); + + # exists() returns 1 or '' for true/false. + return $data ? 1 : ''; +} + +sub delete_key { + my $self = shift; + my ($obj, $key) = @_; + + my $sector = $self->_load_sector( $obj->_base_offset ) + or return; + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + return $sector->delete_key({ + key_md5 => $self->_apply_digest( $key ), + allow_head => 0, + }); +} + +sub write_value { + my $self = shift; + my ($obj, $key, $value) = @_; + + my $r = Scalar::Util::reftype( $value ) || ''; + { + last if $r eq ''; + last if $r eq 'HASH'; + last if $r eq 'ARRAY'; + + DBM::Deep::10002->_throw_error( + "Storage of references of type '$r' is not supported." + ); + } + + my ($class, $type); + if ( !defined $value ) { + $class = 'DBM::Deep::10002::Engine::Sector::Null'; + } + elsif ( $r eq 'ARRAY' || $r eq 'HASH' ) { + if ( $r eq 'ARRAY' && tied(@$value) ) { + DBM::Deep::10002->_throw_error( "Cannot store something that is tied." ); + } + if ( $r eq 'HASH' && tied(%$value) ) { + DBM::Deep::10002->_throw_error( "Cannot store something that is tied." ); + } + $class = 'DBM::Deep::10002::Engine::Sector::Reference'; + $type = substr( $r, 0, 1 ); + } + else { + $class = 'DBM::Deep::10002::Engine::Sector::Scalar'; + } + + # This will be a Reference sector + my $sector = $self->_load_sector( $obj->_base_offset ) + or DBM::Deep::10002->_throw_error( "Cannot write to a deleted spot in DBM::Deep::10002." ); + + if ( $sector->staleness != $obj->_staleness ) { + DBM::Deep::10002->_throw_error( "Cannot write to a deleted spot in DBM::Deep::10002.n" ); + } + + # Create this after loading the reference sector in case something bad happens. + # This way, we won't allocate value sector(s) needlessly. + my $value_sector = $class->new({ + engine => $self, + data => $value, + type => $type, + }); + + $sector->write_data({ + key => $key, + key_md5 => $self->_apply_digest( $key ), + value => $value_sector, + }); + + # This code is to make sure we write all the values in the $value to the disk + # and to make sure all changes to $value after the assignment are reflected + # on disk. This may be counter-intuitive at first, but it is correct dwimmery. + # NOTE - simply tying $value won't perform a STORE on each value. Hence, the + # copy to a temp value. + if ( $r eq 'ARRAY' ) { + my @temp = @$value; + tie @$value, 'DBM::Deep::10002', { + base_offset => $value_sector->offset, + staleness => $value_sector->staleness, + storage => $self->storage, + engine => $self, + }; + @$value = @temp; + bless $value, 'DBM::Deep::10002::Array' unless Scalar::Util::blessed( $value ); + } + elsif ( $r eq 'HASH' ) { + my %temp = %$value; + tie %$value, 'DBM::Deep::10002', { + base_offset => $value_sector->offset, + staleness => $value_sector->staleness, + storage => $self->storage, + engine => $self, + }; + + %$value = %temp; + bless $value, 'DBM::Deep::10002::Hash' unless Scalar::Util::blessed( $value ); + } + + return 1; +} + +# XXX Add staleness here +sub get_next_key { + my $self = shift; + my ($obj, $prev_key) = @_; + + # XXX Need to add logic about resetting the iterator if any key in the reference has changed + unless ( $prev_key ) { + $obj->{iterator} = DBM::Deep::10002::Iterator->new({ + base_offset => $obj->_base_offset, + engine => $self, + }); + } + + return $obj->{iterator}->get_next_key( $obj ); +} + +################################################################################ + +sub setup_fh { + my $self = shift; + my ($obj) = @_; + + # We're opening the file. + unless ( $obj->_base_offset ) { + my $bytes_read = $self->_read_file_header; + + # Creating a new file + unless ( $bytes_read ) { + $self->_write_file_header; + + # 1) Create Array/Hash entry + my $initial_reference = DBM::Deep::10002::Engine::Sector::Reference->new({ + engine => $self, + type => $obj->_type, + }); + $obj->{base_offset} = $initial_reference->offset; + $obj->{staleness} = $initial_reference->staleness; + + $self->storage->flush; + } + # Reading from an existing file + else { + $obj->{base_offset} = $bytes_read; + my $initial_reference = DBM::Deep::10002::Engine::Sector::Reference->new({ + engine => $self, + offset => $obj->_base_offset, + }); + unless ( $initial_reference ) { + DBM::Deep::10002->_throw_error("Corrupted file, no master index record"); + } + + unless ($obj->_type eq $initial_reference->type) { + DBM::Deep::10002->_throw_error("File type mismatch"); + } + + $obj->{staleness} = $initial_reference->staleness; + } + } + + return 1; +} + +sub begin_work { + my $self = shift; + my ($obj) = @_; + + if ( $self->trans_id ) { + DBM::Deep::10002->_throw_error( "Cannot begin_work within an active transaction" ); + } + + my @slots = $self->read_txn_slots; + my $found; + for my $i ( 0 .. $#slots ) { + next if $slots[$i]; + + $slots[$i] = 1; + $self->set_trans_id( $i + 1 ); + $found = 1; + last; + } + unless ( $found ) { + DBM::Deep::10002->_throw_error( "Cannot allocate transaction ID" ); + } + $self->write_txn_slots( @slots ); + + if ( !$self->trans_id ) { + DBM::Deep::10002->_throw_error( "Cannot begin_work - no available transactions" ); + } + + return; +} + +sub rollback { + my $self = shift; + my ($obj) = @_; + + if ( !$self->trans_id ) { + DBM::Deep::10002->_throw_error( "Cannot rollback without an active transaction" ); + } + + # Each entry is the file location for a bucket that has a modification for + # this transaction. The entries need to be expunged. + foreach my $entry (@{ $self->get_entries } ) { + # Remove the entry here + my $read_loc = $entry + + $self->hash_size + + $self->byte_size + + $self->byte_size + + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE ); + + my $data_loc = $self->storage->read_at( $read_loc, $self->byte_size ); + $data_loc = unpack( $StP{$self->byte_size}, $data_loc ); + $self->storage->print_at( $read_loc, pack( $StP{$self->byte_size}, 0 ) ); + + if ( $data_loc > 1 ) { + $self->_load_sector( $data_loc )->free; + } + } + + $self->clear_entries; + + my @slots = $self->read_txn_slots; + $slots[$self->trans_id-1] = 0; + $self->write_txn_slots( @slots ); + $self->inc_txn_staleness_counter( $self->trans_id ); + $self->set_trans_id( 0 ); + + return 1; +} + +sub commit { + my $self = shift; + my ($obj) = @_; + + if ( !$self->trans_id ) { + DBM::Deep::10002->_throw_error( "Cannot commit without an active transaction" ); + } + + foreach my $entry (@{ $self->get_entries } ) { + # Overwrite the entry in head with the entry in trans_id + my $base = $entry + + $self->hash_size + + $self->byte_size; + + my $head_loc = $self->storage->read_at( $base, $self->byte_size ); + $head_loc = unpack( $StP{$self->byte_size}, $head_loc ); + + my $spot = $base + $self->byte_size + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE ); + my $trans_loc = $self->storage->read_at( + $spot, $self->byte_size, + ); + + $self->storage->print_at( $base, $trans_loc ); + $self->storage->print_at( + $spot, + pack( $StP{$self->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ), + ); + + if ( $head_loc > 1 ) { + $self->_load_sector( $head_loc )->free; + } + } + + $self->clear_entries; + + my @slots = $self->read_txn_slots; + $slots[$self->trans_id-1] = 0; + $self->write_txn_slots( @slots ); + $self->inc_txn_staleness_counter( $self->trans_id ); + $self->set_trans_id( 0 ); + + return 1; +} + +sub read_txn_slots { + my $self = shift; + my $bl = $self->txn_bitfield_len; + my $num_bits = $bl * 8; + return split '', unpack( 'b'.$num_bits, + $self->storage->read_at( + $self->trans_loc, $bl, + ) + ); +} + +sub write_txn_slots { + my $self = shift; + my $num_bits = $self->txn_bitfield_len * 8; + $self->storage->print_at( $self->trans_loc, + pack( 'b'.$num_bits, join('', @_) ), + ); +} + +sub get_running_txn_ids { + my $self = shift; + my @transactions = $self->read_txn_slots; + my @trans_ids = map { $_+1} grep { $transactions[$_] } 0 .. $#transactions; +} + +sub get_txn_staleness_counter { + my $self = shift; + my ($trans_id) = @_; + + # Hardcode staleness of 0 for the HEAD + return 0 unless $trans_id; + + return unpack( $StP{$STALE_SIZE}, + $self->storage->read_at( + $self->trans_loc + 4 + $STALE_SIZE * ($trans_id - 1), + 4, + ) + ); +} + +sub inc_txn_staleness_counter { + my $self = shift; + my ($trans_id) = @_; + + # Hardcode staleness of 0 for the HEAD + return unless $trans_id; + + $self->storage->print_at( + $self->trans_loc + 4 + $STALE_SIZE * ($trans_id - 1), + pack( $StP{$STALE_SIZE}, $self->get_txn_staleness_counter( $trans_id ) + 1 ), + ); +} + +sub get_entries { + my $self = shift; + return [ keys %{ $self->{entries}{$self->trans_id} ||= {} } ]; +} + +sub add_entry { + my $self = shift; + my ($trans_id, $loc) = @_; + + $self->{entries}{$trans_id} ||= {}; + $self->{entries}{$trans_id}{$loc} = undef; +} + +# If the buckets are being relocated because of a reindexing, the entries +# mechanism needs to be made aware of it. +sub reindex_entry { + my $self = shift; + my ($old_loc, $new_loc) = @_; + + TRANS: + while ( my ($trans_id, $locs) = each %{ $self->{entries} } ) { + foreach my $orig_loc ( keys %{ $locs } ) { + if ( $orig_loc == $old_loc ) { + delete $locs->{orig_loc}; + $locs->{$new_loc} = undef; + next TRANS; + } + } + } +} + +sub clear_entries { + my $self = shift; + delete $self->{entries}{$self->trans_id}; +} + +################################################################################ + +{ + my $header_fixed = length( SIG_FILE ) + 1 + 4 + 4; + my $this_file_version = 2; + + sub _write_file_header { + my $self = shift; + + my $nt = $self->num_txns; + my $bl = $self->txn_bitfield_len; + + my $header_var = 1 + 1 + 1 + 1 + $bl + $STALE_SIZE * ($nt - 1) + 3 * $self->byte_size; + + my $loc = $self->storage->request_space( $header_fixed + $header_var ); + + $self->storage->print_at( $loc, + SIG_FILE, + SIG_HEADER, + pack('N', $this_file_version), # At this point, we're at 9 bytes + pack('N', $header_var), # header size + # --- Above is $header_fixed. Below is $header_var + pack('C', $self->byte_size), + + # These shenanigans are to allow a 256 within a C + pack('C', $self->max_buckets - 1), + pack('C', $self->data_sector_size - 1), + + pack('C', $nt), + pack('C' . $bl, 0 ), # Transaction activeness bitfield + pack($StP{$STALE_SIZE}.($nt-1), 0 x ($nt-1) ), # Transaction staleness counters + pack($StP{$self->byte_size}, 0), # Start of free chain (blist size) + pack($StP{$self->byte_size}, 0), # Start of free chain (data size) + pack($StP{$self->byte_size}, 0), # Start of free chain (index size) + ); + + #XXX Set these less fragilely + $self->set_trans_loc( $header_fixed + 4 ); + $self->set_chains_loc( $header_fixed + 4 + $bl + $STALE_SIZE * ($nt-1) ); + + return; + } + + sub _read_file_header { + my $self = shift; + + my $buffer = $self->storage->read_at( 0, $header_fixed ); + return unless length($buffer); + + my ($file_signature, $sig_header, $file_version, $size) = unpack( + 'A4 A N N', $buffer + ); + + unless ( $file_signature eq SIG_FILE ) { + $self->storage->close; + DBM::Deep::10002->_throw_error( "Signature not found -- file is not a Deep DB" ); + } + + unless ( $sig_header eq SIG_HEADER ) { + $self->storage->close; + DBM::Deep::10002->_throw_error( "Pre-1.00 file version found" ); + } + + unless ( $file_version == $this_file_version ) { + $self->storage->close; + DBM::Deep::10002->_throw_error( + "Wrong file version found - " . $file_version . + " - expected " . $this_file_version + ); + } + + my $buffer2 = $self->storage->read_at( undef, $size ); + my @values = unpack( 'C C C C', $buffer2 ); + + if ( @values != 4 || grep { !defined } @values ) { + $self->storage->close; + DBM::Deep::10002->_throw_error("Corrupted file - bad header"); + } + + #XXX Add warnings if values weren't set right + @{$self}{qw(byte_size max_buckets data_sector_size num_txns)} = @values; + + # These shenangians are to allow a 256 within a C + $self->{max_buckets} += 1; + $self->{data_sector_size} += 1; + + my $bl = $self->txn_bitfield_len; + + my $header_var = scalar(@values) + $bl + $STALE_SIZE * ($self->num_txns - 1) + 3 * $self->byte_size; + unless ( $size == $header_var ) { + $self->storage->close; + DBM::Deep::10002->_throw_error( "Unexpected size found ($size <-> $header_var)." ); + } + + $self->set_trans_loc( $header_fixed + scalar(@values) ); + $self->set_chains_loc( $header_fixed + scalar(@values) + $bl + $STALE_SIZE * ($self->num_txns - 1) ); + + return length($buffer) + length($buffer2); + } +} + +sub _load_sector { + my $self = shift; + my ($offset) = @_; + + # Add a catch for offset of 0 or 1 + return if $offset <= 1; + + my $type = $self->storage->read_at( $offset, 1 ); + return if $type eq chr(0); + + if ( $type eq $self->SIG_ARRAY || $type eq $self->SIG_HASH ) { + return DBM::Deep::10002::Engine::Sector::Reference->new({ + engine => $self, + type => $type, + offset => $offset, + }); + } + # XXX Don't we need key_md5 here? + elsif ( $type eq $self->SIG_BLIST ) { + return DBM::Deep::10002::Engine::Sector::BucketList->new({ + engine => $self, + type => $type, + offset => $offset, + }); + } + elsif ( $type eq $self->SIG_INDEX ) { + return DBM::Deep::10002::Engine::Sector::Index->new({ + engine => $self, + type => $type, + offset => $offset, + }); + } + elsif ( $type eq $self->SIG_NULL ) { + return DBM::Deep::10002::Engine::Sector::Null->new({ + engine => $self, + type => $type, + offset => $offset, + }); + } + elsif ( $type eq $self->SIG_DATA ) { + return DBM::Deep::10002::Engine::Sector::Scalar->new({ + engine => $self, + type => $type, + offset => $offset, + }); + } + # This was deleted from under us, so just return and let the caller figure it out. + elsif ( $type eq $self->SIG_FREE ) { + return; + } + + DBM::Deep::10002->_throw_error( "'$offset': Don't know what to do with type '$type'" ); +} + +sub _apply_digest { + my $self = shift; + return $self->{digest}->(@_); +} + +sub _add_free_blist_sector { shift->_add_free_sector( 0, @_ ) } +sub _add_free_data_sector { shift->_add_free_sector( 1, @_ ) } +sub _add_free_index_sector { shift->_add_free_sector( 2, @_ ) } + +sub _add_free_sector { + my $self = shift; + my ($multiple, $offset, $size) = @_; + + my $chains_offset = $multiple * $self->byte_size; + + my $storage = $self->storage; + + # Increment staleness. + # XXX Can this increment+modulo be done by "&= 0x1" ? + my $staleness = unpack( $StP{$STALE_SIZE}, $storage->read_at( $offset + SIG_SIZE, $STALE_SIZE ) ); + $staleness = ($staleness + 1 ) % ( 2 ** ( 8 * $STALE_SIZE ) ); + $storage->print_at( $offset + SIG_SIZE, pack( $StP{$STALE_SIZE}, $staleness ) ); + + my $old_head = $storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size ); + + $storage->print_at( $self->chains_loc + $chains_offset, + pack( $StP{$self->byte_size}, $offset ), + ); + + # Record the old head in the new sector after the signature and staleness counter + $storage->print_at( $offset + SIG_SIZE + $STALE_SIZE, $old_head ); +} + +sub _request_blist_sector { shift->_request_sector( 0, @_ ) } +sub _request_data_sector { shift->_request_sector( 1, @_ ) } +sub _request_index_sector { shift->_request_sector( 2, @_ ) } + +sub _request_sector { + my $self = shift; + my ($multiple, $size) = @_; + + my $chains_offset = $multiple * $self->byte_size; + + my $old_head = $self->storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size ); + my $loc = unpack( $StP{$self->byte_size}, $old_head ); + + # We don't have any free sectors of the right size, so allocate a new one. + unless ( $loc ) { + my $offset = $self->storage->request_space( $size ); + + # Zero out the new sector. This also guarantees correct increases + # in the filesize. + $self->storage->print_at( $offset, chr(0) x $size ); + + return $offset; + } + + # Read the new head after the signature and the staleness counter + my $new_head = $self->storage->read_at( $loc + SIG_SIZE + $STALE_SIZE, $self->byte_size ); + $self->storage->print_at( $self->chains_loc + $chains_offset, $new_head ); + $self->storage->print_at( + $loc + SIG_SIZE + $STALE_SIZE, + pack( $StP{$self->byte_size}, 0 ), + ); + + return $loc; +} + +################################################################################ + +sub storage { $_[0]{storage} } +sub byte_size { $_[0]{byte_size} } +sub hash_size { $_[0]{hash_size} } +sub hash_chars { $_[0]{hash_chars} } +sub num_txns { $_[0]{num_txns} } +sub max_buckets { $_[0]{max_buckets} } +sub blank_md5 { chr(0) x $_[0]->hash_size } +sub data_sector_size { $_[0]{data_sector_size} } + +# This is a calculated value +sub txn_bitfield_len { + my $self = shift; + unless ( exists $self->{txn_bitfield_len} ) { + my $temp = ($self->num_txns) / 8; + if ( $temp > int( $temp ) ) { + $temp = int( $temp ) + 1; + } + $self->{txn_bitfield_len} = $temp; + } + return $self->{txn_bitfield_len}; +} + +sub trans_id { $_[0]{trans_id} } +sub set_trans_id { $_[0]{trans_id} = $_[1] } + +sub trans_loc { $_[0]{trans_loc} } +sub set_trans_loc { $_[0]{trans_loc} = $_[1] } + +sub chains_loc { $_[0]{chains_loc} } +sub set_chains_loc { $_[0]{chains_loc} = $_[1] } + +################################################################################ + +package DBM::Deep::10002::Iterator; + +sub new { + my $class = shift; + my ($args) = @_; + + my $self = bless { + breadcrumbs => [], + engine => $args->{engine}, + base_offset => $args->{base_offset}, + }, $class; + + Scalar::Util::weaken( $self->{engine} ); + + return $self; +} + +sub reset { $_[0]{breadcrumbs} = [] } + +sub get_sector_iterator { + my $self = shift; + my ($loc) = @_; + + my $sector = $self->{engine}->_load_sector( $loc ) + or return; + + if ( $sector->isa( 'DBM::Deep::10002::Engine::Sector::Index' ) ) { + return DBM::Deep::10002::Iterator::Index->new({ + iterator => $self, + sector => $sector, + }); + } + elsif ( $sector->isa( 'DBM::Deep::10002::Engine::Sector::BucketList' ) ) { + return DBM::Deep::10002::Iterator::BucketList->new({ + iterator => $self, + sector => $sector, + }); + } + + DBM::Deep::10002->_throw_error( "get_sector_iterator(): Why did $loc make a $sector?" ); +} + +sub get_next_key { + my $self = shift; + my ($obj) = @_; + + my $crumbs = $self->{breadcrumbs}; + my $e = $self->{engine}; + + unless ( @$crumbs ) { + # This will be a Reference sector + my $sector = $e->_load_sector( $self->{base_offset} ) + # If no sector is found, thist must have been deleted from under us. + or return; + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + my $loc = $sector->get_blist_loc + or return; + + push @$crumbs, $self->get_sector_iterator( $loc ); + } + + FIND_NEXT_KEY: { + # We're at the end. + unless ( @$crumbs ) { + $self->reset; + return; + } + + my $iterator = $crumbs->[-1]; + + # This level is done. + if ( $iterator->at_end ) { + pop @$crumbs; + redo FIND_NEXT_KEY; + } + + if ( $iterator->isa( 'DBM::Deep::10002::Iterator::Index' ) ) { + # If we don't have any more, it will be caught at the + # prior check. + if ( my $next = $iterator->get_next_iterator ) { + push @$crumbs, $next; + } + redo FIND_NEXT_KEY; + } + + unless ( $iterator->isa( 'DBM::Deep::10002::Iterator::BucketList' ) ) { + DBM::Deep::10002->_throw_error( + "Should have a bucketlist iterator here - instead have $iterator" + ); + } + + # At this point, we have a BucketList iterator + my $key = $iterator->get_next_key; + if ( defined $key ) { + return $key; + } + #XXX else { $iterator->set_to_end() } ? + + # We hit the end of the bucketlist iterator, so redo + redo FIND_NEXT_KEY; + } + + DBM::Deep::10002->_throw_error( "get_next_key(): How did we get here?" ); +} + +package DBM::Deep::10002::Iterator::Index; + +sub new { + my $self = bless $_[1] => $_[0]; + $self->{curr_index} = 0; + return $self; +} + +sub at_end { + my $self = shift; + return $self->{curr_index} >= $self->{iterator}{engine}->hash_chars; +} + +sub get_next_iterator { + my $self = shift; + + my $loc; + while ( !$loc ) { + return if $self->at_end; + $loc = $self->{sector}->get_entry( $self->{curr_index}++ ); + } + + return $self->{iterator}->get_sector_iterator( $loc ); +} + +package DBM::Deep::10002::Iterator::BucketList; + +sub new { + my $self = bless $_[1] => $_[0]; + $self->{curr_index} = 0; + return $self; +} + +sub at_end { + my $self = shift; + return $self->{curr_index} >= $self->{iterator}{engine}->max_buckets; +} + +sub get_next_key { + my $self = shift; + + return if $self->at_end; + + my $idx = $self->{curr_index}++; + + my $data_loc = $self->{sector}->get_data_location_for({ + allow_head => 1, + idx => $idx, + }) or return; + + #XXX Do we want to add corruption checks here? + return $self->{sector}->get_key_for( $idx )->data; +} + +package DBM::Deep::10002::Engine::Sector; + +sub new { + my $self = bless $_[1], $_[0]; + Scalar::Util::weaken( $self->{engine} ); + $self->_init; + return $self; +} + +#sub _init {} +#sub clone { DBM::Deep::10002->_throw_error( "Must be implemented in the child class" ); } + +sub engine { $_[0]{engine} } +sub offset { $_[0]{offset} } +sub type { $_[0]{type} } + +sub base_size { + my $self = shift; + return $self->engine->SIG_SIZE + $STALE_SIZE; +} + +sub free { + my $self = shift; + + my $e = $self->engine; + + $e->storage->print_at( $self->offset, $e->SIG_FREE ); + # Skip staleness counter + $e->storage->print_at( $self->offset + $self->base_size, + chr(0) x ($self->size - $self->base_size), + ); + + my $free_meth = $self->free_meth; + $e->$free_meth( $self->offset, $self->size ); + + return; +} + +package DBM::Deep::10002::Engine::Sector::Data; + +our @ISA = qw( DBM::Deep::10002::Engine::Sector ); + +# This is in bytes +sub size { $_[0]{engine}->data_sector_size } +sub free_meth { return '_add_free_data_sector' } + +sub clone { + my $self = shift; + return ref($self)->new({ + engine => $self->engine, + type => $self->type, + data => $self->data, + }); +} + +package DBM::Deep::10002::Engine::Sector::Scalar; + +our @ISA = qw( DBM::Deep::10002::Engine::Sector::Data ); + +sub free { + my $self = shift; + + my $chain_loc = $self->chain_loc; + + $self->SUPER::free(); + + if ( $chain_loc ) { + $self->engine->_load_sector( $chain_loc )->free; + } + + return; +} + +sub type { $_[0]{engine}->SIG_DATA } +sub _init { + my $self = shift; + + my $engine = $self->engine; + + unless ( $self->offset ) { + my $data_section = $self->size - $self->base_size - $engine->byte_size - 1; + + $self->{offset} = $engine->_request_data_sector( $self->size ); + + my $data = delete $self->{data}; + my $dlen = length $data; + my $continue = 1; + my $curr_offset = $self->offset; + while ( $continue ) { + + my $next_offset = 0; + + my ($leftover, $this_len, $chunk); + if ( $dlen > $data_section ) { + $leftover = 0; + $this_len = $data_section; + $chunk = substr( $data, 0, $this_len ); + + $dlen -= $data_section; + $next_offset = $engine->_request_data_sector( $self->size ); + $data = substr( $data, $this_len ); + } + else { + $leftover = $data_section - $dlen; + $this_len = $dlen; + $chunk = $data; + + $continue = 0; + } + + $engine->storage->print_at( $curr_offset, $self->type ); # Sector type + # Skip staleness + $engine->storage->print_at( $curr_offset + $self->base_size, + pack( $StP{$engine->byte_size}, $next_offset ), # Chain loc + pack( $StP{1}, $this_len ), # Data length + $chunk, # Data to be stored in this sector + chr(0) x $leftover, # Zero-fill the rest + ); + + $curr_offset = $next_offset; + } + + return; + } +} + +sub data_length { + my $self = shift; + + my $buffer = $self->engine->storage->read_at( + $self->offset + $self->base_size + $self->engine->byte_size, 1 + ); + + return unpack( $StP{1}, $buffer ); +} + +sub chain_loc { + my $self = shift; + return unpack( + $StP{$self->engine->byte_size}, + $self->engine->storage->read_at( + $self->offset + $self->base_size, + $self->engine->byte_size, + ), + ); +} + +sub data { + my $self = shift; + + my $data; + while ( 1 ) { + my $chain_loc = $self->chain_loc; + + $data .= $self->engine->storage->read_at( + $self->offset + $self->base_size + $self->engine->byte_size + 1, $self->data_length, + ); + + last unless $chain_loc; + + $self = $self->engine->_load_sector( $chain_loc ); + } + + return $data; +} + +package DBM::Deep::10002::Engine::Sector::Null; + +our @ISA = qw( DBM::Deep::10002::Engine::Sector::Data ); + +sub type { $_[0]{engine}->SIG_NULL } +sub data_length { 0 } +sub data { return } + +sub _init { + my $self = shift; + + my $engine = $self->engine; + + unless ( $self->offset ) { + my $leftover = $self->size - $self->base_size - 1 * $engine->byte_size - 1; + + $self->{offset} = $engine->_request_data_sector( $self->size ); + $engine->storage->print_at( $self->offset, $self->type ); # Sector type + # Skip staleness counter + $engine->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$engine->byte_size}, 0 ), # Chain loc + pack( $StP{1}, $self->data_length ), # Data length + chr(0) x $leftover, # Zero-fill the rest + ); + + return; + } +} + +package DBM::Deep::10002::Engine::Sector::Reference; + +our @ISA = qw( DBM::Deep::10002::Engine::Sector::Data ); + +sub _init { + my $self = shift; + + my $e = $self->engine; + + unless ( $self->offset ) { + my $classname = Scalar::Util::blessed( delete $self->{data} ); + my $leftover = $self->size - $self->base_size - 2 * $e->byte_size; + + my $class_offset = 0; + if ( defined $classname ) { + my $class_sector = DBM::Deep::10002::Engine::Sector::Scalar->new({ + engine => $e, + data => $classname, + }); + $class_offset = $class_sector->offset; + } + + $self->{offset} = $e->_request_data_sector( $self->size ); + $e->storage->print_at( $self->offset, $self->type ); # Sector type + # Skip staleness counter + $e->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$e->byte_size}, 0 ), # Index/BList loc + pack( $StP{$e->byte_size}, $class_offset ), # Classname loc + chr(0) x $leftover, # Zero-fill the rest + ); + } + else { + $self->{type} = $e->storage->read_at( $self->offset, 1 ); + } + + $self->{staleness} = unpack( + $StP{$STALE_SIZE}, + $e->storage->read_at( $self->offset + $e->SIG_SIZE, $STALE_SIZE ), + ); + + return; +} + +sub free { + my $self = shift; + + my $blist_loc = $self->get_blist_loc; + $self->engine->_load_sector( $blist_loc )->free if $blist_loc; + + my $class_loc = $self->get_class_offset; + $self->engine->_load_sector( $class_loc )->free if $class_loc; + + $self->SUPER::free(); +} + +sub staleness { $_[0]{staleness} } + +sub get_data_for { + my $self = shift; + my ($args) = @_; + + # Assume that the head is not allowed unless otherwise specified. + $args->{allow_head} = 0 unless exists $args->{allow_head}; + + # Assume we don't create a new blist location unless otherwise specified. + $args->{create} = 0 unless exists $args->{create}; + + my $blist = $self->get_bucket_list({ + key_md5 => $args->{key_md5}, + key => $args->{key}, + create => $args->{create}, + }); + return unless $blist && $blist->{found}; + + # At this point, $blist knows where the md5 is. What it -doesn't- know yet + # is whether or not this transaction has this key. That's part of the next + # function call. + my $location = $blist->get_data_location_for({ + allow_head => $args->{allow_head}, + }) or return; + + return $self->engine->_load_sector( $location ); +} + +sub write_data { + my $self = shift; + my ($args) = @_; + + my $blist = $self->get_bucket_list({ + key_md5 => $args->{key_md5}, + key => $args->{key}, + create => 1, + }) or DBM::Deep::10002->_throw_error( "How did write_data fail (no blist)?!" ); + + # Handle any transactional bookkeeping. + if ( $self->engine->trans_id ) { + if ( ! $blist->has_md5 ) { + $blist->mark_deleted({ + trans_id => 0, + }); + } + } + else { + my @trans_ids = $self->engine->get_running_txn_ids; + if ( $blist->has_md5 ) { + if ( @trans_ids ) { + my $old_value = $blist->get_data_for; + foreach my $other_trans_id ( @trans_ids ) { + next if $blist->get_data_location_for({ + trans_id => $other_trans_id, + allow_head => 0, + }); + $blist->write_md5({ + trans_id => $other_trans_id, + key => $args->{key}, + key_md5 => $args->{key_md5}, + value => $old_value->clone, + }); + } + } + } + else { + if ( @trans_ids ) { + foreach my $other_trans_id ( @trans_ids ) { + #XXX This doesn't seem to possible to ever happen . . . + next if $blist->get_data_location_for({ trans_id => $other_trans_id, allow_head => 0 }); + $blist->mark_deleted({ + trans_id => $other_trans_id, + }); + } + } + } + } + + #XXX Is this safe to do transactionally? + # Free the place we're about to write to. + if ( $blist->get_data_location_for({ allow_head => 0 }) ) { + $blist->get_data_for({ allow_head => 0 })->free; + } + + $blist->write_md5({ + key => $args->{key}, + key_md5 => $args->{key_md5}, + value => $args->{value}, + }); +} + +sub delete_key { + my $self = shift; + my ($args) = @_; + + # XXX What should happen if this fails? + my $blist = $self->get_bucket_list({ + key_md5 => $args->{key_md5}, + }) or DBM::Deep::10002->_throw_error( "How did delete_key fail (no blist)?!" ); + + # Save the location so that we can free the data + my $location = $blist->get_data_location_for({ + allow_head => 0, + }); + my $old_value = $location && $self->engine->_load_sector( $location ); + + my @trans_ids = $self->engine->get_running_txn_ids; + + if ( $self->engine->trans_id == 0 ) { + if ( @trans_ids ) { + foreach my $other_trans_id ( @trans_ids ) { + next if $blist->get_data_location_for({ trans_id => $other_trans_id, allow_head => 0 }); + $blist->write_md5({ + trans_id => $other_trans_id, + key => $args->{key}, + key_md5 => $args->{key_md5}, + value => $old_value->clone, + }); + } + } + } + + my $data; + if ( @trans_ids ) { + $blist->mark_deleted( $args ); + + if ( $old_value ) { + $data = $old_value->data; + $old_value->free; + } + } + else { + $data = $blist->delete_md5( $args ); + } + + return $data; +} + +sub get_blist_loc { + my $self = shift; + + my $e = $self->engine; + my $blist_loc = $e->storage->read_at( $self->offset + $self->base_size, $e->byte_size ); + return unpack( $StP{$e->byte_size}, $blist_loc ); +} + +sub get_bucket_list { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + # XXX Add in check here for recycling? + + my $engine = $self->engine; + + my $blist_loc = $self->get_blist_loc; + + # There's no index or blist yet + unless ( $blist_loc ) { + return unless $args->{create}; + + my $blist = DBM::Deep::10002::Engine::Sector::BucketList->new({ + engine => $engine, + key_md5 => $args->{key_md5}, + }); + + $engine->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$engine->byte_size}, $blist->offset ), + ); + + return $blist; + } + + my $sector = $engine->_load_sector( $blist_loc ) + or DBM::Deep::10002->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" ); + my $i = 0; + my $last_sector = undef; + while ( $sector->isa( 'DBM::Deep::10002::Engine::Sector::Index' ) ) { + $blist_loc = $sector->get_entry( ord( substr( $args->{key_md5}, $i++, 1 ) ) ); + $last_sector = $sector; + if ( $blist_loc ) { + $sector = $engine->_load_sector( $blist_loc ) + or DBM::Deep::10002->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" ); + } + else { + $sector = undef; + last; + } + } + + # This means we went through the Index sector(s) and found an empty slot + unless ( $sector ) { + return unless $args->{create}; + + DBM::Deep::10002->_throw_error( "No last_sector when attempting to build a new entry" ) + unless $last_sector; + + my $blist = DBM::Deep::10002::Engine::Sector::BucketList->new({ + engine => $engine, + key_md5 => $args->{key_md5}, + }); + + $last_sector->set_entry( ord( substr( $args->{key_md5}, $i - 1, 1 ) ) => $blist->offset ); + + return $blist; + } + + $sector->find_md5( $args->{key_md5} ); + + # See whether or not we need to reindex the bucketlist + if ( !$sector->has_md5 && $args->{create} && $sector->{idx} == -1 ) { + my $new_index = DBM::Deep::10002::Engine::Sector::Index->new({ + engine => $engine, + }); + + my %blist_cache; + #XXX q.v. the comments for this function. + foreach my $entry ( $sector->chopped_up ) { + my ($spot, $md5) = @{$entry}; + my $idx = ord( substr( $md5, $i, 1 ) ); + + # XXX This is inefficient + my $blist = $blist_cache{$idx} + ||= DBM::Deep::10002::Engine::Sector::BucketList->new({ + engine => $engine, + }); + + $new_index->set_entry( $idx => $blist->offset ); + + my $new_spot = $blist->write_at_next_open( $md5 ); + $engine->reindex_entry( $spot => $new_spot ); + } + + # Handle the new item separately. + { + my $idx = ord( substr( $args->{key_md5}, $i, 1 ) ); + my $blist = $blist_cache{$idx} + ||= DBM::Deep::10002::Engine::Sector::BucketList->new({ + engine => $engine, + }); + + $new_index->set_entry( $idx => $blist->offset ); + + #XXX THIS IS HACKY! + $blist->find_md5( $args->{key_md5} ); + $blist->write_md5({ + key => $args->{key}, + key_md5 => $args->{key_md5}, + value => DBM::Deep::10002::Engine::Sector::Null->new({ + engine => $engine, + data => undef, + }), + }); + } + + if ( $last_sector ) { + $last_sector->set_entry( + ord( substr( $args->{key_md5}, $i - 1, 1 ) ), + $new_index->offset, + ); + } else { + $engine->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$engine->byte_size}, $new_index->offset ), + ); + } + + $sector->free; + + $sector = $blist_cache{ ord( substr( $args->{key_md5}, $i, 1 ) ) }; + $sector->find_md5( $args->{key_md5} ); + } + + return $sector; +} + +sub get_class_offset { + my $self = shift; + + my $e = $self->engine; + return unpack( + $StP{$e->byte_size}, + $e->storage->read_at( + $self->offset + $self->base_size + 1 * $e->byte_size, $e->byte_size, + ), + ); +} + +sub get_classname { + my $self = shift; + + my $class_offset = $self->get_class_offset; + + return unless $class_offset; + + return $self->engine->_load_sector( $class_offset )->data; +} + +#XXX Add singleton handling here +sub data { + my $self = shift; + + my $new_obj = DBM::Deep::10002->new({ + type => $self->type, + base_offset => $self->offset, + staleness => $self->staleness, + storage => $self->engine->storage, + engine => $self->engine, + }); + + if ( $self->engine->storage->{autobless} ) { + my $classname = $self->get_classname; + if ( defined $classname ) { + bless $new_obj, $classname; + } + } + + return $new_obj; +} + +package DBM::Deep::10002::Engine::Sector::BucketList; + +our @ISA = qw( DBM::Deep::10002::Engine::Sector ); + +sub _init { + my $self = shift; + + my $engine = $self->engine; + + unless ( $self->offset ) { + my $leftover = $self->size - $self->base_size; + + $self->{offset} = $engine->_request_blist_sector( $self->size ); + $engine->storage->print_at( $self->offset, $engine->SIG_BLIST ); # Sector type + # Skip staleness counter + $engine->storage->print_at( $self->offset + $self->base_size, + chr(0) x $leftover, # Zero-fill the data + ); + } + + if ( $self->{key_md5} ) { + $self->find_md5; + } + + return $self; +} + +sub size { + my $self = shift; + unless ( $self->{size} ) { + my $e = $self->engine; + # Base + numbuckets * bucketsize + $self->{size} = $self->base_size + $e->max_buckets * $self->bucket_size; + } + return $self->{size}; +} + +sub free_meth { return '_add_free_blist_sector' } + +sub bucket_size { + my $self = shift; + unless ( $self->{bucket_size} ) { + my $e = $self->engine; + # Key + head (location) + transactions (location + staleness-counter) + my $location_size = $e->byte_size + $e->byte_size + ($e->num_txns - 1) * ($e->byte_size + $STALE_SIZE); + $self->{bucket_size} = $e->hash_size + $location_size; + } + return $self->{bucket_size}; +} + +# XXX This is such a poor hack. I need to rethink this code. +sub chopped_up { + my $self = shift; + + my $e = $self->engine; + + my @buckets; + foreach my $idx ( 0 .. $e->max_buckets - 1 ) { + my $spot = $self->offset + $self->base_size + $idx * $self->bucket_size; + my $md5 = $e->storage->read_at( $spot, $e->hash_size ); + + #XXX If we're chopping, why would we ever have the blank_md5? + last if $md5 eq $e->blank_md5; + + my $rest = $e->storage->read_at( undef, $self->bucket_size - $e->hash_size ); + push @buckets, [ $spot, $md5 . $rest ]; + } + + return @buckets; +} + +sub write_at_next_open { + my $self = shift; + my ($entry) = @_; + + #XXX This is such a hack! + $self->{_next_open} = 0 unless exists $self->{_next_open}; + + my $spot = $self->offset + $self->base_size + $self->{_next_open}++ * $self->bucket_size; + $self->engine->storage->print_at( $spot, $entry ); + + return $spot; +} + +sub has_md5 { + my $self = shift; + unless ( exists $self->{found} ) { + $self->find_md5; + } + return $self->{found}; +} + +sub find_md5 { + my $self = shift; + + $self->{found} = undef; + $self->{idx} = -1; + + if ( @_ ) { + $self->{key_md5} = shift; + } + + # If we don't have an MD5, then what are we supposed to do? + unless ( exists $self->{key_md5} ) { + DBM::Deep::10002->_throw_error( "Cannot find_md5 without a key_md5 set" ); + } + + my $e = $self->engine; + foreach my $idx ( 0 .. $e->max_buckets - 1 ) { + my $potential = $e->storage->read_at( + $self->offset + $self->base_size + $idx * $self->bucket_size, $e->hash_size, + ); + + if ( $potential eq $e->blank_md5 ) { + $self->{idx} = $idx; + return; + } + + if ( $potential eq $self->{key_md5} ) { + $self->{found} = 1; + $self->{idx} = $idx; + return; + } + } + + return; +} + +sub write_md5 { + my $self = shift; + my ($args) = @_; + + DBM::Deep::10002->_throw_error( "write_md5: no key" ) unless exists $args->{key}; + DBM::Deep::10002->_throw_error( "write_md5: no key_md5" ) unless exists $args->{key_md5}; + DBM::Deep::10002->_throw_error( "write_md5: no value" ) unless exists $args->{value}; + + my $engine = $self->engine; + + $args->{trans_id} = $engine->trans_id unless exists $args->{trans_id}; + + my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size; + $engine->add_entry( $args->{trans_id}, $spot ); + + unless ($self->{found}) { + my $key_sector = DBM::Deep::10002::Engine::Sector::Scalar->new({ + engine => $engine, + data => $args->{key}, + }); + + $engine->storage->print_at( $spot, + $args->{key_md5}, + pack( $StP{$engine->byte_size}, $key_sector->offset ), + ); + } + + my $loc = $spot + + $engine->hash_size + + $engine->byte_size; + + if ( $args->{trans_id} ) { + $loc += $engine->byte_size + ($args->{trans_id} - 1) * ( $engine->byte_size + $STALE_SIZE ); + + $engine->storage->print_at( $loc, + pack( $StP{$engine->byte_size}, $args->{value}->offset ), + pack( $StP{$STALE_SIZE}, $engine->get_txn_staleness_counter( $args->{trans_id} ) ), + ); + } + else { + $engine->storage->print_at( $loc, + pack( $StP{$engine->byte_size}, $args->{value}->offset ), + ); + } +} + +sub mark_deleted { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + my $engine = $self->engine; + + $args->{trans_id} = $engine->trans_id unless exists $args->{trans_id}; + + my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size; + $engine->add_entry( $args->{trans_id}, $spot ); + + my $loc = $spot + + $engine->hash_size + + $engine->byte_size; + + if ( $args->{trans_id} ) { + $loc += $engine->byte_size + ($args->{trans_id} - 1) * ( $engine->byte_size + $STALE_SIZE ); + + $engine->storage->print_at( $loc, + pack( $StP{$engine->byte_size}, 1 ), # 1 is the marker for deleted + pack( $StP{$STALE_SIZE}, $engine->get_txn_staleness_counter( $args->{trans_id} ) ), + ); + } + else { + $engine->storage->print_at( $loc, + pack( $StP{$engine->byte_size}, 1 ), # 1 is the marker for deleted + ); + } + +} + +sub delete_md5 { + my $self = shift; + my ($args) = @_; + + my $engine = $self->engine; + return undef unless $self->{found}; + + # Save the location so that we can free the data + my $location = $self->get_data_location_for({ + allow_head => 0, + }); + my $key_sector = $self->get_key_for; + + my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size; + $engine->storage->print_at( $spot, + $engine->storage->read_at( + $spot + $self->bucket_size, + $self->bucket_size * ( $engine->max_buckets - $self->{idx} - 1 ), + ), + chr(0) x $self->bucket_size, + ); + + $key_sector->free; + + my $data_sector = $self->engine->_load_sector( $location ); + my $data = $data_sector->data; + $data_sector->free; + + return $data; +} + +sub get_data_location_for { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + $args->{allow_head} = 0 unless exists $args->{allow_head}; + $args->{trans_id} = $self->engine->trans_id unless exists $args->{trans_id}; + $args->{idx} = $self->{idx} unless exists $args->{idx}; + + my $e = $self->engine; + + my $spot = $self->offset + $self->base_size + + $args->{idx} * $self->bucket_size + + $e->hash_size + + $e->byte_size; + + if ( $args->{trans_id} ) { + $spot += $e->byte_size + ($args->{trans_id} - 1) * ( $e->byte_size + $STALE_SIZE ); + } + + my $buffer = $e->storage->read_at( + $spot, + $e->byte_size + $STALE_SIZE, + ); + my ($loc, $staleness) = unpack( $StP{$e->byte_size} . ' ' . $StP{$STALE_SIZE}, $buffer ); + + if ( $args->{trans_id} ) { + # We have found an entry that is old, so get rid of it + if ( $staleness != (my $s = $e->get_txn_staleness_counter( $args->{trans_id} ) ) ) { + $e->storage->print_at( + $spot, + pack( $StP{$e->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ), + ); + $loc = 0; + } + } + + # If we're in a transaction and we never wrote to this location, try the + # HEAD instead. + if ( $args->{trans_id} && !$loc && $args->{allow_head} ) { + return $self->get_data_location_for({ + trans_id => 0, + allow_head => 1, + idx => $args->{idx}, + }); + } + return $loc <= 1 ? 0 : $loc; +} + +sub get_data_for { + my $self = shift; + my ($args) = @_; + $args ||= {}; + + return unless $self->{found}; + my $location = $self->get_data_location_for({ + allow_head => $args->{allow_head}, + }); + return $self->engine->_load_sector( $location ); +} + +sub get_key_for { + my $self = shift; + my ($idx) = @_; + $idx = $self->{idx} unless defined $idx; + + if ( $idx >= $self->engine->max_buckets ) { + DBM::Deep::10002->_throw_error( "get_key_for(): Attempting to retrieve $idx" ); + } + + my $location = $self->engine->storage->read_at( + $self->offset + $self->base_size + $idx * $self->bucket_size + $self->engine->hash_size, + $self->engine->byte_size, + ); + $location = unpack( $StP{$self->engine->byte_size}, $location ); + DBM::Deep::10002->_throw_error( "get_key_for: No location?" ) unless $location; + + return $self->engine->_load_sector( $location ); +} + +package DBM::Deep::10002::Engine::Sector::Index; + +our @ISA = qw( DBM::Deep::10002::Engine::Sector ); + +sub _init { + my $self = shift; + + my $engine = $self->engine; + + unless ( $self->offset ) { + my $leftover = $self->size - $self->base_size; + + $self->{offset} = $engine->_request_index_sector( $self->size ); + $engine->storage->print_at( $self->offset, $engine->SIG_INDEX ); # Sector type + # Skip staleness counter + $engine->storage->print_at( $self->offset + $self->base_size, + chr(0) x $leftover, # Zero-fill the rest + ); + } + + return $self; +} + +#XXX Change here +sub size { + my $self = shift; + unless ( $self->{size} ) { + my $e = $self->engine; + $self->{size} = $self->base_size + $e->byte_size * $e->hash_chars; + } + return $self->{size}; +} + +sub free_meth { return '_add_free_index_sector' } + +sub free { + my $self = shift; + my $e = $self->engine; + + for my $i ( 0 .. $e->hash_chars - 1 ) { + my $l = $self->get_entry( $i ) or next; + $e->_load_sector( $l )->free; + } + + $self->SUPER::free(); +} + +sub _loc_for { + my $self = shift; + my ($idx) = @_; + return $self->offset + $self->base_size + $idx * $self->engine->byte_size; +} + +sub get_entry { + my $self = shift; + my ($idx) = @_; + + my $e = $self->engine; + + DBM::Deep::10002->_throw_error( "get_entry: Out of range ($idx)" ) + if $idx < 0 || $idx >= $e->hash_chars; + + return unpack( + $StP{$e->byte_size}, + $e->storage->read_at( $self->_loc_for( $idx ), $e->byte_size ), + ); +} + +sub set_entry { + my $self = shift; + my ($idx, $loc) = @_; + + my $e = $self->engine; + + DBM::Deep::10002->_throw_error( "set_entry: Out of range ($idx)" ) + if $idx < 0 || $idx >= $e->hash_chars; + + $self->engine->storage->print_at( + $self->_loc_for( $idx ), + pack( $StP{$e->byte_size}, $loc ), + ); +} + +1; +__END__ diff --git a/utils/upgrade_db.pl b/utils/upgrade_db.pl index 84fc833..b80889b 100755 --- a/utils/upgrade_db.pl +++ b/utils/upgrade_db.pl @@ -17,7 +17,8 @@ use Pod::Usage; my %headerver_to_module = ( '0' => 'DBM::Deep::09830', - '2' => 'DBM::Deep', + '2' => 'DBM::Deep::10002', + '3' => 'DBM::Deep', ); my %is_dev = ( @@ -27,8 +28,8 @@ my %is_dev = ( my %opts = ( man => 0, help => 0, - version => '1.0002', - autobless => 0, + version => '1.0006', + autobless => 1, ); GetOptions( \%opts, 'input=s', 'output=s', 'version:s', 'autobless:i', @@ -57,6 +58,9 @@ my %db; my $mod = $headerver_to_module{ $ver }; eval "use $mod;"; + if ( $@ ) { + _exit( "Cannot load '$mod' to read header version '$ver':\n\t$@" ); + } $db{input} = $mod->new({ file => $opts{input}, locking => 1, @@ -76,6 +80,9 @@ my %db; elsif ( $ver =~ /^1\.000?[0-2]?/) { $ver = 2; } + elsif ( $ver =~ /^1\.000[3-6]/) { + $ver = 3; + } else { _exit( "'$ver' is an unrecognized version." ); } @@ -89,6 +96,9 @@ my %db; my $mod = $headerver_to_module{ $ver }; eval "use $mod;"; + if ( $@ ) { + _exit( "Cannot load '$mod' to read header version '$ver':\n\t$@" ); + } $db{output} = $mod->new({ file => $opts{output}, locking => 1, @@ -177,8 +187,8 @@ of the database. =item B<-autobless> -In pre-1.0000 versions, autoblessing was an optional setting. This defaults to -false. +In pre-1.0000 versions, autoblessing was an optional setting defaulting to +false. Autobless in upgrade_db.pl defaults to true. =item B<-help> @@ -203,7 +213,7 @@ This will require about twice the diskspace of the input file. =item * Feature support Not all versions support the same features. In particular, internal references -were supported in 0.983 and support was removed in 1.000. There is no +were supported in 0.983, removed in 1.000, and re-added in 1.0003. There is no detection of this by upgrade_db.pl. =back