From: rkinyon Date: Sun, 28 Jan 2007 06:22:53 +0000 (+0000) Subject: On the warpath to 1.0000 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=64a7e079f074a8f935df082b8e489ac06d95571d;p=dbsrgits%2FDBM-Deep.git On the warpath to 1.0000 --- diff --git a/MANIFEST b/MANIFEST index 0343370..0422ec9 100644 --- a/MANIFEST +++ b/MANIFEST @@ -47,6 +47,7 @@ t/32_dash_ell.t t/33_transactions.t t/34_transaction_arrays.t t/35_transaction_multiple.t +t/36_verybighash.t t/37_delete_edge_cases.t t/39_singletons.t t/40_freespace.t diff --git a/lib/DBM/Deep.pm b/lib/DBM/Deep.pm index 2d00dca..db76940 100644 --- a/lib/DBM/Deep.pm +++ b/lib/DBM/Deep.pm @@ -1,40 +1,11 @@ package DBM::Deep; -## -# DBM::Deep -# -# Description: -# Multi-level database module for storing hash trees, arrays and simple -# key/value pairs into FTP-able, cross-platform binary database files. -# -# Type `perldoc DBM::Deep` for complete documentation. -# -# Usage Examples: -# my %db; -# tie %db, 'DBM::Deep', 'my_database.db'; # standard tie() method -# -# my $db = new DBM::Deep( 'my_database.db' ); # preferred OO method -# -# $db->{my_scalar} = 'hello world'; -# $db->{my_hash} = { larry => 'genius', hashes => 'fast' }; -# $db->{my_array} = [ 1, 2, 3, time() ]; -# $db->{my_complex} = [ 'hello', { perl => 'rules' }, 42, 99 ]; -# push @{$db->{my_array}}, 'another value'; -# my @key_list = keys %{$db->{my_hash}}; -# print "This module " . $db->{my_complex}->[1]->{perl} . "!\n"; -# -# Copyright: -# (c) 2002-2006 Joseph Huckaby. All Rights Reserved. -# This program is free software; you can redistribute it and/or -# modify it under the same terms as Perl itself. -## - use 5.006_000; use strict; use warnings; -our $VERSION = q(0.99_04); +our $VERSION = q(1.0000); use Fcntl qw( :flock ); diff --git a/lib/DBM/Deep.pod b/lib/DBM/Deep.pod index 2312d95..bc8993b 100644 --- a/lib/DBM/Deep.pod +++ b/lib/DBM/Deep.pod @@ -212,8 +212,8 @@ See L below. This is the number of entries that can be added before a reindexing. The larger this number is made, the larger a file gets, but the better performance you will -have. The default and minimum number this can be is 16. There is no maximum, but -more than 32 isn't recommended. +have. The default and minimum number this can be is 16. The maximum is 255, but +more than 64 isn't recommended. =item * pack_size @@ -224,15 +224,16 @@ are: =item * small -This uses 2-byte offsets, allowing for a maximum file size of 65K +This uses 2-byte offsets, allowing for a maximum file size of 65 KB. =item * medium (default) -This uses 4-byte offsets, allowing for a maximum file size of 2G. +This uses 4-byte offsets, allowing for a maximum file size of 4 GB. =item * large -This uses 8-byte offsets, allowing for a maximum file size of 16XB (exabytes). +This uses 8-byte offsets, allowing for a maximum file size of 16 XB +(exabytes). This can only be enabled if your Perl is compiled for 64-bit. =back @@ -780,7 +781,7 @@ failure. You can wrap calls in an eval block to catch the die. =head1 LARGEFILE SUPPORT If you have a 64-bit system, and your Perl is compiled with both LARGEFILE -and 64-bit support, you I be able to create databases larger than 2 GB. +and 64-bit support, you I be able to create databases larger than 4 GB. DBM::Deep by default uses 32-bit file offset tags, but these can be changed by specifying the 'pack_size' parameter when constructing the file. @@ -802,9 +803,9 @@ the file's header and cannot be changed for the life of the file. These parameters are per-file, meaning you can access 32-bit and 64-bit files, as you choose. -B We have not personally tested files larger than 2 GB -- all my +B We have not personally tested files larger than 4 GB -- all my systems have only a 32-bit Perl. However, I have received user reports that -this does indeed work! +this does indeed work. =head1 LOW-LEVEL ACCESS diff --git a/lib/DBM/Deep/Array.pm b/lib/DBM/Deep/Array.pm index 3d5804f..438756b 100644 --- a/lib/DBM/Deep/Array.pm +++ b/lib/DBM/Deep/Array.pm @@ -5,7 +5,7 @@ use 5.006_000; use strict; use warnings; -our $VERSION = '0.99_04'; +our $VERSION = q(1.0000); # This is to allow DBM::Deep::Array to handle negative indices on # its own. Otherwise, Perl would intercept the call to negative diff --git a/lib/DBM/Deep/Engine.pm b/lib/DBM/Deep/Engine.pm index a491892..29d4f4f 100644 --- a/lib/DBM/Deep/Engine.pm +++ b/lib/DBM/Deep/Engine.pm @@ -3,8 +3,9 @@ package DBM::Deep::Engine; use 5.006_000; use strict; +use warnings; -our $VERSION = q(0.99_04); +our $VERSION = q(1.0000); use Scalar::Util (); @@ -17,7 +18,6 @@ use Scalar::Util (); # Setup file and tag signatures. These should never change. sub SIG_FILE () { 'DPDB' } sub SIG_HEADER () { 'h' } -sub SIG_INTERNAL () { 'i' } sub SIG_HASH () { 'H' } sub SIG_ARRAY () { 'A' } sub SIG_NULL () { 'N' } @@ -25,13 +25,12 @@ sub SIG_DATA () { 'D' } sub SIG_INDEX () { 'I' } sub SIG_BLIST () { 'B' } sub SIG_FREE () { 'F' } -sub SIG_KEYS () { 'K' } sub SIG_SIZE () { 1 } sub STALE_SIZE () { 1 } # Please refer to the pack() documentation for further information my %StP = ( - 1 => 'C', # Unsigned char value (no order specified, presumably ASCII) + 1 => 'C', # Unsigned char value (no order needed as it's just one byte) 2 => 'n', # Unsigned short in "network" (big-endian) order 4 => 'N', # Unsigned long in "network" (big-endian) order 8 => 'Q', # Usigned quad (no order specified, presumably machine-dependent) @@ -57,6 +56,8 @@ sub new { storage => undef, }, $class; + # Never allow byte_size to be set directly. + delete $args->{byte_size}; if ( defined $args->{pack_size} ) { if ( lc $args->{pack_size} eq 'small' ) { $args->{byte_size} = 2; @@ -84,10 +85,31 @@ sub new { # files. DO NOT decrease this value below 16, due to risk of recursive # reindex overrun. ## - if ( $self->{max_buckets} < 16 ) { + if ( !defined $self->{max_buckets} + || !length $self->{max_buckets} + || $self->{max_buckets} =~ /\D/ + || $self->{max_buckets} < 16 + ) { warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n"; $self->{max_buckets} = 16; } + elsif ( $self->{max_buckets} > 255 ) { + warn "Ceiling of max_buckets is 255. Setting to to 255 from '$self->{max_buckets}'\n"; + $self->{max_buckets} = 255; + } + + if ( !defined $self->{num_txns} + || !length $self->{num_txns} + || $self->{num_txns} =~ /\D/ + || $self->{num_txns} < 2 + ) { + warn "Floor of num_txns is 2. Setting to to 2 from '$self->{num_txns}'\n"; + $self->{num_txns} = 2; + } + elsif ( $self->{num_txns} > 255 ) { + warn "Ceiling of num_txns is 255. Setting to to 255 from '$self->{num_txns}'\n"; + $self->{num_txns} = 255; + } if ( !$self->{digest} ) { require Digest::MD5; @@ -530,32 +552,36 @@ sub clear_entries { { my $header_fixed = length( SIG_FILE ) + 1 + 4 + 4; + my $this_header_version = 2; sub _write_file_header { my $self = shift; - my $header_var = 1 + 1 + 1 + 4 + 4 * $self->num_txns + 3 * $self->byte_size; + my $nt = $self->num_txns; + + my $header_var = 1 + 1 + 1 + 4 + 4 * $nt + 3 * $self->byte_size; my $loc = $self->storage->request_space( $header_fixed + $header_var ); $self->storage->print_at( $loc, SIG_FILE, SIG_HEADER, - pack('N', 1), # header version - at this point, we're at 9 bytes - pack('N', $header_var), # header size + pack('N', $this_header_version), # At this point, we're at 9 bytes + pack('N', $header_var), # header size # --- Above is $header_fixed. Below is $header_var pack('C', $self->byte_size), pack('C', $self->max_buckets), - pack('C', $self->num_txns), + pack('C', $nt), pack('N', 0 ), # Transaction activeness bitfield - pack('N' . $self->num_txns, 0 x $self->num_txns ), # Transaction staleness counters + pack('N' . $nt, 0 x $nt ), # Transaction staleness counters pack($StP{$self->byte_size}, 0), # Start of free chain (blist size) pack($StP{$self->byte_size}, 0), # Start of free chain (data size) pack($StP{$self->byte_size}, 0), # Start of free chain (index size) ); + #XXX Set these less fragilely $self->set_trans_loc( $header_fixed + 3 ); - $self->set_chains_loc( $header_fixed + 3 + 4 + 4 * $self->num_txns ); + $self->set_chains_loc( $header_fixed + 3 + 4 + 4 * $nt ); return; } @@ -577,7 +603,15 @@ sub clear_entries { unless ( $sig_header eq SIG_HEADER ) { $self->storage->close; - DBM::Deep->_throw_error( "Old file version found." ); + DBM::Deep->_throw_error( "Pre-1.00 file version found" ); + } + + unless ( $header_version == $this_header_version ) { + $self->storage->close; + DBM::Deep->_throw_error( + "Wrong file version found - " . $header_version . + " - expected " . $this_header_version + ); } my $buffer2 = $self->storage->read_at( undef, $size ); diff --git a/lib/DBM/Deep/File.pm b/lib/DBM/Deep/File.pm index 84a98d4..4d4a39c 100644 --- a/lib/DBM/Deep/File.pm +++ b/lib/DBM/Deep/File.pm @@ -5,7 +5,7 @@ use 5.006_000; use strict; use warnings; -our $VERSION = q(0.99_04); +our $VERSION = q(1.0000); use Fcntl qw( :DEFAULT :flock :seek ); diff --git a/lib/DBM/Deep/Hash.pm b/lib/DBM/Deep/Hash.pm index fb27097..e3de270 100644 --- a/lib/DBM/Deep/Hash.pm +++ b/lib/DBM/Deep/Hash.pm @@ -5,7 +5,7 @@ use 5.006_000; use strict; use warnings; -our $VERSION = q(0.99_04); +our $VERSION = q(1.0000); use base 'DBM::Deep'; diff --git a/t/06_error.t b/t/06_error.t index ea39773..39c57b6 100644 --- a/t/06_error.t +++ b/t/06_error.t @@ -19,7 +19,7 @@ print FH 'DPDB'; close FH; throws_ok { DBM::Deep->new( $filename ); -} qr/DBM::Deep: Old file version found/, "Fail if there's a bad header"; +} qr/DBM::Deep: Pre-1.00 file version found/, "Fail if there's a bad header"; { my ($fh, $filename) = new_fh(); diff --git a/t/36_verybighash.t b/t/36_verybighash.t new file mode 100644 index 0000000..d218642 --- /dev/null +++ b/t/36_verybighash.t @@ -0,0 +1,46 @@ +# This test was contributed by Fedor Soreks, Jan 2007. + +use strict; +use Test::More; + +plan skip_all => "You must set \$ENV{LONG_TESTS} >= 2 to run the superlong tests" + unless $ENV{LONG_TESTS} >= 2; + +use Test::Deep; +use t::common qw( new_fh ); + +plan tests => 2; + +use_ok( 'DBM::Deep' ); + +diag "This test can take up to several hours to run. Please be VERY patient."; + +my ($fh, $filename) = new_fh(); +my $db = DBM::Deep->new( + file => $filename, + type => DBM::Deep->TYPE_HASH, +); + +my $gigs = 2; + +## +# put/get many keys +## +my $max_keys = 4_000_000; +my $max_record_keys = 10; + +for my $key_no ( 0 .. $max_keys ) { + for my $rec_no ( 0 .. $max_record_keys ) { + $db->{"key_$key_no"}{"rec_key_$rec_no"} = "rec_val_$rec_no"; + } + + my $s = -s $filename; + print "$key_no: $s\n"; + + if ( $s > $gigs * 2**30) { + fail "DB file ($db_fn) size exceeds $gigs GB"; + exit; + } +} + +ok( 1, "We passed the test!" );