From: rkinyon Date: Sun, 28 Jan 2007 19:32:26 +0000 (+0000) Subject: data_sector_size parameterization is proceeding apace X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=80cb1e6744be8ddbc89a28db6bd04254f04a781c;p=dbsrgits%2FDBM-Deep.git data_sector_size parameterization is proceeding apace --- diff --git a/lib/DBM/Deep.pod b/lib/DBM/Deep.pod index bc8993b..3bd15d3 100644 --- a/lib/DBM/Deep.pod +++ b/lib/DBM/Deep.pod @@ -215,6 +215,22 @@ this number is made, the larger a file gets, but the better performance you will have. The default and minimum number this can be is 16. The maximum is 255, but more than 64 isn't recommended. +=item * data_sector_size + +This is the size in bytes of a given data sector. Data sectors will chain, so +a value of any size can be stored. However, chaining is expensive in terms of +time. Setting this value to something close to the expected common length of +your scalars will improve your performance. If it is too small, your file will +have a lot of chaining. If it is too large, your file will have a lot of dead +space in it. + +The default for this is 64 bytes. The minimum value is 32 and the maximum is +255 bytes. + +B<Note:> There are between 5 and 9 bytes taken up in each data sector for +bookkeeping. (It's 3 + the number of bytes in your L</pack_size>.) This is +included within the data_sector_size. + =item * pack_size This is the size of the file pointer used throughout the file. 
The valid values diff --git a/lib/DBM/Deep/Engine.pm b/lib/DBM/Deep/Engine.pm index b47a065..1eead43 100644 --- a/lib/DBM/Deep/Engine.pm +++ b/lib/DBM/Deep/Engine.pm @@ -52,6 +52,8 @@ sub new { num_txns => 2, # HEAD plus 1 additional transaction for importing trans_id => 0, # Default to the HEAD + data_sector_size => 64, # Size in bytes of each data sector + entries => {}, # This is the list of entries for transactions storage => undef, }, $class; @@ -94,7 +96,7 @@ sub new { $self->{max_buckets} = 16; } elsif ( $self->{max_buckets} > 255 ) { - warn "Ceiling of max_buckets is 255. Setting to to 255 from '$self->{max_buckets}'\n"; + warn "Ceiling of max_buckets is 255. Setting it to 255 from '$self->{max_buckets}'\n"; $self->{max_buckets} = 255; } @@ -103,14 +105,27 @@ sub new { || $self->{num_txns} =~ /\D/ || $self->{num_txns} < 2 ) { - warn "Floor of num_txns is 2. Setting to to 2 from '$self->{num_txns}'\n"; + warn "Floor of num_txns is 2. Setting it to 2 from '$self->{num_txns}'\n"; $self->{num_txns} = 2; } elsif ( $self->{num_txns} > 255 ) { - warn "Ceiling of num_txns is 255. Setting to to 255 from '$self->{num_txns}'\n"; + warn "Ceiling of num_txns is 255. Setting it to 255 from '$self->{num_txns}'\n"; $self->{num_txns} = 255; } + if ( !defined $self->{data_sector_size} + || !length $self->{data_sector_size} + || $self->{data_sector_size} =~ /\D/ + || $self->{data_sector_size} < 32 + ) { + warn "Floor of data_sector_size is 32. Setting it to 32 from '$self->{data_sector_size}'\n"; + $self->{data_sector_size} = 32; + } + elsif ( $self->{data_sector_size} > 255 ) { + warn "Ceiling of data_sector_size is 255. 
Setting it to 255 from '$self->{data_sector_size}'\n"; + $self->{data_sector_size} = 255; + } + if ( !$self->{digest} ) { require Digest::MD5; $self->{digest} = \&Digest::MD5::md5; @@ -559,7 +574,7 @@ sub clear_entries { my $nt = $self->num_txns; - my $header_var = 1 + 1 + 1 + 4 + 4 * $nt + 3 * $self->byte_size; + my $header_var = 1 + 1 + 1 + 1 + 4 + 4 * $nt + 3 * $self->byte_size; my $loc = $self->storage->request_space( $header_fixed + $header_var ); @@ -571,6 +586,7 @@ sub clear_entries { # --- Above is $header_fixed. Below is $header_var pack('C', $self->byte_size), pack('C', $self->max_buckets), + pack('C', $self->data_sector_size), pack('C', $nt), pack('N', 0 ), # Transaction activeness bitfield pack('N' . $nt, 0 x $nt ), # Transaction staleness counters @@ -580,8 +596,8 @@ sub clear_entries { ); #XXX Set these less fragilely - $self->set_trans_loc( $header_fixed + 3 ); - $self->set_chains_loc( $header_fixed + 3 + 4 + 4 * $nt ); + $self->set_trans_loc( $header_fixed + 4 ); + $self->set_chains_loc( $header_fixed + 4 + 4 + 4 * $nt ); return; } @@ -615,9 +631,9 @@ sub clear_entries { } my $buffer2 = $self->storage->read_at( undef, $size ); - my @values = unpack( 'C C C', $buffer2 ); + my @values = unpack( 'C C C C', $buffer2 ); - if ( @values != 3 || grep { !defined } @values ) { + if ( @values != 4 || grep { !defined } @values ) { $self->storage->close; DBM::Deep->_throw_error("Corrupted file - bad header"); } @@ -626,7 +642,7 @@ sub clear_entries { $self->set_chains_loc( $header_fixed + scalar(@values) + 4 + 4 * $self->num_txns ); #XXX Add warnings if values weren't set right - @{$self}{qw(byte_size max_buckets num_txns)} = @values; + @{$self}{qw(byte_size max_buckets data_sector_size num_txns)} = @values; my $header_var = scalar(@values) + 4 + 4 * $self->num_txns + 3 * $self->byte_size; unless ( $size == $header_var ) { @@ -769,6 +785,7 @@ sub hash_chars { $_[0]{hash_chars} } sub num_txns { $_[0]{num_txns} } sub max_buckets { $_[0]{max_buckets} } sub 
blank_md5 { chr(0) x $_[0]->hash_size } +sub data_sector_size { $_[0]{data_sector_size} } sub trans_id { $_[0]{trans_id} } sub set_trans_id { $_[0]{trans_id} = $_[1] } @@ -987,7 +1004,7 @@ package DBM::Deep::Engine::Sector::Data; our @ISA = qw( DBM::Deep::Engine::Sector ); # This is in bytes -sub size { return 256 } +sub size { $_[0]{engine}->data_sector_size } sub free_meth { return '_add_free_data_sector' } sub clone { diff --git a/t/36_verybighash.t b/t/36_verybighash.t index d218642..4f0bcc3 100644 --- a/t/36_verybighash.t +++ b/t/36_verybighash.t @@ -4,7 +4,7 @@ use strict; use Test::More; plan skip_all => "You must set \$ENV{LONG_TESTS} >= 2 to run the superlong tests" - unless $ENV{LONG_TESTS} >= 2; + unless $ENV{LONG_TESTS} && $ENV{LONG_TESTS} >= 2; use Test::Deep; use t::common qw( new_fh ); @@ -38,7 +38,7 @@ for my $key_no ( 0 .. $max_keys ) { print "$key_no: $s\n"; if ( $s > $gigs * 2**30) { - fail "DB file ($db_fn) size exceeds $gigs GB"; + fail "DB file ($filename) size exceeds $gigs GB"; exit; } } diff --git a/t/40_freespace.t b/t/40_freespace.t index bc8216d..7b0645d 100644 --- a/t/40_freespace.t +++ b/t/40_freespace.t @@ -75,7 +75,8 @@ use_ok( 'DBM::Deep' ); # we wrote this dreck ... my $size = -s $filename; - my $expected = $size + 9 * ( 256 + 256 ); + my $data_sector_size = $db->_engine->data_sector_size; + my $expected = $size + 9 * ( 2 * $data_sector_size ); $db->{ $_ } = undef for 5 .. 17;