From: rkinyon Date: Wed, 22 Nov 2006 22:17:48 +0000 (+0000) Subject: Cleaned up - moving towards better file format X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c83524c60b4d67ee79a7cd2574a41190ba3fa370;p=dbsrgits%2FDBM-Deep.git Cleaned up - moving towards better file format --- diff --git a/lib/DBM/Deep/Engine3.pm b/lib/DBM/Deep/Engine3.pm index 8f89364..6ac7093 100644 --- a/lib/DBM/Deep/Engine3.pm +++ b/lib/DBM/Deep/Engine3.pm @@ -6,7 +6,6 @@ use strict; our $VERSION = q(0.99_03); -use Digest::MD5 (); use Scalar::Util (); # File-wide notes: @@ -34,17 +33,20 @@ sub HEAD () { 0 } ################################################################################ +my %size_to_pack = ( + 2 => 'n', + 4 => 'N', + 8 => 'Q', +); + sub new { my $class = shift; my ($args) = @_; my $self = bless { - long_size => 4, - long_pack => 'N', - data_size => 4, - data_pack => 'N', + byte_size => 4, - digest => \&Digest::MD5::md5, + digest => undef, hash_size => 16, # In bytes max_buckets => 16, @@ -54,16 +56,13 @@ sub new { if ( defined $args->{pack_size} ) { if ( lc $args->{pack_size} eq 'small' ) { - $args->{long_size} = 2; - $args->{long_pack} = 'n'; + $args->{byte_size} = 2; } elsif ( lc $args->{pack_size} eq 'medium' ) { - $args->{long_size} = 4; - $args->{long_pack} = 'N'; + $args->{byte_size} = 4; } elsif ( lc $args->{pack_size} eq 'large' ) { - $args->{long_size} = 8; - $args->{long_pack} = 'Q'; + $args->{byte_size} = 8; } else { die "Unknown pack_size value: '$args->{pack_size}'\n"; @@ -77,6 +76,8 @@ sub new { } Scalar::Util::weaken( $self->{obj} ) if $self->{obj}; + $self->{byte_pack} = $size_to_pack{ $self->{byte_size} }; + ## # Number of buckets per blist before another level of indexing is # done. Increase this value for slightly greater speed, but larger database @@ -88,6 +89,11 @@ sub new { $self->{max_buckets} = 16; } + if ( !$self->{digest} ) { + require Digest::MD5; + $self->{digest} = \&Digest::MD5::md5; + } + return $self; } @@ -133,6 +139,10 @@ sub setup_fh { # Creating a new file unless ( $bytes_read ) { $self->_write_file_header; + + # 1) Create Array/Hash entry + # 2) Create Index entry + $obj->{base_offset} = $self->_storage->request_space( $self->_tag_size( $self->{index_size} ), ); @@ -176,32 +186,30 @@ sub _calculate_sizes { # current hashing algorithm #XXX Does this need to be updated with different hashing algorithms? $self->{hash_chars_used} = (2**8); - $self->{index_size} = $self->{hash_chars_used} * $self->{long_size}; + $self->{index_size} = $self->{hash_chars_used} * $self->{byte_size}; - $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2; + $self->{bucket_size} = $self->{hash_size} + $self->{byte_size} * 2; $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size}; - $self->{key_size} = $self->{long_size} * 2; - $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size}; - return; } sub _write_file_header { my $self = shift; - my $loc = $self->_storage->request_space( length( SIG_FILE ) + 33 ); + my $header_fixed = length( SIG_FILE ) + 1 + 4 + 4; + my $header_var = 16 + 1 + 1; + + my $loc = $self->_storage->request_space( $header_fixed + $header_var ); $self->_storage->print_at( $loc, SIG_FILE, SIG_HEADER, - pack('N', 1), # header version - pack('N', 24), # header size + pack('N', 1), # header version - at this point, we're at 9 bytes + pack('N', $header_var), # header size + # --- Above is $header_fixed. Below is $header_var pack('N4', 0, 0, 0, 0), # currently running transaction IDs - pack('n', $self->{long_size}), - pack('A', $self->{long_pack}), - pack('n', $self->{data_size}), - pack('A', $self->{data_pack}), + pack('n', $self->{byte_size}), pack('n', $self->{max_buckets}), ); @@ -213,7 +221,10 @@ sub _write_file_header { sub _read_file_header { my $self = shift; - my $buffer = $self->_storage->read_at( 0, length(SIG_FILE) + 9 ); + my $header_fixed = length( SIG_FILE ) + 1 + 4 + 4; + my $header_var = 16 + 1 + 1; + + my $buffer = $self->_storage->read_at( 0, $header_fixed ); return unless length($buffer); my ($file_signature, $sig_header, $header_version, $size) = unpack( @@ -230,19 +241,25 @@ sub _read_file_header { DBM::Deep->_throw_error( "Old file version found." ); } + unless ( $size eq $header_var ) { + $self->_storage->close; + DBM::Deep->_throw_error( "Unexpected size found." ); + } + my $buffer2 = $self->_storage->read_at( undef, $size ); # $a1-4 are the transaction IDs - my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 ); + my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n n', $buffer2 ); - $self->_storage->set_transaction_offset( 13 ); + # The transaction offset is the first thing after the fixed header section + $self->_storage->set_transaction_offset( $header_fixed ); - if ( @values < 5 || grep { !defined } @values ) { + if ( @values < 2 || grep { !defined } @values ) { $self->_storage->close; DBM::Deep->_throw_error("Corrupted file - bad header"); } #XXX Add warnings if values weren't set right - @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values; + @{$self}{qw(byte_size max_buckets)} = @values; return length($buffer) + length($buffer2); } @@ -254,7 +271,7 @@ sub _write_tag { $self->_storage->print_at( $offset, - $sig, pack($self->{data_pack}, $size), $content, + $sig, pack($self->{byte_pack}, $size), $content, ); return unless defined $offset; @@ -262,7 +279,7 @@ sub _write_tag { return { signature => $sig, start => $offset, - offset => $offset + SIG_SIZE + $self->{data_size}, + offset => $offset + SIG_SIZE + $self->{byte_size}, content => $content, is_new => 1, }; @@ -274,14 +291,14 @@ sub _load_tag { my $storage = $self->_storage; my ($sig, $size) = unpack( - "A $self->{data_pack}", - $storage->read_at( $offset, SIG_SIZE + $self->{data_size} ), + "A $self->{byte_pack}", + $storage->read_at( $offset, SIG_SIZE + $self->{byte_size} ), ); return { signature => $sig, start => $offset, - offset => $offset + SIG_SIZE + $self->{data_size}, + offset => $offset + SIG_SIZE + $self->{byte_size}, content => $storage->read_at( undef, $size ), is_new => 0, }; @@ -290,7 +307,7 @@ sub _load_tag { sub _tag_size { my $self = shift; my ($size) = @_; - return SIG_SIZE + $self->{data_size} + $size; + return SIG_SIZE + $self->{byte_size} + $size; } ################################################################################