X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDBM%2FDeep%2FEngine.pm;h=ad75d9144be96fb726faded1474f2c85d6bc9760;hb=898fd1fd98bd753241f6db44855ea5fd6c252605;hp=f89872548245f5781574f48e7c69fe68bbd51c77;hpb=75be64132492b33bfc7bc94a919c7e43bb98187c;p=dbsrgits%2FDBM-Deep.git diff --git a/lib/DBM/Deep/Engine.pm b/lib/DBM/Deep/Engine.pm index f898725..ad75d91 100644 --- a/lib/DBM/Deep/Engine.pm +++ b/lib/DBM/Deep/Engine.pm @@ -4,6 +4,20 @@ use strict; use Fcntl qw( :DEFAULT :flock :seek ); +## +# Setup file and tag signatures. These should never change. +## +sub SIG_FILE () { 'DPDB' } +sub SIG_INTERNAL () { 'i' } +sub SIG_HASH () { 'H' } +sub SIG_ARRAY () { 'A' } +sub SIG_NULL () { 'N' } +sub SIG_DATA () { 'D' } +sub SIG_INDEX () { 'I' } +sub SIG_BLIST () { 'B' } +sub SIG_FREE () { 'F' } +sub SIG_SIZE () { 1 } + sub precalc_sizes { ## # Precalculate index, bucket and bucket list sizes @@ -11,7 +25,7 @@ sub precalc_sizes { my $self = shift; $self->{index_size} = (2**8) * $self->{long_size}; - $self->{bucket_size} = $self->{hash_size} + $self->{long_size}; + $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2; $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size}; return 1; @@ -37,9 +51,9 @@ sub set_pack { ## # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each - # key/value. Upgrading this is possible (see above) but probably not necessary. - # If you need more than 4 GB for a single key or value, this module is really - # not for you :-) + # key/value. Upgrading this is possible (see above) but probably not + # necessary. If you need more than 4 GB for a single key or value, this + # module is really not for you :-) ## $self->{data_size} = $data_s ? $data_s : 4; $self->{data_pack} = $data_p ? $data_p : 'N'; @@ -74,9 +88,11 @@ sub new { hash_size => 16, ## - # Maximum number of buckets per list before another level of indexing is done. - # Increase this value for slightly greater speed, but larger database files. - # DO NOT decrease this value below 16, due to risk of recursive reindex overrun. + # Maximum number of buckets per list before another level of indexing is + # done. + # Increase this value for slightly greater speed, but larger database + # files. DO NOT decrease this value below 16, due to risk of recursive + # reindex overrun. ## max_buckets => 16, }, $class; @@ -92,6 +108,59 @@ sub setup_fh { $self->open( $obj ) if !defined $obj->_fh; + my $fh = $obj->_fh; + flock $fh, LOCK_EX; + + unless ( $obj->{base_offset} ) { + seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); + my $signature; + my $bytes_read = read( $fh, $signature, length(SIG_FILE)); + + ## + # File is empty -- write signature and master index + ## + if (!$bytes_read) { + my $loc = $self->_request_space( $obj, length( SIG_FILE ) ); + seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET); + print( $fh SIG_FILE); + + $obj->{base_offset} = $self->_request_space( + $obj, $self->tag_size( $self->{index_size} ), + ); + + $self->write_tag( + $obj, $obj->_base_offset, $obj->_type, + chr(0)x$self->{index_size}, + ); + + # Flush the filehandle + my $old_fh = select $fh; + my $old_af = $|; $| = 1; $| = $old_af; + select $old_fh; + } + else { + $obj->{base_offset} = $bytes_read; + + ## + # Check signature was valid + ## + unless ($signature eq SIG_FILE) { + $self->close_fh( $obj ); + $obj->_throw_error("Signature not found -- file is not a Deep DB"); + } + + ## + # Get our type from master index signature + ## + my $tag = $self->load_tag($obj, $obj->_base_offset) + or $obj->_throw_error("Corrupted file, no master index record"); + + unless ($obj->{type} eq $tag->{signature}) { + $obj->_throw_error("File type mismatch"); + } + } + } + #XXX We have to make sure we don't mess up when autoflush isn't turned on unless ( $obj->_root->{inode} ) { my @stats = stat($obj->_fh); @@ -99,6 +168,8 @@ sub setup_fh { $obj->_root->{end} = $stats[7]; } + flock $fh, LOCK_UN; + return 1; } @@ -110,15 +181,14 @@ sub open { my $self = shift; my ($obj) = @_; - if (defined($obj->_fh)) { $self->close_fh( $obj ); } - # Theoretically, adding O_BINARY should remove the need for the binmode # Of course, testing it is going to be ... interesting. my $flags = O_RDWR | O_CREAT | O_BINARY; my $fh; - sysopen( $fh, $obj->_root->{file}, $flags ) - or $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!"); + my $filename = $obj->_root->{file}; + sysopen( $fh, $filename, $flags ) + or $obj->_throw_error("Cannot sysopen file '$filename': $!"); $obj->_root->{fh} = $fh; #XXX Can we remove this by using the right sysopen() flags? @@ -131,55 +201,6 @@ sub open { select $old; } - seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); - - my $signature; - my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE)); - - ## - # File is empty -- write signature and master index - ## - if (!$bytes_read) { - seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); - print( $fh DBM::Deep->SIG_FILE); - - $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $self->{index_size}); - - # Why is this being printed here? I'm not seeing where anything actually points to - # this spot. - #XXX $obj->_root->{end} isn't updated from these 10 bytes that are being written - my $plain_key = "[base]"; - print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key ); - - # Flush the filehandle - my $old_fh = select $fh; - my $old_af = $|; $| = 1; $| = $old_af; - select $old_fh; - - return 1; - } - - ## - # Check signature was valid - ## - unless ($signature eq DBM::Deep->SIG_FILE) { - $self->close_fh( $obj ); - $obj->_throw_error("Signature not found -- file is not a Deep DB"); - } - - ## - # Get our type from master index signature - ## - my $tag = $self->load_tag($obj, $obj->_base_offset) - or $obj->_throw_error("Corrupted file, no master index record"); - - unless ($obj->{type} eq $tag->{signature}) { - $obj->_throw_error("File type mismatch"); - } - -#XXX We probably also want to store the hash algorithm name and not assume anything -#XXX The cool thing would be to allow a different hashing algorithm at every level - return 1; } @@ -195,27 +216,34 @@ sub close_fh { return 1; } -sub create_tag { +sub tag_size { + my $self = shift; + my ($size) = @_; + return SIG_SIZE + $self->{data_size} + $size; +} + +sub write_tag { ## # Given offset, signature and content, create tag and write to disk ## my $self = shift; my ($obj, $offset, $sig, $content) = @_; - my $size = length($content); + my $size = length( $content ); my $fh = $obj->_fh; - seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); + if ( defined $offset ) { + seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); + } + print( $fh $sig . pack($self->{data_pack}, $size) . $content ); - if ($offset == $obj->_root->{end}) { - $obj->_root->{end} += DBM::Deep->SIG_SIZE + $self->{data_size} + $size; - } + return unless defined $offset; return { signature => $sig, size => $size, - offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size}, + offset => $offset + SIG_SIZE + $self->{data_size}, content => $content }; } @@ -227,6 +255,8 @@ sub load_tag { my $self = shift; my ($obj, $offset) = @_; +# print join(':',map{$_||''}caller(1)), $/; + my $fh = $obj->_fh; seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); @@ -235,7 +265,7 @@ sub load_tag { return if eof $fh; my $b; - read( $fh, $b, DBM::Deep->SIG_SIZE + $self->{data_size} ); + read( $fh, $b, SIG_SIZE + $self->{data_size} ); my ($sig, $size) = unpack( "A $self->{data_pack}", $b ); my $buffer; @@ -244,11 +274,54 @@ sub load_tag { return { signature => $sig, size => $size, - offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size}, + offset => $offset + SIG_SIZE + $self->{data_size}, content => $buffer }; } +sub _length_needed { + my $self = shift; + my ($obj, $value, $key) = @_; + + my $is_dbm_deep = eval { + local $SIG{'__DIE__'}; + $value->isa( 'DBM::Deep' ); + }; + + my $len = SIG_SIZE + $self->{data_size} + + $self->{data_size} + length( $key ); + + if ( $is_dbm_deep && $value->_root eq $obj->_root ) { + return $len + $self->{long_size}; + } + + my $r = Scalar::Util::reftype( $value ) || ''; + if ( $obj->_root->{autobless} ) { + # This is for the bit saying whether or not this thing is blessed. + $len += 1; + } + + unless ( $r eq 'HASH' || $r eq 'ARRAY' ) { + if ( defined $value ) { + $len += length( $value ); + } + return $len; + } + + $len += $self->{index_size}; + + # if autobless is enabled, must also take into consideration + # the class name as it is stored after the key. + if ( $obj->_root->{autobless} ) { + my $value_class = Scalar::Util::blessed($value); + if ( defined $value_class && !$is_dbm_deep ) { + $len += $self->{data_size} + length($value_class); + } + } + + return $len; +} + sub add_bucket { ## # Adds one key/value pair to bucket list, given offset, MD5 digest of key, @@ -257,190 +330,133 @@ sub add_bucket { my $self = shift; my ($obj, $tag, $md5, $plain_key, $value) = @_; - my $keys = $tag->{content}; + # This verifies that only supported values will be stored. + { + my $r = Scalar::Util::reftype( $value ); + last if !defined $r; + + last if $r eq 'HASH'; + last if $r eq 'ARRAY'; + + $obj->_throw_error( + "Storage of variables of type '$r' is not supported." + ); + } + my $location = 0; my $result = 2; my $root = $obj->_root; + my $fh = $obj->_fh; - my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) }; - my $internal_ref = $is_dbm_deep && ($value->_root eq $root); - - my $fh = $obj->_fh; + my $actual_length = $self->_length_needed( $obj, $value, $plain_key ); - ## - # Iterate through buckets, seeing if this is a new entry or a replace. - ## - BUCKET: - for (my $i = 0; $i < $self->{max_buckets}; $i++) { - my $subloc = $self->_get_subloc( $keys, $i ); + my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); - if (!$subloc) { - ## - # Found empty bucket (end of list). Populate and exit loop. - ## - $result = 2; - - $location = $internal_ref - ? $value->_base_offset - : $root->{end}; -print "NEW: $location\n"; +# $self->_release_space( $obj, $size, $subloc ); + # Updating a known md5 +#XXX This needs updating to use _release_space + if ( $subloc ) { + $result = 1; + if ($actual_length <= $size) { + $location = $subloc; + } + else { + $location = $self->_request_space( $obj, $actual_length ); seek( $fh, - $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, + $tag->{offset} + $offset + + $self->{hash_size} + $root->{file_offset}, SEEK_SET, ); - - print( $fh $md5 . pack($self->{long_pack}, $location) ); - last; + print( $fh pack($self->{long_pack}, $location ) ); + print( $fh pack($self->{long_pack}, $actual_length ) ); } + } + # Adding a new md5 + elsif ( defined $offset ) { + $location = $self->_request_space( $obj, $actual_length ); - my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); - if ( $md5 ne $key ) { - next BUCKET; - } + seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET ); + print( $fh $md5 . pack($self->{long_pack}, $location ) ); + print( $fh pack($self->{long_pack}, $actual_length ) ); + } + # If bucket didn't fit into list, split into a new index level + # split_index() will do the _request_space() call + else { + $location = $self->split_index( $obj, $md5, $tag ); + } - ## - # Found existing bucket with same key. Replace with new value. - ## - $result = 1; + $self->write_value( $obj, $location, $plain_key, $value ); - if ($internal_ref) { - $location = $value->_base_offset; - seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); - print( $fh $md5 . pack($self->{long_pack}, $location) ); - return $result; - } + return $result; +} - seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET); - my $size; - read( $fh, $size, $self->{data_size}); - $size = unpack($self->{data_pack}, $size); +sub write_value { + my $self = shift; + my ($obj, $location, $key, $value) = @_; - ## - # If value is a hash, array, or raw value with equal or less size, we can - # reuse the same content area of the database. Otherwise, we have to create - # a new content area at the EOF. - ## - my $actual_length; - my $r = Scalar::Util::reftype( $value ) || ''; - if ( $r eq 'HASH' || $r eq 'ARRAY' ) { - $actual_length = $self->{index_size}; - - # if autobless is enabled, must also take into consideration - # the class name, as it is stored along with key/value. - if ( $root->{autobless} ) { - my $value_class = Scalar::Util::blessed($value); - if ( defined $value_class && !$value->isa('DBM::Deep') ) { - $actual_length += length($value_class); - } - } - } - else { $actual_length = length($value); } + my $fh = $obj->_fh; + my $root = $obj->_root; - if ($actual_length <= $size) { - $location = $subloc; - } - else { - $location = $root->{end}; - seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $self->{hash_size} + $root->{file_offset}, SEEK_SET); - print( $fh pack($self->{long_pack}, $location) ); - } + my $is_dbm_deep = eval { + local $SIG{'__DIE__'}; + $value->isa( 'DBM::Deep' ); + }; - last; - } + my $is_internal_ref = $is_dbm_deep && ($value->_root eq $root); + + seek($fh, $location + $root->{file_offset}, SEEK_SET); ## - # If this is an internal reference, return now. - # No need to write value or plain key + # Write signature based on content type, set content length and write + # actual value. ## - #XXX We need to store the key as a reference to the internal spot - if ($internal_ref) { - return $result; + my $r = Scalar::Util::reftype($value) || ''; + if ( $is_internal_ref ) { + $self->write_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $value->_base_offset) ); + } + elsif ($r eq 'HASH') { + $self->write_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} ); + } + elsif ($r eq 'ARRAY') { + $self->write_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} ); + } + elsif (!defined($value)) { + $self->write_tag( $obj, undef, SIG_NULL, '' ); + } + else { + $self->write_tag( $obj, undef, SIG_DATA, $value ); } ## - # If bucket didn't fit into list, split into a new index level + # Plain key is stored AFTER value, as keys are typically fetched less often. ## - if (!$location) { - # re-index bucket list + print( $fh pack($self->{data_pack}, length($key)) . $key ); - $self->split_index( $obj, $md5, $tag ); - - $location = $root->{end}; - } + # Internal references don't care about autobless + return 1 if $is_internal_ref; ## - # Seek to content area and store signature, value and plaintext key + # If value is blessed, preserve class name ## - if ($location) { - my $content_length; - seek($fh, $location + $root->{file_offset}, SEEK_SET); - - ## - # Write signature based on content type, set content length and write actual value. - ## - my $r = Scalar::Util::reftype($value) || ''; - if ($r eq 'HASH') { - print( $fh DBM::Deep->TYPE_HASH ); - print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); - $content_length = $self->{index_size}; - } - elsif ($r eq 'ARRAY') { - print( $fh DBM::Deep->TYPE_ARRAY ); - print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); - $content_length = $self->{index_size}; - } - elsif (!defined($value)) { - print( $fh DBM::Deep->SIG_NULL ); - print( $fh pack($self->{data_pack}, 0) ); - $content_length = 0; + if ( $root->{autobless} ) { + my $value_class = Scalar::Util::blessed($value); + if ( defined $value_class && !$is_dbm_deep ) { + print( $fh chr(1) ); + print( $fh pack($self->{data_pack}, length($value_class)) . $value_class ); } else { - print( $fh DBM::Deep->SIG_DATA ); - print( $fh pack($self->{data_pack}, length($value)) . $value ); - $content_length = length($value); - } - - ## - # Plain key is stored AFTER value, as keys are typically fetched less often. - ## - print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key ); - - ## - # If value is blessed, preserve class name - ## - if ( $root->{autobless} ) { - my $value_class = Scalar::Util::blessed($value); - if ( defined $value_class && $value_class ne 'DBM::Deep' ) { - ## - # Blessed ref -- will restore later - ## - print( $fh chr(1) ); - print( $fh pack($self->{data_pack}, length($value_class)) . $value_class ); - $content_length += 1; - $content_length += $self->{data_size} + length($value_class); - } - else { - print( $fh chr(0) ); - $content_length += 1; - } - } - - ## - # If this is a new content area, advance EOF counter - ## - if ($location == $root->{end}) { - $root->{end} += DBM::Deep->SIG_SIZE; - $root->{end} += $self->{data_size} + $content_length; - $root->{end} += $self->{data_size} + length($plain_key); + print( $fh chr(0) ); } + } - ## - # If content is a hash or array, create new child DBM::Deep object and - # pass each key or element to it. - ## + ## + # If content is a hash or array, create new child DBM::Deep object and + # pass each key or element to it. + ## + if ( !$is_internal_ref ) { if ($r eq 'HASH') { my $branch = DBM::Deep->new( type => DBM::Deep->TYPE_HASH, @@ -463,11 +479,9 @@ print "NEW: $location\n"; $index++; } } - - return $result; } - $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file"); + return 1; } sub split_index { @@ -476,202 +490,195 @@ sub split_index { my $fh = $obj->_fh; my $root = $obj->_root; - my $keys = $tag->{content}; + + my $loc = $self->_request_space( + $obj, $self->tag_size( $self->{index_size} ), + ); seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); - print( $fh pack($self->{long_pack}, $root->{end}) ); + print( $fh pack($self->{long_pack}, $loc) ); - my $index_tag = $self->create_tag( - $obj, - $root->{end}, - DBM::Deep->SIG_INDEX, - chr(0) x $self->{index_size}, + my $index_tag = $self->write_tag( + $obj, $loc, SIG_INDEX, + chr(0)x$self->{index_size}, ); - my @offsets = (); + my $newtag_loc = $self->_request_space( + $obj, $self->tag_size( $self->{bucket_list_size} ), + ); - $keys .= $md5 . pack($self->{long_pack}, 0); + my $keys = $tag->{content} + . $md5 . pack($self->{long_pack}, $newtag_loc) + . pack($self->{long_pack}, 0); + my @newloc = (); BUCKET: for (my $i = 0; $i <= $self->{max_buckets}; $i++) { - my $key = substr( - $keys, - ($i * $self->{bucket_size}), - $self->{hash_size}, - ); + my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i ); - next BUCKET unless $key; - - my $old_subloc = $self->_get_subloc( $keys, $i ); + die "[INTERNAL ERROR]: No key in split_index()\n" unless $key; + die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc; my $num = ord(substr($key, $tag->{ch} + 1, 1)); - if ($offsets[$num]) { - my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $self->{data_size}; - seek($fh, $offset + $root->{file_offset}, SEEK_SET); + if ($newloc[$num]) { + seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET); my $subkeys; read( $fh, $subkeys, $self->{bucket_list_size}); - for (my $k=0; $k<$self->{max_buckets}; $k++) { - my $subloc = $self->_get_subloc( $subkeys, $k ); + # This is looking for the first empty spot + my ($subloc, $offset, $size) = $self->_find_in_buckets( + { content => $subkeys }, '', + ); + + seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET); + print( $fh $key . pack($self->{long_pack}, $old_subloc) ); - if (!$subloc) { - seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); - print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); - last; - } - } # k loop + next; } - else { - $offsets[$num] = $root->{end}; - seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); - print( $fh pack($self->{long_pack}, $root->{end}) ); - my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $self->{bucket_list_size}); + seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); - seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); - print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); - } - } # i loop + my $loc = $self->_request_space( + $obj, $self->tag_size( $self->{bucket_list_size} ), + ); - return; + print( $fh pack($self->{long_pack}, $loc) ); + + my $blist_tag = $self->write_tag( + $obj, $loc, SIG_BLIST, + chr(0)x$self->{bucket_list_size}, + ); + + seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); + print( $fh $key . pack($self->{long_pack}, $old_subloc) ); + + $newloc[$num] = $blist_tag->{offset}; + } + + $self->_release_space( + $obj, $self->tag_size( $self->{bucket_list_size} ), + $tag->{offset} - SIG_SIZE - $self->{data_size}, + ); + + return $newtag_loc; } -sub get_bucket_value { - ## - # Fetch single value given tag and MD5 digested key. - ## +sub read_from_loc { my $self = shift; - my ($obj, $tag, $md5) = @_; - my $keys = $tag->{content}; + my ($obj, $subloc) = @_; my $fh = $obj->_fh; ## - # Iterate through buckets, looking for a key match + # Found match -- seek to offset and read signature ## - BUCKET: - for (my $i = 0; $i < $self->{max_buckets}; $i++) { - my $subloc = $self->_get_subloc( $keys, $i ); + my $signature; + seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); + read( $fh, $signature, SIG_SIZE); - if (!$subloc) { + ## + # If value is a hash or array, return new DBM::Deep object with correct offset + ## + if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) { + my $obj = DBM::Deep->new( + type => $signature, + base_offset => $subloc, + root => $obj->_root, + ); + + if ($obj->_root->{autobless}) { ## - # Hit end of list, no match + # Skip over value and plain key to see if object needs + # to be re-blessed ## - return; - } + seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR); - my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); - if ( $md5 ne $key ) { - next BUCKET; - } - - ## - # Found match -- seek to offset and read signature - ## - my $signature; - seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); - read( $fh, $signature, DBM::Deep->SIG_SIZE); - - ## - # If value is a hash or array, return new DBM::Deep object with correct offset - ## - if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) { - my $obj = DBM::Deep->new( - type => $signature, - base_offset => $subloc, - root => $obj->_root, - ); + my $size; + read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); + if ($size) { seek($fh, $size, SEEK_CUR); } - if ($obj->_root->{autobless}) { + my $bless_bit; + read( $fh, $bless_bit, 1); + if (ord($bless_bit)) { ## - # Skip over value and plain key to see if object needs - # to be re-blessed + # Yes, object needs to be re-blessed ## - seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR); - - my $size; + my $class_name; read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); - if ($size) { seek($fh, $size, SEEK_CUR); } - - my $bless_bit; - read( $fh, $bless_bit, 1); - if (ord($bless_bit)) { - ## - # Yes, object needs to be re-blessed - ## - my $class_name; - read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); - if ($size) { read( $fh, $class_name, $size); } - if ($class_name) { $obj = bless( $obj, $class_name ); } - } + if ($size) { read( $fh, $class_name, $size); } + if ($class_name) { $obj = bless( $obj, $class_name ); } } - - return $obj; } - ## - # Otherwise return actual value - ## - elsif ($signature eq DBM::Deep->SIG_DATA) { - my $size; - read( $fh, $size, $self->{data_size}); - $size = unpack($self->{data_pack}, $size); + return $obj; + } + elsif ( $signature eq SIG_INTERNAL ) { + my $size; + read( $fh, $size, $self->{data_size}); + $size = unpack($self->{data_pack}, $size); + + if ( $size ) { + my $new_loc; + read( $fh, $new_loc, $size ); + $new_loc = unpack( $self->{long_pack}, $new_loc ); - my $value = ''; - if ($size) { read( $fh, $value, $size); } - return $value; + return $self->read_from_loc( $obj, $new_loc ); + } + else { + return; } + } + ## + # Otherwise return actual value + ## + elsif ($signature eq SIG_DATA) { + my $size; + read( $fh, $size, $self->{data_size}); + $size = unpack($self->{data_pack}, $size); - ## - # Key exists, but content is null - ## - else { return; } - } # i loop + my $value = ''; + if ($size) { read( $fh, $value, $size); } + return $value; + } + ## + # Key exists, but content is null + ## return; } -sub delete_bucket { +sub get_bucket_value { ## - # Delete single key/value pair given tag and MD5 digested key. + # Fetch single value given tag and MD5 digested key. ## my $self = shift; my ($obj, $tag, $md5) = @_; - my $keys = $tag->{content}; - my $fh = $obj->_fh; + my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); + if ( $subloc ) { + return $self->read_from_loc( $obj, $subloc ); + } + return; +} +sub delete_bucket { ## - # Iterate through buckets, looking for a key match + # Delete single key/value pair given tag and MD5 digested key. ## - BUCKET: - for (my $i=0; $i<$self->{max_buckets}; $i++) { - my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); -# my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); - my $subloc = $self->_get_subloc( $keys, $i ); - - if (!$subloc) { - ## - # Hit end of list, no match - ## - return; - } - - if ( $md5 ne $key ) { - next BUCKET; - } + my $self = shift; + my ($obj, $tag, $md5) = @_; - ## - # Matched key -- delete bucket and return - ## - seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $obj->_root->{file_offset}, SEEK_SET); - print( $fh substr($keys, ($i+1) * $self->{bucket_size} ) ); + my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); +#XXX This needs _release_space() + if ( $subloc ) { + my $fh = $obj->_fh; + seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET); + print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) ); print( $fh chr(0) x $self->{bucket_size} ); return 1; - } # i loop - + } return; } @@ -681,35 +688,9 @@ sub bucket_exists { ## my $self = shift; my ($obj, $tag, $md5) = @_; - my $keys = $tag->{content}; - ## - # Iterate through buckets, looking for a key match - ## - BUCKET: - for (my $i=0; $i<$self->{max_buckets}; $i++) { - my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); - #my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); - my $subloc = $self->_get_subloc( $keys, $i ); - - if (!$subloc) { - ## - # Hit end of list, no match - ## - return; - } - - if ( $md5 ne $key ) { - next BUCKET; - } - - ## - # Matched key -- return true - ## - return 1; - } # i loop - - return; + my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); + return $subloc && 1; } sub find_bucket_list { @@ -724,42 +705,39 @@ sub find_bucket_list { # Locate offset for bucket list using digest index system ## my $tag = $self->load_tag($obj, $obj->_base_offset) - or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" ); -#print $obj->_base_offset, " : $tag->{signature} : $tag->{offset} : $tag->{size}\n"; + or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" ); my $ch = 0; - while ($tag->{signature} ne DBM::Deep->SIG_BLIST) { + while ($tag->{signature} ne SIG_BLIST) { my $num = ord substr($md5, $ch, 1); my $ref_loc = $tag->{offset} + ($num * $self->{long_size}); $tag = $self->index_lookup( $obj, $tag, $num ); if (!$tag) { - if ( $args->{create} ) { - my $fh = $obj->_fh; - seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET); - print( $fh pack($self->{long_pack}, $obj->_root->{end}) ); - - $tag = $self->create_tag( - $obj, $obj->_root->{end}, - DBM::Deep->SIG_BLIST, - chr(0) x $self->{bucket_list_size}, - ); + return if !$args->{create}; - $tag->{ref_loc} = $ref_loc; - $tag->{ch} = $ch; + my $loc = $self->_request_space( + $obj, $self->tag_size( $self->{bucket_list_size} ), + ); - last; - } - else { - return; - } + my $fh = $obj->_fh; + seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET); + print( $fh pack($self->{long_pack}, $loc) ); + + $tag = $self->write_tag( + $obj, $loc, SIG_BLIST, + chr(0)x$self->{bucket_list_size}, + ); + + $tag->{ref_loc} = $ref_loc; + $tag->{ch} = $ch; + + last; } - $tag->{ch} = $ch; + $tag->{ch} = $ch++; $tag->{ref_loc} = $ref_loc; - - $ch++; } return $tag; @@ -797,14 +775,18 @@ sub traverse_index { my $fh = $obj->_fh; - if ($tag->{signature} ne DBM::Deep->SIG_BLIST) { + if ($tag->{signature} ne SIG_BLIST) { my $content = $tag->{content}; my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1)); - for (my $index = $start; $index < 256; $index++) { + for (my $idx = $start; $idx < (2**8); $idx++) { my $subloc = unpack( $self->{long_pack}, - substr($content, $index * $self->{long_size}, $self->{long_size}), + substr( + $content, + $idx * $self->{long_size}, + $self->{long_size}, + ), ); if ($subloc) { @@ -826,49 +808,33 @@ sub traverse_index { ## # Iterate through buckets, looking for a key match ## - for (my $i=0; $i<$self->{max_buckets}; $i++) { - my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); -# my $subloc = unpack( -# $self->{long_pack}, -# substr( -# $keys, -# ($i * $self->{bucket_size}) + $self->{hash_size}, -# $self->{long_size}, -# ), -# ); - my $subloc = $self->_get_subloc( $keys, $i ); + for (my $i = 0; $i < $self->{max_buckets}; $i++) { + my ($key, $subloc) = $self->_get_key_subloc( $keys, $i ); + # End of bucket list -- return to outer loop if (!$subloc) { - ## - # End of bucket list -- return to outer loop - ## $obj->{return_next} = 1; last; } + # Located previous key -- return next one found elsif ($key eq $obj->{prev_md5}) { - ## - # Located previous key -- return next one found - ## $obj->{return_next} = 1; next; } + # Seek to bucket location and skip over signature elsif ($obj->{return_next}) { - ## - # Seek to bucket location and skip over signature - ## - seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET); + seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); - ## # Skip over value to get to plain key - ## + my $sig; + read( $fh, $sig, SIG_SIZE ); + my $size; read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); if ($size) { seek($fh, $size, SEEK_CUR); } - ## # Read in plain key and return as scalar - ## my $plain_key; read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); @@ -876,7 +842,7 @@ sub traverse_index { return $plain_key; } - } # bucket loop + } $obj->{return_next} = 1; } # tag is a bucket list @@ -908,21 +874,102 @@ sub get_next_key { # Utilities -sub _get_subloc { +sub _get_key_subloc { my $self = shift; my ($keys, $idx) = @_; - my $subloc = unpack( - $self->{long_pack}, + my ($key, $subloc, $size) = unpack( + "a$self->{hash_size} $self->{long_pack} $self->{long_pack}", substr( $keys, - ($idx * $self->{bucket_size}) + $self->{hash_size}, - $self->{long_size}, + ($idx * $self->{bucket_size}), + $self->{bucket_size}, ), ); - return $subloc; + return ($key, $subloc, $size); +} + +sub _find_in_buckets { + my $self = shift; + my ($tag, $md5) = @_; + + BUCKET: + for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) { + my ($key, $subloc, $size) = $self->_get_key_subloc( + $tag->{content}, $i, + ); + + return ($subloc, $i * $self->{bucket_size}, $size) unless $subloc; + + next BUCKET if $key ne $md5; + + return ($subloc, $i * $self->{bucket_size}, $size); + } + + return; +} + +#sub _print_at { +# my $self = shift; +# my ($obj, $spot, $data) = @_; +# +# my $fh = $obj->_fh; +# seek( $fh, $spot, SEEK_SET ); +# print( $fh $data ); +# +# return; +#} + +sub _request_space { + my $self = shift; + my ($obj, $size) = @_; + + my $loc = $obj->_root->{end}; + $obj->_root->{end} += $size; + + return $loc; +} + +sub _release_space { + my $self = shift; + my ($obj, $size, $loc) = @_; + + my $next_loc = 0; + + my $fh = $obj->_fh; + seek( $fh, $loc + $obj->_root->{file_offset}, SEEK_SET ); + print( $fh SIG_FREE + . pack($self->{long_pack}, $size ) + . pack($self->{long_pack}, $next_loc ) + ); + + return; } 1; __END__ + +# This will be added in later, after more refactoring is done. This is an early +# attempt at refactoring on the physical level instead of the virtual level. +sub _read_at { + my $self = shift; + my ($obj, $spot, $amount, $unpack) = @_; + + my $fh = $obj->_fh; + seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET ); + + my $buffer; + my $bytes_read = read( $fh, $buffer, $amount ); + + if ( $unpack ) { + $buffer = unpack( $unpack, $buffer ); + } + + if ( wantarray ) { + return ($buffer, $bytes_read); + } + else { + return $buffer; + } +}