X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDBM%2FDeep%2FEngine.pm;h=ad75d9144be96fb726faded1474f2c85d6bc9760;hb=898fd1fd98bd753241f6db44855ea5fd6c252605;hp=681616561178437f1631c2d626e50def309c47b0;hpb=16d1ad9ba9b0bb26f68793236c92b40b64482bac;p=dbsrgits%2FDBM-Deep.git diff --git a/lib/DBM/Deep/Engine.pm b/lib/DBM/Deep/Engine.pm index 6816165..ad75d91 100644 --- a/lib/DBM/Deep/Engine.pm +++ b/lib/DBM/Deep/Engine.pm @@ -15,6 +15,7 @@ sub SIG_NULL () { 'N' } sub SIG_DATA () { 'D' } sub SIG_INDEX () { 'I' } sub SIG_BLIST () { 'B' } +sub SIG_FREE () { 'F' } sub SIG_SIZE () { 1 } sub precalc_sizes { @@ -24,7 +25,7 @@ sub precalc_sizes { my $self = shift; $self->{index_size} = (2**8) * $self->{long_size}; - $self->{bucket_size} = $self->{hash_size} + $self->{long_size}; + $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2; $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size}; return 1; @@ -127,9 +128,9 @@ sub setup_fh { $obj, $self->tag_size( $self->{index_size} ), ); - $self->create_tag( + $self->write_tag( $obj, $obj->_base_offset, $obj->_type, - chr(0) x $self->{index_size}, + chr(0)x$self->{index_size}, ); # Flush the filehandle @@ -221,22 +222,23 @@ sub tag_size { return SIG_SIZE + $self->{data_size} + $size; } -sub create_tag { +sub write_tag { ## # Given offset, signature and content, create tag and write to disk ## my $self = shift; my ($obj, $offset, $sig, $content) = @_; - my $size = length($content); + my $size = length( $content ); my $fh = $obj->_fh; - seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); + if ( defined $offset ) { + seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); + } + print( $fh $sig . pack($self->{data_pack}, $size) . $content ); - if ($offset == $obj->_root->{end}) { - $obj->_root->{end} += $self->tag_size( $size ); - } + return unless defined $offset; return { signature => $sig, @@ -279,36 +281,45 @@ sub load_tag { sub _length_needed { my $self = shift; - my ($obj, $value) = @_; + my ($obj, $value, $key) = @_; my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ); }; - my $internal_ref = $is_dbm_deep && ($value->_root eq $obj->_root); + my $len = SIG_SIZE + $self->{data_size} + + $self->{data_size} + length( $key ); - if ( $internal_ref ) { - return $self->{long_size}; + if ( $is_dbm_deep && $value->_root eq $obj->_root ) { + return $len + $self->{long_size}; } my $r = Scalar::Util::reftype( $value ) || ''; + if ( $obj->_root->{autobless} ) { + # This is for the bit saying whether or not this thing is blessed. + $len += 1; + } + unless ( $r eq 'HASH' || $r eq 'ARRAY' ) { - return length( $value ); + if ( defined $value ) { + $len += length( $value ); + } + return $len; } - my $actual_length = $self->{index_size}; + $len += $self->{index_size}; # if autobless is enabled, must also take into consideration - # the class name, as it is stored along with key/value. + # the class name as it is stored after the key. if ( $obj->_root->{autobless} ) { my $value_class = Scalar::Util::blessed($value); - if ( defined $value_class && !$value->isa('DBM::Deep') ) { - $actual_length += length($value_class); + if ( defined $value_class && !$is_dbm_deep ) { + $len += $self->{data_size} + length($value_class); } } - return $actual_length; + return $len; } sub add_bucket { @@ -336,47 +347,45 @@ sub add_bucket { my $result = 2; my $root = $obj->_root; + my $fh = $obj->_fh; - my $fh = $obj->_fh; + my $actual_length = $self->_length_needed( $obj, $value, $plain_key ); - my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); + my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); +# $self->_release_space( $obj, $size, $subloc ); # Updating a known md5 +#XXX This needs updating to use _release_space if ( $subloc ) { $result = 1; - seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET); - my $size; - read( $fh, $size, $self->{data_size}); - $size = unpack($self->{data_pack}, $size); - - my $actual_length = $self->_length_needed( $obj, $value ); - if ($actual_length <= $size) { $location = $subloc; } else { - $location = $root->{end}; + $location = $self->_request_space( $obj, $actual_length ); seek( $fh, - $tag->{offset} + $offset + $self->{hash_size} + $root->{file_offset}, + $tag->{offset} + $offset + + $self->{hash_size} + $root->{file_offset}, SEEK_SET, ); - print( $fh pack($self->{long_pack}, $location) ); + print( $fh pack($self->{long_pack}, $location ) ); + print( $fh pack($self->{long_pack}, $actual_length ) ); } } # Adding a new md5 elsif ( defined $offset ) { - $location = $root->{end}; + $location = $self->_request_space( $obj, $actual_length ); seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET ); - print( $fh $md5 . pack($self->{long_pack}, $location) ); + print( $fh $md5 . pack($self->{long_pack}, $location ) ); + print( $fh pack($self->{long_pack}, $actual_length ) ); } # If bucket didn't fit into list, split into a new index level + # split_index() will do the _request_space() call else { - $self->split_index( $obj, $md5, $tag ); - - $location = $root->{end}; + $location = $self->split_index( $obj, $md5, $tag ); } $self->write_value( $obj, $location, $plain_key, $value ); @@ -396,7 +405,7 @@ sub write_value { $value->isa( 'DBM::Deep' ); }; - my $internal_ref = $is_dbm_deep && ($value->_root eq $root); + my $is_internal_ref = $is_dbm_deep && ($value->_root eq $root); seek($fh, $location + $root->{file_offset}, SEEK_SET); @@ -405,34 +414,20 @@ sub write_value { # actual value. ## my $r = Scalar::Util::reftype($value) || ''; - my $content_length; - if ( $internal_ref ) { - print( $fh SIG_INTERNAL ); - print( $fh pack($self->{data_pack}, $self->{long_size}) ); - print( $fh pack($self->{long_pack}, $value->_base_offset) ); - $content_length = $self->{long_size}; + if ( $is_internal_ref ) { + $self->write_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $value->_base_offset) ); + } + elsif ($r eq 'HASH') { + $self->write_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} ); + } + elsif ($r eq 'ARRAY') { + $self->write_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} ); + } + elsif (!defined($value)) { + $self->write_tag( $obj, undef, SIG_NULL, '' ); } else { - if ($r eq 'HASH') { - print( $fh SIG_HASH ); - print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); - $content_length = $self->{index_size}; - } - elsif ($r eq 'ARRAY') { - print( $fh SIG_ARRAY ); - print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); - $content_length = $self->{index_size}; - } - elsif (!defined($value)) { - print( $fh SIG_NULL ); - print( $fh pack($self->{data_pack}, 0) ); - $content_length = 0; - } - else { - print( $fh SIG_DATA ); - print( $fh pack($self->{data_pack}, length($value)) . $value ); - $content_length = length($value); - } + $self->write_tag( $obj, undef, SIG_DATA, $value ); } ## @@ -440,40 +435,28 @@ sub write_value { ## print( $fh pack($self->{data_pack}, length($key)) . $key ); + # Internal references don't care about autobless + return 1 if $is_internal_ref; + ## # If value is blessed, preserve class name ## if ( $root->{autobless} ) { my $value_class = Scalar::Util::blessed($value); - if ( defined $value_class && !$value->isa( 'DBM::Deep' ) ) { - ## - # Blessed ref -- will restore later - ## + if ( defined $value_class && !$is_dbm_deep ) { print( $fh chr(1) ); print( $fh pack($self->{data_pack}, length($value_class)) . $value_class ); - $content_length += 1; - $content_length += $self->{data_size} + length($value_class); } else { print( $fh chr(0) ); - $content_length += 1; } } ## - # If this is a new content area, advance EOF counter - ## - if ($location == $root->{end}) { - $root->{end} += SIG_SIZE; - $root->{end} += $self->{data_size} + $content_length; - $root->{end} += $self->{data_size} + length($key); - } - - ## # If content is a hash or array, create new child DBM::Deep object and # pass each key or element to it. ## - if ( ! $internal_ref ) { + if ( !$is_internal_ref ) { if ($r eq 'HASH') { my $branch = DBM::Deep->new( type => DBM::Deep->TYPE_HASH, @@ -507,62 +490,78 @@ sub split_index { my $fh = $obj->_fh; my $root = $obj->_root; - my $keys = $tag->{content}; - - seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); my $loc = $self->_request_space( $obj, $self->tag_size( $self->{index_size} ), ); + seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); print( $fh pack($self->{long_pack}, $loc) ); - my $index_tag = $self->create_tag( + my $index_tag = $self->write_tag( $obj, $loc, SIG_INDEX, - chr(0) x $self->{index_size}, + chr(0)x$self->{index_size}, ); - my @offsets = (); + my $newtag_loc = $self->_request_space( + $obj, $self->tag_size( $self->{bucket_list_size} ), + ); - $keys .= $md5 . pack($self->{long_pack}, 0); + my $keys = $tag->{content} + . $md5 . pack($self->{long_pack}, $newtag_loc) + . pack($self->{long_pack}, 0); + my @newloc = (); BUCKET: for (my $i = 0; $i <= $self->{max_buckets}; $i++) { - my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i ); + my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i ); - next BUCKET unless $key; + die "[INTERNAL ERROR]: No key in split_index()\n" unless $key; + die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc; my $num = ord(substr($key, $tag->{ch} + 1, 1)); - if ($offsets[$num]) { - my $offset = $offsets[$num] + SIG_SIZE + $self->{data_size}; - seek($fh, $offset + $root->{file_offset}, SEEK_SET); + if ($newloc[$num]) { + seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET); my $subkeys; read( $fh, $subkeys, $self->{bucket_list_size}); - for (my $k=0; $k<$self->{max_buckets}; $k++) { - my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k ); + # This is looking for the first empty spot + my ($subloc, $offset, $size) = $self->_find_in_buckets( + { content => $subkeys }, '', + ); + + seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET); + print( $fh $key . pack($self->{long_pack}, $old_subloc) ); - if (!$subloc) { - seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); - print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); - last; - } - } # k loop + next; } - else { - $offsets[$num] = $root->{end}; - seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); - print( $fh pack($self->{long_pack}, $root->{end}) ); - my $blist_tag = $self->create_tag($obj, $root->{end}, SIG_BLIST, chr(0) x $self->{bucket_list_size}); + seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); - seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); - print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); - } - } # i loop + my $loc = $self->_request_space( + $obj, $self->tag_size( $self->{bucket_list_size} ), + ); - return; + print( $fh pack($self->{long_pack}, $loc) ); + + my $blist_tag = $self->write_tag( + $obj, $loc, SIG_BLIST, + chr(0)x$self->{bucket_list_size}, + ); + + seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); + print( $fh $key . pack($self->{long_pack}, $old_subloc) ); + + $newloc[$num] = $blist_tag->{offset}; + } + + $self->_release_space( + $obj, $self->tag_size( $self->{bucket_list_size} ), + $tag->{offset} - SIG_SIZE - $self->{data_size}, + ); + + return $newtag_loc; } sub read_from_loc { @@ -656,7 +655,7 @@ sub get_bucket_value { my $self = shift; my ($obj, $tag, $md5) = @_; - my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); + my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); if ( $subloc ) { return $self->read_from_loc( $obj, $subloc ); } @@ -670,7 +669,8 @@ sub delete_bucket { my $self = shift; my ($obj, $tag, $md5) = @_; - my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); + my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); +#XXX This needs _release_space() if ( $subloc ) { my $fh = $obj->_fh; seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET); @@ -689,7 +689,7 @@ sub bucket_exists { my $self = shift; my ($obj, $tag, $md5) = @_; - my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); + my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); return $subloc && 1; } @@ -717,18 +717,17 @@ sub find_bucket_list { if (!$tag) { return if !$args->{create}; - my $fh = $obj->_fh; - seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET); - my $loc = $self->_request_space( $obj, $self->tag_size( $self->{bucket_list_size} ), ); + my $fh = $obj->_fh; + seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET); print( $fh pack($self->{long_pack}, $loc) ); - $tag = $self->create_tag( + $tag = $self->write_tag( $obj, $loc, SIG_BLIST, - chr(0) x $self->{bucket_list_size}, + chr(0)x$self->{bucket_list_size}, ); $tag->{ref_loc} = $ref_loc; @@ -879,8 +878,8 @@ sub _get_key_subloc { my $self = shift; my ($keys, $idx) = @_; - my ($key, $subloc) = unpack( - "a$self->{hash_size} $self->{long_pack}", + my ($key, $subloc, $size) = unpack( + "a$self->{hash_size} $self->{long_pack} $self->{long_pack}", substr( $keys, ($idx * $self->{bucket_size}), @@ -888,7 +887,7 @@ sub _get_key_subloc { ), ); - return ($key, $subloc); + return ($key, $subloc, $size); } sub _find_in_buckets { @@ -897,18 +896,31 @@ sub _find_in_buckets { BUCKET: for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) { - my ($key, $subloc) = $self->_get_key_subloc( $tag->{content}, $i ); + my ($key, $subloc, $size) = $self->_get_key_subloc( + $tag->{content}, $i, + ); - return ($subloc, $i * $self->{bucket_size}) unless $subloc; + return ($subloc, $i * $self->{bucket_size}, $size) unless $subloc; next BUCKET if $key ne $md5; - return ($subloc, $i * $self->{bucket_size}); + return ($subloc, $i * $self->{bucket_size}, $size); } return; } +#sub _print_at { +# my $self = shift; +# my ($obj, $spot, $data) = @_; +# +# my $fh = $obj->_fh; +# seek( $fh, $spot, SEEK_SET ); +# print( $fh $data ); +# +# return; +#} + sub _request_space { my $self = shift; my ($obj, $size) = @_; @@ -923,6 +935,15 @@ sub _release_space { my $self = shift; my ($obj, $size, $loc) = @_; + my $next_loc = 0; + + my $fh = $obj->_fh; + seek( $fh, $loc + $obj->_root->{file_offset}, SEEK_SET ); + print( $fh SIG_FREE + . pack($self->{long_pack}, $size ) + . pack($self->{long_pack}, $next_loc ) + ); + return; }