X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDBM%2FDeep%2FEngine.pm;h=723c029881747378edadda0d8f13d64c40568503;hb=83c43bb572732d2b5402502a2a1e89d480867599;hp=6c8e9594e4b911065b128ea58cbdd2a5657228ac;hpb=a97c8f6725c0b1dd93f64b03848edee67567ac93;p=dbsrgits%2FDBM-Deep.git diff --git a/lib/DBM/Deep/Engine.pm b/lib/DBM/Deep/Engine.pm index 6c8e959..723c029 100644 --- a/lib/DBM/Deep/Engine.pm +++ b/lib/DBM/Deep/Engine.pm @@ -5,15 +5,21 @@ use 5.6.0; use strict; use warnings; -use Fcntl qw( :DEFAULT :flock :seek ); +our $VERSION = q(0.99_03); + +use Fcntl qw( :DEFAULT :flock ); use Scalar::Util (); # File-wide notes: -# * All the local($/,$\); are to protect read() and print() from -l. # * To add to bucket_size, make sure you modify the following: # - calculate_sizes() # - _get_key_subloc() # - add_bucket() - where the buckets are printed +# +# * Every method in here assumes that the _fileobj has been appropriately +# safeguarded. This can be anything from flock() to some sort of manual +# mutex. But, it's the caller's responsability to make sure that this has +# been done. ## # Setup file and tag signatures. These should never change. @@ -28,23 +34,98 @@ sub SIG_DATA () { 'D' } sub SIG_INDEX () { 'I' } sub SIG_BLIST () { 'B' } sub SIG_FREE () { 'F' } +sub SIG_KEYS () { 'K' } sub SIG_SIZE () { 1 } +################################################################################ +# +# This is new code. It is a complete rewrite of the engine based on a new API +# +################################################################################ + +sub write_value { + my $self = shift; + my ($offset, $key, $value, $orig_key) = @_; + + my $dig_key = $self->apply_digest( $key ); + my $tag = $self->find_blist( $offset, $dig_key, { create => 1 } ); + return $self->add_bucket( $tag, $dig_key, $key, $value, undef, $orig_key ); +} + +sub read_value { + my $self = shift; + my ($offset, $key, $orig_key) = @_; + + my $dig_key = $self->apply_digest( $key ); + my $tag = $self->find_blist( $offset, $dig_key ) or return; + return $self->get_bucket_value( $tag, $dig_key, $orig_key ); +} + +sub delete_key { + my $self = shift; + my ($offset, $key, $orig_key) = @_; + + my $dig_key = $self->apply_digest( $key ); + my $tag = $self->find_blist( $offset, $dig_key ) or return; + my $value = $self->get_bucket_value( $tag, $dig_key, $orig_key ); + $self->delete_bucket( $tag, $dig_key, $orig_key ); + return $value; +} + +sub key_exists { + my $self = shift; + my ($offset, $key) = @_; + + my $dig_key = $self->apply_digest( $key ); + # exists() returns the empty string, not undef + my $tag = $self->find_blist( $offset, $dig_key ) or return ''; + return $self->bucket_exists( $tag, $dig_key, $key ); +} + +sub get_next_key { + my $self = shift; + my ($offset) = @_; + + # If the previous key was not specifed, start at the top and + # return the first one found. + my $temp; + if ( @_ > 1 ) { + $temp = { + prev_md5 => $self->apply_digest($_[1]), + return_next => 0, + }; + } + else { + $temp = { + prev_md5 => chr(0) x $self->{hash_size}, + return_next => 1, + }; + } + + return $self->traverse_index( $temp, $offset, 0 ); +} + +################################################################################ +# +# Below here is the old code. It will be folded into the code above as it can. 
+# +################################################################################ + sub new { my $class = shift; my ($args) = @_; my $self = bless { - long_size => 4, - long_pack => 'N', - data_size => 4, - data_pack => 'N', + long_size => 4, + long_pack => 'N', + data_size => 4, + data_pack => 'N', - digest => \&Digest::MD5::md5, - hash_size => 16, + digest => \&Digest::MD5::md5, + hash_size => 16, # In bytes ## - # Maximum number of buckets per list before another level of indexing is + # Number of buckets per blist before another level of indexing is # done. Increase this value for slightly greater speed, but larger database # files. DO NOT decrease this value below 16, due to risk of recursive # reindex overrun. @@ -89,34 +170,41 @@ sub new { } sub _fileobj { return $_[0]{fileobj} } -sub _fh { return $_[0]->_fileobj->{fh} } + +sub apply_digest { + my $self = shift; + return $self->{digest}->(@_); +} sub calculate_sizes { my $self = shift; + # The 2**8 here indicates the number of different characters in the + # current hashing algorithm #XXX Does this need to be updated with different hashing algorithms? - $self->{index_size} = (2**8) * $self->{long_size}; - $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3; + $self->{hash_chars_used} = (2**8); + $self->{index_size} = $self->{hash_chars_used} * $self->{long_size}; + + $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2; $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size}; + $self->{key_size} = $self->{long_size} * 2; + $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size}; + return; } sub write_file_header { my $self = shift; - local($/,$\); + my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 33 ); - my $fh = $self->_fh; - - my $loc = $self->_request_space( length( SIG_FILE ) + 21 ); - seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET); - print( $fh + $self->_fileobj->print_at( $loc, SIG_FILE, SIG_HEADER, pack('N', 1), # header version - pack('N', 12), # header size - pack('N', 0), # currently running transaction IDs + pack('N', 24), # header size + pack('N4', 0, 0, 0, 0), # currently running transaction IDs pack('n', $self->{long_size}), pack('A', $self->{long_pack}), pack('n', $self->{data_size}), @@ -132,15 +220,8 @@ sub write_file_header { sub read_file_header { my $self = shift; - local($/,$\); - - my $fh = $self->_fh; - - seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET); - my $buffer; - my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 ); - - return unless $bytes_read; + my $buffer = $self->_fileobj->read_at( 0, length(SIG_FILE) + 9 ); + return unless length($buffer); my ($file_signature, $sig_header, $header_version, $size) = unpack( 'A4 A N N', $buffer @@ -156,9 +237,8 @@ sub read_file_header { $self->_throw_error( "Old file version found." 
); } - my $buffer2; - $bytes_read += read( $fh, $buffer2, $size ); - my ($running_transactions, @values) = unpack( 'N n A n A n', $buffer2 ); + my $buffer2 = $self->_fileobj->read_at( undef, $size ); + my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 ); $self->_fileobj->set_transaction_offset( 13 ); @@ -170,16 +250,15 @@ sub read_file_header { #XXX Add warnings if values weren't set right @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values; - return $bytes_read; + return length($buffer) + length($buffer2); } sub setup_fh { my $self = shift; my ($obj) = @_; - local($/,$\); - - my $fh = $self->_fh; + # Need to remove use of $fh here + my $fh = $self->_fileobj->{fh}; flock $fh, LOCK_EX; #XXX The duplication of calculate_sizes needs to go away @@ -192,15 +271,13 @@ sub setup_fh { # File is empty -- write header and master index ## if (!$bytes_read) { - if ( my $afh = $self->_fileobj->{audit_fh} ) { - flock( $afh, LOCK_EX ); - print( $afh "# Database created on " . localtime(time) . "\n" ); - flock( $afh, LOCK_UN ); - } + $self->_fileobj->audit( "# Database created on" ); $self->write_file_header; - $obj->{base_offset} = $self->_request_space( $self->tag_size( $self->{index_size} ) ); + $obj->{base_offset} = $self->_fileobj->request_space( + $self->tag_size( $self->{index_size} ), + ); $self->write_tag( $obj->_base_offset, $obj->_type, @@ -235,11 +312,7 @@ sub setup_fh { } #XXX We have to make sure we don't mess up when autoflush isn't turned on - unless ( $self->_fileobj->{inode} ) { - my @stats = stat($fh); - $self->_fileobj->{inode} = $stats[1]; - $self->_fileobj->{end} = $stats[7]; - } + $self->_fileobj->set_inode; flock $fh, LOCK_UN; @@ -260,23 +333,19 @@ sub write_tag { my ($offset, $sig, $content) = @_; my $size = length( $content ); - local($/,$\); - - my $fh = $self->_fh; - - if ( defined $offset ) { - seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET); - } - - print( $fh $sig . pack($self->{data_pack}, $size) . $content ); + $self->_fileobj->print_at( + $offset, + $sig, pack($self->{data_pack}, $size), $content, + ); return unless defined $offset; return { signature => $sig, - size => $size, - offset => $offset + SIG_SIZE + $self->{data_size}, - content => $content + #XXX Is this even used? + size => $size, + offset => $offset + SIG_SIZE + $self->{data_size}, + content => $content }; } @@ -287,112 +356,43 @@ sub load_tag { my $self = shift; my ($offset) = @_; - local($/,$\); + my $fileobj = $self->_fileobj; -# print join(':',map{$_||''}caller(1)), $/; - - my $fh = $self->_fh; - - seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET); - - #XXX I'm not sure this check will work if autoflush isn't enabled ... - return if eof $fh; - - my $b; - read( $fh, $b, SIG_SIZE + $self->{data_size} ); - my ($sig, $size) = unpack( "A $self->{data_pack}", $b ); - - my $buffer; - read( $fh, $buffer, $size); + my ($sig, $size) = unpack( + "A $self->{data_pack}", + $fileobj->read_at( $offset, SIG_SIZE + $self->{data_size} ), + ); return { signature => $sig, - size => $size, - offset => $offset + SIG_SIZE + $self->{data_size}, - content => $buffer + size => $size, #XXX Is this even used? 
+ offset => $offset + SIG_SIZE + $self->{data_size}, + content => $fileobj->read_at( undef, $size ), }; } -sub _get_dbm_object { - my $item = shift; - - my $obj = eval { - local $SIG{__DIE__}; - if ($item->isa( 'DBM::Deep' )) { - return $item; - } - return; - }; - return $obj if $obj; - - my $r = Scalar::Util::reftype( $item ) || ''; - if ( $r eq 'HASH' ) { - my $obj = eval { - local $SIG{__DIE__}; - my $obj = tied(%$item); - if ($obj->isa( 'DBM::Deep' )) { - return $obj; - } - return; - }; - return $obj if $obj; - } - elsif ( $r eq 'ARRAY' ) { - my $obj = eval { - local $SIG{__DIE__}; - my $obj = tied(@$item); - if ($obj->isa( 'DBM::Deep' )) { - return $obj; - } - return; - }; - return $obj if $obj; - } - - return; -} - -sub _length_needed { +sub find_keyloc { my $self = shift; - my ($value, $key) = @_; + my ($tag, $transaction_id) = @_; + $transaction_id = $self->_fileobj->transaction_id + unless defined $transaction_id; - my $is_dbm_deep = eval { - local $SIG{'__DIE__'}; - $value->isa( 'DBM::Deep' ); - }; - - my $len = SIG_SIZE + $self->{data_size} - + $self->{data_size} + length( $key ); - - if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) { - return $len + $self->{long_size}; - } - - my $r = Scalar::Util::reftype( $value ) || ''; - if ( $self->_fileobj->{autobless} ) { - # This is for the bit saying whether or not this thing is blessed. - $len += 1; - } + for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) { + my ($loc, $trans_id, $is_deleted) = unpack( + "$self->{long_pack} C C", + substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ), + ); - unless ( $r eq 'HASH' || $r eq 'ARRAY' ) { - if ( defined $value ) { - $len += length( $value ); + if ( $loc == 0 ) { + return ( $loc, $is_deleted, $i * $self->{key_size} ); } - return $len; - } - $len += $self->{index_size}; + next if $transaction_id != $trans_id; - # if autobless is enabled, must also take into consideration - # the class name as it is stored after the key. - if ( $self->_fileobj->{autobless} ) { - my $c = Scalar::Util::blessed($value); - if ( defined $c && !$is_dbm_deep ) { - $len += $self->{data_size} + length($c); - } + return ( $loc, $is_deleted, $i * $self->{key_size} ); } - return $len; + return; } sub add_bucket { @@ -402,135 +402,146 @@ sub add_bucket { ## my $self = shift; my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_; - $deleted ||= 0; - - local($/,$\); # This verifies that only supported values will be stored. { my $r = Scalar::Util::reftype( $value ); - last if !defined $r; + last if !defined $r; last if $r eq 'HASH'; last if $r eq 'ARRAY'; $self->_throw_error( - "Storage of variables of type '$r' is not supported." + "Storage of references of type '$r' is not supported." ); } - my $location = 0; - my $result = 2; - - my $root = $self->_fileobj; - my $fh = $self->_fh; - - my $actual_length = $self->_length_needed( $value, $plain_key ); + my $fileobj = $self->_fileobj; #ACID - This is a mutation. 
Must only find the exact transaction - my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5, 1 ); + my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 ); my @transactions; - if ( $self->_fileobj->transaction_id == 0 ) { - @transactions = $self->_fileobj->current_transactions; + if ( $fileobj->transaction_id == 0 ) { + @transactions = $fileobj->current_transactions; } # $self->_release_space( $size, $subloc ); - # Updating a known md5 #XXX This needs updating to use _release_space - if ( $subloc ) { - $result = 1; - if ($actual_length <= $size) { - $location = $subloc; - } - else { - $location = $self->_request_space( $actual_length ); - seek( - $fh, - $tag->{offset} + $offset - + $self->{hash_size} + $root->{file_offset}, - SEEK_SET, - ); - print( $fh pack($self->{long_pack}, $location ) ); - print( $fh pack($self->{long_pack}, $actual_length ) ); - print( $fh pack('n n', $root->transaction_id, $deleted ) ); + my $location; + my $size = $self->_length_needed( $value, $plain_key ); + + # Updating a known md5 + if ( $keyloc ) { + my $keytag = $self->load_tag( $keyloc ); + my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag ); + + if ( $subloc && !$is_deleted && @transactions ) { + my $old_value = $self->read_from_loc( $subloc, $orig_key ); + my $old_size = $self->_length_needed( $old_value, $plain_key ); + + for my $trans_id ( @transactions ) { + my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id ); + unless ($loc) { + my $location2 = $fileobj->request_space( $old_size ); + $fileobj->print_at( $keytag->{offset} + $offset2, + pack($self->{long_pack}, $location2 ), + pack( 'C C', $trans_id, 0 ), + ); + $self->_write_value( $location2, $plain_key, $old_value, $orig_key ); + } + } } + + $location = $self->_fileobj->request_space( $size ); + #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use + $fileobj->print_at( $keytag->{offset} + $offset, + pack($self->{long_pack}, $location ), + pack( 'C C', $fileobj->transaction_id, 0 ), + ); } # Adding a new md5 - elsif ( defined $offset ) { - $location = $self->_request_space( $actual_length ); - - seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET ); - print( $fh $md5 . 
pack($self->{long_pack}, $location ) ); - print( $fh pack($self->{long_pack}, $actual_length ) ); - print( $fh pack('n n', $root->transaction_id, $deleted ) ); - - for ( @transactions ) { - my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} ); - $self->_fileobj->{transaction_id} = $_; - $self->add_bucket( $tag2, $md5, '', '', 1, $orig_key ); - $self->_fileobj->{transaction_id} = 0; - } - } - # If bucket didn't fit into list, split into a new index level - # split_index() will do the _request_space() call else { - $location = $self->split_index( $md5, $tag ); + my $keyloc = $fileobj->request_space( $self->tag_size( $self->{keyloc_size} ) ); + + # The bucket fit into list + if ( defined $offset ) { + $fileobj->print_at( $tag->{offset} + $offset, + $md5, pack( $self->{long_pack}, $keyloc ), + ); + } + # If bucket didn't fit into list, split into a new index level + else { + $self->split_index( $tag, $md5, $keyloc ); + } + + my $keytag = $self->write_tag( + $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size}, + ); + + $location = $self->_fileobj->request_space( $size ); + $fileobj->print_at( $keytag->{offset}, + pack( $self->{long_pack}, $location ), + pack( 'C C', $fileobj->transaction_id, 0 ), + ); + + my $offset = 1; + for my $trans_id ( @transactions ) { + $fileobj->print_at( $keytag->{offset} + $self->{key_size} * $offset++, + pack( $self->{long_pack}, 0 ), + pack( 'C C', $trans_id, 1 ), + ); + } } - $self->write_value( $location, $plain_key, $value, $orig_key ); + $self->_write_value( $location, $plain_key, $value, $orig_key ); - return $result; + return 1; } -sub write_value { +sub _write_value { my $self = shift; my ($location, $key, $value, $orig_key) = @_; - local($/,$\); - - my $fh = $self->_fh; - my $root = $self->_fileobj; + my $fileobj = $self->_fileobj; my $dbm_deep_obj = _get_dbm_object( $value ); - if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) { + if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) { $self->_throw_error( "Cannot cross-reference. Use export() instead" ); } - seek($fh, $location + $root->{file_offset}, SEEK_SET); - ## # Write signature based on content type, set content length and write # actual value. ## my $r = Scalar::Util::reftype( $value ) || ''; if ( $dbm_deep_obj ) { - $self->write_tag( undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) ); + $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) ); } elsif ($r eq 'HASH') { if ( !$dbm_deep_obj && tied %{$value} ) { $self->_throw_error( "Cannot store something that is tied" ); } - $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} ); + $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} ); } elsif ($r eq 'ARRAY') { if ( !$dbm_deep_obj && tied @{$value} ) { $self->_throw_error( "Cannot store something that is tied" ); } - $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} ); + $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} ); } elsif (!defined($value)) { - $self->write_tag( undef, SIG_NULL, '' ); + $self->write_tag( $location, SIG_NULL, '' ); } else { - $self->write_tag( undef, SIG_DATA, $value ); + $self->write_tag( $location, SIG_DATA, $value ); } ## # Plain key is stored AFTER value, as keys are typically fetched less often. ## - print( $fh pack($self->{data_pack}, length($key)) . $key ); + $fileobj->print_at( undef, pack($self->{data_pack}, length($key)) . 
$key ); # Internal references don't care about autobless return 1 if $dbm_deep_obj; @@ -538,14 +549,12 @@ sub write_value { ## # If value is blessed, preserve class name ## - if ( $root->{autobless} ) { - my $c = Scalar::Util::blessed($value); - if ( defined $c && !$dbm_deep_obj ) { - print( $fh chr(1) ); - print( $fh pack($self->{data_pack}, length($c)) . $c ); + if ( $fileobj->{autobless} ) { + if ( defined( my $c = Scalar::Util::blessed($value) ) ) { + $fileobj->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c ); } else { - print( $fh chr(0) ); + $fileobj->print_at( undef, chr(0) ); } } @@ -561,7 +570,7 @@ sub write_value { my %x = %$value; tie %$value, 'DBM::Deep', { base_offset => $location, - fileobj => $root, + fileobj => $fileobj, parent => $self->{obj}, parent_key => $orig_key, }; @@ -571,7 +580,7 @@ sub write_value { my @x = @$value; tie @$value, 'DBM::Deep', { base_offset => $location, - fileobj => $root, + fileobj => $fileobj, parent => $self->{obj}, parent_key => $orig_key, }; @@ -583,38 +592,30 @@ sub write_value { sub split_index { my $self = shift; - my ($md5, $tag) = @_; + my ($tag, $md5, $keyloc) = @_; - local($/,$\); + my $fileobj = $self->_fileobj; - my $fh = $self->_fh; - my $root = $self->_fileobj; - - my $loc = $self->_request_space( + my $loc = $fileobj->request_space( $self->tag_size( $self->{index_size} ), ); - seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); - print( $fh pack($self->{long_pack}, $loc) ); + $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) ); my $index_tag = $self->write_tag( $loc, SIG_INDEX, chr(0)x$self->{index_size}, ); - my $newtag_loc = $self->_request_space( - $self->tag_size( $self->{bucket_list_size} ), - ); - my $keys = $tag->{content} - . $md5 . pack($self->{long_pack}, $newtag_loc) - . pack($self->{long_pack}, 0) # size - . pack($self->{long_pack}, 0); # transaction ID + . $md5 . pack($self->{long_pack}, $keyloc); my @newloc = (); BUCKET: + # The <= here is deliberate - we have max_buckets+1 keys to iterate + # through, unlike every other loop that uses max_buckets as a stop. for (my $i = 0; $i <= $self->{max_buckets}; $i++) { - my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i ); + my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i ); die "[INTERNAL ERROR]: No key in split_index()\n" unless $key; die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc; @@ -622,36 +623,36 @@ sub split_index { my $num = ord(substr($key, $tag->{ch} + 1, 1)); if ($newloc[$num]) { - seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET); - my $subkeys; - read( $fh, $subkeys, $self->{bucket_list_size}); + my $subkeys = $fileobj->read_at( $newloc[$num], $self->{bucket_list_size} ); # This is looking for the first empty spot - my ($subloc, $offset, $size) = $self->_find_in_buckets( + my ($subloc, $offset) = $self->_find_in_buckets( { content => $subkeys }, '', ); - seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET); - print( $fh $key . 
pack($self->{long_pack}, $old_subloc) ); + $fileobj->print_at( + $newloc[$num] + $offset, + $key, pack($self->{long_pack}, $old_subloc), + ); next; } - seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); - - my $loc = $self->_request_space( + my $loc = $fileobj->request_space( $self->tag_size( $self->{bucket_list_size} ), ); - print( $fh pack($self->{long_pack}, $loc) ); + $fileobj->print_at( + $index_tag->{offset} + ($num * $self->{long_size}), + pack($self->{long_pack}, $loc), + ); my $blist_tag = $self->write_tag( $loc, SIG_BLIST, chr(0)x$self->{bucket_list_size}, ); - seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); - print( $fh $key . pack($self->{long_pack}, $old_subloc) ); + $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) ); $newloc[$num] = $blist_tag->{offset}; } @@ -661,28 +662,22 @@ sub split_index { $tag->{offset} - SIG_SIZE - $self->{data_size}, ); - return $newtag_loc; + return 1; } sub read_from_loc { my $self = shift; my ($subloc, $orig_key) = @_; - local($/,$\); - - my $fh = $self->_fh; + my $fileobj = $self->_fileobj; - ## - # Found match -- seek to offset and read signature - ## - my $signature; - seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET); - read( $fh, $signature, SIG_SIZE); + my $signature = $fileobj->read_at( $subloc, SIG_SIZE ); ## # If value is a hash or array, return new DBM::Deep object with correct offset ## if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) { + #XXX This needs to be a singleton my $new_obj = DBM::Deep->new({ type => $signature, base_offset => $subloc, @@ -696,39 +691,34 @@ sub read_from_loc { # Skip over value and plain key to see if object needs # to be re-blessed ## - seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR); + $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} ); - my $size; - read( $fh, $size, $self->{data_size}); + my $size = $fileobj->read_at( undef, $self->{data_size} ); $size = unpack($self->{data_pack}, $size); - if ($size) { seek($fh, $size, SEEK_CUR); } - - my $bless_bit; - read( $fh, $bless_bit, 1); - if (ord($bless_bit)) { - ## - # Yes, object needs to be re-blessed - ## - my $class_name; - read( $fh, $size, $self->{data_size}); - $size = unpack($self->{data_pack}, $size); - if ($size) { read( $fh, $class_name, $size); } - if ($class_name) { $new_obj = bless( $new_obj, $class_name ); } + if ($size) { $fileobj->increment_pointer( $size ); } + + my $bless_bit = $fileobj->read_at( undef, 1 ); + if ( ord($bless_bit) ) { + my $size = unpack( + $self->{data_pack}, + $fileobj->read_at( undef, $self->{data_size} ), + ); + + if ( $size ) { + $new_obj = bless $new_obj, $fileobj->read_at( undef, $size ); + } } } return $new_obj; } elsif ( $signature eq SIG_INTERNAL ) { - my $size; - read( $fh, $size, $self->{data_size}); + my $size = $fileobj->read_at( undef, $self->{data_size} ); $size = unpack($self->{data_pack}, $size); if ( $size ) { - my $new_loc; - read( $fh, $new_loc, $size ); - $new_loc = unpack( $self->{long_pack}, $new_loc ); - + my $new_loc = $fileobj->read_at( undef, $size ); + $new_loc = unpack( $self->{long_pack}, $new_loc ); return $self->read_from_loc( $new_loc, $orig_key ); } else { @@ -739,12 +729,10 @@ sub read_from_loc { # Otherwise return actual value ## elsif ( $signature eq SIG_DATA ) { - my $size; - read( $fh, $size, $self->{data_size}); + my $size = $fileobj->read_at( undef, $self->{data_size} ); $size = unpack($self->{data_pack}, $size); - my $value = ''; 
- if ($size) { read( $fh, $value, $size); } + my $value = $size ? $fileobj->read_at( undef, $size ) : ''; return $value; } @@ -762,10 +750,25 @@ sub get_bucket_value { my ($tag, $md5, $orig_key) = @_; #ACID - This is a read. Can find exact or HEAD - my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5 ); - if ( $subloc && !$is_deleted ) { - return $self->read_from_loc( $subloc, $orig_key ); + my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); + + if ( !$keyloc ) { + #XXX Need to use real key +# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key ); +# return; + } +# elsif ( !$is_deleted ) { + else { + my $keytag = $self->load_tag( $keyloc ); + my ($subloc, $is_deleted) = $self->find_keyloc( $keytag ); + if (!$subloc && !$is_deleted) { + ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 ); + } + if ( $subloc && !$is_deleted ) { + return $self->read_from_loc( $subloc, $orig_key ); + } } + return; } @@ -776,20 +779,60 @@ sub delete_bucket { my $self = shift; my ($tag, $md5, $orig_key) = @_; - local($/,$\); + #ACID - Although this is a mutation, we must find any transaction. + # This is because we need to mark something as deleted that is in the HEAD. + my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); - #ACID - This is a mutation. Must only find the exact transaction - my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 ); -#XXX This needs _release_space() - if ( $subloc ) { - my $fh = $self->_fh; - seek($fh, $tag->{offset} + $offset + $self->_fileobj->{file_offset}, SEEK_SET); - print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) ); - print( $fh chr(0) x $self->{bucket_size} ); - - return 1; + return if !$keyloc; + + my $fileobj = $self->_fileobj; + + my @transactions; + if ( $fileobj->transaction_id == 0 ) { + @transactions = $fileobj->current_transactions; } - return; + + if ( $fileobj->transaction_id == 0 ) { + my $keytag = $self->load_tag( $keyloc ); + + my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag ); + return if !$subloc || $is_deleted; + + my $value = $self->read_from_loc( $subloc, $orig_key ); + + my $size = $self->_length_needed( $value, $orig_key ); + + for my $trans_id ( @transactions ) { + my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id ); + unless ($loc) { + my $location2 = $fileobj->request_space( $size ); + $fileobj->print_at( $keytag->{offset} + $offset2, + pack($self->{long_pack}, $location2 ), + pack( 'C C', $trans_id, 0 ), + ); + $self->_write_value( $location2, $orig_key, $value, $orig_key ); + } + } + + $keytag = $self->load_tag( $keyloc ); + ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag ); + $fileobj->print_at( $keytag->{offset} + $offset, + substr( $keytag->{content}, $offset + $self->{key_size} ), + chr(0) x $self->{key_size}, + ); + } + else { + my $keytag = $self->load_tag( $keyloc ); + + my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag ); + + $fileobj->print_at( $keytag->{offset} + $offset, + pack($self->{long_pack}, 0 ), + pack( 'C C', $fileobj->transaction_id, 1 ), + ); + } + + return 1; } sub bucket_exists { @@ -800,11 +843,16 @@ sub bucket_exists { my ($tag, $md5) = @_; #ACID - This is a read. 
Can find exact or HEAD - my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 ); + my ($keyloc) = $self->_find_in_buckets( $tag, $md5 ); + my $keytag = $self->load_tag( $keyloc ); + my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag ); + if ( !$subloc && !$is_deleted ) { + ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 ); + } return ($subloc && !$is_deleted) && 1; } -sub find_bucket_list { +sub find_blist { ## # Locate offset for bucket list, given digested key ## @@ -812,8 +860,6 @@ sub find_bucket_list { my ($offset, $md5, $args) = @_; $args = {} unless $args; - local($/,$\); - ## # Locate offset for bucket list using digest index system ## @@ -830,13 +876,11 @@ sub find_bucket_list { if (!$tag) { return if !$args->{create}; - my $loc = $self->_request_space( + my $loc = $self->_fileobj->request_space( $self->tag_size( $self->{bucket_list_size} ), ); - my $fh = $self->_fh; - seek($fh, $ref_loc + $self->_fileobj->{file_offset}, SEEK_SET); - print( $fh pack($self->{long_pack}, $loc) ); + $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) ); $tag = $self->write_tag( $loc, SIG_BLIST, @@ -882,23 +926,18 @@ sub traverse_index { # Scan index and recursively step into deeper levels, looking for next key. ## my $self = shift; - my ($obj, $offset, $ch, $force_return_next) = @_; - - local($/,$\); + my ($xxxx, $offset, $ch, $force_return_next) = @_; my $tag = $self->load_tag( $offset ); - my $fh = $self->_fh; - if ($tag->{signature} ne SIG_BLIST) { - my $content = $tag->{content}; - my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1)); + my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1)); - for (my $idx = $start; $idx < (2**8); $idx++) { + for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) { my $subloc = unpack( $self->{long_pack}, substr( - $content, + $tag->{content}, $idx * $self->{long_size}, $self->{long_size}, ), @@ -906,85 +945,69 @@ sub traverse_index { if ($subloc) { my $result = $self->traverse_index( - $obj, $subloc, $ch + 1, $force_return_next, + $xxxx, $subloc, $ch + 1, $force_return_next, ); - if (defined($result)) { return $result; } + if (defined $result) { return $result; } } } # index loop - $obj->{return_next} = 1; - } # tag is an index - + $xxxx->{return_next} = 1; + } + # This is the bucket list else { my $keys = $tag->{content}; - if ($force_return_next) { $obj->{return_next} = 1; } + if ($force_return_next) { $xxxx->{return_next} = 1; } ## # Iterate through buckets, looking for a key match ## + my $transaction_id = $self->_fileobj->transaction_id; for (my $i = 0; $i < $self->{max_buckets}; $i++) { - my ($key, $subloc) = $self->_get_key_subloc( $keys, $i ); + my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i ); # End of bucket list -- return to outer loop - if (!$subloc) { - $obj->{return_next} = 1; + if (!$keyloc) { + $xxxx->{return_next} = 1; last; } # Located previous key -- return next one found - elsif ($key eq $obj->{prev_md5}) { - $obj->{return_next} = 1; + elsif ($key eq $xxxx->{prev_md5}) { + $xxxx->{return_next} = 1; next; } # Seek to bucket location and skip over signature - elsif ($obj->{return_next}) { - seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET); + elsif ($xxxx->{return_next}) { + my $fileobj = $self->_fileobj; + + my $keytag = $self->load_tag( $keyloc ); + my ($subloc, $is_deleted) = $self->find_keyloc( $keytag ); + if ( $subloc == 0 && !$is_deleted ) { + ($subloc, $is_deleted) = $self->find_keyloc( 
$keytag, 0 ); + } + next if $is_deleted; # Skip over value to get to plain key - my $sig; - read( $fh, $sig, SIG_SIZE ); + my $sig = $fileobj->read_at( $subloc, SIG_SIZE ); - my $size; - read( $fh, $size, $self->{data_size}); + my $size = $fileobj->read_at( undef, $self->{data_size} ); $size = unpack($self->{data_pack}, $size); - if ($size) { seek($fh, $size, SEEK_CUR); } + if ($size) { $fileobj->increment_pointer( $size ); } # Read in plain key and return as scalar - my $plain_key; - read( $fh, $size, $self->{data_size}); + $size = $fileobj->read_at( undef, $self->{data_size} ); $size = unpack($self->{data_pack}, $size); - if ($size) { read( $fh, $plain_key, $size); } + my $plain_key; + if ($size) { $plain_key = $fileobj->read_at( undef, $size); } return $plain_key; } } - $obj->{return_next} = 1; - } # tag is a bucket list - - return; -} - -sub get_next_key { - ## - # Locate next key, given digested previous one - ## - my $self = shift; - my ($obj) = @_; - - $obj->{prev_md5} = $_[1] ? $_[1] : undef; - $obj->{return_next} = 0; - - ## - # If the previous key was not specifed, start at the top and - # return the first one found. - ## - if (!$obj->{prev_md5}) { - $obj->{prev_md5} = chr(0) x $self->{hash_size}; - $obj->{return_next} = 1; + $xxxx->{return_next} = 1; } - return $self->traverse_index( $obj, $obj->_base_offset, 0 ); + return; } # Utilities @@ -993,79 +1016,52 @@ sub _get_key_subloc { my $self = shift; my ($keys, $idx) = @_; - my ($key, $subloc, $size, $transaction_id, $is_deleted) = unpack( + return unpack( # This is 'a', not 'A'. Please read the pack() documentation for the # difference between the two and why it's important. - "a$self->{hash_size} $self->{long_pack}2 n2", + "a$self->{hash_size} $self->{long_pack}", substr( $keys, ($idx * $self->{bucket_size}), $self->{bucket_size}, ), ); - - return ($key, $subloc, $size, $transaction_id, $is_deleted); } sub _find_in_buckets { my $self = shift; - my ($tag, $md5, $exact) = @_; - - my $trans_id = $self->_fileobj->transaction_id; - - my @zero; + my ($tag, $md5) = @_; BUCKET: for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) { - my ($key, $subloc, $size, $transaction_id, $is_deleted) = $self->_get_key_subloc( + my ($key, $subloc) = $self->_get_key_subloc( $tag->{content}, $i, ); - my @rv = ($subloc, $i * $self->{bucket_size}, $size, $is_deleted); + my @rv = ($subloc, $i * $self->{bucket_size}); unless ( $subloc ) { - if ( !$exact && @zero and $trans_id ) { - @rv = ($zero[2], $zero[0] * $self->{bucket_size},$zero[3],$is_deleted); - } return @rv; } next BUCKET if $key ne $md5; - # Save off the HEAD in case we need it. - @zero = ($i,$key,$subloc,$size,$transaction_id,$is_deleted) if $transaction_id == 0; - - next BUCKET if $transaction_id != $trans_id; - return @rv; } return; } -sub _request_space { - my $self = shift; - my ($size) = @_; - - my $loc = $self->_fileobj->{end}; - $self->_fileobj->{end} += $size; - - return $loc; -} - sub _release_space { my $self = shift; my ($size, $loc) = @_; - local($/,$\); - my $next_loc = 0; - my $fh = $self->_fh; - seek( $fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET ); - print( $fh SIG_FREE - . pack($self->{long_pack}, $size ) - . pack($self->{long_pack}, $next_loc ) + $self->_fileobj->print_at( $loc, + SIG_FREE, + pack($self->{long_pack}, $size ), + pack($self->{long_pack}, $next_loc ), ); return; @@ -1075,76 +1071,90 @@ sub _throw_error { die "DBM::Deep: $_[1]\n"; } -1; -__END__ - -# This will be added in later, after more refactoring is done. 
This is an early -# attempt at refactoring on the physical level instead of the virtual level. -sub _read_at { - my $self = shift; - my ($spot, $amount, $unpack) = @_; - - local($/,$\); - - my $fh = $self->_fh; - seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET ); - - my $buffer; - my $bytes_read = read( $fh, $buffer, $amount ); +sub _get_dbm_object { + my $item = shift; - if ( $unpack ) { - $buffer = unpack( $unpack, $buffer ); - } + my $obj = eval { + local $SIG{__DIE__}; + if ($item->isa( 'DBM::Deep' )) { + return $item; + } + return; + }; + return $obj if $obj; - if ( wantarray ) { - return ($buffer, $bytes_read); + my $r = Scalar::Util::reftype( $item ) || ''; + if ( $r eq 'HASH' ) { + my $obj = eval { + local $SIG{__DIE__}; + my $obj = tied(%$item); + if ($obj->isa( 'DBM::Deep' )) { + return $obj; + } + return; + }; + return $obj if $obj; } - else { - return $buffer; + elsif ( $r eq 'ARRAY' ) { + my $obj = eval { + local $SIG{__DIE__}; + my $obj = tied(@$item); + if ($obj->isa( 'DBM::Deep' )) { + return $obj; + } + return; + }; + return $obj if $obj; } -} - -sub _print_at { - my $self = shift; - my ($spot, $data) = @_; - - local($/,$\); - - my $fh = $self->_fh; - seek( $fh, $spot, SEEK_SET ); - print( $fh $data ); return; } -sub get_file_version { +sub _length_needed { my $self = shift; + my ($value, $key) = @_; - local($/,$\); + my $is_dbm_deep = eval { + local $SIG{'__DIE__'}; + $value->isa( 'DBM::Deep' ); + }; - my $fh = $self->_fh; + my $len = SIG_SIZE + + $self->{data_size} # size for value + + $self->{data_size} # size for key + + length( $key ); # length of key - seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET ); - my $buffer; - my $bytes_read = read( $fh, $buffer, 4 ); - unless ( $bytes_read == 4 ) { - $self->_throw_error( "Cannot read file version" ); + if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) { + # long_size is for the internal reference + return $len + $self->{long_size}; } - return unpack( 'N', $buffer ); -} - -sub write_file_version { - my $self = shift; - my ($new_version) = @_; + if ( $self->_fileobj->{autobless} ) { + # This is for the bit saying whether or not this thing is blessed. + $len += 1; + } - local($/,$\); + my $r = Scalar::Util::reftype( $value ) || ''; + unless ( $r eq 'HASH' || $r eq 'ARRAY' ) { + if ( defined $value ) { + $len += length( $value ); + } + return $len; + } - my $fh = $self->_fh; + $len += $self->{index_size}; - seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET ); - print( $fh pack( 'N', $new_version ) ); + # if autobless is enabled, must also take into consideration + # the class name as it is stored after the key. + if ( $self->_fileobj->{autobless} ) { + my $c = Scalar::Util::blessed($value); + if ( defined $c && !$is_dbm_deep ) { + $len += $self->{data_size} + length($c); + } + } - return; + return $len; } +1; +__END__
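
For orientation, the five entry points introduced at the top of this patch (write_value, read_value, key_exists, delete_key, get_next_key) are the new engine API that the tied-hash layer of DBM::Deep is expected to call; the rest of the file is the older bucket-level code they wrap. Below is a minimal sketch of how that surface gets exercised through the documented DBM::Deep front end. The file name is illustrative, and the comments indicating which engine method each tied-hash operation roughly dispatches to are an approximation of the call path, not a guarantee of the exact signatures used internally.

    use strict;
    use warnings;
    use DBM::Deep;

    # Create (or open) a database file; the name is just an example.
    my $db = DBM::Deep->new( file => 'example.db' );

    $db->{fruit} = 'apple';                # STORE   -> roughly Engine::write_value
    print $db->{fruit}, "\n";              # FETCH   -> roughly Engine::read_value
    print "have fruit\n"
        if exists $db->{fruit};            # EXISTS  -> roughly Engine::key_exists
    delete $db->{fruit};                   # DELETE  -> roughly Engine::delete_key

    $db->{$_} = uc $_ for qw( a b c );
    for my $key ( keys %$db ) {            # FIRSTKEY/NEXTKEY -> roughly Engine::get_next_key
        print "$key => $db->{$key}\n";
    }

Note that, per the file-wide comment added in this patch, the engine methods assume the caller has already safeguarded the fileobj (flock or some other mutex); the front end is responsible for doing that around each of these operations.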