use strict;
use warnings;
-use Fcntl qw( :DEFAULT :flock :seek );
+our $VERSION = q(0.99_03);
+
+use Fcntl qw( :DEFAULT :flock );
use Scalar::Util ();
# File-wide notes:
-# * All the local($/,$\); are to protect read() and print() from -l.
# * To add to bucket_size, make sure you modify the following:
# - calculate_sizes()
# - _get_key_subloc()
# - add_bucket() - where the buckets are printed
+#
+# * Every method in here assumes that the _fileobj has been appropriately
+# safeguarded. This can be anything from flock() to some sort of manual
+# mutex. But, it's the caller's responsibility to make sure that this has
+# been done.
##
# Setup file and tag signatures. These should never change.
sub SIG_INDEX () { 'I' } # signature written on index tags
sub SIG_BLIST () { 'B' } # signature written on bucket-list tags
sub SIG_FREE () { 'F' } # presumably marks freed space; not used in the visible code -- TODO confirm
+sub SIG_KEYS () { 'K' } # signature written on key-location tags
sub SIG_SIZE () { 1 } # length of a tag signature (one character)
+################################################################################
+#
+# This is new code. It is a complete rewrite of the engine based on a new API
+#
+################################################################################
+
+sub write_value { # Store $value under $key; the bucket-list lookup starts at $offset.
+ my $self = shift;
+ my ($offset, $key, $value, $orig_key) = @_;
+ # Digest the key, find its bucket list (created on demand), then add the bucket.
+ my $dig_key = $self->apply_digest( $key );
+ my $tag = $self->find_blist( $offset, $dig_key, { create => 1 } );
+ return $self->add_bucket( $tag, $dig_key, $key, $value, undef, $orig_key ); # undef fills add_bucket()'s $deleted slot
+}
+
+sub read_value { # Fetch the value stored for $key; the bucket-list lookup starts at $offset.
+ my $self = shift;
+ my ($offset, $key, $orig_key) = @_;
+ # find_blist() is called without { create => 1 }; a false result means a bare return.
+ my $dig_key = $self->apply_digest( $key );
+ my $tag = $self->find_blist( $offset, $dig_key ) or return;
+ return $self->get_bucket_value( $tag, $dig_key, $orig_key );
+}
+
+sub delete_key { # Delete $key and return the value get_bucket_value() reported for it.
+ my $self = shift;
+ my ($offset, $key, $orig_key) = @_;
+ # A false result from find_blist() means a bare return without deleting anything.
+ my $dig_key = $self->apply_digest( $key );
+ my $tag = $self->find_blist( $offset, $dig_key ) or return;
+ my $value = $self->get_bucket_value( $tag, $dig_key, $orig_key ); # read first so it can be returned after the delete
+ $self->delete_bucket( $tag, $dig_key, $orig_key );
+ return $value;
+}
+
+sub key_exists { # Report whether $key exists, delegating to bucket_exists() once a bucket list is found.
+ my $self = shift;
+ my ($offset, $key) = @_;
+ # The lookup uses the digested key, never the plain key.
+ my $dig_key = $self->apply_digest( $key );
+ # exists() returns the empty string, not undef
+ my $tag = $self->find_blist( $offset, $dig_key ) or return '';
+ return $self->bucket_exists( $tag, $dig_key, $key );
+}
+
+sub get_next_key { # Build the traversal state and return whatever traverse_index() finds from $offset.
+ my $self = shift;
+ my ($offset) = @_;
+ # $_[1], when passed, is the previous key; it is digested before use.
+ # If the previous key was not specified, start at the top and
+ # return the first one found.
+ my $temp;
+ if ( @_ > 1 ) {
+ $temp = {
+ prev_md5 => $self->apply_digest($_[1]),
+ return_next => 0,
+ };
+ }
+ else {
+ $temp = {
+ prev_md5 => chr(0) x $self->{hash_size}, # hash_size NUL characters
+ return_next => 1,
+ };
+ }
+ # The trailing 0 is the digest character position the traversal starts from.
+ return $self->traverse_index( $temp, $offset, 0 );
+}
+
+################################################################################
+#
+# Below here is the old code. It will be folded into the code above as it can.
+#
+################################################################################
+
sub new {
my $class = shift;
my ($args) = @_;
my $self = bless {
- long_size => 4,
- long_pack => 'N',
- data_size => 4,
- data_pack => 'N',
+ long_size => 4,
+ long_pack => 'N',
+ data_size => 4,
+ data_pack => 'N',
- digest => \&Digest::MD5::md5,
- hash_size => 16,
+ digest => \&Digest::MD5::md5,
+ hash_size => 16, # In bytes
##
- # Maximum number of buckets per list before another level of indexing is
+ # Number of buckets per blist before another level of indexing is
# done. Increase this value for slightly greater speed, but larger database
# files. DO NOT decrease this value below 16, due to risk of recursive
# reindex overrun.
}
sub _fileobj { return $_[0]{fileobj} } # accessor for the object's 'fileobj' slot
-sub _fh { return $_[0]->_fileobj->{fh} }
+
+sub apply_digest { # Run the configured digest function over the given arguments and return its result.
+ my $self = shift;
+ return $self->{digest}->(@_); # {digest} is a code ref; new() lists \&Digest::MD5::md5 for it
+}
sub calculate_sizes { # Derive the on-disk structure sizes from long_size, hash_size and max_buckets.
my $self = shift;
+ # The 2**8 here is the number of distinct values a single character of
+ # the digest can take under the current hashing algorithm
#XXX Does this need to be updated with different hashing algorithms?
- $self->{index_size} = (2**8) * $self->{long_size};
- $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3;
+ $self->{hash_chars_used} = (2**8);
+ $self->{index_size} = $self->{hash_chars_used} * $self->{long_size}; # one long per possible digest character
+ # A bucket is sized as the digest plus two longs; a bucket list holds max_buckets of them.
+ $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
$self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
+ $self->{key_size} = $self->{long_size} * 2; # size of one key-location slot
+ $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size}; # max_buckets slots per key-location tag
+
return;
}
sub write_file_header {
my $self = shift;
- local($/,$\);
+ my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 33 );
- my $fh = $self->_fh;
-
- my $loc = $self->_request_space( length( SIG_FILE ) + 21 );
- seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET);
- print( $fh
+ $self->_fileobj->print_at( $loc,
SIG_FILE,
SIG_HEADER,
pack('N', 1), # header version
- pack('N', 12), # header size
- pack('N', 0), # currently running transaction IDs
+ pack('N', 24), # header size
+ pack('N4', 0, 0, 0, 0), # currently running transaction IDs
pack('n', $self->{long_size}),
pack('A', $self->{long_pack}),
pack('n', $self->{data_size}),
sub read_file_header {
my $self = shift;
- local($/,$\);
-
- my $fh = $self->_fh;
-
- seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET);
- my $buffer;
- my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 );
-
- return unless $bytes_read;
+ my $buffer = $self->_fileobj->read_at( 0, length(SIG_FILE) + 9 );
+ return unless length($buffer);
my ($file_signature, $sig_header, $header_version, $size) = unpack(
'A4 A N N', $buffer
$self->_throw_error( "Old file version found." );
}
- my $buffer2;
- $bytes_read += read( $fh, $buffer2, $size );
- my ($running_transactions, @values) = unpack( 'N n A n A n', $buffer2 );
+ my $buffer2 = $self->_fileobj->read_at( undef, $size );
+ my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 );
$self->_fileobj->set_transaction_offset( 13 );
#XXX Add warnings if values weren't set right
@{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
- return $bytes_read;
+ return length($buffer) + length($buffer2);
}
sub setup_fh {
my $self = shift;
my ($obj) = @_;
- local($/,$\);
-
- my $fh = $self->_fh;
+ # Need to remove use of $fh here
+ my $fh = $self->_fileobj->{fh};
flock $fh, LOCK_EX;
#XXX The duplication of calculate_sizes needs to go away
# File is empty -- write header and master index
##
if (!$bytes_read) {
- if ( my $afh = $self->_fileobj->{audit_fh} ) {
- flock( $afh, LOCK_EX );
- print( $afh "# Database created on " . localtime(time) . "\n" );
- flock( $afh, LOCK_UN );
- }
+ $self->_fileobj->audit( "# Database created on" );
$self->write_file_header;
- $obj->{base_offset} = $self->_request_space( $self->tag_size( $self->{index_size} ) );
+ $obj->{base_offset} = $self->_fileobj->request_space(
+ $self->tag_size( $self->{index_size} ),
+ );
$self->write_tag(
$obj->_base_offset, $obj->_type,
}
#XXX We have to make sure we don't mess up when autoflush isn't turned on
- unless ( $self->_fileobj->{inode} ) {
- my @stats = stat($fh);
- $self->_fileobj->{inode} = $stats[1];
- $self->_fileobj->{end} = $stats[7];
- }
+ $self->_fileobj->set_inode;
flock $fh, LOCK_UN;
my ($offset, $sig, $content) = @_;
my $size = length( $content );
- local($/,$\);
-
- my $fh = $self->_fh;
-
- if ( defined $offset ) {
- seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
- }
-
- print( $fh $sig . pack($self->{data_pack}, $size) . $content );
+ $self->_fileobj->print_at(
+ $offset,
+ $sig, pack($self->{data_pack}, $size), $content,
+ );
return unless defined $offset;
return {
signature => $sig,
- size => $size,
- offset => $offset + SIG_SIZE + $self->{data_size},
- content => $content
+ #XXX Is this even used?
+ size => $size,
+ offset => $offset + SIG_SIZE + $self->{data_size},
+ content => $content
};
}
my $self = shift;
my ($offset) = @_;
- local($/,$\);
+ my $fileobj = $self->_fileobj;
-# print join(':',map{$_||''}caller(1)), $/;
-
- my $fh = $self->_fh;
-
- seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
-
- #XXX I'm not sure this check will work if autoflush isn't enabled ...
- return if eof $fh;
-
- my $b;
- read( $fh, $b, SIG_SIZE + $self->{data_size} );
- my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
-
- my $buffer;
- read( $fh, $buffer, $size);
+ my ($sig, $size) = unpack(
+ "A $self->{data_pack}",
+ $fileobj->read_at( $offset, SIG_SIZE + $self->{data_size} ),
+ );
return {
signature => $sig,
- size => $size,
- offset => $offset + SIG_SIZE + $self->{data_size},
- content => $buffer
+ size => $size, #XXX Is this even used?
+ offset => $offset + SIG_SIZE + $self->{data_size},
+ content => $fileobj->read_at( undef, $size ),
};
}
-sub _get_dbm_object {
- my $item = shift;
-
- my $obj = eval {
- local $SIG{__DIE__};
- if ($item->isa( 'DBM::Deep' )) {
- return $item;
- }
- return;
- };
- return $obj if $obj;
-
- my $r = Scalar::Util::reftype( $item ) || '';
- if ( $r eq 'HASH' ) {
- my $obj = eval {
- local $SIG{__DIE__};
- my $obj = tied(%$item);
- if ($obj->isa( 'DBM::Deep' )) {
- return $obj;
- }
- return;
- };
- return $obj if $obj;
- }
- elsif ( $r eq 'ARRAY' ) {
- my $obj = eval {
- local $SIG{__DIE__};
- my $obj = tied(@$item);
- if ($obj->isa( 'DBM::Deep' )) {
- return $obj;
- }
- return;
- };
- return $obj if $obj;
- }
-
- return;
-}
-
-sub _length_needed {
+sub find_keyloc { # Scan the slots in $tag->{content} for the one belonging to a transaction.
my $self = shift;
- my ($value, $key) = @_;
+ my ($tag, $transaction_id) = @_;
+ $transaction_id = $self->_fileobj->transaction_id # default: the file object's current transaction id
+ unless defined $transaction_id;
- my $is_dbm_deep = eval {
- local $SIG{'__DIE__'};
- $value->isa( 'DBM::Deep' );
- };
-
- my $len = SIG_SIZE + $self->{data_size}
- + $self->{data_size} + length( $key );
-
- if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
- return $len + $self->{long_size};
- }
-
- my $r = Scalar::Util::reftype( $value ) || '';
- if ( $self->_fileobj->{autobless} ) {
- # This is for the bit saying whether or not this thing is blessed.
- $len += 1;
- }
+ for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
+ my ($loc, $trans_id, $is_deleted) = unpack( # each slot: a long location, then two single-byte fields
+ "$self->{long_pack} C C",
+ substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ),
+ );
- unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
- if ( defined $value ) {
- $len += length( $value );
+ if ( $loc == 0 ) { # a zero location ends the scan, whatever transaction id the slot carries
+ return ( $loc, $is_deleted, $i * $self->{key_size} );
}
- return $len;
- }
- $len += $self->{index_size};
+ next if $transaction_id != $trans_id; # slot belongs to another transaction
- # if autobless is enabled, must also take into consideration
- # the class name as it is stored after the key.
- if ( $self->_fileobj->{autobless} ) {
- my $c = Scalar::Util::blessed($value);
- if ( defined $c && !$is_dbm_deep ) {
- $len += $self->{data_size} + length($c);
- }
+ return ( $loc, $is_deleted, $i * $self->{key_size} ); # third element: byte offset of the slot within the content
}
- return $len;
+ return; # no slot matched within max_buckets entries
}
sub add_bucket { # Store $value for digest $md5 via bucket list $tag; returns 1.
##
my $self = shift;
my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
- $deleted ||= 0;
-
- local($/,$\);
# This verifies that only supported values will be stored.
{
my $r = Scalar::Util::reftype( $value );
- last if !defined $r;
+ last if !defined $r;
last if $r eq 'HASH';
last if $r eq 'ARRAY';
$self->_throw_error(
- "Storage of variables of type '$r' is not supported."
+ "Storage of references of type '$r' is not supported."
);
}
- my $location = 0;
- my $result = 2;
-
- my $root = $self->_fileobj;
- my $fh = $self->_fh;
-
- my $actual_length = $self->_length_needed( $value, $plain_key );
+ my $fileobj = $self->_fileobj;
#ACID - This is a mutation. Must only find the exact transaction
- my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5, 1 );
+ my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );
my @transactions;
- if ( $self->_fileobj->transaction_id == 0 ) {
- @transactions = $self->_fileobj->current_transactions;
+ if ( $fileobj->transaction_id == 0 ) { # @transactions stays empty unless this write is made as transaction 0
+ @transactions = $fileobj->current_transactions;
}
# $self->_release_space( $size, $subloc );
- # Updating a known md5
#XXX This needs updating to use _release_space
- if ( $subloc ) {
- $result = 1;
- if ($actual_length <= $size) {
- $location = $subloc;
- }
- else {
- $location = $self->_request_space( $actual_length );
- seek(
- $fh,
- $tag->{offset} + $offset
- + $self->{hash_size} + $root->{file_offset},
- SEEK_SET,
- );
- print( $fh pack($self->{long_pack}, $location ) );
- print( $fh pack($self->{long_pack}, $actual_length ) );
- print( $fh pack('n n', $root->transaction_id, $deleted ) );
+ my $location;
+ my $size = $self->_length_needed( $value, $plain_key ); # space to request for the new value
+
+ # Updating a known md5
+ if ( $keyloc ) {
+ my $keytag = $self->load_tag( $keyloc );
+ my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
+ # Before overwriting, copy the current value into a slot for every running transaction that has no location of its own.
+ if ( $subloc && !$is_deleted && @transactions ) {
+ my $old_value = $self->read_from_loc( $subloc, $orig_key );
+ my $old_size = $self->_length_needed( $old_value, $plain_key );
+ # NOTE(review): $keytag is not reloaded inside this loop, so with several running transactions
+ for my $trans_id ( @transactions ) { # find_keyloc() may keep returning the same empty slot -- confirm
+ my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
+ unless ($loc) {
+ my $location2 = $fileobj->request_space( $old_size );
+ $fileobj->print_at( $keytag->{offset} + $offset2,
+ pack($self->{long_pack}, $location2 ),
+ pack( 'C C', $trans_id, 0 ),
+ );
+ $self->_write_value( $location2, $plain_key, $old_value, $orig_key );
+ }
+ }
}
+ # Point the slot found above at freshly requested space for the new value.
+ $location = $self->_fileobj->request_space( $size );
+ #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
+ $fileobj->print_at( $keytag->{offset} + $offset,
+ pack($self->{long_pack}, $location ),
+ pack( 'C C', $fileobj->transaction_id, 0 ),
+ );
}
# Adding a new md5
- elsif ( defined $offset ) {
- $location = $self->_request_space( $actual_length );
-
- seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
- print( $fh $md5 . pack($self->{long_pack}, $location ) );
- print( $fh pack($self->{long_pack}, $actual_length ) );
- print( $fh pack('n n', $root->transaction_id, $deleted ) );
-
- for ( @transactions ) {
- my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
- $self->_fileobj->{transaction_id} = $_;
- $self->add_bucket( $tag2, $md5, '', '', 1, $orig_key );
- $self->_fileobj->{transaction_id} = 0;
- }
- }
- # If bucket didn't fit into list, split into a new index level
- # split_index() will do the _request_space() call
else {
- $location = $self->split_index( $md5, $tag );
+ my $keyloc = $fileobj->request_space( $self->tag_size( $self->{keyloc_size} ) ); # room for a new key-location tag
+
+ # The bucket fit into list
+ if ( defined $offset ) {
+ $fileobj->print_at( $tag->{offset} + $offset,
+ $md5, pack( $self->{long_pack}, $keyloc ),
+ );
+ }
+ # If bucket didn't fit into list, split into a new index level
+ else {
+ $self->split_index( $tag, $md5, $keyloc );
+ }
+ # Initialise the key-location tag with NUL characters.
+ my $keytag = $self->write_tag(
+ $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
+ );
+ # The first slot records the new value's location under the current transaction id.
+ $location = $self->_fileobj->request_space( $size );
+ $fileobj->print_at( $keytag->{offset},
+ pack( $self->{long_pack}, $location ),
+ pack( 'C C', $fileobj->transaction_id, 0 ),
+ );
+ # Later slots carry location 0 and a deleted flag of 1 for each running transaction.
+ my $offset = 1;
+ for my $trans_id ( @transactions ) {
+ $fileobj->print_at( $keytag->{offset} + $self->{key_size} * $offset++,
+ pack( $self->{long_pack}, 0 ),
+ pack( 'C C', $trans_id, 1 ),
+ );
+ }
}
- $self->write_value( $location, $plain_key, $value, $orig_key );
+ $self->_write_value( $location, $plain_key, $value, $orig_key ); # write the value itself at $location
- return $result;
+ return 1;
}
-sub write_value {
+sub _write_value {
my $self = shift;
my ($location, $key, $value, $orig_key) = @_;
- local($/,$\);
-
- my $fh = $self->_fh;
- my $root = $self->_fileobj;
+ my $fileobj = $self->_fileobj;
my $dbm_deep_obj = _get_dbm_object( $value );
- if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) {
+ if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) {
$self->_throw_error( "Cannot cross-reference. Use export() instead" );
}
- seek($fh, $location + $root->{file_offset}, SEEK_SET);
-
##
# Write signature based on content type, set content length and write
# actual value.
##
my $r = Scalar::Util::reftype( $value ) || '';
if ( $dbm_deep_obj ) {
- $self->write_tag( undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
+ $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
}
elsif ($r eq 'HASH') {
if ( !$dbm_deep_obj && tied %{$value} ) {
$self->_throw_error( "Cannot store something that is tied" );
}
- $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} );
+ $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
}
elsif ($r eq 'ARRAY') {
if ( !$dbm_deep_obj && tied @{$value} ) {
$self->_throw_error( "Cannot store something that is tied" );
}
- $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} );
+ $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
}
elsif (!defined($value)) {
- $self->write_tag( undef, SIG_NULL, '' );
+ $self->write_tag( $location, SIG_NULL, '' );
}
else {
- $self->write_tag( undef, SIG_DATA, $value );
+ $self->write_tag( $location, SIG_DATA, $value );
}
##
# Plain key is stored AFTER value, as keys are typically fetched less often.
##
- print( $fh pack($self->{data_pack}, length($key)) . $key );
+ $fileobj->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
# Internal references don't care about autobless
return 1 if $dbm_deep_obj;
##
# If value is blessed, preserve class name
##
- if ( $root->{autobless} ) {
- my $c = Scalar::Util::blessed($value);
- if ( defined $c && !$dbm_deep_obj ) {
- print( $fh chr(1) );
- print( $fh pack($self->{data_pack}, length($c)) . $c );
+ if ( $fileobj->{autobless} ) {
+ if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
+ $fileobj->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
}
else {
- print( $fh chr(0) );
+ $fileobj->print_at( undef, chr(0) );
}
}
my %x = %$value;
tie %$value, 'DBM::Deep', {
base_offset => $location,
- fileobj => $root,
+ fileobj => $fileobj,
parent => $self->{obj},
parent_key => $orig_key,
};
my @x = @$value;
tie @$value, 'DBM::Deep', {
base_offset => $location,
- fileobj => $root,
+ fileobj => $fileobj,
parent => $self->{obj},
parent_key => $orig_key,
};
sub split_index {
my $self = shift;
- my ($md5, $tag) = @_;
+ my ($tag, $md5, $keyloc) = @_;
- local($/,$\);
+ my $fileobj = $self->_fileobj;
- my $fh = $self->_fh;
- my $root = $self->_fileobj;
-
- my $loc = $self->_request_space(
+ my $loc = $fileobj->request_space(
$self->tag_size( $self->{index_size} ),
);
- seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
- print( $fh pack($self->{long_pack}, $loc) );
+ $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
my $index_tag = $self->write_tag(
$loc, SIG_INDEX,
chr(0)x$self->{index_size},
);
- my $newtag_loc = $self->_request_space(
- $self->tag_size( $self->{bucket_list_size} ),
- );
-
my $keys = $tag->{content}
- . $md5 . pack($self->{long_pack}, $newtag_loc)
- . pack($self->{long_pack}, 0) # size
- . pack($self->{long_pack}, 0); # transaction ID
+ . $md5 . pack($self->{long_pack}, $keyloc);
my @newloc = ();
BUCKET:
+ # The <= here is deliberate - we have max_buckets+1 keys to iterate
+ # through, unlike every other loop that uses max_buckets as a stop.
for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
- my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
+ my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
my $num = ord(substr($key, $tag->{ch} + 1, 1));
if ($newloc[$num]) {
- seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
- my $subkeys;
- read( $fh, $subkeys, $self->{bucket_list_size});
+ my $subkeys = $fileobj->read_at( $newloc[$num], $self->{bucket_list_size} );
# This is looking for the first empty spot
- my ($subloc, $offset, $size) = $self->_find_in_buckets(
+ my ($subloc, $offset) = $self->_find_in_buckets(
{ content => $subkeys }, '',
);
- seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
- print( $fh $key . pack($self->{long_pack}, $old_subloc) );
+ $fileobj->print_at(
+ $newloc[$num] + $offset,
+ $key, pack($self->{long_pack}, $old_subloc),
+ );
next;
}
- seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
-
- my $loc = $self->_request_space(
+ my $loc = $fileobj->request_space(
$self->tag_size( $self->{bucket_list_size} ),
);
- print( $fh pack($self->{long_pack}, $loc) );
+ $fileobj->print_at(
+ $index_tag->{offset} + ($num * $self->{long_size}),
+ pack($self->{long_pack}, $loc),
+ );
my $blist_tag = $self->write_tag(
$loc, SIG_BLIST,
chr(0)x$self->{bucket_list_size},
);
- seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
- print( $fh $key . pack($self->{long_pack}, $old_subloc) );
+ $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
$newloc[$num] = $blist_tag->{offset};
}
$tag->{offset} - SIG_SIZE - $self->{data_size},
);
- return $newtag_loc;
+ return 1;
}
sub read_from_loc {
my $self = shift;
my ($subloc, $orig_key) = @_;
- local($/,$\);
-
- my $fh = $self->_fh;
+ my $fileobj = $self->_fileobj;
- ##
- # Found match -- seek to offset and read signature
- ##
- my $signature;
- seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
- read( $fh, $signature, SIG_SIZE);
+ my $signature = $fileobj->read_at( $subloc, SIG_SIZE );
##
# If value is a hash or array, return new DBM::Deep object with correct offset
##
if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
+ #XXX This needs to be a singleton
my $new_obj = DBM::Deep->new({
type => $signature,
base_offset => $subloc,
# Skip over value and plain key to see if object needs
# to be re-blessed
##
- seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
+ $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} );
- my $size;
- read( $fh, $size, $self->{data_size});
+ my $size = $fileobj->read_at( undef, $self->{data_size} );
$size = unpack($self->{data_pack}, $size);
- if ($size) { seek($fh, $size, SEEK_CUR); }
-
- my $bless_bit;
- read( $fh, $bless_bit, 1);
- if (ord($bless_bit)) {
- ##
- # Yes, object needs to be re-blessed
- ##
- my $class_name;
- read( $fh, $size, $self->{data_size});
- $size = unpack($self->{data_pack}, $size);
- if ($size) { read( $fh, $class_name, $size); }
- if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
+ if ($size) { $fileobj->increment_pointer( $size ); }
+
+ my $bless_bit = $fileobj->read_at( undef, 1 );
+ if ( ord($bless_bit) ) {
+ my $size = unpack(
+ $self->{data_pack},
+ $fileobj->read_at( undef, $self->{data_size} ),
+ );
+
+ if ( $size ) {
+ $new_obj = bless $new_obj, $fileobj->read_at( undef, $size );
+ }
}
}
return $new_obj;
}
elsif ( $signature eq SIG_INTERNAL ) {
- my $size;
- read( $fh, $size, $self->{data_size});
+ my $size = $fileobj->read_at( undef, $self->{data_size} );
$size = unpack($self->{data_pack}, $size);
if ( $size ) {
- my $new_loc;
- read( $fh, $new_loc, $size );
- $new_loc = unpack( $self->{long_pack}, $new_loc );
-
+ my $new_loc = $fileobj->read_at( undef, $size );
+ $new_loc = unpack( $self->{long_pack}, $new_loc );
return $self->read_from_loc( $new_loc, $orig_key );
}
else {
# Otherwise return actual value
##
elsif ( $signature eq SIG_DATA ) {
- my $size;
- read( $fh, $size, $self->{data_size});
+ my $size = $fileobj->read_at( undef, $self->{data_size} );
$size = unpack($self->{data_pack}, $size);
- my $value = '';
- if ($size) { read( $fh, $value, $size); }
+ my $value = $size ? $fileobj->read_at( undef, $size ) : '';
return $value;
}
my ($tag, $md5, $orig_key) = @_;
#ACID - This is a read. Can find exact or HEAD
- my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5 );
- if ( $subloc && !$is_deleted ) {
- return $self->read_from_loc( $subloc, $orig_key );
+ my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
+
+ if ( !$keyloc ) {
+ #XXX Need to use real key
+# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key );
+# return;
+ }
+# elsif ( !$is_deleted ) {
+ else {
+ my $keytag = $self->load_tag( $keyloc );
+ my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
+ if (!$subloc && !$is_deleted) {
+ ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
+ }
+ if ( $subloc && !$is_deleted ) {
+ return $self->read_from_loc( $subloc, $orig_key );
+ }
}
+
return;
}
my $self = shift;
my ($tag, $md5, $orig_key) = @_;
- local($/,$\);
+ #ACID - Although this is a mutation, we must find any transaction.
+ # This is because we need to mark something as deleted that is in the HEAD.
+ my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
- #ACID - This is a mutation. Must only find the exact transaction
- my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );
-#XXX This needs _release_space()
- if ( $subloc ) {
- my $fh = $self->_fh;
- seek($fh, $tag->{offset} + $offset + $self->_fileobj->{file_offset}, SEEK_SET);
- print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
- print( $fh chr(0) x $self->{bucket_size} );
-
- return 1;
+ return if !$keyloc;
+
+ my $fileobj = $self->_fileobj;
+
+ my @transactions;
+ if ( $fileobj->transaction_id == 0 ) {
+ @transactions = $fileobj->current_transactions;
}
- return;
+
+ if ( $fileobj->transaction_id == 0 ) {
+ my $keytag = $self->load_tag( $keyloc );
+
+ my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
+ return if !$subloc || $is_deleted;
+
+ my $value = $self->read_from_loc( $subloc, $orig_key );
+
+ my $size = $self->_length_needed( $value, $orig_key );
+
+ for my $trans_id ( @transactions ) {
+ my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
+ unless ($loc) {
+ my $location2 = $fileobj->request_space( $size );
+ $fileobj->print_at( $keytag->{offset} + $offset2,
+ pack($self->{long_pack}, $location2 ),
+ pack( 'C C', $trans_id, 0 ),
+ );
+ $self->_write_value( $location2, $orig_key, $value, $orig_key );
+ }
+ }
+
+ $keytag = $self->load_tag( $keyloc );
+ ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
+ $fileobj->print_at( $keytag->{offset} + $offset,
+ substr( $keytag->{content}, $offset + $self->{key_size} ),
+ chr(0) x $self->{key_size},
+ );
+ }
+ else {
+ my $keytag = $self->load_tag( $keyloc );
+
+ my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
+
+ $fileobj->print_at( $keytag->{offset} + $offset,
+ pack($self->{long_pack}, 0 ),
+ pack( 'C C', $fileobj->transaction_id, 1 ),
+ );
+ }
+
+ return 1;
}
sub bucket_exists {
+ # Report whether the digested key $md5 has a live (non-deleted) value
+ # reachable through bucket list $tag. Returns 1 when it does and a
+ # false value otherwise.
my ($tag, $md5) = @_;
#ACID - This is a read. Can find exact or HEAD
- my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );
+ my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );
+ # No key-location tag for this digest means the key is absent. Return
+ # before load_tag() is handed an empty location; exists() returns the
+ # empty string, not undef.
+ return '' if !$keyloc;
+
+ my $keytag = $self->load_tag( $keyloc );
+ # Look for the current transaction's slot first ...
+ my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
+ # ... and fall back to transaction 0 (the HEAD, per the ACID note above)
+ # when that slot holds neither a location nor a deleted flag.
+ if ( !$subloc && !$is_deleted ) {
+ ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
+ }
return ($subloc && !$is_deleted) && 1;
}
-sub find_bucket_list {
+sub find_blist {
##
# Locate offset for bucket list, given digested key
##
my ($offset, $md5, $args) = @_;
$args = {} unless $args;
- local($/,$\);
-
##
# Locate offset for bucket list using digest index system
##
if (!$tag) {
return if !$args->{create};
- my $loc = $self->_request_space(
+ my $loc = $self->_fileobj->request_space(
$self->tag_size( $self->{bucket_list_size} ),
);
- my $fh = $self->_fh;
- seek($fh, $ref_loc + $self->_fileobj->{file_offset}, SEEK_SET);
- print( $fh pack($self->{long_pack}, $loc) );
+ $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
$tag = $self->write_tag(
$loc, SIG_BLIST,
# Scan index and recursively step into deeper levels, looking for next key.
##
my $self = shift;
- my ($obj, $offset, $ch, $force_return_next) = @_;
-
- local($/,$\);
+ my ($xxxx, $offset, $ch, $force_return_next) = @_;
my $tag = $self->load_tag( $offset );
- my $fh = $self->_fh;
-
if ($tag->{signature} ne SIG_BLIST) {
- my $content = $tag->{content};
- my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
+ my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1));
- for (my $idx = $start; $idx < (2**8); $idx++) {
+ for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) {
my $subloc = unpack(
$self->{long_pack},
substr(
- $content,
+ $tag->{content},
$idx * $self->{long_size},
$self->{long_size},
),
if ($subloc) {
my $result = $self->traverse_index(
- $obj, $subloc, $ch + 1, $force_return_next,
+ $xxxx, $subloc, $ch + 1, $force_return_next,
);
- if (defined($result)) { return $result; }
+ if (defined $result) { return $result; }
}
} # index loop
- $obj->{return_next} = 1;
- } # tag is an index
-
+ $xxxx->{return_next} = 1;
+ }
+ # This is the bucket list
else {
my $keys = $tag->{content};
- if ($force_return_next) { $obj->{return_next} = 1; }
+ if ($force_return_next) { $xxxx->{return_next} = 1; }
##
# Iterate through buckets, looking for a key match
##
+ my $transaction_id = $self->_fileobj->transaction_id;
for (my $i = 0; $i < $self->{max_buckets}; $i++) {
- my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
+ my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );
# End of bucket list -- return to outer loop
- if (!$subloc) {
- $obj->{return_next} = 1;
+ if (!$keyloc) {
+ $xxxx->{return_next} = 1;
last;
}
# Located previous key -- return next one found
- elsif ($key eq $obj->{prev_md5}) {
- $obj->{return_next} = 1;
+ elsif ($key eq $xxxx->{prev_md5}) {
+ $xxxx->{return_next} = 1;
next;
}
# Seek to bucket location and skip over signature
- elsif ($obj->{return_next}) {
- seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
+ elsif ($xxxx->{return_next}) {
+ my $fileobj = $self->_fileobj;
+
+ my $keytag = $self->load_tag( $keyloc );
+ my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
+ if ( $subloc == 0 && !$is_deleted ) {
+ ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
+ }
+ next if $is_deleted;
# Skip over value to get to plain key
- my $sig;
- read( $fh, $sig, SIG_SIZE );
+ my $sig = $fileobj->read_at( $subloc, SIG_SIZE );
- my $size;
- read( $fh, $size, $self->{data_size});
+ my $size = $fileobj->read_at( undef, $self->{data_size} );
$size = unpack($self->{data_pack}, $size);
- if ($size) { seek($fh, $size, SEEK_CUR); }
+ if ($size) { $fileobj->increment_pointer( $size ); }
# Read in plain key and return as scalar
- my $plain_key;
- read( $fh, $size, $self->{data_size});
+ $size = $fileobj->read_at( undef, $self->{data_size} );
$size = unpack($self->{data_pack}, $size);
- if ($size) { read( $fh, $plain_key, $size); }
+ my $plain_key;
+ if ($size) { $plain_key = $fileobj->read_at( undef, $size); }
return $plain_key;
}
}
- $obj->{return_next} = 1;
- } # tag is a bucket list
-
- return;
-}
-
-sub get_next_key {
- ##
- # Locate next key, given digested previous one
- ##
- my $self = shift;
- my ($obj) = @_;
-
- $obj->{prev_md5} = $_[1] ? $_[1] : undef;
- $obj->{return_next} = 0;
-
- ##
- # If the previous key was not specifed, start at the top and
- # return the first one found.
- ##
- if (!$obj->{prev_md5}) {
- $obj->{prev_md5} = chr(0) x $self->{hash_size};
- $obj->{return_next} = 1;
+ $xxxx->{return_next} = 1;
}
- return $self->traverse_index( $obj, $obj->_base_offset, 0 );
+ return;
}
# Utilities
my $self = shift;
my ($keys, $idx) = @_;
- my ($key, $subloc, $size, $transaction_id, $is_deleted) = unpack(
+ return unpack(
# This is 'a', not 'A'. Please read the pack() documentation for the
# difference between the two and why it's important.
- "a$self->{hash_size} $self->{long_pack}2 n2",
+ "a$self->{hash_size} $self->{long_pack}",
substr(
$keys,
($idx * $self->{bucket_size}),
$self->{bucket_size},
),
);
-
- return ($key, $subloc, $size, $transaction_id, $is_deleted);
}
sub _find_in_buckets {
my $self = shift;
- my ($tag, $md5, $exact) = @_;
-
- my $trans_id = $self->_fileobj->transaction_id;
-
- my @zero;
+ my ($tag, $md5) = @_; # $tag->{content} holds the packed bucket slots that get scanned; $md5 is the digested key to look for
BUCKET:
for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
- my ($key, $subloc, $size, $transaction_id, $is_deleted) = $self->_get_key_subloc(
+ my ($key, $subloc) = $self->_get_key_subloc( # unpack slot $i into its stored key digest and its location value
$tag->{content}, $i,
);
- my @rv = ($subloc, $i * $self->{bucket_size}, $size, $is_deleted);
+ my @rv = ($subloc, $i * $self->{bucket_size}); # (location, offset of slot $i inside the content); returned for a key match and also for the first slot whose location is false, while the bare return after the loop means every slot held some other key
unless ( $subloc ) {
- if ( !$exact && @zero and $trans_id ) {
- @rv = ($zero[2], $zero[0] * $self->{bucket_size},$zero[3],$is_deleted);
- }
return @rv;
}
next BUCKET if $key ne $md5;
- # Save off the HEAD in case we need it.
- @zero = ($i,$key,$subloc,$size,$transaction_id,$is_deleted) if $transaction_id == 0;
-
- next BUCKET if $transaction_id != $trans_id;
-
return @rv;
}
return;
}
-sub _request_space {
- my $self = shift;
- my ($size) = @_;
-
- my $loc = $self->_fileobj->{end};
- $self->_fileobj->{end} += $size;
-
- return $loc;
-}
-
sub _release_space {
my $self = shift;
my ($size, $loc) = @_;
- local($/,$\);
-
my $next_loc = 0;
- my $fh = $self->_fh;
- seek( $fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET );
- print( $fh SIG_FREE
- . pack($self->{long_pack}, $size )
- . pack($self->{long_pack}, $next_loc )
+ $self->_fileobj->print_at( $loc,
+ SIG_FREE,
+ pack($self->{long_pack}, $size ),
+ pack($self->{long_pack}, $next_loc ),
);
return;
die "DBM::Deep: $_[1]\n";
}
-1;
-__END__
-
-# This will be added in later, after more refactoring is done. This is an early
-# attempt at refactoring on the physical level instead of the virtual level.
-sub _read_at {
- my $self = shift;
- my ($spot, $amount, $unpack) = @_;
-
- local($/,$\);
-
- my $fh = $self->_fh;
- seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );
-
- my $buffer;
- my $bytes_read = read( $fh, $buffer, $amount );
+sub _get_dbm_object { # Returns the DBM::Deep object behind $item: $item itself, or the object a hash/array reference is tied to; returns nothing otherwise
+ my $item = shift; # the first argument is the value to inspect (no $self is shifted off before it)
- if ( $unpack ) {
- $buffer = unpack( $unpack, $buffer );
- }
+ my $obj = eval { # guarded because ->isa dies on values that cannot take a method call (e.g. an unblessed reference)
+ local $SIG{__DIE__}; # keep any installed __DIE__ handler from firing on that expected failure
+ if ($item->isa( 'DBM::Deep' )) { # case 1: $item is already a DBM::Deep object
+ return $item; # return here only leaves the eval block; the value lands in $obj
+ }
+ return;
+ };
+ return $obj if $obj;
- if ( wantarray ) {
- return ($buffer, $bytes_read);
+ my $r = Scalar::Util::reftype( $item ) || ''; # underlying reference type, or '' when $item is not a reference
+ if ( $r eq 'HASH' ) { # case 2: a hash reference that may be tied to a DBM::Deep object
+ my $obj = eval {
+ local $SIG{__DIE__};
+ my $obj = tied(%$item); # undef when the hash is not tied, in which case ->isa below dies inside the eval
+ if ($obj->isa( 'DBM::Deep' )) {
+ return $obj;
+ }
+ return;
+ };
+ return $obj if $obj;
}
- else {
- return $buffer;
+ elsif ( $r eq 'ARRAY' ) { # case 3: same check for an array reference
+ my $obj = eval {
+ local $SIG{__DIE__};
+ my $obj = tied(@$item); # undef when the array is not tied
+ if ($obj->isa( 'DBM::Deep' )) {
+ return $obj;
+ }
+ return;
+ };
+ return $obj if $obj;
}
-}
-
-sub _print_at {
- my $self = shift;
- my ($spot, $data) = @_;
-
- local($/,$\);
-
- my $fh = $self->_fh;
- seek( $fh, $spot, SEEK_SET );
- print( $fh $data );
return;
}
-sub get_file_version {
+sub _length_needed { # Computes the size to reserve for storing $value together with its plain $key; the result is what callers hand to request_space()
my $self = shift;
+ my ($value, $key) = @_; # $value may be undef, a plain scalar, a HASH/ARRAY reference or a DBM::Deep object; $key is the plain key stored with it
- local($/,$\);
+ my $is_dbm_deep = eval { # the eval fails (leaving a false value) when $value cannot take a method call
+ local $SIG{'__DIE__'}; # silence any installed __DIE__ handler for that expected failure
+ $value->isa( 'DBM::Deep' ); # NOTE(review): a plain string naming a DBM::Deep subclass would also pass this test - confirm callers never store one
+ };
- my $fh = $self->_fh;
+ my $len = SIG_SIZE # the leading signature
+ + $self->{data_size} # size field for the value
+ + $self->{data_size} # size field for the key
+ + length( $key ); # the key itself
- seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
- my $buffer;
- my $bytes_read = read( $fh, $buffer, 4 );
- unless ( $bytes_read == 4 ) {
- $self->_throw_error( "Cannot read file version" );
+ if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) { # eq compares the stringified references, i.e. both sides share one file object
+ # A value from this same file object only needs long_size, for the internal reference.
+ return $len + $self->{long_size};
}
- return unpack( 'N', $buffer );
-}
-
-sub write_file_version {
- my $self = shift;
- my ($new_version) = @_;
+ if ( $self->_fileobj->{autobless} ) {
+ # Reserve 1 for the flag saying whether or not this thing is blessed.
+ $len += 1;
+ }
- local($/,$\);
+ my $r = Scalar::Util::reftype( $value ) || ''; # '' for undef and for non-reference values
+ unless ( $r eq 'HASH' || $r eq 'ARRAY' ) { # anything that is not a hash or array is sized by its own length
+ if ( defined $value ) { # undef contributes no extra length
+ $len += length( $value );
+ }
+ return $len;
+ }
- my $fh = $self->_fh;
+ $len += $self->{index_size}; # hashes and arrays add index_size - presumably room for the nested structure's index tag, TODO confirm
- seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
- print( $fh pack( 'N', $new_version ) );
+ # If autobless is enabled, we must also take into consideration
+ # the class name, as it is stored after the key.
+ if ( $self->_fileobj->{autobless} ) {
+ my $c = Scalar::Util::blessed($value); # class name, or undef for an unblessed reference
+ if ( defined $c && !$is_dbm_deep ) { # DBM::Deep objects get no class-name allowance here
+ $len += $self->{data_size} + length($c); # a size field plus the class name
+ }
+ }
- return;
+ return $len;
}
+1;
+__END__