From: rkinyon@cpan.org Date: Wed, 25 Jun 2008 14:59:31 +0000 (+0000) Subject: Hash tests pass again with header being read and cached X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=badf847ca0fd2a37bf21caf92454a92d568e3ccf;p=dbsrgits%2FDBM-Deep.git Hash tests pass again with header being read and cached git-svn-id: http://svn.ali.as/cpan/trunk/DBM-Deep@3637 88f4d9cd-8a04-0410-9d60-8f63309c3137 --- diff --git a/lib/DBM/Deep/Engine.pm b/lib/DBM/Deep/Engine.pm index 7841b1e..9c41951 100644 --- a/lib/DBM/Deep/Engine.pm +++ b/lib/DBM/Deep/Engine.pm @@ -416,9 +416,7 @@ sub setup_fh { return 1 if $obj->_base_offset; - my $header = DBM::Deep::Engine::Sector::FileHeader->new({ - engine => $self, - }); + my $header = $self->_load_header; # Creating a new file if ( $header->is_new ) { @@ -656,137 +654,99 @@ sub clear_entries { ################################################################################ -sub _load_sector { - my $self = shift; - my ($offset) = @_; - - # Add a catch for offset of 0 or 1 - return if !$offset || $offset <= 1; - - unless ( exists $self->sector_cache->{ $offset } ) { - my $type = $self->storage->read_at( $offset, $self->SIG_SIZE ); - - # XXX Don't we want to do something more proactive here? -RobK, 2008-06-19 - return if $type eq chr(0); - - if ( $type eq $self->SIG_ARRAY || $type eq $self->SIG_HASH ) { - $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::Reference->new({ - engine => $self, - type => $type, - offset => $offset, - }); - } - # XXX Don't we need key_md5 here? - elsif ( $type eq $self->SIG_BLIST ) { - $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::BucketList->new({ - engine => $self, - type => $type, - offset => $offset, - }); - } - elsif ( $type eq $self->SIG_INDEX ) { - $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::Index->new({ - engine => $self, - type => $type, - offset => $offset, - }); - } - elsif ( $type eq $self->SIG_NULL ) { - $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::Null->new({ - engine => $self, - type => $type, - offset => $offset, - }); - } - elsif ( $type eq $self->SIG_DATA ) { - $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::Scalar->new({ - engine => $self, - type => $type, - offset => $offset, - }); - } - # This was deleted from under us, so just return and let the caller figure it out. - elsif ( $type eq $self->SIG_FREE ) { - return; - } - else { - DBM::Deep->_throw_error( "'$offset': Don't know what to do with type '$type'" ); - } - } - - return $self->sector_cache->{$offset}; -} - sub _apply_digest { my $self = shift; return $self->{digest}->(@_); } sub _add_free_blist_sector { shift->_add_free_sector( 0, @_ ) } -sub _add_free_data_sector { shift->_add_free_sector( 1, @_ ) } +sub _add_free_data_sector { shift->_add_free_sector( 1, @_ ) } sub _add_free_index_sector { shift->_add_free_sector( 2, @_ ) } +sub _add_free_sector { shift->_load_header->add_free_sector( @_ ) } -sub _add_free_sector { - my $self = shift; - my ($multiple, $offset, $size) = @_; +sub _request_blist_sector { shift->_request_sector( 0, @_ ) } +sub _request_data_sector { shift->_request_sector( 1, @_ ) } +sub _request_index_sector { shift->_request_sector( 2, @_ ) } +sub _request_sector { shift->_load_header->request_sector( @_ ) } - my $chains_offset = $multiple * $self->byte_size; +################################################################################ - my $storage = $self->storage; +{ + my %t = ( + SIG_ARRAY => 'Reference', + SIG_HASH => 'Reference', + SIG_BLIST => 'BucketList', + SIG_INDEX => 'Index', + SIG_NULL => 'Null', + SIG_DATA => 'Scalar', + ); - # Increment staleness. - # XXX Can this increment+modulo be done by "&= 0x1" ? - my $staleness = unpack( $StP{$STALE_SIZE}, $storage->read_at( $offset + SIG_SIZE, $STALE_SIZE ) ); - $staleness = ($staleness + 1 ) % ( 2 ** ( 8 * $STALE_SIZE ) ); - $storage->print_at( $offset + SIG_SIZE, pack( $StP{$STALE_SIZE}, $staleness ) ); + my %class_for; + while ( my ($k,$v) = each %t ) { + $class_for{ DBM::Deep::Engine->$k } = "DBM::Deep::Engine::Sector::$v"; + } - my $old_head = $storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size ); + sub load_sector { + my $self = shift; + my ($offset) = @_; - $storage->print_at( $self->chains_loc + $chains_offset, - pack( $StP{$self->byte_size}, $offset ), - ); + #warn join(':',(caller)[0,2]) . " -> $offset\n"; + my $data = $self->get_data( $offset ) + or return;#die "Cannot read from '$offset'\n"; + my $type = substr( $$data, 0, 1 ); + my $class = $class_for{ $type }; + return $class->new({ + engine => $self, + type => $type, + offset => $offset, + }); + } + *_load_sector = \&load_sector; - # Record the old head in the new sector after the signature and staleness counter - $storage->print_at( $offset + SIG_SIZE + $STALE_SIZE, $old_head ); -} + sub load_header { + my $self = shift; -sub _request_blist_sector { shift->_request_sector( 0, @_ ) } -sub _request_data_sector { shift->_request_sector( 1, @_ ) } -sub _request_index_sector { shift->_request_sector( 2, @_ ) } + #XXX Does this mean we make too many objects? -RobK, 2008-06-23 + return DBM::Deep::Engine::Sector::FileHeader->new({ + engine => $self, + offset => 0, + }); + } + *_load_header = \&load_header; -sub _request_sector { - my $self = shift; - my ($multiple, $size) = @_; + sub get_data { + my $self = shift; + my ($offset, $size) = @_; + return unless defined $offset; - my $chains_offset = $multiple * $self->byte_size; + unless ( exists $self->sector_cache->{$offset} ) { + # Don't worry about the header sector. It will manage itself. + return unless $offset; - my $old_head = $self->storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size ); - my $loc = unpack( $StP{$self->byte_size}, $old_head ); + if ( !defined $size ) { + my $type = $self->storage->read_at( $offset, 1 ) + or die "($offset): Cannot read from '$offset' to find the type\n"; - # We don't have any free sectors of the right size, so allocate a new one. - unless ( $loc ) { - my $offset = $self->storage->request_space( $size ); + if ( $type eq $self->SIG_FREE ) { + return; + } - # Zero out the new sector. This also guarantees correct increases - # in the filesize. - $self->storage->print_at( $offset, chr(0) x $size ); + my $class = $class_for{$type} + or die "($offset): Cannot find class for '$type'\n"; + $size = $class->size( $self ) + or die "($offset): '$class' doesn't return a size\n"; + $self->sector_cache->{$offset} = $type . $self->storage->read_at( undef, $size - 1 ); + } + else { + $self->sector_cache->{$offset} = $self->storage->read_at( $offset, $size ) + or return; + } + } - return $offset; + return \$self->sector_cache->{$offset}; } - - # Read the new head after the signature and the staleness counter - my $new_head = $self->storage->read_at( $loc + SIG_SIZE + $STALE_SIZE, $self->byte_size ); - $self->storage->print_at( $self->chains_loc + $chains_offset, $new_head ); - $self->storage->print_at( - $loc + SIG_SIZE + $STALE_SIZE, - pack( $StP{$self->byte_size}, 0 ), - ); - - return $loc; } -################################################################################ - sub sector_cache { my $self = shift; return $self->{sector_cache} ||= {}; @@ -809,13 +769,9 @@ sub clear_dirty_sectors { sub add_dirty_sector { my $self = shift; - my ($sector) = @_; - -# if ( exists $self->dirty_sectors->{ $sector->offset } ) { -# DBM::Deep->_throw_error( "We have a duplicate sector!! " . $sector->offset ); -# } + my ($offset) = @_; - $self->dirty_sectors->{ $sector->offset } = $sector; + $self->dirty_sectors->{ $offset } = undef; } sub flush { @@ -823,7 +779,7 @@ sub flush { my $sectors = $self->dirty_sectors; for my $offset (sort { $a <=> $b } keys %{ $sectors }) { - $sectors->{$offset}->flush; + $self->storage->print_at( $offset, $self->sector_cache->{$offset} ); } $self->clear_dirty_sectors; diff --git a/lib/DBM/Deep/Engine/Sector.pm b/lib/DBM/Deep/Engine/Sector.pm index 1438b5c..c997ec7 100644 --- a/lib/DBM/Deep/Engine/Sector.pm +++ b/lib/DBM/Deep/Engine/Sector.pm @@ -11,20 +11,8 @@ sub new { my $self = bless $_[1], $_[0]; Scalar::Util::weaken( $self->{engine} ); - if ( $self->offset ) { - $self->{string} = $self->engine->storage->read_at( - $self->offset, $self->size, - ); - } - else { - $self->{string} = chr(0) x $self->size; - } - $self->_init; - # Add new sectors to the sector cache. - $self->engine->sector_cache->{$self->offset} = $self; - return $self; } @@ -36,9 +24,13 @@ sub offset { $_[0]{offset} } sub type { $_[0]{type} } sub base_size { - my $self = shift; - no warnings 'once'; - return $self->engine->SIG_SIZE + $DBM::Deep::Engine::STALE_SIZE; + my $self = shift; + if ( ref($self) ) { + return $self->engine->SIG_SIZE + $DBM::Deep::Engine::STALE_SIZE; + } + else { + return $_[0]->SIG_SIZE + $DBM::Deep::Engine::STALE_SIZE; + } } sub free { @@ -49,33 +41,26 @@ sub free { $self->write( 0, $e->SIG_FREE ); $self->write( $self->base_size, chr(0) x ($self->size - $self->base_size) ); - $e->flush; - -# $e->storage->print_at( $self->offset, $e->SIG_FREE ); -# # Skip staleness counter -# $e->storage->print_at( $self->offset + $self->base_size, -# chr(0) x ($self->size - $self->base_size), -# ); - - #TODO When freeing two sectors, we cannot flush them right away! This means the following: - # 1) The header has to understand about unflushed items. - # 2) Loading a sector has to go through a cache to make sure we see what's already been loaded. - # 3) The header should be cached. - my $free_meth = $self->free_meth; - $e->$free_meth( $self->offset, $self->size ); + $e->$free_meth( $self ); return; } sub read { my $self = shift; - my ($start, $length) = @_; - if ( $length ) { - return substr( $self->{string}, $start, $length ); + + if ( @_ == 1 ) { + return substr( ${$self->engine->get_data( $self->offset, $self->size )}, $_[0] ); + } + elsif ( @_ == 2 ) { + return substr( ${$self->engine->get_data( $self->offset, $self->size )}, $_[0], $_[1] ); + } + elsif ( @_ < 1 ) { + die "read( start [, length ]): No parameters found."; } else { - return substr( $self->{string}, $start ); + die "read( start [, length ]): Too many parameters found (@_)."; } } @@ -83,19 +68,14 @@ sub write { my $self = shift; my ($start, $text) = @_; - substr( $self->{string}, $start, length($text) ) = $text; + substr( ${$self->engine->get_data( $self->offset, $self->size )}, $start, length($text) ) = $text; $self->mark_dirty; } sub mark_dirty { my $self = shift; - $self->engine->add_dirty_sector( $self ); -} - -sub flush { - my $self = shift; - $self->engine->storage->print_at( $self->offset, $self->{string} ); + $self->engine->add_dirty_sector( $self->offset ); } 1; diff --git a/lib/DBM/Deep/Engine/Sector/BucketList.pm b/lib/DBM/Deep/Engine/Sector/BucketList.pm index b3bd6b2..36537aa 100644 --- a/lib/DBM/Deep/Engine/Sector/BucketList.pm +++ b/lib/DBM/Deep/Engine/Sector/BucketList.pm @@ -35,11 +35,17 @@ sub clear { sub size { my $self = shift; - unless ( $self->{size} ) { - # Base + numbuckets * bucketsize - $self->{size} = $self->base_size + $self->engine->max_buckets * $self->bucket_size; + if ( ref($self) ) { + unless ( $self->{size} ) { + # Base + numbuckets * bucketsize + $self->{size} = $self->base_size + $self->engine->max_buckets * $self->bucket_size; + } + return $self->{size}; + } + else { + my $e = shift; + return $self->base_size($e) + $e->max_buckets * $self->bucket_size($e); } - return $self->{size}; } sub free_meth { return '_add_free_blist_sector' } @@ -80,13 +86,20 @@ sub free { sub bucket_size { my $self = shift; - unless ( $self->{bucket_size} ) { - my $e = $self->engine; - # Key + head (location) + transactions (location + staleness-counter) + if ( ref($self) ) { + unless ( $self->{bucket_size} ) { + my $e = $self->engine; + # Key + head (location) + transactions (location + staleness-counter) + my $location_size = $e->byte_size + $e->byte_size + ($e->num_txns - 1) * ($e->byte_size + $DBM::Deep::Engine::STALE_SIZE); + $self->{bucket_size} = $e->hash_size + $location_size; + } + return $self->{bucket_size}; + } + else { + my $e = shift; my $location_size = $e->byte_size + $e->byte_size + ($e->num_txns - 1) * ($e->byte_size + $DBM::Deep::Engine::STALE_SIZE); - $self->{bucket_size} = $e->hash_size + $location_size; + return $e->hash_size + $location_size; } - return $self->{bucket_size}; } # XXX This is such a poor hack. I need to rethink this code. diff --git a/lib/DBM/Deep/Engine/Sector/Data.pm b/lib/DBM/Deep/Engine/Sector/Data.pm index c9695ee..e12e942 100644 --- a/lib/DBM/Deep/Engine/Sector/Data.pm +++ b/lib/DBM/Deep/Engine/Sector/Data.pm @@ -9,7 +9,10 @@ use DBM::Deep::Engine::Sector; our @ISA = qw( DBM::Deep::Engine::Sector ); # This is in bytes -sub size { $_[0]{engine}->data_sector_size } +sub size { + my $e = ref($_[0]) ? $_[0]{engine} : $_[1]; + return $e->data_sector_size; +} sub free_meth { return '_add_free_data_sector' } sub clone { diff --git a/lib/DBM/Deep/Engine/Sector/FileHeader.pm b/lib/DBM/Deep/Engine/Sector/FileHeader.pm index b069065..ae56c0f 100644 --- a/lib/DBM/Deep/Engine/Sector/FileHeader.pm +++ b/lib/DBM/Deep/Engine/Sector/FileHeader.pm @@ -17,8 +17,7 @@ sub _init { my $e = $self->engine; # This means the file is being created. - # Use defined() here because the offset should always be 0. -RobK. 2008-06-20 - unless ( $e->storage->size ) { + unless ( exists $self->engine->sector_cache->{0} || $self->engine->storage->size ) { my $nt = $e->num_txns; my $bl = $e->txn_bitfield_len; @@ -27,7 +26,10 @@ sub _init { $self->{offset} = $e->storage->request_space( $header_fixed + $header_var ); DBM::Deep::_throw_error( "Offset wasn't 0, it's '$self->{offset}'" ) unless $self->offset == 0; - $self->write( $self->offset, + # Make sure we set up sector caching so that get_data() works. -RobK, 2008-06-24 + $self->engine->sector_cache->{$self->offset} = chr(0) x ($header_fixed + $header_var); + + $self->write( 0, $e->SIG_FILE . $e->SIG_HEADER . pack('N', $this_file_version) # At this point, we're at 9 bytes @@ -54,6 +56,9 @@ sub _init { } else { $self->{offset} = 0; + $self->{is_new} = 0; + + return if exists $self->engine->sector_cache->{0}; my $s = $e->storage; @@ -108,24 +113,85 @@ sub _init { my $bl = $e->txn_bitfield_len; $e->set_chains_loc( $header_fixed + scalar(@values) + $bl + $DBM::Deep::Engine::STALE_SIZE * ($e->num_txns - 1) ); - # Make sure we set up the string so that the caching works. -RobK, 2008-06-20 - $self->{string} = $buffer . $buffer2; - - $self->{is_new} = 0; + # Make sure we set up sector caching so that get_data() works. -RobK, 2008-06-24 + $self->engine->sector_cache->{$self->offset} = $buffer . $buffer2; } } sub header_var_size { my $self = shift; - my $e = $self->engine; + my $e = shift || $self->engine; return 1 + 1 + 1 + 1 + $e->txn_bitfield_len + $DBM::Deep::Engine::STALE_SIZE * ($e->num_txns - 1) + 3 * $e->byte_size; } -sub size { +sub size { my $self = shift; - $self->{size} ||= $header_fixed + $self->header_var_size; + if ( ref($self) ) { + $self->{size} ||= $header_fixed + $self->header_var_size; + } + else { + return $header_fixed + $self->header_var_size( @_ ); + } } + sub is_new { $_[0]{is_new} } +sub add_free_sector { + my $self = shift; + my ($multiple, $sector) = @_; + + my $e = $self->engine; + + my $chains_offset = $multiple * $e->byte_size; + + # Increment staleness. + # XXX Can this increment+modulo be done by "&= 0x1" ? + my $staleness = unpack( $e->StP($DBM::Deep::Engine::STALE_SIZE), $sector->read( $e->SIG_SIZE, $DBM::Deep::Engine::STALE_SIZE ) ); + $staleness = ($staleness + 1 ) % ( 2 ** ( 8 * $DBM::Deep::Engine::STALE_SIZE ) ); + $sector->write( $e->SIG_SIZE, pack( $e->StP($DBM::Deep::Engine::STALE_SIZE), $staleness ) ); + + my $old_head = $self->read( $e->chains_loc + $chains_offset, $e->byte_size ); + + $self->write( $e->chains_loc + $chains_offset, + pack( $e->StP($e->byte_size), $sector->offset ), + ); + + # Record the old head in the new sector after the signature and staleness counter + $sector->write( $e->SIG_SIZE + $DBM::Deep::Engine::STALE_SIZE, $old_head ); +} + +sub request_sector { + my $self = shift; + my ($multiple, $size) = @_; + + my $e = $self->engine; + + my $chains_offset = $multiple * $e->byte_size; + + my $old_head = $self->read( $e->chains_loc + $chains_offset, $e->byte_size ); + my $loc = unpack( $e->StP($e->byte_size), $old_head ); + + # We don't have any free sectors of the right size, so allocate a new one. + unless ( $loc ) { + my $offset = $e->storage->request_space( $size ); + + # Zero out the new sector. This also guarantees correct increases + # in the filesize. + $self->engine->sector_cache->{$offset} = chr(0) x $size; + + return $offset; + } + + # Need to load the new sector so we can read from it. + my $new_sector = $self->engine->storage->read_at( $loc, $size ); + + # Read the new head after the signature and the staleness counter + my $new_head = substr( $new_sector, $e->SIG_SIZE + $DBM::Deep::Engine::STALE_SIZE, $e->byte_size ); + + $self->write( $e->chains_loc + $chains_offset, $new_head ); + + return $loc; +} + 1; __END__ diff --git a/lib/DBM/Deep/Engine/Sector/Index.pm b/lib/DBM/Deep/Engine/Sector/Index.pm index e937233..a7c9334 100644 --- a/lib/DBM/Deep/Engine/Sector/Index.pm +++ b/lib/DBM/Deep/Engine/Sector/Index.pm @@ -26,11 +26,17 @@ sub _init { #XXX Why? -RobK, 2008-06-18 sub size { my $self = shift; - unless ( $self->{size} ) { - my $e = $self->engine; - $self->{size} = $self->base_size + $e->byte_size * $e->hash_chars; + if ( ref($self) ) { + unless ( $self->{size} ) { + my $e = $self->engine; + $self->{size} = $self->base_size + $e->byte_size * $e->hash_chars; + } + return $self->{size}; + } + else { + my $e = shift; + return $self->base_size($e) + $e->byte_size * $e->hash_chars; } - return $self->{size}; } sub free_meth { return '_add_free_index_sector' } diff --git a/t/02_hash.t b/t/02_hash.t index 4ad5d92..ab428e5 100644 --- a/t/02_hash.t +++ b/t/02_hash.t @@ -181,4 +181,3 @@ throws_ok { throws_ok { $db->exists(undef); } qr/Cannot use an undefined hash key/, "EXISTS fails on an undefined key"; - diff --git a/t/04_array.t b/t/04_array.t index 3bfc933..f746f4e 100644 --- a/t/04_array.t +++ b/t/04_array.t @@ -19,13 +19,13 @@ my $db = DBM::Deep->new( # basic put/get/push ## $db->[0] = "elem1"; -#$db->push( "elem2" ); -#$db->put(2, "elem3"); -#$db->store(3, "elem4"); -warn $db->_engine->_dump_file; +$db->push( "elem2" ); +$db->put(2, "elem3"); +$db->store(3, "elem4"); +#warn $db->_engine->_dump_file; $db->unshift("elem0"); -warn $db->_engine->_dump_file; -__END__ +#warn $db->_engine->_dump_file; +#__END__ is( $db->[0], 'elem0', "Array get for shift works" ); is( $db->[1], 'elem1', "Array get for array set works" ); @@ -68,14 +68,23 @@ throws_ok { $db->[-6] = 'whoops!'; } qr/Modification of non-creatable array value attempted, subscript -6/, "Correct error thrown"; +warn "1: \n" . $db->_engine->_dump_file; my $popped = $db->pop; +warn "2: \n" . $db->_engine->_dump_file; is( $db->length, 4, "... and we have four after popping" ); +warn "3: \n" . $db->_engine->_dump_file; is( $db->[0], 'elem0', "0th element still there after popping" ); +warn "4: \n" . $db->_engine->_dump_file; is( $db->[1], 'elem1', "1st element still there after popping" ); +warn "5: \n" . $db->_engine->_dump_file; is( $db->[2], 'elem2', "2nd element still there after popping" ); +warn "6: \n" . $db->_engine->_dump_file; is( $db->[3], 'elem3', "3rd element still there after popping" ); +warn "7: \n" . $db->_engine->_dump_file; is( $popped, 'elem4.1', "Popped value is correct" ); +die $db->_engine->_dump_file; + my $shifted = $db->shift; is( $db->length, 3, "... and we have three after shifting" ); is( $db->[0], 'elem1', "0th element still there after shifting" ); @@ -136,6 +145,8 @@ is( $db->length(), 0, "After pop() on empty array, length is still 0" ); is( $db->shift, undef, "shift on an empty array returns undef" ); is( $db->length(), 0, "After shift() on empty array, length is still 0" ); +warn "BEFORE: " . $db->_engine->_dump_file; +__END__ is( $db->unshift( 1, 2, 3 ), 3, "unshift returns the number of elements in the array" ); is( $db->unshift( 1, 2, 3 ), 6, "unshift returns the number of elements in the array" ); is( $db->push( 1, 2, 3 ), 9, "push returns the number of elements in the array" );