From: rkinyon Date: Thu, 28 Dec 2006 06:26:40 +0000 (+0000) Subject: Reindexing works ... sort of X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=6a4f323c2407cab4cecb9abfafa59ed163830705;p=dbsrgits%2FDBM-Deep.git Reindexing works ... sort of --- diff --git a/lib/DBM/Deep/Engine3.pm b/lib/DBM/Deep/Engine3.pm index 41d2a08..ee6319d 100644 --- a/lib/DBM/Deep/Engine3.pm +++ b/lib/DBM/Deep/Engine3.pm @@ -47,7 +47,8 @@ sub new { byte_size => 4, digest => undef, - hash_size => 16, # In bytes + hash_size => 16, # In bytes + hash_chars => 256, # Number of chars the algorithm uses per byte max_buckets => 16, num_txns => 16, # HEAD plus 15 running txns trans_id => 0, # Default to the HEAD @@ -706,6 +707,7 @@ sub _request_sector { sub storage { $_[0]{storage} } sub byte_size { $_[0]{byte_size} } sub hash_size { $_[0]{hash_size} } +sub hash_chars { $_[0]{hash_chars} } sub num_txns { $_[0]{num_txns} } sub max_buckets { $_[0]{max_buckets} } sub blank_md5 { chr(0) x $_[0]->hash_size } @@ -1220,7 +1222,7 @@ sub get_bucket_list { my $i = 0; my $last_sector = undef; while ( $sector->isa( 'DBM::Deep::Engine::Sector::Index' ) ) { - $blist_loc = $sector->location_for( ord( substr( $args->{key_md5}, $i++, 1 ) ) ); + $blist_loc = $sector->get_entry( ord( substr( $args->{key_md5}, $i++, 1 ) ) ); $last_sector = $sector; $sector = $engine->_load_sector( $blist_loc ) or die "Cannot read sector at $blist_loc in get_bucket_list()"; @@ -1229,7 +1231,42 @@ sub get_bucket_list { $sector->find_md5( $args->{key_md5} ); # See whether or not we need to reindex the bucketlist - if ( !$sector->has_md5 && $args->{create} ) { + if ( !$sector->has_md5 && $args->{create} && $sector->{idx} == -1 ) { + #print "Reindexing\n"; + my $new_index = DBM::Deep::Engine::Sector::Index->new({ + engine => $engine, + }); + + my %blist_cache; + + foreach my $md5 ( $args->{key_md5}, $sector->chopped_up ) { + my $idx = ord( substr( $md5, $i, 1 ) ); + + my $blist = $blist_cache{$idx} + ||= DBM::Deep::Engine::Sector::BucketList->new({ + engine => $engine, + }); + + $new_index->set_entry( $idx => $blist->offset ); + + $blist->write_at_next_open( $md5 ); + } + + if ( $last_sector ) { + $last_sector->set_entry( + ord( substr( $args->{key_md5}, $i - 1, 1 ) ), + $new_index->offset, + ); + } else { + $engine->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$engine->byte_size}, $new_index->offset ), + ); + } + + $sector->free; + + $sector = $blist_cache{ ord( substr( $args->{key_md5}, $i, 1 ) ) }; + $sector->find_md5( $args->{key_md5} ); } return $sector; @@ -1328,6 +1365,39 @@ sub bucket_size { return $self->{bucket_size}; } +sub chopped_up { + my $self = shift; + + my $e = $self->engine; + + my @md5s; + foreach my $idx ( 0 .. $e->max_buckets - 1 ) { + my $md5 = $e->storage->read_at( + $self->offset + $self->base_size + $idx * $self->bucket_size, $e->hash_size, + ); + + last if $md5 eq $e->blank_md5; + + my $rest = $e->storage->read_at( undef, $self->bucket_size - $e->hash_size ); + push @md5s, $md5 . $rest; + } + + return @md5s; +} + +sub write_at_next_open { + my $self = shift; + my ($md5) = @_; + + #XXX This is such a hack! + $self->{_idx} = 0 unless exists $self->{_idx}; + + $self->engine->storage->print_at( + $self->offset + $self->base_size + $self->{_idx}++ * $self->bucket_size, + $md5, + ); +} + sub has_md5 { my $self = shift; unless ( exists $self->{found} ) { @@ -1549,7 +1619,7 @@ sub _init { my $leftover = $self->size - $self->base_size; $self->{offset} = $engine->_request_index_sector( $self->size ); - $engine->storage->print_at( $self->offset, $engine->SIG_BLIST ); # Sector type + $engine->storage->print_at( $self->offset, $engine->SIG_INDEX ); # Sector type # Skip staleness counter $engine->storage->print_at( $self->offset + $self->base_size, chr(0) x $leftover, # Zero-fill the rest @@ -1582,7 +1652,13 @@ sub free { $self->SUPER::free(); } -sub location_for { +sub _loc_for { + my $self = shift; + my ($idx) = @_; + return $self->offset + $self->base_size + $idx * $self->engine->byte_size; +} + +sub get_entry { my $self = shift; my ($idx) = @_; @@ -1590,10 +1666,17 @@ sub location_for { return unpack( $StP{$e->byte_size}, - $e->storage->read_at( - $self->offset + $self->base_size + $idx * $self->byte_size, - $self->byte_size, - ), + $e->storage->read_at( $self->_loc_for( $idx ), $e->byte_size ), + ); +} + +sub set_entry { + my $self = shift; + my ($idx, $loc) = @_; + + $self->engine->storage->print_at( + $self->_loc_for( $idx ), + pack( $StP{$self->engine->byte_size}, $loc ), ); } diff --git a/t/03_bighash.todo b/t/03_bighash.todo index 9b81f87..81215fd 100644 --- a/t/03_bighash.todo +++ b/t/03_bighash.todo @@ -28,6 +28,7 @@ my $db = DBM::Deep->new( my $max_keys = 4000; for ( 0 .. $max_keys ) { + print "Adding $_\n"; $db->put( "hello $_" => "there " . $_ * 2 ); }