added test for freespace management
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
612969fb 7sub precalc_sizes {
beac1dff 8 ##
9 # Precalculate index, bucket and bucket list sizes
10 ##
251dfd0e 11 my $self = shift;
1bf65be7 12
251dfd0e 13 $self->{index_size} = (2**8) * $self->{long_size};
14 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
15 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 16
251dfd0e 17 return 1;
1bf65be7 18}
19
20sub set_pack {
beac1dff 21 ##
22 # Set pack/unpack modes (see file header for more)
23 ##
251dfd0e 24 my $self = shift;
1bf65be7 25 my ($long_s, $long_p, $data_s, $data_p) = @_;
26
81d16922 27 ##
28 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file.
beac1dff 29 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 30 #
31 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 32 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 33 ##
251dfd0e 34 $self->{long_size} = $long_s ? $long_s : 4;
35 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 36
81d16922 37 ##
38 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value.
39 # Upgrading this is possible (see above) but probably not necessary. If you need
40 # more than 4 GB for a single key or value, this module is really not for you :-)
41 ##
251dfd0e 42 $self->{data_size} = $data_s ? $data_s : 4;
43 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 44
beac1dff 45 return $self->precalc_sizes();
1bf65be7 46}
47
48sub set_digest {
beac1dff 49 ##
50 # Set key digest function (default is MD5)
51 ##
251dfd0e 52 my $self = shift;
1bf65be7 53 my ($digest_func, $hash_size) = @_;
54
d0b74c17 55 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 56 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 57
beac1dff 58 return $self->precalc_sizes();
612969fb 59}
60
61sub new {
62 my $class = shift;
63 my ($args) = @_;
64
65 my $self = bless {
66 long_size => 4,
67 long_pack => 'N',
68 data_size => 4,
69 data_pack => 'N',
251dfd0e 70
612969fb 71 digest => \&Digest::MD5::md5,
72 hash_size => 16,
251dfd0e 73
81d16922 74 ##
75 # Maximum number of buckets per list before another level of indexing is done.
76 # Increase this value for slightly greater speed, but larger database files.
77 # DO NOT decrease this value below 16, due to risk of recursive reindex overrun.
78 ##
612969fb 79 max_buckets => 16,
80 }, $class;
81
251dfd0e 82 $self->precalc_sizes;
612969fb 83
84 return $self;
1bf65be7 85}
86
70b55428 87sub setup_fh {
88 my $self = shift;
89 my ($obj) = @_;
90
91 $self->open( $obj ) if !defined $obj->_fh;
92
93 #XXX Is {end} busted? Can't we just seek( $fh, 0, SEEK_END ) ?
94 unless ( $obj->_root->{inode} ) {
95 my @stats = stat($obj->_fh);
96 $obj->_root->{inode} = $stats[1];
97 $obj->_root->{end} = $stats[7];
98 }
99
100 return 1;
101}
102
a20d9a3f 103sub open {
20f7b20c 104 ##
105 # Open a fh to the database, create if nonexistent.
106 # Make sure file signature matches DBM::Deep spec.
107 ##
a20d9a3f 108 my $self = shift;
70b55428 109 my ($obj) = @_;
a20d9a3f 110
3d1b8be9 111 if (defined($obj->_fh)) { $self->close_fh( $obj ); }
20f7b20c 112
a20d9a3f 113 eval {
114 local $SIG{'__DIE__'};
115 # Theoretically, adding O_BINARY should remove the need for the binmode
116 # Of course, testing it is going to be ... interesting.
117 my $flags = O_RDWR | O_CREAT | O_BINARY;
118
119 my $fh;
cd59cad8 120 sysopen( $fh, $obj->_root->{file}, $flags )
a20d9a3f 121 or $fh = undef;
cd59cad8 122 $obj->_root->{fh} = $fh;
123 }; if ($@ ) { $obj->_throw_error( "Received error: $@\n" ); }
20f7b20c 124 if (! defined($obj->_fh)) {
125 return $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!");
126 }
a20d9a3f 127
cd59cad8 128 my $fh = $obj->_fh;
a20d9a3f 129
130 #XXX Can we remove this by using the right sysopen() flags?
131 # Maybe ... q.v. above
132 binmode $fh; # for win32
133
cd59cad8 134 if ($obj->_root->{autoflush}) {
a20d9a3f 135 my $old = select $fh;
136 $|=1;
137 select $old;
138 }
20f7b20c 139
cd59cad8 140 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 141
142 my $signature;
143 my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE));
20f7b20c 144
a20d9a3f 145 ##
146 # File is empty -- write signature and master index
147 ##
148 if (!$bytes_read) {
cd59cad8 149 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 150 print( $fh DBM::Deep->SIG_FILE);
d0b74c17 151
ec1bce6b 152 $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $self->{index_size});
a20d9a3f 153
154 my $plain_key = "[base]";
251dfd0e 155 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
a20d9a3f 156
157 # Flush the filehandle
158 my $old_fh = select $fh;
159 my $old_af = $|; $| = 1; $| = $old_af;
160 select $old_fh;
161
a20d9a3f 162 return 1;
163 }
20f7b20c 164
a20d9a3f 165 ##
166 # Check signature was valid
167 ##
168 unless ($signature eq DBM::Deep->SIG_FILE) {
3d1b8be9 169 $self->close_fh( $obj );
cd59cad8 170 return $obj->_throw_error("Signature not found -- file is not a Deep DB");
a20d9a3f 171 }
172
a20d9a3f 173 ##
174 # Get our type from master index signature
175 ##
d4b1166e 176 my $tag = $self->load_tag($obj, $obj->_base_offset);
a20d9a3f 177 if (!$tag) {
20f7b20c 178 return $obj->_throw_error("Corrupted file, no master index record");
a20d9a3f 179 }
d0b74c17 180
cd59cad8 181 if ($obj->{type} ne $tag->{signature}) {
20f7b20c 182 return $obj->_throw_error("File type mismatch");
a20d9a3f 183 }
20f7b20c 184
d0b74c17 185#XXX We probably also want to store the hash algorithm name and not assume anything
186#XXX The cool thing would be to allow a different hashing algorithm at every level
187
a20d9a3f 188 return 1;
189}
190
3d1b8be9 191sub close_fh {
cd59cad8 192 my $self = shift;
a21f2d90 193 my ($obj) = @_;
cd59cad8 194
195 if ( my $fh = $obj->_root->{fh} ) {
196 close $fh;
197 }
198 $obj->_root->{fh} = undef;
199
200 return 1;
201}
202
d4b1166e 203sub create_tag {
20f7b20c 204 ##
205 # Given offset, signature and content, create tag and write to disk
206 ##
d4b1166e 207 my $self = shift;
20f7b20c 208 my ($obj, $offset, $sig, $content) = @_;
209 my $size = length($content);
210
d4b1166e 211 my $fh = $obj->_fh;
212
20f7b20c 213 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
251dfd0e 214 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 215
216 if ($offset == $obj->_root->{end}) {
251dfd0e 217 $obj->_root->{end} += DBM::Deep->SIG_SIZE + $self->{data_size} + $size;
20f7b20c 218 }
219
220 return {
221 signature => $sig,
222 size => $size,
251dfd0e 223 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 224 content => $content
225 };
d4b1166e 226}
227
228sub load_tag {
20f7b20c 229 ##
230 # Given offset, load single tag and return signature, size and data
231 ##
d4b1166e 232 my $self = shift;
20f7b20c 233 my ($obj, $offset) = @_;
234
d4b1166e 235 my $fh = $obj->_fh;
236
20f7b20c 237 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
238 if (eof $fh) { return undef; }
239
d4b1166e 240 my $b;
251dfd0e 241 read( $fh, $b, DBM::Deep->SIG_SIZE + $self->{data_size} );
242 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 243
244 my $buffer;
245 read( $fh, $buffer, $size);
246
247 return {
248 signature => $sig,
249 size => $size,
251dfd0e 250 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 251 content => $buffer
252 };
d4b1166e 253}
254
20f7b20c 255sub add_bucket {
256 ##
257 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
258 # plain (undigested) key and value.
259 ##
d4b1166e 260 my $self = shift;
20f7b20c 261 my ($obj, $tag, $md5, $plain_key, $value) = @_;
262 my $keys = $tag->{content};
263 my $location = 0;
264 my $result = 2;
265
266 my $root = $obj->_root;
267
268 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) };
269 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
270
271 my $fh = $obj->_fh;
272
273 ##
274 # Iterate through buckets, seeing if this is a new entry or a replace.
275 ##
d0b74c17 276 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
251dfd0e 277 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
20f7b20c 278 if (!$subloc) {
279 ##
280 # Found empty bucket (end of list). Populate and exit loop.
281 ##
282 $result = 2;
283
284 $location = $internal_ref
285 ? $value->_base_offset
286 : $root->{end};
287
251dfd0e 288 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
289 print( $fh $md5 . pack($self->{long_pack}, $location) );
20f7b20c 290 last;
291 }
292
251dfd0e 293 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
20f7b20c 294 if ($md5 eq $key) {
295 ##
296 # Found existing bucket with same key. Replace with new value.
297 ##
298 $result = 1;
299
300 if ($internal_ref) {
301 $location = $value->_base_offset;
251dfd0e 302 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
303 print( $fh $md5 . pack($self->{long_pack}, $location) );
20f7b20c 304 return $result;
305 }
306
307 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET);
308 my $size;
251dfd0e 309 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
20f7b20c 310
311 ##
312 # If value is a hash, array, or raw value with equal or less size, we can
313 # reuse the same content area of the database. Otherwise, we have to create
314 # a new content area at the EOF.
315 ##
316 my $actual_length;
317 my $r = Scalar::Util::reftype( $value ) || '';
318 if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
ec1bce6b 319 $actual_length = $self->{index_size};
20f7b20c 320
321 # if autobless is enabled, must also take into consideration
322 # the class name, as it is stored along with key/value.
323 if ( $root->{autobless} ) {
324 my $value_class = Scalar::Util::blessed($value);
325 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
326 $actual_length += length($value_class);
327 }
328 }
329 }
330 else { $actual_length = length($value); }
331
332 if ($actual_length <= $size) {
333 $location = $subloc;
334 }
335 else {
336 $location = $root->{end};
251dfd0e 337 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $self->{hash_size} + $root->{file_offset}, SEEK_SET);
338 print( $fh pack($self->{long_pack}, $location) );
20f7b20c 339 }
340
341 last;
342 }
343 }
344
345 ##
346 # If this is an internal reference, return now.
347 # No need to write value or plain key
348 ##
349 if ($internal_ref) {
350 return $result;
351 }
352
353 ##
354 # If bucket didn't fit into list, split into a new index level
355 ##
356 if (!$location) {
357 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
251dfd0e 358 print( $fh pack($self->{long_pack}, $root->{end}) );
20f7b20c 359
ec1bce6b 360 my $index_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_INDEX, chr(0) x $self->{index_size});
20f7b20c 361 my @offsets = ();
362
251dfd0e 363 $keys .= $md5 . pack($self->{long_pack}, 0);
20f7b20c 364
251dfd0e 365 for (my $i=0; $i<=$self->{max_buckets}; $i++) {
366 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
20f7b20c 367 if ($key) {
251dfd0e 368 my $old_subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) +
369 $self->{hash_size}, $self->{long_size}));
20f7b20c 370 my $num = ord(substr($key, $tag->{ch} + 1, 1));
371
372 if ($offsets[$num]) {
251dfd0e 373 my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $self->{data_size};
20f7b20c 374 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
375 my $subkeys;
251dfd0e 376 read( $fh, $subkeys, $self->{bucket_list_size});
20f7b20c 377
251dfd0e 378 for (my $k=0; $k<$self->{max_buckets}; $k++) {
379 my $subloc = unpack($self->{long_pack}, substr($subkeys, ($k * $self->{bucket_size}) +
380 $self->{hash_size}, $self->{long_size}));
20f7b20c 381 if (!$subloc) {
251dfd0e 382 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
383 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
20f7b20c 384 last;
385 }
386 } # k loop
387 }
388 else {
389 $offsets[$num] = $root->{end};
251dfd0e 390 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
391 print( $fh pack($self->{long_pack}, $root->{end}) );
20f7b20c 392
251dfd0e 393 my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $self->{bucket_list_size});
20f7b20c 394
395 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
251dfd0e 396 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
20f7b20c 397 }
398 } # key is real
399 } # i loop
400
401 $location ||= $root->{end};
402 } # re-index bucket list
403
404 ##
405 # Seek to content area and store signature, value and plaintext key
406 ##
407 if ($location) {
408 my $content_length;
409 seek($fh, $location + $root->{file_offset}, SEEK_SET);
410
411 ##
412 # Write signature based on content type, set content length and write actual value.
413 ##
414 my $r = Scalar::Util::reftype($value) || '';
415 if ($r eq 'HASH') {
416 print( $fh DBM::Deep->TYPE_HASH );
251dfd0e 417 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 418 $content_length = $self->{index_size};
20f7b20c 419 }
420 elsif ($r eq 'ARRAY') {
421 print( $fh DBM::Deep->TYPE_ARRAY );
251dfd0e 422 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 423 $content_length = $self->{index_size};
20f7b20c 424 }
425 elsif (!defined($value)) {
426 print( $fh DBM::Deep->SIG_NULL );
251dfd0e 427 print( $fh pack($self->{data_pack}, 0) );
20f7b20c 428 $content_length = 0;
429 }
430 else {
431 print( $fh DBM::Deep->SIG_DATA );
251dfd0e 432 print( $fh pack($self->{data_pack}, length($value)) . $value );
20f7b20c 433 $content_length = length($value);
434 }
435
436 ##
437 # Plain key is stored AFTER value, as keys are typically fetched less often.
438 ##
251dfd0e 439 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
20f7b20c 440
441 ##
442 # If value is blessed, preserve class name
443 ##
444 if ( $root->{autobless} ) {
445 my $value_class = Scalar::Util::blessed($value);
446 if ( defined $value_class && $value_class ne 'DBM::Deep' ) {
447 ##
448 # Blessed ref -- will restore later
449 ##
450 print( $fh chr(1) );
251dfd0e 451 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
20f7b20c 452 $content_length += 1;
251dfd0e 453 $content_length += $self->{data_size} + length($value_class);
20f7b20c 454 }
455 else {
456 print( $fh chr(0) );
457 $content_length += 1;
458 }
459 }
460
461 ##
462 # If this is a new content area, advance EOF counter
463 ##
464 if ($location == $root->{end}) {
465 $root->{end} += DBM::Deep->SIG_SIZE;
251dfd0e 466 $root->{end} += $self->{data_size} + $content_length;
467 $root->{end} += $self->{data_size} + length($plain_key);
20f7b20c 468 }
469
470 ##
471 # If content is a hash or array, create new child DBM::Deep object and
472 # pass each key or element to it.
473 ##
474 if ($r eq 'HASH') {
475 my $branch = DBM::Deep->new(
476 type => DBM::Deep->TYPE_HASH,
477 base_offset => $location,
478 root => $root,
479 );
480 foreach my $key (keys %{$value}) {
481 $branch->STORE( $key, $value->{$key} );
482 }
483 }
484 elsif ($r eq 'ARRAY') {
485 my $branch = DBM::Deep->new(
486 type => DBM::Deep->TYPE_ARRAY,
487 base_offset => $location,
488 root => $root,
489 );
490 my $index = 0;
491 foreach my $element (@{$value}) {
492 $branch->STORE( $index, $element );
493 $index++;
494 }
495 }
496
497 return $result;
498 }
d4b1166e 499
20f7b20c 500 return $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file");
d4b1166e 501}
502
9020ee8c 503sub get_bucket_value {
beac1dff 504 ##
505 # Fetch single value given tag and MD5 digested key.
506 ##
507 my $self = shift;
508 my ($obj, $tag, $md5) = @_;
509 my $keys = $tag->{content};
9020ee8c 510
511 my $fh = $obj->_fh;
512
beac1dff 513 ##
514 # Iterate through buckets, looking for a key match
515 ##
9020ee8c 516 BUCKET:
beac1dff 517 for (my $i=0; $i<$self->{max_buckets}; $i++) {
518 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
519 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
9020ee8c 520
beac1dff 521 if (!$subloc) {
522 ##
523 # Hit end of list, no match
524 ##
525 return;
526 }
9020ee8c 527
528 if ( $md5 ne $key ) {
529 next BUCKET;
530 }
531
532 ##
533 # Found match -- seek to offset and read signature
534 ##
535 my $signature;
536 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
537 read( $fh, $signature, DBM::Deep->SIG_SIZE);
d0b74c17 538
9020ee8c 539 ##
540 # If value is a hash or array, return new DBM::Deep object with correct offset
541 ##
542 if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) {
543 my $obj = DBM::Deep->new(
544 type => $signature,
545 base_offset => $subloc,
546 root => $obj->_root,
547 );
d0b74c17 548
9020ee8c 549 if ($obj->_root->{autobless}) {
550 ##
551 # Skip over value and plain key to see if object needs
552 # to be re-blessed
553 ##
251dfd0e 554 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
d0b74c17 555
9020ee8c 556 my $size;
251dfd0e 557 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 558 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 559
9020ee8c 560 my $bless_bit;
561 read( $fh, $bless_bit, 1);
562 if (ord($bless_bit)) {
563 ##
564 # Yes, object needs to be re-blessed
565 ##
566 my $class_name;
251dfd0e 567 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 568 if ($size) { read( $fh, $class_name, $size); }
569 if ($class_name) { $obj = bless( $obj, $class_name ); }
570 }
571 }
d0b74c17 572
9020ee8c 573 return $obj;
574 }
d0b74c17 575
9020ee8c 576 ##
577 # Otherwise return actual value
578 ##
579 elsif ($signature eq DBM::Deep->SIG_DATA) {
580 my $size;
581 my $value = '';
251dfd0e 582 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 583 if ($size) { read( $fh, $value, $size); }
584 return $value;
585 }
d0b74c17 586
9020ee8c 587 ##
588 # Key exists, but content is null
589 ##
590 else { return; }
beac1dff 591 } # i loop
9020ee8c 592
beac1dff 593 return;
9020ee8c 594}
ab0e4957 595
596sub delete_bucket {
beac1dff 597 ##
598 # Delete single key/value pair given tag and MD5 digested key.
599 ##
600 my $self = shift;
601 my ($obj, $tag, $md5) = @_;
602 my $keys = $tag->{content};
ab0e4957 603
604 my $fh = $obj->_fh;
d0b74c17 605
beac1dff 606 ##
607 # Iterate through buckets, looking for a key match
608 ##
ab0e4957 609 BUCKET:
beac1dff 610 for (my $i=0; $i<$self->{max_buckets}; $i++) {
611 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
612 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
ab0e4957 613
beac1dff 614 if (!$subloc) {
615 ##
616 # Hit end of list, no match
617 ##
618 return;
619 }
ab0e4957 620
621 if ( $md5 ne $key ) {
622 next BUCKET;
623 }
624
625 ##
626 # Matched key -- delete bucket and return
627 ##
251dfd0e 628 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $obj->_root->{file_offset}, SEEK_SET);
629 print( $fh substr($keys, ($i+1) * $self->{bucket_size} ) );
630 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 631
ab0e4957 632 return 1;
beac1dff 633 } # i loop
ab0e4957 634
beac1dff 635 return;
ab0e4957 636}
637
912d50b1 638sub bucket_exists {
beac1dff 639 ##
640 # Check existence of single key given tag and MD5 digested key.
641 ##
642 my $self = shift;
643 my ($obj, $tag, $md5) = @_;
644 my $keys = $tag->{content};
d0b74c17 645
beac1dff 646 ##
647 # Iterate through buckets, looking for a key match
648 ##
912d50b1 649 BUCKET:
beac1dff 650 for (my $i=0; $i<$self->{max_buckets}; $i++) {
651 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
652 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
912d50b1 653
beac1dff 654 if (!$subloc) {
655 ##
656 # Hit end of list, no match
657 ##
658 return;
659 }
912d50b1 660
661 if ( $md5 ne $key ) {
662 next BUCKET;
663 }
664
665 ##
666 # Matched key -- return true
667 ##
668 return 1;
beac1dff 669 } # i loop
912d50b1 670
beac1dff 671 return;
912d50b1 672}
673
6736c116 674sub find_bucket_list {
beac1dff 675 ##
676 # Locate offset for bucket list, given digested key
677 ##
678 my $self = shift;
d0b74c17 679 my ($obj, $md5, $args) = @_;
680 $args = {} unless $args;
681
beac1dff 682 ##
683 # Locate offset for bucket list using digest index system
684 ##
685 my $ch = 0;
686 my $tag = $self->load_tag($obj, $obj->_base_offset);
d0b74c17 687 if (!$tag) {
688 return $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
689 }
690
beac1dff 691 while ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
d0b74c17 692 my $num = ord substr($md5, $ch, 1);
693
694 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
695 $tag = $self->index_lookup( $obj, $tag, $num );
696
697 if (!$tag) {
698 if ( $args->{create} ) {
699 my $fh = $obj->_fh;
700 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
701 print( $fh pack($self->{long_pack}, $obj->_root->{end}) );
702
703 $tag = $self->create_tag(
704 $obj, $obj->_root->{end},
705 DBM::Deep->SIG_BLIST,
706 chr(0) x $self->{bucket_list_size},
707 );
708
709 $tag->{ref_loc} = $ref_loc;
710 $tag->{ch} = $ch;
711
712 last;
713 }
714 else {
715 return;
716 }
717 }
718
719 $tag->{ch} = $ch;
720 $tag->{ref_loc} = $ref_loc;
721
beac1dff 722 $ch++;
723 }
d0b74c17 724
beac1dff 725 return $tag;
6736c116 726}
727
d0b74c17 728sub index_lookup {
729 ##
730 # Given index tag, lookup single entry in index and return .
731 ##
732 my $self = shift;
733 my ($obj, $tag, $index) = @_;
734
735 my $location = unpack(
736 $self->{long_pack},
737 substr(
738 $tag->{content},
739 $index * $self->{long_size},
740 $self->{long_size},
741 ),
742 );
743
744 if (!$location) { return; }
745
746 return $self->load_tag( $obj, $location );
747}
748
6736c116 749sub traverse_index {
beac1dff 750 ##
751 # Scan index and recursively step into deeper levels, looking for next key.
752 ##
6736c116 753 my $self = shift;
754 my ($obj, $offset, $ch, $force_return_next) = @_;
755 $force_return_next = undef unless $force_return_next;
d0b74c17 756
beac1dff 757 my $tag = $self->load_tag($obj, $offset );
6736c116 758
759 my $fh = $obj->_fh;
d0b74c17 760
beac1dff 761 if ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
762 my $content = $tag->{content};
763 my $start;
764 if ($obj->{return_next}) { $start = 0; }
765 else { $start = ord(substr($obj->{prev_md5}, $ch, 1)); }
d0b74c17 766
beac1dff 767 for (my $index = $start; $index < 256; $index++) {
768 my $subloc = unpack($self->{long_pack}, substr($content, $index * $self->{long_size}, $self->{long_size}) );
769 if ($subloc) {
770 my $result = $self->traverse_index( $obj, $subloc, $ch + 1, $force_return_next );
771 if (defined($result)) { return $result; }
772 }
773 } # index loop
d0b74c17 774
beac1dff 775 $obj->{return_next} = 1;
776 } # tag is an index
d0b74c17 777
beac1dff 778 elsif ($tag->{signature} eq DBM::Deep->SIG_BLIST) {
779 my $keys = $tag->{content};
780 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 781
beac1dff 782 ##
783 # Iterate through buckets, looking for a key match
784 ##
785 for (my $i=0; $i<$self->{max_buckets}; $i++) {
786 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
787 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
d0b74c17 788
beac1dff 789 if (!$subloc) {
790 ##
791 # End of bucket list -- return to outer loop
792 ##
793 $obj->{return_next} = 1;
794 last;
795 }
796 elsif ($key eq $obj->{prev_md5}) {
797 ##
798 # Located previous key -- return next one found
799 ##
800 $obj->{return_next} = 1;
801 next;
802 }
803 elsif ($obj->{return_next}) {
804 ##
805 # Seek to bucket location and skip over signature
806 ##
807 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 808
beac1dff 809 ##
810 # Skip over value to get to plain key
811 ##
812 my $size;
813 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
814 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 815
beac1dff 816 ##
817 # Read in plain key and return as scalar
818 ##
819 my $plain_key;
820 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
821 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 822
beac1dff 823 return $plain_key;
824 }
825 } # bucket loop
d0b74c17 826
beac1dff 827 $obj->{return_next} = 1;
828 } # tag is a bucket list
d0b74c17 829
beac1dff 830 return;
6736c116 831}
832
833sub get_next_key {
beac1dff 834 ##
835 # Locate next key, given digested previous one
836 ##
6736c116 837 my $self = shift;
838 my ($obj) = @_;
d0b74c17 839
beac1dff 840 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
841 $obj->{return_next} = 0;
d0b74c17 842
beac1dff 843 ##
844 # If the previous key was not specifed, start at the top and
845 # return the first one found.
846 ##
847 if (!$obj->{prev_md5}) {
848 $obj->{prev_md5} = chr(0) x $self->{hash_size};
849 $obj->{return_next} = 1;
850 }
d0b74c17 851
beac1dff 852 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 853}
854
a20d9a3f 8551;
856__END__