Added setup_fh that handles inodes separate from open()
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
1bf65be7 7##
8# Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file.
9# (Perl must be compiled with largefile support for files > 2 GB)
10#
11# Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
12# (Perl must be compiled with largefile and 64-bit long support)
13##
14##
15# Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value.
16# Upgrading this is possible (see above) but probably not necessary. If you need
17# more than 4 GB for a single key or value, this module is really not for you :-)
18##
19our ($LONG_SIZE, $LONG_PACK, $DATA_LENGTH_SIZE, $DATA_LENGTH_PACK);
20##
21# Maximum number of buckets per list before another level of indexing is done.
22# Increase this value for slightly greater speed, but larger database files.
23# DO NOT decrease this value below 16, due to risk of recursive reindex overrun.
24##
25our $MAX_BUCKETS = 16;
26our ($DIGEST_FUNC, $HASH_SIZE);
27our ($INDEX_SIZE, $BUCKET_SIZE, $BUCKET_LIST_SIZE);
28set_digest();
29
30sub _precalc_sizes {
31 ##
32 # Precalculate index, bucket and bucket list sizes
33 ##
34
35 #XXX I don't like this ...
36 set_pack() unless defined $LONG_SIZE;
37
38 $INDEX_SIZE = 256 * $LONG_SIZE;
39 $BUCKET_SIZE = $HASH_SIZE + $LONG_SIZE;
40 $BUCKET_LIST_SIZE = $MAX_BUCKETS * $BUCKET_SIZE;
41}
42
43sub set_pack {
44 ##
45 # Set pack/unpack modes (see file header for more)
46 ##
47 my ($long_s, $long_p, $data_s, $data_p) = @_;
48
49 $LONG_SIZE = $long_s ? $long_s : 4;
50 $LONG_PACK = $long_p ? $long_p : 'N';
51
52 $DATA_LENGTH_SIZE = $data_s ? $data_s : 4;
53 $DATA_LENGTH_PACK = $data_p ? $data_p : 'N';
54
55 _precalc_sizes();
56}
57
58sub set_digest {
59 ##
60 # Set key digest function (default is MD5)
61 ##
62 my ($digest_func, $hash_size) = @_;
63
64 $DIGEST_FUNC = $digest_func ? $digest_func : \&Digest::MD5::md5;
65 $HASH_SIZE = $hash_size ? $hash_size : 16;
66
67 _precalc_sizes();
68}
69
70b55428 70sub setup_fh {
71 my $self = shift;
72 my ($obj) = @_;
73
74 $self->open( $obj ) if !defined $obj->_fh;
75
76 #XXX Is {end} busted? Can't we just seek( $fh, 0, SEEK_END ) ?
77 unless ( $obj->_root->{inode} ) {
78 my @stats = stat($obj->_fh);
79 $obj->_root->{inode} = $stats[1];
80 $obj->_root->{end} = $stats[7];
81 }
82
83 return 1;
84}
85
a20d9a3f 86sub open {
20f7b20c 87 ##
88 # Open a fh to the database, create if nonexistent.
89 # Make sure file signature matches DBM::Deep spec.
90 ##
a20d9a3f 91 my $self = shift;
70b55428 92 my ($obj) = @_;
a20d9a3f 93
20f7b20c 94 if (defined($obj->_fh)) { $self->close( $obj ); }
95
a20d9a3f 96 eval {
97 local $SIG{'__DIE__'};
98 # Theoretically, adding O_BINARY should remove the need for the binmode
99 # Of course, testing it is going to be ... interesting.
100 my $flags = O_RDWR | O_CREAT | O_BINARY;
101
102 my $fh;
cd59cad8 103 sysopen( $fh, $obj->_root->{file}, $flags )
a20d9a3f 104 or $fh = undef;
cd59cad8 105 $obj->_root->{fh} = $fh;
106 }; if ($@ ) { $obj->_throw_error( "Received error: $@\n" ); }
20f7b20c 107 if (! defined($obj->_fh)) {
108 return $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!");
109 }
a20d9a3f 110
cd59cad8 111 my $fh = $obj->_fh;
a20d9a3f 112
113 #XXX Can we remove this by using the right sysopen() flags?
114 # Maybe ... q.v. above
115 binmode $fh; # for win32
116
cd59cad8 117 if ($obj->_root->{autoflush}) {
a20d9a3f 118 my $old = select $fh;
119 $|=1;
120 select $old;
121 }
20f7b20c 122
cd59cad8 123 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 124
125 my $signature;
126 my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE));
20f7b20c 127
a20d9a3f 128 ##
129 # File is empty -- write signature and master index
130 ##
131 if (!$bytes_read) {
cd59cad8 132 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 133 print( $fh DBM::Deep->SIG_FILE);
1bf65be7 134 $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $INDEX_SIZE);
a20d9a3f 135
136 my $plain_key = "[base]";
1bf65be7 137 print( $fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key );
a20d9a3f 138
139 # Flush the filehandle
140 my $old_fh = select $fh;
141 my $old_af = $|; $| = 1; $| = $old_af;
142 select $old_fh;
143
a20d9a3f 144 return 1;
145 }
20f7b20c 146
a20d9a3f 147 ##
148 # Check signature was valid
149 ##
150 unless ($signature eq DBM::Deep->SIG_FILE) {
cd59cad8 151 $self->close( $obj );
152 return $obj->_throw_error("Signature not found -- file is not a Deep DB");
a20d9a3f 153 }
154
a20d9a3f 155 ##
156 # Get our type from master index signature
157 ##
d4b1166e 158 my $tag = $self->load_tag($obj, $obj->_base_offset);
a20d9a3f 159
160#XXX We probably also want to store the hash algorithm name and not assume anything
161#XXX The cool thing would be to allow a different hashing algorithm at every level
162
163 if (!$tag) {
20f7b20c 164 return $obj->_throw_error("Corrupted file, no master index record");
a20d9a3f 165 }
cd59cad8 166 if ($obj->{type} ne $tag->{signature}) {
20f7b20c 167 return $obj->_throw_error("File type mismatch");
a20d9a3f 168 }
20f7b20c 169
a20d9a3f 170 return 1;
171}
172
cd59cad8 173sub close {
174 my $self = shift;
175 my $obj = shift;
176
177 if ( my $fh = $obj->_root->{fh} ) {
178 close $fh;
179 }
180 $obj->_root->{fh} = undef;
181
182 return 1;
183}
184
d4b1166e 185sub create_tag {
20f7b20c 186 ##
187 # Given offset, signature and content, create tag and write to disk
188 ##
d4b1166e 189 my $self = shift;
20f7b20c 190 my ($obj, $offset, $sig, $content) = @_;
191 my $size = length($content);
192
d4b1166e 193 my $fh = $obj->_fh;
194
20f7b20c 195 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
1bf65be7 196 print( $fh $sig . pack($DATA_LENGTH_PACK, $size) . $content );
20f7b20c 197
198 if ($offset == $obj->_root->{end}) {
1bf65be7 199 $obj->_root->{end} += DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE + $size;
20f7b20c 200 }
201
202 return {
203 signature => $sig,
204 size => $size,
1bf65be7 205 offset => $offset + DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE,
20f7b20c 206 content => $content
207 };
d4b1166e 208}
209
210sub load_tag {
20f7b20c 211 ##
212 # Given offset, load single tag and return signature, size and data
213 ##
d4b1166e 214 my $self = shift;
20f7b20c 215 my ($obj, $offset) = @_;
216
d4b1166e 217 my $fh = $obj->_fh;
218
20f7b20c 219 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
220 if (eof $fh) { return undef; }
221
d4b1166e 222 my $b;
1bf65be7 223 read( $fh, $b, DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE );
224 my ($sig, $size) = unpack( "A $DATA_LENGTH_PACK", $b );
20f7b20c 225
226 my $buffer;
227 read( $fh, $buffer, $size);
228
229 return {
230 signature => $sig,
231 size => $size,
1bf65be7 232 offset => $offset + DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE,
20f7b20c 233 content => $buffer
234 };
d4b1166e 235}
236
237sub index_lookup {
20f7b20c 238 ##
239 # Given index tag, lookup single entry in index and return .
240 ##
241 my $self = shift;
242 my ($obj, $tag, $index) = @_;
243
1bf65be7 244 my $location = unpack($LONG_PACK, substr($tag->{content}, $index * $LONG_SIZE, $LONG_SIZE) );
20f7b20c 245 if (!$location) { return; }
246
247 return $self->load_tag( $obj, $location );
248}
249
250sub add_bucket {
251 ##
252 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
253 # plain (undigested) key and value.
254 ##
d4b1166e 255 my $self = shift;
20f7b20c 256 my ($obj, $tag, $md5, $plain_key, $value) = @_;
257 my $keys = $tag->{content};
258 my $location = 0;
259 my $result = 2;
260
261 my $root = $obj->_root;
262
263 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) };
264 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
265
266 my $fh = $obj->_fh;
267
268 ##
269 # Iterate through buckets, seeing if this is a new entry or a replace.
270 ##
1bf65be7 271 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
272 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
20f7b20c 273 if (!$subloc) {
274 ##
275 # Found empty bucket (end of list). Populate and exit loop.
276 ##
277 $result = 2;
278
279 $location = $internal_ref
280 ? $value->_base_offset
281 : $root->{end};
282
1bf65be7 283 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET);
284 print( $fh $md5 . pack($LONG_PACK, $location) );
20f7b20c 285 last;
286 }
287
1bf65be7 288 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
20f7b20c 289 if ($md5 eq $key) {
290 ##
291 # Found existing bucket with same key. Replace with new value.
292 ##
293 $result = 1;
294
295 if ($internal_ref) {
296 $location = $value->_base_offset;
1bf65be7 297 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET);
298 print( $fh $md5 . pack($LONG_PACK, $location) );
20f7b20c 299 return $result;
300 }
301
302 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET);
303 my $size;
1bf65be7 304 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
20f7b20c 305
306 ##
307 # If value is a hash, array, or raw value with equal or less size, we can
308 # reuse the same content area of the database. Otherwise, we have to create
309 # a new content area at the EOF.
310 ##
311 my $actual_length;
312 my $r = Scalar::Util::reftype( $value ) || '';
313 if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1bf65be7 314 $actual_length = $INDEX_SIZE;
20f7b20c 315
316 # if autobless is enabled, must also take into consideration
317 # the class name, as it is stored along with key/value.
318 if ( $root->{autobless} ) {
319 my $value_class = Scalar::Util::blessed($value);
320 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
321 $actual_length += length($value_class);
322 }
323 }
324 }
325 else { $actual_length = length($value); }
326
327 if ($actual_length <= $size) {
328 $location = $subloc;
329 }
330 else {
331 $location = $root->{end};
1bf65be7 332 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $HASH_SIZE + $root->{file_offset}, SEEK_SET);
333 print( $fh pack($LONG_PACK, $location) );
20f7b20c 334 }
335
336 last;
337 }
338 }
339
340 ##
341 # If this is an internal reference, return now.
342 # No need to write value or plain key
343 ##
344 if ($internal_ref) {
345 return $result;
346 }
347
348 ##
349 # If bucket didn't fit into list, split into a new index level
350 ##
351 if (!$location) {
352 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
1bf65be7 353 print( $fh pack($LONG_PACK, $root->{end}) );
20f7b20c 354
1bf65be7 355 my $index_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_INDEX, chr(0) x $INDEX_SIZE);
20f7b20c 356 my @offsets = ();
357
1bf65be7 358 $keys .= $md5 . pack($LONG_PACK, 0);
20f7b20c 359
1bf65be7 360 for (my $i=0; $i<=$MAX_BUCKETS; $i++) {
361 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
20f7b20c 362 if ($key) {
1bf65be7 363 my $old_subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) +
364 $HASH_SIZE, $LONG_SIZE));
20f7b20c 365 my $num = ord(substr($key, $tag->{ch} + 1, 1));
366
367 if ($offsets[$num]) {
1bf65be7 368 my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE;
20f7b20c 369 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
370 my $subkeys;
1bf65be7 371 read( $fh, $subkeys, $BUCKET_LIST_SIZE);
20f7b20c 372
1bf65be7 373 for (my $k=0; $k<$MAX_BUCKETS; $k++) {
374 my $subloc = unpack($LONG_PACK, substr($subkeys, ($k * $BUCKET_SIZE) +
375 $HASH_SIZE, $LONG_SIZE));
20f7b20c 376 if (!$subloc) {
1bf65be7 377 seek($fh, $offset + ($k * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET);
378 print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) );
20f7b20c 379 last;
380 }
381 } # k loop
382 }
383 else {
384 $offsets[$num] = $root->{end};
1bf65be7 385 seek($fh, $index_tag->{offset} + ($num * $LONG_SIZE) + $root->{file_offset}, SEEK_SET);
386 print( $fh pack($LONG_PACK, $root->{end}) );
20f7b20c 387
1bf65be7 388 my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE);
20f7b20c 389
390 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
1bf65be7 391 print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) );
20f7b20c 392 }
393 } # key is real
394 } # i loop
395
396 $location ||= $root->{end};
397 } # re-index bucket list
398
399 ##
400 # Seek to content area and store signature, value and plaintext key
401 ##
402 if ($location) {
403 my $content_length;
404 seek($fh, $location + $root->{file_offset}, SEEK_SET);
405
406 ##
407 # Write signature based on content type, set content length and write actual value.
408 ##
409 my $r = Scalar::Util::reftype($value) || '';
410 if ($r eq 'HASH') {
411 print( $fh DBM::Deep->TYPE_HASH );
1bf65be7 412 print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE );
413 $content_length = $INDEX_SIZE;
20f7b20c 414 }
415 elsif ($r eq 'ARRAY') {
416 print( $fh DBM::Deep->TYPE_ARRAY );
1bf65be7 417 print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE );
418 $content_length = $INDEX_SIZE;
20f7b20c 419 }
420 elsif (!defined($value)) {
421 print( $fh DBM::Deep->SIG_NULL );
1bf65be7 422 print( $fh pack($DATA_LENGTH_PACK, 0) );
20f7b20c 423 $content_length = 0;
424 }
425 else {
426 print( $fh DBM::Deep->SIG_DATA );
1bf65be7 427 print( $fh pack($DATA_LENGTH_PACK, length($value)) . $value );
20f7b20c 428 $content_length = length($value);
429 }
430
431 ##
432 # Plain key is stored AFTER value, as keys are typically fetched less often.
433 ##
1bf65be7 434 print( $fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key );
20f7b20c 435
436 ##
437 # If value is blessed, preserve class name
438 ##
439 if ( $root->{autobless} ) {
440 my $value_class = Scalar::Util::blessed($value);
441 if ( defined $value_class && $value_class ne 'DBM::Deep' ) {
442 ##
443 # Blessed ref -- will restore later
444 ##
445 print( $fh chr(1) );
1bf65be7 446 print( $fh pack($DATA_LENGTH_PACK, length($value_class)) . $value_class );
20f7b20c 447 $content_length += 1;
1bf65be7 448 $content_length += $DATA_LENGTH_SIZE + length($value_class);
20f7b20c 449 }
450 else {
451 print( $fh chr(0) );
452 $content_length += 1;
453 }
454 }
455
456 ##
457 # If this is a new content area, advance EOF counter
458 ##
459 if ($location == $root->{end}) {
460 $root->{end} += DBM::Deep->SIG_SIZE;
1bf65be7 461 $root->{end} += $DATA_LENGTH_SIZE + $content_length;
462 $root->{end} += $DATA_LENGTH_SIZE + length($plain_key);
20f7b20c 463 }
464
465 ##
466 # If content is a hash or array, create new child DBM::Deep object and
467 # pass each key or element to it.
468 ##
469 if ($r eq 'HASH') {
470 my $branch = DBM::Deep->new(
471 type => DBM::Deep->TYPE_HASH,
472 base_offset => $location,
473 root => $root,
474 );
475 foreach my $key (keys %{$value}) {
476 $branch->STORE( $key, $value->{$key} );
477 }
478 }
479 elsif ($r eq 'ARRAY') {
480 my $branch = DBM::Deep->new(
481 type => DBM::Deep->TYPE_ARRAY,
482 base_offset => $location,
483 root => $root,
484 );
485 my $index = 0;
486 foreach my $element (@{$value}) {
487 $branch->STORE( $index, $element );
488 $index++;
489 }
490 }
491
492 return $result;
493 }
d4b1166e 494
20f7b20c 495 return $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file");
d4b1166e 496}
497
9020ee8c 498sub get_bucket_value {
499 ##
500 # Fetch single value given tag and MD5 digested key.
501 ##
502 my $self = shift;
503 my ($obj, $tag, $md5) = @_;
504 my $keys = $tag->{content};
505
506 my $fh = $obj->_fh;
507
508 ##
509 # Iterate through buckets, looking for a key match
510 ##
511 BUCKET:
1bf65be7 512 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
513 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
514 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
9020ee8c 515
516 if (!$subloc) {
517 ##
518 # Hit end of list, no match
519 ##
520 return;
521 }
522
523 if ( $md5 ne $key ) {
524 next BUCKET;
525 }
526
527 ##
528 # Found match -- seek to offset and read signature
529 ##
530 my $signature;
531 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
532 read( $fh, $signature, DBM::Deep->SIG_SIZE);
533
534 ##
535 # If value is a hash or array, return new DBM::Deep object with correct offset
536 ##
537 if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) {
538 my $obj = DBM::Deep->new(
539 type => $signature,
540 base_offset => $subloc,
541 root => $obj->_root,
542 );
543
544 if ($obj->_root->{autobless}) {
545 ##
546 # Skip over value and plain key to see if object needs
547 # to be re-blessed
548 ##
1bf65be7 549 seek($fh, $DATA_LENGTH_SIZE + $INDEX_SIZE, SEEK_CUR);
9020ee8c 550
551 my $size;
1bf65be7 552 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
9020ee8c 553 if ($size) { seek($fh, $size, SEEK_CUR); }
554
555 my $bless_bit;
556 read( $fh, $bless_bit, 1);
557 if (ord($bless_bit)) {
558 ##
559 # Yes, object needs to be re-blessed
560 ##
561 my $class_name;
1bf65be7 562 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
9020ee8c 563 if ($size) { read( $fh, $class_name, $size); }
564 if ($class_name) { $obj = bless( $obj, $class_name ); }
565 }
566 }
567
568 return $obj;
569 }
570
571 ##
572 # Otherwise return actual value
573 ##
574 elsif ($signature eq DBM::Deep->SIG_DATA) {
575 my $size;
576 my $value = '';
1bf65be7 577 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
9020ee8c 578 if ($size) { read( $fh, $value, $size); }
579 return $value;
580 }
581
582 ##
583 # Key exists, but content is null
584 ##
585 else { return; }
586 } # i loop
587
588 return;
589}
ab0e4957 590
591sub delete_bucket {
592 ##
593 # Delete single key/value pair given tag and MD5 digested key.
594 ##
595 my $self = shift;
596 my ($obj, $tag, $md5) = @_;
597 my $keys = $tag->{content};
598
599 my $fh = $obj->_fh;
600
601 ##
602 # Iterate through buckets, looking for a key match
603 ##
604 BUCKET:
1bf65be7 605 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
606 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
607 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
ab0e4957 608
609 if (!$subloc) {
610 ##
611 # Hit end of list, no match
612 ##
613 return;
614 }
615
616 if ( $md5 ne $key ) {
617 next BUCKET;
618 }
619
620 ##
621 # Matched key -- delete bucket and return
622 ##
1bf65be7 623 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $obj->_root->{file_offset}, SEEK_SET);
624 print( $fh substr($keys, ($i+1) * $BUCKET_SIZE ) );
625 print( $fh chr(0) x $BUCKET_SIZE );
ab0e4957 626
627 return 1;
628 } # i loop
629
630 return;
631}
632
912d50b1 633sub bucket_exists {
634 ##
635 # Check existence of single key given tag and MD5 digested key.
636 ##
637 my $self = shift;
638 my ($obj, $tag, $md5) = @_;
639 my $keys = $tag->{content};
640
641 ##
642 # Iterate through buckets, looking for a key match
643 ##
644 BUCKET:
1bf65be7 645 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
646 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
647 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
912d50b1 648
649 if (!$subloc) {
650 ##
651 # Hit end of list, no match
652 ##
653 return;
654 }
655
656 if ( $md5 ne $key ) {
657 next BUCKET;
658 }
659
660 ##
661 # Matched key -- return true
662 ##
663 return 1;
664 } # i loop
665
666 return;
667}
668
6736c116 669sub find_bucket_list {
670 ##
671 # Locate offset for bucket list, given digested key
672 ##
673 my $self = shift;
674 my ($obj, $md5) = @_;
675
676 ##
677 # Locate offset for bucket list using digest index system
678 ##
679 my $ch = 0;
680 my $tag = $self->load_tag($obj, $obj->_base_offset);
681 if (!$tag) { return; }
682
683 while ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
684 $tag = $self->index_lookup($obj, $tag, ord(substr($md5, $ch, 1)));
685 if (!$tag) { return; }
686 $ch++;
687 }
688
689 return $tag;
690}
691
692sub traverse_index {
693 ##
694 # Scan index and recursively step into deeper levels, looking for next key.
695 ##
696 my $self = shift;
697 my ($obj, $offset, $ch, $force_return_next) = @_;
698 $force_return_next = undef unless $force_return_next;
699
700 my $tag = $self->load_tag($obj, $offset );
701
702 my $fh = $obj->_fh;
703
704 if ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
705 my $content = $tag->{content};
706 my $start;
707 if ($obj->{return_next}) { $start = 0; }
708 else { $start = ord(substr($obj->{prev_md5}, $ch, 1)); }
709
710 for (my $index = $start; $index < 256; $index++) {
1bf65be7 711 my $subloc = unpack($LONG_PACK, substr($content, $index * $LONG_SIZE, $LONG_SIZE) );
6736c116 712 if ($subloc) {
713 my $result = $self->traverse_index( $obj, $subloc, $ch + 1, $force_return_next );
714 if (defined($result)) { return $result; }
715 }
716 } # index loop
717
718 $obj->{return_next} = 1;
719 } # tag is an index
720
721 elsif ($tag->{signature} eq DBM::Deep->SIG_BLIST) {
722 my $keys = $tag->{content};
723 if ($force_return_next) { $obj->{return_next} = 1; }
724
725 ##
726 # Iterate through buckets, looking for a key match
727 ##
1bf65be7 728 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
729 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
730 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
6736c116 731
732 if (!$subloc) {
733 ##
734 # End of bucket list -- return to outer loop
735 ##
736 $obj->{return_next} = 1;
737 last;
738 }
739 elsif ($key eq $obj->{prev_md5}) {
740 ##
741 # Located previous key -- return next one found
742 ##
743 $obj->{return_next} = 1;
744 next;
745 }
746 elsif ($obj->{return_next}) {
747 ##
748 # Seek to bucket location and skip over signature
749 ##
750 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET);
751
752 ##
753 # Skip over value to get to plain key
754 ##
755 my $size;
1bf65be7 756 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
6736c116 757 if ($size) { seek($fh, $size, SEEK_CUR); }
758
759 ##
760 # Read in plain key and return as scalar
761 ##
762 my $plain_key;
1bf65be7 763 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
6736c116 764 if ($size) { read( $fh, $plain_key, $size); }
765
766 return $plain_key;
767 }
768 } # bucket loop
769
770 $obj->{return_next} = 1;
771 } # tag is a bucket list
772
773 return;
774}
775
776sub get_next_key {
777 ##
778 # Locate next key, given digested previous one
779 ##
780 my $self = shift;
781 my ($obj) = @_;
782
783 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
784 $obj->{return_next} = 0;
785
786 ##
787 # If the previous key was not specifed, start at the top and
788 # return the first one found.
789 ##
790 if (!$obj->{prev_md5}) {
1bf65be7 791 $obj->{prev_md5} = chr(0) x $HASH_SIZE;
6736c116 792 $obj->{return_next} = 1;
793 }
794
795 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
796}
797
a20d9a3f 7981;
799__END__