All globals are now converted to variables within the engine object
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
1bf65be7 7##
8# Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file.
9# (Perl must be compiled with largefile support for files > 2 GB)
10#
11# Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
12# (Perl must be compiled with largefile and 64-bit long support)
13##
14##
15# Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value.
16# Upgrading this is possible (see above) but probably not necessary. If you need
17# more than 4 GB for a single key or value, this module is really not for you :-)
18##
251dfd0e 19
1bf65be7 20##
21# Maximum number of buckets per list before another level of indexing is done.
22# Increase this value for slightly greater speed, but larger database files.
23# DO NOT decrease this value below 16, due to risk of recursive reindex overrun.
24##
1bf65be7 25
612969fb 26sub precalc_sizes {
1bf65be7 27 ##
28 # Precalculate index, bucket and bucket list sizes
29 ##
251dfd0e 30 my $self = shift;
1bf65be7 31
251dfd0e 32 $self->{index_size} = (2**8) * $self->{long_size};
33 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
34 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 35
251dfd0e 36 return 1;
1bf65be7 37}
38
39sub set_pack {
40 ##
41 # Set pack/unpack modes (see file header for more)
42 ##
251dfd0e 43 my $self = shift;
1bf65be7 44 my ($long_s, $long_p, $data_s, $data_p) = @_;
45
251dfd0e 46 $self->{long_size} = $long_s ? $long_s : 4;
47 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 48
251dfd0e 49 $self->{data_size} = $data_s ? $data_s : 4;
50 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 51
251dfd0e 52 return $self->precalc_sizes();
1bf65be7 53}
54
55sub set_digest {
56 ##
57 # Set key digest function (default is MD5)
58 ##
251dfd0e 59 my $self = shift;
1bf65be7 60 my ($digest_func, $hash_size) = @_;
61
612969fb 62 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 63 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 64
251dfd0e 65 return $self->precalc_sizes();
612969fb 66}
67
68sub new {
69 my $class = shift;
70 my ($args) = @_;
71
72 my $self = bless {
73 long_size => 4,
74 long_pack => 'N',
75 data_size => 4,
76 data_pack => 'N',
251dfd0e 77
612969fb 78 digest => \&Digest::MD5::md5,
79 hash_size => 16,
251dfd0e 80
612969fb 81 max_buckets => 16,
82 }, $class;
83
251dfd0e 84 $self->precalc_sizes;
612969fb 85
86 return $self;
1bf65be7 87}
88
70b55428 89sub setup_fh {
90 my $self = shift;
91 my ($obj) = @_;
92
93 $self->open( $obj ) if !defined $obj->_fh;
94
95 #XXX Is {end} busted? Can't we just seek( $fh, 0, SEEK_END ) ?
96 unless ( $obj->_root->{inode} ) {
97 my @stats = stat($obj->_fh);
98 $obj->_root->{inode} = $stats[1];
99 $obj->_root->{end} = $stats[7];
100 }
101
102 return 1;
103}
104
a20d9a3f 105sub open {
20f7b20c 106 ##
107 # Open a fh to the database, create if nonexistent.
108 # Make sure file signature matches DBM::Deep spec.
109 ##
a20d9a3f 110 my $self = shift;
70b55428 111 my ($obj) = @_;
a20d9a3f 112
20f7b20c 113 if (defined($obj->_fh)) { $self->close( $obj ); }
114
a20d9a3f 115 eval {
116 local $SIG{'__DIE__'};
117 # Theoretically, adding O_BINARY should remove the need for the binmode
118 # Of course, testing it is going to be ... interesting.
119 my $flags = O_RDWR | O_CREAT | O_BINARY;
120
121 my $fh;
cd59cad8 122 sysopen( $fh, $obj->_root->{file}, $flags )
a20d9a3f 123 or $fh = undef;
cd59cad8 124 $obj->_root->{fh} = $fh;
125 }; if ($@ ) { $obj->_throw_error( "Received error: $@\n" ); }
20f7b20c 126 if (! defined($obj->_fh)) {
127 return $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!");
128 }
a20d9a3f 129
cd59cad8 130 my $fh = $obj->_fh;
a20d9a3f 131
132 #XXX Can we remove this by using the right sysopen() flags?
133 # Maybe ... q.v. above
134 binmode $fh; # for win32
135
cd59cad8 136 if ($obj->_root->{autoflush}) {
a20d9a3f 137 my $old = select $fh;
138 $|=1;
139 select $old;
140 }
20f7b20c 141
cd59cad8 142 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 143
144 my $signature;
145 my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE));
20f7b20c 146
a20d9a3f 147 ##
148 # File is empty -- write signature and master index
149 ##
150 if (!$bytes_read) {
cd59cad8 151 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 152 print( $fh DBM::Deep->SIG_FILE);
ec1bce6b 153 $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $self->{index_size});
a20d9a3f 154
155 my $plain_key = "[base]";
251dfd0e 156 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
a20d9a3f 157
158 # Flush the filehandle
159 my $old_fh = select $fh;
160 my $old_af = $|; $| = 1; $| = $old_af;
161 select $old_fh;
162
a20d9a3f 163 return 1;
164 }
20f7b20c 165
a20d9a3f 166 ##
167 # Check signature was valid
168 ##
169 unless ($signature eq DBM::Deep->SIG_FILE) {
cd59cad8 170 $self->close( $obj );
171 return $obj->_throw_error("Signature not found -- file is not a Deep DB");
a20d9a3f 172 }
173
a20d9a3f 174 ##
175 # Get our type from master index signature
176 ##
d4b1166e 177 my $tag = $self->load_tag($obj, $obj->_base_offset);
a20d9a3f 178
179#XXX We probably also want to store the hash algorithm name and not assume anything
180#XXX The cool thing would be to allow a different hashing algorithm at every level
181
182 if (!$tag) {
20f7b20c 183 return $obj->_throw_error("Corrupted file, no master index record");
a20d9a3f 184 }
cd59cad8 185 if ($obj->{type} ne $tag->{signature}) {
20f7b20c 186 return $obj->_throw_error("File type mismatch");
a20d9a3f 187 }
20f7b20c 188
a20d9a3f 189 return 1;
190}
191
cd59cad8 192sub close {
193 my $self = shift;
194 my $obj = shift;
195
196 if ( my $fh = $obj->_root->{fh} ) {
197 close $fh;
198 }
199 $obj->_root->{fh} = undef;
200
201 return 1;
202}
203
d4b1166e 204sub create_tag {
20f7b20c 205 ##
206 # Given offset, signature and content, create tag and write to disk
207 ##
d4b1166e 208 my $self = shift;
20f7b20c 209 my ($obj, $offset, $sig, $content) = @_;
210 my $size = length($content);
211
d4b1166e 212 my $fh = $obj->_fh;
213
20f7b20c 214 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
251dfd0e 215 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 216
217 if ($offset == $obj->_root->{end}) {
251dfd0e 218 $obj->_root->{end} += DBM::Deep->SIG_SIZE + $self->{data_size} + $size;
20f7b20c 219 }
220
221 return {
222 signature => $sig,
223 size => $size,
251dfd0e 224 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 225 content => $content
226 };
d4b1166e 227}
228
229sub load_tag {
20f7b20c 230 ##
231 # Given offset, load single tag and return signature, size and data
232 ##
d4b1166e 233 my $self = shift;
20f7b20c 234 my ($obj, $offset) = @_;
235
d4b1166e 236 my $fh = $obj->_fh;
237
20f7b20c 238 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
239 if (eof $fh) { return undef; }
240
d4b1166e 241 my $b;
251dfd0e 242 read( $fh, $b, DBM::Deep->SIG_SIZE + $self->{data_size} );
243 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 244
245 my $buffer;
246 read( $fh, $buffer, $size);
247
248 return {
249 signature => $sig,
250 size => $size,
251dfd0e 251 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 252 content => $buffer
253 };
d4b1166e 254}
255
256sub index_lookup {
20f7b20c 257 ##
258 # Given index tag, lookup single entry in index and return .
259 ##
260 my $self = shift;
261 my ($obj, $tag, $index) = @_;
262
251dfd0e 263 my $location = unpack($self->{long_pack}, substr($tag->{content}, $index * $self->{long_size}, $self->{long_size}) );
20f7b20c 264 if (!$location) { return; }
265
266 return $self->load_tag( $obj, $location );
267}
268
269sub add_bucket {
270 ##
271 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
272 # plain (undigested) key and value.
273 ##
d4b1166e 274 my $self = shift;
20f7b20c 275 my ($obj, $tag, $md5, $plain_key, $value) = @_;
276 my $keys = $tag->{content};
277 my $location = 0;
278 my $result = 2;
279
280 my $root = $obj->_root;
281
282 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) };
283 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
284
285 my $fh = $obj->_fh;
286
287 ##
288 # Iterate through buckets, seeing if this is a new entry or a replace.
289 ##
251dfd0e 290 for (my $i=0; $i<$self->{max_buckets}; $i++) {
291 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
20f7b20c 292 if (!$subloc) {
293 ##
294 # Found empty bucket (end of list). Populate and exit loop.
295 ##
296 $result = 2;
297
298 $location = $internal_ref
299 ? $value->_base_offset
300 : $root->{end};
301
251dfd0e 302 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
303 print( $fh $md5 . pack($self->{long_pack}, $location) );
20f7b20c 304 last;
305 }
306
251dfd0e 307 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
20f7b20c 308 if ($md5 eq $key) {
309 ##
310 # Found existing bucket with same key. Replace with new value.
311 ##
312 $result = 1;
313
314 if ($internal_ref) {
315 $location = $value->_base_offset;
251dfd0e 316 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
317 print( $fh $md5 . pack($self->{long_pack}, $location) );
20f7b20c 318 return $result;
319 }
320
321 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET);
322 my $size;
251dfd0e 323 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
20f7b20c 324
325 ##
326 # If value is a hash, array, or raw value with equal or less size, we can
327 # reuse the same content area of the database. Otherwise, we have to create
328 # a new content area at the EOF.
329 ##
330 my $actual_length;
331 my $r = Scalar::Util::reftype( $value ) || '';
332 if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
ec1bce6b 333 $actual_length = $self->{index_size};
20f7b20c 334
335 # if autobless is enabled, must also take into consideration
336 # the class name, as it is stored along with key/value.
337 if ( $root->{autobless} ) {
338 my $value_class = Scalar::Util::blessed($value);
339 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
340 $actual_length += length($value_class);
341 }
342 }
343 }
344 else { $actual_length = length($value); }
345
346 if ($actual_length <= $size) {
347 $location = $subloc;
348 }
349 else {
350 $location = $root->{end};
251dfd0e 351 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $self->{hash_size} + $root->{file_offset}, SEEK_SET);
352 print( $fh pack($self->{long_pack}, $location) );
20f7b20c 353 }
354
355 last;
356 }
357 }
358
359 ##
360 # If this is an internal reference, return now.
361 # No need to write value or plain key
362 ##
363 if ($internal_ref) {
364 return $result;
365 }
366
367 ##
368 # If bucket didn't fit into list, split into a new index level
369 ##
370 if (!$location) {
371 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
251dfd0e 372 print( $fh pack($self->{long_pack}, $root->{end}) );
20f7b20c 373
ec1bce6b 374 my $index_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_INDEX, chr(0) x $self->{index_size});
20f7b20c 375 my @offsets = ();
376
251dfd0e 377 $keys .= $md5 . pack($self->{long_pack}, 0);
20f7b20c 378
251dfd0e 379 for (my $i=0; $i<=$self->{max_buckets}; $i++) {
380 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
20f7b20c 381 if ($key) {
251dfd0e 382 my $old_subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) +
383 $self->{hash_size}, $self->{long_size}));
20f7b20c 384 my $num = ord(substr($key, $tag->{ch} + 1, 1));
385
386 if ($offsets[$num]) {
251dfd0e 387 my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $self->{data_size};
20f7b20c 388 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
389 my $subkeys;
251dfd0e 390 read( $fh, $subkeys, $self->{bucket_list_size});
20f7b20c 391
251dfd0e 392 for (my $k=0; $k<$self->{max_buckets}; $k++) {
393 my $subloc = unpack($self->{long_pack}, substr($subkeys, ($k * $self->{bucket_size}) +
394 $self->{hash_size}, $self->{long_size}));
20f7b20c 395 if (!$subloc) {
251dfd0e 396 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
397 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
20f7b20c 398 last;
399 }
400 } # k loop
401 }
402 else {
403 $offsets[$num] = $root->{end};
251dfd0e 404 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
405 print( $fh pack($self->{long_pack}, $root->{end}) );
20f7b20c 406
251dfd0e 407 my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $self->{bucket_list_size});
20f7b20c 408
409 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
251dfd0e 410 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
20f7b20c 411 }
412 } # key is real
413 } # i loop
414
415 $location ||= $root->{end};
416 } # re-index bucket list
417
418 ##
419 # Seek to content area and store signature, value and plaintext key
420 ##
421 if ($location) {
422 my $content_length;
423 seek($fh, $location + $root->{file_offset}, SEEK_SET);
424
425 ##
426 # Write signature based on content type, set content length and write actual value.
427 ##
428 my $r = Scalar::Util::reftype($value) || '';
429 if ($r eq 'HASH') {
430 print( $fh DBM::Deep->TYPE_HASH );
251dfd0e 431 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 432 $content_length = $self->{index_size};
20f7b20c 433 }
434 elsif ($r eq 'ARRAY') {
435 print( $fh DBM::Deep->TYPE_ARRAY );
251dfd0e 436 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 437 $content_length = $self->{index_size};
20f7b20c 438 }
439 elsif (!defined($value)) {
440 print( $fh DBM::Deep->SIG_NULL );
251dfd0e 441 print( $fh pack($self->{data_pack}, 0) );
20f7b20c 442 $content_length = 0;
443 }
444 else {
445 print( $fh DBM::Deep->SIG_DATA );
251dfd0e 446 print( $fh pack($self->{data_pack}, length($value)) . $value );
20f7b20c 447 $content_length = length($value);
448 }
449
450 ##
451 # Plain key is stored AFTER value, as keys are typically fetched less often.
452 ##
251dfd0e 453 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
20f7b20c 454
455 ##
456 # If value is blessed, preserve class name
457 ##
458 if ( $root->{autobless} ) {
459 my $value_class = Scalar::Util::blessed($value);
460 if ( defined $value_class && $value_class ne 'DBM::Deep' ) {
461 ##
462 # Blessed ref -- will restore later
463 ##
464 print( $fh chr(1) );
251dfd0e 465 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
20f7b20c 466 $content_length += 1;
251dfd0e 467 $content_length += $self->{data_size} + length($value_class);
20f7b20c 468 }
469 else {
470 print( $fh chr(0) );
471 $content_length += 1;
472 }
473 }
474
475 ##
476 # If this is a new content area, advance EOF counter
477 ##
478 if ($location == $root->{end}) {
479 $root->{end} += DBM::Deep->SIG_SIZE;
251dfd0e 480 $root->{end} += $self->{data_size} + $content_length;
481 $root->{end} += $self->{data_size} + length($plain_key);
20f7b20c 482 }
483
484 ##
485 # If content is a hash or array, create new child DBM::Deep object and
486 # pass each key or element to it.
487 ##
488 if ($r eq 'HASH') {
489 my $branch = DBM::Deep->new(
490 type => DBM::Deep->TYPE_HASH,
491 base_offset => $location,
492 root => $root,
493 );
494 foreach my $key (keys %{$value}) {
495 $branch->STORE( $key, $value->{$key} );
496 }
497 }
498 elsif ($r eq 'ARRAY') {
499 my $branch = DBM::Deep->new(
500 type => DBM::Deep->TYPE_ARRAY,
501 base_offset => $location,
502 root => $root,
503 );
504 my $index = 0;
505 foreach my $element (@{$value}) {
506 $branch->STORE( $index, $element );
507 $index++;
508 }
509 }
510
511 return $result;
512 }
d4b1166e 513
20f7b20c 514 return $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file");
d4b1166e 515}
516
9020ee8c 517sub get_bucket_value {
518 ##
519 # Fetch single value given tag and MD5 digested key.
520 ##
521 my $self = shift;
522 my ($obj, $tag, $md5) = @_;
523 my $keys = $tag->{content};
524
525 my $fh = $obj->_fh;
526
527 ##
528 # Iterate through buckets, looking for a key match
529 ##
530 BUCKET:
251dfd0e 531 for (my $i=0; $i<$self->{max_buckets}; $i++) {
532 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
533 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
9020ee8c 534
535 if (!$subloc) {
536 ##
537 # Hit end of list, no match
538 ##
539 return;
540 }
541
542 if ( $md5 ne $key ) {
543 next BUCKET;
544 }
545
546 ##
547 # Found match -- seek to offset and read signature
548 ##
549 my $signature;
550 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
551 read( $fh, $signature, DBM::Deep->SIG_SIZE);
552
553 ##
554 # If value is a hash or array, return new DBM::Deep object with correct offset
555 ##
556 if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) {
557 my $obj = DBM::Deep->new(
558 type => $signature,
559 base_offset => $subloc,
560 root => $obj->_root,
561 );
562
563 if ($obj->_root->{autobless}) {
564 ##
565 # Skip over value and plain key to see if object needs
566 # to be re-blessed
567 ##
251dfd0e 568 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
9020ee8c 569
570 my $size;
251dfd0e 571 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 572 if ($size) { seek($fh, $size, SEEK_CUR); }
573
574 my $bless_bit;
575 read( $fh, $bless_bit, 1);
576 if (ord($bless_bit)) {
577 ##
578 # Yes, object needs to be re-blessed
579 ##
580 my $class_name;
251dfd0e 581 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 582 if ($size) { read( $fh, $class_name, $size); }
583 if ($class_name) { $obj = bless( $obj, $class_name ); }
584 }
585 }
586
587 return $obj;
588 }
589
590 ##
591 # Otherwise return actual value
592 ##
593 elsif ($signature eq DBM::Deep->SIG_DATA) {
594 my $size;
595 my $value = '';
251dfd0e 596 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 597 if ($size) { read( $fh, $value, $size); }
598 return $value;
599 }
600
601 ##
602 # Key exists, but content is null
603 ##
604 else { return; }
605 } # i loop
606
607 return;
608}
ab0e4957 609
610sub delete_bucket {
611 ##
612 # Delete single key/value pair given tag and MD5 digested key.
613 ##
614 my $self = shift;
615 my ($obj, $tag, $md5) = @_;
616 my $keys = $tag->{content};
617
618 my $fh = $obj->_fh;
619
620 ##
621 # Iterate through buckets, looking for a key match
622 ##
623 BUCKET:
251dfd0e 624 for (my $i=0; $i<$self->{max_buckets}; $i++) {
625 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
626 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
ab0e4957 627
628 if (!$subloc) {
629 ##
630 # Hit end of list, no match
631 ##
632 return;
633 }
634
635 if ( $md5 ne $key ) {
636 next BUCKET;
637 }
638
639 ##
640 # Matched key -- delete bucket and return
641 ##
251dfd0e 642 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $obj->_root->{file_offset}, SEEK_SET);
643 print( $fh substr($keys, ($i+1) * $self->{bucket_size} ) );
644 print( $fh chr(0) x $self->{bucket_size} );
ab0e4957 645
646 return 1;
647 } # i loop
648
649 return;
650}
651
912d50b1 652sub bucket_exists {
653 ##
654 # Check existence of single key given tag and MD5 digested key.
655 ##
656 my $self = shift;
657 my ($obj, $tag, $md5) = @_;
658 my $keys = $tag->{content};
659
660 ##
661 # Iterate through buckets, looking for a key match
662 ##
663 BUCKET:
251dfd0e 664 for (my $i=0; $i<$self->{max_buckets}; $i++) {
665 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
666 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
912d50b1 667
668 if (!$subloc) {
669 ##
670 # Hit end of list, no match
671 ##
672 return;
673 }
674
675 if ( $md5 ne $key ) {
676 next BUCKET;
677 }
678
679 ##
680 # Matched key -- return true
681 ##
682 return 1;
683 } # i loop
684
685 return;
686}
687
6736c116 688sub find_bucket_list {
689 ##
690 # Locate offset for bucket list, given digested key
691 ##
692 my $self = shift;
693 my ($obj, $md5) = @_;
694
695 ##
696 # Locate offset for bucket list using digest index system
697 ##
698 my $ch = 0;
699 my $tag = $self->load_tag($obj, $obj->_base_offset);
700 if (!$tag) { return; }
701
702 while ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
703 $tag = $self->index_lookup($obj, $tag, ord(substr($md5, $ch, 1)));
704 if (!$tag) { return; }
705 $ch++;
706 }
707
708 return $tag;
709}
710
711sub traverse_index {
712 ##
713 # Scan index and recursively step into deeper levels, looking for next key.
714 ##
715 my $self = shift;
716 my ($obj, $offset, $ch, $force_return_next) = @_;
717 $force_return_next = undef unless $force_return_next;
718
719 my $tag = $self->load_tag($obj, $offset );
720
721 my $fh = $obj->_fh;
722
723 if ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
724 my $content = $tag->{content};
725 my $start;
726 if ($obj->{return_next}) { $start = 0; }
727 else { $start = ord(substr($obj->{prev_md5}, $ch, 1)); }
728
729 for (my $index = $start; $index < 256; $index++) {
251dfd0e 730 my $subloc = unpack($self->{long_pack}, substr($content, $index * $self->{long_size}, $self->{long_size}) );
6736c116 731 if ($subloc) {
732 my $result = $self->traverse_index( $obj, $subloc, $ch + 1, $force_return_next );
733 if (defined($result)) { return $result; }
734 }
735 } # index loop
736
737 $obj->{return_next} = 1;
738 } # tag is an index
739
740 elsif ($tag->{signature} eq DBM::Deep->SIG_BLIST) {
741 my $keys = $tag->{content};
742 if ($force_return_next) { $obj->{return_next} = 1; }
743
744 ##
745 # Iterate through buckets, looking for a key match
746 ##
251dfd0e 747 for (my $i=0; $i<$self->{max_buckets}; $i++) {
748 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
749 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
6736c116 750
751 if (!$subloc) {
752 ##
753 # End of bucket list -- return to outer loop
754 ##
755 $obj->{return_next} = 1;
756 last;
757 }
758 elsif ($key eq $obj->{prev_md5}) {
759 ##
760 # Located previous key -- return next one found
761 ##
762 $obj->{return_next} = 1;
763 next;
764 }
765 elsif ($obj->{return_next}) {
766 ##
767 # Seek to bucket location and skip over signature
768 ##
769 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET);
770
771 ##
772 # Skip over value to get to plain key
773 ##
774 my $size;
251dfd0e 775 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
6736c116 776 if ($size) { seek($fh, $size, SEEK_CUR); }
777
778 ##
779 # Read in plain key and return as scalar
780 ##
781 my $plain_key;
251dfd0e 782 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
6736c116 783 if ($size) { read( $fh, $plain_key, $size); }
784
785 return $plain_key;
786 }
787 } # bucket loop
788
789 $obj->{return_next} = 1;
790 } # tag is a bucket list
791
792 return;
793}
794
795sub get_next_key {
796 ##
797 # Locate next key, given digested previous one
798 ##
799 my $self = shift;
800 my ($obj) = @_;
801
802 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
803 $obj->{return_next} = 0;
804
805 ##
806 # If the previous key was not specifed, start at the top and
807 # return the first one found.
808 ##
809 if (!$obj->{prev_md5}) {
251dfd0e 810 $obj->{prev_md5} = chr(0) x $self->{hash_size};
6736c116 811 $obj->{return_next} = 1;
812 }
813
814 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
815}
816
a20d9a3f 8171;
818__END__