Removed warning, but left test directory
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
e5fc7e69 2use XXX;
a20d9a3f 3
4use strict;
5
6use Fcntl qw( :DEFAULT :flock :seek );
7
612969fb 8sub precalc_sizes {
beac1dff 9 ##
10 # Precalculate index, bucket and bucket list sizes
11 ##
251dfd0e 12 my $self = shift;
1bf65be7 13
251dfd0e 14 $self->{index_size} = (2**8) * $self->{long_size};
15 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
16 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 17
251dfd0e 18 return 1;
1bf65be7 19}
20
21sub set_pack {
beac1dff 22 ##
23 # Set pack/unpack modes (see file header for more)
24 ##
251dfd0e 25 my $self = shift;
1bf65be7 26 my ($long_s, $long_p, $data_s, $data_p) = @_;
27
81d16922 28 ##
29 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file.
beac1dff 30 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 31 #
32 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 33 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 34 ##
251dfd0e 35 $self->{long_size} = $long_s ? $long_s : 4;
36 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 37
81d16922 38 ##
39 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value.
40 # Upgrading this is possible (see above) but probably not necessary. If you need
41 # more than 4 GB for a single key or value, this module is really not for you :-)
42 ##
251dfd0e 43 $self->{data_size} = $data_s ? $data_s : 4;
44 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 45
beac1dff 46 return $self->precalc_sizes();
1bf65be7 47}
48
49sub set_digest {
beac1dff 50 ##
51 # Set key digest function (default is MD5)
52 ##
251dfd0e 53 my $self = shift;
1bf65be7 54 my ($digest_func, $hash_size) = @_;
55
d0b74c17 56 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 57 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 58
beac1dff 59 return $self->precalc_sizes();
612969fb 60}
61
62sub new {
63 my $class = shift;
64 my ($args) = @_;
65
66 my $self = bless {
67 long_size => 4,
68 long_pack => 'N',
69 data_size => 4,
70 data_pack => 'N',
251dfd0e 71
612969fb 72 digest => \&Digest::MD5::md5,
73 hash_size => 16,
251dfd0e 74
81d16922 75 ##
76 # Maximum number of buckets per list before another level of indexing is done.
77 # Increase this value for slightly greater speed, but larger database files.
78 # DO NOT decrease this value below 16, due to risk of recursive reindex overrun.
79 ##
612969fb 80 max_buckets => 16,
81 }, $class;
82
251dfd0e 83 $self->precalc_sizes;
612969fb 84
85 return $self;
1bf65be7 86}
87
70b55428 88sub setup_fh {
89 my $self = shift;
90 my ($obj) = @_;
91
92 $self->open( $obj ) if !defined $obj->_fh;
93
673464d9 94 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 95 unless ( $obj->_root->{inode} ) {
96 my @stats = stat($obj->_fh);
97 $obj->_root->{inode} = $stats[1];
98 $obj->_root->{end} = $stats[7];
99 }
100
101 return 1;
102}
103
a20d9a3f 104sub open {
20f7b20c 105 ##
106 # Open a fh to the database, create if nonexistent.
107 # Make sure file signature matches DBM::Deep spec.
108 ##
a20d9a3f 109 my $self = shift;
70b55428 110 my ($obj) = @_;
a20d9a3f 111
3d1b8be9 112 if (defined($obj->_fh)) { $self->close_fh( $obj ); }
20f7b20c 113
673464d9 114 # Theoretically, adding O_BINARY should remove the need for the binmode
115 # Of course, testing it is going to be ... interesting.
116 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 117
673464d9 118 my $fh;
119 sysopen( $fh, $obj->_root->{file}, $flags )
120 or $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!");
121 $obj->_root->{fh} = $fh;
a20d9a3f 122
123 #XXX Can we remove this by using the right sysopen() flags?
124 # Maybe ... q.v. above
125 binmode $fh; # for win32
126
cd59cad8 127 if ($obj->_root->{autoflush}) {
a20d9a3f 128 my $old = select $fh;
129 $|=1;
130 select $old;
131 }
20f7b20c 132
cd59cad8 133 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 134
135 my $signature;
136 my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE));
20f7b20c 137
a20d9a3f 138 ##
139 # File is empty -- write signature and master index
140 ##
141 if (!$bytes_read) {
cd59cad8 142 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 143 print( $fh DBM::Deep->SIG_FILE);
d0b74c17 144
ec1bce6b 145 $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $self->{index_size});
a20d9a3f 146
e5fc7e69 147 # Why is this being printed here? I'm not seeing where anything actually points to
148 # this spot.
149 #XXX $obj->_root->{end} isn't updated from these 10 bytes that are being written
a20d9a3f 150 my $plain_key = "[base]";
251dfd0e 151 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
a20d9a3f 152
153 # Flush the filehandle
154 my $old_fh = select $fh;
155 my $old_af = $|; $| = 1; $| = $old_af;
156 select $old_fh;
157
a20d9a3f 158 return 1;
159 }
20f7b20c 160
a20d9a3f 161 ##
162 # Check signature was valid
163 ##
164 unless ($signature eq DBM::Deep->SIG_FILE) {
3d1b8be9 165 $self->close_fh( $obj );
e5fc7e69 166 $obj->_throw_error("Signature not found -- file is not a Deep DB");
a20d9a3f 167 }
168
a20d9a3f 169 ##
170 # Get our type from master index signature
171 ##
e5fc7e69 172 my $tag = $self->load_tag($obj, $obj->_base_offset)
173 or $obj->_throw_error("Corrupted file, no master index record");
d0b74c17 174
e5fc7e69 175 unless ($obj->{type} eq $tag->{signature}) {
176 $obj->_throw_error("File type mismatch");
a20d9a3f 177 }
20f7b20c 178
d0b74c17 179#XXX We probably also want to store the hash algorithm name and not assume anything
180#XXX The cool thing would be to allow a different hashing algorithm at every level
181
a20d9a3f 182 return 1;
183}
184
3d1b8be9 185sub close_fh {
cd59cad8 186 my $self = shift;
a21f2d90 187 my ($obj) = @_;
cd59cad8 188
189 if ( my $fh = $obj->_root->{fh} ) {
190 close $fh;
191 }
192 $obj->_root->{fh} = undef;
193
194 return 1;
195}
196
d4b1166e 197sub create_tag {
20f7b20c 198 ##
199 # Given offset, signature and content, create tag and write to disk
200 ##
d4b1166e 201 my $self = shift;
20f7b20c 202 my ($obj, $offset, $sig, $content) = @_;
203 my $size = length($content);
204
d4b1166e 205 my $fh = $obj->_fh;
206
20f7b20c 207 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
251dfd0e 208 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 209
210 if ($offset == $obj->_root->{end}) {
251dfd0e 211 $obj->_root->{end} += DBM::Deep->SIG_SIZE + $self->{data_size} + $size;
20f7b20c 212 }
213
214 return {
215 signature => $sig,
216 size => $size,
251dfd0e 217 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 218 content => $content
219 };
d4b1166e 220}
221
222sub load_tag {
20f7b20c 223 ##
224 # Given offset, load single tag and return signature, size and data
225 ##
d4b1166e 226 my $self = shift;
20f7b20c 227 my ($obj, $offset) = @_;
228
d4b1166e 229 my $fh = $obj->_fh;
230
20f7b20c 231 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 232
233 #XXX I'm not sure this check will work given autoflush ...
234 return if eof $fh;
20f7b20c 235
d4b1166e 236 my $b;
251dfd0e 237 read( $fh, $b, DBM::Deep->SIG_SIZE + $self->{data_size} );
238 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 239
240 my $buffer;
241 read( $fh, $buffer, $size);
242
243 return {
244 signature => $sig,
245 size => $size,
251dfd0e 246 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 247 content => $buffer
248 };
d4b1166e 249}
250
20f7b20c 251sub add_bucket {
252 ##
253 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
254 # plain (undigested) key and value.
255 ##
d4b1166e 256 my $self = shift;
20f7b20c 257 my ($obj, $tag, $md5, $plain_key, $value) = @_;
258 my $keys = $tag->{content};
259 my $location = 0;
260 my $result = 2;
261
262 my $root = $obj->_root;
263
264 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) };
265 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
266
267 my $fh = $obj->_fh;
268
269 ##
270 # Iterate through buckets, seeing if this is a new entry or a replace.
271 ##
d0b74c17 272 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
251dfd0e 273 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
20f7b20c 274 if (!$subloc) {
275 ##
276 # Found empty bucket (end of list). Populate and exit loop.
277 ##
278 $result = 2;
279
280 $location = $internal_ref
281 ? $value->_base_offset
282 : $root->{end};
e5fc7e69 283print "NEW: $location\n";
20f7b20c 284
251dfd0e 285 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
286 print( $fh $md5 . pack($self->{long_pack}, $location) );
20f7b20c 287 last;
288 }
289
251dfd0e 290 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
20f7b20c 291 if ($md5 eq $key) {
292 ##
293 # Found existing bucket with same key. Replace with new value.
294 ##
295 $result = 1;
296
297 if ($internal_ref) {
298 $location = $value->_base_offset;
251dfd0e 299 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
300 print( $fh $md5 . pack($self->{long_pack}, $location) );
20f7b20c 301 return $result;
302 }
303
304 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET);
305 my $size;
251dfd0e 306 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
20f7b20c 307
308 ##
309 # If value is a hash, array, or raw value with equal or less size, we can
310 # reuse the same content area of the database. Otherwise, we have to create
311 # a new content area at the EOF.
312 ##
313 my $actual_length;
314 my $r = Scalar::Util::reftype( $value ) || '';
315 if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
ec1bce6b 316 $actual_length = $self->{index_size};
20f7b20c 317
318 # if autobless is enabled, must also take into consideration
319 # the class name, as it is stored along with key/value.
320 if ( $root->{autobless} ) {
321 my $value_class = Scalar::Util::blessed($value);
322 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
323 $actual_length += length($value_class);
324 }
325 }
326 }
327 else { $actual_length = length($value); }
328
329 if ($actual_length <= $size) {
330 $location = $subloc;
331 }
332 else {
333 $location = $root->{end};
251dfd0e 334 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $self->{hash_size} + $root->{file_offset}, SEEK_SET);
335 print( $fh pack($self->{long_pack}, $location) );
20f7b20c 336 }
337
338 last;
339 }
340 }
341
342 ##
343 # If this is an internal reference, return now.
344 # No need to write value or plain key
345 ##
346 if ($internal_ref) {
347 return $result;
348 }
349
350 ##
351 # If bucket didn't fit into list, split into a new index level
352 ##
353 if (!$location) {
354 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
251dfd0e 355 print( $fh pack($self->{long_pack}, $root->{end}) );
20f7b20c 356
ec1bce6b 357 my $index_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_INDEX, chr(0) x $self->{index_size});
20f7b20c 358 my @offsets = ();
359
251dfd0e 360 $keys .= $md5 . pack($self->{long_pack}, 0);
20f7b20c 361
251dfd0e 362 for (my $i=0; $i<=$self->{max_buckets}; $i++) {
363 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
20f7b20c 364 if ($key) {
251dfd0e 365 my $old_subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) +
366 $self->{hash_size}, $self->{long_size}));
20f7b20c 367 my $num = ord(substr($key, $tag->{ch} + 1, 1));
368
369 if ($offsets[$num]) {
251dfd0e 370 my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $self->{data_size};
20f7b20c 371 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
372 my $subkeys;
251dfd0e 373 read( $fh, $subkeys, $self->{bucket_list_size});
20f7b20c 374
251dfd0e 375 for (my $k=0; $k<$self->{max_buckets}; $k++) {
376 my $subloc = unpack($self->{long_pack}, substr($subkeys, ($k * $self->{bucket_size}) +
377 $self->{hash_size}, $self->{long_size}));
20f7b20c 378 if (!$subloc) {
251dfd0e 379 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
380 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
20f7b20c 381 last;
382 }
383 } # k loop
384 }
385 else {
386 $offsets[$num] = $root->{end};
251dfd0e 387 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
388 print( $fh pack($self->{long_pack}, $root->{end}) );
20f7b20c 389
251dfd0e 390 my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $self->{bucket_list_size});
20f7b20c 391
392 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
251dfd0e 393 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
20f7b20c 394 }
395 } # key is real
396 } # i loop
397
398 $location ||= $root->{end};
399 } # re-index bucket list
400
401 ##
402 # Seek to content area and store signature, value and plaintext key
403 ##
404 if ($location) {
405 my $content_length;
406 seek($fh, $location + $root->{file_offset}, SEEK_SET);
407
408 ##
409 # Write signature based on content type, set content length and write actual value.
410 ##
411 my $r = Scalar::Util::reftype($value) || '';
412 if ($r eq 'HASH') {
413 print( $fh DBM::Deep->TYPE_HASH );
251dfd0e 414 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 415 $content_length = $self->{index_size};
20f7b20c 416 }
417 elsif ($r eq 'ARRAY') {
418 print( $fh DBM::Deep->TYPE_ARRAY );
251dfd0e 419 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 420 $content_length = $self->{index_size};
20f7b20c 421 }
422 elsif (!defined($value)) {
423 print( $fh DBM::Deep->SIG_NULL );
251dfd0e 424 print( $fh pack($self->{data_pack}, 0) );
20f7b20c 425 $content_length = 0;
426 }
427 else {
428 print( $fh DBM::Deep->SIG_DATA );
251dfd0e 429 print( $fh pack($self->{data_pack}, length($value)) . $value );
20f7b20c 430 $content_length = length($value);
431 }
432
433 ##
434 # Plain key is stored AFTER value, as keys are typically fetched less often.
435 ##
251dfd0e 436 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
20f7b20c 437
438 ##
439 # If value is blessed, preserve class name
440 ##
441 if ( $root->{autobless} ) {
442 my $value_class = Scalar::Util::blessed($value);
443 if ( defined $value_class && $value_class ne 'DBM::Deep' ) {
444 ##
445 # Blessed ref -- will restore later
446 ##
447 print( $fh chr(1) );
251dfd0e 448 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
20f7b20c 449 $content_length += 1;
251dfd0e 450 $content_length += $self->{data_size} + length($value_class);
20f7b20c 451 }
452 else {
453 print( $fh chr(0) );
454 $content_length += 1;
455 }
456 }
457
458 ##
459 # If this is a new content area, advance EOF counter
460 ##
461 if ($location == $root->{end}) {
462 $root->{end} += DBM::Deep->SIG_SIZE;
251dfd0e 463 $root->{end} += $self->{data_size} + $content_length;
464 $root->{end} += $self->{data_size} + length($plain_key);
20f7b20c 465 }
466
467 ##
468 # If content is a hash or array, create new child DBM::Deep object and
469 # pass each key or element to it.
470 ##
471 if ($r eq 'HASH') {
472 my $branch = DBM::Deep->new(
473 type => DBM::Deep->TYPE_HASH,
474 base_offset => $location,
475 root => $root,
476 );
477 foreach my $key (keys %{$value}) {
478 $branch->STORE( $key, $value->{$key} );
479 }
480 }
481 elsif ($r eq 'ARRAY') {
482 my $branch = DBM::Deep->new(
483 type => DBM::Deep->TYPE_ARRAY,
484 base_offset => $location,
485 root => $root,
486 );
487 my $index = 0;
488 foreach my $element (@{$value}) {
489 $branch->STORE( $index, $element );
490 $index++;
491 }
492 }
493
494 return $result;
495 }
d4b1166e 496
e5fc7e69 497 $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file");
d4b1166e 498}
499
9020ee8c 500sub get_bucket_value {
beac1dff 501 ##
502 # Fetch single value given tag and MD5 digested key.
503 ##
504 my $self = shift;
505 my ($obj, $tag, $md5) = @_;
506 my $keys = $tag->{content};
9020ee8c 507
508 my $fh = $obj->_fh;
509
beac1dff 510 ##
511 # Iterate through buckets, looking for a key match
512 ##
9020ee8c 513 BUCKET:
beac1dff 514 for (my $i=0; $i<$self->{max_buckets}; $i++) {
515 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
516 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
9020ee8c 517
beac1dff 518 if (!$subloc) {
519 ##
520 # Hit end of list, no match
521 ##
522 return;
523 }
9020ee8c 524
525 if ( $md5 ne $key ) {
526 next BUCKET;
527 }
528
529 ##
530 # Found match -- seek to offset and read signature
531 ##
532 my $signature;
533 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
534 read( $fh, $signature, DBM::Deep->SIG_SIZE);
d0b74c17 535
9020ee8c 536 ##
537 # If value is a hash or array, return new DBM::Deep object with correct offset
538 ##
539 if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) {
540 my $obj = DBM::Deep->new(
541 type => $signature,
542 base_offset => $subloc,
543 root => $obj->_root,
544 );
d0b74c17 545
9020ee8c 546 if ($obj->_root->{autobless}) {
547 ##
548 # Skip over value and plain key to see if object needs
549 # to be re-blessed
550 ##
251dfd0e 551 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
d0b74c17 552
9020ee8c 553 my $size;
251dfd0e 554 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 555 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 556
9020ee8c 557 my $bless_bit;
558 read( $fh, $bless_bit, 1);
559 if (ord($bless_bit)) {
560 ##
561 # Yes, object needs to be re-blessed
562 ##
563 my $class_name;
251dfd0e 564 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 565 if ($size) { read( $fh, $class_name, $size); }
566 if ($class_name) { $obj = bless( $obj, $class_name ); }
567 }
568 }
d0b74c17 569
9020ee8c 570 return $obj;
571 }
d0b74c17 572
9020ee8c 573 ##
574 # Otherwise return actual value
575 ##
576 elsif ($signature eq DBM::Deep->SIG_DATA) {
577 my $size;
578 my $value = '';
251dfd0e 579 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 580 if ($size) { read( $fh, $value, $size); }
581 return $value;
582 }
d0b74c17 583
9020ee8c 584 ##
585 # Key exists, but content is null
586 ##
587 else { return; }
beac1dff 588 } # i loop
9020ee8c 589
beac1dff 590 return;
9020ee8c 591}
ab0e4957 592
593sub delete_bucket {
beac1dff 594 ##
595 # Delete single key/value pair given tag and MD5 digested key.
596 ##
597 my $self = shift;
598 my ($obj, $tag, $md5) = @_;
599 my $keys = $tag->{content};
ab0e4957 600
601 my $fh = $obj->_fh;
d0b74c17 602
beac1dff 603 ##
604 # Iterate through buckets, looking for a key match
605 ##
ab0e4957 606 BUCKET:
beac1dff 607 for (my $i=0; $i<$self->{max_buckets}; $i++) {
608 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
609 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
ab0e4957 610
beac1dff 611 if (!$subloc) {
612 ##
613 # Hit end of list, no match
614 ##
615 return;
616 }
ab0e4957 617
618 if ( $md5 ne $key ) {
619 next BUCKET;
620 }
621
622 ##
623 # Matched key -- delete bucket and return
624 ##
251dfd0e 625 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $obj->_root->{file_offset}, SEEK_SET);
626 print( $fh substr($keys, ($i+1) * $self->{bucket_size} ) );
627 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 628
ab0e4957 629 return 1;
beac1dff 630 } # i loop
ab0e4957 631
beac1dff 632 return;
ab0e4957 633}
634
912d50b1 635sub bucket_exists {
beac1dff 636 ##
637 # Check existence of single key given tag and MD5 digested key.
638 ##
639 my $self = shift;
640 my ($obj, $tag, $md5) = @_;
641 my $keys = $tag->{content};
d0b74c17 642
beac1dff 643 ##
644 # Iterate through buckets, looking for a key match
645 ##
912d50b1 646 BUCKET:
beac1dff 647 for (my $i=0; $i<$self->{max_buckets}; $i++) {
648 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
649 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
912d50b1 650
beac1dff 651 if (!$subloc) {
652 ##
653 # Hit end of list, no match
654 ##
655 return;
656 }
912d50b1 657
658 if ( $md5 ne $key ) {
659 next BUCKET;
660 }
661
662 ##
663 # Matched key -- return true
664 ##
665 return 1;
beac1dff 666 } # i loop
912d50b1 667
beac1dff 668 return;
912d50b1 669}
670
6736c116 671sub find_bucket_list {
beac1dff 672 ##
673 # Locate offset for bucket list, given digested key
674 ##
675 my $self = shift;
d0b74c17 676 my ($obj, $md5, $args) = @_;
677 $args = {} unless $args;
678
beac1dff 679 ##
680 # Locate offset for bucket list using digest index system
681 ##
e5fc7e69 682 my $tag = $self->load_tag($obj, $obj->_base_offset)
683 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
684#print $obj->_base_offset, " : $tag->{signature} : $tag->{offset} : $tag->{size}\n";
d0b74c17 685
e5fc7e69 686 my $ch = 0;
beac1dff 687 while ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
d0b74c17 688 my $num = ord substr($md5, $ch, 1);
689
690 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
691 $tag = $self->index_lookup( $obj, $tag, $num );
692
693 if (!$tag) {
694 if ( $args->{create} ) {
695 my $fh = $obj->_fh;
696 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
697 print( $fh pack($self->{long_pack}, $obj->_root->{end}) );
698
699 $tag = $self->create_tag(
700 $obj, $obj->_root->{end},
701 DBM::Deep->SIG_BLIST,
702 chr(0) x $self->{bucket_list_size},
703 );
704
705 $tag->{ref_loc} = $ref_loc;
706 $tag->{ch} = $ch;
707
708 last;
709 }
710 else {
711 return;
712 }
713 }
714
715 $tag->{ch} = $ch;
716 $tag->{ref_loc} = $ref_loc;
717
beac1dff 718 $ch++;
719 }
d0b74c17 720
beac1dff 721 return $tag;
6736c116 722}
723
d0b74c17 724sub index_lookup {
725 ##
726 # Given index tag, lookup single entry in index and return .
727 ##
728 my $self = shift;
729 my ($obj, $tag, $index) = @_;
730
731 my $location = unpack(
732 $self->{long_pack},
733 substr(
734 $tag->{content},
735 $index * $self->{long_size},
736 $self->{long_size},
737 ),
738 );
739
740 if (!$location) { return; }
741
742 return $self->load_tag( $obj, $location );
743}
744
6736c116 745sub traverse_index {
beac1dff 746 ##
747 # Scan index and recursively step into deeper levels, looking for next key.
748 ##
6736c116 749 my $self = shift;
750 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 751
beac1dff 752 my $tag = $self->load_tag($obj, $offset );
6736c116 753
754 my $fh = $obj->_fh;
d0b74c17 755
beac1dff 756 if ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
757 my $content = $tag->{content};
e5fc7e69 758 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 759
beac1dff 760 for (my $index = $start; $index < 256; $index++) {
e5fc7e69 761 my $subloc = unpack(
762 $self->{long_pack},
763 substr($content, $index * $self->{long_size}, $self->{long_size}),
764 );
765
beac1dff 766 if ($subloc) {
e5fc7e69 767 my $result = $self->traverse_index(
768 $obj, $subloc, $ch + 1, $force_return_next,
769 );
770
beac1dff 771 if (defined($result)) { return $result; }
772 }
773 } # index loop
d0b74c17 774
beac1dff 775 $obj->{return_next} = 1;
776 } # tag is an index
d0b74c17 777
e5fc7e69 778 else {
beac1dff 779 my $keys = $tag->{content};
780 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 781
beac1dff 782 ##
783 # Iterate through buckets, looking for a key match
784 ##
785 for (my $i=0; $i<$self->{max_buckets}; $i++) {
786 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
e5fc7e69 787 my $subloc = unpack(
788 $self->{long_pack},
789 substr(
790 $keys,
791 ($i * $self->{bucket_size}) + $self->{hash_size},
792 $self->{long_size},
793 ),
794 );
d0b74c17 795
beac1dff 796 if (!$subloc) {
797 ##
798 # End of bucket list -- return to outer loop
799 ##
800 $obj->{return_next} = 1;
801 last;
802 }
803 elsif ($key eq $obj->{prev_md5}) {
804 ##
805 # Located previous key -- return next one found
806 ##
807 $obj->{return_next} = 1;
808 next;
809 }
810 elsif ($obj->{return_next}) {
811 ##
812 # Seek to bucket location and skip over signature
813 ##
814 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 815
beac1dff 816 ##
817 # Skip over value to get to plain key
818 ##
819 my $size;
e5fc7e69 820 read( $fh, $size, $self->{data_size});
821 $size = unpack($self->{data_pack}, $size);
beac1dff 822 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 823
beac1dff 824 ##
825 # Read in plain key and return as scalar
826 ##
827 my $plain_key;
e5fc7e69 828 read( $fh, $size, $self->{data_size});
829 $size = unpack($self->{data_pack}, $size);
beac1dff 830 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 831
beac1dff 832 return $plain_key;
833 }
834 } # bucket loop
d0b74c17 835
beac1dff 836 $obj->{return_next} = 1;
837 } # tag is a bucket list
d0b74c17 838
beac1dff 839 return;
6736c116 840}
841
842sub get_next_key {
beac1dff 843 ##
844 # Locate next key, given digested previous one
845 ##
6736c116 846 my $self = shift;
847 my ($obj) = @_;
d0b74c17 848
beac1dff 849 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
850 $obj->{return_next} = 0;
d0b74c17 851
beac1dff 852 ##
853 # If the previous key was not specifed, start at the top and
854 # return the first one found.
855 ##
856 if (!$obj->{prev_md5}) {
857 $obj->{prev_md5} = chr(0) x $self->{hash_size};
858 $obj->{return_next} = 1;
859 }
d0b74c17 860
beac1dff 861 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 862}
863
a20d9a3f 8641;
865__END__