added key to _get_subloc after figuring out the correct unpack magic
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
612969fb 7sub precalc_sizes {
beac1dff 8 ##
9 # Precalculate index, bucket and bucket list sizes
10 ##
251dfd0e 11 my $self = shift;
1bf65be7 12
251dfd0e 13 $self->{index_size} = (2**8) * $self->{long_size};
14 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
15 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 16
251dfd0e 17 return 1;
1bf65be7 18}
19
20sub set_pack {
beac1dff 21 ##
22 # Set pack/unpack modes (see file header for more)
23 ##
251dfd0e 24 my $self = shift;
1bf65be7 25 my ($long_s, $long_p, $data_s, $data_p) = @_;
26
81d16922 27 ##
75be6413 28 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
29 # GB per file.
beac1dff 30 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 31 #
32 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 33 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 34 ##
251dfd0e 35 $self->{long_size} = $long_s ? $long_s : 4;
36 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 37
81d16922 38 ##
75be6413 39 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
40 # key/value. Upgrading this is possible (see above) but probably not necessary.
41 # If you need more than 4 GB for a single key or value, this module is really
42 # not for you :-)
81d16922 43 ##
251dfd0e 44 $self->{data_size} = $data_s ? $data_s : 4;
45 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 46
beac1dff 47 return $self->precalc_sizes();
1bf65be7 48}
49
50sub set_digest {
beac1dff 51 ##
52 # Set key digest function (default is MD5)
53 ##
251dfd0e 54 my $self = shift;
1bf65be7 55 my ($digest_func, $hash_size) = @_;
56
d0b74c17 57 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 58 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 59
beac1dff 60 return $self->precalc_sizes();
612969fb 61}
62
63sub new {
64 my $class = shift;
65 my ($args) = @_;
66
67 my $self = bless {
68 long_size => 4,
69 long_pack => 'N',
70 data_size => 4,
71 data_pack => 'N',
251dfd0e 72
612969fb 73 digest => \&Digest::MD5::md5,
74 hash_size => 16,
251dfd0e 75
81d16922 76 ##
77 # Maximum number of buckets per list before another level of indexing is done.
78 # Increase this value for slightly greater speed, but larger database files.
79 # DO NOT decrease this value below 16, due to risk of recursive reindex overrun.
80 ##
612969fb 81 max_buckets => 16,
82 }, $class;
83
251dfd0e 84 $self->precalc_sizes;
612969fb 85
86 return $self;
1bf65be7 87}
88
70b55428 89sub setup_fh {
90 my $self = shift;
91 my ($obj) = @_;
92
93 $self->open( $obj ) if !defined $obj->_fh;
94
673464d9 95 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 96 unless ( $obj->_root->{inode} ) {
97 my @stats = stat($obj->_fh);
98 $obj->_root->{inode} = $stats[1];
99 $obj->_root->{end} = $stats[7];
100 }
101
102 return 1;
103}
104
a20d9a3f 105sub open {
20f7b20c 106 ##
107 # Open a fh to the database, create if nonexistent.
108 # Make sure file signature matches DBM::Deep spec.
109 ##
a20d9a3f 110 my $self = shift;
70b55428 111 my ($obj) = @_;
a20d9a3f 112
3d1b8be9 113 if (defined($obj->_fh)) { $self->close_fh( $obj ); }
20f7b20c 114
673464d9 115 # Theoretically, adding O_BINARY should remove the need for the binmode
116 # Of course, testing it is going to be ... interesting.
117 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 118
673464d9 119 my $fh;
120 sysopen( $fh, $obj->_root->{file}, $flags )
121 or $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!");
122 $obj->_root->{fh} = $fh;
a20d9a3f 123
124 #XXX Can we remove this by using the right sysopen() flags?
125 # Maybe ... q.v. above
126 binmode $fh; # for win32
127
cd59cad8 128 if ($obj->_root->{autoflush}) {
a20d9a3f 129 my $old = select $fh;
130 $|=1;
131 select $old;
132 }
20f7b20c 133
cd59cad8 134 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 135
136 my $signature;
137 my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE));
20f7b20c 138
a20d9a3f 139 ##
140 # File is empty -- write signature and master index
141 ##
142 if (!$bytes_read) {
cd59cad8 143 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 144 print( $fh DBM::Deep->SIG_FILE);
d0b74c17 145
ec1bce6b 146 $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $self->{index_size});
a20d9a3f 147
e5fc7e69 148 # Why is this being printed here? I'm not seeing where anything actually points to
149 # this spot.
150 #XXX $obj->_root->{end} isn't updated from these 10 bytes that are being written
a20d9a3f 151 my $plain_key = "[base]";
251dfd0e 152 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
a20d9a3f 153
154 # Flush the filehandle
155 my $old_fh = select $fh;
156 my $old_af = $|; $| = 1; $| = $old_af;
157 select $old_fh;
158
a20d9a3f 159 return 1;
160 }
20f7b20c 161
a20d9a3f 162 ##
163 # Check signature was valid
164 ##
165 unless ($signature eq DBM::Deep->SIG_FILE) {
3d1b8be9 166 $self->close_fh( $obj );
e5fc7e69 167 $obj->_throw_error("Signature not found -- file is not a Deep DB");
a20d9a3f 168 }
169
a20d9a3f 170 ##
171 # Get our type from master index signature
172 ##
e5fc7e69 173 my $tag = $self->load_tag($obj, $obj->_base_offset)
174 or $obj->_throw_error("Corrupted file, no master index record");
d0b74c17 175
e5fc7e69 176 unless ($obj->{type} eq $tag->{signature}) {
177 $obj->_throw_error("File type mismatch");
a20d9a3f 178 }
20f7b20c 179
d0b74c17 180#XXX We probably also want to store the hash algorithm name and not assume anything
181#XXX The cool thing would be to allow a different hashing algorithm at every level
182
a20d9a3f 183 return 1;
184}
185
3d1b8be9 186sub close_fh {
cd59cad8 187 my $self = shift;
a21f2d90 188 my ($obj) = @_;
cd59cad8 189
190 if ( my $fh = $obj->_root->{fh} ) {
191 close $fh;
192 }
193 $obj->_root->{fh} = undef;
194
195 return 1;
196}
197
d4b1166e 198sub create_tag {
20f7b20c 199 ##
200 # Given offset, signature and content, create tag and write to disk
201 ##
d4b1166e 202 my $self = shift;
20f7b20c 203 my ($obj, $offset, $sig, $content) = @_;
204 my $size = length($content);
205
d4b1166e 206 my $fh = $obj->_fh;
207
20f7b20c 208 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
251dfd0e 209 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 210
211 if ($offset == $obj->_root->{end}) {
251dfd0e 212 $obj->_root->{end} += DBM::Deep->SIG_SIZE + $self->{data_size} + $size;
20f7b20c 213 }
214
215 return {
216 signature => $sig,
217 size => $size,
251dfd0e 218 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 219 content => $content
220 };
d4b1166e 221}
222
223sub load_tag {
20f7b20c 224 ##
225 # Given offset, load single tag and return signature, size and data
226 ##
d4b1166e 227 my $self = shift;
20f7b20c 228 my ($obj, $offset) = @_;
229
d4b1166e 230 my $fh = $obj->_fh;
231
20f7b20c 232 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 233
75be6413 234 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 235 return if eof $fh;
20f7b20c 236
d4b1166e 237 my $b;
251dfd0e 238 read( $fh, $b, DBM::Deep->SIG_SIZE + $self->{data_size} );
239 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 240
241 my $buffer;
242 read( $fh, $buffer, $size);
243
244 return {
245 signature => $sig,
246 size => $size,
251dfd0e 247 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 248 content => $buffer
249 };
d4b1166e 250}
251
20f7b20c 252sub add_bucket {
253 ##
254 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
255 # plain (undigested) key and value.
256 ##
d4b1166e 257 my $self = shift;
20f7b20c 258 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 259
20f7b20c 260 my $keys = $tag->{content};
261 my $location = 0;
262 my $result = 2;
263
264 my $root = $obj->_root;
265
266 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) };
267 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
268
269 my $fh = $obj->_fh;
270
271 ##
272 # Iterate through buckets, seeing if this is a new entry or a replace.
273 ##
75be6413 274 BUCKET:
d0b74c17 275 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 276 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 277
20f7b20c 278 if (!$subloc) {
279 ##
280 # Found empty bucket (end of list). Populate and exit loop.
281 ##
282 $result = 2;
283
284 $location = $internal_ref
285 ? $value->_base_offset
286 : $root->{end};
e5fc7e69 287print "NEW: $location\n";
20f7b20c 288
75be6413 289 seek(
290 $fh,
291 $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset},
292 SEEK_SET,
293 );
294
251dfd0e 295 print( $fh $md5 . pack($self->{long_pack}, $location) );
20f7b20c 296 last;
297 }
298
75be6413 299 if ( $md5 ne $key ) {
300 next BUCKET;
301 }
20f7b20c 302
75be6413 303 ##
304 # Found existing bucket with same key. Replace with new value.
305 ##
306 $result = 1;
20f7b20c 307
75be6413 308 if ($internal_ref) {
309 $location = $value->_base_offset;
310 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
311 print( $fh $md5 . pack($self->{long_pack}, $location) );
312 return $result;
313 }
20f7b20c 314
75be6413 315 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET);
316 my $size;
317 read( $fh, $size, $self->{data_size});
318 $size = unpack($self->{data_pack}, $size);
20f7b20c 319
75be6413 320 ##
321 # If value is a hash, array, or raw value with equal or less size, we can
322 # reuse the same content area of the database. Otherwise, we have to create
323 # a new content area at the EOF.
324 ##
325 my $actual_length;
326 my $r = Scalar::Util::reftype( $value ) || '';
327 if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
328 $actual_length = $self->{index_size};
329
330 # if autobless is enabled, must also take into consideration
331 # the class name, as it is stored along with key/value.
332 if ( $root->{autobless} ) {
333 my $value_class = Scalar::Util::blessed($value);
334 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
335 $actual_length += length($value_class);
336 }
20f7b20c 337 }
75be6413 338 }
339 else { $actual_length = length($value); }
20f7b20c 340
75be6413 341 if ($actual_length <= $size) {
342 $location = $subloc;
20f7b20c 343 }
75be6413 344 else {
345 $location = $root->{end};
346 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $self->{hash_size} + $root->{file_offset}, SEEK_SET);
347 print( $fh pack($self->{long_pack}, $location) );
348 }
349
350 last;
20f7b20c 351 }
352
353 ##
354 # If this is an internal reference, return now.
355 # No need to write value or plain key
356 ##
75be6413 357 #XXX We need to store the key as a reference to the internal spot
20f7b20c 358 if ($internal_ref) {
359 return $result;
360 }
361
362 ##
363 # If bucket didn't fit into list, split into a new index level
364 ##
365 if (!$location) {
75be6413 366 # re-index bucket list
20f7b20c 367
75be6413 368 $self->split_index( $obj, $md5, $tag );
20f7b20c 369
75be6413 370 $location = $root->{end};
371 }
20f7b20c 372
373 ##
374 # Seek to content area and store signature, value and plaintext key
375 ##
376 if ($location) {
377 my $content_length;
378 seek($fh, $location + $root->{file_offset}, SEEK_SET);
379
380 ##
381 # Write signature based on content type, set content length and write actual value.
382 ##
383 my $r = Scalar::Util::reftype($value) || '';
384 if ($r eq 'HASH') {
385 print( $fh DBM::Deep->TYPE_HASH );
251dfd0e 386 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 387 $content_length = $self->{index_size};
20f7b20c 388 }
389 elsif ($r eq 'ARRAY') {
390 print( $fh DBM::Deep->TYPE_ARRAY );
251dfd0e 391 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 392 $content_length = $self->{index_size};
20f7b20c 393 }
394 elsif (!defined($value)) {
395 print( $fh DBM::Deep->SIG_NULL );
251dfd0e 396 print( $fh pack($self->{data_pack}, 0) );
20f7b20c 397 $content_length = 0;
398 }
399 else {
400 print( $fh DBM::Deep->SIG_DATA );
251dfd0e 401 print( $fh pack($self->{data_pack}, length($value)) . $value );
20f7b20c 402 $content_length = length($value);
403 }
404
405 ##
406 # Plain key is stored AFTER value, as keys are typically fetched less often.
407 ##
251dfd0e 408 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
20f7b20c 409
410 ##
411 # If value is blessed, preserve class name
412 ##
413 if ( $root->{autobless} ) {
414 my $value_class = Scalar::Util::blessed($value);
415 if ( defined $value_class && $value_class ne 'DBM::Deep' ) {
416 ##
417 # Blessed ref -- will restore later
418 ##
419 print( $fh chr(1) );
251dfd0e 420 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
20f7b20c 421 $content_length += 1;
251dfd0e 422 $content_length += $self->{data_size} + length($value_class);
20f7b20c 423 }
424 else {
425 print( $fh chr(0) );
426 $content_length += 1;
427 }
428 }
429
430 ##
431 # If this is a new content area, advance EOF counter
432 ##
433 if ($location == $root->{end}) {
434 $root->{end} += DBM::Deep->SIG_SIZE;
251dfd0e 435 $root->{end} += $self->{data_size} + $content_length;
436 $root->{end} += $self->{data_size} + length($plain_key);
20f7b20c 437 }
438
439 ##
440 # If content is a hash or array, create new child DBM::Deep object and
441 # pass each key or element to it.
442 ##
443 if ($r eq 'HASH') {
444 my $branch = DBM::Deep->new(
445 type => DBM::Deep->TYPE_HASH,
446 base_offset => $location,
447 root => $root,
448 );
449 foreach my $key (keys %{$value}) {
450 $branch->STORE( $key, $value->{$key} );
451 }
452 }
453 elsif ($r eq 'ARRAY') {
454 my $branch = DBM::Deep->new(
455 type => DBM::Deep->TYPE_ARRAY,
456 base_offset => $location,
457 root => $root,
458 );
459 my $index = 0;
460 foreach my $element (@{$value}) {
461 $branch->STORE( $index, $element );
462 $index++;
463 }
464 }
465
466 return $result;
467 }
d4b1166e 468
e5fc7e69 469 $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file");
d4b1166e 470}
471
75be6413 472sub split_index {
473 my $self = shift;
474 my ($obj, $md5, $tag) = @_;
475
476 my $fh = $obj->_fh;
477 my $root = $obj->_root;
478 my $keys = $tag->{content};
479
480 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
481 print( $fh pack($self->{long_pack}, $root->{end}) );
482
483 my $index_tag = $self->create_tag(
484 $obj,
485 $root->{end},
486 DBM::Deep->SIG_INDEX,
487 chr(0) x $self->{index_size},
488 );
489
490 my @offsets = ();
491
492 $keys .= $md5 . pack($self->{long_pack}, 0);
493
494 BUCKET:
495 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9cec1360 496 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 497
498 next BUCKET unless $key;
499
75be6413 500 my $num = ord(substr($key, $tag->{ch} + 1, 1));
501
502 if ($offsets[$num]) {
503 my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $self->{data_size};
504 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
505 my $subkeys;
506 read( $fh, $subkeys, $self->{bucket_list_size});
507
508 for (my $k=0; $k<$self->{max_buckets}; $k++) {
9cec1360 509 my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k );
75be6413 510
511 if (!$subloc) {
512 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
513 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
514 last;
515 }
516 } # k loop
517 }
518 else {
519 $offsets[$num] = $root->{end};
520 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
521 print( $fh pack($self->{long_pack}, $root->{end}) );
522
523 my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $self->{bucket_list_size});
524
525 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
526 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
527 }
528 } # i loop
529
530 return;
531}
532
9020ee8c 533sub get_bucket_value {
beac1dff 534 ##
535 # Fetch single value given tag and MD5 digested key.
536 ##
537 my $self = shift;
538 my ($obj, $tag, $md5) = @_;
539 my $keys = $tag->{content};
9020ee8c 540
541 my $fh = $obj->_fh;
542
beac1dff 543 ##
544 # Iterate through buckets, looking for a key match
545 ##
9020ee8c 546 BUCKET:
75be6413 547 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 548 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
9020ee8c 549
beac1dff 550 if (!$subloc) {
551 ##
552 # Hit end of list, no match
553 ##
554 return;
555 }
9020ee8c 556
557 if ( $md5 ne $key ) {
558 next BUCKET;
559 }
560
561 ##
562 # Found match -- seek to offset and read signature
563 ##
564 my $signature;
565 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
566 read( $fh, $signature, DBM::Deep->SIG_SIZE);
d0b74c17 567
9020ee8c 568 ##
569 # If value is a hash or array, return new DBM::Deep object with correct offset
570 ##
571 if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) {
572 my $obj = DBM::Deep->new(
573 type => $signature,
574 base_offset => $subloc,
575 root => $obj->_root,
576 );
d0b74c17 577
9020ee8c 578 if ($obj->_root->{autobless}) {
579 ##
580 # Skip over value and plain key to see if object needs
581 # to be re-blessed
582 ##
251dfd0e 583 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
d0b74c17 584
9020ee8c 585 my $size;
251dfd0e 586 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 587 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 588
9020ee8c 589 my $bless_bit;
590 read( $fh, $bless_bit, 1);
591 if (ord($bless_bit)) {
592 ##
593 # Yes, object needs to be re-blessed
594 ##
595 my $class_name;
251dfd0e 596 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 597 if ($size) { read( $fh, $class_name, $size); }
598 if ($class_name) { $obj = bless( $obj, $class_name ); }
599 }
600 }
d0b74c17 601
9020ee8c 602 return $obj;
603 }
d0b74c17 604
9020ee8c 605 ##
606 # Otherwise return actual value
607 ##
608 elsif ($signature eq DBM::Deep->SIG_DATA) {
609 my $size;
75be6413 610 read( $fh, $size, $self->{data_size});
611 $size = unpack($self->{data_pack}, $size);
612
9020ee8c 613 my $value = '';
9020ee8c 614 if ($size) { read( $fh, $value, $size); }
615 return $value;
616 }
d0b74c17 617
9020ee8c 618 ##
619 # Key exists, but content is null
620 ##
621 else { return; }
beac1dff 622 } # i loop
9020ee8c 623
beac1dff 624 return;
9020ee8c 625}
ab0e4957 626
627sub delete_bucket {
beac1dff 628 ##
629 # Delete single key/value pair given tag and MD5 digested key.
630 ##
631 my $self = shift;
632 my ($obj, $tag, $md5) = @_;
633 my $keys = $tag->{content};
ab0e4957 634
635 my $fh = $obj->_fh;
d0b74c17 636
beac1dff 637 ##
638 # Iterate through buckets, looking for a key match
639 ##
ab0e4957 640 BUCKET:
beac1dff 641 for (my $i=0; $i<$self->{max_buckets}; $i++) {
9cec1360 642 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
ab0e4957 643
beac1dff 644 if (!$subloc) {
645 ##
646 # Hit end of list, no match
647 ##
648 return;
649 }
ab0e4957 650
651 if ( $md5 ne $key ) {
652 next BUCKET;
653 }
654
655 ##
656 # Matched key -- delete bucket and return
657 ##
251dfd0e 658 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $obj->_root->{file_offset}, SEEK_SET);
659 print( $fh substr($keys, ($i+1) * $self->{bucket_size} ) );
660 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 661
ab0e4957 662 return 1;
beac1dff 663 } # i loop
ab0e4957 664
beac1dff 665 return;
ab0e4957 666}
667
912d50b1 668sub bucket_exists {
beac1dff 669 ##
670 # Check existence of single key given tag and MD5 digested key.
671 ##
672 my $self = shift;
673 my ($obj, $tag, $md5) = @_;
674 my $keys = $tag->{content};
d0b74c17 675
beac1dff 676 ##
677 # Iterate through buckets, looking for a key match
678 ##
912d50b1 679 BUCKET:
beac1dff 680 for (my $i=0; $i<$self->{max_buckets}; $i++) {
9cec1360 681 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
912d50b1 682
beac1dff 683 if (!$subloc) {
684 ##
685 # Hit end of list, no match
686 ##
687 return;
688 }
912d50b1 689
690 if ( $md5 ne $key ) {
691 next BUCKET;
692 }
693
694 ##
695 # Matched key -- return true
696 ##
697 return 1;
beac1dff 698 } # i loop
912d50b1 699
beac1dff 700 return;
912d50b1 701}
702
6736c116 703sub find_bucket_list {
beac1dff 704 ##
705 # Locate offset for bucket list, given digested key
706 ##
707 my $self = shift;
d0b74c17 708 my ($obj, $md5, $args) = @_;
709 $args = {} unless $args;
710
beac1dff 711 ##
712 # Locate offset for bucket list using digest index system
713 ##
e5fc7e69 714 my $tag = $self->load_tag($obj, $obj->_base_offset)
715 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
716#print $obj->_base_offset, " : $tag->{signature} : $tag->{offset} : $tag->{size}\n";
d0b74c17 717
e5fc7e69 718 my $ch = 0;
beac1dff 719 while ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
d0b74c17 720 my $num = ord substr($md5, $ch, 1);
721
722 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
723 $tag = $self->index_lookup( $obj, $tag, $num );
724
725 if (!$tag) {
726 if ( $args->{create} ) {
727 my $fh = $obj->_fh;
728 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
729 print( $fh pack($self->{long_pack}, $obj->_root->{end}) );
730
731 $tag = $self->create_tag(
732 $obj, $obj->_root->{end},
733 DBM::Deep->SIG_BLIST,
734 chr(0) x $self->{bucket_list_size},
735 );
736
737 $tag->{ref_loc} = $ref_loc;
738 $tag->{ch} = $ch;
739
740 last;
741 }
742 else {
743 return;
744 }
745 }
746
747 $tag->{ch} = $ch;
748 $tag->{ref_loc} = $ref_loc;
749
beac1dff 750 $ch++;
751 }
d0b74c17 752
beac1dff 753 return $tag;
6736c116 754}
755
d0b74c17 756sub index_lookup {
757 ##
758 # Given index tag, lookup single entry in index and return .
759 ##
760 my $self = shift;
761 my ($obj, $tag, $index) = @_;
762
763 my $location = unpack(
764 $self->{long_pack},
765 substr(
766 $tag->{content},
767 $index * $self->{long_size},
768 $self->{long_size},
769 ),
770 );
771
772 if (!$location) { return; }
773
774 return $self->load_tag( $obj, $location );
775}
776
6736c116 777sub traverse_index {
beac1dff 778 ##
779 # Scan index and recursively step into deeper levels, looking for next key.
780 ##
6736c116 781 my $self = shift;
782 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 783
beac1dff 784 my $tag = $self->load_tag($obj, $offset );
6736c116 785
786 my $fh = $obj->_fh;
d0b74c17 787
beac1dff 788 if ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
789 my $content = $tag->{content};
e5fc7e69 790 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 791
beac1dff 792 for (my $index = $start; $index < 256; $index++) {
e5fc7e69 793 my $subloc = unpack(
794 $self->{long_pack},
795 substr($content, $index * $self->{long_size}, $self->{long_size}),
796 );
797
beac1dff 798 if ($subloc) {
e5fc7e69 799 my $result = $self->traverse_index(
800 $obj, $subloc, $ch + 1, $force_return_next,
801 );
802
beac1dff 803 if (defined($result)) { return $result; }
804 }
805 } # index loop
d0b74c17 806
beac1dff 807 $obj->{return_next} = 1;
808 } # tag is an index
d0b74c17 809
e5fc7e69 810 else {
beac1dff 811 my $keys = $tag->{content};
812 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 813
beac1dff 814 ##
815 # Iterate through buckets, looking for a key match
816 ##
817 for (my $i=0; $i<$self->{max_buckets}; $i++) {
9cec1360 818 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 819
beac1dff 820 if (!$subloc) {
821 ##
822 # End of bucket list -- return to outer loop
823 ##
824 $obj->{return_next} = 1;
825 last;
826 }
827 elsif ($key eq $obj->{prev_md5}) {
828 ##
829 # Located previous key -- return next one found
830 ##
831 $obj->{return_next} = 1;
832 next;
833 }
834 elsif ($obj->{return_next}) {
835 ##
836 # Seek to bucket location and skip over signature
837 ##
838 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 839
beac1dff 840 ##
841 # Skip over value to get to plain key
842 ##
843 my $size;
e5fc7e69 844 read( $fh, $size, $self->{data_size});
845 $size = unpack($self->{data_pack}, $size);
beac1dff 846 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 847
beac1dff 848 ##
849 # Read in plain key and return as scalar
850 ##
851 my $plain_key;
e5fc7e69 852 read( $fh, $size, $self->{data_size});
853 $size = unpack($self->{data_pack}, $size);
beac1dff 854 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 855
beac1dff 856 return $plain_key;
857 }
858 } # bucket loop
d0b74c17 859
beac1dff 860 $obj->{return_next} = 1;
861 } # tag is a bucket list
d0b74c17 862
beac1dff 863 return;
6736c116 864}
865
866sub get_next_key {
beac1dff 867 ##
868 # Locate next key, given digested previous one
869 ##
6736c116 870 my $self = shift;
871 my ($obj) = @_;
d0b74c17 872
beac1dff 873 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
874 $obj->{return_next} = 0;
d0b74c17 875
beac1dff 876 ##
877 # If the previous key was not specifed, start at the top and
878 # return the first one found.
879 ##
880 if (!$obj->{prev_md5}) {
881 $obj->{prev_md5} = chr(0) x $self->{hash_size};
882 $obj->{return_next} = 1;
883 }
d0b74c17 884
beac1dff 885 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 886}
887
75be6413 888# Utilities
889
9cec1360 890sub _get_key_subloc {
75be6413 891 my $self = shift;
892 my ($keys, $idx) = @_;
893
9cec1360 894 my ($key, $subloc) = unpack(
895 "a$self->{hash_size} $self->{long_pack}",
75be6413 896 substr(
897 $keys,
9cec1360 898 ($idx * $self->{bucket_size}),
899 $self->{bucket_size},
75be6413 900 ),
901 );
902
9cec1360 903 return ($key, $subloc);
75be6413 904}
905
a20d9a3f 9061;
907__END__