Cleaned up open() a bit
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
612969fb 7sub precalc_sizes {
beac1dff 8 ##
9 # Precalculate index, bucket and bucket list sizes
10 ##
251dfd0e 11 my $self = shift;
1bf65be7 12
251dfd0e 13 $self->{index_size} = (2**8) * $self->{long_size};
14 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
15 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 16
251dfd0e 17 return 1;
1bf65be7 18}
19
20sub set_pack {
beac1dff 21 ##
22 # Set pack/unpack modes (see file header for more)
23 ##
251dfd0e 24 my $self = shift;
1bf65be7 25 my ($long_s, $long_p, $data_s, $data_p) = @_;
26
81d16922 27 ##
28 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file.
beac1dff 29 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 30 #
31 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 32 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 33 ##
251dfd0e 34 $self->{long_size} = $long_s ? $long_s : 4;
35 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 36
81d16922 37 ##
38 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value.
39 # Upgrading this is possible (see above) but probably not necessary. If you need
40 # more than 4 GB for a single key or value, this module is really not for you :-)
41 ##
251dfd0e 42 $self->{data_size} = $data_s ? $data_s : 4;
43 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 44
beac1dff 45 return $self->precalc_sizes();
1bf65be7 46}
47
48sub set_digest {
beac1dff 49 ##
50 # Set key digest function (default is MD5)
51 ##
251dfd0e 52 my $self = shift;
1bf65be7 53 my ($digest_func, $hash_size) = @_;
54
d0b74c17 55 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 56 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 57
beac1dff 58 return $self->precalc_sizes();
612969fb 59}
60
61sub new {
62 my $class = shift;
63 my ($args) = @_;
64
65 my $self = bless {
66 long_size => 4,
67 long_pack => 'N',
68 data_size => 4,
69 data_pack => 'N',
251dfd0e 70
612969fb 71 digest => \&Digest::MD5::md5,
72 hash_size => 16,
251dfd0e 73
81d16922 74 ##
75 # Maximum number of buckets per list before another level of indexing is done.
76 # Increase this value for slightly greater speed, but larger database files.
77 # DO NOT decrease this value below 16, due to risk of recursive reindex overrun.
78 ##
612969fb 79 max_buckets => 16,
80 }, $class;
81
251dfd0e 82 $self->precalc_sizes;
612969fb 83
84 return $self;
1bf65be7 85}
86
70b55428 87sub setup_fh {
88 my $self = shift;
89 my ($obj) = @_;
90
91 $self->open( $obj ) if !defined $obj->_fh;
92
673464d9 93 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 94 unless ( $obj->_root->{inode} ) {
95 my @stats = stat($obj->_fh);
96 $obj->_root->{inode} = $stats[1];
97 $obj->_root->{end} = $stats[7];
98 }
99
100 return 1;
101}
102
a20d9a3f 103sub open {
20f7b20c 104 ##
105 # Open a fh to the database, create if nonexistent.
106 # Make sure file signature matches DBM::Deep spec.
107 ##
a20d9a3f 108 my $self = shift;
70b55428 109 my ($obj) = @_;
a20d9a3f 110
3d1b8be9 111 if (defined($obj->_fh)) { $self->close_fh( $obj ); }
20f7b20c 112
673464d9 113 # Theoretically, adding O_BINARY should remove the need for the binmode
114 # Of course, testing it is going to be ... interesting.
115 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 116
673464d9 117 my $fh;
118 sysopen( $fh, $obj->_root->{file}, $flags )
119 or $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!");
120 $obj->_root->{fh} = $fh;
a20d9a3f 121
122 #XXX Can we remove this by using the right sysopen() flags?
123 # Maybe ... q.v. above
124 binmode $fh; # for win32
125
cd59cad8 126 if ($obj->_root->{autoflush}) {
a20d9a3f 127 my $old = select $fh;
128 $|=1;
129 select $old;
130 }
20f7b20c 131
cd59cad8 132 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 133
134 my $signature;
135 my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE));
20f7b20c 136
a20d9a3f 137 ##
138 # File is empty -- write signature and master index
139 ##
140 if (!$bytes_read) {
cd59cad8 141 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 142 print( $fh DBM::Deep->SIG_FILE);
d0b74c17 143
ec1bce6b 144 $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $self->{index_size});
a20d9a3f 145
146 my $plain_key = "[base]";
251dfd0e 147 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
a20d9a3f 148
149 # Flush the filehandle
150 my $old_fh = select $fh;
151 my $old_af = $|; $| = 1; $| = $old_af;
152 select $old_fh;
153
a20d9a3f 154 return 1;
155 }
20f7b20c 156
a20d9a3f 157 ##
158 # Check signature was valid
159 ##
160 unless ($signature eq DBM::Deep->SIG_FILE) {
3d1b8be9 161 $self->close_fh( $obj );
cd59cad8 162 return $obj->_throw_error("Signature not found -- file is not a Deep DB");
a20d9a3f 163 }
164
a20d9a3f 165 ##
166 # Get our type from master index signature
167 ##
d4b1166e 168 my $tag = $self->load_tag($obj, $obj->_base_offset);
a20d9a3f 169 if (!$tag) {
20f7b20c 170 return $obj->_throw_error("Corrupted file, no master index record");
a20d9a3f 171 }
d0b74c17 172
cd59cad8 173 if ($obj->{type} ne $tag->{signature}) {
20f7b20c 174 return $obj->_throw_error("File type mismatch");
a20d9a3f 175 }
20f7b20c 176
d0b74c17 177#XXX We probably also want to store the hash algorithm name and not assume anything
178#XXX The cool thing would be to allow a different hashing algorithm at every level
179
a20d9a3f 180 return 1;
181}
182
3d1b8be9 183sub close_fh {
cd59cad8 184 my $self = shift;
a21f2d90 185 my ($obj) = @_;
cd59cad8 186
187 if ( my $fh = $obj->_root->{fh} ) {
188 close $fh;
189 }
190 $obj->_root->{fh} = undef;
191
192 return 1;
193}
194
d4b1166e 195sub create_tag {
20f7b20c 196 ##
197 # Given offset, signature and content, create tag and write to disk
198 ##
d4b1166e 199 my $self = shift;
20f7b20c 200 my ($obj, $offset, $sig, $content) = @_;
201 my $size = length($content);
202
d4b1166e 203 my $fh = $obj->_fh;
204
20f7b20c 205 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
251dfd0e 206 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 207
208 if ($offset == $obj->_root->{end}) {
251dfd0e 209 $obj->_root->{end} += DBM::Deep->SIG_SIZE + $self->{data_size} + $size;
20f7b20c 210 }
211
212 return {
213 signature => $sig,
214 size => $size,
251dfd0e 215 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 216 content => $content
217 };
d4b1166e 218}
219
220sub load_tag {
20f7b20c 221 ##
222 # Given offset, load single tag and return signature, size and data
223 ##
d4b1166e 224 my $self = shift;
20f7b20c 225 my ($obj, $offset) = @_;
226
d4b1166e 227 my $fh = $obj->_fh;
228
20f7b20c 229 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
230 if (eof $fh) { return undef; }
231
d4b1166e 232 my $b;
251dfd0e 233 read( $fh, $b, DBM::Deep->SIG_SIZE + $self->{data_size} );
234 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 235
236 my $buffer;
237 read( $fh, $buffer, $size);
238
239 return {
240 signature => $sig,
241 size => $size,
251dfd0e 242 offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size},
20f7b20c 243 content => $buffer
244 };
d4b1166e 245}
246
20f7b20c 247sub add_bucket {
248 ##
249 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
250 # plain (undigested) key and value.
251 ##
d4b1166e 252 my $self = shift;
20f7b20c 253 my ($obj, $tag, $md5, $plain_key, $value) = @_;
254 my $keys = $tag->{content};
255 my $location = 0;
256 my $result = 2;
257
258 my $root = $obj->_root;
259
260 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) };
261 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
262
263 my $fh = $obj->_fh;
264
265 ##
266 # Iterate through buckets, seeing if this is a new entry or a replace.
267 ##
d0b74c17 268 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
251dfd0e 269 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
20f7b20c 270 if (!$subloc) {
271 ##
272 # Found empty bucket (end of list). Populate and exit loop.
273 ##
274 $result = 2;
275
276 $location = $internal_ref
277 ? $value->_base_offset
278 : $root->{end};
279
251dfd0e 280 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
281 print( $fh $md5 . pack($self->{long_pack}, $location) );
20f7b20c 282 last;
283 }
284
251dfd0e 285 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
20f7b20c 286 if ($md5 eq $key) {
287 ##
288 # Found existing bucket with same key. Replace with new value.
289 ##
290 $result = 1;
291
292 if ($internal_ref) {
293 $location = $value->_base_offset;
251dfd0e 294 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
295 print( $fh $md5 . pack($self->{long_pack}, $location) );
20f7b20c 296 return $result;
297 }
298
299 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET);
300 my $size;
251dfd0e 301 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
20f7b20c 302
303 ##
304 # If value is a hash, array, or raw value with equal or less size, we can
305 # reuse the same content area of the database. Otherwise, we have to create
306 # a new content area at the EOF.
307 ##
308 my $actual_length;
309 my $r = Scalar::Util::reftype( $value ) || '';
310 if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
ec1bce6b 311 $actual_length = $self->{index_size};
20f7b20c 312
313 # if autobless is enabled, must also take into consideration
314 # the class name, as it is stored along with key/value.
315 if ( $root->{autobless} ) {
316 my $value_class = Scalar::Util::blessed($value);
317 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
318 $actual_length += length($value_class);
319 }
320 }
321 }
322 else { $actual_length = length($value); }
323
324 if ($actual_length <= $size) {
325 $location = $subloc;
326 }
327 else {
328 $location = $root->{end};
251dfd0e 329 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $self->{hash_size} + $root->{file_offset}, SEEK_SET);
330 print( $fh pack($self->{long_pack}, $location) );
20f7b20c 331 }
332
333 last;
334 }
335 }
336
337 ##
338 # If this is an internal reference, return now.
339 # No need to write value or plain key
340 ##
341 if ($internal_ref) {
342 return $result;
343 }
344
345 ##
346 # If bucket didn't fit into list, split into a new index level
347 ##
348 if (!$location) {
349 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
251dfd0e 350 print( $fh pack($self->{long_pack}, $root->{end}) );
20f7b20c 351
ec1bce6b 352 my $index_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_INDEX, chr(0) x $self->{index_size});
20f7b20c 353 my @offsets = ();
354
251dfd0e 355 $keys .= $md5 . pack($self->{long_pack}, 0);
20f7b20c 356
251dfd0e 357 for (my $i=0; $i<=$self->{max_buckets}; $i++) {
358 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
20f7b20c 359 if ($key) {
251dfd0e 360 my $old_subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) +
361 $self->{hash_size}, $self->{long_size}));
20f7b20c 362 my $num = ord(substr($key, $tag->{ch} + 1, 1));
363
364 if ($offsets[$num]) {
251dfd0e 365 my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $self->{data_size};
20f7b20c 366 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
367 my $subkeys;
251dfd0e 368 read( $fh, $subkeys, $self->{bucket_list_size});
20f7b20c 369
251dfd0e 370 for (my $k=0; $k<$self->{max_buckets}; $k++) {
371 my $subloc = unpack($self->{long_pack}, substr($subkeys, ($k * $self->{bucket_size}) +
372 $self->{hash_size}, $self->{long_size}));
20f7b20c 373 if (!$subloc) {
251dfd0e 374 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
375 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
20f7b20c 376 last;
377 }
378 } # k loop
379 }
380 else {
381 $offsets[$num] = $root->{end};
251dfd0e 382 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
383 print( $fh pack($self->{long_pack}, $root->{end}) );
20f7b20c 384
251dfd0e 385 my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $self->{bucket_list_size});
20f7b20c 386
387 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
251dfd0e 388 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
20f7b20c 389 }
390 } # key is real
391 } # i loop
392
393 $location ||= $root->{end};
394 } # re-index bucket list
395
396 ##
397 # Seek to content area and store signature, value and plaintext key
398 ##
399 if ($location) {
400 my $content_length;
401 seek($fh, $location + $root->{file_offset}, SEEK_SET);
402
403 ##
404 # Write signature based on content type, set content length and write actual value.
405 ##
406 my $r = Scalar::Util::reftype($value) || '';
407 if ($r eq 'HASH') {
408 print( $fh DBM::Deep->TYPE_HASH );
251dfd0e 409 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 410 $content_length = $self->{index_size};
20f7b20c 411 }
412 elsif ($r eq 'ARRAY') {
413 print( $fh DBM::Deep->TYPE_ARRAY );
251dfd0e 414 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
ec1bce6b 415 $content_length = $self->{index_size};
20f7b20c 416 }
417 elsif (!defined($value)) {
418 print( $fh DBM::Deep->SIG_NULL );
251dfd0e 419 print( $fh pack($self->{data_pack}, 0) );
20f7b20c 420 $content_length = 0;
421 }
422 else {
423 print( $fh DBM::Deep->SIG_DATA );
251dfd0e 424 print( $fh pack($self->{data_pack}, length($value)) . $value );
20f7b20c 425 $content_length = length($value);
426 }
427
428 ##
429 # Plain key is stored AFTER value, as keys are typically fetched less often.
430 ##
251dfd0e 431 print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key );
20f7b20c 432
433 ##
434 # If value is blessed, preserve class name
435 ##
436 if ( $root->{autobless} ) {
437 my $value_class = Scalar::Util::blessed($value);
438 if ( defined $value_class && $value_class ne 'DBM::Deep' ) {
439 ##
440 # Blessed ref -- will restore later
441 ##
442 print( $fh chr(1) );
251dfd0e 443 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
20f7b20c 444 $content_length += 1;
251dfd0e 445 $content_length += $self->{data_size} + length($value_class);
20f7b20c 446 }
447 else {
448 print( $fh chr(0) );
449 $content_length += 1;
450 }
451 }
452
453 ##
454 # If this is a new content area, advance EOF counter
455 ##
456 if ($location == $root->{end}) {
457 $root->{end} += DBM::Deep->SIG_SIZE;
251dfd0e 458 $root->{end} += $self->{data_size} + $content_length;
459 $root->{end} += $self->{data_size} + length($plain_key);
20f7b20c 460 }
461
462 ##
463 # If content is a hash or array, create new child DBM::Deep object and
464 # pass each key or element to it.
465 ##
466 if ($r eq 'HASH') {
467 my $branch = DBM::Deep->new(
468 type => DBM::Deep->TYPE_HASH,
469 base_offset => $location,
470 root => $root,
471 );
472 foreach my $key (keys %{$value}) {
473 $branch->STORE( $key, $value->{$key} );
474 }
475 }
476 elsif ($r eq 'ARRAY') {
477 my $branch = DBM::Deep->new(
478 type => DBM::Deep->TYPE_ARRAY,
479 base_offset => $location,
480 root => $root,
481 );
482 my $index = 0;
483 foreach my $element (@{$value}) {
484 $branch->STORE( $index, $element );
485 $index++;
486 }
487 }
488
489 return $result;
490 }
d4b1166e 491
20f7b20c 492 return $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file");
d4b1166e 493}
494
9020ee8c 495sub get_bucket_value {
beac1dff 496 ##
497 # Fetch single value given tag and MD5 digested key.
498 ##
499 my $self = shift;
500 my ($obj, $tag, $md5) = @_;
501 my $keys = $tag->{content};
9020ee8c 502
503 my $fh = $obj->_fh;
504
beac1dff 505 ##
506 # Iterate through buckets, looking for a key match
507 ##
9020ee8c 508 BUCKET:
beac1dff 509 for (my $i=0; $i<$self->{max_buckets}; $i++) {
510 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
511 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
9020ee8c 512
beac1dff 513 if (!$subloc) {
514 ##
515 # Hit end of list, no match
516 ##
517 return;
518 }
9020ee8c 519
520 if ( $md5 ne $key ) {
521 next BUCKET;
522 }
523
524 ##
525 # Found match -- seek to offset and read signature
526 ##
527 my $signature;
528 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
529 read( $fh, $signature, DBM::Deep->SIG_SIZE);
d0b74c17 530
9020ee8c 531 ##
532 # If value is a hash or array, return new DBM::Deep object with correct offset
533 ##
534 if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) {
535 my $obj = DBM::Deep->new(
536 type => $signature,
537 base_offset => $subloc,
538 root => $obj->_root,
539 );
d0b74c17 540
9020ee8c 541 if ($obj->_root->{autobless}) {
542 ##
543 # Skip over value and plain key to see if object needs
544 # to be re-blessed
545 ##
251dfd0e 546 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
d0b74c17 547
9020ee8c 548 my $size;
251dfd0e 549 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 550 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 551
9020ee8c 552 my $bless_bit;
553 read( $fh, $bless_bit, 1);
554 if (ord($bless_bit)) {
555 ##
556 # Yes, object needs to be re-blessed
557 ##
558 my $class_name;
251dfd0e 559 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 560 if ($size) { read( $fh, $class_name, $size); }
561 if ($class_name) { $obj = bless( $obj, $class_name ); }
562 }
563 }
d0b74c17 564
9020ee8c 565 return $obj;
566 }
d0b74c17 567
9020ee8c 568 ##
569 # Otherwise return actual value
570 ##
571 elsif ($signature eq DBM::Deep->SIG_DATA) {
572 my $size;
573 my $value = '';
251dfd0e 574 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
9020ee8c 575 if ($size) { read( $fh, $value, $size); }
576 return $value;
577 }
d0b74c17 578
9020ee8c 579 ##
580 # Key exists, but content is null
581 ##
582 else { return; }
beac1dff 583 } # i loop
9020ee8c 584
beac1dff 585 return;
9020ee8c 586}
ab0e4957 587
588sub delete_bucket {
beac1dff 589 ##
590 # Delete single key/value pair given tag and MD5 digested key.
591 ##
592 my $self = shift;
593 my ($obj, $tag, $md5) = @_;
594 my $keys = $tag->{content};
ab0e4957 595
596 my $fh = $obj->_fh;
d0b74c17 597
beac1dff 598 ##
599 # Iterate through buckets, looking for a key match
600 ##
ab0e4957 601 BUCKET:
beac1dff 602 for (my $i=0; $i<$self->{max_buckets}; $i++) {
603 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
604 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
ab0e4957 605
beac1dff 606 if (!$subloc) {
607 ##
608 # Hit end of list, no match
609 ##
610 return;
611 }
ab0e4957 612
613 if ( $md5 ne $key ) {
614 next BUCKET;
615 }
616
617 ##
618 # Matched key -- delete bucket and return
619 ##
251dfd0e 620 seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $obj->_root->{file_offset}, SEEK_SET);
621 print( $fh substr($keys, ($i+1) * $self->{bucket_size} ) );
622 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 623
ab0e4957 624 return 1;
beac1dff 625 } # i loop
ab0e4957 626
beac1dff 627 return;
ab0e4957 628}
629
912d50b1 630sub bucket_exists {
beac1dff 631 ##
632 # Check existence of single key given tag and MD5 digested key.
633 ##
634 my $self = shift;
635 my ($obj, $tag, $md5) = @_;
636 my $keys = $tag->{content};
d0b74c17 637
beac1dff 638 ##
639 # Iterate through buckets, looking for a key match
640 ##
912d50b1 641 BUCKET:
beac1dff 642 for (my $i=0; $i<$self->{max_buckets}; $i++) {
643 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
644 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
912d50b1 645
beac1dff 646 if (!$subloc) {
647 ##
648 # Hit end of list, no match
649 ##
650 return;
651 }
912d50b1 652
653 if ( $md5 ne $key ) {
654 next BUCKET;
655 }
656
657 ##
658 # Matched key -- return true
659 ##
660 return 1;
beac1dff 661 } # i loop
912d50b1 662
beac1dff 663 return;
912d50b1 664}
665
6736c116 666sub find_bucket_list {
beac1dff 667 ##
668 # Locate offset for bucket list, given digested key
669 ##
670 my $self = shift;
d0b74c17 671 my ($obj, $md5, $args) = @_;
672 $args = {} unless $args;
673
beac1dff 674 ##
675 # Locate offset for bucket list using digest index system
676 ##
677 my $ch = 0;
678 my $tag = $self->load_tag($obj, $obj->_base_offset);
d0b74c17 679 if (!$tag) {
680 return $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
681 }
682
beac1dff 683 while ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
d0b74c17 684 my $num = ord substr($md5, $ch, 1);
685
686 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
687 $tag = $self->index_lookup( $obj, $tag, $num );
688
689 if (!$tag) {
690 if ( $args->{create} ) {
691 my $fh = $obj->_fh;
692 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
693 print( $fh pack($self->{long_pack}, $obj->_root->{end}) );
694
695 $tag = $self->create_tag(
696 $obj, $obj->_root->{end},
697 DBM::Deep->SIG_BLIST,
698 chr(0) x $self->{bucket_list_size},
699 );
700
701 $tag->{ref_loc} = $ref_loc;
702 $tag->{ch} = $ch;
703
704 last;
705 }
706 else {
707 return;
708 }
709 }
710
711 $tag->{ch} = $ch;
712 $tag->{ref_loc} = $ref_loc;
713
beac1dff 714 $ch++;
715 }
d0b74c17 716
beac1dff 717 return $tag;
6736c116 718}
719
d0b74c17 720sub index_lookup {
721 ##
722 # Given index tag, lookup single entry in index and return .
723 ##
724 my $self = shift;
725 my ($obj, $tag, $index) = @_;
726
727 my $location = unpack(
728 $self->{long_pack},
729 substr(
730 $tag->{content},
731 $index * $self->{long_size},
732 $self->{long_size},
733 ),
734 );
735
736 if (!$location) { return; }
737
738 return $self->load_tag( $obj, $location );
739}
740
6736c116 741sub traverse_index {
beac1dff 742 ##
743 # Scan index and recursively step into deeper levels, looking for next key.
744 ##
6736c116 745 my $self = shift;
746 my ($obj, $offset, $ch, $force_return_next) = @_;
747 $force_return_next = undef unless $force_return_next;
d0b74c17 748
beac1dff 749 my $tag = $self->load_tag($obj, $offset );
6736c116 750
751 my $fh = $obj->_fh;
d0b74c17 752
beac1dff 753 if ($tag->{signature} ne DBM::Deep->SIG_BLIST) {
754 my $content = $tag->{content};
755 my $start;
756 if ($obj->{return_next}) { $start = 0; }
757 else { $start = ord(substr($obj->{prev_md5}, $ch, 1)); }
d0b74c17 758
beac1dff 759 for (my $index = $start; $index < 256; $index++) {
760 my $subloc = unpack($self->{long_pack}, substr($content, $index * $self->{long_size}, $self->{long_size}) );
761 if ($subloc) {
762 my $result = $self->traverse_index( $obj, $subloc, $ch + 1, $force_return_next );
763 if (defined($result)) { return $result; }
764 }
765 } # index loop
d0b74c17 766
beac1dff 767 $obj->{return_next} = 1;
768 } # tag is an index
d0b74c17 769
beac1dff 770 elsif ($tag->{signature} eq DBM::Deep->SIG_BLIST) {
771 my $keys = $tag->{content};
772 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 773
beac1dff 774 ##
775 # Iterate through buckets, looking for a key match
776 ##
777 for (my $i=0; $i<$self->{max_buckets}; $i++) {
778 my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size});
779 my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size}));
d0b74c17 780
beac1dff 781 if (!$subloc) {
782 ##
783 # End of bucket list -- return to outer loop
784 ##
785 $obj->{return_next} = 1;
786 last;
787 }
788 elsif ($key eq $obj->{prev_md5}) {
789 ##
790 # Located previous key -- return next one found
791 ##
792 $obj->{return_next} = 1;
793 next;
794 }
795 elsif ($obj->{return_next}) {
796 ##
797 # Seek to bucket location and skip over signature
798 ##
799 seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 800
beac1dff 801 ##
802 # Skip over value to get to plain key
803 ##
804 my $size;
805 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
806 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 807
beac1dff 808 ##
809 # Read in plain key and return as scalar
810 ##
811 my $plain_key;
812 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
813 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 814
beac1dff 815 return $plain_key;
816 }
817 } # bucket loop
d0b74c17 818
beac1dff 819 $obj->{return_next} = 1;
820 } # tag is a bucket list
d0b74c17 821
beac1dff 822 return;
6736c116 823}
824
825sub get_next_key {
beac1dff 826 ##
827 # Locate next key, given digested previous one
828 ##
6736c116 829 my $self = shift;
830 my ($obj) = @_;
d0b74c17 831
beac1dff 832 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
833 $obj->{return_next} = 0;
d0b74c17 834
beac1dff 835 ##
836 # If the previous key was not specifed, start at the top and
837 # return the first one found.
838 ##
839 if (!$obj->{prev_md5}) {
840 $obj->{prev_md5} = chr(0) x $self->{hash_size};
841 $obj->{return_next} = 1;
842 }
d0b74c17 843
beac1dff 844 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 845}
846
a20d9a3f 8471;
848__END__