Added guard to make sure values that cannot be read correctly aren't stored, plus...
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
8db25060 7##
8# Setup file and tag signatures. These should never change.
9##
10sub SIG_FILE () { 'DPDB' }
11sub SIG_INTERNAL () { 'i' }
12sub SIG_HASH () { 'H' }
13sub SIG_ARRAY () { 'A' }
d5d7c51d 14sub SIG_REF () { 'R' }
8db25060 15sub SIG_NULL () { 'N' }
16sub SIG_DATA () { 'D' }
17sub SIG_INDEX () { 'I' }
18sub SIG_BLIST () { 'B' }
19sub SIG_SIZE () { 1 }
20
612969fb 21sub precalc_sizes {
beac1dff 22 ##
23 # Precalculate index, bucket and bucket list sizes
24 ##
251dfd0e 25 my $self = shift;
1bf65be7 26
251dfd0e 27 $self->{index_size} = (2**8) * $self->{long_size};
28 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
29 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 30
251dfd0e 31 return 1;
1bf65be7 32}
33
34sub set_pack {
beac1dff 35 ##
36 # Set pack/unpack modes (see file header for more)
37 ##
251dfd0e 38 my $self = shift;
1bf65be7 39 my ($long_s, $long_p, $data_s, $data_p) = @_;
40
81d16922 41 ##
75be6413 42 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
43 # GB per file.
beac1dff 44 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 45 #
46 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 47 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 48 ##
251dfd0e 49 $self->{long_size} = $long_s ? $long_s : 4;
50 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 51
81d16922 52 ##
75be6413 53 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
d5d7c51d 54 # key/value. Upgrading this is possible (see above) but probably not
55 # necessary. If you need more than 4 GB for a single key or value, this
56 # module is really not for you :-)
81d16922 57 ##
251dfd0e 58 $self->{data_size} = $data_s ? $data_s : 4;
59 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 60
beac1dff 61 return $self->precalc_sizes();
1bf65be7 62}
63
64sub set_digest {
beac1dff 65 ##
66 # Set key digest function (default is MD5)
67 ##
251dfd0e 68 my $self = shift;
1bf65be7 69 my ($digest_func, $hash_size) = @_;
70
d0b74c17 71 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 72 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 73
beac1dff 74 return $self->precalc_sizes();
612969fb 75}
76
77sub new {
78 my $class = shift;
79 my ($args) = @_;
80
81 my $self = bless {
82 long_size => 4,
83 long_pack => 'N',
84 data_size => 4,
85 data_pack => 'N',
251dfd0e 86
612969fb 87 digest => \&Digest::MD5::md5,
88 hash_size => 16,
251dfd0e 89
81d16922 90 ##
d5d7c51d 91 # Maximum number of buckets per list before another level of indexing is
92 # done.
93 # Increase this value for slightly greater speed, but larger database
94 # files. DO NOT decrease this value below 16, due to risk of recursive
95 # reindex overrun.
81d16922 96 ##
612969fb 97 max_buckets => 16,
98 }, $class;
99
251dfd0e 100 $self->precalc_sizes;
612969fb 101
102 return $self;
1bf65be7 103}
104
70b55428 105sub setup_fh {
106 my $self = shift;
107 my ($obj) = @_;
108
109 $self->open( $obj ) if !defined $obj->_fh;
110
673464d9 111 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 112 unless ( $obj->_root->{inode} ) {
113 my @stats = stat($obj->_fh);
114 $obj->_root->{inode} = $stats[1];
115 $obj->_root->{end} = $stats[7];
116 }
117
118 return 1;
119}
120
a20d9a3f 121sub open {
20f7b20c 122 ##
123 # Open a fh to the database, create if nonexistent.
124 # Make sure file signature matches DBM::Deep spec.
125 ##
a20d9a3f 126 my $self = shift;
70b55428 127 my ($obj) = @_;
a20d9a3f 128
3d1b8be9 129 if (defined($obj->_fh)) { $self->close_fh( $obj ); }
20f7b20c 130
673464d9 131 # Theoretically, adding O_BINARY should remove the need for the binmode
132 # Of course, testing it is going to be ... interesting.
133 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 134
673464d9 135 my $fh;
d5d7c51d 136 my $filename = $obj->_root->{file};
137 sysopen( $fh, $filename, $flags )
138 or $obj->_throw_error("Cannot sysopen file '$filename': $!");
673464d9 139 $obj->_root->{fh} = $fh;
a20d9a3f 140
141 #XXX Can we remove this by using the right sysopen() flags?
142 # Maybe ... q.v. above
143 binmode $fh; # for win32
144
cd59cad8 145 if ($obj->_root->{autoflush}) {
a20d9a3f 146 my $old = select $fh;
147 $|=1;
148 select $old;
149 }
20f7b20c 150
cd59cad8 151 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
a20d9a3f 152 my $signature;
8db25060 153 my $bytes_read = read( $fh, $signature, length(SIG_FILE));
20f7b20c 154
a20d9a3f 155 ##
156 # File is empty -- write signature and master index
157 ##
158 if (!$bytes_read) {
cd59cad8 159 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
8db25060 160 print( $fh SIG_FILE);
d0b74c17 161
d5d7c51d 162 $self->create_tag(
163 $obj, $obj->_base_offset, $obj->_type,
164 chr(0) x $self->{index_size},
165 );
a20d9a3f 166
a20d9a3f 167 # Flush the filehandle
168 my $old_fh = select $fh;
169 my $old_af = $|; $| = 1; $| = $old_af;
170 select $old_fh;
171
a20d9a3f 172 return 1;
173 }
20f7b20c 174
a20d9a3f 175 ##
176 # Check signature was valid
177 ##
8db25060 178 unless ($signature eq SIG_FILE) {
3d1b8be9 179 $self->close_fh( $obj );
e5fc7e69 180 $obj->_throw_error("Signature not found -- file is not a Deep DB");
a20d9a3f 181 }
182
a20d9a3f 183 ##
184 # Get our type from master index signature
185 ##
e5fc7e69 186 my $tag = $self->load_tag($obj, $obj->_base_offset)
187 or $obj->_throw_error("Corrupted file, no master index record");
d0b74c17 188
e5fc7e69 189 unless ($obj->{type} eq $tag->{signature}) {
190 $obj->_throw_error("File type mismatch");
a20d9a3f 191 }
20f7b20c 192
d0b74c17 193#XXX We probably also want to store the hash algorithm name and not assume anything
194#XXX The cool thing would be to allow a different hashing algorithm at every level
195
a20d9a3f 196 return 1;
197}
198
3d1b8be9 199sub close_fh {
cd59cad8 200 my $self = shift;
a21f2d90 201 my ($obj) = @_;
cd59cad8 202
203 if ( my $fh = $obj->_root->{fh} ) {
204 close $fh;
205 }
206 $obj->_root->{fh} = undef;
207
208 return 1;
209}
210
d4b1166e 211sub create_tag {
20f7b20c 212 ##
213 # Given offset, signature and content, create tag and write to disk
214 ##
d4b1166e 215 my $self = shift;
20f7b20c 216 my ($obj, $offset, $sig, $content) = @_;
217 my $size = length($content);
218
d4b1166e 219 my $fh = $obj->_fh;
220
20f7b20c 221 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
251dfd0e 222 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 223
224 if ($offset == $obj->_root->{end}) {
8db25060 225 $obj->_root->{end} += SIG_SIZE + $self->{data_size} + $size;
20f7b20c 226 }
227
228 return {
229 signature => $sig,
230 size => $size,
8db25060 231 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 232 content => $content
233 };
d4b1166e 234}
235
236sub load_tag {
20f7b20c 237 ##
238 # Given offset, load single tag and return signature, size and data
239 ##
d4b1166e 240 my $self = shift;
20f7b20c 241 my ($obj, $offset) = @_;
242
d4b1166e 243 my $fh = $obj->_fh;
244
20f7b20c 245 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 246
75be6413 247 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 248 return if eof $fh;
20f7b20c 249
d4b1166e 250 my $b;
8db25060 251 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 252 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 253
254 my $buffer;
255 read( $fh, $buffer, $size);
256
257 return {
258 signature => $sig,
259 size => $size,
8db25060 260 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 261 content => $buffer
262 };
d4b1166e 263}
264
20f7b20c 265sub add_bucket {
266 ##
267 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
268 # plain (undigested) key and value.
269 ##
d4b1166e 270 my $self = shift;
20f7b20c 271 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 272
eea0d863 273 # This verifies that only supported values will be stored.
274 {
275 my $r = Scalar::Util::reftype( $value );
276 last if !defined $r;
277
278 last if $r eq 'HASH';
279 last if $r eq 'ARRAY';
280
281 $obj->_throw_error(
282 "Storage of variables of type '$r' is not supported."
283 );
284 }
285
20f7b20c 286 my $location = 0;
287 my $result = 2;
288
289 my $root = $obj->_root;
290
d5d7c51d 291 my $is_dbm_deep = eval {
292 local $SIG{'__DIE__'};
293 $value->isa( 'DBM::Deep' );
294 };
295
20f7b20c 296 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
297
298 my $fh = $obj->_fh;
299
386bab6c 300 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
75be6413 301
386bab6c 302 # Updating a known md5
303 if ( $subloc ) {
304 $result = 1;
20f7b20c 305
386bab6c 306 ##
d5d7c51d 307 # If value is a hash, array, or raw value with equal or less size, we
308 # can reuse the same content area of the database. Otherwise, we have
309 # to create a new content area at the EOF.
386bab6c 310 ##
311 my $actual_length;
312 if ( $internal_ref ) {
313 $actual_length = $self->{long_size};
314 }
315 else {
316 my $r = Scalar::Util::reftype( $value ) || '';
317 if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
318 $actual_length = $self->{index_size};
319
320 # if autobless is enabled, must also take into consideration
321 # the class name, as it is stored along with key/value.
322 if ( $root->{autobless} ) {
323 my $value_class = Scalar::Util::blessed($value);
324 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
325 $actual_length += length($value_class);
8db25060 326 }
75be6413 327 }
20f7b20c 328 }
386bab6c 329 else { $actual_length = length($value); }
75be6413 330 }
20f7b20c 331
d5d7c51d 332 seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET);
333 my $size;
334 read( $fh, $size, $self->{data_size});
335 $size = unpack($self->{data_pack}, $size);
336
386bab6c 337 if ($actual_length <= $size) {
338 $location = $subloc;
20f7b20c 339 }
75be6413 340 else {
341 $location = $root->{end};
386bab6c 342 seek(
343 $fh,
344 $tag->{offset} + $offset + $self->{hash_size} + $root->{file_offset},
345 SEEK_SET,
346 );
347 print( $fh pack($self->{long_pack}, $location) );
75be6413 348 }
75be6413 349 }
386bab6c 350 # Adding a new md5
351 elsif ( defined $offset ) {
386bab6c 352 $location = $root->{end};
353
354 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
355 print( $fh $md5 . pack($self->{long_pack}, $location) );
356 }
357 # If bucket didn't fit into list, split into a new index level
358 else {
359 $self->split_index( $obj, $md5, $tag );
360
361 $location = $root->{end};
362 }
20f7b20c 363
d5d7c51d 364 $self->write_value( $obj, $location, $plain_key, $value );
365
366 return $result;
367}
368
369sub write_value {
370 my $self = shift;
371 my ($obj, $location, $key, $value) = @_;
372
373 my $fh = $obj->_fh;
374 my $root = $obj->_root;
375
376 my $is_dbm_deep = eval {
377 local $SIG{'__DIE__'};
378 $value->isa( 'DBM::Deep' );
379 };
380
381 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
382
383 seek($fh, $location + $root->{file_offset}, SEEK_SET);
384
20f7b20c 385 ##
d5d7c51d 386 # Write signature based on content type, set content length and write
387 # actual value.
20f7b20c 388 ##
d5d7c51d 389 my $r = Scalar::Util::reftype($value) || '';
390 my $content_length;
391 if ( $internal_ref ) {
392 print( $fh SIG_INTERNAL );
393 print( $fh pack($self->{data_pack}, $self->{long_size}) );
394 print( $fh pack($self->{long_pack}, $value->_base_offset) );
395 $content_length = $self->{long_size};
396 }
397 else {
398 if ($r eq 'HASH') {
399 print( $fh SIG_HASH );
400 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
401 $content_length = $self->{index_size};
402 }
403 elsif ($r eq 'ARRAY') {
404 print( $fh SIG_ARRAY );
405 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
406 $content_length = $self->{index_size};
407 }
408 elsif (!defined($value)) {
409 print( $fh SIG_NULL );
410 print( $fh pack($self->{data_pack}, 0) );
411 $content_length = 0;
20f7b20c 412 }
413 else {
d5d7c51d 414 print( $fh SIG_DATA );
415 print( $fh pack($self->{data_pack}, length($value)) . $value );
416 $content_length = length($value);
20f7b20c 417 }
d5d7c51d 418 }
20f7b20c 419
d5d7c51d 420 ##
421 # Plain key is stored AFTER value, as keys are typically fetched less often.
422 ##
423 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 424
d5d7c51d 425 ##
426 # If value is blessed, preserve class name
427 ##
428 if ( $root->{autobless} ) {
429 my $value_class = Scalar::Util::blessed($value);
430 if ( defined $value_class && !$value->isa( 'DBM::Deep' ) ) {
431 ##
432 # Blessed ref -- will restore later
433 ##
434 print( $fh chr(1) );
435 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
436 $content_length += 1;
437 $content_length += $self->{data_size} + length($value_class);
20f7b20c 438 }
d5d7c51d 439 else {
440 print( $fh chr(0) );
441 $content_length += 1;
20f7b20c 442 }
d5d7c51d 443 }
20f7b20c 444
d5d7c51d 445 ##
446 # If this is a new content area, advance EOF counter
447 ##
448 if ($location == $root->{end}) {
449 $root->{end} += SIG_SIZE;
450 $root->{end} += $self->{data_size} + $content_length;
451 $root->{end} += $self->{data_size} + length($key);
452 }
453
454 ##
455 # If content is a hash or array, create new child DBM::Deep object and
456 # pass each key or element to it.
457 ##
458 if ( ! $internal_ref ) {
459 if ($r eq 'HASH') {
460 my $branch = DBM::Deep->new(
461 type => DBM::Deep->TYPE_HASH,
462 base_offset => $location,
463 root => $root,
464 );
465 foreach my $key (keys %{$value}) {
466 $branch->STORE( $key, $value->{$key} );
20f7b20c 467 }
d5d7c51d 468 }
469 elsif ($r eq 'ARRAY') {
470 my $branch = DBM::Deep->new(
471 type => DBM::Deep->TYPE_ARRAY,
472 base_offset => $location,
473 root => $root,
474 );
475 my $index = 0;
476 foreach my $element (@{$value}) {
477 $branch->STORE( $index, $element );
478 $index++;
20f7b20c 479 }
480 }
20f7b20c 481 }
d4b1166e 482
d5d7c51d 483 return 1;
d4b1166e 484}
485
75be6413 486sub split_index {
487 my $self = shift;
488 my ($obj, $md5, $tag) = @_;
489
490 my $fh = $obj->_fh;
491 my $root = $obj->_root;
492 my $keys = $tag->{content};
493
494 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
495 print( $fh pack($self->{long_pack}, $root->{end}) );
496
497 my $index_tag = $self->create_tag(
498 $obj,
499 $root->{end},
8db25060 500 SIG_INDEX,
75be6413 501 chr(0) x $self->{index_size},
502 );
503
504 my @offsets = ();
505
506 $keys .= $md5 . pack($self->{long_pack}, 0);
507
508 BUCKET:
509 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9cec1360 510 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 511
512 next BUCKET unless $key;
513
75be6413 514 my $num = ord(substr($key, $tag->{ch} + 1, 1));
515
516 if ($offsets[$num]) {
8db25060 517 my $offset = $offsets[$num] + SIG_SIZE + $self->{data_size};
75be6413 518 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
519 my $subkeys;
520 read( $fh, $subkeys, $self->{bucket_list_size});
521
522 for (my $k=0; $k<$self->{max_buckets}; $k++) {
9cec1360 523 my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k );
75be6413 524
525 if (!$subloc) {
526 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
527 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
528 last;
529 }
530 } # k loop
531 }
532 else {
533 $offsets[$num] = $root->{end};
534 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
535 print( $fh pack($self->{long_pack}, $root->{end}) );
536
8db25060 537 my $blist_tag = $self->create_tag($obj, $root->{end}, SIG_BLIST, chr(0) x $self->{bucket_list_size});
75be6413 538
539 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
540 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
541 }
542 } # i loop
543
544 return;
545}
546
8db25060 547sub read_from_loc {
548 my $self = shift;
549 my ($obj, $subloc) = @_;
550
551 my $fh = $obj->_fh;
552
553 ##
554 # Found match -- seek to offset and read signature
555 ##
556 my $signature;
557 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
558 read( $fh, $signature, SIG_SIZE);
559
560 ##
561 # If value is a hash or array, return new DBM::Deep object with correct offset
562 ##
563 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
564 my $obj = DBM::Deep->new(
565 type => $signature,
566 base_offset => $subloc,
567 root => $obj->_root,
568 );
569
570 if ($obj->_root->{autobless}) {
571 ##
572 # Skip over value and plain key to see if object needs
573 # to be re-blessed
574 ##
575 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
576
577 my $size;
578 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
579 if ($size) { seek($fh, $size, SEEK_CUR); }
580
581 my $bless_bit;
582 read( $fh, $bless_bit, 1);
583 if (ord($bless_bit)) {
584 ##
585 # Yes, object needs to be re-blessed
586 ##
587 my $class_name;
588 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
589 if ($size) { read( $fh, $class_name, $size); }
590 if ($class_name) { $obj = bless( $obj, $class_name ); }
591 }
592 }
593
594 return $obj;
595 }
596 elsif ( $signature eq SIG_INTERNAL ) {
597 my $size;
598 read( $fh, $size, $self->{data_size});
599 $size = unpack($self->{data_pack}, $size);
600
601 if ( $size ) {
602 my $new_loc;
603 read( $fh, $new_loc, $size );
604 $new_loc = unpack( $self->{long_pack}, $new_loc );
605
606 return $self->read_from_loc( $obj, $new_loc );
607 }
608 else {
609 return;
610 }
611 }
612 ##
613 # Otherwise return actual value
614 ##
615 elsif ($signature eq SIG_DATA) {
616 my $size;
617 read( $fh, $size, $self->{data_size});
618 $size = unpack($self->{data_pack}, $size);
619
620 my $value = '';
621 if ($size) { read( $fh, $value, $size); }
622 return $value;
623 }
624
625 ##
626 # Key exists, but content is null
627 ##
628 return;
629}
630
9020ee8c 631sub get_bucket_value {
beac1dff 632 ##
633 # Fetch single value given tag and MD5 digested key.
634 ##
635 my $self = shift;
636 my ($obj, $tag, $md5) = @_;
9020ee8c 637
386bab6c 638 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
639 if ( $subloc ) {
8db25060 640 return $self->read_from_loc( $obj, $subloc );
386bab6c 641 }
beac1dff 642 return;
9020ee8c 643}
ab0e4957 644
645sub delete_bucket {
beac1dff 646 ##
647 # Delete single key/value pair given tag and MD5 digested key.
648 ##
649 my $self = shift;
650 my ($obj, $tag, $md5) = @_;
ab0e4957 651
386bab6c 652 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
653 if ( $subloc ) {
654 my $fh = $obj->_fh;
655 seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);
656 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 657 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 658
ab0e4957 659 return 1;
386bab6c 660 }
beac1dff 661 return;
ab0e4957 662}
663
912d50b1 664sub bucket_exists {
beac1dff 665 ##
666 # Check existence of single key given tag and MD5 digested key.
667 ##
668 my $self = shift;
669 my ($obj, $tag, $md5) = @_;
912d50b1 670
386bab6c 671 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 672 return $subloc && 1;
912d50b1 673}
674
6736c116 675sub find_bucket_list {
beac1dff 676 ##
677 # Locate offset for bucket list, given digested key
678 ##
679 my $self = shift;
d0b74c17 680 my ($obj, $md5, $args) = @_;
681 $args = {} unless $args;
682
beac1dff 683 ##
684 # Locate offset for bucket list using digest index system
685 ##
e5fc7e69 686 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 687 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 688
e5fc7e69 689 my $ch = 0;
8db25060 690 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 691 my $num = ord substr($md5, $ch, 1);
692
693 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
694 $tag = $self->index_lookup( $obj, $tag, $num );
695
696 if (!$tag) {
d5d7c51d 697 return if ! $args->{create};
d0b74c17 698
d5d7c51d 699 my $fh = $obj->_fh;
700 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
701 print( $fh pack($self->{long_pack}, $obj->_root->{end}) );
d0b74c17 702
d5d7c51d 703 $tag = $self->create_tag(
704 $obj, $obj->_root->{end},
705 SIG_BLIST,
706 chr(0) x $self->{bucket_list_size},
707 );
708
709 $tag->{ref_loc} = $ref_loc;
710 $tag->{ch} = $ch;
711
712 last;
d0b74c17 713 }
714
715 $tag->{ch} = $ch;
716 $tag->{ref_loc} = $ref_loc;
717
beac1dff 718 $ch++;
719 }
d0b74c17 720
beac1dff 721 return $tag;
6736c116 722}
723
d0b74c17 724sub index_lookup {
725 ##
726 # Given index tag, lookup single entry in index and return .
727 ##
728 my $self = shift;
729 my ($obj, $tag, $index) = @_;
730
731 my $location = unpack(
732 $self->{long_pack},
733 substr(
734 $tag->{content},
735 $index * $self->{long_size},
736 $self->{long_size},
737 ),
738 );
739
740 if (!$location) { return; }
741
742 return $self->load_tag( $obj, $location );
743}
744
6736c116 745sub traverse_index {
beac1dff 746 ##
747 # Scan index and recursively step into deeper levels, looking for next key.
748 ##
6736c116 749 my $self = shift;
750 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 751
beac1dff 752 my $tag = $self->load_tag($obj, $offset );
6736c116 753
754 my $fh = $obj->_fh;
d0b74c17 755
8db25060 756 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 757 my $content = $tag->{content};
e5fc7e69 758 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 759
d5d7c51d 760 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 761 my $subloc = unpack(
762 $self->{long_pack},
d5d7c51d 763 substr($content, $idx * $self->{long_size}, $self->{long_size}),
e5fc7e69 764 );
765
beac1dff 766 if ($subloc) {
e5fc7e69 767 my $result = $self->traverse_index(
768 $obj, $subloc, $ch + 1, $force_return_next,
769 );
770
beac1dff 771 if (defined($result)) { return $result; }
772 }
773 } # index loop
d0b74c17 774
beac1dff 775 $obj->{return_next} = 1;
776 } # tag is an index
d0b74c17 777
e5fc7e69 778 else {
beac1dff 779 my $keys = $tag->{content};
780 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 781
beac1dff 782 ##
783 # Iterate through buckets, looking for a key match
784 ##
8db25060 785 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 786 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 787
8db25060 788 # End of bucket list -- return to outer loop
beac1dff 789 if (!$subloc) {
beac1dff 790 $obj->{return_next} = 1;
791 last;
792 }
8db25060 793 # Located previous key -- return next one found
beac1dff 794 elsif ($key eq $obj->{prev_md5}) {
beac1dff 795 $obj->{return_next} = 1;
796 next;
797 }
8db25060 798 # Seek to bucket location and skip over signature
beac1dff 799 elsif ($obj->{return_next}) {
8db25060 800 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 801
beac1dff 802 # Skip over value to get to plain key
8db25060 803 my $sig;
804 read( $fh, $sig, SIG_SIZE );
805
beac1dff 806 my $size;
e5fc7e69 807 read( $fh, $size, $self->{data_size});
808 $size = unpack($self->{data_pack}, $size);
beac1dff 809 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 810
beac1dff 811 # Read in plain key and return as scalar
beac1dff 812 my $plain_key;
e5fc7e69 813 read( $fh, $size, $self->{data_size});
814 $size = unpack($self->{data_pack}, $size);
beac1dff 815 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 816
beac1dff 817 return $plain_key;
818 }
8db25060 819 }
d0b74c17 820
beac1dff 821 $obj->{return_next} = 1;
822 } # tag is a bucket list
d0b74c17 823
beac1dff 824 return;
6736c116 825}
826
827sub get_next_key {
beac1dff 828 ##
829 # Locate next key, given digested previous one
830 ##
6736c116 831 my $self = shift;
832 my ($obj) = @_;
d0b74c17 833
beac1dff 834 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
835 $obj->{return_next} = 0;
d0b74c17 836
beac1dff 837 ##
838 # If the previous key was not specifed, start at the top and
839 # return the first one found.
840 ##
841 if (!$obj->{prev_md5}) {
842 $obj->{prev_md5} = chr(0) x $self->{hash_size};
843 $obj->{return_next} = 1;
844 }
d0b74c17 845
beac1dff 846 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 847}
848
75be6413 849# Utilities
850
9cec1360 851sub _get_key_subloc {
75be6413 852 my $self = shift;
853 my ($keys, $idx) = @_;
854
9cec1360 855 my ($key, $subloc) = unpack(
856 "a$self->{hash_size} $self->{long_pack}",
75be6413 857 substr(
858 $keys,
9cec1360 859 ($idx * $self->{bucket_size}),
860 $self->{bucket_size},
75be6413 861 ),
862 );
863
9cec1360 864 return ($key, $subloc);
75be6413 865}
866
d608b06e 867sub _find_in_buckets {
868 my $self = shift;
869 my ($tag, $md5) = @_;
870
871 BUCKET:
872 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
873 my ($key, $subloc) = $self->_get_key_subloc( $tag->{content}, $i );
874
875 return ($subloc, $i * $self->{bucket_size}) unless $subloc;
876
877 next BUCKET if $key ne $md5;
878
879 return ($subloc, $i * $self->{bucket_size});
880 }
881
882 return;
883}
884
a20d9a3f 8851;
886__END__
d5d7c51d 887
888# This will be added in later, after more refactoring is done. This is an early
889# attempt at refactoring on the physical level instead of the virtual level.
890sub _read_at {
891 my $self = shift;
892 my ($obj, $spot, $amount, $unpack) = @_;
893
894 my $fh = $obj->_fh;
895 seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );
896
897 my $buffer;
898 my $bytes_read = read( $fh, $buffer, $amount );
899
900 if ( $unpack ) {
901 $buffer = unpack( $unpack, $buffer );
902 }
903
904 if ( wantarray ) {
905 return ($buffer, $bytes_read);
906 }
907 else {
908 return $buffer;
909 }
910}