More cleanup ... almost ready for _release_space()
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
8db25060 7##
8# Setup file and tag signatures. These should never change.
9##
10sub SIG_FILE () { 'DPDB' }
11sub SIG_INTERNAL () { 'i' }
12sub SIG_HASH () { 'H' }
13sub SIG_ARRAY () { 'A' }
8db25060 14sub SIG_NULL () { 'N' }
15sub SIG_DATA () { 'D' }
16sub SIG_INDEX () { 'I' }
17sub SIG_BLIST () { 'B' }
18sub SIG_SIZE () { 1 }
19
612969fb 20sub precalc_sizes {
beac1dff 21 ##
22 # Precalculate index, bucket and bucket list sizes
23 ##
251dfd0e 24 my $self = shift;
1bf65be7 25
251dfd0e 26 $self->{index_size} = (2**8) * $self->{long_size};
27 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
28 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 29
251dfd0e 30 return 1;
1bf65be7 31}
32
33sub set_pack {
beac1dff 34 ##
35 # Set pack/unpack modes (see file header for more)
36 ##
251dfd0e 37 my $self = shift;
1bf65be7 38 my ($long_s, $long_p, $data_s, $data_p) = @_;
39
81d16922 40 ##
75be6413 41 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
42 # GB per file.
beac1dff 43 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 44 #
45 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 46 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 47 ##
251dfd0e 48 $self->{long_size} = $long_s ? $long_s : 4;
49 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 50
81d16922 51 ##
75be6413 52 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
d5d7c51d 53 # key/value. Upgrading this is possible (see above) but probably not
54 # necessary. If you need more than 4 GB for a single key or value, this
55 # module is really not for you :-)
81d16922 56 ##
251dfd0e 57 $self->{data_size} = $data_s ? $data_s : 4;
58 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 59
beac1dff 60 return $self->precalc_sizes();
1bf65be7 61}
62
63sub set_digest {
beac1dff 64 ##
65 # Set key digest function (default is MD5)
66 ##
251dfd0e 67 my $self = shift;
1bf65be7 68 my ($digest_func, $hash_size) = @_;
69
d0b74c17 70 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 71 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 72
beac1dff 73 return $self->precalc_sizes();
612969fb 74}
75
76sub new {
77 my $class = shift;
78 my ($args) = @_;
79
80 my $self = bless {
81 long_size => 4,
82 long_pack => 'N',
83 data_size => 4,
84 data_pack => 'N',
251dfd0e 85
612969fb 86 digest => \&Digest::MD5::md5,
87 hash_size => 16,
251dfd0e 88
81d16922 89 ##
d5d7c51d 90 # Maximum number of buckets per list before another level of indexing is
91 # done.
92 # Increase this value for slightly greater speed, but larger database
93 # files. DO NOT decrease this value below 16, due to risk of recursive
94 # reindex overrun.
81d16922 95 ##
612969fb 96 max_buckets => 16,
97 }, $class;
98
251dfd0e 99 $self->precalc_sizes;
612969fb 100
101 return $self;
1bf65be7 102}
103
70b55428 104sub setup_fh {
105 my $self = shift;
106 my ($obj) = @_;
107
108 $self->open( $obj ) if !defined $obj->_fh;
109
6fde4ed2 110 my $fh = $obj->_fh;
111 flock $fh, LOCK_EX;
118ba343 112
6fde4ed2 113 unless ( $obj->{base_offset} ) {
118ba343 114 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
115 my $signature;
116 my $bytes_read = read( $fh, $signature, length(SIG_FILE));
117
118 ##
119 # File is empty -- write signature and master index
120 ##
121 if (!$bytes_read) {
c9ec091a 122 my $loc = $self->_request_space( $obj, length( SIG_FILE ) );
123 seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET);
118ba343 124 print( $fh SIG_FILE);
125
c9ec091a 126 $obj->{base_offset} = $self->_request_space(
16d1ad9b 127 $obj, $self->tag_size( $self->{index_size} ),
c9ec091a 128 );
118ba343 129
130 $self->create_tag(
c9ec091a 131 $obj, $obj->_base_offset, $obj->_type,
f37c15ab 132 chr(0)x$self->{index_size},
118ba343 133 );
134
135 # Flush the filehandle
136 my $old_fh = select $fh;
137 my $old_af = $|; $| = 1; $| = $old_af;
138 select $old_fh;
139 }
140 else {
141 $obj->{base_offset} = $bytes_read;
142
143 ##
144 # Check signature was valid
145 ##
146 unless ($signature eq SIG_FILE) {
147 $self->close_fh( $obj );
148 $obj->_throw_error("Signature not found -- file is not a Deep DB");
149 }
150
151 ##
152 # Get our type from master index signature
153 ##
154 my $tag = $self->load_tag($obj, $obj->_base_offset)
155 or $obj->_throw_error("Corrupted file, no master index record");
156
157 unless ($obj->{type} eq $tag->{signature}) {
158 $obj->_throw_error("File type mismatch");
159 }
160 }
118ba343 161 }
e06824f8 162
673464d9 163 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 164 unless ( $obj->_root->{inode} ) {
165 my @stats = stat($obj->_fh);
166 $obj->_root->{inode} = $stats[1];
167 $obj->_root->{end} = $stats[7];
168 }
169
6fde4ed2 170 flock $fh, LOCK_UN;
171
70b55428 172 return 1;
173}
174
a20d9a3f 175sub open {
20f7b20c 176 ##
177 # Open a fh to the database, create if nonexistent.
178 # Make sure file signature matches DBM::Deep spec.
179 ##
a20d9a3f 180 my $self = shift;
70b55428 181 my ($obj) = @_;
a20d9a3f 182
673464d9 183 # Theoretically, adding O_BINARY should remove the need for the binmode
184 # Of course, testing it is going to be ... interesting.
185 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 186
673464d9 187 my $fh;
d5d7c51d 188 my $filename = $obj->_root->{file};
189 sysopen( $fh, $filename, $flags )
190 or $obj->_throw_error("Cannot sysopen file '$filename': $!");
673464d9 191 $obj->_root->{fh} = $fh;
a20d9a3f 192
193 #XXX Can we remove this by using the right sysopen() flags?
194 # Maybe ... q.v. above
195 binmode $fh; # for win32
196
cd59cad8 197 if ($obj->_root->{autoflush}) {
a20d9a3f 198 my $old = select $fh;
199 $|=1;
200 select $old;
201 }
20f7b20c 202
a20d9a3f 203 return 1;
204}
205
3d1b8be9 206sub close_fh {
cd59cad8 207 my $self = shift;
a21f2d90 208 my ($obj) = @_;
cd59cad8 209
210 if ( my $fh = $obj->_root->{fh} ) {
211 close $fh;
212 }
213 $obj->_root->{fh} = undef;
214
215 return 1;
216}
217
16d1ad9b 218sub tag_size {
219 my $self = shift;
220 my ($size) = @_;
221 return SIG_SIZE + $self->{data_size} + $size;
222}
223
d4b1166e 224sub create_tag {
20f7b20c 225 ##
226 # Given offset, signature and content, create tag and write to disk
227 ##
d4b1166e 228 my $self = shift;
20f7b20c 229 my ($obj, $offset, $sig, $content) = @_;
f37c15ab 230 my $size = length( $content );
20f7b20c 231
d4b1166e 232 my $fh = $obj->_fh;
233
f37c15ab 234 if ( defined $offset ) {
235 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
236 }
237
251dfd0e 238 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 239
f37c15ab 240 return unless defined $offset;
241
20f7b20c 242 return {
243 signature => $sig,
244 size => $size,
8db25060 245 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 246 content => $content
247 };
d4b1166e 248}
249
250sub load_tag {
20f7b20c 251 ##
252 # Given offset, load single tag and return signature, size and data
253 ##
d4b1166e 254 my $self = shift;
20f7b20c 255 my ($obj, $offset) = @_;
256
e06824f8 257# print join(':',map{$_||''}caller(1)), $/;
258
d4b1166e 259 my $fh = $obj->_fh;
260
20f7b20c 261 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 262
75be6413 263 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 264 return if eof $fh;
20f7b20c 265
d4b1166e 266 my $b;
8db25060 267 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 268 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 269
270 my $buffer;
271 read( $fh, $buffer, $size);
272
273 return {
274 signature => $sig,
275 size => $size,
8db25060 276 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 277 content => $buffer
278 };
d4b1166e 279}
280
29b01632 281sub _length_needed {
282 my $self = shift;
f37c15ab 283 my ($obj, $value, $key) = @_;
29b01632 284
285 my $is_dbm_deep = eval {
286 local $SIG{'__DIE__'};
287 $value->isa( 'DBM::Deep' );
288 };
289
f37c15ab 290 my $len = SIG_SIZE + $self->{data_size}
291 + $self->{data_size} + length( $key );
29b01632 292
f37c15ab 293 if ( $is_dbm_deep && $value->_root eq $obj->_root ) {
294 return $len + $self->{long_size};
29b01632 295 }
296
297 my $r = Scalar::Util::reftype( $value ) || '';
298 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 299 if ( defined $value ) {
300 $len += length( $value );
301 }
302 return $len;
29b01632 303 }
304
f37c15ab 305 $len += $self->{index_size};
29b01632 306
307 # if autobless is enabled, must also take into consideration
f37c15ab 308 # the class name as it is stored after the key.
29b01632 309 if ( $obj->_root->{autobless} ) {
f37c15ab 310 # This is for the bit saying whether or not this thing is blessed.
311 $len += 1;
312
29b01632 313 my $value_class = Scalar::Util::blessed($value);
f37c15ab 314 if ( defined $value_class && !$is_dbm_deep ) {
315 $len += $self->{data_size} + length($value_class);
29b01632 316 }
317 }
318
f37c15ab 319 return $len;
29b01632 320}
321
20f7b20c 322sub add_bucket {
323 ##
324 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
325 # plain (undigested) key and value.
326 ##
d4b1166e 327 my $self = shift;
20f7b20c 328 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 329
eea0d863 330 # This verifies that only supported values will be stored.
331 {
332 my $r = Scalar::Util::reftype( $value );
333 last if !defined $r;
334
335 last if $r eq 'HASH';
336 last if $r eq 'ARRAY';
337
338 $obj->_throw_error(
339 "Storage of variables of type '$r' is not supported."
340 );
341 }
342
20f7b20c 343 my $location = 0;
344 my $result = 2;
345
346 my $root = $obj->_root;
f37c15ab 347 my $fh = $obj->_fh;
20f7b20c 348
f37c15ab 349 my $actual_length = $self->_length_needed( $obj, $value, $plain_key );
20f7b20c 350
386bab6c 351 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
75be6413 352
386bab6c 353 # Updating a known md5
354 if ( $subloc ) {
355 $result = 1;
20f7b20c 356
d5d7c51d 357 seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET);
358 my $size;
359 read( $fh, $size, $self->{data_size});
360 $size = unpack($self->{data_pack}, $size);
361
386bab6c 362 if ($actual_length <= $size) {
363 $location = $subloc;
20f7b20c 364 }
75be6413 365 else {
f37c15ab 366 $location = $self->_request_space( $obj, $actual_length );
386bab6c 367 seek(
368 $fh,
369 $tag->{offset} + $offset + $self->{hash_size} + $root->{file_offset},
370 SEEK_SET,
371 );
372 print( $fh pack($self->{long_pack}, $location) );
75be6413 373 }
75be6413 374 }
386bab6c 375 # Adding a new md5
376 elsif ( defined $offset ) {
f37c15ab 377 $location = $self->_request_space( $obj, $actual_length );
386bab6c 378
379 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
380 print( $fh $md5 . pack($self->{long_pack}, $location) );
381 }
382 # If bucket didn't fit into list, split into a new index level
383 else {
384 $self->split_index( $obj, $md5, $tag );
385
f37c15ab 386 $location = $self->_request_space( $obj, $actual_length );
386bab6c 387 }
20f7b20c 388
d5d7c51d 389 $self->write_value( $obj, $location, $plain_key, $value );
390
391 return $result;
392}
393
394sub write_value {
395 my $self = shift;
396 my ($obj, $location, $key, $value) = @_;
397
398 my $fh = $obj->_fh;
399 my $root = $obj->_root;
400
401 my $is_dbm_deep = eval {
402 local $SIG{'__DIE__'};
403 $value->isa( 'DBM::Deep' );
404 };
405
406 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
407
408 seek($fh, $location + $root->{file_offset}, SEEK_SET);
409
20f7b20c 410 ##
d5d7c51d 411 # Write signature based on content type, set content length and write
412 # actual value.
20f7b20c 413 ##
d5d7c51d 414 my $r = Scalar::Util::reftype($value) || '';
d5d7c51d 415 if ( $internal_ref ) {
f37c15ab 416 $self->create_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $value->_base_offset) );
417 }
418 elsif ($r eq 'HASH') {
419 $self->create_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} );
420 }
421 elsif ($r eq 'ARRAY') {
422 $self->create_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} );
423 }
424 elsif (!defined($value)) {
425 $self->create_tag( $obj, undef, SIG_INTERNAL, '' );
d5d7c51d 426 }
427 else {
f37c15ab 428 $self->create_tag( $obj, undef, SIG_DATA, $value );
d5d7c51d 429 }
20f7b20c 430
d5d7c51d 431 ##
432 # Plain key is stored AFTER value, as keys are typically fetched less often.
433 ##
434 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 435
d5d7c51d 436 ##
437 # If value is blessed, preserve class name
438 ##
439 if ( $root->{autobless} ) {
440 my $value_class = Scalar::Util::blessed($value);
f37c15ab 441 if ( defined $value_class && !$is_dbm_deep ) {
d5d7c51d 442 print( $fh chr(1) );
443 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
20f7b20c 444 }
d5d7c51d 445 else {
446 print( $fh chr(0) );
20f7b20c 447 }
d5d7c51d 448 }
20f7b20c 449
d5d7c51d 450 ##
d5d7c51d 451 # If content is a hash or array, create new child DBM::Deep object and
452 # pass each key or element to it.
453 ##
f37c15ab 454 if ( !$internal_ref ) {
d5d7c51d 455 if ($r eq 'HASH') {
456 my $branch = DBM::Deep->new(
457 type => DBM::Deep->TYPE_HASH,
458 base_offset => $location,
459 root => $root,
460 );
461 foreach my $key (keys %{$value}) {
462 $branch->STORE( $key, $value->{$key} );
20f7b20c 463 }
d5d7c51d 464 }
465 elsif ($r eq 'ARRAY') {
466 my $branch = DBM::Deep->new(
467 type => DBM::Deep->TYPE_ARRAY,
468 base_offset => $location,
469 root => $root,
470 );
471 my $index = 0;
472 foreach my $element (@{$value}) {
473 $branch->STORE( $index, $element );
474 $index++;
20f7b20c 475 }
476 }
20f7b20c 477 }
d4b1166e 478
d5d7c51d 479 return 1;
d4b1166e 480}
481
75be6413 482sub split_index {
483 my $self = shift;
484 my ($obj, $md5, $tag) = @_;
485
486 my $fh = $obj->_fh;
487 my $root = $obj->_root;
488 my $keys = $tag->{content};
489
490 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 491
492 my $loc = $self->_request_space(
493 $obj, $self->tag_size( $self->{index_size} ),
494 );
495
496 print( $fh pack($self->{long_pack}, $loc) );
75be6413 497
498 my $index_tag = $self->create_tag(
16d1ad9b 499 $obj, $loc, SIG_INDEX,
f37c15ab 500 chr(0)x$self->{index_size},
75be6413 501 );
502
503 my @offsets = ();
504
505 $keys .= $md5 . pack($self->{long_pack}, 0);
506
507 BUCKET:
508 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9cec1360 509 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 510
511 next BUCKET unless $key;
512
75be6413 513 my $num = ord(substr($key, $tag->{ch} + 1, 1));
514
515 if ($offsets[$num]) {
8db25060 516 my $offset = $offsets[$num] + SIG_SIZE + $self->{data_size};
75be6413 517 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
518 my $subkeys;
519 read( $fh, $subkeys, $self->{bucket_list_size});
520
521 for (my $k=0; $k<$self->{max_buckets}; $k++) {
9cec1360 522 my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k );
75be6413 523
524 if (!$subloc) {
525 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
526 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
527 last;
528 }
529 } # k loop
530 }
531 else {
532 $offsets[$num] = $root->{end};
533 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
75be6413 534
2603d86e 535 my $loc = $self->_request_space(
536 $obj, $self->tag_size( $self->{bucket_list_size} ),
537 );
538
539 print( $fh pack($self->{long_pack}, $loc) );
540
541 my $blist_tag = $self->create_tag(
542 $obj, $loc, SIG_BLIST,
f37c15ab 543 chr(0)x$self->{bucket_list_size},
2603d86e 544 );
75be6413 545
546 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
547 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
548 }
549 } # i loop
550
551 return;
552}
553
8db25060 554sub read_from_loc {
555 my $self = shift;
556 my ($obj, $subloc) = @_;
557
558 my $fh = $obj->_fh;
559
560 ##
561 # Found match -- seek to offset and read signature
562 ##
563 my $signature;
564 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
565 read( $fh, $signature, SIG_SIZE);
566
567 ##
568 # If value is a hash or array, return new DBM::Deep object with correct offset
569 ##
570 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
571 my $obj = DBM::Deep->new(
572 type => $signature,
573 base_offset => $subloc,
574 root => $obj->_root,
575 );
576
577 if ($obj->_root->{autobless}) {
578 ##
579 # Skip over value and plain key to see if object needs
580 # to be re-blessed
581 ##
582 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
583
584 my $size;
585 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
586 if ($size) { seek($fh, $size, SEEK_CUR); }
587
588 my $bless_bit;
589 read( $fh, $bless_bit, 1);
590 if (ord($bless_bit)) {
591 ##
592 # Yes, object needs to be re-blessed
593 ##
594 my $class_name;
595 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
596 if ($size) { read( $fh, $class_name, $size); }
597 if ($class_name) { $obj = bless( $obj, $class_name ); }
598 }
599 }
600
601 return $obj;
602 }
603 elsif ( $signature eq SIG_INTERNAL ) {
604 my $size;
605 read( $fh, $size, $self->{data_size});
606 $size = unpack($self->{data_pack}, $size);
607
608 if ( $size ) {
609 my $new_loc;
610 read( $fh, $new_loc, $size );
611 $new_loc = unpack( $self->{long_pack}, $new_loc );
612
613 return $self->read_from_loc( $obj, $new_loc );
614 }
615 else {
616 return;
617 }
618 }
619 ##
620 # Otherwise return actual value
621 ##
622 elsif ($signature eq SIG_DATA) {
623 my $size;
624 read( $fh, $size, $self->{data_size});
625 $size = unpack($self->{data_pack}, $size);
626
627 my $value = '';
628 if ($size) { read( $fh, $value, $size); }
629 return $value;
630 }
631
632 ##
633 # Key exists, but content is null
634 ##
635 return;
636}
637
9020ee8c 638sub get_bucket_value {
beac1dff 639 ##
640 # Fetch single value given tag and MD5 digested key.
641 ##
642 my $self = shift;
643 my ($obj, $tag, $md5) = @_;
9020ee8c 644
386bab6c 645 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
646 if ( $subloc ) {
8db25060 647 return $self->read_from_loc( $obj, $subloc );
386bab6c 648 }
beac1dff 649 return;
9020ee8c 650}
ab0e4957 651
652sub delete_bucket {
beac1dff 653 ##
654 # Delete single key/value pair given tag and MD5 digested key.
655 ##
656 my $self = shift;
657 my ($obj, $tag, $md5) = @_;
ab0e4957 658
386bab6c 659 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
660 if ( $subloc ) {
661 my $fh = $obj->_fh;
662 seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);
663 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 664 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 665
ab0e4957 666 return 1;
386bab6c 667 }
beac1dff 668 return;
ab0e4957 669}
670
912d50b1 671sub bucket_exists {
beac1dff 672 ##
673 # Check existence of single key given tag and MD5 digested key.
674 ##
675 my $self = shift;
676 my ($obj, $tag, $md5) = @_;
912d50b1 677
386bab6c 678 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 679 return $subloc && 1;
912d50b1 680}
681
6736c116 682sub find_bucket_list {
beac1dff 683 ##
684 # Locate offset for bucket list, given digested key
685 ##
686 my $self = shift;
d0b74c17 687 my ($obj, $md5, $args) = @_;
688 $args = {} unless $args;
689
beac1dff 690 ##
691 # Locate offset for bucket list using digest index system
692 ##
e5fc7e69 693 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 694 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 695
e5fc7e69 696 my $ch = 0;
8db25060 697 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 698 my $num = ord substr($md5, $ch, 1);
699
700 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
701 $tag = $self->index_lookup( $obj, $tag, $num );
702
703 if (!$tag) {
29b01632 704 return if !$args->{create};
d0b74c17 705
d5d7c51d 706 my $fh = $obj->_fh;
707 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
16d1ad9b 708
709 my $loc = $self->_request_space(
710 $obj, $self->tag_size( $self->{bucket_list_size} ),
711 );
712
713 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 714
d5d7c51d 715 $tag = $self->create_tag(
16d1ad9b 716 $obj, $loc, SIG_BLIST,
f37c15ab 717 chr(0)x$self->{bucket_list_size},
d5d7c51d 718 );
719
720 $tag->{ref_loc} = $ref_loc;
721 $tag->{ch} = $ch;
722
723 last;
d0b74c17 724 }
725
16d1ad9b 726 $tag->{ch} = $ch++;
d0b74c17 727 $tag->{ref_loc} = $ref_loc;
beac1dff 728 }
d0b74c17 729
beac1dff 730 return $tag;
6736c116 731}
732
d0b74c17 733sub index_lookup {
734 ##
735 # Given index tag, lookup single entry in index and return .
736 ##
737 my $self = shift;
738 my ($obj, $tag, $index) = @_;
739
740 my $location = unpack(
741 $self->{long_pack},
742 substr(
743 $tag->{content},
744 $index * $self->{long_size},
745 $self->{long_size},
746 ),
747 );
748
749 if (!$location) { return; }
750
751 return $self->load_tag( $obj, $location );
752}
753
6736c116 754sub traverse_index {
beac1dff 755 ##
756 # Scan index and recursively step into deeper levels, looking for next key.
757 ##
6736c116 758 my $self = shift;
759 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 760
beac1dff 761 my $tag = $self->load_tag($obj, $offset );
6736c116 762
763 my $fh = $obj->_fh;
d0b74c17 764
8db25060 765 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 766 my $content = $tag->{content};
e5fc7e69 767 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 768
d5d7c51d 769 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 770 my $subloc = unpack(
771 $self->{long_pack},
e06824f8 772 substr(
773 $content,
774 $idx * $self->{long_size},
775 $self->{long_size},
776 ),
e5fc7e69 777 );
778
beac1dff 779 if ($subloc) {
e5fc7e69 780 my $result = $self->traverse_index(
781 $obj, $subloc, $ch + 1, $force_return_next,
782 );
783
beac1dff 784 if (defined($result)) { return $result; }
785 }
786 } # index loop
d0b74c17 787
beac1dff 788 $obj->{return_next} = 1;
789 } # tag is an index
d0b74c17 790
e5fc7e69 791 else {
beac1dff 792 my $keys = $tag->{content};
793 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 794
beac1dff 795 ##
796 # Iterate through buckets, looking for a key match
797 ##
8db25060 798 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 799 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 800
8db25060 801 # End of bucket list -- return to outer loop
beac1dff 802 if (!$subloc) {
beac1dff 803 $obj->{return_next} = 1;
804 last;
805 }
8db25060 806 # Located previous key -- return next one found
beac1dff 807 elsif ($key eq $obj->{prev_md5}) {
beac1dff 808 $obj->{return_next} = 1;
809 next;
810 }
8db25060 811 # Seek to bucket location and skip over signature
beac1dff 812 elsif ($obj->{return_next}) {
8db25060 813 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 814
beac1dff 815 # Skip over value to get to plain key
8db25060 816 my $sig;
817 read( $fh, $sig, SIG_SIZE );
818
beac1dff 819 my $size;
e5fc7e69 820 read( $fh, $size, $self->{data_size});
821 $size = unpack($self->{data_pack}, $size);
beac1dff 822 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 823
beac1dff 824 # Read in plain key and return as scalar
beac1dff 825 my $plain_key;
e5fc7e69 826 read( $fh, $size, $self->{data_size});
827 $size = unpack($self->{data_pack}, $size);
beac1dff 828 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 829
beac1dff 830 return $plain_key;
831 }
8db25060 832 }
d0b74c17 833
beac1dff 834 $obj->{return_next} = 1;
835 } # tag is a bucket list
d0b74c17 836
beac1dff 837 return;
6736c116 838}
839
840sub get_next_key {
beac1dff 841 ##
842 # Locate next key, given digested previous one
843 ##
6736c116 844 my $self = shift;
845 my ($obj) = @_;
d0b74c17 846
beac1dff 847 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
848 $obj->{return_next} = 0;
d0b74c17 849
beac1dff 850 ##
851 # If the previous key was not specifed, start at the top and
852 # return the first one found.
853 ##
854 if (!$obj->{prev_md5}) {
855 $obj->{prev_md5} = chr(0) x $self->{hash_size};
856 $obj->{return_next} = 1;
857 }
d0b74c17 858
beac1dff 859 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 860}
861
75be6413 862# Utilities
863
9cec1360 864sub _get_key_subloc {
75be6413 865 my $self = shift;
866 my ($keys, $idx) = @_;
867
9cec1360 868 my ($key, $subloc) = unpack(
869 "a$self->{hash_size} $self->{long_pack}",
75be6413 870 substr(
871 $keys,
9cec1360 872 ($idx * $self->{bucket_size}),
873 $self->{bucket_size},
75be6413 874 ),
875 );
876
9cec1360 877 return ($key, $subloc);
75be6413 878}
879
d608b06e 880sub _find_in_buckets {
881 my $self = shift;
882 my ($tag, $md5) = @_;
883
884 BUCKET:
885 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
886 my ($key, $subloc) = $self->_get_key_subloc( $tag->{content}, $i );
887
888 return ($subloc, $i * $self->{bucket_size}) unless $subloc;
889
890 next BUCKET if $key ne $md5;
891
892 return ($subloc, $i * $self->{bucket_size});
893 }
894
895 return;
896}
897
994ccd8e 898sub _request_space {
899 my $self = shift;
900 my ($obj, $size) = @_;
901
902 my $loc = $obj->_root->{end};
c9ec091a 903 $obj->_root->{end} += $size;
994ccd8e 904
905 return $loc;
906}
907
908sub _release_space {
909 my $self = shift;
910 my ($obj, $size, $loc) = @_;
911
912 return;
913}
914
a20d9a3f 9151;
916__END__
d5d7c51d 917
918# This will be added in later, after more refactoring is done. This is an early
919# attempt at refactoring on the physical level instead of the virtual level.
920sub _read_at {
921 my $self = shift;
922 my ($obj, $spot, $amount, $unpack) = @_;
923
924 my $fh = $obj->_fh;
925 seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );
926
927 my $buffer;
928 my $bytes_read = read( $fh, $buffer, $amount );
929
930 if ( $unpack ) {
931 $buffer = unpack( $unpack, $buffer );
932 }
933
934 if ( wantarray ) {
935 return ($buffer, $bytes_read);
936 }
937 else {
938 return $buffer;
939 }
940}