Created tag_size() and am converting create_tag() calls to use _request_space()
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
8db25060 7##
8# Setup file and tag signatures. These should never change.
9##
10sub SIG_FILE () { 'DPDB' }
11sub SIG_INTERNAL () { 'i' }
12sub SIG_HASH () { 'H' }
13sub SIG_ARRAY () { 'A' }
8db25060 14sub SIG_NULL () { 'N' }
15sub SIG_DATA () { 'D' }
16sub SIG_INDEX () { 'I' }
17sub SIG_BLIST () { 'B' }
18sub SIG_SIZE () { 1 }
19
612969fb 20sub precalc_sizes {
beac1dff 21 ##
22 # Precalculate index, bucket and bucket list sizes
23 ##
251dfd0e 24 my $self = shift;
1bf65be7 25
251dfd0e 26 $self->{index_size} = (2**8) * $self->{long_size};
27 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
28 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 29
251dfd0e 30 return 1;
1bf65be7 31}
32
33sub set_pack {
beac1dff 34 ##
35 # Set pack/unpack modes (see file header for more)
36 ##
251dfd0e 37 my $self = shift;
1bf65be7 38 my ($long_s, $long_p, $data_s, $data_p) = @_;
39
81d16922 40 ##
75be6413 41 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
42 # GB per file.
beac1dff 43 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 44 #
45 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 46 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 47 ##
251dfd0e 48 $self->{long_size} = $long_s ? $long_s : 4;
49 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 50
81d16922 51 ##
75be6413 52 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
d5d7c51d 53 # key/value. Upgrading this is possible (see above) but probably not
54 # necessary. If you need more than 4 GB for a single key or value, this
55 # module is really not for you :-)
81d16922 56 ##
251dfd0e 57 $self->{data_size} = $data_s ? $data_s : 4;
58 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 59
beac1dff 60 return $self->precalc_sizes();
1bf65be7 61}
62
63sub set_digest {
beac1dff 64 ##
65 # Set key digest function (default is MD5)
66 ##
251dfd0e 67 my $self = shift;
1bf65be7 68 my ($digest_func, $hash_size) = @_;
69
d0b74c17 70 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 71 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 72
beac1dff 73 return $self->precalc_sizes();
612969fb 74}
75
76sub new {
77 my $class = shift;
78 my ($args) = @_;
79
80 my $self = bless {
81 long_size => 4,
82 long_pack => 'N',
83 data_size => 4,
84 data_pack => 'N',
251dfd0e 85
612969fb 86 digest => \&Digest::MD5::md5,
87 hash_size => 16,
251dfd0e 88
81d16922 89 ##
d5d7c51d 90 # Maximum number of buckets per list before another level of indexing is
91 # done.
92 # Increase this value for slightly greater speed, but larger database
93 # files. DO NOT decrease this value below 16, due to risk of recursive
94 # reindex overrun.
81d16922 95 ##
612969fb 96 max_buckets => 16,
97 }, $class;
98
251dfd0e 99 $self->precalc_sizes;
612969fb 100
101 return $self;
1bf65be7 102}
103
70b55428 104sub setup_fh {
105 my $self = shift;
106 my ($obj) = @_;
107
108 $self->open( $obj ) if !defined $obj->_fh;
109
6fde4ed2 110 my $fh = $obj->_fh;
111 flock $fh, LOCK_EX;
118ba343 112
6fde4ed2 113 unless ( $obj->{base_offset} ) {
118ba343 114 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
115 my $signature;
116 my $bytes_read = read( $fh, $signature, length(SIG_FILE));
117
118 ##
119 # File is empty -- write signature and master index
120 ##
121 if (!$bytes_read) {
c9ec091a 122 my $loc = $self->_request_space( $obj, length( SIG_FILE ) );
123 seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET);
118ba343 124 print( $fh SIG_FILE);
125
c9ec091a 126 $obj->{base_offset} = $self->_request_space(
16d1ad9b 127 $obj, $self->tag_size( $self->{index_size} ),
c9ec091a 128 );
118ba343 129
130 $self->create_tag(
c9ec091a 131 $obj, $obj->_base_offset, $obj->_type,
132 chr(0) x $self->{index_size},
118ba343 133 );
134
135 # Flush the filehandle
136 my $old_fh = select $fh;
137 my $old_af = $|; $| = 1; $| = $old_af;
138 select $old_fh;
139 }
140 else {
141 $obj->{base_offset} = $bytes_read;
142
143 ##
144 # Check signature was valid
145 ##
146 unless ($signature eq SIG_FILE) {
147 $self->close_fh( $obj );
148 $obj->_throw_error("Signature not found -- file is not a Deep DB");
149 }
150
151 ##
152 # Get our type from master index signature
153 ##
154 my $tag = $self->load_tag($obj, $obj->_base_offset)
155 or $obj->_throw_error("Corrupted file, no master index record");
156
157 unless ($obj->{type} eq $tag->{signature}) {
158 $obj->_throw_error("File type mismatch");
159 }
160 }
118ba343 161 }
e06824f8 162
673464d9 163 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 164 unless ( $obj->_root->{inode} ) {
165 my @stats = stat($obj->_fh);
166 $obj->_root->{inode} = $stats[1];
167 $obj->_root->{end} = $stats[7];
168 }
169
6fde4ed2 170 flock $fh, LOCK_UN;
171
70b55428 172 return 1;
173}
174
a20d9a3f 175sub open {
20f7b20c 176 ##
177 # Open a fh to the database, create if nonexistent.
178 # Make sure file signature matches DBM::Deep spec.
179 ##
a20d9a3f 180 my $self = shift;
70b55428 181 my ($obj) = @_;
a20d9a3f 182
673464d9 183 # Theoretically, adding O_BINARY should remove the need for the binmode
184 # Of course, testing it is going to be ... interesting.
185 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 186
673464d9 187 my $fh;
d5d7c51d 188 my $filename = $obj->_root->{file};
189 sysopen( $fh, $filename, $flags )
190 or $obj->_throw_error("Cannot sysopen file '$filename': $!");
673464d9 191 $obj->_root->{fh} = $fh;
a20d9a3f 192
193 #XXX Can we remove this by using the right sysopen() flags?
194 # Maybe ... q.v. above
195 binmode $fh; # for win32
196
cd59cad8 197 if ($obj->_root->{autoflush}) {
a20d9a3f 198 my $old = select $fh;
199 $|=1;
200 select $old;
201 }
20f7b20c 202
a20d9a3f 203 return 1;
204}
205
3d1b8be9 206sub close_fh {
cd59cad8 207 my $self = shift;
a21f2d90 208 my ($obj) = @_;
cd59cad8 209
210 if ( my $fh = $obj->_root->{fh} ) {
211 close $fh;
212 }
213 $obj->_root->{fh} = undef;
214
215 return 1;
216}
217
16d1ad9b 218sub tag_size {
219 my $self = shift;
220 my ($size) = @_;
221 return SIG_SIZE + $self->{data_size} + $size;
222}
223
d4b1166e 224sub create_tag {
20f7b20c 225 ##
226 # Given offset, signature and content, create tag and write to disk
227 ##
d4b1166e 228 my $self = shift;
20f7b20c 229 my ($obj, $offset, $sig, $content) = @_;
230 my $size = length($content);
231
d4b1166e 232 my $fh = $obj->_fh;
233
20f7b20c 234 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
251dfd0e 235 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 236
237 if ($offset == $obj->_root->{end}) {
16d1ad9b 238 $obj->_root->{end} += $self->tag_size( $size );
20f7b20c 239 }
240
241 return {
242 signature => $sig,
243 size => $size,
8db25060 244 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 245 content => $content
246 };
d4b1166e 247}
248
249sub load_tag {
20f7b20c 250 ##
251 # Given offset, load single tag and return signature, size and data
252 ##
d4b1166e 253 my $self = shift;
20f7b20c 254 my ($obj, $offset) = @_;
255
e06824f8 256# print join(':',map{$_||''}caller(1)), $/;
257
d4b1166e 258 my $fh = $obj->_fh;
259
20f7b20c 260 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 261
75be6413 262 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 263 return if eof $fh;
20f7b20c 264
d4b1166e 265 my $b;
8db25060 266 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 267 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 268
269 my $buffer;
270 read( $fh, $buffer, $size);
271
272 return {
273 signature => $sig,
274 size => $size,
8db25060 275 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 276 content => $buffer
277 };
d4b1166e 278}
279
29b01632 280sub _length_needed {
281 my $self = shift;
282 my ($obj, $value) = @_;
283
284 my $is_dbm_deep = eval {
285 local $SIG{'__DIE__'};
286 $value->isa( 'DBM::Deep' );
287 };
288
289 my $internal_ref = $is_dbm_deep && ($value->_root eq $obj->_root);
290
291 if ( $internal_ref ) {
292 return $self->{long_size};
293 }
294
295 my $r = Scalar::Util::reftype( $value ) || '';
296 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
297 return length( $value );
298 }
299
300 my $actual_length = $self->{index_size};
301
302 # if autobless is enabled, must also take into consideration
303 # the class name, as it is stored along with key/value.
304 if ( $obj->_root->{autobless} ) {
305 my $value_class = Scalar::Util::blessed($value);
306 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
307 $actual_length += length($value_class);
308 }
309 }
310
311 return $actual_length;
312}
313
20f7b20c 314sub add_bucket {
315 ##
316 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
317 # plain (undigested) key and value.
318 ##
d4b1166e 319 my $self = shift;
20f7b20c 320 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 321
eea0d863 322 # This verifies that only supported values will be stored.
323 {
324 my $r = Scalar::Util::reftype( $value );
325 last if !defined $r;
326
327 last if $r eq 'HASH';
328 last if $r eq 'ARRAY';
329
330 $obj->_throw_error(
331 "Storage of variables of type '$r' is not supported."
332 );
333 }
334
20f7b20c 335 my $location = 0;
336 my $result = 2;
337
338 my $root = $obj->_root;
339
20f7b20c 340 my $fh = $obj->_fh;
341
386bab6c 342 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
75be6413 343
386bab6c 344 # Updating a known md5
345 if ( $subloc ) {
346 $result = 1;
20f7b20c 347
d5d7c51d 348 seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET);
349 my $size;
350 read( $fh, $size, $self->{data_size});
351 $size = unpack($self->{data_pack}, $size);
352
29b01632 353 my $actual_length = $self->_length_needed( $obj, $value );
354
386bab6c 355 if ($actual_length <= $size) {
356 $location = $subloc;
20f7b20c 357 }
75be6413 358 else {
359 $location = $root->{end};
386bab6c 360 seek(
361 $fh,
362 $tag->{offset} + $offset + $self->{hash_size} + $root->{file_offset},
363 SEEK_SET,
364 );
365 print( $fh pack($self->{long_pack}, $location) );
75be6413 366 }
75be6413 367 }
386bab6c 368 # Adding a new md5
369 elsif ( defined $offset ) {
386bab6c 370 $location = $root->{end};
371
372 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
373 print( $fh $md5 . pack($self->{long_pack}, $location) );
374 }
375 # If bucket didn't fit into list, split into a new index level
376 else {
377 $self->split_index( $obj, $md5, $tag );
378
379 $location = $root->{end};
380 }
20f7b20c 381
d5d7c51d 382 $self->write_value( $obj, $location, $plain_key, $value );
383
384 return $result;
385}
386
387sub write_value {
388 my $self = shift;
389 my ($obj, $location, $key, $value) = @_;
390
391 my $fh = $obj->_fh;
392 my $root = $obj->_root;
393
394 my $is_dbm_deep = eval {
395 local $SIG{'__DIE__'};
396 $value->isa( 'DBM::Deep' );
397 };
398
399 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
400
401 seek($fh, $location + $root->{file_offset}, SEEK_SET);
402
20f7b20c 403 ##
d5d7c51d 404 # Write signature based on content type, set content length and write
405 # actual value.
20f7b20c 406 ##
d5d7c51d 407 my $r = Scalar::Util::reftype($value) || '';
408 my $content_length;
409 if ( $internal_ref ) {
410 print( $fh SIG_INTERNAL );
411 print( $fh pack($self->{data_pack}, $self->{long_size}) );
412 print( $fh pack($self->{long_pack}, $value->_base_offset) );
413 $content_length = $self->{long_size};
414 }
415 else {
416 if ($r eq 'HASH') {
417 print( $fh SIG_HASH );
418 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
419 $content_length = $self->{index_size};
420 }
421 elsif ($r eq 'ARRAY') {
422 print( $fh SIG_ARRAY );
423 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
424 $content_length = $self->{index_size};
425 }
426 elsif (!defined($value)) {
427 print( $fh SIG_NULL );
428 print( $fh pack($self->{data_pack}, 0) );
429 $content_length = 0;
20f7b20c 430 }
431 else {
d5d7c51d 432 print( $fh SIG_DATA );
433 print( $fh pack($self->{data_pack}, length($value)) . $value );
434 $content_length = length($value);
20f7b20c 435 }
d5d7c51d 436 }
20f7b20c 437
d5d7c51d 438 ##
439 # Plain key is stored AFTER value, as keys are typically fetched less often.
440 ##
441 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 442
d5d7c51d 443 ##
444 # If value is blessed, preserve class name
445 ##
446 if ( $root->{autobless} ) {
447 my $value_class = Scalar::Util::blessed($value);
448 if ( defined $value_class && !$value->isa( 'DBM::Deep' ) ) {
449 ##
450 # Blessed ref -- will restore later
451 ##
452 print( $fh chr(1) );
453 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
454 $content_length += 1;
455 $content_length += $self->{data_size} + length($value_class);
20f7b20c 456 }
d5d7c51d 457 else {
458 print( $fh chr(0) );
459 $content_length += 1;
20f7b20c 460 }
d5d7c51d 461 }
20f7b20c 462
d5d7c51d 463 ##
464 # If this is a new content area, advance EOF counter
465 ##
466 if ($location == $root->{end}) {
467 $root->{end} += SIG_SIZE;
468 $root->{end} += $self->{data_size} + $content_length;
469 $root->{end} += $self->{data_size} + length($key);
470 }
471
472 ##
473 # If content is a hash or array, create new child DBM::Deep object and
474 # pass each key or element to it.
475 ##
476 if ( ! $internal_ref ) {
477 if ($r eq 'HASH') {
478 my $branch = DBM::Deep->new(
479 type => DBM::Deep->TYPE_HASH,
480 base_offset => $location,
481 root => $root,
482 );
483 foreach my $key (keys %{$value}) {
484 $branch->STORE( $key, $value->{$key} );
20f7b20c 485 }
d5d7c51d 486 }
487 elsif ($r eq 'ARRAY') {
488 my $branch = DBM::Deep->new(
489 type => DBM::Deep->TYPE_ARRAY,
490 base_offset => $location,
491 root => $root,
492 );
493 my $index = 0;
494 foreach my $element (@{$value}) {
495 $branch->STORE( $index, $element );
496 $index++;
20f7b20c 497 }
498 }
20f7b20c 499 }
d4b1166e 500
d5d7c51d 501 return 1;
d4b1166e 502}
503
75be6413 504sub split_index {
505 my $self = shift;
506 my ($obj, $md5, $tag) = @_;
507
508 my $fh = $obj->_fh;
509 my $root = $obj->_root;
510 my $keys = $tag->{content};
511
512 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 513
514 my $loc = $self->_request_space(
515 $obj, $self->tag_size( $self->{index_size} ),
516 );
517
518 print( $fh pack($self->{long_pack}, $loc) );
75be6413 519
520 my $index_tag = $self->create_tag(
16d1ad9b 521 $obj, $loc, SIG_INDEX,
75be6413 522 chr(0) x $self->{index_size},
523 );
524
525 my @offsets = ();
526
527 $keys .= $md5 . pack($self->{long_pack}, 0);
528
529 BUCKET:
530 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9cec1360 531 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 532
533 next BUCKET unless $key;
534
75be6413 535 my $num = ord(substr($key, $tag->{ch} + 1, 1));
536
537 if ($offsets[$num]) {
8db25060 538 my $offset = $offsets[$num] + SIG_SIZE + $self->{data_size};
75be6413 539 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
540 my $subkeys;
541 read( $fh, $subkeys, $self->{bucket_list_size});
542
543 for (my $k=0; $k<$self->{max_buckets}; $k++) {
9cec1360 544 my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k );
75be6413 545
546 if (!$subloc) {
547 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
548 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
549 last;
550 }
551 } # k loop
552 }
553 else {
554 $offsets[$num] = $root->{end};
555 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
556 print( $fh pack($self->{long_pack}, $root->{end}) );
557
8db25060 558 my $blist_tag = $self->create_tag($obj, $root->{end}, SIG_BLIST, chr(0) x $self->{bucket_list_size});
75be6413 559
560 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
561 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
562 }
563 } # i loop
564
565 return;
566}
567
8db25060 568sub read_from_loc {
569 my $self = shift;
570 my ($obj, $subloc) = @_;
571
572 my $fh = $obj->_fh;
573
574 ##
575 # Found match -- seek to offset and read signature
576 ##
577 my $signature;
578 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
579 read( $fh, $signature, SIG_SIZE);
580
581 ##
582 # If value is a hash or array, return new DBM::Deep object with correct offset
583 ##
584 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
585 my $obj = DBM::Deep->new(
586 type => $signature,
587 base_offset => $subloc,
588 root => $obj->_root,
589 );
590
591 if ($obj->_root->{autobless}) {
592 ##
593 # Skip over value and plain key to see if object needs
594 # to be re-blessed
595 ##
596 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
597
598 my $size;
599 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
600 if ($size) { seek($fh, $size, SEEK_CUR); }
601
602 my $bless_bit;
603 read( $fh, $bless_bit, 1);
604 if (ord($bless_bit)) {
605 ##
606 # Yes, object needs to be re-blessed
607 ##
608 my $class_name;
609 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
610 if ($size) { read( $fh, $class_name, $size); }
611 if ($class_name) { $obj = bless( $obj, $class_name ); }
612 }
613 }
614
615 return $obj;
616 }
617 elsif ( $signature eq SIG_INTERNAL ) {
618 my $size;
619 read( $fh, $size, $self->{data_size});
620 $size = unpack($self->{data_pack}, $size);
621
622 if ( $size ) {
623 my $new_loc;
624 read( $fh, $new_loc, $size );
625 $new_loc = unpack( $self->{long_pack}, $new_loc );
626
627 return $self->read_from_loc( $obj, $new_loc );
628 }
629 else {
630 return;
631 }
632 }
633 ##
634 # Otherwise return actual value
635 ##
636 elsif ($signature eq SIG_DATA) {
637 my $size;
638 read( $fh, $size, $self->{data_size});
639 $size = unpack($self->{data_pack}, $size);
640
641 my $value = '';
642 if ($size) { read( $fh, $value, $size); }
643 return $value;
644 }
645
646 ##
647 # Key exists, but content is null
648 ##
649 return;
650}
651
9020ee8c 652sub get_bucket_value {
beac1dff 653 ##
654 # Fetch single value given tag and MD5 digested key.
655 ##
656 my $self = shift;
657 my ($obj, $tag, $md5) = @_;
9020ee8c 658
386bab6c 659 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
660 if ( $subloc ) {
8db25060 661 return $self->read_from_loc( $obj, $subloc );
386bab6c 662 }
beac1dff 663 return;
9020ee8c 664}
ab0e4957 665
666sub delete_bucket {
beac1dff 667 ##
668 # Delete single key/value pair given tag and MD5 digested key.
669 ##
670 my $self = shift;
671 my ($obj, $tag, $md5) = @_;
ab0e4957 672
386bab6c 673 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
674 if ( $subloc ) {
675 my $fh = $obj->_fh;
676 seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);
677 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 678 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 679
ab0e4957 680 return 1;
386bab6c 681 }
beac1dff 682 return;
ab0e4957 683}
684
912d50b1 685sub bucket_exists {
beac1dff 686 ##
687 # Check existence of single key given tag and MD5 digested key.
688 ##
689 my $self = shift;
690 my ($obj, $tag, $md5) = @_;
912d50b1 691
386bab6c 692 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 693 return $subloc && 1;
912d50b1 694}
695
6736c116 696sub find_bucket_list {
beac1dff 697 ##
698 # Locate offset for bucket list, given digested key
699 ##
700 my $self = shift;
d0b74c17 701 my ($obj, $md5, $args) = @_;
702 $args = {} unless $args;
703
beac1dff 704 ##
705 # Locate offset for bucket list using digest index system
706 ##
e5fc7e69 707 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 708 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 709
e5fc7e69 710 my $ch = 0;
8db25060 711 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 712 my $num = ord substr($md5, $ch, 1);
713
714 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
715 $tag = $self->index_lookup( $obj, $tag, $num );
716
717 if (!$tag) {
29b01632 718 return if !$args->{create};
d0b74c17 719
d5d7c51d 720 my $fh = $obj->_fh;
721 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
16d1ad9b 722
723 my $loc = $self->_request_space(
724 $obj, $self->tag_size( $self->{bucket_list_size} ),
725 );
726
727 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 728
d5d7c51d 729 $tag = $self->create_tag(
16d1ad9b 730 $obj, $loc, SIG_BLIST,
d5d7c51d 731 chr(0) x $self->{bucket_list_size},
732 );
733
734 $tag->{ref_loc} = $ref_loc;
735 $tag->{ch} = $ch;
736
737 last;
d0b74c17 738 }
739
16d1ad9b 740 $tag->{ch} = $ch++;
d0b74c17 741 $tag->{ref_loc} = $ref_loc;
beac1dff 742 }
d0b74c17 743
beac1dff 744 return $tag;
6736c116 745}
746
d0b74c17 747sub index_lookup {
748 ##
749 # Given index tag, lookup single entry in index and return .
750 ##
751 my $self = shift;
752 my ($obj, $tag, $index) = @_;
753
754 my $location = unpack(
755 $self->{long_pack},
756 substr(
757 $tag->{content},
758 $index * $self->{long_size},
759 $self->{long_size},
760 ),
761 );
762
763 if (!$location) { return; }
764
765 return $self->load_tag( $obj, $location );
766}
767
6736c116 768sub traverse_index {
beac1dff 769 ##
770 # Scan index and recursively step into deeper levels, looking for next key.
771 ##
6736c116 772 my $self = shift;
773 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 774
beac1dff 775 my $tag = $self->load_tag($obj, $offset );
6736c116 776
777 my $fh = $obj->_fh;
d0b74c17 778
8db25060 779 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 780 my $content = $tag->{content};
e5fc7e69 781 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 782
d5d7c51d 783 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 784 my $subloc = unpack(
785 $self->{long_pack},
e06824f8 786 substr(
787 $content,
788 $idx * $self->{long_size},
789 $self->{long_size},
790 ),
e5fc7e69 791 );
792
beac1dff 793 if ($subloc) {
e5fc7e69 794 my $result = $self->traverse_index(
795 $obj, $subloc, $ch + 1, $force_return_next,
796 );
797
beac1dff 798 if (defined($result)) { return $result; }
799 }
800 } # index loop
d0b74c17 801
beac1dff 802 $obj->{return_next} = 1;
803 } # tag is an index
d0b74c17 804
e5fc7e69 805 else {
beac1dff 806 my $keys = $tag->{content};
807 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 808
beac1dff 809 ##
810 # Iterate through buckets, looking for a key match
811 ##
8db25060 812 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 813 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 814
8db25060 815 # End of bucket list -- return to outer loop
beac1dff 816 if (!$subloc) {
beac1dff 817 $obj->{return_next} = 1;
818 last;
819 }
8db25060 820 # Located previous key -- return next one found
beac1dff 821 elsif ($key eq $obj->{prev_md5}) {
beac1dff 822 $obj->{return_next} = 1;
823 next;
824 }
8db25060 825 # Seek to bucket location and skip over signature
beac1dff 826 elsif ($obj->{return_next}) {
8db25060 827 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 828
beac1dff 829 # Skip over value to get to plain key
8db25060 830 my $sig;
831 read( $fh, $sig, SIG_SIZE );
832
beac1dff 833 my $size;
e5fc7e69 834 read( $fh, $size, $self->{data_size});
835 $size = unpack($self->{data_pack}, $size);
beac1dff 836 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 837
beac1dff 838 # Read in plain key and return as scalar
beac1dff 839 my $plain_key;
e5fc7e69 840 read( $fh, $size, $self->{data_size});
841 $size = unpack($self->{data_pack}, $size);
beac1dff 842 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 843
beac1dff 844 return $plain_key;
845 }
8db25060 846 }
d0b74c17 847
beac1dff 848 $obj->{return_next} = 1;
849 } # tag is a bucket list
d0b74c17 850
beac1dff 851 return;
6736c116 852}
853
854sub get_next_key {
beac1dff 855 ##
856 # Locate next key, given digested previous one
857 ##
6736c116 858 my $self = shift;
859 my ($obj) = @_;
d0b74c17 860
beac1dff 861 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
862 $obj->{return_next} = 0;
d0b74c17 863
beac1dff 864 ##
865 # If the previous key was not specifed, start at the top and
866 # return the first one found.
867 ##
868 if (!$obj->{prev_md5}) {
869 $obj->{prev_md5} = chr(0) x $self->{hash_size};
870 $obj->{return_next} = 1;
871 }
d0b74c17 872
beac1dff 873 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 874}
875
75be6413 876# Utilities
877
9cec1360 878sub _get_key_subloc {
75be6413 879 my $self = shift;
880 my ($keys, $idx) = @_;
881
9cec1360 882 my ($key, $subloc) = unpack(
883 "a$self->{hash_size} $self->{long_pack}",
75be6413 884 substr(
885 $keys,
9cec1360 886 ($idx * $self->{bucket_size}),
887 $self->{bucket_size},
75be6413 888 ),
889 );
890
9cec1360 891 return ($key, $subloc);
75be6413 892}
893
d608b06e 894sub _find_in_buckets {
895 my $self = shift;
896 my ($tag, $md5) = @_;
897
898 BUCKET:
899 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
900 my ($key, $subloc) = $self->_get_key_subloc( $tag->{content}, $i );
901
902 return ($subloc, $i * $self->{bucket_size}) unless $subloc;
903
904 next BUCKET if $key ne $md5;
905
906 return ($subloc, $i * $self->{bucket_size});
907 }
908
909 return;
910}
911
994ccd8e 912sub _request_space {
913 my $self = shift;
914 my ($obj, $size) = @_;
915
916 my $loc = $obj->_root->{end};
c9ec091a 917 $obj->_root->{end} += $size;
994ccd8e 918
919 return $loc;
920}
921
922sub _release_space {
923 my $self = shift;
924 my ($obj, $size, $loc) = @_;
925
926 return;
927}
928
a20d9a3f 9291;
930__END__
d5d7c51d 931
932# This will be added in later, after more refactoring is done. This is an early
933# attempt at refactoring on the physical level instead of the virtual level.
934sub _read_at {
935 my $self = shift;
936 my ($obj, $spot, $amount, $unpack) = @_;
937
938 my $fh = $obj->_fh;
939 seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );
940
941 my $buffer;
942 my $bytes_read = read( $fh, $buffer, $amount );
943
944 if ( $unpack ) {
945 $buffer = unpack( $unpack, $buffer );
946 }
947
948 if ( wantarray ) {
949 return ($buffer, $bytes_read);
950 }
951 else {
952 return $buffer;
953 }
954}