Added _length_needed() function
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
8db25060 7##
8# Setup file and tag signatures. These should never change.
9##
10sub SIG_FILE () { 'DPDB' }
11sub SIG_INTERNAL () { 'i' }
12sub SIG_HASH () { 'H' }
13sub SIG_ARRAY () { 'A' }
8db25060 14sub SIG_NULL () { 'N' }
15sub SIG_DATA () { 'D' }
16sub SIG_INDEX () { 'I' }
17sub SIG_BLIST () { 'B' }
18sub SIG_SIZE () { 1 }
19
612969fb 20sub precalc_sizes {
beac1dff 21 ##
22 # Precalculate index, bucket and bucket list sizes
23 ##
251dfd0e 24 my $self = shift;
1bf65be7 25
251dfd0e 26 $self->{index_size} = (2**8) * $self->{long_size};
27 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
28 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 29
251dfd0e 30 return 1;
1bf65be7 31}
32
33sub set_pack {
beac1dff 34 ##
35 # Set pack/unpack modes (see file header for more)
36 ##
251dfd0e 37 my $self = shift;
1bf65be7 38 my ($long_s, $long_p, $data_s, $data_p) = @_;
39
81d16922 40 ##
75be6413 41 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
42 # GB per file.
beac1dff 43 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 44 #
45 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 46 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 47 ##
251dfd0e 48 $self->{long_size} = $long_s ? $long_s : 4;
49 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 50
81d16922 51 ##
75be6413 52 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
d5d7c51d 53 # key/value. Upgrading this is possible (see above) but probably not
54 # necessary. If you need more than 4 GB for a single key or value, this
55 # module is really not for you :-)
81d16922 56 ##
251dfd0e 57 $self->{data_size} = $data_s ? $data_s : 4;
58 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 59
beac1dff 60 return $self->precalc_sizes();
1bf65be7 61}
62
63sub set_digest {
beac1dff 64 ##
65 # Set key digest function (default is MD5)
66 ##
251dfd0e 67 my $self = shift;
1bf65be7 68 my ($digest_func, $hash_size) = @_;
69
d0b74c17 70 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 71 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 72
beac1dff 73 return $self->precalc_sizes();
612969fb 74}
75
76sub new {
77 my $class = shift;
78 my ($args) = @_;
79
80 my $self = bless {
81 long_size => 4,
82 long_pack => 'N',
83 data_size => 4,
84 data_pack => 'N',
251dfd0e 85
612969fb 86 digest => \&Digest::MD5::md5,
87 hash_size => 16,
251dfd0e 88
81d16922 89 ##
d5d7c51d 90 # Maximum number of buckets per list before another level of indexing is
91 # done.
92 # Increase this value for slightly greater speed, but larger database
93 # files. DO NOT decrease this value below 16, due to risk of recursive
94 # reindex overrun.
81d16922 95 ##
612969fb 96 max_buckets => 16,
97 }, $class;
98
251dfd0e 99 $self->precalc_sizes;
612969fb 100
101 return $self;
1bf65be7 102}
103
70b55428 104sub setup_fh {
105 my $self = shift;
106 my ($obj) = @_;
107
108 $self->open( $obj ) if !defined $obj->_fh;
109
6fde4ed2 110 my $fh = $obj->_fh;
111 flock $fh, LOCK_EX;
118ba343 112
6fde4ed2 113 unless ( $obj->{base_offset} ) {
118ba343 114 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
115 my $signature;
116 my $bytes_read = read( $fh, $signature, length(SIG_FILE));
117
118 ##
119 # File is empty -- write signature and master index
120 ##
121 if (!$bytes_read) {
c9ec091a 122 my $loc = $self->_request_space( $obj, length( SIG_FILE ) );
123 seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET);
118ba343 124 print( $fh SIG_FILE);
125
c9ec091a 126 $obj->{base_offset} = $self->_request_space(
127 $obj, $self->{index_size},
128 );
118ba343 129
130 $self->create_tag(
c9ec091a 131 $obj, $obj->_base_offset, $obj->_type,
132 chr(0) x $self->{index_size},
118ba343 133 );
134
135 # Flush the filehandle
136 my $old_fh = select $fh;
137 my $old_af = $|; $| = 1; $| = $old_af;
138 select $old_fh;
139 }
140 else {
141 $obj->{base_offset} = $bytes_read;
142
143 ##
144 # Check signature was valid
145 ##
146 unless ($signature eq SIG_FILE) {
147 $self->close_fh( $obj );
148 $obj->_throw_error("Signature not found -- file is not a Deep DB");
149 }
150
151 ##
152 # Get our type from master index signature
153 ##
154 my $tag = $self->load_tag($obj, $obj->_base_offset)
155 or $obj->_throw_error("Corrupted file, no master index record");
156
157 unless ($obj->{type} eq $tag->{signature}) {
158 $obj->_throw_error("File type mismatch");
159 }
160 }
118ba343 161 }
e06824f8 162
673464d9 163 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 164 unless ( $obj->_root->{inode} ) {
165 my @stats = stat($obj->_fh);
166 $obj->_root->{inode} = $stats[1];
167 $obj->_root->{end} = $stats[7];
168 }
169
6fde4ed2 170 flock $fh, LOCK_UN;
171
70b55428 172 return 1;
173}
174
a20d9a3f 175sub open {
20f7b20c 176 ##
177 # Open a fh to the database, create if nonexistent.
178 # Make sure file signature matches DBM::Deep spec.
179 ##
a20d9a3f 180 my $self = shift;
70b55428 181 my ($obj) = @_;
a20d9a3f 182
673464d9 183 # Theoretically, adding O_BINARY should remove the need for the binmode
184 # Of course, testing it is going to be ... interesting.
185 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 186
673464d9 187 my $fh;
d5d7c51d 188 my $filename = $obj->_root->{file};
189 sysopen( $fh, $filename, $flags )
190 or $obj->_throw_error("Cannot sysopen file '$filename': $!");
673464d9 191 $obj->_root->{fh} = $fh;
a20d9a3f 192
193 #XXX Can we remove this by using the right sysopen() flags?
194 # Maybe ... q.v. above
195 binmode $fh; # for win32
196
cd59cad8 197 if ($obj->_root->{autoflush}) {
a20d9a3f 198 my $old = select $fh;
199 $|=1;
200 select $old;
201 }
20f7b20c 202
a20d9a3f 203 return 1;
204}
205
3d1b8be9 206sub close_fh {
cd59cad8 207 my $self = shift;
a21f2d90 208 my ($obj) = @_;
cd59cad8 209
210 if ( my $fh = $obj->_root->{fh} ) {
211 close $fh;
212 }
213 $obj->_root->{fh} = undef;
214
215 return 1;
216}
217
d4b1166e 218sub create_tag {
20f7b20c 219 ##
220 # Given offset, signature and content, create tag and write to disk
221 ##
d4b1166e 222 my $self = shift;
20f7b20c 223 my ($obj, $offset, $sig, $content) = @_;
224 my $size = length($content);
225
d4b1166e 226 my $fh = $obj->_fh;
227
20f7b20c 228 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
251dfd0e 229 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 230
231 if ($offset == $obj->_root->{end}) {
8db25060 232 $obj->_root->{end} += SIG_SIZE + $self->{data_size} + $size;
20f7b20c 233 }
234
235 return {
236 signature => $sig,
237 size => $size,
8db25060 238 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 239 content => $content
240 };
d4b1166e 241}
242
243sub load_tag {
20f7b20c 244 ##
245 # Given offset, load single tag and return signature, size and data
246 ##
d4b1166e 247 my $self = shift;
20f7b20c 248 my ($obj, $offset) = @_;
249
e06824f8 250# print join(':',map{$_||''}caller(1)), $/;
251
d4b1166e 252 my $fh = $obj->_fh;
253
20f7b20c 254 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 255
75be6413 256 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 257 return if eof $fh;
20f7b20c 258
d4b1166e 259 my $b;
8db25060 260 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 261 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 262
263 my $buffer;
264 read( $fh, $buffer, $size);
265
266 return {
267 signature => $sig,
268 size => $size,
8db25060 269 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 270 content => $buffer
271 };
d4b1166e 272}
273
29b01632 274sub _length_needed {
275 my $self = shift;
276 my ($obj, $value) = @_;
277
278 my $is_dbm_deep = eval {
279 local $SIG{'__DIE__'};
280 $value->isa( 'DBM::Deep' );
281 };
282
283 my $internal_ref = $is_dbm_deep && ($value->_root eq $obj->_root);
284
285 if ( $internal_ref ) {
286 return $self->{long_size};
287 }
288
289 my $r = Scalar::Util::reftype( $value ) || '';
290 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
291 return length( $value );
292 }
293
294 my $actual_length = $self->{index_size};
295
296 # if autobless is enabled, must also take into consideration
297 # the class name, as it is stored along with key/value.
298 if ( $obj->_root->{autobless} ) {
299 my $value_class = Scalar::Util::blessed($value);
300 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
301 $actual_length += length($value_class);
302 }
303 }
304
305 return $actual_length;
306}
307
20f7b20c 308sub add_bucket {
309 ##
310 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
311 # plain (undigested) key and value.
312 ##
d4b1166e 313 my $self = shift;
20f7b20c 314 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 315
eea0d863 316 # This verifies that only supported values will be stored.
317 {
318 my $r = Scalar::Util::reftype( $value );
319 last if !defined $r;
320
321 last if $r eq 'HASH';
322 last if $r eq 'ARRAY';
323
324 $obj->_throw_error(
325 "Storage of variables of type '$r' is not supported."
326 );
327 }
328
20f7b20c 329 my $location = 0;
330 my $result = 2;
331
332 my $root = $obj->_root;
333
20f7b20c 334 my $fh = $obj->_fh;
335
386bab6c 336 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
75be6413 337
386bab6c 338 # Updating a known md5
339 if ( $subloc ) {
340 $result = 1;
20f7b20c 341
d5d7c51d 342 seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET);
343 my $size;
344 read( $fh, $size, $self->{data_size});
345 $size = unpack($self->{data_pack}, $size);
346
29b01632 347 my $actual_length = $self->_length_needed( $obj, $value );
348
386bab6c 349 if ($actual_length <= $size) {
350 $location = $subloc;
20f7b20c 351 }
75be6413 352 else {
353 $location = $root->{end};
386bab6c 354 seek(
355 $fh,
356 $tag->{offset} + $offset + $self->{hash_size} + $root->{file_offset},
357 SEEK_SET,
358 );
359 print( $fh pack($self->{long_pack}, $location) );
75be6413 360 }
75be6413 361 }
386bab6c 362 # Adding a new md5
363 elsif ( defined $offset ) {
386bab6c 364 $location = $root->{end};
365
366 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
367 print( $fh $md5 . pack($self->{long_pack}, $location) );
368 }
369 # If bucket didn't fit into list, split into a new index level
370 else {
371 $self->split_index( $obj, $md5, $tag );
372
373 $location = $root->{end};
374 }
20f7b20c 375
d5d7c51d 376 $self->write_value( $obj, $location, $plain_key, $value );
377
378 return $result;
379}
380
381sub write_value {
382 my $self = shift;
383 my ($obj, $location, $key, $value) = @_;
384
385 my $fh = $obj->_fh;
386 my $root = $obj->_root;
387
388 my $is_dbm_deep = eval {
389 local $SIG{'__DIE__'};
390 $value->isa( 'DBM::Deep' );
391 };
392
393 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
394
395 seek($fh, $location + $root->{file_offset}, SEEK_SET);
396
20f7b20c 397 ##
d5d7c51d 398 # Write signature based on content type, set content length and write
399 # actual value.
20f7b20c 400 ##
d5d7c51d 401 my $r = Scalar::Util::reftype($value) || '';
402 my $content_length;
403 if ( $internal_ref ) {
404 print( $fh SIG_INTERNAL );
405 print( $fh pack($self->{data_pack}, $self->{long_size}) );
406 print( $fh pack($self->{long_pack}, $value->_base_offset) );
407 $content_length = $self->{long_size};
408 }
409 else {
410 if ($r eq 'HASH') {
411 print( $fh SIG_HASH );
412 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
413 $content_length = $self->{index_size};
414 }
415 elsif ($r eq 'ARRAY') {
416 print( $fh SIG_ARRAY );
417 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
418 $content_length = $self->{index_size};
419 }
420 elsif (!defined($value)) {
421 print( $fh SIG_NULL );
422 print( $fh pack($self->{data_pack}, 0) );
423 $content_length = 0;
20f7b20c 424 }
425 else {
d5d7c51d 426 print( $fh SIG_DATA );
427 print( $fh pack($self->{data_pack}, length($value)) . $value );
428 $content_length = length($value);
20f7b20c 429 }
d5d7c51d 430 }
20f7b20c 431
d5d7c51d 432 ##
433 # Plain key is stored AFTER value, as keys are typically fetched less often.
434 ##
435 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 436
d5d7c51d 437 ##
438 # If value is blessed, preserve class name
439 ##
440 if ( $root->{autobless} ) {
441 my $value_class = Scalar::Util::blessed($value);
442 if ( defined $value_class && !$value->isa( 'DBM::Deep' ) ) {
443 ##
444 # Blessed ref -- will restore later
445 ##
446 print( $fh chr(1) );
447 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
448 $content_length += 1;
449 $content_length += $self->{data_size} + length($value_class);
20f7b20c 450 }
d5d7c51d 451 else {
452 print( $fh chr(0) );
453 $content_length += 1;
20f7b20c 454 }
d5d7c51d 455 }
20f7b20c 456
d5d7c51d 457 ##
458 # If this is a new content area, advance EOF counter
459 ##
460 if ($location == $root->{end}) {
461 $root->{end} += SIG_SIZE;
462 $root->{end} += $self->{data_size} + $content_length;
463 $root->{end} += $self->{data_size} + length($key);
464 }
465
466 ##
467 # If content is a hash or array, create new child DBM::Deep object and
468 # pass each key or element to it.
469 ##
470 if ( ! $internal_ref ) {
471 if ($r eq 'HASH') {
472 my $branch = DBM::Deep->new(
473 type => DBM::Deep->TYPE_HASH,
474 base_offset => $location,
475 root => $root,
476 );
477 foreach my $key (keys %{$value}) {
478 $branch->STORE( $key, $value->{$key} );
20f7b20c 479 }
d5d7c51d 480 }
481 elsif ($r eq 'ARRAY') {
482 my $branch = DBM::Deep->new(
483 type => DBM::Deep->TYPE_ARRAY,
484 base_offset => $location,
485 root => $root,
486 );
487 my $index = 0;
488 foreach my $element (@{$value}) {
489 $branch->STORE( $index, $element );
490 $index++;
20f7b20c 491 }
492 }
20f7b20c 493 }
d4b1166e 494
d5d7c51d 495 return 1;
d4b1166e 496}
497
75be6413 498sub split_index {
499 my $self = shift;
500 my ($obj, $md5, $tag) = @_;
501
502 my $fh = $obj->_fh;
503 my $root = $obj->_root;
504 my $keys = $tag->{content};
505
506 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
507 print( $fh pack($self->{long_pack}, $root->{end}) );
508
509 my $index_tag = $self->create_tag(
510 $obj,
511 $root->{end},
8db25060 512 SIG_INDEX,
75be6413 513 chr(0) x $self->{index_size},
514 );
515
516 my @offsets = ();
517
518 $keys .= $md5 . pack($self->{long_pack}, 0);
519
520 BUCKET:
521 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9cec1360 522 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 523
524 next BUCKET unless $key;
525
75be6413 526 my $num = ord(substr($key, $tag->{ch} + 1, 1));
527
528 if ($offsets[$num]) {
8db25060 529 my $offset = $offsets[$num] + SIG_SIZE + $self->{data_size};
75be6413 530 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
531 my $subkeys;
532 read( $fh, $subkeys, $self->{bucket_list_size});
533
534 for (my $k=0; $k<$self->{max_buckets}; $k++) {
9cec1360 535 my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k );
75be6413 536
537 if (!$subloc) {
538 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
539 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
540 last;
541 }
542 } # k loop
543 }
544 else {
545 $offsets[$num] = $root->{end};
546 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
547 print( $fh pack($self->{long_pack}, $root->{end}) );
548
8db25060 549 my $blist_tag = $self->create_tag($obj, $root->{end}, SIG_BLIST, chr(0) x $self->{bucket_list_size});
75be6413 550
551 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
552 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
553 }
554 } # i loop
555
556 return;
557}
558
8db25060 559sub read_from_loc {
560 my $self = shift;
561 my ($obj, $subloc) = @_;
562
563 my $fh = $obj->_fh;
564
565 ##
566 # Found match -- seek to offset and read signature
567 ##
568 my $signature;
569 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
570 read( $fh, $signature, SIG_SIZE);
571
572 ##
573 # If value is a hash or array, return new DBM::Deep object with correct offset
574 ##
575 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
576 my $obj = DBM::Deep->new(
577 type => $signature,
578 base_offset => $subloc,
579 root => $obj->_root,
580 );
581
582 if ($obj->_root->{autobless}) {
583 ##
584 # Skip over value and plain key to see if object needs
585 # to be re-blessed
586 ##
587 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
588
589 my $size;
590 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
591 if ($size) { seek($fh, $size, SEEK_CUR); }
592
593 my $bless_bit;
594 read( $fh, $bless_bit, 1);
595 if (ord($bless_bit)) {
596 ##
597 # Yes, object needs to be re-blessed
598 ##
599 my $class_name;
600 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
601 if ($size) { read( $fh, $class_name, $size); }
602 if ($class_name) { $obj = bless( $obj, $class_name ); }
603 }
604 }
605
606 return $obj;
607 }
608 elsif ( $signature eq SIG_INTERNAL ) {
609 my $size;
610 read( $fh, $size, $self->{data_size});
611 $size = unpack($self->{data_pack}, $size);
612
613 if ( $size ) {
614 my $new_loc;
615 read( $fh, $new_loc, $size );
616 $new_loc = unpack( $self->{long_pack}, $new_loc );
617
618 return $self->read_from_loc( $obj, $new_loc );
619 }
620 else {
621 return;
622 }
623 }
624 ##
625 # Otherwise return actual value
626 ##
627 elsif ($signature eq SIG_DATA) {
628 my $size;
629 read( $fh, $size, $self->{data_size});
630 $size = unpack($self->{data_pack}, $size);
631
632 my $value = '';
633 if ($size) { read( $fh, $value, $size); }
634 return $value;
635 }
636
637 ##
638 # Key exists, but content is null
639 ##
640 return;
641}
642
9020ee8c 643sub get_bucket_value {
beac1dff 644 ##
645 # Fetch single value given tag and MD5 digested key.
646 ##
647 my $self = shift;
648 my ($obj, $tag, $md5) = @_;
9020ee8c 649
386bab6c 650 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
651 if ( $subloc ) {
8db25060 652 return $self->read_from_loc( $obj, $subloc );
386bab6c 653 }
beac1dff 654 return;
9020ee8c 655}
ab0e4957 656
657sub delete_bucket {
beac1dff 658 ##
659 # Delete single key/value pair given tag and MD5 digested key.
660 ##
661 my $self = shift;
662 my ($obj, $tag, $md5) = @_;
ab0e4957 663
386bab6c 664 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
665 if ( $subloc ) {
666 my $fh = $obj->_fh;
667 seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);
668 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 669 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 670
ab0e4957 671 return 1;
386bab6c 672 }
beac1dff 673 return;
ab0e4957 674}
675
912d50b1 676sub bucket_exists {
beac1dff 677 ##
678 # Check existence of single key given tag and MD5 digested key.
679 ##
680 my $self = shift;
681 my ($obj, $tag, $md5) = @_;
912d50b1 682
386bab6c 683 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 684 return $subloc && 1;
912d50b1 685}
686
6736c116 687sub find_bucket_list {
beac1dff 688 ##
689 # Locate offset for bucket list, given digested key
690 ##
691 my $self = shift;
d0b74c17 692 my ($obj, $md5, $args) = @_;
693 $args = {} unless $args;
694
beac1dff 695 ##
696 # Locate offset for bucket list using digest index system
697 ##
e5fc7e69 698 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 699 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 700
e5fc7e69 701 my $ch = 0;
8db25060 702 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 703 my $num = ord substr($md5, $ch, 1);
704
705 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
706 $tag = $self->index_lookup( $obj, $tag, $num );
707
708 if (!$tag) {
29b01632 709 return if !$args->{create};
d0b74c17 710
d5d7c51d 711 my $fh = $obj->_fh;
712 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
713 print( $fh pack($self->{long_pack}, $obj->_root->{end}) );
d0b74c17 714
d5d7c51d 715 $tag = $self->create_tag(
716 $obj, $obj->_root->{end},
717 SIG_BLIST,
718 chr(0) x $self->{bucket_list_size},
719 );
720
721 $tag->{ref_loc} = $ref_loc;
722 $tag->{ch} = $ch;
723
724 last;
d0b74c17 725 }
726
727 $tag->{ch} = $ch;
728 $tag->{ref_loc} = $ref_loc;
729
beac1dff 730 $ch++;
731 }
d0b74c17 732
beac1dff 733 return $tag;
6736c116 734}
735
d0b74c17 736sub index_lookup {
737 ##
738 # Given index tag, lookup single entry in index and return .
739 ##
740 my $self = shift;
741 my ($obj, $tag, $index) = @_;
742
743 my $location = unpack(
744 $self->{long_pack},
745 substr(
746 $tag->{content},
747 $index * $self->{long_size},
748 $self->{long_size},
749 ),
750 );
751
752 if (!$location) { return; }
753
754 return $self->load_tag( $obj, $location );
755}
756
6736c116 757sub traverse_index {
beac1dff 758 ##
759 # Scan index and recursively step into deeper levels, looking for next key.
760 ##
6736c116 761 my $self = shift;
762 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 763
beac1dff 764 my $tag = $self->load_tag($obj, $offset );
6736c116 765
766 my $fh = $obj->_fh;
d0b74c17 767
8db25060 768 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 769 my $content = $tag->{content};
e5fc7e69 770 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 771
d5d7c51d 772 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 773 my $subloc = unpack(
774 $self->{long_pack},
e06824f8 775 substr(
776 $content,
777 $idx * $self->{long_size},
778 $self->{long_size},
779 ),
e5fc7e69 780 );
781
beac1dff 782 if ($subloc) {
e5fc7e69 783 my $result = $self->traverse_index(
784 $obj, $subloc, $ch + 1, $force_return_next,
785 );
786
beac1dff 787 if (defined($result)) { return $result; }
788 }
789 } # index loop
d0b74c17 790
beac1dff 791 $obj->{return_next} = 1;
792 } # tag is an index
d0b74c17 793
e5fc7e69 794 else {
beac1dff 795 my $keys = $tag->{content};
796 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 797
beac1dff 798 ##
799 # Iterate through buckets, looking for a key match
800 ##
8db25060 801 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 802 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 803
8db25060 804 # End of bucket list -- return to outer loop
beac1dff 805 if (!$subloc) {
beac1dff 806 $obj->{return_next} = 1;
807 last;
808 }
8db25060 809 # Located previous key -- return next one found
beac1dff 810 elsif ($key eq $obj->{prev_md5}) {
beac1dff 811 $obj->{return_next} = 1;
812 next;
813 }
8db25060 814 # Seek to bucket location and skip over signature
beac1dff 815 elsif ($obj->{return_next}) {
8db25060 816 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 817
beac1dff 818 # Skip over value to get to plain key
8db25060 819 my $sig;
820 read( $fh, $sig, SIG_SIZE );
821
beac1dff 822 my $size;
e5fc7e69 823 read( $fh, $size, $self->{data_size});
824 $size = unpack($self->{data_pack}, $size);
beac1dff 825 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 826
beac1dff 827 # Read in plain key and return as scalar
beac1dff 828 my $plain_key;
e5fc7e69 829 read( $fh, $size, $self->{data_size});
830 $size = unpack($self->{data_pack}, $size);
beac1dff 831 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 832
beac1dff 833 return $plain_key;
834 }
8db25060 835 }
d0b74c17 836
beac1dff 837 $obj->{return_next} = 1;
838 } # tag is a bucket list
d0b74c17 839
beac1dff 840 return;
6736c116 841}
842
843sub get_next_key {
beac1dff 844 ##
845 # Locate next key, given digested previous one
846 ##
6736c116 847 my $self = shift;
848 my ($obj) = @_;
d0b74c17 849
beac1dff 850 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
851 $obj->{return_next} = 0;
d0b74c17 852
beac1dff 853 ##
854 # If the previous key was not specifed, start at the top and
855 # return the first one found.
856 ##
857 if (!$obj->{prev_md5}) {
858 $obj->{prev_md5} = chr(0) x $self->{hash_size};
859 $obj->{return_next} = 1;
860 }
d0b74c17 861
beac1dff 862 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 863}
864
75be6413 865# Utilities
866
9cec1360 867sub _get_key_subloc {
75be6413 868 my $self = shift;
869 my ($keys, $idx) = @_;
870
9cec1360 871 my ($key, $subloc) = unpack(
872 "a$self->{hash_size} $self->{long_pack}",
75be6413 873 substr(
874 $keys,
9cec1360 875 ($idx * $self->{bucket_size}),
876 $self->{bucket_size},
75be6413 877 ),
878 );
879
9cec1360 880 return ($key, $subloc);
75be6413 881}
882
d608b06e 883sub _find_in_buckets {
884 my $self = shift;
885 my ($tag, $md5) = @_;
886
887 BUCKET:
888 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
889 my ($key, $subloc) = $self->_get_key_subloc( $tag->{content}, $i );
890
891 return ($subloc, $i * $self->{bucket_size}) unless $subloc;
892
893 next BUCKET if $key ne $md5;
894
895 return ($subloc, $i * $self->{bucket_size});
896 }
897
898 return;
899}
900
994ccd8e 901sub _request_space {
902 my $self = shift;
903 my ($obj, $size) = @_;
904
905 my $loc = $obj->_root->{end};
c9ec091a 906 $obj->_root->{end} += $size;
994ccd8e 907
908 return $loc;
909}
910
911sub _release_space {
912 my $self = shift;
913 my ($obj, $size, $loc) = @_;
914
915 return;
916}
917
a20d9a3f 9181;
919__END__
d5d7c51d 920
921# This will be added in later, after more refactoring is done. This is an early
922# attempt at refactoring on the physical level instead of the virtual level.
923sub _read_at {
924 my $self = shift;
925 my ($obj, $spot, $amount, $unpack) = @_;
926
927 my $fh = $obj->_fh;
928 seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );
929
930 my $buffer;
931 my $bytes_read = read( $fh, $buffer, $amount );
932
933 if ( $unpack ) {
934 $buffer = unpack( $unpack, $buffer );
935 }
936
937 if ( wantarray ) {
938 return ($buffer, $bytes_read);
939 }
940 else {
941 return $buffer;
942 }
943}