Converted more to use _request_space() ... still more to go
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
8db25060 7##
8# Setup file and tag signatures. These should never change.
9##
10sub SIG_FILE () { 'DPDB' }
11sub SIG_INTERNAL () { 'i' }
12sub SIG_HASH () { 'H' }
13sub SIG_ARRAY () { 'A' }
8db25060 14sub SIG_NULL () { 'N' }
15sub SIG_DATA () { 'D' }
16sub SIG_INDEX () { 'I' }
17sub SIG_BLIST () { 'B' }
18sub SIG_SIZE () { 1 }
19
612969fb 20sub precalc_sizes {
beac1dff 21 ##
22 # Precalculate index, bucket and bucket list sizes
23 ##
251dfd0e 24 my $self = shift;
1bf65be7 25
251dfd0e 26 $self->{index_size} = (2**8) * $self->{long_size};
27 $self->{bucket_size} = $self->{hash_size} + $self->{long_size};
28 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 29
251dfd0e 30 return 1;
1bf65be7 31}
32
33sub set_pack {
beac1dff 34 ##
35 # Set pack/unpack modes (see file header for more)
36 ##
251dfd0e 37 my $self = shift;
1bf65be7 38 my ($long_s, $long_p, $data_s, $data_p) = @_;
39
81d16922 40 ##
75be6413 41 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
42 # GB per file.
beac1dff 43 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 44 #
45 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 46 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 47 ##
251dfd0e 48 $self->{long_size} = $long_s ? $long_s : 4;
49 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 50
81d16922 51 ##
75be6413 52 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
d5d7c51d 53 # key/value. Upgrading this is possible (see above) but probably not
54 # necessary. If you need more than 4 GB for a single key or value, this
55 # module is really not for you :-)
81d16922 56 ##
251dfd0e 57 $self->{data_size} = $data_s ? $data_s : 4;
58 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 59
beac1dff 60 return $self->precalc_sizes();
1bf65be7 61}
62
63sub set_digest {
beac1dff 64 ##
65 # Set key digest function (default is MD5)
66 ##
251dfd0e 67 my $self = shift;
1bf65be7 68 my ($digest_func, $hash_size) = @_;
69
d0b74c17 70 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 71 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 72
beac1dff 73 return $self->precalc_sizes();
612969fb 74}
75
76sub new {
77 my $class = shift;
78 my ($args) = @_;
79
80 my $self = bless {
81 long_size => 4,
82 long_pack => 'N',
83 data_size => 4,
84 data_pack => 'N',
251dfd0e 85
612969fb 86 digest => \&Digest::MD5::md5,
87 hash_size => 16,
251dfd0e 88
81d16922 89 ##
d5d7c51d 90 # Maximum number of buckets per list before another level of indexing is
91 # done.
92 # Increase this value for slightly greater speed, but larger database
93 # files. DO NOT decrease this value below 16, due to risk of recursive
94 # reindex overrun.
81d16922 95 ##
612969fb 96 max_buckets => 16,
97 }, $class;
98
251dfd0e 99 $self->precalc_sizes;
612969fb 100
101 return $self;
1bf65be7 102}
103
70b55428 104sub setup_fh {
105 my $self = shift;
106 my ($obj) = @_;
107
108 $self->open( $obj ) if !defined $obj->_fh;
109
6fde4ed2 110 my $fh = $obj->_fh;
111 flock $fh, LOCK_EX;
118ba343 112
6fde4ed2 113 unless ( $obj->{base_offset} ) {
118ba343 114 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
115 my $signature;
116 my $bytes_read = read( $fh, $signature, length(SIG_FILE));
117
118 ##
119 # File is empty -- write signature and master index
120 ##
121 if (!$bytes_read) {
c9ec091a 122 my $loc = $self->_request_space( $obj, length( SIG_FILE ) );
123 seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET);
118ba343 124 print( $fh SIG_FILE);
125
c9ec091a 126 $obj->{base_offset} = $self->_request_space(
127 $obj, $self->{index_size},
128 );
118ba343 129
130 $self->create_tag(
c9ec091a 131 $obj, $obj->_base_offset, $obj->_type,
132 chr(0) x $self->{index_size},
118ba343 133 );
134
135 # Flush the filehandle
136 my $old_fh = select $fh;
137 my $old_af = $|; $| = 1; $| = $old_af;
138 select $old_fh;
139 }
140 else {
141 $obj->{base_offset} = $bytes_read;
142
143 ##
144 # Check signature was valid
145 ##
146 unless ($signature eq SIG_FILE) {
147 $self->close_fh( $obj );
148 $obj->_throw_error("Signature not found -- file is not a Deep DB");
149 }
150
151 ##
152 # Get our type from master index signature
153 ##
154 my $tag = $self->load_tag($obj, $obj->_base_offset)
155 or $obj->_throw_error("Corrupted file, no master index record");
156
157 unless ($obj->{type} eq $tag->{signature}) {
158 $obj->_throw_error("File type mismatch");
159 }
160 }
118ba343 161 }
e06824f8 162
673464d9 163 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 164 unless ( $obj->_root->{inode} ) {
165 my @stats = stat($obj->_fh);
166 $obj->_root->{inode} = $stats[1];
167 $obj->_root->{end} = $stats[7];
168 }
169
6fde4ed2 170 flock $fh, LOCK_UN;
171
70b55428 172 return 1;
173}
174
a20d9a3f 175sub open {
20f7b20c 176 ##
177 # Open a fh to the database, create if nonexistent.
178 # Make sure file signature matches DBM::Deep spec.
179 ##
a20d9a3f 180 my $self = shift;
70b55428 181 my ($obj) = @_;
a20d9a3f 182
673464d9 183 # Theoretically, adding O_BINARY should remove the need for the binmode
184 # Of course, testing it is going to be ... interesting.
185 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 186
673464d9 187 my $fh;
d5d7c51d 188 my $filename = $obj->_root->{file};
189 sysopen( $fh, $filename, $flags )
190 or $obj->_throw_error("Cannot sysopen file '$filename': $!");
673464d9 191 $obj->_root->{fh} = $fh;
a20d9a3f 192
193 #XXX Can we remove this by using the right sysopen() flags?
194 # Maybe ... q.v. above
195 binmode $fh; # for win32
196
cd59cad8 197 if ($obj->_root->{autoflush}) {
a20d9a3f 198 my $old = select $fh;
199 $|=1;
200 select $old;
201 }
20f7b20c 202
a20d9a3f 203 return 1;
204}
205
3d1b8be9 206sub close_fh {
cd59cad8 207 my $self = shift;
a21f2d90 208 my ($obj) = @_;
cd59cad8 209
210 if ( my $fh = $obj->_root->{fh} ) {
211 close $fh;
212 }
213 $obj->_root->{fh} = undef;
214
215 return 1;
216}
217
d4b1166e 218sub create_tag {
20f7b20c 219 ##
220 # Given offset, signature and content, create tag and write to disk
221 ##
d4b1166e 222 my $self = shift;
20f7b20c 223 my ($obj, $offset, $sig, $content) = @_;
224 my $size = length($content);
225
d4b1166e 226 my $fh = $obj->_fh;
227
20f7b20c 228 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
251dfd0e 229 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 230
231 if ($offset == $obj->_root->{end}) {
8db25060 232 $obj->_root->{end} += SIG_SIZE + $self->{data_size} + $size;
20f7b20c 233 }
234
235 return {
236 signature => $sig,
237 size => $size,
8db25060 238 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 239 content => $content
240 };
d4b1166e 241}
242
243sub load_tag {
20f7b20c 244 ##
245 # Given offset, load single tag and return signature, size and data
246 ##
d4b1166e 247 my $self = shift;
20f7b20c 248 my ($obj, $offset) = @_;
249
e06824f8 250# print join(':',map{$_||''}caller(1)), $/;
251
d4b1166e 252 my $fh = $obj->_fh;
253
20f7b20c 254 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 255
75be6413 256 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 257 return if eof $fh;
20f7b20c 258
d4b1166e 259 my $b;
8db25060 260 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 261 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 262
263 my $buffer;
264 read( $fh, $buffer, $size);
265
266 return {
267 signature => $sig,
268 size => $size,
8db25060 269 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 270 content => $buffer
271 };
d4b1166e 272}
273
20f7b20c 274sub add_bucket {
275 ##
276 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
277 # plain (undigested) key and value.
278 ##
d4b1166e 279 my $self = shift;
20f7b20c 280 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 281
eea0d863 282 # This verifies that only supported values will be stored.
283 {
284 my $r = Scalar::Util::reftype( $value );
285 last if !defined $r;
286
287 last if $r eq 'HASH';
288 last if $r eq 'ARRAY';
289
290 $obj->_throw_error(
291 "Storage of variables of type '$r' is not supported."
292 );
293 }
294
20f7b20c 295 my $location = 0;
296 my $result = 2;
297
298 my $root = $obj->_root;
299
d5d7c51d 300 my $is_dbm_deep = eval {
301 local $SIG{'__DIE__'};
302 $value->isa( 'DBM::Deep' );
303 };
304
20f7b20c 305 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
306
307 my $fh = $obj->_fh;
308
386bab6c 309 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
75be6413 310
386bab6c 311 # Updating a known md5
312 if ( $subloc ) {
313 $result = 1;
20f7b20c 314
386bab6c 315 ##
d5d7c51d 316 # If value is a hash, array, or raw value with equal or less size, we
317 # can reuse the same content area of the database. Otherwise, we have
318 # to create a new content area at the EOF.
386bab6c 319 ##
320 my $actual_length;
321 if ( $internal_ref ) {
322 $actual_length = $self->{long_size};
323 }
324 else {
325 my $r = Scalar::Util::reftype( $value ) || '';
326 if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
327 $actual_length = $self->{index_size};
328
329 # if autobless is enabled, must also take into consideration
330 # the class name, as it is stored along with key/value.
331 if ( $root->{autobless} ) {
332 my $value_class = Scalar::Util::blessed($value);
333 if ( defined $value_class && !$value->isa('DBM::Deep') ) {
334 $actual_length += length($value_class);
8db25060 335 }
75be6413 336 }
20f7b20c 337 }
386bab6c 338 else { $actual_length = length($value); }
75be6413 339 }
20f7b20c 340
d5d7c51d 341 seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET);
342 my $size;
343 read( $fh, $size, $self->{data_size});
344 $size = unpack($self->{data_pack}, $size);
345
386bab6c 346 if ($actual_length <= $size) {
347 $location = $subloc;
20f7b20c 348 }
75be6413 349 else {
350 $location = $root->{end};
386bab6c 351 seek(
352 $fh,
353 $tag->{offset} + $offset + $self->{hash_size} + $root->{file_offset},
354 SEEK_SET,
355 );
356 print( $fh pack($self->{long_pack}, $location) );
75be6413 357 }
75be6413 358 }
386bab6c 359 # Adding a new md5
360 elsif ( defined $offset ) {
386bab6c 361 $location = $root->{end};
362
363 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
364 print( $fh $md5 . pack($self->{long_pack}, $location) );
365 }
366 # If bucket didn't fit into list, split into a new index level
367 else {
368 $self->split_index( $obj, $md5, $tag );
369
370 $location = $root->{end};
371 }
20f7b20c 372
d5d7c51d 373 $self->write_value( $obj, $location, $plain_key, $value );
374
375 return $result;
376}
377
378sub write_value {
379 my $self = shift;
380 my ($obj, $location, $key, $value) = @_;
381
382 my $fh = $obj->_fh;
383 my $root = $obj->_root;
384
385 my $is_dbm_deep = eval {
386 local $SIG{'__DIE__'};
387 $value->isa( 'DBM::Deep' );
388 };
389
390 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
391
392 seek($fh, $location + $root->{file_offset}, SEEK_SET);
393
20f7b20c 394 ##
d5d7c51d 395 # Write signature based on content type, set content length and write
396 # actual value.
20f7b20c 397 ##
d5d7c51d 398 my $r = Scalar::Util::reftype($value) || '';
399 my $content_length;
400 if ( $internal_ref ) {
401 print( $fh SIG_INTERNAL );
402 print( $fh pack($self->{data_pack}, $self->{long_size}) );
403 print( $fh pack($self->{long_pack}, $value->_base_offset) );
404 $content_length = $self->{long_size};
405 }
406 else {
407 if ($r eq 'HASH') {
408 print( $fh SIG_HASH );
409 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
410 $content_length = $self->{index_size};
411 }
412 elsif ($r eq 'ARRAY') {
413 print( $fh SIG_ARRAY );
414 print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} );
415 $content_length = $self->{index_size};
416 }
417 elsif (!defined($value)) {
418 print( $fh SIG_NULL );
419 print( $fh pack($self->{data_pack}, 0) );
420 $content_length = 0;
20f7b20c 421 }
422 else {
d5d7c51d 423 print( $fh SIG_DATA );
424 print( $fh pack($self->{data_pack}, length($value)) . $value );
425 $content_length = length($value);
20f7b20c 426 }
d5d7c51d 427 }
20f7b20c 428
d5d7c51d 429 ##
430 # Plain key is stored AFTER value, as keys are typically fetched less often.
431 ##
432 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 433
d5d7c51d 434 ##
435 # If value is blessed, preserve class name
436 ##
437 if ( $root->{autobless} ) {
438 my $value_class = Scalar::Util::blessed($value);
439 if ( defined $value_class && !$value->isa( 'DBM::Deep' ) ) {
440 ##
441 # Blessed ref -- will restore later
442 ##
443 print( $fh chr(1) );
444 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
445 $content_length += 1;
446 $content_length += $self->{data_size} + length($value_class);
20f7b20c 447 }
d5d7c51d 448 else {
449 print( $fh chr(0) );
450 $content_length += 1;
20f7b20c 451 }
d5d7c51d 452 }
20f7b20c 453
d5d7c51d 454 ##
455 # If this is a new content area, advance EOF counter
456 ##
457 if ($location == $root->{end}) {
458 $root->{end} += SIG_SIZE;
459 $root->{end} += $self->{data_size} + $content_length;
460 $root->{end} += $self->{data_size} + length($key);
461 }
462
463 ##
464 # If content is a hash or array, create new child DBM::Deep object and
465 # pass each key or element to it.
466 ##
467 if ( ! $internal_ref ) {
468 if ($r eq 'HASH') {
469 my $branch = DBM::Deep->new(
470 type => DBM::Deep->TYPE_HASH,
471 base_offset => $location,
472 root => $root,
473 );
474 foreach my $key (keys %{$value}) {
475 $branch->STORE( $key, $value->{$key} );
20f7b20c 476 }
d5d7c51d 477 }
478 elsif ($r eq 'ARRAY') {
479 my $branch = DBM::Deep->new(
480 type => DBM::Deep->TYPE_ARRAY,
481 base_offset => $location,
482 root => $root,
483 );
484 my $index = 0;
485 foreach my $element (@{$value}) {
486 $branch->STORE( $index, $element );
487 $index++;
20f7b20c 488 }
489 }
20f7b20c 490 }
d4b1166e 491
d5d7c51d 492 return 1;
d4b1166e 493}
494
75be6413 495sub split_index {
496 my $self = shift;
497 my ($obj, $md5, $tag) = @_;
498
499 my $fh = $obj->_fh;
500 my $root = $obj->_root;
501 my $keys = $tag->{content};
502
503 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
504 print( $fh pack($self->{long_pack}, $root->{end}) );
505
506 my $index_tag = $self->create_tag(
507 $obj,
508 $root->{end},
8db25060 509 SIG_INDEX,
75be6413 510 chr(0) x $self->{index_size},
511 );
512
513 my @offsets = ();
514
515 $keys .= $md5 . pack($self->{long_pack}, 0);
516
517 BUCKET:
518 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9cec1360 519 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 520
521 next BUCKET unless $key;
522
75be6413 523 my $num = ord(substr($key, $tag->{ch} + 1, 1));
524
525 if ($offsets[$num]) {
8db25060 526 my $offset = $offsets[$num] + SIG_SIZE + $self->{data_size};
75be6413 527 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
528 my $subkeys;
529 read( $fh, $subkeys, $self->{bucket_list_size});
530
531 for (my $k=0; $k<$self->{max_buckets}; $k++) {
9cec1360 532 my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k );
75be6413 533
534 if (!$subloc) {
535 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
536 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
537 last;
538 }
539 } # k loop
540 }
541 else {
542 $offsets[$num] = $root->{end};
543 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
544 print( $fh pack($self->{long_pack}, $root->{end}) );
545
8db25060 546 my $blist_tag = $self->create_tag($obj, $root->{end}, SIG_BLIST, chr(0) x $self->{bucket_list_size});
75be6413 547
548 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
549 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
550 }
551 } # i loop
552
553 return;
554}
555
8db25060 556sub read_from_loc {
557 my $self = shift;
558 my ($obj, $subloc) = @_;
559
560 my $fh = $obj->_fh;
561
562 ##
563 # Found match -- seek to offset and read signature
564 ##
565 my $signature;
566 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
567 read( $fh, $signature, SIG_SIZE);
568
569 ##
570 # If value is a hash or array, return new DBM::Deep object with correct offset
571 ##
572 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
573 my $obj = DBM::Deep->new(
574 type => $signature,
575 base_offset => $subloc,
576 root => $obj->_root,
577 );
578
579 if ($obj->_root->{autobless}) {
580 ##
581 # Skip over value and plain key to see if object needs
582 # to be re-blessed
583 ##
584 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
585
586 my $size;
587 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
588 if ($size) { seek($fh, $size, SEEK_CUR); }
589
590 my $bless_bit;
591 read( $fh, $bless_bit, 1);
592 if (ord($bless_bit)) {
593 ##
594 # Yes, object needs to be re-blessed
595 ##
596 my $class_name;
597 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
598 if ($size) { read( $fh, $class_name, $size); }
599 if ($class_name) { $obj = bless( $obj, $class_name ); }
600 }
601 }
602
603 return $obj;
604 }
605 elsif ( $signature eq SIG_INTERNAL ) {
606 my $size;
607 read( $fh, $size, $self->{data_size});
608 $size = unpack($self->{data_pack}, $size);
609
610 if ( $size ) {
611 my $new_loc;
612 read( $fh, $new_loc, $size );
613 $new_loc = unpack( $self->{long_pack}, $new_loc );
614
615 return $self->read_from_loc( $obj, $new_loc );
616 }
617 else {
618 return;
619 }
620 }
621 ##
622 # Otherwise return actual value
623 ##
624 elsif ($signature eq SIG_DATA) {
625 my $size;
626 read( $fh, $size, $self->{data_size});
627 $size = unpack($self->{data_pack}, $size);
628
629 my $value = '';
630 if ($size) { read( $fh, $value, $size); }
631 return $value;
632 }
633
634 ##
635 # Key exists, but content is null
636 ##
637 return;
638}
639
9020ee8c 640sub get_bucket_value {
beac1dff 641 ##
642 # Fetch single value given tag and MD5 digested key.
643 ##
644 my $self = shift;
645 my ($obj, $tag, $md5) = @_;
9020ee8c 646
386bab6c 647 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
648 if ( $subloc ) {
8db25060 649 return $self->read_from_loc( $obj, $subloc );
386bab6c 650 }
beac1dff 651 return;
9020ee8c 652}
ab0e4957 653
654sub delete_bucket {
beac1dff 655 ##
656 # Delete single key/value pair given tag and MD5 digested key.
657 ##
658 my $self = shift;
659 my ($obj, $tag, $md5) = @_;
ab0e4957 660
386bab6c 661 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
662 if ( $subloc ) {
663 my $fh = $obj->_fh;
664 seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);
665 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 666 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 667
ab0e4957 668 return 1;
386bab6c 669 }
beac1dff 670 return;
ab0e4957 671}
672
912d50b1 673sub bucket_exists {
beac1dff 674 ##
675 # Check existence of single key given tag and MD5 digested key.
676 ##
677 my $self = shift;
678 my ($obj, $tag, $md5) = @_;
912d50b1 679
386bab6c 680 my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 681 return $subloc && 1;
912d50b1 682}
683
6736c116 684sub find_bucket_list {
beac1dff 685 ##
686 # Locate offset for bucket list, given digested key
687 ##
688 my $self = shift;
d0b74c17 689 my ($obj, $md5, $args) = @_;
690 $args = {} unless $args;
691
beac1dff 692 ##
693 # Locate offset for bucket list using digest index system
694 ##
e5fc7e69 695 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 696 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 697
e5fc7e69 698 my $ch = 0;
8db25060 699 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 700 my $num = ord substr($md5, $ch, 1);
701
702 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
703 $tag = $self->index_lookup( $obj, $tag, $num );
704
705 if (!$tag) {
d5d7c51d 706 return if ! $args->{create};
d0b74c17 707
d5d7c51d 708 my $fh = $obj->_fh;
709 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
710 print( $fh pack($self->{long_pack}, $obj->_root->{end}) );
d0b74c17 711
d5d7c51d 712 $tag = $self->create_tag(
713 $obj, $obj->_root->{end},
714 SIG_BLIST,
715 chr(0) x $self->{bucket_list_size},
716 );
717
718 $tag->{ref_loc} = $ref_loc;
719 $tag->{ch} = $ch;
720
721 last;
d0b74c17 722 }
723
724 $tag->{ch} = $ch;
725 $tag->{ref_loc} = $ref_loc;
726
beac1dff 727 $ch++;
728 }
d0b74c17 729
beac1dff 730 return $tag;
6736c116 731}
732
d0b74c17 733sub index_lookup {
734 ##
735 # Given index tag, lookup single entry in index and return .
736 ##
737 my $self = shift;
738 my ($obj, $tag, $index) = @_;
739
740 my $location = unpack(
741 $self->{long_pack},
742 substr(
743 $tag->{content},
744 $index * $self->{long_size},
745 $self->{long_size},
746 ),
747 );
748
749 if (!$location) { return; }
750
751 return $self->load_tag( $obj, $location );
752}
753
6736c116 754sub traverse_index {
beac1dff 755 ##
756 # Scan index and recursively step into deeper levels, looking for next key.
757 ##
6736c116 758 my $self = shift;
759 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 760
beac1dff 761 my $tag = $self->load_tag($obj, $offset );
6736c116 762
763 my $fh = $obj->_fh;
d0b74c17 764
8db25060 765 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 766 my $content = $tag->{content};
e5fc7e69 767 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 768
d5d7c51d 769 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 770 my $subloc = unpack(
771 $self->{long_pack},
e06824f8 772 substr(
773 $content,
774 $idx * $self->{long_size},
775 $self->{long_size},
776 ),
e5fc7e69 777 );
778
beac1dff 779 if ($subloc) {
e5fc7e69 780 my $result = $self->traverse_index(
781 $obj, $subloc, $ch + 1, $force_return_next,
782 );
783
beac1dff 784 if (defined($result)) { return $result; }
785 }
786 } # index loop
d0b74c17 787
beac1dff 788 $obj->{return_next} = 1;
789 } # tag is an index
d0b74c17 790
e5fc7e69 791 else {
beac1dff 792 my $keys = $tag->{content};
793 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 794
beac1dff 795 ##
796 # Iterate through buckets, looking for a key match
797 ##
8db25060 798 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 799 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 800
8db25060 801 # End of bucket list -- return to outer loop
beac1dff 802 if (!$subloc) {
beac1dff 803 $obj->{return_next} = 1;
804 last;
805 }
8db25060 806 # Located previous key -- return next one found
beac1dff 807 elsif ($key eq $obj->{prev_md5}) {
beac1dff 808 $obj->{return_next} = 1;
809 next;
810 }
8db25060 811 # Seek to bucket location and skip over signature
beac1dff 812 elsif ($obj->{return_next}) {
8db25060 813 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 814
beac1dff 815 # Skip over value to get to plain key
8db25060 816 my $sig;
817 read( $fh, $sig, SIG_SIZE );
818
beac1dff 819 my $size;
e5fc7e69 820 read( $fh, $size, $self->{data_size});
821 $size = unpack($self->{data_pack}, $size);
beac1dff 822 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 823
beac1dff 824 # Read in plain key and return as scalar
beac1dff 825 my $plain_key;
e5fc7e69 826 read( $fh, $size, $self->{data_size});
827 $size = unpack($self->{data_pack}, $size);
beac1dff 828 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 829
beac1dff 830 return $plain_key;
831 }
8db25060 832 }
d0b74c17 833
beac1dff 834 $obj->{return_next} = 1;
835 } # tag is a bucket list
d0b74c17 836
beac1dff 837 return;
6736c116 838}
839
840sub get_next_key {
beac1dff 841 ##
842 # Locate next key, given digested previous one
843 ##
6736c116 844 my $self = shift;
845 my ($obj) = @_;
d0b74c17 846
beac1dff 847 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
848 $obj->{return_next} = 0;
d0b74c17 849
beac1dff 850 ##
851 # If the previous key was not specifed, start at the top and
852 # return the first one found.
853 ##
854 if (!$obj->{prev_md5}) {
855 $obj->{prev_md5} = chr(0) x $self->{hash_size};
856 $obj->{return_next} = 1;
857 }
d0b74c17 858
beac1dff 859 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 860}
861
75be6413 862# Utilities
863
9cec1360 864sub _get_key_subloc {
75be6413 865 my $self = shift;
866 my ($keys, $idx) = @_;
867
9cec1360 868 my ($key, $subloc) = unpack(
869 "a$self->{hash_size} $self->{long_pack}",
75be6413 870 substr(
871 $keys,
9cec1360 872 ($idx * $self->{bucket_size}),
873 $self->{bucket_size},
75be6413 874 ),
875 );
876
9cec1360 877 return ($key, $subloc);
75be6413 878}
879
d608b06e 880sub _find_in_buckets {
881 my $self = shift;
882 my ($tag, $md5) = @_;
883
884 BUCKET:
885 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
886 my ($key, $subloc) = $self->_get_key_subloc( $tag->{content}, $i );
887
888 return ($subloc, $i * $self->{bucket_size}) unless $subloc;
889
890 next BUCKET if $key ne $md5;
891
892 return ($subloc, $i * $self->{bucket_size});
893 }
894
895 return;
896}
897
994ccd8e 898sub _request_space {
899 my $self = shift;
900 my ($obj, $size) = @_;
901
902 my $loc = $obj->_root->{end};
c9ec091a 903 $obj->_root->{end} += $size;
994ccd8e 904
905 return $loc;
906}
907
908sub _release_space {
909 my $self = shift;
910 my ($obj, $size, $loc) = @_;
911
912 return;
913}
914
a20d9a3f 9151;
916__END__
d5d7c51d 917
918# This will be added in later, after more refactoring is done. This is an early
919# attempt at refactoring on the physical level instead of the virtual level.
920sub _read_at {
921 my $self = shift;
922 my ($obj, $spot, $amount, $unpack) = @_;
923
924 my $fh = $obj->_fh;
925 seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );
926
927 my $buffer;
928 my $bytes_read = read( $fh, $buffer, $amount );
929
930 if ( $unpack ) {
931 $buffer = unpack( $unpack, $buffer );
932 }
933
934 if ( wantarray ) {
935 return ($buffer, $bytes_read);
936 }
937 else {
938 return $buffer;
939 }
940}