Added MSWin32 checks for is_writable call
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
8db25060 7##
8# Setup file and tag signatures. These should never change.
9##
10sub SIG_FILE () { 'DPDB' }
11sub SIG_INTERNAL () { 'i' }
12sub SIG_HASH () { 'H' }
13sub SIG_ARRAY () { 'A' }
8db25060 14sub SIG_NULL () { 'N' }
15sub SIG_DATA () { 'D' }
16sub SIG_INDEX () { 'I' }
17sub SIG_BLIST () { 'B' }
18sub SIG_SIZE () { 1 }
19
612969fb 20sub precalc_sizes {
beac1dff 21 ##
22 # Precalculate index, bucket and bucket list sizes
23 ##
251dfd0e 24 my $self = shift;
1bf65be7 25
251dfd0e 26 $self->{index_size} = (2**8) * $self->{long_size};
6ed2f3df 27 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
251dfd0e 28 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 29
251dfd0e 30 return 1;
1bf65be7 31}
32
33sub set_pack {
beac1dff 34 ##
35 # Set pack/unpack modes (see file header for more)
36 ##
251dfd0e 37 my $self = shift;
1bf65be7 38 my ($long_s, $long_p, $data_s, $data_p) = @_;
39
81d16922 40 ##
75be6413 41 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
42 # GB per file.
beac1dff 43 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 44 #
45 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 46 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 47 ##
251dfd0e 48 $self->{long_size} = $long_s ? $long_s : 4;
49 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 50
81d16922 51 ##
75be6413 52 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
d5d7c51d 53 # key/value. Upgrading this is possible (see above) but probably not
54 # necessary. If you need more than 4 GB for a single key or value, this
55 # module is really not for you :-)
81d16922 56 ##
251dfd0e 57 $self->{data_size} = $data_s ? $data_s : 4;
58 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 59
beac1dff 60 return $self->precalc_sizes();
1bf65be7 61}
62
63sub set_digest {
beac1dff 64 ##
65 # Set key digest function (default is MD5)
66 ##
251dfd0e 67 my $self = shift;
1bf65be7 68 my ($digest_func, $hash_size) = @_;
69
d0b74c17 70 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 71 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 72
beac1dff 73 return $self->precalc_sizes();
612969fb 74}
75
76sub new {
77 my $class = shift;
78 my ($args) = @_;
79
80 my $self = bless {
81 long_size => 4,
82 long_pack => 'N',
83 data_size => 4,
84 data_pack => 'N',
251dfd0e 85
612969fb 86 digest => \&Digest::MD5::md5,
87 hash_size => 16,
251dfd0e 88
81d16922 89 ##
d5d7c51d 90 # Maximum number of buckets per list before another level of indexing is
91 # done.
92 # Increase this value for slightly greater speed, but larger database
93 # files. DO NOT decrease this value below 16, due to risk of recursive
94 # reindex overrun.
81d16922 95 ##
612969fb 96 max_buckets => 16,
97 }, $class;
98
251dfd0e 99 $self->precalc_sizes;
612969fb 100
101 return $self;
1bf65be7 102}
103
70b55428 104sub setup_fh {
105 my $self = shift;
106 my ($obj) = @_;
107
108 $self->open( $obj ) if !defined $obj->_fh;
109
6fde4ed2 110 my $fh = $obj->_fh;
111 flock $fh, LOCK_EX;
118ba343 112
6fde4ed2 113 unless ( $obj->{base_offset} ) {
118ba343 114 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
115 my $signature;
116 my $bytes_read = read( $fh, $signature, length(SIG_FILE));
117
118 ##
119 # File is empty -- write signature and master index
120 ##
121 if (!$bytes_read) {
c9ec091a 122 my $loc = $self->_request_space( $obj, length( SIG_FILE ) );
123 seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET);
118ba343 124 print( $fh SIG_FILE);
125
c9ec091a 126 $obj->{base_offset} = $self->_request_space(
16d1ad9b 127 $obj, $self->tag_size( $self->{index_size} ),
c9ec091a 128 );
118ba343 129
9e4f83a0 130 $self->write_tag(
c9ec091a 131 $obj, $obj->_base_offset, $obj->_type,
f37c15ab 132 chr(0)x$self->{index_size},
118ba343 133 );
134
135 # Flush the filehandle
136 my $old_fh = select $fh;
137 my $old_af = $|; $| = 1; $| = $old_af;
138 select $old_fh;
139 }
140 else {
141 $obj->{base_offset} = $bytes_read;
142
143 ##
144 # Check signature was valid
145 ##
146 unless ($signature eq SIG_FILE) {
147 $self->close_fh( $obj );
148 $obj->_throw_error("Signature not found -- file is not a Deep DB");
149 }
150
151 ##
152 # Get our type from master index signature
153 ##
154 my $tag = $self->load_tag($obj, $obj->_base_offset)
155 or $obj->_throw_error("Corrupted file, no master index record");
156
157 unless ($obj->{type} eq $tag->{signature}) {
158 $obj->_throw_error("File type mismatch");
159 }
160 }
118ba343 161 }
e06824f8 162
673464d9 163 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 164 unless ( $obj->_root->{inode} ) {
165 my @stats = stat($obj->_fh);
166 $obj->_root->{inode} = $stats[1];
167 $obj->_root->{end} = $stats[7];
168 }
169
6fde4ed2 170 flock $fh, LOCK_UN;
171
70b55428 172 return 1;
173}
174
a20d9a3f 175sub open {
20f7b20c 176 ##
177 # Open a fh to the database, create if nonexistent.
178 # Make sure file signature matches DBM::Deep spec.
179 ##
a20d9a3f 180 my $self = shift;
70b55428 181 my ($obj) = @_;
a20d9a3f 182
673464d9 183 # Theoretically, adding O_BINARY should remove the need for the binmode
184 # Of course, testing it is going to be ... interesting.
185 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 186
673464d9 187 my $fh;
d5d7c51d 188 my $filename = $obj->_root->{file};
189 sysopen( $fh, $filename, $flags )
190 or $obj->_throw_error("Cannot sysopen file '$filename': $!");
673464d9 191 $obj->_root->{fh} = $fh;
a20d9a3f 192
193 #XXX Can we remove this by using the right sysopen() flags?
194 # Maybe ... q.v. above
195 binmode $fh; # for win32
196
cd59cad8 197 if ($obj->_root->{autoflush}) {
a20d9a3f 198 my $old = select $fh;
199 $|=1;
200 select $old;
201 }
20f7b20c 202
a20d9a3f 203 return 1;
204}
205
3d1b8be9 206sub close_fh {
cd59cad8 207 my $self = shift;
a21f2d90 208 my ($obj) = @_;
cd59cad8 209
210 if ( my $fh = $obj->_root->{fh} ) {
211 close $fh;
212 }
213 $obj->_root->{fh} = undef;
214
215 return 1;
216}
217
16d1ad9b 218sub tag_size {
219 my $self = shift;
220 my ($size) = @_;
221 return SIG_SIZE + $self->{data_size} + $size;
222}
223
9e4f83a0 224sub write_tag {
20f7b20c 225 ##
226 # Given offset, signature and content, create tag and write to disk
227 ##
d4b1166e 228 my $self = shift;
20f7b20c 229 my ($obj, $offset, $sig, $content) = @_;
f37c15ab 230 my $size = length( $content );
20f7b20c 231
d4b1166e 232 my $fh = $obj->_fh;
233
f37c15ab 234 if ( defined $offset ) {
235 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
236 }
237
251dfd0e 238 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 239
f37c15ab 240 return unless defined $offset;
241
20f7b20c 242 return {
243 signature => $sig,
244 size => $size,
8db25060 245 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 246 content => $content
247 };
d4b1166e 248}
249
250sub load_tag {
20f7b20c 251 ##
252 # Given offset, load single tag and return signature, size and data
253 ##
d4b1166e 254 my $self = shift;
20f7b20c 255 my ($obj, $offset) = @_;
256
e06824f8 257# print join(':',map{$_||''}caller(1)), $/;
258
d4b1166e 259 my $fh = $obj->_fh;
260
20f7b20c 261 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 262
75be6413 263 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 264 return if eof $fh;
20f7b20c 265
d4b1166e 266 my $b;
8db25060 267 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 268 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 269
270 my $buffer;
271 read( $fh, $buffer, $size);
272
273 return {
274 signature => $sig,
275 size => $size,
8db25060 276 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 277 content => $buffer
278 };
d4b1166e 279}
280
29b01632 281sub _length_needed {
282 my $self = shift;
f37c15ab 283 my ($obj, $value, $key) = @_;
29b01632 284
285 my $is_dbm_deep = eval {
286 local $SIG{'__DIE__'};
287 $value->isa( 'DBM::Deep' );
288 };
289
f37c15ab 290 my $len = SIG_SIZE + $self->{data_size}
291 + $self->{data_size} + length( $key );
29b01632 292
f37c15ab 293 if ( $is_dbm_deep && $value->_root eq $obj->_root ) {
294 return $len + $self->{long_size};
29b01632 295 }
296
297 my $r = Scalar::Util::reftype( $value ) || '';
9a187d8c 298 if ( $obj->_root->{autobless} ) {
299 # This is for the bit saying whether or not this thing is blessed.
300 $len += 1;
301 }
302
29b01632 303 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 304 if ( defined $value ) {
305 $len += length( $value );
306 }
307 return $len;
29b01632 308 }
309
f37c15ab 310 $len += $self->{index_size};
29b01632 311
312 # if autobless is enabled, must also take into consideration
f37c15ab 313 # the class name as it is stored after the key.
29b01632 314 if ( $obj->_root->{autobless} ) {
315 my $value_class = Scalar::Util::blessed($value);
f37c15ab 316 if ( defined $value_class && !$is_dbm_deep ) {
317 $len += $self->{data_size} + length($value_class);
29b01632 318 }
319 }
320
f37c15ab 321 return $len;
29b01632 322}
323
20f7b20c 324sub add_bucket {
325 ##
326 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
327 # plain (undigested) key and value.
328 ##
d4b1166e 329 my $self = shift;
20f7b20c 330 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 331
eea0d863 332 # This verifies that only supported values will be stored.
333 {
334 my $r = Scalar::Util::reftype( $value );
335 last if !defined $r;
336
337 last if $r eq 'HASH';
338 last if $r eq 'ARRAY';
339
340 $obj->_throw_error(
341 "Storage of variables of type '$r' is not supported."
342 );
343 }
344
20f7b20c 345 my $location = 0;
346 my $result = 2;
347
348 my $root = $obj->_root;
f37c15ab 349 my $fh = $obj->_fh;
20f7b20c 350
f37c15ab 351 my $actual_length = $self->_length_needed( $obj, $value, $plain_key );
20f7b20c 352
9a187d8c 353 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
75be6413 354
386bab6c 355 # Updating a known md5
356 if ( $subloc ) {
357 $result = 1;
20f7b20c 358
386bab6c 359 if ($actual_length <= $size) {
360 $location = $subloc;
20f7b20c 361 }
75be6413 362 else {
f37c15ab 363 $location = $self->_request_space( $obj, $actual_length );
386bab6c 364 seek(
365 $fh,
9a187d8c 366 $tag->{offset} + $offset
367 + $self->{hash_size} + $root->{file_offset},
386bab6c 368 SEEK_SET,
369 );
9a187d8c 370 print( $fh pack($self->{long_pack}, $location ) );
371 print( $fh pack($self->{long_pack}, $actual_length ) );
75be6413 372 }
75be6413 373 }
386bab6c 374 # Adding a new md5
375 elsif ( defined $offset ) {
f37c15ab 376 $location = $self->_request_space( $obj, $actual_length );
386bab6c 377
378 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 379 print( $fh $md5 . pack($self->{long_pack}, $location ) );
380 print( $fh pack($self->{long_pack}, $actual_length ) );
386bab6c 381 }
382 # If bucket didn't fit into list, split into a new index level
383 else {
9a187d8c 384#XXX This is going to be a problem.
386bab6c 385 $self->split_index( $obj, $md5, $tag );
386
f37c15ab 387 $location = $self->_request_space( $obj, $actual_length );
386bab6c 388 }
20f7b20c 389
d5d7c51d 390 $self->write_value( $obj, $location, $plain_key, $value );
391
392 return $result;
393}
394
395sub write_value {
396 my $self = shift;
397 my ($obj, $location, $key, $value) = @_;
398
399 my $fh = $obj->_fh;
400 my $root = $obj->_root;
401
402 my $is_dbm_deep = eval {
403 local $SIG{'__DIE__'};
404 $value->isa( 'DBM::Deep' );
405 };
406
407 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
408
409 seek($fh, $location + $root->{file_offset}, SEEK_SET);
410
20f7b20c 411 ##
d5d7c51d 412 # Write signature based on content type, set content length and write
413 # actual value.
20f7b20c 414 ##
d5d7c51d 415 my $r = Scalar::Util::reftype($value) || '';
d5d7c51d 416 if ( $internal_ref ) {
9e4f83a0 417 $self->write_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $value->_base_offset) );
f37c15ab 418 }
419 elsif ($r eq 'HASH') {
9e4f83a0 420 $self->write_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 421 }
422 elsif ($r eq 'ARRAY') {
9e4f83a0 423 $self->write_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 424 }
425 elsif (!defined($value)) {
9e4f83a0 426 $self->write_tag( $obj, undef, SIG_NULL, '' );
d5d7c51d 427 }
428 else {
9e4f83a0 429 $self->write_tag( $obj, undef, SIG_DATA, $value );
d5d7c51d 430 }
20f7b20c 431
d5d7c51d 432 ##
433 # Plain key is stored AFTER value, as keys are typically fetched less often.
434 ##
435 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 436
9a187d8c 437 # Internal references don't care about autobless
438 return 1 if $internal_ref;
439
d5d7c51d 440 ##
441 # If value is blessed, preserve class name
442 ##
443 if ( $root->{autobless} ) {
444 my $value_class = Scalar::Util::blessed($value);
f37c15ab 445 if ( defined $value_class && !$is_dbm_deep ) {
d5d7c51d 446 print( $fh chr(1) );
447 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
20f7b20c 448 }
d5d7c51d 449 else {
450 print( $fh chr(0) );
20f7b20c 451 }
d5d7c51d 452 }
20f7b20c 453
d5d7c51d 454 ##
d5d7c51d 455 # If content is a hash or array, create new child DBM::Deep object and
456 # pass each key or element to it.
457 ##
f37c15ab 458 if ( !$internal_ref ) {
d5d7c51d 459 if ($r eq 'HASH') {
460 my $branch = DBM::Deep->new(
461 type => DBM::Deep->TYPE_HASH,
462 base_offset => $location,
463 root => $root,
464 );
465 foreach my $key (keys %{$value}) {
466 $branch->STORE( $key, $value->{$key} );
20f7b20c 467 }
d5d7c51d 468 }
469 elsif ($r eq 'ARRAY') {
470 my $branch = DBM::Deep->new(
471 type => DBM::Deep->TYPE_ARRAY,
472 base_offset => $location,
473 root => $root,
474 );
475 my $index = 0;
476 foreach my $element (@{$value}) {
477 $branch->STORE( $index, $element );
478 $index++;
20f7b20c 479 }
480 }
20f7b20c 481 }
d4b1166e 482
d5d7c51d 483 return 1;
d4b1166e 484}
485
75be6413 486sub split_index {
487 my $self = shift;
488 my ($obj, $md5, $tag) = @_;
489
490 my $fh = $obj->_fh;
491 my $root = $obj->_root;
492 my $keys = $tag->{content};
493
494 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 495
496 my $loc = $self->_request_space(
497 $obj, $self->tag_size( $self->{index_size} ),
498 );
499
500 print( $fh pack($self->{long_pack}, $loc) );
75be6413 501
9e4f83a0 502 my $index_tag = $self->write_tag(
16d1ad9b 503 $obj, $loc, SIG_INDEX,
f37c15ab 504 chr(0)x$self->{index_size},
75be6413 505 );
506
507 my @offsets = ();
508
9a187d8c 509 $keys .= $md5 . (pack($self->{long_pack}, 0) x 2);
75be6413 510
511 BUCKET:
512 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 513 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 514
515 next BUCKET unless $key;
516
75be6413 517 my $num = ord(substr($key, $tag->{ch} + 1, 1));
518
519 if ($offsets[$num]) {
8db25060 520 my $offset = $offsets[$num] + SIG_SIZE + $self->{data_size};
75be6413 521 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
522 my $subkeys;
523 read( $fh, $subkeys, $self->{bucket_list_size});
524
525 for (my $k=0; $k<$self->{max_buckets}; $k++) {
9cec1360 526 my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k );
75be6413 527
528 if (!$subloc) {
529 seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET);
530 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
531 last;
532 }
533 } # k loop
534 }
535 else {
536 $offsets[$num] = $root->{end};
537 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
75be6413 538
2603d86e 539 my $loc = $self->_request_space(
540 $obj, $self->tag_size( $self->{bucket_list_size} ),
541 );
542
543 print( $fh pack($self->{long_pack}, $loc) );
544
9e4f83a0 545 my $blist_tag = $self->write_tag(
2603d86e 546 $obj, $loc, SIG_BLIST,
f37c15ab 547 chr(0)x$self->{bucket_list_size},
2603d86e 548 );
75be6413 549
550 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
551 print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) );
552 }
553 } # i loop
554
555 return;
556}
557
8db25060 558sub read_from_loc {
559 my $self = shift;
560 my ($obj, $subloc) = @_;
561
562 my $fh = $obj->_fh;
563
564 ##
565 # Found match -- seek to offset and read signature
566 ##
567 my $signature;
568 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
569 read( $fh, $signature, SIG_SIZE);
570
571 ##
572 # If value is a hash or array, return new DBM::Deep object with correct offset
573 ##
574 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
575 my $obj = DBM::Deep->new(
576 type => $signature,
577 base_offset => $subloc,
578 root => $obj->_root,
579 );
580
581 if ($obj->_root->{autobless}) {
582 ##
583 # Skip over value and plain key to see if object needs
584 # to be re-blessed
585 ##
586 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
587
588 my $size;
589 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
590 if ($size) { seek($fh, $size, SEEK_CUR); }
591
592 my $bless_bit;
593 read( $fh, $bless_bit, 1);
594 if (ord($bless_bit)) {
595 ##
596 # Yes, object needs to be re-blessed
597 ##
598 my $class_name;
599 read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size);
600 if ($size) { read( $fh, $class_name, $size); }
601 if ($class_name) { $obj = bless( $obj, $class_name ); }
602 }
603 }
604
605 return $obj;
606 }
607 elsif ( $signature eq SIG_INTERNAL ) {
608 my $size;
609 read( $fh, $size, $self->{data_size});
610 $size = unpack($self->{data_pack}, $size);
611
612 if ( $size ) {
613 my $new_loc;
614 read( $fh, $new_loc, $size );
615 $new_loc = unpack( $self->{long_pack}, $new_loc );
616
617 return $self->read_from_loc( $obj, $new_loc );
618 }
619 else {
620 return;
621 }
622 }
623 ##
624 # Otherwise return actual value
625 ##
626 elsif ($signature eq SIG_DATA) {
627 my $size;
628 read( $fh, $size, $self->{data_size});
629 $size = unpack($self->{data_pack}, $size);
630
631 my $value = '';
632 if ($size) { read( $fh, $value, $size); }
633 return $value;
634 }
635
636 ##
637 # Key exists, but content is null
638 ##
639 return;
640}
641
9020ee8c 642sub get_bucket_value {
beac1dff 643 ##
644 # Fetch single value given tag and MD5 digested key.
645 ##
646 my $self = shift;
647 my ($obj, $tag, $md5) = @_;
9020ee8c 648
9a187d8c 649 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
386bab6c 650 if ( $subloc ) {
8db25060 651 return $self->read_from_loc( $obj, $subloc );
386bab6c 652 }
beac1dff 653 return;
9020ee8c 654}
ab0e4957 655
656sub delete_bucket {
beac1dff 657 ##
658 # Delete single key/value pair given tag and MD5 digested key.
659 ##
660 my $self = shift;
661 my ($obj, $tag, $md5) = @_;
ab0e4957 662
9a187d8c 663 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
386bab6c 664 if ( $subloc ) {
665 my $fh = $obj->_fh;
666 seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);
667 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 668 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 669
ab0e4957 670 return 1;
386bab6c 671 }
beac1dff 672 return;
ab0e4957 673}
674
912d50b1 675sub bucket_exists {
beac1dff 676 ##
677 # Check existence of single key given tag and MD5 digested key.
678 ##
679 my $self = shift;
680 my ($obj, $tag, $md5) = @_;
912d50b1 681
9a187d8c 682 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 683 return $subloc && 1;
912d50b1 684}
685
6736c116 686sub find_bucket_list {
beac1dff 687 ##
688 # Locate offset for bucket list, given digested key
689 ##
690 my $self = shift;
d0b74c17 691 my ($obj, $md5, $args) = @_;
692 $args = {} unless $args;
693
beac1dff 694 ##
695 # Locate offset for bucket list using digest index system
696 ##
e5fc7e69 697 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 698 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 699
e5fc7e69 700 my $ch = 0;
8db25060 701 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 702 my $num = ord substr($md5, $ch, 1);
703
704 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
705 $tag = $self->index_lookup( $obj, $tag, $num );
706
707 if (!$tag) {
29b01632 708 return if !$args->{create};
d0b74c17 709
d5d7c51d 710 my $fh = $obj->_fh;
711 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
16d1ad9b 712
713 my $loc = $self->_request_space(
714 $obj, $self->tag_size( $self->{bucket_list_size} ),
715 );
716
717 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 718
9e4f83a0 719 $tag = $self->write_tag(
16d1ad9b 720 $obj, $loc, SIG_BLIST,
f37c15ab 721 chr(0)x$self->{bucket_list_size},
d5d7c51d 722 );
723
724 $tag->{ref_loc} = $ref_loc;
725 $tag->{ch} = $ch;
726
727 last;
d0b74c17 728 }
729
16d1ad9b 730 $tag->{ch} = $ch++;
d0b74c17 731 $tag->{ref_loc} = $ref_loc;
beac1dff 732 }
d0b74c17 733
beac1dff 734 return $tag;
6736c116 735}
736
d0b74c17 737sub index_lookup {
738 ##
739 # Given index tag, lookup single entry in index and return .
740 ##
741 my $self = shift;
742 my ($obj, $tag, $index) = @_;
743
744 my $location = unpack(
745 $self->{long_pack},
746 substr(
747 $tag->{content},
748 $index * $self->{long_size},
749 $self->{long_size},
750 ),
751 );
752
753 if (!$location) { return; }
754
755 return $self->load_tag( $obj, $location );
756}
757
6736c116 758sub traverse_index {
beac1dff 759 ##
760 # Scan index and recursively step into deeper levels, looking for next key.
761 ##
6736c116 762 my $self = shift;
763 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 764
beac1dff 765 my $tag = $self->load_tag($obj, $offset );
6736c116 766
767 my $fh = $obj->_fh;
d0b74c17 768
8db25060 769 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 770 my $content = $tag->{content};
e5fc7e69 771 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 772
d5d7c51d 773 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 774 my $subloc = unpack(
775 $self->{long_pack},
e06824f8 776 substr(
777 $content,
778 $idx * $self->{long_size},
779 $self->{long_size},
780 ),
e5fc7e69 781 );
782
beac1dff 783 if ($subloc) {
e5fc7e69 784 my $result = $self->traverse_index(
785 $obj, $subloc, $ch + 1, $force_return_next,
786 );
787
beac1dff 788 if (defined($result)) { return $result; }
789 }
790 } # index loop
d0b74c17 791
beac1dff 792 $obj->{return_next} = 1;
793 } # tag is an index
d0b74c17 794
e5fc7e69 795 else {
beac1dff 796 my $keys = $tag->{content};
797 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 798
beac1dff 799 ##
800 # Iterate through buckets, looking for a key match
801 ##
8db25060 802 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 803 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 804
8db25060 805 # End of bucket list -- return to outer loop
beac1dff 806 if (!$subloc) {
beac1dff 807 $obj->{return_next} = 1;
808 last;
809 }
8db25060 810 # Located previous key -- return next one found
beac1dff 811 elsif ($key eq $obj->{prev_md5}) {
beac1dff 812 $obj->{return_next} = 1;
813 next;
814 }
8db25060 815 # Seek to bucket location and skip over signature
beac1dff 816 elsif ($obj->{return_next}) {
8db25060 817 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 818
beac1dff 819 # Skip over value to get to plain key
8db25060 820 my $sig;
821 read( $fh, $sig, SIG_SIZE );
822
beac1dff 823 my $size;
e5fc7e69 824 read( $fh, $size, $self->{data_size});
825 $size = unpack($self->{data_pack}, $size);
beac1dff 826 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 827
beac1dff 828 # Read in plain key and return as scalar
beac1dff 829 my $plain_key;
e5fc7e69 830 read( $fh, $size, $self->{data_size});
831 $size = unpack($self->{data_pack}, $size);
beac1dff 832 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 833
beac1dff 834 return $plain_key;
835 }
8db25060 836 }
d0b74c17 837
beac1dff 838 $obj->{return_next} = 1;
839 } # tag is a bucket list
d0b74c17 840
beac1dff 841 return;
6736c116 842}
843
844sub get_next_key {
beac1dff 845 ##
846 # Locate next key, given digested previous one
847 ##
6736c116 848 my $self = shift;
849 my ($obj) = @_;
d0b74c17 850
beac1dff 851 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
852 $obj->{return_next} = 0;
d0b74c17 853
beac1dff 854 ##
855 # If the previous key was not specifed, start at the top and
856 # return the first one found.
857 ##
858 if (!$obj->{prev_md5}) {
859 $obj->{prev_md5} = chr(0) x $self->{hash_size};
860 $obj->{return_next} = 1;
861 }
d0b74c17 862
beac1dff 863 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 864}
865
75be6413 866# Utilities
867
9cec1360 868sub _get_key_subloc {
75be6413 869 my $self = shift;
870 my ($keys, $idx) = @_;
871
6ed2f3df 872 my ($key, $subloc, $size) = unpack(
873 "a$self->{hash_size} $self->{long_pack} $self->{long_pack}",
75be6413 874 substr(
875 $keys,
9cec1360 876 ($idx * $self->{bucket_size}),
877 $self->{bucket_size},
75be6413 878 ),
879 );
880
6ed2f3df 881 return ($key, $subloc, $size);
75be6413 882}
883
d608b06e 884sub _find_in_buckets {
885 my $self = shift;
886 my ($tag, $md5) = @_;
887
888 BUCKET:
889 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
9a187d8c 890 my ($key, $subloc, $size) = $self->_get_key_subloc(
891 $tag->{content}, $i,
892 );
d608b06e 893
9a187d8c 894 return ($subloc, $i * $self->{bucket_size}, $size) unless $subloc;
d608b06e 895
896 next BUCKET if $key ne $md5;
897
9a187d8c 898 return ($subloc, $i * $self->{bucket_size}, $size);
d608b06e 899 }
900
901 return;
902}
903
994ccd8e 904sub _request_space {
905 my $self = shift;
906 my ($obj, $size) = @_;
907
908 my $loc = $obj->_root->{end};
c9ec091a 909 $obj->_root->{end} += $size;
994ccd8e 910
911 return $loc;
912}
913
914sub _release_space {
915 my $self = shift;
916 my ($obj, $size, $loc) = @_;
917
918 return;
919}
920
a20d9a3f 9211;
922__END__
d5d7c51d 923
924# This will be added in later, after more refactoring is done. This is an early
925# attempt at refactoring on the physical level instead of the virtual level.
926sub _read_at {
927 my $self = shift;
928 my ($obj, $spot, $amount, $unpack) = @_;
929
930 my $fh = $obj->_fh;
931 seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );
932
933 my $buffer;
934 my $bytes_read = read( $fh, $buffer, $amount );
935
936 if ( $unpack ) {
937 $buffer = unpack( $unpack, $buffer );
938 }
939
940 if ( wantarray ) {
941 return ($buffer, $bytes_read);
942 }
943 else {
944 return $buffer;
945 }
946}