New testing feature that allows specification of the workdir for the tests
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
8db25060 7##
8# Setup file and tag signatures. These should never change.
9##
10sub SIG_FILE () { 'DPDB' }
11sub SIG_INTERNAL () { 'i' }
12sub SIG_HASH () { 'H' }
13sub SIG_ARRAY () { 'A' }
8db25060 14sub SIG_NULL () { 'N' }
15sub SIG_DATA () { 'D' }
16sub SIG_INDEX () { 'I' }
17sub SIG_BLIST () { 'B' }
7b1e1aa1 18sub SIG_FREE () { 'F' }
8db25060 19sub SIG_SIZE () { 1 }
20
612969fb 21sub precalc_sizes {
beac1dff 22 ##
23 # Precalculate index, bucket and bucket list sizes
24 ##
251dfd0e 25 my $self = shift;
1bf65be7 26
251dfd0e 27 $self->{index_size} = (2**8) * $self->{long_size};
6ed2f3df 28 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
251dfd0e 29 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 30
251dfd0e 31 return 1;
1bf65be7 32}
33
34sub set_pack {
beac1dff 35 ##
36 # Set pack/unpack modes (see file header for more)
37 ##
251dfd0e 38 my $self = shift;
1bf65be7 39 my ($long_s, $long_p, $data_s, $data_p) = @_;
40
81d16922 41 ##
75be6413 42 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
43 # GB per file.
beac1dff 44 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 45 #
46 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 47 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 48 ##
251dfd0e 49 $self->{long_size} = $long_s ? $long_s : 4;
50 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 51
81d16922 52 ##
75be6413 53 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
d5d7c51d 54 # key/value. Upgrading this is possible (see above) but probably not
55 # necessary. If you need more than 4 GB for a single key or value, this
56 # module is really not for you :-)
81d16922 57 ##
251dfd0e 58 $self->{data_size} = $data_s ? $data_s : 4;
59 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 60
beac1dff 61 return $self->precalc_sizes();
1bf65be7 62}
63
64sub set_digest {
beac1dff 65 ##
66 # Set key digest function (default is MD5)
67 ##
251dfd0e 68 my $self = shift;
1bf65be7 69 my ($digest_func, $hash_size) = @_;
70
d0b74c17 71 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 72 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 73
beac1dff 74 return $self->precalc_sizes();
612969fb 75}
76
77sub new {
78 my $class = shift;
79 my ($args) = @_;
80
81 my $self = bless {
82 long_size => 4,
83 long_pack => 'N',
84 data_size => 4,
85 data_pack => 'N',
251dfd0e 86
612969fb 87 digest => \&Digest::MD5::md5,
88 hash_size => 16,
251dfd0e 89
81d16922 90 ##
d5d7c51d 91 # Maximum number of buckets per list before another level of indexing is
92 # done.
93 # Increase this value for slightly greater speed, but larger database
94 # files. DO NOT decrease this value below 16, due to risk of recursive
95 # reindex overrun.
81d16922 96 ##
612969fb 97 max_buckets => 16,
98 }, $class;
99
fde3db1a 100 # Grab the parameters we want to use
101 foreach my $param ( keys %$self ) {
102 next unless exists $args->{$param};
103 $self->{$param} = $args->{$param}
104 }
105
251dfd0e 106 $self->precalc_sizes;
612969fb 107
108 return $self;
1bf65be7 109}
110
fde3db1a 111sub write_file_header {
0d0f3d5d 112 my $self = shift;
113 my ($obj) = @_;
114
115 my $fh = $obj->_fh;
116
42f79e07 117 my $loc = $self->_request_space(
118 $obj, length( SIG_FILE ) + $self->{data_size},
119 );
0d0f3d5d 120 seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET);
42f79e07 121 print( $fh SIG_FILE, pack($self->{data_pack}, 0) );
0d0f3d5d 122
123 return;
124}
125
fde3db1a 126sub read_file_header {
e064ccd1 127 my $self = shift;
128 my ($obj) = @_;
129
130 my $fh = $obj->_fh;
131
132 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
42f79e07 133 my $buffer;
134 my $bytes_read = read(
135 $fh, $buffer, length(SIG_FILE) + $self->{data_size},
136 );
e064ccd1 137
138 if ( $bytes_read ) {
42f79e07 139 my ($signature, $version) = unpack( "A4 $self->{data_pack}", $buffer );
e064ccd1 140 unless ($signature eq SIG_FILE) {
141 $self->close_fh( $obj );
142 $obj->_throw_error("Signature not found -- file is not a Deep DB");
143 }
144 }
145
146 return $bytes_read;
147}
148
70b55428 149sub setup_fh {
150 my $self = shift;
151 my ($obj) = @_;
152
153 $self->open( $obj ) if !defined $obj->_fh;
154
6fde4ed2 155 my $fh = $obj->_fh;
156 flock $fh, LOCK_EX;
118ba343 157
6fde4ed2 158 unless ( $obj->{base_offset} ) {
fde3db1a 159 my $bytes_read = $self->read_file_header( $obj );
118ba343 160
161 ##
fde3db1a 162 # File is empty -- write header and master index
118ba343 163 ##
164 if (!$bytes_read) {
fde3db1a 165 $self->write_file_header( $obj );
118ba343 166
c9ec091a 167 $obj->{base_offset} = $self->_request_space(
16d1ad9b 168 $obj, $self->tag_size( $self->{index_size} ),
c9ec091a 169 );
118ba343 170
9e4f83a0 171 $self->write_tag(
c9ec091a 172 $obj, $obj->_base_offset, $obj->_type,
f37c15ab 173 chr(0)x$self->{index_size},
118ba343 174 );
175
176 # Flush the filehandle
177 my $old_fh = select $fh;
178 my $old_af = $|; $| = 1; $| = $old_af;
179 select $old_fh;
180 }
181 else {
182 $obj->{base_offset} = $bytes_read;
183
184 ##
fde3db1a 185 # Get our type from master index header
118ba343 186 ##
187 my $tag = $self->load_tag($obj, $obj->_base_offset)
188 or $obj->_throw_error("Corrupted file, no master index record");
189
190 unless ($obj->{type} eq $tag->{signature}) {
191 $obj->_throw_error("File type mismatch");
192 }
193 }
118ba343 194 }
e06824f8 195
673464d9 196 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 197 unless ( $obj->_root->{inode} ) {
198 my @stats = stat($obj->_fh);
199 $obj->_root->{inode} = $stats[1];
200 $obj->_root->{end} = $stats[7];
201 }
202
6fde4ed2 203 flock $fh, LOCK_UN;
204
70b55428 205 return 1;
206}
207
a20d9a3f 208sub open {
20f7b20c 209 ##
210 # Open a fh to the database, create if nonexistent.
211 # Make sure file signature matches DBM::Deep spec.
212 ##
a20d9a3f 213 my $self = shift;
70b55428 214 my ($obj) = @_;
a20d9a3f 215
673464d9 216 # Theoretically, adding O_BINARY should remove the need for the binmode
217 # Of course, testing it is going to be ... interesting.
218 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 219
673464d9 220 my $fh;
d5d7c51d 221 my $filename = $obj->_root->{file};
222 sysopen( $fh, $filename, $flags )
223 or $obj->_throw_error("Cannot sysopen file '$filename': $!");
673464d9 224 $obj->_root->{fh} = $fh;
a20d9a3f 225
42f79e07 226 # Even though we use O_BINARY, better be safe than sorry.
227 binmode $fh;
a20d9a3f 228
cd59cad8 229 if ($obj->_root->{autoflush}) {
a20d9a3f 230 my $old = select $fh;
231 $|=1;
232 select $old;
233 }
20f7b20c 234
a20d9a3f 235 return 1;
236}
237
3d1b8be9 238sub close_fh {
cd59cad8 239 my $self = shift;
a21f2d90 240 my ($obj) = @_;
cd59cad8 241
242 if ( my $fh = $obj->_root->{fh} ) {
243 close $fh;
244 }
245 $obj->_root->{fh} = undef;
246
247 return 1;
248}
249
16d1ad9b 250sub tag_size {
251 my $self = shift;
252 my ($size) = @_;
253 return SIG_SIZE + $self->{data_size} + $size;
254}
255
9e4f83a0 256sub write_tag {
20f7b20c 257 ##
258 # Given offset, signature and content, create tag and write to disk
259 ##
d4b1166e 260 my $self = shift;
20f7b20c 261 my ($obj, $offset, $sig, $content) = @_;
f37c15ab 262 my $size = length( $content );
20f7b20c 263
d4b1166e 264 my $fh = $obj->_fh;
265
f37c15ab 266 if ( defined $offset ) {
267 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
268 }
269
251dfd0e 270 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 271
f37c15ab 272 return unless defined $offset;
273
20f7b20c 274 return {
275 signature => $sig,
276 size => $size,
8db25060 277 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 278 content => $content
279 };
d4b1166e 280}
281
282sub load_tag {
20f7b20c 283 ##
284 # Given offset, load single tag and return signature, size and data
285 ##
d4b1166e 286 my $self = shift;
20f7b20c 287 my ($obj, $offset) = @_;
288
e06824f8 289# print join(':',map{$_||''}caller(1)), $/;
290
d4b1166e 291 my $fh = $obj->_fh;
292
20f7b20c 293 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 294
75be6413 295 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 296 return if eof $fh;
20f7b20c 297
d4b1166e 298 my $b;
8db25060 299 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 300 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 301
302 my $buffer;
303 read( $fh, $buffer, $size);
304
305 return {
306 signature => $sig,
307 size => $size,
8db25060 308 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 309 content => $buffer
310 };
d4b1166e 311}
312
56ec4340 313sub _get_dbm_object {
314 my $item = shift;
315
316 my $obj = eval {
317 local $SIG{__DIE__};
318 if ($item->isa( 'DBM::Deep' )) {
319 return $item;
320 }
321 return;
322 };
323 return $obj if $obj;
324
325 my $r = Scalar::Util::reftype( $item ) || '';
326 if ( $r eq 'HASH' ) {
327 my $obj = eval {
328 local $SIG{__DIE__};
329 my $obj = tied(%$item);
330 if ($obj->isa( 'DBM::Deep' )) {
331 return $obj;
332 }
333 return;
334 };
335 return $obj if $obj;
336 }
337 elsif ( $r eq 'ARRAY' ) {
338 my $obj = eval {
339 local $SIG{__DIE__};
340 my $obj = tied(@$item);
341 if ($obj->isa( 'DBM::Deep' )) {
342 return $obj;
343 }
344 return;
345 };
346 return $obj if $obj;
347 }
348
349 return;
350}
351
29b01632 352sub _length_needed {
353 my $self = shift;
f37c15ab 354 my ($obj, $value, $key) = @_;
29b01632 355
356 my $is_dbm_deep = eval {
357 local $SIG{'__DIE__'};
358 $value->isa( 'DBM::Deep' );
359 };
360
f37c15ab 361 my $len = SIG_SIZE + $self->{data_size}
362 + $self->{data_size} + length( $key );
29b01632 363
f37c15ab 364 if ( $is_dbm_deep && $value->_root eq $obj->_root ) {
365 return $len + $self->{long_size};
29b01632 366 }
367
368 my $r = Scalar::Util::reftype( $value ) || '';
9a187d8c 369 if ( $obj->_root->{autobless} ) {
370 # This is for the bit saying whether or not this thing is blessed.
371 $len += 1;
372 }
373
29b01632 374 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 375 if ( defined $value ) {
376 $len += length( $value );
377 }
378 return $len;
29b01632 379 }
380
f37c15ab 381 $len += $self->{index_size};
29b01632 382
383 # if autobless is enabled, must also take into consideration
f37c15ab 384 # the class name as it is stored after the key.
29b01632 385 if ( $obj->_root->{autobless} ) {
56ec4340 386 my $c = Scalar::Util::blessed($value);
387 if ( defined $c && !$is_dbm_deep ) {
388 $len += $self->{data_size} + length($c);
29b01632 389 }
390 }
391
f37c15ab 392 return $len;
29b01632 393}
394
20f7b20c 395sub add_bucket {
396 ##
397 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
398 # plain (undigested) key and value.
399 ##
d4b1166e 400 my $self = shift;
20f7b20c 401 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 402
eea0d863 403 # This verifies that only supported values will be stored.
404 {
405 my $r = Scalar::Util::reftype( $value );
406 last if !defined $r;
407
408 last if $r eq 'HASH';
409 last if $r eq 'ARRAY';
410
411 $obj->_throw_error(
412 "Storage of variables of type '$r' is not supported."
413 );
414 }
415
20f7b20c 416 my $location = 0;
417 my $result = 2;
418
419 my $root = $obj->_root;
f37c15ab 420 my $fh = $obj->_fh;
20f7b20c 421
f37c15ab 422 my $actual_length = $self->_length_needed( $obj, $value, $plain_key );
20f7b20c 423
9a187d8c 424 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
75be6413 425
f9c33187 426# $self->_release_space( $obj, $size, $subloc );
386bab6c 427 # Updating a known md5
f9c33187 428#XXX This needs updating to use _release_space
386bab6c 429 if ( $subloc ) {
430 $result = 1;
20f7b20c 431
386bab6c 432 if ($actual_length <= $size) {
433 $location = $subloc;
20f7b20c 434 }
75be6413 435 else {
f37c15ab 436 $location = $self->_request_space( $obj, $actual_length );
386bab6c 437 seek(
438 $fh,
9a187d8c 439 $tag->{offset} + $offset
440 + $self->{hash_size} + $root->{file_offset},
386bab6c 441 SEEK_SET,
442 );
9a187d8c 443 print( $fh pack($self->{long_pack}, $location ) );
444 print( $fh pack($self->{long_pack}, $actual_length ) );
75be6413 445 }
75be6413 446 }
386bab6c 447 # Adding a new md5
448 elsif ( defined $offset ) {
f37c15ab 449 $location = $self->_request_space( $obj, $actual_length );
386bab6c 450
451 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 452 print( $fh $md5 . pack($self->{long_pack}, $location ) );
453 print( $fh pack($self->{long_pack}, $actual_length ) );
386bab6c 454 }
455 # If bucket didn't fit into list, split into a new index level
f9c33187 456 # split_index() will do the _request_space() call
386bab6c 457 else {
f9c33187 458 $location = $self->split_index( $obj, $md5, $tag );
386bab6c 459 }
20f7b20c 460
d5d7c51d 461 $self->write_value( $obj, $location, $plain_key, $value );
462
463 return $result;
464}
465
466sub write_value {
467 my $self = shift;
468 my ($obj, $location, $key, $value) = @_;
469
470 my $fh = $obj->_fh;
471 my $root = $obj->_root;
472
9d4fa373 473 my $dbm_deep_obj = _get_dbm_object( $value );
474 if ( $dbm_deep_obj && $dbm_deep_obj->_root ne $obj->_root ) {
475 $obj->_throw_error( "Cannot cross-reference. Use export() instead" );
476 }
d5d7c51d 477
478 seek($fh, $location + $root->{file_offset}, SEEK_SET);
479
20f7b20c 480 ##
d5d7c51d 481 # Write signature based on content type, set content length and write
482 # actual value.
20f7b20c 483 ##
9d4fa373 484 my $r = Scalar::Util::reftype( $value ) || '';
485 if ( $dbm_deep_obj ) {
486 $self->write_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 487 }
488 elsif ($r eq 'HASH') {
9d4fa373 489 if ( !$dbm_deep_obj && tied %{$value} ) {
019ab3a1 490 $obj->_throw_error( "Cannot store something that is tied" );
491 }
9e4f83a0 492 $self->write_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 493 }
494 elsif ($r eq 'ARRAY') {
9d4fa373 495 if ( !$dbm_deep_obj && tied @{$value} ) {
019ab3a1 496 $obj->_throw_error( "Cannot store something that is tied" );
497 }
9e4f83a0 498 $self->write_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 499 }
500 elsif (!defined($value)) {
9e4f83a0 501 $self->write_tag( $obj, undef, SIG_NULL, '' );
d5d7c51d 502 }
503 else {
9e4f83a0 504 $self->write_tag( $obj, undef, SIG_DATA, $value );
d5d7c51d 505 }
20f7b20c 506
d5d7c51d 507 ##
508 # Plain key is stored AFTER value, as keys are typically fetched less often.
509 ##
510 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 511
9a187d8c 512 # Internal references don't care about autobless
9d4fa373 513 return 1 if $dbm_deep_obj;
9a187d8c 514
d5d7c51d 515 ##
516 # If value is blessed, preserve class name
517 ##
518 if ( $root->{autobless} ) {
56ec4340 519 my $c = Scalar::Util::blessed($value);
520 if ( defined $c && !$dbm_deep_obj ) {
d5d7c51d 521 print( $fh chr(1) );
56ec4340 522 print( $fh pack($self->{data_pack}, length($c)) . $c );
20f7b20c 523 }
d5d7c51d 524 else {
525 print( $fh chr(0) );
20f7b20c 526 }
d5d7c51d 527 }
20f7b20c 528
d5d7c51d 529 ##
56ec4340 530 # Tie the passed in reference so that changes to it are reflected in the
531 # datafile. The use of $location as the base_offset will act as the
532 # the linkage between parent and child.
533 #
534 # The overall assignment is a hack around the fact that just tying doesn't
535 # store the values. This may not be the wrong thing to do.
d5d7c51d 536 ##
9d4fa373 537 if ($r eq 'HASH') {
538 my %x = %$value;
539 tie %$value, 'DBM::Deep', {
540 base_offset => $location,
541 root => $root,
542 };
543 %$value = %x;
544 }
545 elsif ($r eq 'ARRAY') {
546 my @x = @$value;
547 tie @$value, 'DBM::Deep', {
548 base_offset => $location,
549 root => $root,
550 };
551 @$value = @x;
20f7b20c 552 }
d4b1166e 553
d5d7c51d 554 return 1;
d4b1166e 555}
556
75be6413 557sub split_index {
558 my $self = shift;
559 my ($obj, $md5, $tag) = @_;
560
561 my $fh = $obj->_fh;
562 my $root = $obj->_root;
16d1ad9b 563
564 my $loc = $self->_request_space(
565 $obj, $self->tag_size( $self->{index_size} ),
566 );
567
7b1e1aa1 568 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 569 print( $fh pack($self->{long_pack}, $loc) );
75be6413 570
9e4f83a0 571 my $index_tag = $self->write_tag(
16d1ad9b 572 $obj, $loc, SIG_INDEX,
f37c15ab 573 chr(0)x$self->{index_size},
75be6413 574 );
575
f9c33187 576 my $newtag_loc = $self->_request_space(
577 $obj, $self->tag_size( $self->{bucket_list_size} ),
578 );
75be6413 579
7b1e1aa1 580 my $keys = $tag->{content}
f9c33187 581 . $md5 . pack($self->{long_pack}, $newtag_loc)
582 . pack($self->{long_pack}, 0);
75be6413 583
f9c33187 584 my @newloc = ();
75be6413 585 BUCKET:
586 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 587 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 588
f9c33187 589 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
590 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 591
75be6413 592 my $num = ord(substr($key, $tag->{ch} + 1, 1));
593
f9c33187 594 if ($newloc[$num]) {
595 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 596 my $subkeys;
597 read( $fh, $subkeys, $self->{bucket_list_size});
598
f9c33187 599 # This is looking for the first empty spot
7b1e1aa1 600 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 601 { content => $subkeys }, '',
7b1e1aa1 602 );
75be6413 603
f9c33187 604 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
605 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 606
607 next;
75be6413 608 }
75be6413 609
7b1e1aa1 610 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 611
7b1e1aa1 612 my $loc = $self->_request_space(
613 $obj, $self->tag_size( $self->{bucket_list_size} ),
614 );
2603d86e 615
7b1e1aa1 616 print( $fh pack($self->{long_pack}, $loc) );
75be6413 617
7b1e1aa1 618 my $blist_tag = $self->write_tag(
619 $obj, $loc, SIG_BLIST,
620 chr(0)x$self->{bucket_list_size},
621 );
622
623 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 624 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 625
f9c33187 626 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 627 }
628
629 $self->_release_space(
f9c33187 630 $obj, $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 631 $tag->{offset} - SIG_SIZE - $self->{data_size},
632 );
75be6413 633
f9c33187 634 return $newtag_loc;
75be6413 635}
636
8db25060 637sub read_from_loc {
638 my $self = shift;
639 my ($obj, $subloc) = @_;
640
641 my $fh = $obj->_fh;
642
643 ##
644 # Found match -- seek to offset and read signature
645 ##
646 my $signature;
647 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
648 read( $fh, $signature, SIG_SIZE);
649
650 ##
651 # If value is a hash or array, return new DBM::Deep object with correct offset
652 ##
653 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 654 my $new_obj = DBM::Deep->new({
8db25060 655 type => $signature,
656 base_offset => $subloc,
657 root => $obj->_root,
685e40f1 658 });
8db25060 659
685e40f1 660 if ($new_obj->_root->{autobless}) {
8db25060 661 ##
662 # Skip over value and plain key to see if object needs
663 # to be re-blessed
664 ##
665 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
666
667 my $size;
c6ea6b6c 668 read( $fh, $size, $self->{data_size});
669 $size = unpack($self->{data_pack}, $size);
8db25060 670 if ($size) { seek($fh, $size, SEEK_CUR); }
671
672 my $bless_bit;
673 read( $fh, $bless_bit, 1);
674 if (ord($bless_bit)) {
675 ##
676 # Yes, object needs to be re-blessed
677 ##
678 my $class_name;
c6ea6b6c 679 read( $fh, $size, $self->{data_size});
680 $size = unpack($self->{data_pack}, $size);
8db25060 681 if ($size) { read( $fh, $class_name, $size); }
685e40f1 682 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 683 }
684 }
685
685e40f1 686 return $new_obj;
8db25060 687 }
688 elsif ( $signature eq SIG_INTERNAL ) {
689 my $size;
690 read( $fh, $size, $self->{data_size});
691 $size = unpack($self->{data_pack}, $size);
692
693 if ( $size ) {
694 my $new_loc;
695 read( $fh, $new_loc, $size );
696 $new_loc = unpack( $self->{long_pack}, $new_loc );
697
698 return $self->read_from_loc( $obj, $new_loc );
699 }
700 else {
701 return;
702 }
703 }
704 ##
705 # Otherwise return actual value
706 ##
707 elsif ($signature eq SIG_DATA) {
708 my $size;
709 read( $fh, $size, $self->{data_size});
710 $size = unpack($self->{data_pack}, $size);
711
712 my $value = '';
713 if ($size) { read( $fh, $value, $size); }
714 return $value;
715 }
716
717 ##
718 # Key exists, but content is null
719 ##
720 return;
721}
722
9020ee8c 723sub get_bucket_value {
beac1dff 724 ##
725 # Fetch single value given tag and MD5 digested key.
726 ##
727 my $self = shift;
728 my ($obj, $tag, $md5) = @_;
9020ee8c 729
9a187d8c 730 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
386bab6c 731 if ( $subloc ) {
8db25060 732 return $self->read_from_loc( $obj, $subloc );
386bab6c 733 }
beac1dff 734 return;
9020ee8c 735}
ab0e4957 736
737sub delete_bucket {
beac1dff 738 ##
739 # Delete single key/value pair given tag and MD5 digested key.
740 ##
741 my $self = shift;
742 my ($obj, $tag, $md5) = @_;
ab0e4957 743
9a187d8c 744 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
f9c33187 745#XXX This needs _release_space()
386bab6c 746 if ( $subloc ) {
747 my $fh = $obj->_fh;
748 seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);
749 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 750 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 751
ab0e4957 752 return 1;
386bab6c 753 }
beac1dff 754 return;
ab0e4957 755}
756
912d50b1 757sub bucket_exists {
beac1dff 758 ##
759 # Check existence of single key given tag and MD5 digested key.
760 ##
761 my $self = shift;
762 my ($obj, $tag, $md5) = @_;
912d50b1 763
9a187d8c 764 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 765 return $subloc && 1;
912d50b1 766}
767
6736c116 768sub find_bucket_list {
beac1dff 769 ##
770 # Locate offset for bucket list, given digested key
771 ##
772 my $self = shift;
d0b74c17 773 my ($obj, $md5, $args) = @_;
774 $args = {} unless $args;
775
beac1dff 776 ##
777 # Locate offset for bucket list using digest index system
778 ##
e5fc7e69 779 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 780 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 781
e5fc7e69 782 my $ch = 0;
8db25060 783 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 784 my $num = ord substr($md5, $ch, 1);
785
786 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
787 $tag = $self->index_lookup( $obj, $tag, $num );
788
789 if (!$tag) {
29b01632 790 return if !$args->{create};
d0b74c17 791
16d1ad9b 792 my $loc = $self->_request_space(
793 $obj, $self->tag_size( $self->{bucket_list_size} ),
794 );
795
7b1e1aa1 796 my $fh = $obj->_fh;
797 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
16d1ad9b 798 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 799
9e4f83a0 800 $tag = $self->write_tag(
16d1ad9b 801 $obj, $loc, SIG_BLIST,
f37c15ab 802 chr(0)x$self->{bucket_list_size},
d5d7c51d 803 );
804
805 $tag->{ref_loc} = $ref_loc;
806 $tag->{ch} = $ch;
807
808 last;
d0b74c17 809 }
810
16d1ad9b 811 $tag->{ch} = $ch++;
d0b74c17 812 $tag->{ref_loc} = $ref_loc;
beac1dff 813 }
d0b74c17 814
beac1dff 815 return $tag;
6736c116 816}
817
d0b74c17 818sub index_lookup {
819 ##
820 # Given index tag, lookup single entry in index and return .
821 ##
822 my $self = shift;
823 my ($obj, $tag, $index) = @_;
824
825 my $location = unpack(
826 $self->{long_pack},
827 substr(
828 $tag->{content},
829 $index * $self->{long_size},
830 $self->{long_size},
831 ),
832 );
833
834 if (!$location) { return; }
835
836 return $self->load_tag( $obj, $location );
837}
838
6736c116 839sub traverse_index {
beac1dff 840 ##
841 # Scan index and recursively step into deeper levels, looking for next key.
842 ##
6736c116 843 my $self = shift;
844 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 845
beac1dff 846 my $tag = $self->load_tag($obj, $offset );
6736c116 847
848 my $fh = $obj->_fh;
d0b74c17 849
8db25060 850 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 851 my $content = $tag->{content};
e5fc7e69 852 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 853
d5d7c51d 854 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 855 my $subloc = unpack(
856 $self->{long_pack},
e06824f8 857 substr(
858 $content,
859 $idx * $self->{long_size},
860 $self->{long_size},
861 ),
e5fc7e69 862 );
863
beac1dff 864 if ($subloc) {
e5fc7e69 865 my $result = $self->traverse_index(
866 $obj, $subloc, $ch + 1, $force_return_next,
867 );
868
beac1dff 869 if (defined($result)) { return $result; }
870 }
871 } # index loop
d0b74c17 872
beac1dff 873 $obj->{return_next} = 1;
874 } # tag is an index
d0b74c17 875
e5fc7e69 876 else {
beac1dff 877 my $keys = $tag->{content};
878 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 879
beac1dff 880 ##
881 # Iterate through buckets, looking for a key match
882 ##
8db25060 883 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 884 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 885
8db25060 886 # End of bucket list -- return to outer loop
beac1dff 887 if (!$subloc) {
beac1dff 888 $obj->{return_next} = 1;
889 last;
890 }
8db25060 891 # Located previous key -- return next one found
beac1dff 892 elsif ($key eq $obj->{prev_md5}) {
beac1dff 893 $obj->{return_next} = 1;
894 next;
895 }
8db25060 896 # Seek to bucket location and skip over signature
beac1dff 897 elsif ($obj->{return_next}) {
8db25060 898 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 899
beac1dff 900 # Skip over value to get to plain key
8db25060 901 my $sig;
902 read( $fh, $sig, SIG_SIZE );
903
beac1dff 904 my $size;
e5fc7e69 905 read( $fh, $size, $self->{data_size});
906 $size = unpack($self->{data_pack}, $size);
beac1dff 907 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 908
beac1dff 909 # Read in plain key and return as scalar
beac1dff 910 my $plain_key;
e5fc7e69 911 read( $fh, $size, $self->{data_size});
912 $size = unpack($self->{data_pack}, $size);
beac1dff 913 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 914
beac1dff 915 return $plain_key;
916 }
8db25060 917 }
d0b74c17 918
beac1dff 919 $obj->{return_next} = 1;
920 } # tag is a bucket list
d0b74c17 921
beac1dff 922 return;
6736c116 923}
924
925sub get_next_key {
beac1dff 926 ##
927 # Locate next key, given digested previous one
928 ##
6736c116 929 my $self = shift;
930 my ($obj) = @_;
d0b74c17 931
beac1dff 932 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
933 $obj->{return_next} = 0;
d0b74c17 934
beac1dff 935 ##
936 # If the previous key was not specifed, start at the top and
937 # return the first one found.
938 ##
939 if (!$obj->{prev_md5}) {
940 $obj->{prev_md5} = chr(0) x $self->{hash_size};
941 $obj->{return_next} = 1;
942 }
d0b74c17 943
beac1dff 944 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 945}
946
75be6413 947# Utilities
948
9cec1360 949sub _get_key_subloc {
75be6413 950 my $self = shift;
951 my ($keys, $idx) = @_;
952
6ed2f3df 953 my ($key, $subloc, $size) = unpack(
954 "a$self->{hash_size} $self->{long_pack} $self->{long_pack}",
75be6413 955 substr(
956 $keys,
9cec1360 957 ($idx * $self->{bucket_size}),
958 $self->{bucket_size},
75be6413 959 ),
960 );
961
6ed2f3df 962 return ($key, $subloc, $size);
75be6413 963}
964
d608b06e 965sub _find_in_buckets {
966 my $self = shift;
967 my ($tag, $md5) = @_;
968
969 BUCKET:
970 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
9a187d8c 971 my ($key, $subloc, $size) = $self->_get_key_subloc(
972 $tag->{content}, $i,
973 );
d608b06e 974
9a187d8c 975 return ($subloc, $i * $self->{bucket_size}, $size) unless $subloc;
d608b06e 976
977 next BUCKET if $key ne $md5;
978
9a187d8c 979 return ($subloc, $i * $self->{bucket_size}, $size);
d608b06e 980 }
981
982 return;
983}
984
7b1e1aa1 985#sub _print_at {
986# my $self = shift;
987# my ($obj, $spot, $data) = @_;
988#
989# my $fh = $obj->_fh;
990# seek( $fh, $spot, SEEK_SET );
991# print( $fh $data );
992#
993# return;
994#}
995
994ccd8e 996sub _request_space {
997 my $self = shift;
998 my ($obj, $size) = @_;
999
1000 my $loc = $obj->_root->{end};
c9ec091a 1001 $obj->_root->{end} += $size;
994ccd8e 1002
1003 return $loc;
1004}
1005
1006sub _release_space {
1007 my $self = shift;
1008 my ($obj, $size, $loc) = @_;
1009
7b1e1aa1 1010 my $next_loc = 0;
1011
1012 my $fh = $obj->_fh;
1013 seek( $fh, $loc + $obj->_root->{file_offset}, SEEK_SET );
1014 print( $fh SIG_FREE
1015 . pack($self->{long_pack}, $size )
1016 . pack($self->{long_pack}, $next_loc )
1017 );
1018
994ccd8e 1019 return;
1020}
1021
a20d9a3f 10221;
1023__END__
d5d7c51d 1024
1025# This will be added in later, after more refactoring is done. This is an early
1026# attempt at refactoring on the physical level instead of the virtual level.
1027sub _read_at {
1028 my $self = shift;
1029 my ($obj, $spot, $amount, $unpack) = @_;
1030
1031 my $fh = $obj->_fh;
1032 seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );
1033
1034 my $buffer;
1035 my $bytes_read = read( $fh, $buffer, $amount );
1036
1037 if ( $unpack ) {
1038 $buffer = unpack( $unpack, $buffer );
1039 }
1040
1041 if ( wantarray ) {
1042 return ($buffer, $bytes_read);
1043 }
1044 else {
1045 return $buffer;
1046 }
1047}