Added basic array test structure
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
8db25060 7##
8# Setup file and tag signatures. These should never change.
9##
10sub SIG_FILE () { 'DPDB' }
11sub SIG_INTERNAL () { 'i' }
12sub SIG_HASH () { 'H' }
13sub SIG_ARRAY () { 'A' }
8db25060 14sub SIG_NULL () { 'N' }
15sub SIG_DATA () { 'D' }
16sub SIG_INDEX () { 'I' }
17sub SIG_BLIST () { 'B' }
7b1e1aa1 18sub SIG_FREE () { 'F' }
8db25060 19sub SIG_SIZE () { 1 }
20
612969fb 21sub precalc_sizes {
beac1dff 22 ##
23 # Precalculate index, bucket and bucket list sizes
24 ##
251dfd0e 25 my $self = shift;
1bf65be7 26
251dfd0e 27 $self->{index_size} = (2**8) * $self->{long_size};
6ed2f3df 28 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
251dfd0e 29 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 30
251dfd0e 31 return 1;
1bf65be7 32}
33
34sub set_pack {
beac1dff 35 ##
36 # Set pack/unpack modes (see file header for more)
37 ##
251dfd0e 38 my $self = shift;
1bf65be7 39 my ($long_s, $long_p, $data_s, $data_p) = @_;
40
81d16922 41 ##
75be6413 42 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
43 # GB per file.
beac1dff 44 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 45 #
46 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 47 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 48 ##
251dfd0e 49 $self->{long_size} = $long_s ? $long_s : 4;
50 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 51
81d16922 52 ##
75be6413 53 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
d5d7c51d 54 # key/value. Upgrading this is possible (see above) but probably not
55 # necessary. If you need more than 4 GB for a single key or value, this
56 # module is really not for you :-)
81d16922 57 ##
251dfd0e 58 $self->{data_size} = $data_s ? $data_s : 4;
59 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 60
beac1dff 61 return $self->precalc_sizes();
1bf65be7 62}
63
64sub set_digest {
beac1dff 65 ##
66 # Set key digest function (default is MD5)
67 ##
251dfd0e 68 my $self = shift;
1bf65be7 69 my ($digest_func, $hash_size) = @_;
70
d0b74c17 71 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 72 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 73
beac1dff 74 return $self->precalc_sizes();
612969fb 75}
76
77sub new {
78 my $class = shift;
79 my ($args) = @_;
80
81 my $self = bless {
82 long_size => 4,
83 long_pack => 'N',
84 data_size => 4,
85 data_pack => 'N',
251dfd0e 86
612969fb 87 digest => \&Digest::MD5::md5,
88 hash_size => 16,
251dfd0e 89
81d16922 90 ##
d5d7c51d 91 # Maximum number of buckets per list before another level of indexing is
92 # done.
93 # Increase this value for slightly greater speed, but larger database
94 # files. DO NOT decrease this value below 16, due to risk of recursive
95 # reindex overrun.
81d16922 96 ##
612969fb 97 max_buckets => 16,
98 }, $class;
99
251dfd0e 100 $self->precalc_sizes;
612969fb 101
102 return $self;
1bf65be7 103}
104
0d0f3d5d 105sub write_file_signature {
106 my $self = shift;
107 my ($obj) = @_;
108
109 my $fh = $obj->_fh;
110
111 my $loc = $self->_request_space( $obj, length( SIG_FILE ) );
112 seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET);
113 print( $fh SIG_FILE);
114
115 return;
116}
117
e064ccd1 118sub read_file_signature {
119 my $self = shift;
120 my ($obj) = @_;
121
122 my $fh = $obj->_fh;
123
124 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
125 my $signature;
126 my $bytes_read = read( $fh, $signature, length(SIG_FILE));
127
128 if ( $bytes_read ) {
129 unless ($signature eq SIG_FILE) {
130 $self->close_fh( $obj );
131 $obj->_throw_error("Signature not found -- file is not a Deep DB");
132 }
133 }
134
135 return $bytes_read;
136}
137
70b55428 138sub setup_fh {
139 my $self = shift;
140 my ($obj) = @_;
141
142 $self->open( $obj ) if !defined $obj->_fh;
143
6fde4ed2 144 my $fh = $obj->_fh;
145 flock $fh, LOCK_EX;
118ba343 146
6fde4ed2 147 unless ( $obj->{base_offset} ) {
e064ccd1 148 my $bytes_read = $self->read_file_signature( $obj );
118ba343 149
150 ##
151 # File is empty -- write signature and master index
152 ##
153 if (!$bytes_read) {
0d0f3d5d 154 $self->write_file_signature( $obj );
118ba343 155
c9ec091a 156 $obj->{base_offset} = $self->_request_space(
16d1ad9b 157 $obj, $self->tag_size( $self->{index_size} ),
c9ec091a 158 );
118ba343 159
9e4f83a0 160 $self->write_tag(
c9ec091a 161 $obj, $obj->_base_offset, $obj->_type,
f37c15ab 162 chr(0)x$self->{index_size},
118ba343 163 );
164
165 # Flush the filehandle
166 my $old_fh = select $fh;
167 my $old_af = $|; $| = 1; $| = $old_af;
168 select $old_fh;
169 }
170 else {
171 $obj->{base_offset} = $bytes_read;
172
173 ##
118ba343 174 # Get our type from master index signature
175 ##
176 my $tag = $self->load_tag($obj, $obj->_base_offset)
177 or $obj->_throw_error("Corrupted file, no master index record");
178
179 unless ($obj->{type} eq $tag->{signature}) {
180 $obj->_throw_error("File type mismatch");
181 }
182 }
118ba343 183 }
e06824f8 184
673464d9 185 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 186 unless ( $obj->_root->{inode} ) {
187 my @stats = stat($obj->_fh);
188 $obj->_root->{inode} = $stats[1];
189 $obj->_root->{end} = $stats[7];
190 }
191
6fde4ed2 192 flock $fh, LOCK_UN;
193
70b55428 194 return 1;
195}
196
a20d9a3f 197sub open {
20f7b20c 198 ##
199 # Open a fh to the database, create if nonexistent.
200 # Make sure file signature matches DBM::Deep spec.
201 ##
a20d9a3f 202 my $self = shift;
70b55428 203 my ($obj) = @_;
a20d9a3f 204
673464d9 205 # Theoretically, adding O_BINARY should remove the need for the binmode
206 # Of course, testing it is going to be ... interesting.
207 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 208
673464d9 209 my $fh;
d5d7c51d 210 my $filename = $obj->_root->{file};
211 sysopen( $fh, $filename, $flags )
212 or $obj->_throw_error("Cannot sysopen file '$filename': $!");
673464d9 213 $obj->_root->{fh} = $fh;
a20d9a3f 214
215 #XXX Can we remove this by using the right sysopen() flags?
216 # Maybe ... q.v. above
217 binmode $fh; # for win32
218
cd59cad8 219 if ($obj->_root->{autoflush}) {
a20d9a3f 220 my $old = select $fh;
221 $|=1;
222 select $old;
223 }
20f7b20c 224
a20d9a3f 225 return 1;
226}
227
3d1b8be9 228sub close_fh {
cd59cad8 229 my $self = shift;
a21f2d90 230 my ($obj) = @_;
cd59cad8 231
232 if ( my $fh = $obj->_root->{fh} ) {
233 close $fh;
234 }
235 $obj->_root->{fh} = undef;
236
237 return 1;
238}
239
16d1ad9b 240sub tag_size {
241 my $self = shift;
242 my ($size) = @_;
243 return SIG_SIZE + $self->{data_size} + $size;
244}
245
9e4f83a0 246sub write_tag {
20f7b20c 247 ##
248 # Given offset, signature and content, create tag and write to disk
249 ##
d4b1166e 250 my $self = shift;
20f7b20c 251 my ($obj, $offset, $sig, $content) = @_;
f37c15ab 252 my $size = length( $content );
20f7b20c 253
d4b1166e 254 my $fh = $obj->_fh;
255
f37c15ab 256 if ( defined $offset ) {
257 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
258 }
259
251dfd0e 260 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 261
f37c15ab 262 return unless defined $offset;
263
20f7b20c 264 return {
265 signature => $sig,
266 size => $size,
8db25060 267 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 268 content => $content
269 };
d4b1166e 270}
271
272sub load_tag {
20f7b20c 273 ##
274 # Given offset, load single tag and return signature, size and data
275 ##
d4b1166e 276 my $self = shift;
20f7b20c 277 my ($obj, $offset) = @_;
278
e06824f8 279# print join(':',map{$_||''}caller(1)), $/;
280
d4b1166e 281 my $fh = $obj->_fh;
282
20f7b20c 283 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 284
75be6413 285 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 286 return if eof $fh;
20f7b20c 287
d4b1166e 288 my $b;
8db25060 289 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 290 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 291
292 my $buffer;
293 read( $fh, $buffer, $size);
294
295 return {
296 signature => $sig,
297 size => $size,
8db25060 298 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 299 content => $buffer
300 };
d4b1166e 301}
302
29b01632 303sub _length_needed {
304 my $self = shift;
f37c15ab 305 my ($obj, $value, $key) = @_;
29b01632 306
307 my $is_dbm_deep = eval {
308 local $SIG{'__DIE__'};
309 $value->isa( 'DBM::Deep' );
310 };
311
f37c15ab 312 my $len = SIG_SIZE + $self->{data_size}
313 + $self->{data_size} + length( $key );
29b01632 314
f37c15ab 315 if ( $is_dbm_deep && $value->_root eq $obj->_root ) {
316 return $len + $self->{long_size};
29b01632 317 }
318
319 my $r = Scalar::Util::reftype( $value ) || '';
9a187d8c 320 if ( $obj->_root->{autobless} ) {
321 # This is for the bit saying whether or not this thing is blessed.
322 $len += 1;
323 }
324
29b01632 325 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 326 if ( defined $value ) {
327 $len += length( $value );
328 }
329 return $len;
29b01632 330 }
331
f37c15ab 332 $len += $self->{index_size};
29b01632 333
334 # if autobless is enabled, must also take into consideration
f37c15ab 335 # the class name as it is stored after the key.
29b01632 336 if ( $obj->_root->{autobless} ) {
337 my $value_class = Scalar::Util::blessed($value);
f37c15ab 338 if ( defined $value_class && !$is_dbm_deep ) {
339 $len += $self->{data_size} + length($value_class);
29b01632 340 }
341 }
342
f37c15ab 343 return $len;
29b01632 344}
345
20f7b20c 346sub add_bucket {
347 ##
348 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
349 # plain (undigested) key and value.
350 ##
d4b1166e 351 my $self = shift;
20f7b20c 352 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 353
eea0d863 354 # This verifies that only supported values will be stored.
355 {
356 my $r = Scalar::Util::reftype( $value );
357 last if !defined $r;
358
359 last if $r eq 'HASH';
360 last if $r eq 'ARRAY';
361
362 $obj->_throw_error(
363 "Storage of variables of type '$r' is not supported."
364 );
365 }
366
20f7b20c 367 my $location = 0;
368 my $result = 2;
369
370 my $root = $obj->_root;
f37c15ab 371 my $fh = $obj->_fh;
20f7b20c 372
f37c15ab 373 my $actual_length = $self->_length_needed( $obj, $value, $plain_key );
20f7b20c 374
9a187d8c 375 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
75be6413 376
f9c33187 377# $self->_release_space( $obj, $size, $subloc );
386bab6c 378 # Updating a known md5
f9c33187 379#XXX This needs updating to use _release_space
386bab6c 380 if ( $subloc ) {
381 $result = 1;
20f7b20c 382
386bab6c 383 if ($actual_length <= $size) {
384 $location = $subloc;
20f7b20c 385 }
75be6413 386 else {
f37c15ab 387 $location = $self->_request_space( $obj, $actual_length );
386bab6c 388 seek(
389 $fh,
9a187d8c 390 $tag->{offset} + $offset
391 + $self->{hash_size} + $root->{file_offset},
386bab6c 392 SEEK_SET,
393 );
9a187d8c 394 print( $fh pack($self->{long_pack}, $location ) );
395 print( $fh pack($self->{long_pack}, $actual_length ) );
75be6413 396 }
75be6413 397 }
386bab6c 398 # Adding a new md5
399 elsif ( defined $offset ) {
f37c15ab 400 $location = $self->_request_space( $obj, $actual_length );
386bab6c 401
402 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 403 print( $fh $md5 . pack($self->{long_pack}, $location ) );
404 print( $fh pack($self->{long_pack}, $actual_length ) );
386bab6c 405 }
406 # If bucket didn't fit into list, split into a new index level
f9c33187 407 # split_index() will do the _request_space() call
386bab6c 408 else {
f9c33187 409 $location = $self->split_index( $obj, $md5, $tag );
386bab6c 410 }
20f7b20c 411
d5d7c51d 412 $self->write_value( $obj, $location, $plain_key, $value );
413
414 return $result;
415}
416
9d4fa373 417sub _get_tied {
418 my $item = shift;
419 my $r = Scalar::Util::reftype( $item ) || return;
420 if ( $r eq 'HASH' ) {
421 return tied(%$item);
422 }
423 elsif ( $r eq 'ARRAY' ) {
424 return tied(@$item);
425 }
426 else {
427 return;
428 };
429}
430
431sub _get_dbm_object {
432 my $item = shift;
433
434 my $obj = eval {
435 local $SIG{__DIE__};
436 if ($item->isa( 'DBM::Deep' )) {
437 return $item;
438 }
439 return;
440 };
441 return $obj if $obj;
442
443 my $r = Scalar::Util::reftype( $item ) || '';
444 if ( $r eq 'HASH' ) {
445 my $obj = eval {
446 local $SIG{__DIE__};
447 my $obj = tied(%$item);
448 if ($obj->isa( 'DBM::Deep' )) {
449 return $obj;
450 }
451 return;
452 };
453 return $obj if $obj;
454 }
455 elsif ( $r eq 'ARRAY' ) {
456 my $obj = eval {
457 local $SIG{__DIE__};
458 my $obj = tied(@$item);
459 if ($obj->isa( 'DBM::Deep' )) {
460 return $obj;
461 }
462 return;
463 };
464 return $obj if $obj;
465 }
466
467 return;
468}
469
d5d7c51d 470sub write_value {
471 my $self = shift;
472 my ($obj, $location, $key, $value) = @_;
473
474 my $fh = $obj->_fh;
475 my $root = $obj->_root;
476
9d4fa373 477 my $dbm_deep_obj = _get_dbm_object( $value );
478 if ( $dbm_deep_obj && $dbm_deep_obj->_root ne $obj->_root ) {
479 $obj->_throw_error( "Cannot cross-reference. Use export() instead" );
480 }
d5d7c51d 481
482 seek($fh, $location + $root->{file_offset}, SEEK_SET);
483
20f7b20c 484 ##
d5d7c51d 485 # Write signature based on content type, set content length and write
486 # actual value.
20f7b20c 487 ##
9d4fa373 488 my $r = Scalar::Util::reftype( $value ) || '';
489 if ( $dbm_deep_obj ) {
490 $self->write_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 491 }
492 elsif ($r eq 'HASH') {
9d4fa373 493 if ( !$dbm_deep_obj && tied %{$value} ) {
019ab3a1 494 $obj->_throw_error( "Cannot store something that is tied" );
495 }
9e4f83a0 496 $self->write_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 497 }
498 elsif ($r eq 'ARRAY') {
9d4fa373 499 if ( !$dbm_deep_obj && tied @{$value} ) {
019ab3a1 500 $obj->_throw_error( "Cannot store something that is tied" );
501 }
9e4f83a0 502 $self->write_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 503 }
504 elsif (!defined($value)) {
9e4f83a0 505 $self->write_tag( $obj, undef, SIG_NULL, '' );
d5d7c51d 506 }
507 else {
9e4f83a0 508 $self->write_tag( $obj, undef, SIG_DATA, $value );
d5d7c51d 509 }
20f7b20c 510
d5d7c51d 511 ##
512 # Plain key is stored AFTER value, as keys are typically fetched less often.
513 ##
514 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 515
9a187d8c 516 # Internal references don't care about autobless
9d4fa373 517 return 1 if $dbm_deep_obj;
9a187d8c 518
d5d7c51d 519 ##
520 # If value is blessed, preserve class name
521 ##
522 if ( $root->{autobless} ) {
523 my $value_class = Scalar::Util::blessed($value);
9d4fa373 524 if ( defined $value_class && !$dbm_deep_obj ) {
d5d7c51d 525 print( $fh chr(1) );
526 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
20f7b20c 527 }
d5d7c51d 528 else {
529 print( $fh chr(0) );
20f7b20c 530 }
d5d7c51d 531 }
20f7b20c 532
d5d7c51d 533 ##
d5d7c51d 534 # If content is a hash or array, create new child DBM::Deep object and
535 # pass each key or element to it.
536 ##
9d4fa373 537 if ($r eq 'HASH') {
538 my %x = %$value;
539 tie %$value, 'DBM::Deep', {
540 base_offset => $location,
541 root => $root,
542 };
543 %$value = %x;
544 }
545 elsif ($r eq 'ARRAY') {
546 my @x = @$value;
547 tie @$value, 'DBM::Deep', {
548 base_offset => $location,
549 root => $root,
550 };
551 @$value = @x;
20f7b20c 552 }
d4b1166e 553
d5d7c51d 554 return 1;
d4b1166e 555}
556
75be6413 557sub split_index {
558 my $self = shift;
559 my ($obj, $md5, $tag) = @_;
560
561 my $fh = $obj->_fh;
562 my $root = $obj->_root;
16d1ad9b 563
564 my $loc = $self->_request_space(
565 $obj, $self->tag_size( $self->{index_size} ),
566 );
567
7b1e1aa1 568 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 569 print( $fh pack($self->{long_pack}, $loc) );
75be6413 570
9e4f83a0 571 my $index_tag = $self->write_tag(
16d1ad9b 572 $obj, $loc, SIG_INDEX,
f37c15ab 573 chr(0)x$self->{index_size},
75be6413 574 );
575
f9c33187 576 my $newtag_loc = $self->_request_space(
577 $obj, $self->tag_size( $self->{bucket_list_size} ),
578 );
75be6413 579
7b1e1aa1 580 my $keys = $tag->{content}
f9c33187 581 . $md5 . pack($self->{long_pack}, $newtag_loc)
582 . pack($self->{long_pack}, 0);
75be6413 583
f9c33187 584 my @newloc = ();
75be6413 585 BUCKET:
586 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 587 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 588
f9c33187 589 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
590 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 591
75be6413 592 my $num = ord(substr($key, $tag->{ch} + 1, 1));
593
f9c33187 594 if ($newloc[$num]) {
595 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 596 my $subkeys;
597 read( $fh, $subkeys, $self->{bucket_list_size});
598
f9c33187 599 # This is looking for the first empty spot
7b1e1aa1 600 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 601 { content => $subkeys }, '',
7b1e1aa1 602 );
75be6413 603
f9c33187 604 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
605 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 606
607 next;
75be6413 608 }
75be6413 609
7b1e1aa1 610 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 611
7b1e1aa1 612 my $loc = $self->_request_space(
613 $obj, $self->tag_size( $self->{bucket_list_size} ),
614 );
2603d86e 615
7b1e1aa1 616 print( $fh pack($self->{long_pack}, $loc) );
75be6413 617
7b1e1aa1 618 my $blist_tag = $self->write_tag(
619 $obj, $loc, SIG_BLIST,
620 chr(0)x$self->{bucket_list_size},
621 );
622
623 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 624 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 625
f9c33187 626 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 627 }
628
629 $self->_release_space(
f9c33187 630 $obj, $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 631 $tag->{offset} - SIG_SIZE - $self->{data_size},
632 );
75be6413 633
f9c33187 634 return $newtag_loc;
75be6413 635}
636
8db25060 637sub read_from_loc {
638 my $self = shift;
639 my ($obj, $subloc) = @_;
640
641 my $fh = $obj->_fh;
642
643 ##
644 # Found match -- seek to offset and read signature
645 ##
646 my $signature;
647 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
648 read( $fh, $signature, SIG_SIZE);
649
650 ##
651 # If value is a hash or array, return new DBM::Deep object with correct offset
652 ##
653 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 654 my $new_obj = DBM::Deep->new({
8db25060 655 type => $signature,
656 base_offset => $subloc,
657 root => $obj->_root,
685e40f1 658 });
8db25060 659
685e40f1 660 if ($new_obj->_root->{autobless}) {
8db25060 661 ##
662 # Skip over value and plain key to see if object needs
663 # to be re-blessed
664 ##
665 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
666
667 my $size;
c6ea6b6c 668 read( $fh, $size, $self->{data_size});
669 $size = unpack($self->{data_pack}, $size);
8db25060 670 if ($size) { seek($fh, $size, SEEK_CUR); }
671
672 my $bless_bit;
673 read( $fh, $bless_bit, 1);
674 if (ord($bless_bit)) {
675 ##
676 # Yes, object needs to be re-blessed
677 ##
678 my $class_name;
c6ea6b6c 679 read( $fh, $size, $self->{data_size});
680 $size = unpack($self->{data_pack}, $size);
8db25060 681 if ($size) { read( $fh, $class_name, $size); }
685e40f1 682 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 683 }
684 }
685
685e40f1 686 return $new_obj;
8db25060 687 }
688 elsif ( $signature eq SIG_INTERNAL ) {
689 my $size;
690 read( $fh, $size, $self->{data_size});
691 $size = unpack($self->{data_pack}, $size);
692
693 if ( $size ) {
694 my $new_loc;
695 read( $fh, $new_loc, $size );
696 $new_loc = unpack( $self->{long_pack}, $new_loc );
697
698 return $self->read_from_loc( $obj, $new_loc );
699 }
700 else {
701 return;
702 }
703 }
704 ##
705 # Otherwise return actual value
706 ##
707 elsif ($signature eq SIG_DATA) {
708 my $size;
709 read( $fh, $size, $self->{data_size});
710 $size = unpack($self->{data_pack}, $size);
711
712 my $value = '';
713 if ($size) { read( $fh, $value, $size); }
714 return $value;
715 }
716
717 ##
718 # Key exists, but content is null
719 ##
720 return;
721}
722
9020ee8c 723sub get_bucket_value {
beac1dff 724 ##
725 # Fetch single value given tag and MD5 digested key.
726 ##
727 my $self = shift;
728 my ($obj, $tag, $md5) = @_;
9020ee8c 729
9a187d8c 730 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
386bab6c 731 if ( $subloc ) {
8db25060 732 return $self->read_from_loc( $obj, $subloc );
386bab6c 733 }
beac1dff 734 return;
9020ee8c 735}
ab0e4957 736
737sub delete_bucket {
beac1dff 738 ##
739 # Delete single key/value pair given tag and MD5 digested key.
740 ##
741 my $self = shift;
742 my ($obj, $tag, $md5) = @_;
ab0e4957 743
9a187d8c 744 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
f9c33187 745#XXX This needs _release_space()
386bab6c 746 if ( $subloc ) {
747 my $fh = $obj->_fh;
748 seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);
749 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 750 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 751
ab0e4957 752 return 1;
386bab6c 753 }
beac1dff 754 return;
ab0e4957 755}
756
912d50b1 757sub bucket_exists {
beac1dff 758 ##
759 # Check existence of single key given tag and MD5 digested key.
760 ##
761 my $self = shift;
762 my ($obj, $tag, $md5) = @_;
912d50b1 763
9a187d8c 764 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 765 return $subloc && 1;
912d50b1 766}
767
6736c116 768sub find_bucket_list {
beac1dff 769 ##
770 # Locate offset for bucket list, given digested key
771 ##
772 my $self = shift;
d0b74c17 773 my ($obj, $md5, $args) = @_;
774 $args = {} unless $args;
775
beac1dff 776 ##
777 # Locate offset for bucket list using digest index system
778 ##
e5fc7e69 779 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 780 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 781
e5fc7e69 782 my $ch = 0;
8db25060 783 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 784 my $num = ord substr($md5, $ch, 1);
785
786 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
787 $tag = $self->index_lookup( $obj, $tag, $num );
788
789 if (!$tag) {
29b01632 790 return if !$args->{create};
d0b74c17 791
16d1ad9b 792 my $loc = $self->_request_space(
793 $obj, $self->tag_size( $self->{bucket_list_size} ),
794 );
795
7b1e1aa1 796 my $fh = $obj->_fh;
797 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
16d1ad9b 798 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 799
9e4f83a0 800 $tag = $self->write_tag(
16d1ad9b 801 $obj, $loc, SIG_BLIST,
f37c15ab 802 chr(0)x$self->{bucket_list_size},
d5d7c51d 803 );
804
805 $tag->{ref_loc} = $ref_loc;
806 $tag->{ch} = $ch;
807
808 last;
d0b74c17 809 }
810
16d1ad9b 811 $tag->{ch} = $ch++;
d0b74c17 812 $tag->{ref_loc} = $ref_loc;
beac1dff 813 }
d0b74c17 814
beac1dff 815 return $tag;
6736c116 816}
817
d0b74c17 818sub index_lookup {
819 ##
820 # Given index tag, lookup single entry in index and return .
821 ##
822 my $self = shift;
823 my ($obj, $tag, $index) = @_;
824
825 my $location = unpack(
826 $self->{long_pack},
827 substr(
828 $tag->{content},
829 $index * $self->{long_size},
830 $self->{long_size},
831 ),
832 );
833
834 if (!$location) { return; }
835
836 return $self->load_tag( $obj, $location );
837}
838
6736c116 839sub traverse_index {
beac1dff 840 ##
841 # Scan index and recursively step into deeper levels, looking for next key.
842 ##
6736c116 843 my $self = shift;
844 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 845
beac1dff 846 my $tag = $self->load_tag($obj, $offset );
6736c116 847
848 my $fh = $obj->_fh;
d0b74c17 849
8db25060 850 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 851 my $content = $tag->{content};
e5fc7e69 852 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 853
d5d7c51d 854 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 855 my $subloc = unpack(
856 $self->{long_pack},
e06824f8 857 substr(
858 $content,
859 $idx * $self->{long_size},
860 $self->{long_size},
861 ),
e5fc7e69 862 );
863
beac1dff 864 if ($subloc) {
e5fc7e69 865 my $result = $self->traverse_index(
866 $obj, $subloc, $ch + 1, $force_return_next,
867 );
868
beac1dff 869 if (defined($result)) { return $result; }
870 }
871 } # index loop
d0b74c17 872
beac1dff 873 $obj->{return_next} = 1;
874 } # tag is an index
d0b74c17 875
e5fc7e69 876 else {
beac1dff 877 my $keys = $tag->{content};
878 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 879
beac1dff 880 ##
881 # Iterate through buckets, looking for a key match
882 ##
8db25060 883 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 884 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 885
8db25060 886 # End of bucket list -- return to outer loop
beac1dff 887 if (!$subloc) {
beac1dff 888 $obj->{return_next} = 1;
889 last;
890 }
8db25060 891 # Located previous key -- return next one found
beac1dff 892 elsif ($key eq $obj->{prev_md5}) {
beac1dff 893 $obj->{return_next} = 1;
894 next;
895 }
8db25060 896 # Seek to bucket location and skip over signature
beac1dff 897 elsif ($obj->{return_next}) {
8db25060 898 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 899
beac1dff 900 # Skip over value to get to plain key
8db25060 901 my $sig;
902 read( $fh, $sig, SIG_SIZE );
903
beac1dff 904 my $size;
e5fc7e69 905 read( $fh, $size, $self->{data_size});
906 $size = unpack($self->{data_pack}, $size);
beac1dff 907 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 908
beac1dff 909 # Read in plain key and return as scalar
beac1dff 910 my $plain_key;
e5fc7e69 911 read( $fh, $size, $self->{data_size});
912 $size = unpack($self->{data_pack}, $size);
beac1dff 913 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 914
beac1dff 915 return $plain_key;
916 }
8db25060 917 }
d0b74c17 918
beac1dff 919 $obj->{return_next} = 1;
920 } # tag is a bucket list
d0b74c17 921
beac1dff 922 return;
6736c116 923}
924
925sub get_next_key {
beac1dff 926 ##
927 # Locate next key, given digested previous one
928 ##
6736c116 929 my $self = shift;
930 my ($obj) = @_;
d0b74c17 931
beac1dff 932 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
933 $obj->{return_next} = 0;
d0b74c17 934
beac1dff 935 ##
936 # If the previous key was not specifed, start at the top and
937 # return the first one found.
938 ##
939 if (!$obj->{prev_md5}) {
940 $obj->{prev_md5} = chr(0) x $self->{hash_size};
941 $obj->{return_next} = 1;
942 }
d0b74c17 943
beac1dff 944 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 945}
946
75be6413 947# Utilities
948
9cec1360 949sub _get_key_subloc {
75be6413 950 my $self = shift;
951 my ($keys, $idx) = @_;
952
6ed2f3df 953 my ($key, $subloc, $size) = unpack(
954 "a$self->{hash_size} $self->{long_pack} $self->{long_pack}",
75be6413 955 substr(
956 $keys,
9cec1360 957 ($idx * $self->{bucket_size}),
958 $self->{bucket_size},
75be6413 959 ),
960 );
961
6ed2f3df 962 return ($key, $subloc, $size);
75be6413 963}
964
d608b06e 965sub _find_in_buckets {
966 my $self = shift;
967 my ($tag, $md5) = @_;
968
969 BUCKET:
970 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
9a187d8c 971 my ($key, $subloc, $size) = $self->_get_key_subloc(
972 $tag->{content}, $i,
973 );
d608b06e 974
9a187d8c 975 return ($subloc, $i * $self->{bucket_size}, $size) unless $subloc;
d608b06e 976
977 next BUCKET if $key ne $md5;
978
9a187d8c 979 return ($subloc, $i * $self->{bucket_size}, $size);
d608b06e 980 }
981
982 return;
983}
984
7b1e1aa1 985#sub _print_at {
986# my $self = shift;
987# my ($obj, $spot, $data) = @_;
988#
989# my $fh = $obj->_fh;
990# seek( $fh, $spot, SEEK_SET );
991# print( $fh $data );
992#
993# return;
994#}
995
994ccd8e 996sub _request_space {
997 my $self = shift;
998 my ($obj, $size) = @_;
999
1000 my $loc = $obj->_root->{end};
c9ec091a 1001 $obj->_root->{end} += $size;
994ccd8e 1002
1003 return $loc;
1004}
1005
1006sub _release_space {
1007 my $self = shift;
1008 my ($obj, $size, $loc) = @_;
1009
7b1e1aa1 1010 my $next_loc = 0;
1011
1012 my $fh = $obj->_fh;
1013 seek( $fh, $loc + $obj->_root->{file_offset}, SEEK_SET );
1014 print( $fh SIG_FREE
1015 . pack($self->{long_pack}, $size )
1016 . pack($self->{long_pack}, $next_loc )
1017 );
1018
994ccd8e 1019 return;
1020}
1021
a20d9a3f 10221;
1023__END__
d5d7c51d 1024
1025# This will be added in later, after more refactoring is done. This is an early
1026# attempt at refactoring on the physical level instead of the virtual level.
1027sub _read_at {
1028 my $self = shift;
1029 my ($obj, $spot, $amount, $unpack) = @_;
1030
1031 my $fh = $obj->_fh;
1032 seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );
1033
1034 my $buffer;
1035 my $bytes_read = read( $fh, $buffer, $amount );
1036
1037 if ( $unpack ) {
1038 $buffer = unpack( $unpack, $buffer );
1039 }
1040
1041 if ( wantarray ) {
1042 return ($buffer, $bytes_read);
1043 }
1044 else {
1045 return $buffer;
1046 }
1047}