Create a write_signature() method
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
3use strict;
4
5use Fcntl qw( :DEFAULT :flock :seek );
6
8db25060 7##
8# Setup file and tag signatures. These should never change.
9##
10sub SIG_FILE () { 'DPDB' }
11sub SIG_INTERNAL () { 'i' }
12sub SIG_HASH () { 'H' }
13sub SIG_ARRAY () { 'A' }
8db25060 14sub SIG_NULL () { 'N' }
15sub SIG_DATA () { 'D' }
16sub SIG_INDEX () { 'I' }
17sub SIG_BLIST () { 'B' }
7b1e1aa1 18sub SIG_FREE () { 'F' }
8db25060 19sub SIG_SIZE () { 1 }
20
612969fb 21sub precalc_sizes {
beac1dff 22 ##
23 # Precalculate index, bucket and bucket list sizes
24 ##
251dfd0e 25 my $self = shift;
1bf65be7 26
251dfd0e 27 $self->{index_size} = (2**8) * $self->{long_size};
6ed2f3df 28 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
251dfd0e 29 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
1bf65be7 30
251dfd0e 31 return 1;
1bf65be7 32}
33
34sub set_pack {
beac1dff 35 ##
36 # Set pack/unpack modes (see file header for more)
37 ##
251dfd0e 38 my $self = shift;
1bf65be7 39 my ($long_s, $long_p, $data_s, $data_p) = @_;
40
81d16922 41 ##
75be6413 42 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4
43 # GB per file.
beac1dff 44 # (Perl must be compiled with largefile support for files > 2 GB)
81d16922 45 #
46 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
beac1dff 47 # (Perl must be compiled with largefile and 64-bit long support)
81d16922 48 ##
251dfd0e 49 $self->{long_size} = $long_s ? $long_s : 4;
50 $self->{long_pack} = $long_p ? $long_p : 'N';
1bf65be7 51
81d16922 52 ##
75be6413 53 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each
d5d7c51d 54 # key/value. Upgrading this is possible (see above) but probably not
55 # necessary. If you need more than 4 GB for a single key or value, this
56 # module is really not for you :-)
81d16922 57 ##
251dfd0e 58 $self->{data_size} = $data_s ? $data_s : 4;
59 $self->{data_pack} = $data_p ? $data_p : 'N';
1bf65be7 60
beac1dff 61 return $self->precalc_sizes();
1bf65be7 62}
63
64sub set_digest {
beac1dff 65 ##
66 # Set key digest function (default is MD5)
67 ##
251dfd0e 68 my $self = shift;
1bf65be7 69 my ($digest_func, $hash_size) = @_;
70
d0b74c17 71 $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5;
251dfd0e 72 $self->{hash_size} = $hash_size ? $hash_size : 16;
612969fb 73
beac1dff 74 return $self->precalc_sizes();
612969fb 75}
76
77sub new {
78 my $class = shift;
79 my ($args) = @_;
80
81 my $self = bless {
82 long_size => 4,
83 long_pack => 'N',
84 data_size => 4,
85 data_pack => 'N',
251dfd0e 86
612969fb 87 digest => \&Digest::MD5::md5,
88 hash_size => 16,
251dfd0e 89
81d16922 90 ##
d5d7c51d 91 # Maximum number of buckets per list before another level of indexing is
92 # done.
93 # Increase this value for slightly greater speed, but larger database
94 # files. DO NOT decrease this value below 16, due to risk of recursive
95 # reindex overrun.
81d16922 96 ##
612969fb 97 max_buckets => 16,
98 }, $class;
99
251dfd0e 100 $self->precalc_sizes;
612969fb 101
102 return $self;
1bf65be7 103}
104
0d0f3d5d 105sub write_file_signature {
106 my $self = shift;
107 my ($obj) = @_;
108
109 my $fh = $obj->_fh;
110
111 my $loc = $self->_request_space( $obj, length( SIG_FILE ) );
112 seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET);
113 print( $fh SIG_FILE);
114
115 return;
116}
117
70b55428 118sub setup_fh {
119 my $self = shift;
120 my ($obj) = @_;
121
122 $self->open( $obj ) if !defined $obj->_fh;
123
6fde4ed2 124 my $fh = $obj->_fh;
125 flock $fh, LOCK_EX;
118ba343 126
6fde4ed2 127 unless ( $obj->{base_offset} ) {
118ba343 128 seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
129 my $signature;
130 my $bytes_read = read( $fh, $signature, length(SIG_FILE));
131
132 ##
133 # File is empty -- write signature and master index
134 ##
135 if (!$bytes_read) {
0d0f3d5d 136 $self->write_file_signature( $obj );
118ba343 137
c9ec091a 138 $obj->{base_offset} = $self->_request_space(
16d1ad9b 139 $obj, $self->tag_size( $self->{index_size} ),
c9ec091a 140 );
118ba343 141
9e4f83a0 142 $self->write_tag(
c9ec091a 143 $obj, $obj->_base_offset, $obj->_type,
f37c15ab 144 chr(0)x$self->{index_size},
118ba343 145 );
146
147 # Flush the filehandle
148 my $old_fh = select $fh;
149 my $old_af = $|; $| = 1; $| = $old_af;
150 select $old_fh;
151 }
152 else {
153 $obj->{base_offset} = $bytes_read;
154
155 ##
156 # Check signature was valid
157 ##
158 unless ($signature eq SIG_FILE) {
159 $self->close_fh( $obj );
160 $obj->_throw_error("Signature not found -- file is not a Deep DB");
161 }
162
163 ##
164 # Get our type from master index signature
165 ##
166 my $tag = $self->load_tag($obj, $obj->_base_offset)
167 or $obj->_throw_error("Corrupted file, no master index record");
168
169 unless ($obj->{type} eq $tag->{signature}) {
170 $obj->_throw_error("File type mismatch");
171 }
172 }
118ba343 173 }
e06824f8 174
673464d9 175 #XXX We have to make sure we don't mess up when autoflush isn't turned on
70b55428 176 unless ( $obj->_root->{inode} ) {
177 my @stats = stat($obj->_fh);
178 $obj->_root->{inode} = $stats[1];
179 $obj->_root->{end} = $stats[7];
180 }
181
6fde4ed2 182 flock $fh, LOCK_UN;
183
70b55428 184 return 1;
185}
186
a20d9a3f 187sub open {
20f7b20c 188 ##
189 # Open a fh to the database, create if nonexistent.
190 # Make sure file signature matches DBM::Deep spec.
191 ##
a20d9a3f 192 my $self = shift;
70b55428 193 my ($obj) = @_;
a20d9a3f 194
673464d9 195 # Theoretically, adding O_BINARY should remove the need for the binmode
196 # Of course, testing it is going to be ... interesting.
197 my $flags = O_RDWR | O_CREAT | O_BINARY;
a20d9a3f 198
673464d9 199 my $fh;
d5d7c51d 200 my $filename = $obj->_root->{file};
201 sysopen( $fh, $filename, $flags )
202 or $obj->_throw_error("Cannot sysopen file '$filename': $!");
673464d9 203 $obj->_root->{fh} = $fh;
a20d9a3f 204
205 #XXX Can we remove this by using the right sysopen() flags?
206 # Maybe ... q.v. above
207 binmode $fh; # for win32
208
cd59cad8 209 if ($obj->_root->{autoflush}) {
a20d9a3f 210 my $old = select $fh;
211 $|=1;
212 select $old;
213 }
20f7b20c 214
a20d9a3f 215 return 1;
216}
217
3d1b8be9 218sub close_fh {
cd59cad8 219 my $self = shift;
a21f2d90 220 my ($obj) = @_;
cd59cad8 221
222 if ( my $fh = $obj->_root->{fh} ) {
223 close $fh;
224 }
225 $obj->_root->{fh} = undef;
226
227 return 1;
228}
229
16d1ad9b 230sub tag_size {
231 my $self = shift;
232 my ($size) = @_;
233 return SIG_SIZE + $self->{data_size} + $size;
234}
235
9e4f83a0 236sub write_tag {
20f7b20c 237 ##
238 # Given offset, signature and content, create tag and write to disk
239 ##
d4b1166e 240 my $self = shift;
20f7b20c 241 my ($obj, $offset, $sig, $content) = @_;
f37c15ab 242 my $size = length( $content );
20f7b20c 243
d4b1166e 244 my $fh = $obj->_fh;
245
f37c15ab 246 if ( defined $offset ) {
247 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
248 }
249
251dfd0e 250 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 251
f37c15ab 252 return unless defined $offset;
253
20f7b20c 254 return {
255 signature => $sig,
256 size => $size,
8db25060 257 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 258 content => $content
259 };
d4b1166e 260}
261
262sub load_tag {
20f7b20c 263 ##
264 # Given offset, load single tag and return signature, size and data
265 ##
d4b1166e 266 my $self = shift;
20f7b20c 267 my ($obj, $offset) = @_;
268
e06824f8 269# print join(':',map{$_||''}caller(1)), $/;
270
d4b1166e 271 my $fh = $obj->_fh;
272
20f7b20c 273 seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);
e5fc7e69 274
75be6413 275 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 276 return if eof $fh;
20f7b20c 277
d4b1166e 278 my $b;
8db25060 279 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 280 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 281
282 my $buffer;
283 read( $fh, $buffer, $size);
284
285 return {
286 signature => $sig,
287 size => $size,
8db25060 288 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 289 content => $buffer
290 };
d4b1166e 291}
292
29b01632 293sub _length_needed {
294 my $self = shift;
f37c15ab 295 my ($obj, $value, $key) = @_;
29b01632 296
297 my $is_dbm_deep = eval {
298 local $SIG{'__DIE__'};
299 $value->isa( 'DBM::Deep' );
300 };
301
f37c15ab 302 my $len = SIG_SIZE + $self->{data_size}
303 + $self->{data_size} + length( $key );
29b01632 304
f37c15ab 305 if ( $is_dbm_deep && $value->_root eq $obj->_root ) {
306 return $len + $self->{long_size};
29b01632 307 }
308
309 my $r = Scalar::Util::reftype( $value ) || '';
9a187d8c 310 if ( $obj->_root->{autobless} ) {
311 # This is for the bit saying whether or not this thing is blessed.
312 $len += 1;
313 }
314
29b01632 315 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 316 if ( defined $value ) {
317 $len += length( $value );
318 }
319 return $len;
29b01632 320 }
321
f37c15ab 322 $len += $self->{index_size};
29b01632 323
324 # if autobless is enabled, must also take into consideration
f37c15ab 325 # the class name as it is stored after the key.
29b01632 326 if ( $obj->_root->{autobless} ) {
327 my $value_class = Scalar::Util::blessed($value);
f37c15ab 328 if ( defined $value_class && !$is_dbm_deep ) {
329 $len += $self->{data_size} + length($value_class);
29b01632 330 }
331 }
332
f37c15ab 333 return $len;
29b01632 334}
335
20f7b20c 336sub add_bucket {
337 ##
338 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
339 # plain (undigested) key and value.
340 ##
d4b1166e 341 my $self = shift;
20f7b20c 342 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 343
eea0d863 344 # This verifies that only supported values will be stored.
345 {
346 my $r = Scalar::Util::reftype( $value );
347 last if !defined $r;
348
349 last if $r eq 'HASH';
350 last if $r eq 'ARRAY';
351
352 $obj->_throw_error(
353 "Storage of variables of type '$r' is not supported."
354 );
355 }
356
20f7b20c 357 my $location = 0;
358 my $result = 2;
359
360 my $root = $obj->_root;
f37c15ab 361 my $fh = $obj->_fh;
20f7b20c 362
f37c15ab 363 my $actual_length = $self->_length_needed( $obj, $value, $plain_key );
20f7b20c 364
9a187d8c 365 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
75be6413 366
f9c33187 367# $self->_release_space( $obj, $size, $subloc );
386bab6c 368 # Updating a known md5
f9c33187 369#XXX This needs updating to use _release_space
386bab6c 370 if ( $subloc ) {
371 $result = 1;
20f7b20c 372
386bab6c 373 if ($actual_length <= $size) {
374 $location = $subloc;
20f7b20c 375 }
75be6413 376 else {
f37c15ab 377 $location = $self->_request_space( $obj, $actual_length );
386bab6c 378 seek(
379 $fh,
9a187d8c 380 $tag->{offset} + $offset
381 + $self->{hash_size} + $root->{file_offset},
386bab6c 382 SEEK_SET,
383 );
9a187d8c 384 print( $fh pack($self->{long_pack}, $location ) );
385 print( $fh pack($self->{long_pack}, $actual_length ) );
75be6413 386 }
75be6413 387 }
386bab6c 388 # Adding a new md5
389 elsif ( defined $offset ) {
f37c15ab 390 $location = $self->_request_space( $obj, $actual_length );
386bab6c 391
392 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 393 print( $fh $md5 . pack($self->{long_pack}, $location ) );
394 print( $fh pack($self->{long_pack}, $actual_length ) );
386bab6c 395 }
396 # If bucket didn't fit into list, split into a new index level
f9c33187 397 # split_index() will do the _request_space() call
386bab6c 398 else {
f9c33187 399 $location = $self->split_index( $obj, $md5, $tag );
386bab6c 400 }
20f7b20c 401
d5d7c51d 402 $self->write_value( $obj, $location, $plain_key, $value );
403
404 return $result;
405}
406
9d4fa373 407sub _get_tied {
408 my $item = shift;
409 my $r = Scalar::Util::reftype( $item ) || return;
410 if ( $r eq 'HASH' ) {
411 return tied(%$item);
412 }
413 elsif ( $r eq 'ARRAY' ) {
414 return tied(@$item);
415 }
416 else {
417 return;
418 };
419}
420
421sub _get_dbm_object {
422 my $item = shift;
423
424 my $obj = eval {
425 local $SIG{__DIE__};
426 if ($item->isa( 'DBM::Deep' )) {
427 return $item;
428 }
429 return;
430 };
431 return $obj if $obj;
432
433 my $r = Scalar::Util::reftype( $item ) || '';
434 if ( $r eq 'HASH' ) {
435 my $obj = eval {
436 local $SIG{__DIE__};
437 my $obj = tied(%$item);
438 if ($obj->isa( 'DBM::Deep' )) {
439 return $obj;
440 }
441 return;
442 };
443 return $obj if $obj;
444 }
445 elsif ( $r eq 'ARRAY' ) {
446 my $obj = eval {
447 local $SIG{__DIE__};
448 my $obj = tied(@$item);
449 if ($obj->isa( 'DBM::Deep' )) {
450 return $obj;
451 }
452 return;
453 };
454 return $obj if $obj;
455 }
456
457 return;
458}
459
d5d7c51d 460sub write_value {
461 my $self = shift;
462 my ($obj, $location, $key, $value) = @_;
463
464 my $fh = $obj->_fh;
465 my $root = $obj->_root;
466
9d4fa373 467 my $dbm_deep_obj = _get_dbm_object( $value );
468 if ( $dbm_deep_obj && $dbm_deep_obj->_root ne $obj->_root ) {
469 $obj->_throw_error( "Cannot cross-reference. Use export() instead" );
470 }
d5d7c51d 471
472 seek($fh, $location + $root->{file_offset}, SEEK_SET);
473
20f7b20c 474 ##
d5d7c51d 475 # Write signature based on content type, set content length and write
476 # actual value.
20f7b20c 477 ##
9d4fa373 478 my $r = Scalar::Util::reftype( $value ) || '';
479 if ( $dbm_deep_obj ) {
480 $self->write_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 481 }
482 elsif ($r eq 'HASH') {
9d4fa373 483 if ( !$dbm_deep_obj && tied %{$value} ) {
019ab3a1 484 $obj->_throw_error( "Cannot store something that is tied" );
485 }
9e4f83a0 486 $self->write_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 487 }
488 elsif ($r eq 'ARRAY') {
9d4fa373 489 if ( !$dbm_deep_obj && tied @{$value} ) {
019ab3a1 490 $obj->_throw_error( "Cannot store something that is tied" );
491 }
9e4f83a0 492 $self->write_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 493 }
494 elsif (!defined($value)) {
9e4f83a0 495 $self->write_tag( $obj, undef, SIG_NULL, '' );
d5d7c51d 496 }
497 else {
9e4f83a0 498 $self->write_tag( $obj, undef, SIG_DATA, $value );
d5d7c51d 499 }
20f7b20c 500
d5d7c51d 501 ##
502 # Plain key is stored AFTER value, as keys are typically fetched less often.
503 ##
504 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 505
9a187d8c 506 # Internal references don't care about autobless
9d4fa373 507 return 1 if $dbm_deep_obj;
9a187d8c 508
d5d7c51d 509 ##
510 # If value is blessed, preserve class name
511 ##
512 if ( $root->{autobless} ) {
513 my $value_class = Scalar::Util::blessed($value);
9d4fa373 514 if ( defined $value_class && !$dbm_deep_obj ) {
d5d7c51d 515 print( $fh chr(1) );
516 print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
20f7b20c 517 }
d5d7c51d 518 else {
519 print( $fh chr(0) );
20f7b20c 520 }
d5d7c51d 521 }
20f7b20c 522
d5d7c51d 523 ##
d5d7c51d 524 # If content is a hash or array, create new child DBM::Deep object and
525 # pass each key or element to it.
526 ##
9d4fa373 527 if ($r eq 'HASH') {
528 my %x = %$value;
529 tie %$value, 'DBM::Deep', {
530 base_offset => $location,
531 root => $root,
532 };
533 %$value = %x;
534 }
535 elsif ($r eq 'ARRAY') {
536 my @x = @$value;
537 tie @$value, 'DBM::Deep', {
538 base_offset => $location,
539 root => $root,
540 };
541 @$value = @x;
20f7b20c 542 }
d4b1166e 543
d5d7c51d 544 return 1;
d4b1166e 545}
546
75be6413 547sub split_index {
548 my $self = shift;
549 my ($obj, $md5, $tag) = @_;
550
551 my $fh = $obj->_fh;
552 my $root = $obj->_root;
16d1ad9b 553
554 my $loc = $self->_request_space(
555 $obj, $self->tag_size( $self->{index_size} ),
556 );
557
7b1e1aa1 558 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 559 print( $fh pack($self->{long_pack}, $loc) );
75be6413 560
9e4f83a0 561 my $index_tag = $self->write_tag(
16d1ad9b 562 $obj, $loc, SIG_INDEX,
f37c15ab 563 chr(0)x$self->{index_size},
75be6413 564 );
565
f9c33187 566 my $newtag_loc = $self->_request_space(
567 $obj, $self->tag_size( $self->{bucket_list_size} ),
568 );
75be6413 569
7b1e1aa1 570 my $keys = $tag->{content}
f9c33187 571 . $md5 . pack($self->{long_pack}, $newtag_loc)
572 . pack($self->{long_pack}, 0);
75be6413 573
f9c33187 574 my @newloc = ();
75be6413 575 BUCKET:
576 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 577 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 578
f9c33187 579 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
580 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 581
75be6413 582 my $num = ord(substr($key, $tag->{ch} + 1, 1));
583
f9c33187 584 if ($newloc[$num]) {
585 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 586 my $subkeys;
587 read( $fh, $subkeys, $self->{bucket_list_size});
588
f9c33187 589 # This is looking for the first empty spot
7b1e1aa1 590 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 591 { content => $subkeys }, '',
7b1e1aa1 592 );
75be6413 593
f9c33187 594 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
595 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 596
597 next;
75be6413 598 }
75be6413 599
7b1e1aa1 600 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 601
7b1e1aa1 602 my $loc = $self->_request_space(
603 $obj, $self->tag_size( $self->{bucket_list_size} ),
604 );
2603d86e 605
7b1e1aa1 606 print( $fh pack($self->{long_pack}, $loc) );
75be6413 607
7b1e1aa1 608 my $blist_tag = $self->write_tag(
609 $obj, $loc, SIG_BLIST,
610 chr(0)x$self->{bucket_list_size},
611 );
612
613 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 614 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 615
f9c33187 616 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 617 }
618
619 $self->_release_space(
f9c33187 620 $obj, $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 621 $tag->{offset} - SIG_SIZE - $self->{data_size},
622 );
75be6413 623
f9c33187 624 return $newtag_loc;
75be6413 625}
626
8db25060 627sub read_from_loc {
628 my $self = shift;
629 my ($obj, $subloc) = @_;
630
631 my $fh = $obj->_fh;
632
633 ##
634 # Found match -- seek to offset and read signature
635 ##
636 my $signature;
637 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
638 read( $fh, $signature, SIG_SIZE);
639
640 ##
641 # If value is a hash or array, return new DBM::Deep object with correct offset
642 ##
643 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 644 my $new_obj = DBM::Deep->new({
8db25060 645 type => $signature,
646 base_offset => $subloc,
647 root => $obj->_root,
685e40f1 648 });
8db25060 649
685e40f1 650 if ($new_obj->_root->{autobless}) {
8db25060 651 ##
652 # Skip over value and plain key to see if object needs
653 # to be re-blessed
654 ##
655 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
656
657 my $size;
c6ea6b6c 658 read( $fh, $size, $self->{data_size});
659 $size = unpack($self->{data_pack}, $size);
8db25060 660 if ($size) { seek($fh, $size, SEEK_CUR); }
661
662 my $bless_bit;
663 read( $fh, $bless_bit, 1);
664 if (ord($bless_bit)) {
665 ##
666 # Yes, object needs to be re-blessed
667 ##
668 my $class_name;
c6ea6b6c 669 read( $fh, $size, $self->{data_size});
670 $size = unpack($self->{data_pack}, $size);
8db25060 671 if ($size) { read( $fh, $class_name, $size); }
685e40f1 672 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 673 }
674 }
675
685e40f1 676 return $new_obj;
8db25060 677 }
678 elsif ( $signature eq SIG_INTERNAL ) {
679 my $size;
680 read( $fh, $size, $self->{data_size});
681 $size = unpack($self->{data_pack}, $size);
682
683 if ( $size ) {
684 my $new_loc;
685 read( $fh, $new_loc, $size );
686 $new_loc = unpack( $self->{long_pack}, $new_loc );
687
688 return $self->read_from_loc( $obj, $new_loc );
689 }
690 else {
691 return;
692 }
693 }
694 ##
695 # Otherwise return actual value
696 ##
697 elsif ($signature eq SIG_DATA) {
698 my $size;
699 read( $fh, $size, $self->{data_size});
700 $size = unpack($self->{data_pack}, $size);
701
702 my $value = '';
703 if ($size) { read( $fh, $value, $size); }
704 return $value;
705 }
706
707 ##
708 # Key exists, but content is null
709 ##
710 return;
711}
712
9020ee8c 713sub get_bucket_value {
beac1dff 714 ##
715 # Fetch single value given tag and MD5 digested key.
716 ##
717 my $self = shift;
718 my ($obj, $tag, $md5) = @_;
9020ee8c 719
9a187d8c 720 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
386bab6c 721 if ( $subloc ) {
8db25060 722 return $self->read_from_loc( $obj, $subloc );
386bab6c 723 }
beac1dff 724 return;
9020ee8c 725}
ab0e4957 726
727sub delete_bucket {
beac1dff 728 ##
729 # Delete single key/value pair given tag and MD5 digested key.
730 ##
731 my $self = shift;
732 my ($obj, $tag, $md5) = @_;
ab0e4957 733
9a187d8c 734 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
f9c33187 735#XXX This needs _release_space()
386bab6c 736 if ( $subloc ) {
737 my $fh = $obj->_fh;
738 seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);
739 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 740 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 741
ab0e4957 742 return 1;
386bab6c 743 }
beac1dff 744 return;
ab0e4957 745}
746
912d50b1 747sub bucket_exists {
beac1dff 748 ##
749 # Check existence of single key given tag and MD5 digested key.
750 ##
751 my $self = shift;
752 my ($obj, $tag, $md5) = @_;
912d50b1 753
9a187d8c 754 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 755 return $subloc && 1;
912d50b1 756}
757
6736c116 758sub find_bucket_list {
beac1dff 759 ##
760 # Locate offset for bucket list, given digested key
761 ##
762 my $self = shift;
d0b74c17 763 my ($obj, $md5, $args) = @_;
764 $args = {} unless $args;
765
beac1dff 766 ##
767 # Locate offset for bucket list using digest index system
768 ##
e5fc7e69 769 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 770 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 771
e5fc7e69 772 my $ch = 0;
8db25060 773 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 774 my $num = ord substr($md5, $ch, 1);
775
776 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
777 $tag = $self->index_lookup( $obj, $tag, $num );
778
779 if (!$tag) {
29b01632 780 return if !$args->{create};
d0b74c17 781
16d1ad9b 782 my $loc = $self->_request_space(
783 $obj, $self->tag_size( $self->{bucket_list_size} ),
784 );
785
7b1e1aa1 786 my $fh = $obj->_fh;
787 seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
16d1ad9b 788 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 789
9e4f83a0 790 $tag = $self->write_tag(
16d1ad9b 791 $obj, $loc, SIG_BLIST,
f37c15ab 792 chr(0)x$self->{bucket_list_size},
d5d7c51d 793 );
794
795 $tag->{ref_loc} = $ref_loc;
796 $tag->{ch} = $ch;
797
798 last;
d0b74c17 799 }
800
16d1ad9b 801 $tag->{ch} = $ch++;
d0b74c17 802 $tag->{ref_loc} = $ref_loc;
beac1dff 803 }
d0b74c17 804
beac1dff 805 return $tag;
6736c116 806}
807
d0b74c17 808sub index_lookup {
809 ##
810 # Given index tag, lookup single entry in index and return .
811 ##
812 my $self = shift;
813 my ($obj, $tag, $index) = @_;
814
815 my $location = unpack(
816 $self->{long_pack},
817 substr(
818 $tag->{content},
819 $index * $self->{long_size},
820 $self->{long_size},
821 ),
822 );
823
824 if (!$location) { return; }
825
826 return $self->load_tag( $obj, $location );
827}
828
6736c116 829sub traverse_index {
beac1dff 830 ##
831 # Scan index and recursively step into deeper levels, looking for next key.
832 ##
6736c116 833 my $self = shift;
834 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 835
beac1dff 836 my $tag = $self->load_tag($obj, $offset );
6736c116 837
838 my $fh = $obj->_fh;
d0b74c17 839
8db25060 840 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 841 my $content = $tag->{content};
e5fc7e69 842 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 843
d5d7c51d 844 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 845 my $subloc = unpack(
846 $self->{long_pack},
e06824f8 847 substr(
848 $content,
849 $idx * $self->{long_size},
850 $self->{long_size},
851 ),
e5fc7e69 852 );
853
beac1dff 854 if ($subloc) {
e5fc7e69 855 my $result = $self->traverse_index(
856 $obj, $subloc, $ch + 1, $force_return_next,
857 );
858
beac1dff 859 if (defined($result)) { return $result; }
860 }
861 } # index loop
d0b74c17 862
beac1dff 863 $obj->{return_next} = 1;
864 } # tag is an index
d0b74c17 865
e5fc7e69 866 else {
beac1dff 867 my $keys = $tag->{content};
868 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 869
beac1dff 870 ##
871 # Iterate through buckets, looking for a key match
872 ##
8db25060 873 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 874 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 875
8db25060 876 # End of bucket list -- return to outer loop
beac1dff 877 if (!$subloc) {
beac1dff 878 $obj->{return_next} = 1;
879 last;
880 }
8db25060 881 # Located previous key -- return next one found
beac1dff 882 elsif ($key eq $obj->{prev_md5}) {
beac1dff 883 $obj->{return_next} = 1;
884 next;
885 }
8db25060 886 # Seek to bucket location and skip over signature
beac1dff 887 elsif ($obj->{return_next}) {
8db25060 888 seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
d0b74c17 889
beac1dff 890 # Skip over value to get to plain key
8db25060 891 my $sig;
892 read( $fh, $sig, SIG_SIZE );
893
beac1dff 894 my $size;
e5fc7e69 895 read( $fh, $size, $self->{data_size});
896 $size = unpack($self->{data_pack}, $size);
beac1dff 897 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 898
beac1dff 899 # Read in plain key and return as scalar
beac1dff 900 my $plain_key;
e5fc7e69 901 read( $fh, $size, $self->{data_size});
902 $size = unpack($self->{data_pack}, $size);
beac1dff 903 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 904
beac1dff 905 return $plain_key;
906 }
8db25060 907 }
d0b74c17 908
beac1dff 909 $obj->{return_next} = 1;
910 } # tag is a bucket list
d0b74c17 911
beac1dff 912 return;
6736c116 913}
914
915sub get_next_key {
beac1dff 916 ##
917 # Locate next key, given digested previous one
918 ##
6736c116 919 my $self = shift;
920 my ($obj) = @_;
d0b74c17 921
beac1dff 922 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
923 $obj->{return_next} = 0;
d0b74c17 924
beac1dff 925 ##
926 # If the previous key was not specifed, start at the top and
927 # return the first one found.
928 ##
929 if (!$obj->{prev_md5}) {
930 $obj->{prev_md5} = chr(0) x $self->{hash_size};
931 $obj->{return_next} = 1;
932 }
d0b74c17 933
beac1dff 934 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 935}
936
75be6413 937# Utilities
938
9cec1360 939sub _get_key_subloc {
75be6413 940 my $self = shift;
941 my ($keys, $idx) = @_;
942
6ed2f3df 943 my ($key, $subloc, $size) = unpack(
944 "a$self->{hash_size} $self->{long_pack} $self->{long_pack}",
75be6413 945 substr(
946 $keys,
9cec1360 947 ($idx * $self->{bucket_size}),
948 $self->{bucket_size},
75be6413 949 ),
950 );
951
6ed2f3df 952 return ($key, $subloc, $size);
75be6413 953}
954
d608b06e 955sub _find_in_buckets {
956 my $self = shift;
957 my ($tag, $md5) = @_;
958
959 BUCKET:
960 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
9a187d8c 961 my ($key, $subloc, $size) = $self->_get_key_subloc(
962 $tag->{content}, $i,
963 );
d608b06e 964
9a187d8c 965 return ($subloc, $i * $self->{bucket_size}, $size) unless $subloc;
d608b06e 966
967 next BUCKET if $key ne $md5;
968
9a187d8c 969 return ($subloc, $i * $self->{bucket_size}, $size);
d608b06e 970 }
971
972 return;
973}
974
7b1e1aa1 975#sub _print_at {
976# my $self = shift;
977# my ($obj, $spot, $data) = @_;
978#
979# my $fh = $obj->_fh;
980# seek( $fh, $spot, SEEK_SET );
981# print( $fh $data );
982#
983# return;
984#}
985
994ccd8e 986sub _request_space {
987 my $self = shift;
988 my ($obj, $size) = @_;
989
990 my $loc = $obj->_root->{end};
c9ec091a 991 $obj->_root->{end} += $size;
994ccd8e 992
993 return $loc;
994}
995
996sub _release_space {
997 my $self = shift;
998 my ($obj, $size, $loc) = @_;
999
7b1e1aa1 1000 my $next_loc = 0;
1001
1002 my $fh = $obj->_fh;
1003 seek( $fh, $loc + $obj->_root->{file_offset}, SEEK_SET );
1004 print( $fh SIG_FREE
1005 . pack($self->{long_pack}, $size )
1006 . pack($self->{long_pack}, $next_loc )
1007 );
1008
994ccd8e 1009 return;
1010}
1011
a20d9a3f 10121;
1013__END__
d5d7c51d 1014
1015# This will be added in later, after more refactoring is done. This is an early
1016# attempt at refactoring on the physical level instead of the virtual level.
1017sub _read_at {
1018 my $self = shift;
1019 my ($obj, $spot, $amount, $unpack) = @_;
1020
1021 my $fh = $obj->_fh;
1022 seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );
1023
1024 my $buffer;
1025 my $bytes_read = read( $fh, $buffer, $amount );
1026
1027 if ( $unpack ) {
1028 $buffer = unpack( $unpack, $buffer );
1029 }
1030
1031 if ( wantarray ) {
1032 return ($buffer, $bytes_read);
1033 }
1034 else {
1035 return $buffer;
1036 }
1037}