Broke _root out into its own object, moved a few methods up to it, and renamed _root...
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
8use Fcntl qw( :DEFAULT :flock :seek );
9
8db25060 10##
11# Setup file and tag signatures. These should never change.
12##
13sub SIG_FILE () { 'DPDB' }
460b1067 14sub SIG_HEADER () { 'h' }
8db25060 15sub SIG_INTERNAL () { 'i' }
16sub SIG_HASH () { 'H' }
17sub SIG_ARRAY () { 'A' }
8db25060 18sub SIG_NULL () { 'N' }
19sub SIG_DATA () { 'D' }
20sub SIG_INDEX () { 'I' }
21sub SIG_BLIST () { 'B' }
7b1e1aa1 22sub SIG_FREE () { 'F' }
8db25060 23sub SIG_SIZE () { 1 }
24
612969fb 25sub new {
26 my $class = shift;
27 my ($args) = @_;
28
29 my $self = bless {
30 long_size => 4,
31 long_pack => 'N',
32 data_size => 4,
33 data_pack => 'N',
251dfd0e 34
612969fb 35 digest => \&Digest::MD5::md5,
36 hash_size => 16,
251dfd0e 37
81d16922 38 ##
d5d7c51d 39 # Maximum number of buckets per list before another level of indexing is
e0098e7f 40 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 41 # files. DO NOT decrease this value below 16, due to risk of recursive
42 # reindex overrun.
81d16922 43 ##
612969fb 44 max_buckets => 16,
460b1067 45
46 fileobj => undef,
612969fb 47 }, $class;
48
e0098e7f 49 if ( defined $args->{pack_size} ) {
50 if ( lc $args->{pack_size} eq 'small' ) {
51 $args->{long_size} = 2;
52 $args->{long_pack} = 'S';
53 }
54 elsif ( lc $args->{pack_size} eq 'medium' ) {
55 $args->{long_size} = 4;
56 $args->{long_pack} = 'N';
57 }
58 elsif ( lc $args->{pack_size} eq 'large' ) {
59 $args->{long_size} = 8;
60 $args->{long_pack} = 'Q';
61 }
62 else {
63 die "Unknown pack_size value: '$args->{pack_size}'\n";
64 }
65 }
66
fde3db1a 67 # Grab the parameters we want to use
68 foreach my $param ( keys %$self ) {
69 next unless exists $args->{$param};
3e9498a1 70 $self->{$param} = $args->{$param};
fde3db1a 71 }
72
e0098e7f 73 if ( $self->{max_buckets} < 16 ) {
74 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
75 $self->{max_buckets} = 16;
76 }
77
260a80b4 78 return $self;
79}
80
460b1067 81sub _fileobj { return $_[0]{fileobj} }
82sub _fh { return $_[0]->_fileobj->{fh} }
83
260a80b4 84sub calculate_sizes {
85 my $self = shift;
86
e0098e7f 87 $self->{index_size} = (2**8) * $self->{long_size};
88 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
89 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 90
260a80b4 91 return;
1bf65be7 92}
93
fde3db1a 94sub write_file_header {
0d0f3d5d 95 my $self = shift;
460b1067 96# my ($obj) = @_;
0d0f3d5d 97
460b1067 98 my $fh = $self->_fh;
0d0f3d5d 99
42f79e07 100 my $loc = $self->_request_space(
460b1067 101 undef, length( SIG_FILE ) + 21,
42f79e07 102 );
460b1067 103 seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET);
260a80b4 104 print( $fh
105 SIG_FILE,
460b1067 106 SIG_HEADER,
107 pack('N', 1), # header version
108 pack('N', 12), # header size
109 pack('N', 0), # file version
260a80b4 110 pack('S', $self->{long_size}),
111 pack('A', $self->{long_pack}),
112 pack('S', $self->{data_size}),
113 pack('A', $self->{data_pack}),
114 pack('S', $self->{max_buckets}),
115 );
0d0f3d5d 116
117 return;
118}
119
fde3db1a 120sub read_file_header {
e064ccd1 121 my $self = shift;
122 my ($obj) = @_;
123
124 my $fh = $obj->_fh;
125
460b1067 126 seek($fh, 0 + $obj->_fileobj->{file_offset}, SEEK_SET);
42f79e07 127 my $buffer;
460b1067 128 my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 );
129
130 return unless $bytes_read;
131
132 my ($file_signature, $sig_header, $header_version, $size) = unpack(
133 'A4 A N N', $buffer
42f79e07 134 );
e064ccd1 135
460b1067 136 unless ( $file_signature eq SIG_FILE ) {
137 $self->{fileobj}->close;
138 $obj->_throw_error( "Signature not found -- file is not a Deep DB" );
139 }
260a80b4 140
460b1067 141 unless ( $sig_header eq SIG_HEADER ) {
142 $self->{fileobj}->close;
143 $obj->_throw_error( "Old file version found." );
144 }
9b2370e0 145
460b1067 146 my $buffer2;
147 $bytes_read += read( $fh, $buffer2, $size );
148 my ($file_version, @values) = unpack( 'N S A S A S', $buffer2 );
149 if ( @values < 5 || grep { !defined } @values ) {
150 $self->{fileobj}->close;
151 $obj->_throw_error("Corrupted file - bad header");
e064ccd1 152 }
153
460b1067 154 #XXX Add warnings if values weren't set right
155 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
156
e064ccd1 157 return $bytes_read;
158}
159
460b1067 160sub get_file_version {
70b55428 161 my $self = shift;
162 my ($obj) = @_;
163
460b1067 164 my $fh = $obj->_fh;
165
166 seek( $fh, 13 + $obj->_fileobj->{file_offset}, SEEK_SET );
167 my $buffer;
168 my $bytes_read = read( $fh, $buffer, 4 );
169 unless ( $bytes_read == 4 ) {
170 $obj->_throw_error( "Cannot read file version" );
171 }
172
173 return unpack( 'N', $buffer );
174}
175
176sub write_file_version {
177 my $self = shift;
178 my ($obj, $new_version) = @_;
179
180 my $fh = $obj->_fh;
181
182 seek( $fh, 13 + $obj->_fileobj->{file_offset}, SEEK_SET );
183 print( $fh pack( 'N', $new_version ) );
184
185 return;
186}
187
188sub setup_fh {
189 my $self = shift;
190 my ($obj) = @_;
70b55428 191
6fde4ed2 192 my $fh = $obj->_fh;
193 flock $fh, LOCK_EX;
118ba343 194
260a80b4 195 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 196 unless ( $obj->{base_offset} ) {
fde3db1a 197 my $bytes_read = $self->read_file_header( $obj );
118ba343 198
260a80b4 199 $self->calculate_sizes;
200
118ba343 201 ##
fde3db1a 202 # File is empty -- write header and master index
118ba343 203 ##
204 if (!$bytes_read) {
fde3db1a 205 $self->write_file_header( $obj );
118ba343 206
c9ec091a 207 $obj->{base_offset} = $self->_request_space(
16d1ad9b 208 $obj, $self->tag_size( $self->{index_size} ),
c9ec091a 209 );
118ba343 210
9e4f83a0 211 $self->write_tag(
c9ec091a 212 $obj, $obj->_base_offset, $obj->_type,
f37c15ab 213 chr(0)x$self->{index_size},
118ba343 214 );
215
216 # Flush the filehandle
217 my $old_fh = select $fh;
218 my $old_af = $|; $| = 1; $| = $old_af;
219 select $old_fh;
220 }
221 else {
222 $obj->{base_offset} = $bytes_read;
223
224 ##
fde3db1a 225 # Get our type from master index header
118ba343 226 ##
227 my $tag = $self->load_tag($obj, $obj->_base_offset)
228 or $obj->_throw_error("Corrupted file, no master index record");
229
230 unless ($obj->{type} eq $tag->{signature}) {
231 $obj->_throw_error("File type mismatch");
232 }
233 }
118ba343 234 }
260a80b4 235 else {
236 $self->calculate_sizes;
237 }
e06824f8 238
673464d9 239 #XXX We have to make sure we don't mess up when autoflush isn't turned on
460b1067 240 unless ( $obj->_fileobj->{inode} ) {
70b55428 241 my @stats = stat($obj->_fh);
460b1067 242 $obj->_fileobj->{inode} = $stats[1];
243 $obj->_fileobj->{end} = $stats[7];
70b55428 244 }
245
6fde4ed2 246 flock $fh, LOCK_UN;
247
70b55428 248 return 1;
249}
250
16d1ad9b 251sub tag_size {
252 my $self = shift;
253 my ($size) = @_;
254 return SIG_SIZE + $self->{data_size} + $size;
255}
256
9e4f83a0 257sub write_tag {
20f7b20c 258 ##
259 # Given offset, signature and content, create tag and write to disk
260 ##
d4b1166e 261 my $self = shift;
20f7b20c 262 my ($obj, $offset, $sig, $content) = @_;
f37c15ab 263 my $size = length( $content );
20f7b20c 264
d4b1166e 265 my $fh = $obj->_fh;
266
f37c15ab 267 if ( defined $offset ) {
460b1067 268 seek($fh, $offset + $obj->_fileobj->{file_offset}, SEEK_SET);
f37c15ab 269 }
270
251dfd0e 271 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 272
f37c15ab 273 return unless defined $offset;
274
20f7b20c 275 return {
276 signature => $sig,
277 size => $size,
8db25060 278 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 279 content => $content
280 };
d4b1166e 281}
282
283sub load_tag {
20f7b20c 284 ##
285 # Given offset, load single tag and return signature, size and data
286 ##
d4b1166e 287 my $self = shift;
20f7b20c 288 my ($obj, $offset) = @_;
289
e06824f8 290# print join(':',map{$_||''}caller(1)), $/;
291
d4b1166e 292 my $fh = $obj->_fh;
293
460b1067 294 seek($fh, $offset + $obj->_fileobj->{file_offset}, SEEK_SET);
e5fc7e69 295
75be6413 296 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 297 return if eof $fh;
20f7b20c 298
d4b1166e 299 my $b;
8db25060 300 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 301 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 302
303 my $buffer;
304 read( $fh, $buffer, $size);
305
306 return {
307 signature => $sig,
308 size => $size,
8db25060 309 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 310 content => $buffer
311 };
d4b1166e 312}
313
56ec4340 314sub _get_dbm_object {
315 my $item = shift;
316
317 my $obj = eval {
318 local $SIG{__DIE__};
319 if ($item->isa( 'DBM::Deep' )) {
320 return $item;
321 }
322 return;
323 };
324 return $obj if $obj;
325
326 my $r = Scalar::Util::reftype( $item ) || '';
327 if ( $r eq 'HASH' ) {
328 my $obj = eval {
329 local $SIG{__DIE__};
330 my $obj = tied(%$item);
331 if ($obj->isa( 'DBM::Deep' )) {
332 return $obj;
333 }
334 return;
335 };
336 return $obj if $obj;
337 }
338 elsif ( $r eq 'ARRAY' ) {
339 my $obj = eval {
340 local $SIG{__DIE__};
341 my $obj = tied(@$item);
342 if ($obj->isa( 'DBM::Deep' )) {
343 return $obj;
344 }
345 return;
346 };
347 return $obj if $obj;
348 }
349
350 return;
351}
352
29b01632 353sub _length_needed {
354 my $self = shift;
f37c15ab 355 my ($obj, $value, $key) = @_;
29b01632 356
357 my $is_dbm_deep = eval {
358 local $SIG{'__DIE__'};
359 $value->isa( 'DBM::Deep' );
360 };
361
f37c15ab 362 my $len = SIG_SIZE + $self->{data_size}
363 + $self->{data_size} + length( $key );
29b01632 364
460b1067 365 if ( $is_dbm_deep && $value->_fileobj eq $obj->_fileobj ) {
f37c15ab 366 return $len + $self->{long_size};
29b01632 367 }
368
369 my $r = Scalar::Util::reftype( $value ) || '';
460b1067 370 if ( $obj->_fileobj->{autobless} ) {
9a187d8c 371 # This is for the bit saying whether or not this thing is blessed.
372 $len += 1;
373 }
374
29b01632 375 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 376 if ( defined $value ) {
377 $len += length( $value );
378 }
379 return $len;
29b01632 380 }
381
f37c15ab 382 $len += $self->{index_size};
29b01632 383
384 # if autobless is enabled, must also take into consideration
f37c15ab 385 # the class name as it is stored after the key.
460b1067 386 if ( $obj->_fileobj->{autobless} ) {
56ec4340 387 my $c = Scalar::Util::blessed($value);
388 if ( defined $c && !$is_dbm_deep ) {
389 $len += $self->{data_size} + length($c);
29b01632 390 }
391 }
392
f37c15ab 393 return $len;
29b01632 394}
395
20f7b20c 396sub add_bucket {
397 ##
398 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
399 # plain (undigested) key and value.
400 ##
d4b1166e 401 my $self = shift;
20f7b20c 402 my ($obj, $tag, $md5, $plain_key, $value) = @_;
75be6413 403
eea0d863 404 # This verifies that only supported values will be stored.
405 {
406 my $r = Scalar::Util::reftype( $value );
407 last if !defined $r;
408
409 last if $r eq 'HASH';
410 last if $r eq 'ARRAY';
411
412 $obj->_throw_error(
413 "Storage of variables of type '$r' is not supported."
414 );
415 }
416
20f7b20c 417 my $location = 0;
418 my $result = 2;
419
460b1067 420 my $root = $obj->_fileobj;
f37c15ab 421 my $fh = $obj->_fh;
20f7b20c 422
f37c15ab 423 my $actual_length = $self->_length_needed( $obj, $value, $plain_key );
20f7b20c 424
9a187d8c 425 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
75be6413 426
f9c33187 427# $self->_release_space( $obj, $size, $subloc );
386bab6c 428 # Updating a known md5
f9c33187 429#XXX This needs updating to use _release_space
386bab6c 430 if ( $subloc ) {
431 $result = 1;
20f7b20c 432
386bab6c 433 if ($actual_length <= $size) {
434 $location = $subloc;
20f7b20c 435 }
75be6413 436 else {
f37c15ab 437 $location = $self->_request_space( $obj, $actual_length );
386bab6c 438 seek(
439 $fh,
9a187d8c 440 $tag->{offset} + $offset
441 + $self->{hash_size} + $root->{file_offset},
386bab6c 442 SEEK_SET,
443 );
9a187d8c 444 print( $fh pack($self->{long_pack}, $location ) );
445 print( $fh pack($self->{long_pack}, $actual_length ) );
75be6413 446 }
75be6413 447 }
386bab6c 448 # Adding a new md5
449 elsif ( defined $offset ) {
f37c15ab 450 $location = $self->_request_space( $obj, $actual_length );
386bab6c 451
452 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 453 print( $fh $md5 . pack($self->{long_pack}, $location ) );
454 print( $fh pack($self->{long_pack}, $actual_length ) );
386bab6c 455 }
456 # If bucket didn't fit into list, split into a new index level
f9c33187 457 # split_index() will do the _request_space() call
386bab6c 458 else {
f9c33187 459 $location = $self->split_index( $obj, $md5, $tag );
386bab6c 460 }
20f7b20c 461
d5d7c51d 462 $self->write_value( $obj, $location, $plain_key, $value );
463
464 return $result;
465}
466
467sub write_value {
468 my $self = shift;
469 my ($obj, $location, $key, $value) = @_;
470
471 my $fh = $obj->_fh;
460b1067 472 my $root = $obj->_fileobj;
d5d7c51d 473
9d4fa373 474 my $dbm_deep_obj = _get_dbm_object( $value );
460b1067 475 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $obj->_fileobj ) {
9d4fa373 476 $obj->_throw_error( "Cannot cross-reference. Use export() instead" );
477 }
d5d7c51d 478
479 seek($fh, $location + $root->{file_offset}, SEEK_SET);
480
20f7b20c 481 ##
d5d7c51d 482 # Write signature based on content type, set content length and write
483 # actual value.
20f7b20c 484 ##
9d4fa373 485 my $r = Scalar::Util::reftype( $value ) || '';
486 if ( $dbm_deep_obj ) {
487 $self->write_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 488 }
489 elsif ($r eq 'HASH') {
9d4fa373 490 if ( !$dbm_deep_obj && tied %{$value} ) {
019ab3a1 491 $obj->_throw_error( "Cannot store something that is tied" );
492 }
9e4f83a0 493 $self->write_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 494 }
495 elsif ($r eq 'ARRAY') {
9d4fa373 496 if ( !$dbm_deep_obj && tied @{$value} ) {
019ab3a1 497 $obj->_throw_error( "Cannot store something that is tied" );
498 }
9e4f83a0 499 $self->write_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 500 }
501 elsif (!defined($value)) {
9e4f83a0 502 $self->write_tag( $obj, undef, SIG_NULL, '' );
d5d7c51d 503 }
504 else {
9e4f83a0 505 $self->write_tag( $obj, undef, SIG_DATA, $value );
d5d7c51d 506 }
20f7b20c 507
d5d7c51d 508 ##
509 # Plain key is stored AFTER value, as keys are typically fetched less often.
510 ##
511 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 512
9a187d8c 513 # Internal references don't care about autobless
9d4fa373 514 return 1 if $dbm_deep_obj;
9a187d8c 515
d5d7c51d 516 ##
517 # If value is blessed, preserve class name
518 ##
519 if ( $root->{autobless} ) {
56ec4340 520 my $c = Scalar::Util::blessed($value);
521 if ( defined $c && !$dbm_deep_obj ) {
d5d7c51d 522 print( $fh chr(1) );
56ec4340 523 print( $fh pack($self->{data_pack}, length($c)) . $c );
20f7b20c 524 }
d5d7c51d 525 else {
526 print( $fh chr(0) );
20f7b20c 527 }
d5d7c51d 528 }
20f7b20c 529
d5d7c51d 530 ##
56ec4340 531 # Tie the passed in reference so that changes to it are reflected in the
532 # datafile. The use of $location as the base_offset will act as the
533 # the linkage between parent and child.
534 #
535 # The overall assignment is a hack around the fact that just tying doesn't
536 # store the values. This may not be the wrong thing to do.
d5d7c51d 537 ##
9d4fa373 538 if ($r eq 'HASH') {
539 my %x = %$value;
540 tie %$value, 'DBM::Deep', {
541 base_offset => $location,
460b1067 542 fileobj => $root,
9d4fa373 543 };
544 %$value = %x;
545 }
546 elsif ($r eq 'ARRAY') {
547 my @x = @$value;
548 tie @$value, 'DBM::Deep', {
549 base_offset => $location,
460b1067 550 fileobj => $root,
9d4fa373 551 };
552 @$value = @x;
20f7b20c 553 }
d4b1166e 554
d5d7c51d 555 return 1;
d4b1166e 556}
557
75be6413 558sub split_index {
559 my $self = shift;
560 my ($obj, $md5, $tag) = @_;
561
562 my $fh = $obj->_fh;
460b1067 563 my $root = $obj->_fileobj;
16d1ad9b 564
565 my $loc = $self->_request_space(
566 $obj, $self->tag_size( $self->{index_size} ),
567 );
568
7b1e1aa1 569 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 570 print( $fh pack($self->{long_pack}, $loc) );
75be6413 571
9e4f83a0 572 my $index_tag = $self->write_tag(
16d1ad9b 573 $obj, $loc, SIG_INDEX,
f37c15ab 574 chr(0)x$self->{index_size},
75be6413 575 );
576
f9c33187 577 my $newtag_loc = $self->_request_space(
578 $obj, $self->tag_size( $self->{bucket_list_size} ),
579 );
75be6413 580
7b1e1aa1 581 my $keys = $tag->{content}
f9c33187 582 . $md5 . pack($self->{long_pack}, $newtag_loc)
583 . pack($self->{long_pack}, 0);
75be6413 584
f9c33187 585 my @newloc = ();
75be6413 586 BUCKET:
587 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 588 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 589
f9c33187 590 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
591 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 592
75be6413 593 my $num = ord(substr($key, $tag->{ch} + 1, 1));
594
f9c33187 595 if ($newloc[$num]) {
596 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 597 my $subkeys;
598 read( $fh, $subkeys, $self->{bucket_list_size});
599
f9c33187 600 # This is looking for the first empty spot
7b1e1aa1 601 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 602 { content => $subkeys }, '',
7b1e1aa1 603 );
75be6413 604
f9c33187 605 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
606 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 607
608 next;
75be6413 609 }
75be6413 610
7b1e1aa1 611 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 612
7b1e1aa1 613 my $loc = $self->_request_space(
614 $obj, $self->tag_size( $self->{bucket_list_size} ),
615 );
2603d86e 616
7b1e1aa1 617 print( $fh pack($self->{long_pack}, $loc) );
75be6413 618
7b1e1aa1 619 my $blist_tag = $self->write_tag(
620 $obj, $loc, SIG_BLIST,
621 chr(0)x$self->{bucket_list_size},
622 );
623
624 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 625 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 626
f9c33187 627 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 628 }
629
630 $self->_release_space(
f9c33187 631 $obj, $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 632 $tag->{offset} - SIG_SIZE - $self->{data_size},
633 );
75be6413 634
f9c33187 635 return $newtag_loc;
75be6413 636}
637
8db25060 638sub read_from_loc {
639 my $self = shift;
640 my ($obj, $subloc) = @_;
641
642 my $fh = $obj->_fh;
643
644 ##
645 # Found match -- seek to offset and read signature
646 ##
647 my $signature;
460b1067 648 seek($fh, $subloc + $obj->_fileobj->{file_offset}, SEEK_SET);
8db25060 649 read( $fh, $signature, SIG_SIZE);
650
651 ##
652 # If value is a hash or array, return new DBM::Deep object with correct offset
653 ##
654 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 655 my $new_obj = DBM::Deep->new({
8db25060 656 type => $signature,
657 base_offset => $subloc,
460b1067 658 fileobj => $obj->_fileobj,
685e40f1 659 });
8db25060 660
460b1067 661 if ($new_obj->_fileobj->{autobless}) {
8db25060 662 ##
663 # Skip over value and plain key to see if object needs
664 # to be re-blessed
665 ##
666 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
667
668 my $size;
c6ea6b6c 669 read( $fh, $size, $self->{data_size});
670 $size = unpack($self->{data_pack}, $size);
8db25060 671 if ($size) { seek($fh, $size, SEEK_CUR); }
672
673 my $bless_bit;
674 read( $fh, $bless_bit, 1);
675 if (ord($bless_bit)) {
676 ##
677 # Yes, object needs to be re-blessed
678 ##
679 my $class_name;
c6ea6b6c 680 read( $fh, $size, $self->{data_size});
681 $size = unpack($self->{data_pack}, $size);
8db25060 682 if ($size) { read( $fh, $class_name, $size); }
685e40f1 683 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 684 }
685 }
686
685e40f1 687 return $new_obj;
8db25060 688 }
689 elsif ( $signature eq SIG_INTERNAL ) {
690 my $size;
691 read( $fh, $size, $self->{data_size});
692 $size = unpack($self->{data_pack}, $size);
693
694 if ( $size ) {
695 my $new_loc;
696 read( $fh, $new_loc, $size );
697 $new_loc = unpack( $self->{long_pack}, $new_loc );
698
699 return $self->read_from_loc( $obj, $new_loc );
700 }
701 else {
702 return;
703 }
704 }
705 ##
706 # Otherwise return actual value
707 ##
460b1067 708 elsif ( $signature eq SIG_DATA ) {
8db25060 709 my $size;
710 read( $fh, $size, $self->{data_size});
711 $size = unpack($self->{data_pack}, $size);
712
713 my $value = '';
714 if ($size) { read( $fh, $value, $size); }
715 return $value;
716 }
717
718 ##
719 # Key exists, but content is null
720 ##
721 return;
722}
723
9020ee8c 724sub get_bucket_value {
beac1dff 725 ##
726 # Fetch single value given tag and MD5 digested key.
727 ##
728 my $self = shift;
729 my ($obj, $tag, $md5) = @_;
9020ee8c 730
9a187d8c 731 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
386bab6c 732 if ( $subloc ) {
8db25060 733 return $self->read_from_loc( $obj, $subloc );
386bab6c 734 }
beac1dff 735 return;
9020ee8c 736}
ab0e4957 737
738sub delete_bucket {
beac1dff 739 ##
740 # Delete single key/value pair given tag and MD5 digested key.
741 ##
742 my $self = shift;
743 my ($obj, $tag, $md5) = @_;
ab0e4957 744
9a187d8c 745 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
f9c33187 746#XXX This needs _release_space()
386bab6c 747 if ( $subloc ) {
748 my $fh = $obj->_fh;
460b1067 749 seek($fh, $tag->{offset} + $offset + $obj->_fileobj->{file_offset}, SEEK_SET);
386bab6c 750 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 751 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 752
ab0e4957 753 return 1;
386bab6c 754 }
beac1dff 755 return;
ab0e4957 756}
757
912d50b1 758sub bucket_exists {
beac1dff 759 ##
760 # Check existence of single key given tag and MD5 digested key.
761 ##
762 my $self = shift;
763 my ($obj, $tag, $md5) = @_;
912d50b1 764
9a187d8c 765 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 766 return $subloc && 1;
912d50b1 767}
768
6736c116 769sub find_bucket_list {
beac1dff 770 ##
771 # Locate offset for bucket list, given digested key
772 ##
773 my $self = shift;
d0b74c17 774 my ($obj, $md5, $args) = @_;
775 $args = {} unless $args;
776
beac1dff 777 ##
778 # Locate offset for bucket list using digest index system
779 ##
e5fc7e69 780 my $tag = $self->load_tag($obj, $obj->_base_offset)
d5d7c51d 781 or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 782
e5fc7e69 783 my $ch = 0;
8db25060 784 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 785 my $num = ord substr($md5, $ch, 1);
786
787 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
788 $tag = $self->index_lookup( $obj, $tag, $num );
789
790 if (!$tag) {
29b01632 791 return if !$args->{create};
d0b74c17 792
16d1ad9b 793 my $loc = $self->_request_space(
794 $obj, $self->tag_size( $self->{bucket_list_size} ),
795 );
796
7b1e1aa1 797 my $fh = $obj->_fh;
460b1067 798 seek($fh, $ref_loc + $obj->_fileobj->{file_offset}, SEEK_SET);
16d1ad9b 799 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 800
9e4f83a0 801 $tag = $self->write_tag(
16d1ad9b 802 $obj, $loc, SIG_BLIST,
f37c15ab 803 chr(0)x$self->{bucket_list_size},
d5d7c51d 804 );
805
806 $tag->{ref_loc} = $ref_loc;
807 $tag->{ch} = $ch;
808
809 last;
d0b74c17 810 }
811
16d1ad9b 812 $tag->{ch} = $ch++;
d0b74c17 813 $tag->{ref_loc} = $ref_loc;
beac1dff 814 }
d0b74c17 815
beac1dff 816 return $tag;
6736c116 817}
818
d0b74c17 819sub index_lookup {
820 ##
821 # Given index tag, lookup single entry in index and return .
822 ##
823 my $self = shift;
824 my ($obj, $tag, $index) = @_;
825
826 my $location = unpack(
827 $self->{long_pack},
828 substr(
829 $tag->{content},
830 $index * $self->{long_size},
831 $self->{long_size},
832 ),
833 );
834
835 if (!$location) { return; }
836
837 return $self->load_tag( $obj, $location );
838}
839
6736c116 840sub traverse_index {
beac1dff 841 ##
842 # Scan index and recursively step into deeper levels, looking for next key.
843 ##
6736c116 844 my $self = shift;
845 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 846
beac1dff 847 my $tag = $self->load_tag($obj, $offset );
6736c116 848
849 my $fh = $obj->_fh;
d0b74c17 850
8db25060 851 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 852 my $content = $tag->{content};
e5fc7e69 853 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 854
d5d7c51d 855 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 856 my $subloc = unpack(
857 $self->{long_pack},
e06824f8 858 substr(
859 $content,
860 $idx * $self->{long_size},
861 $self->{long_size},
862 ),
e5fc7e69 863 );
864
beac1dff 865 if ($subloc) {
e5fc7e69 866 my $result = $self->traverse_index(
867 $obj, $subloc, $ch + 1, $force_return_next,
868 );
869
beac1dff 870 if (defined($result)) { return $result; }
871 }
872 } # index loop
d0b74c17 873
beac1dff 874 $obj->{return_next} = 1;
875 } # tag is an index
d0b74c17 876
e5fc7e69 877 else {
beac1dff 878 my $keys = $tag->{content};
879 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 880
beac1dff 881 ##
882 # Iterate through buckets, looking for a key match
883 ##
8db25060 884 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 885 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 886
8db25060 887 # End of bucket list -- return to outer loop
beac1dff 888 if (!$subloc) {
beac1dff 889 $obj->{return_next} = 1;
890 last;
891 }
8db25060 892 # Located previous key -- return next one found
beac1dff 893 elsif ($key eq $obj->{prev_md5}) {
beac1dff 894 $obj->{return_next} = 1;
895 next;
896 }
8db25060 897 # Seek to bucket location and skip over signature
beac1dff 898 elsif ($obj->{return_next}) {
460b1067 899 seek($fh, $subloc + $obj->_fileobj->{file_offset}, SEEK_SET);
d0b74c17 900
beac1dff 901 # Skip over value to get to plain key
8db25060 902 my $sig;
903 read( $fh, $sig, SIG_SIZE );
904
beac1dff 905 my $size;
e5fc7e69 906 read( $fh, $size, $self->{data_size});
907 $size = unpack($self->{data_pack}, $size);
beac1dff 908 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 909
beac1dff 910 # Read in plain key and return as scalar
beac1dff 911 my $plain_key;
e5fc7e69 912 read( $fh, $size, $self->{data_size});
913 $size = unpack($self->{data_pack}, $size);
beac1dff 914 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 915
beac1dff 916 return $plain_key;
917 }
8db25060 918 }
d0b74c17 919
beac1dff 920 $obj->{return_next} = 1;
921 } # tag is a bucket list
d0b74c17 922
beac1dff 923 return;
6736c116 924}
925
926sub get_next_key {
beac1dff 927 ##
928 # Locate next key, given digested previous one
929 ##
6736c116 930 my $self = shift;
931 my ($obj) = @_;
d0b74c17 932
beac1dff 933 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
934 $obj->{return_next} = 0;
d0b74c17 935
beac1dff 936 ##
937 # If the previous key was not specifed, start at the top and
938 # return the first one found.
939 ##
940 if (!$obj->{prev_md5}) {
941 $obj->{prev_md5} = chr(0) x $self->{hash_size};
942 $obj->{return_next} = 1;
943 }
d0b74c17 944
beac1dff 945 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 946}
947
75be6413 948# Utilities
949
9cec1360 950sub _get_key_subloc {
75be6413 951 my $self = shift;
952 my ($keys, $idx) = @_;
953
6ed2f3df 954 my ($key, $subloc, $size) = unpack(
955 "a$self->{hash_size} $self->{long_pack} $self->{long_pack}",
75be6413 956 substr(
957 $keys,
9cec1360 958 ($idx * $self->{bucket_size}),
959 $self->{bucket_size},
75be6413 960 ),
961 );
962
6ed2f3df 963 return ($key, $subloc, $size);
75be6413 964}
965
d608b06e 966sub _find_in_buckets {
967 my $self = shift;
968 my ($tag, $md5) = @_;
969
970 BUCKET:
971 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
9a187d8c 972 my ($key, $subloc, $size) = $self->_get_key_subloc(
973 $tag->{content}, $i,
974 );
d608b06e 975
9a187d8c 976 return ($subloc, $i * $self->{bucket_size}, $size) unless $subloc;
d608b06e 977
978 next BUCKET if $key ne $md5;
979
9a187d8c 980 return ($subloc, $i * $self->{bucket_size}, $size);
d608b06e 981 }
982
983 return;
984}
985
7b1e1aa1 986#sub _print_at {
987# my $self = shift;
988# my ($obj, $spot, $data) = @_;
989#
990# my $fh = $obj->_fh;
991# seek( $fh, $spot, SEEK_SET );
992# print( $fh $data );
993#
994# return;
995#}
996
994ccd8e 997sub _request_space {
998 my $self = shift;
999 my ($obj, $size) = @_;
1000
460b1067 1001 my $loc = $self->_fileobj->{end};
1002 $self->_fileobj->{end} += $size;
994ccd8e 1003
1004 return $loc;
1005}
1006
1007sub _release_space {
1008 my $self = shift;
1009 my ($obj, $size, $loc) = @_;
1010
7b1e1aa1 1011 my $next_loc = 0;
1012
1013 my $fh = $obj->_fh;
460b1067 1014 seek( $fh, $loc + $obj->_fileobj->{file_offset}, SEEK_SET );
7b1e1aa1 1015 print( $fh SIG_FREE
1016 . pack($self->{long_pack}, $size )
1017 . pack($self->{long_pack}, $next_loc )
1018 );
1019
994ccd8e 1020 return;
1021}
1022
a20d9a3f 10231;
1024__END__
d5d7c51d 1025
1026# This will be added in later, after more refactoring is done. This is an early
1027# attempt at refactoring on the physical level instead of the virtual level.
1028sub _read_at {
1029 my $self = shift;
1030 my ($obj, $spot, $amount, $unpack) = @_;
1031
1032 my $fh = $obj->_fh;
460b1067 1033 seek( $fh, $spot + $obj->_fileobj->{file_offset}, SEEK_SET );
d5d7c51d 1034
1035 my $buffer;
1036 my $bytes_read = read( $fh, $buffer, $amount );
1037
1038 if ( $unpack ) {
1039 $buffer = unpack( $unpack, $buffer );
1040 }
1041
1042 if ( wantarray ) {
1043 return ($buffer, $bytes_read);
1044 }
1045 else {
1046 return $buffer;
1047 }
1048}