Converted Engine to use File correctly, removing all tramping of
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
8use Fcntl qw( :DEFAULT :flock :seek );
9
8db25060 10##
11# Setup file and tag signatures. These should never change.
12##
13sub SIG_FILE () { 'DPDB' }
460b1067 14sub SIG_HEADER () { 'h' }
8db25060 15sub SIG_INTERNAL () { 'i' }
16sub SIG_HASH () { 'H' }
17sub SIG_ARRAY () { 'A' }
8db25060 18sub SIG_NULL () { 'N' }
19sub SIG_DATA () { 'D' }
20sub SIG_INDEX () { 'I' }
21sub SIG_BLIST () { 'B' }
7b1e1aa1 22sub SIG_FREE () { 'F' }
8db25060 23sub SIG_SIZE () { 1 }
24
612969fb 25sub new {
26 my $class = shift;
27 my ($args) = @_;
28
29 my $self = bless {
30 long_size => 4,
31 long_pack => 'N',
32 data_size => 4,
33 data_pack => 'N',
251dfd0e 34
612969fb 35 digest => \&Digest::MD5::md5,
36 hash_size => 16,
251dfd0e 37
81d16922 38 ##
d5d7c51d 39 # Maximum number of buckets per list before another level of indexing is
e0098e7f 40 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 41 # files. DO NOT decrease this value below 16, due to risk of recursive
42 # reindex overrun.
81d16922 43 ##
612969fb 44 max_buckets => 16,
460b1067 45
46 fileobj => undef,
612969fb 47 }, $class;
48
e0098e7f 49 if ( defined $args->{pack_size} ) {
50 if ( lc $args->{pack_size} eq 'small' ) {
51 $args->{long_size} = 2;
52 $args->{long_pack} = 'S';
53 }
54 elsif ( lc $args->{pack_size} eq 'medium' ) {
55 $args->{long_size} = 4;
56 $args->{long_pack} = 'N';
57 }
58 elsif ( lc $args->{pack_size} eq 'large' ) {
59 $args->{long_size} = 8;
60 $args->{long_pack} = 'Q';
61 }
62 else {
63 die "Unknown pack_size value: '$args->{pack_size}'\n";
64 }
65 }
66
fde3db1a 67 # Grab the parameters we want to use
68 foreach my $param ( keys %$self ) {
69 next unless exists $args->{$param};
3e9498a1 70 $self->{$param} = $args->{$param};
fde3db1a 71 }
72
e0098e7f 73 if ( $self->{max_buckets} < 16 ) {
74 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
75 $self->{max_buckets} = 16;
76 }
77
260a80b4 78 return $self;
79}
80
460b1067 81sub _fileobj { return $_[0]{fileobj} }
82sub _fh { return $_[0]->_fileobj->{fh} }
83
260a80b4 84sub calculate_sizes {
85 my $self = shift;
86
e0098e7f 87 $self->{index_size} = (2**8) * $self->{long_size};
88 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
89 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 90
260a80b4 91 return;
1bf65be7 92}
93
fde3db1a 94sub write_file_header {
0d0f3d5d 95 my $self = shift;
0d0f3d5d 96
460b1067 97 my $fh = $self->_fh;
0d0f3d5d 98
e96daec8 99 my $loc = $self->_request_space( length( SIG_FILE ) + 21 );
460b1067 100 seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET);
260a80b4 101 print( $fh
102 SIG_FILE,
460b1067 103 SIG_HEADER,
104 pack('N', 1), # header version
105 pack('N', 12), # header size
106 pack('N', 0), # file version
260a80b4 107 pack('S', $self->{long_size}),
108 pack('A', $self->{long_pack}),
109 pack('S', $self->{data_size}),
110 pack('A', $self->{data_pack}),
111 pack('S', $self->{max_buckets}),
112 );
0d0f3d5d 113
114 return;
115}
116
fde3db1a 117sub read_file_header {
e064ccd1 118 my $self = shift;
e064ccd1 119
e96daec8 120 my $fh = $self->_fh;
e064ccd1 121
e96daec8 122 seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET);
42f79e07 123 my $buffer;
460b1067 124 my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 );
125
126 return unless $bytes_read;
127
128 my ($file_signature, $sig_header, $header_version, $size) = unpack(
129 'A4 A N N', $buffer
42f79e07 130 );
e064ccd1 131
460b1067 132 unless ( $file_signature eq SIG_FILE ) {
133 $self->{fileobj}->close;
e96daec8 134 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 135 }
260a80b4 136
460b1067 137 unless ( $sig_header eq SIG_HEADER ) {
138 $self->{fileobj}->close;
e96daec8 139 $self->_throw_error( "Old file version found." );
460b1067 140 }
9b2370e0 141
460b1067 142 my $buffer2;
143 $bytes_read += read( $fh, $buffer2, $size );
144 my ($file_version, @values) = unpack( 'N S A S A S', $buffer2 );
145 if ( @values < 5 || grep { !defined } @values ) {
146 $self->{fileobj}->close;
e96daec8 147 $self->_throw_error("Corrupted file - bad header");
e064ccd1 148 }
149
460b1067 150 #XXX Add warnings if values weren't set right
151 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
152
e064ccd1 153 return $bytes_read;
154}
155
460b1067 156sub setup_fh {
157 my $self = shift;
158 my ($obj) = @_;
70b55428 159
e96daec8 160 my $fh = $self->_fh;
6fde4ed2 161 flock $fh, LOCK_EX;
118ba343 162
260a80b4 163 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 164 unless ( $obj->{base_offset} ) {
e96daec8 165 my $bytes_read = $self->read_file_header;
118ba343 166
260a80b4 167 $self->calculate_sizes;
168
118ba343 169 ##
fde3db1a 170 # File is empty -- write header and master index
118ba343 171 ##
172 if (!$bytes_read) {
e96daec8 173 $self->write_file_header;
118ba343 174
e96daec8 175 $obj->{base_offset} = $self->_request_space( $self->tag_size( $self->{index_size} ) );
118ba343 176
9e4f83a0 177 $self->write_tag(
e96daec8 178 $obj->_base_offset, $obj->_type,
f37c15ab 179 chr(0)x$self->{index_size},
118ba343 180 );
181
182 # Flush the filehandle
183 my $old_fh = select $fh;
184 my $old_af = $|; $| = 1; $| = $old_af;
185 select $old_fh;
186 }
187 else {
188 $obj->{base_offset} = $bytes_read;
189
190 ##
fde3db1a 191 # Get our type from master index header
118ba343 192 ##
e96daec8 193 my $tag = $self->load_tag($obj->_base_offset)
194 or $self->_throw_error("Corrupted file, no master index record");
118ba343 195
e96daec8 196 unless ($obj->_type eq $tag->{signature}) {
197 $self->_throw_error("File type mismatch");
118ba343 198 }
199 }
118ba343 200 }
260a80b4 201 else {
202 $self->calculate_sizes;
203 }
e06824f8 204
673464d9 205 #XXX We have to make sure we don't mess up when autoflush isn't turned on
e96daec8 206 unless ( $self->_fileobj->{inode} ) {
207 my @stats = stat($fh);
208 $self->_fileobj->{inode} = $stats[1];
209 $self->_fileobj->{end} = $stats[7];
70b55428 210 }
211
6fde4ed2 212 flock $fh, LOCK_UN;
213
70b55428 214 return 1;
215}
216
16d1ad9b 217sub tag_size {
218 my $self = shift;
219 my ($size) = @_;
220 return SIG_SIZE + $self->{data_size} + $size;
221}
222
9e4f83a0 223sub write_tag {
20f7b20c 224 ##
225 # Given offset, signature and content, create tag and write to disk
226 ##
d4b1166e 227 my $self = shift;
e96daec8 228 my ($offset, $sig, $content) = @_;
f37c15ab 229 my $size = length( $content );
20f7b20c 230
e96daec8 231 my $fh = $self->_fh;
d4b1166e 232
f37c15ab 233 if ( defined $offset ) {
e96daec8 234 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
f37c15ab 235 }
236
251dfd0e 237 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 238
f37c15ab 239 return unless defined $offset;
240
20f7b20c 241 return {
242 signature => $sig,
243 size => $size,
8db25060 244 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 245 content => $content
246 };
d4b1166e 247}
248
249sub load_tag {
20f7b20c 250 ##
251 # Given offset, load single tag and return signature, size and data
252 ##
d4b1166e 253 my $self = shift;
e96daec8 254 my ($offset) = @_;
20f7b20c 255
e06824f8 256# print join(':',map{$_||''}caller(1)), $/;
257
e96daec8 258 my $fh = $self->_fh;
d4b1166e 259
e96daec8 260 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
e5fc7e69 261
75be6413 262 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 263 return if eof $fh;
20f7b20c 264
d4b1166e 265 my $b;
8db25060 266 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 267 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 268
269 my $buffer;
270 read( $fh, $buffer, $size);
271
272 return {
273 signature => $sig,
274 size => $size,
8db25060 275 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 276 content => $buffer
277 };
d4b1166e 278}
279
56ec4340 280sub _get_dbm_object {
281 my $item = shift;
282
283 my $obj = eval {
284 local $SIG{__DIE__};
285 if ($item->isa( 'DBM::Deep' )) {
286 return $item;
287 }
288 return;
289 };
290 return $obj if $obj;
291
292 my $r = Scalar::Util::reftype( $item ) || '';
293 if ( $r eq 'HASH' ) {
294 my $obj = eval {
295 local $SIG{__DIE__};
296 my $obj = tied(%$item);
297 if ($obj->isa( 'DBM::Deep' )) {
298 return $obj;
299 }
300 return;
301 };
302 return $obj if $obj;
303 }
304 elsif ( $r eq 'ARRAY' ) {
305 my $obj = eval {
306 local $SIG{__DIE__};
307 my $obj = tied(@$item);
308 if ($obj->isa( 'DBM::Deep' )) {
309 return $obj;
310 }
311 return;
312 };
313 return $obj if $obj;
314 }
315
316 return;
317}
318
29b01632 319sub _length_needed {
320 my $self = shift;
e96daec8 321 my ($value, $key) = @_;
29b01632 322
323 my $is_dbm_deep = eval {
324 local $SIG{'__DIE__'};
325 $value->isa( 'DBM::Deep' );
326 };
327
f37c15ab 328 my $len = SIG_SIZE + $self->{data_size}
329 + $self->{data_size} + length( $key );
29b01632 330
e96daec8 331 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
f37c15ab 332 return $len + $self->{long_size};
29b01632 333 }
334
335 my $r = Scalar::Util::reftype( $value ) || '';
e96daec8 336 if ( $self->_fileobj->{autobless} ) {
9a187d8c 337 # This is for the bit saying whether or not this thing is blessed.
338 $len += 1;
339 }
340
29b01632 341 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 342 if ( defined $value ) {
343 $len += length( $value );
344 }
345 return $len;
29b01632 346 }
347
f37c15ab 348 $len += $self->{index_size};
29b01632 349
350 # if autobless is enabled, must also take into consideration
f37c15ab 351 # the class name as it is stored after the key.
e96daec8 352 if ( $self->_fileobj->{autobless} ) {
56ec4340 353 my $c = Scalar::Util::blessed($value);
354 if ( defined $c && !$is_dbm_deep ) {
355 $len += $self->{data_size} + length($c);
29b01632 356 }
357 }
358
f37c15ab 359 return $len;
29b01632 360}
361
20f7b20c 362sub add_bucket {
363 ##
364 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
365 # plain (undigested) key and value.
366 ##
d4b1166e 367 my $self = shift;
e96daec8 368 my ($tag, $md5, $plain_key, $value) = @_;
75be6413 369
eea0d863 370 # This verifies that only supported values will be stored.
371 {
372 my $r = Scalar::Util::reftype( $value );
373 last if !defined $r;
374
375 last if $r eq 'HASH';
376 last if $r eq 'ARRAY';
377
e96daec8 378 $self->_throw_error(
eea0d863 379 "Storage of variables of type '$r' is not supported."
380 );
381 }
382
20f7b20c 383 my $location = 0;
384 my $result = 2;
385
e96daec8 386 my $root = $self->_fileobj;
387 my $fh = $self->_fh;
20f7b20c 388
e96daec8 389 my $actual_length = $self->_length_needed( $value, $plain_key );
20f7b20c 390
9a187d8c 391 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
75be6413 392
e96daec8 393# $self->_release_space( $size, $subloc );
386bab6c 394 # Updating a known md5
f9c33187 395#XXX This needs updating to use _release_space
386bab6c 396 if ( $subloc ) {
397 $result = 1;
20f7b20c 398
386bab6c 399 if ($actual_length <= $size) {
400 $location = $subloc;
20f7b20c 401 }
75be6413 402 else {
e96daec8 403 $location = $self->_request_space( $actual_length );
386bab6c 404 seek(
405 $fh,
9a187d8c 406 $tag->{offset} + $offset
407 + $self->{hash_size} + $root->{file_offset},
386bab6c 408 SEEK_SET,
409 );
9a187d8c 410 print( $fh pack($self->{long_pack}, $location ) );
411 print( $fh pack($self->{long_pack}, $actual_length ) );
75be6413 412 }
75be6413 413 }
386bab6c 414 # Adding a new md5
415 elsif ( defined $offset ) {
e96daec8 416 $location = $self->_request_space( $actual_length );
386bab6c 417
418 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 419 print( $fh $md5 . pack($self->{long_pack}, $location ) );
420 print( $fh pack($self->{long_pack}, $actual_length ) );
386bab6c 421 }
422 # If bucket didn't fit into list, split into a new index level
f9c33187 423 # split_index() will do the _request_space() call
386bab6c 424 else {
e96daec8 425 $location = $self->split_index( $md5, $tag );
386bab6c 426 }
20f7b20c 427
e96daec8 428 $self->write_value( $location, $plain_key, $value );
d5d7c51d 429
430 return $result;
431}
432
433sub write_value {
434 my $self = shift;
e96daec8 435 my ($location, $key, $value) = @_;
d5d7c51d 436
e96daec8 437 my $fh = $self->_fh;
438 my $root = $self->_fileobj;
d5d7c51d 439
9d4fa373 440 my $dbm_deep_obj = _get_dbm_object( $value );
e96daec8 441 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) {
442 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 443 }
d5d7c51d 444
445 seek($fh, $location + $root->{file_offset}, SEEK_SET);
446
20f7b20c 447 ##
d5d7c51d 448 # Write signature based on content type, set content length and write
449 # actual value.
20f7b20c 450 ##
9d4fa373 451 my $r = Scalar::Util::reftype( $value ) || '';
452 if ( $dbm_deep_obj ) {
e96daec8 453 $self->write_tag( undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 454 }
455 elsif ($r eq 'HASH') {
9d4fa373 456 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 457 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 458 }
e96daec8 459 $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 460 }
461 elsif ($r eq 'ARRAY') {
9d4fa373 462 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 463 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 464 }
e96daec8 465 $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 466 }
467 elsif (!defined($value)) {
e96daec8 468 $self->write_tag( undef, SIG_NULL, '' );
d5d7c51d 469 }
470 else {
e96daec8 471 $self->write_tag( undef, SIG_DATA, $value );
d5d7c51d 472 }
20f7b20c 473
d5d7c51d 474 ##
475 # Plain key is stored AFTER value, as keys are typically fetched less often.
476 ##
477 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 478
9a187d8c 479 # Internal references don't care about autobless
9d4fa373 480 return 1 if $dbm_deep_obj;
9a187d8c 481
d5d7c51d 482 ##
483 # If value is blessed, preserve class name
484 ##
485 if ( $root->{autobless} ) {
56ec4340 486 my $c = Scalar::Util::blessed($value);
487 if ( defined $c && !$dbm_deep_obj ) {
d5d7c51d 488 print( $fh chr(1) );
56ec4340 489 print( $fh pack($self->{data_pack}, length($c)) . $c );
20f7b20c 490 }
d5d7c51d 491 else {
492 print( $fh chr(0) );
20f7b20c 493 }
d5d7c51d 494 }
20f7b20c 495
d5d7c51d 496 ##
56ec4340 497 # Tie the passed in reference so that changes to it are reflected in the
498 # datafile. The use of $location as the base_offset will act as the
499 # the linkage between parent and child.
500 #
501 # The overall assignment is a hack around the fact that just tying doesn't
502 # store the values. This may not be the wrong thing to do.
d5d7c51d 503 ##
9d4fa373 504 if ($r eq 'HASH') {
505 my %x = %$value;
506 tie %$value, 'DBM::Deep', {
507 base_offset => $location,
460b1067 508 fileobj => $root,
9d4fa373 509 };
510 %$value = %x;
511 }
512 elsif ($r eq 'ARRAY') {
513 my @x = @$value;
514 tie @$value, 'DBM::Deep', {
515 base_offset => $location,
460b1067 516 fileobj => $root,
9d4fa373 517 };
518 @$value = @x;
20f7b20c 519 }
d4b1166e 520
d5d7c51d 521 return 1;
d4b1166e 522}
523
75be6413 524sub split_index {
525 my $self = shift;
e96daec8 526 my ($md5, $tag) = @_;
75be6413 527
e96daec8 528 my $fh = $self->_fh;
529 my $root = $self->_fileobj;
16d1ad9b 530
531 my $loc = $self->_request_space(
e96daec8 532 $self->tag_size( $self->{index_size} ),
16d1ad9b 533 );
534
7b1e1aa1 535 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 536 print( $fh pack($self->{long_pack}, $loc) );
75be6413 537
9e4f83a0 538 my $index_tag = $self->write_tag(
e96daec8 539 $loc, SIG_INDEX,
f37c15ab 540 chr(0)x$self->{index_size},
75be6413 541 );
542
f9c33187 543 my $newtag_loc = $self->_request_space(
e96daec8 544 $self->tag_size( $self->{bucket_list_size} ),
f9c33187 545 );
75be6413 546
7b1e1aa1 547 my $keys = $tag->{content}
f9c33187 548 . $md5 . pack($self->{long_pack}, $newtag_loc)
549 . pack($self->{long_pack}, 0);
75be6413 550
f9c33187 551 my @newloc = ();
75be6413 552 BUCKET:
553 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 554 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 555
f9c33187 556 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
557 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 558
75be6413 559 my $num = ord(substr($key, $tag->{ch} + 1, 1));
560
f9c33187 561 if ($newloc[$num]) {
562 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 563 my $subkeys;
564 read( $fh, $subkeys, $self->{bucket_list_size});
565
f9c33187 566 # This is looking for the first empty spot
7b1e1aa1 567 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 568 { content => $subkeys }, '',
7b1e1aa1 569 );
75be6413 570
f9c33187 571 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
572 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 573
574 next;
75be6413 575 }
75be6413 576
7b1e1aa1 577 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 578
7b1e1aa1 579 my $loc = $self->_request_space(
e96daec8 580 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 581 );
2603d86e 582
7b1e1aa1 583 print( $fh pack($self->{long_pack}, $loc) );
75be6413 584
7b1e1aa1 585 my $blist_tag = $self->write_tag(
e96daec8 586 $loc, SIG_BLIST,
7b1e1aa1 587 chr(0)x$self->{bucket_list_size},
588 );
589
590 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 591 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 592
f9c33187 593 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 594 }
595
596 $self->_release_space(
e96daec8 597 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 598 $tag->{offset} - SIG_SIZE - $self->{data_size},
599 );
75be6413 600
f9c33187 601 return $newtag_loc;
75be6413 602}
603
8db25060 604sub read_from_loc {
605 my $self = shift;
e96daec8 606 my ($subloc) = @_;
8db25060 607
e96daec8 608 my $fh = $self->_fh;
8db25060 609
610 ##
611 # Found match -- seek to offset and read signature
612 ##
613 my $signature;
e96daec8 614 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
8db25060 615 read( $fh, $signature, SIG_SIZE);
616
617 ##
618 # If value is a hash or array, return new DBM::Deep object with correct offset
619 ##
620 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 621 my $new_obj = DBM::Deep->new({
8db25060 622 type => $signature,
623 base_offset => $subloc,
e96daec8 624 fileobj => $self->_fileobj,
685e40f1 625 });
8db25060 626
460b1067 627 if ($new_obj->_fileobj->{autobless}) {
8db25060 628 ##
629 # Skip over value and plain key to see if object needs
630 # to be re-blessed
631 ##
632 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
633
634 my $size;
c6ea6b6c 635 read( $fh, $size, $self->{data_size});
636 $size = unpack($self->{data_pack}, $size);
8db25060 637 if ($size) { seek($fh, $size, SEEK_CUR); }
638
639 my $bless_bit;
640 read( $fh, $bless_bit, 1);
641 if (ord($bless_bit)) {
642 ##
643 # Yes, object needs to be re-blessed
644 ##
645 my $class_name;
c6ea6b6c 646 read( $fh, $size, $self->{data_size});
647 $size = unpack($self->{data_pack}, $size);
8db25060 648 if ($size) { read( $fh, $class_name, $size); }
685e40f1 649 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 650 }
651 }
652
685e40f1 653 return $new_obj;
8db25060 654 }
655 elsif ( $signature eq SIG_INTERNAL ) {
656 my $size;
657 read( $fh, $size, $self->{data_size});
658 $size = unpack($self->{data_pack}, $size);
659
660 if ( $size ) {
661 my $new_loc;
662 read( $fh, $new_loc, $size );
663 $new_loc = unpack( $self->{long_pack}, $new_loc );
664
e96daec8 665 return $self->read_from_loc( $new_loc );
8db25060 666 }
667 else {
668 return;
669 }
670 }
671 ##
672 # Otherwise return actual value
673 ##
460b1067 674 elsif ( $signature eq SIG_DATA ) {
8db25060 675 my $size;
676 read( $fh, $size, $self->{data_size});
677 $size = unpack($self->{data_pack}, $size);
678
679 my $value = '';
680 if ($size) { read( $fh, $value, $size); }
681 return $value;
682 }
683
684 ##
685 # Key exists, but content is null
686 ##
687 return;
688}
689
9020ee8c 690sub get_bucket_value {
beac1dff 691 ##
692 # Fetch single value given tag and MD5 digested key.
693 ##
694 my $self = shift;
e96daec8 695 my ($tag, $md5) = @_;
9020ee8c 696
9a187d8c 697 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
386bab6c 698 if ( $subloc ) {
e96daec8 699 return $self->read_from_loc( $subloc );
386bab6c 700 }
beac1dff 701 return;
9020ee8c 702}
ab0e4957 703
704sub delete_bucket {
beac1dff 705 ##
706 # Delete single key/value pair given tag and MD5 digested key.
707 ##
708 my $self = shift;
e96daec8 709 my ($tag, $md5) = @_;
ab0e4957 710
9a187d8c 711 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
f9c33187 712#XXX This needs _release_space()
386bab6c 713 if ( $subloc ) {
e96daec8 714 my $fh = $self->_fh;
715 seek($fh, $tag->{offset} + $offset + $self->_fileobj->{file_offset}, SEEK_SET);
386bab6c 716 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 717 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 718
ab0e4957 719 return 1;
386bab6c 720 }
beac1dff 721 return;
ab0e4957 722}
723
912d50b1 724sub bucket_exists {
beac1dff 725 ##
726 # Check existence of single key given tag and MD5 digested key.
727 ##
728 my $self = shift;
e96daec8 729 my ($tag, $md5) = @_;
912d50b1 730
9a187d8c 731 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 732 return $subloc && 1;
912d50b1 733}
734
6736c116 735sub find_bucket_list {
beac1dff 736 ##
737 # Locate offset for bucket list, given digested key
738 ##
739 my $self = shift;
e96daec8 740 my ($offset, $md5, $args) = @_;
d0b74c17 741 $args = {} unless $args;
742
beac1dff 743 ##
744 # Locate offset for bucket list using digest index system
745 ##
e96daec8 746 my $tag = $self->load_tag( $offset )
747 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 748
e5fc7e69 749 my $ch = 0;
8db25060 750 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 751 my $num = ord substr($md5, $ch, 1);
752
753 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 754 $tag = $self->index_lookup( $tag, $num );
d0b74c17 755
756 if (!$tag) {
29b01632 757 return if !$args->{create};
d0b74c17 758
16d1ad9b 759 my $loc = $self->_request_space(
e96daec8 760 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 761 );
762
e96daec8 763 my $fh = $self->_fh;
764 seek($fh, $ref_loc + $self->_fileobj->{file_offset}, SEEK_SET);
16d1ad9b 765 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 766
9e4f83a0 767 $tag = $self->write_tag(
e96daec8 768 $loc, SIG_BLIST,
f37c15ab 769 chr(0)x$self->{bucket_list_size},
d5d7c51d 770 );
771
772 $tag->{ref_loc} = $ref_loc;
773 $tag->{ch} = $ch;
774
775 last;
d0b74c17 776 }
777
16d1ad9b 778 $tag->{ch} = $ch++;
d0b74c17 779 $tag->{ref_loc} = $ref_loc;
beac1dff 780 }
d0b74c17 781
beac1dff 782 return $tag;
6736c116 783}
784
d0b74c17 785sub index_lookup {
786 ##
787 # Given index tag, lookup single entry in index and return .
788 ##
789 my $self = shift;
e96daec8 790 my ($tag, $index) = @_;
d0b74c17 791
792 my $location = unpack(
793 $self->{long_pack},
794 substr(
795 $tag->{content},
796 $index * $self->{long_size},
797 $self->{long_size},
798 ),
799 );
800
801 if (!$location) { return; }
802
e96daec8 803 return $self->load_tag( $location );
d0b74c17 804}
805
6736c116 806sub traverse_index {
beac1dff 807 ##
808 # Scan index and recursively step into deeper levels, looking for next key.
809 ##
6736c116 810 my $self = shift;
811 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 812
e96daec8 813 my $tag = $self->load_tag( $offset );
6736c116 814
e96daec8 815 my $fh = $self->_fh;
d0b74c17 816
8db25060 817 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 818 my $content = $tag->{content};
e5fc7e69 819 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 820
d5d7c51d 821 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 822 my $subloc = unpack(
823 $self->{long_pack},
e06824f8 824 substr(
825 $content,
826 $idx * $self->{long_size},
827 $self->{long_size},
828 ),
e5fc7e69 829 );
830
beac1dff 831 if ($subloc) {
e5fc7e69 832 my $result = $self->traverse_index(
833 $obj, $subloc, $ch + 1, $force_return_next,
834 );
835
beac1dff 836 if (defined($result)) { return $result; }
837 }
838 } # index loop
d0b74c17 839
beac1dff 840 $obj->{return_next} = 1;
841 } # tag is an index
d0b74c17 842
e5fc7e69 843 else {
beac1dff 844 my $keys = $tag->{content};
845 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 846
beac1dff 847 ##
848 # Iterate through buckets, looking for a key match
849 ##
8db25060 850 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 851 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 852
8db25060 853 # End of bucket list -- return to outer loop
beac1dff 854 if (!$subloc) {
beac1dff 855 $obj->{return_next} = 1;
856 last;
857 }
8db25060 858 # Located previous key -- return next one found
beac1dff 859 elsif ($key eq $obj->{prev_md5}) {
beac1dff 860 $obj->{return_next} = 1;
861 next;
862 }
8db25060 863 # Seek to bucket location and skip over signature
beac1dff 864 elsif ($obj->{return_next}) {
e96daec8 865 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
d0b74c17 866
beac1dff 867 # Skip over value to get to plain key
8db25060 868 my $sig;
869 read( $fh, $sig, SIG_SIZE );
870
beac1dff 871 my $size;
e5fc7e69 872 read( $fh, $size, $self->{data_size});
873 $size = unpack($self->{data_pack}, $size);
beac1dff 874 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 875
beac1dff 876 # Read in plain key and return as scalar
beac1dff 877 my $plain_key;
e5fc7e69 878 read( $fh, $size, $self->{data_size});
879 $size = unpack($self->{data_pack}, $size);
beac1dff 880 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 881
beac1dff 882 return $plain_key;
883 }
8db25060 884 }
d0b74c17 885
beac1dff 886 $obj->{return_next} = 1;
887 } # tag is a bucket list
d0b74c17 888
beac1dff 889 return;
6736c116 890}
891
892sub get_next_key {
beac1dff 893 ##
894 # Locate next key, given digested previous one
895 ##
6736c116 896 my $self = shift;
897 my ($obj) = @_;
d0b74c17 898
beac1dff 899 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
900 $obj->{return_next} = 0;
d0b74c17 901
beac1dff 902 ##
903 # If the previous key was not specifed, start at the top and
904 # return the first one found.
905 ##
906 if (!$obj->{prev_md5}) {
907 $obj->{prev_md5} = chr(0) x $self->{hash_size};
908 $obj->{return_next} = 1;
909 }
d0b74c17 910
beac1dff 911 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 912}
913
75be6413 914# Utilities
915
9cec1360 916sub _get_key_subloc {
75be6413 917 my $self = shift;
918 my ($keys, $idx) = @_;
919
6ed2f3df 920 my ($key, $subloc, $size) = unpack(
921 "a$self->{hash_size} $self->{long_pack} $self->{long_pack}",
75be6413 922 substr(
923 $keys,
9cec1360 924 ($idx * $self->{bucket_size}),
925 $self->{bucket_size},
75be6413 926 ),
927 );
928
6ed2f3df 929 return ($key, $subloc, $size);
75be6413 930}
931
d608b06e 932sub _find_in_buckets {
933 my $self = shift;
934 my ($tag, $md5) = @_;
935
936 BUCKET:
937 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
9a187d8c 938 my ($key, $subloc, $size) = $self->_get_key_subloc(
939 $tag->{content}, $i,
940 );
d608b06e 941
9a187d8c 942 return ($subloc, $i * $self->{bucket_size}, $size) unless $subloc;
d608b06e 943
944 next BUCKET if $key ne $md5;
945
9a187d8c 946 return ($subloc, $i * $self->{bucket_size}, $size);
d608b06e 947 }
948
949 return;
950}
951
994ccd8e 952sub _request_space {
953 my $self = shift;
e96daec8 954 my ($size) = @_;
994ccd8e 955
460b1067 956 my $loc = $self->_fileobj->{end};
957 $self->_fileobj->{end} += $size;
994ccd8e 958
959 return $loc;
960}
961
962sub _release_space {
963 my $self = shift;
e96daec8 964 my ($size, $loc) = @_;
994ccd8e 965
7b1e1aa1 966 my $next_loc = 0;
967
e96daec8 968 my $fh = $self->_fh;
969 seek( $fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET );
7b1e1aa1 970 print( $fh SIG_FREE
971 . pack($self->{long_pack}, $size )
972 . pack($self->{long_pack}, $next_loc )
973 );
974
994ccd8e 975 return;
976}
977
e96daec8 978sub _throw_error {
979 die "DBM::Deep: $_[1]\n";
980}
981
a20d9a3f 9821;
983__END__
d5d7c51d 984
985# This will be added in later, after more refactoring is done. This is an early
986# attempt at refactoring on the physical level instead of the virtual level.
987sub _read_at {
988 my $self = shift;
e96daec8 989 my ($spot, $amount, $unpack) = @_;
d5d7c51d 990
e96daec8 991 my $fh = $self->_fh;
992 seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );
d5d7c51d 993
994 my $buffer;
995 my $bytes_read = read( $fh, $buffer, $amount );
996
997 if ( $unpack ) {
998 $buffer = unpack( $unpack, $buffer );
999 }
1000
1001 if ( wantarray ) {
1002 return ($buffer, $bytes_read);
1003 }
1004 else {
1005 return $buffer;
1006 }
1007}
e96daec8 1008
1009sub _print_at {
1010 my $self = shift;
1011 my ($spot, $data) = @_;
1012
1013 my $fh = $self->_fh;
1014 seek( $fh, $spot, SEEK_SET );
1015 print( $fh $data );
1016
1017 return;
1018}
1019
1020sub get_file_version {
1021 my $self = shift;
1022
1023 my $fh = $self->_fh;
1024
1025 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1026 my $buffer;
1027 my $bytes_read = read( $fh, $buffer, 4 );
1028 unless ( $bytes_read == 4 ) {
1029 $self->_throw_error( "Cannot read file version" );
1030 }
1031
1032 return unpack( 'N', $buffer );
1033}
1034
1035sub write_file_version {
1036 my $self = shift;
1037 my ($new_version) = @_;
1038
1039 my $fh = $self->_fh;
1040
1041 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1042 print( $fh pack( 'N', $new_version ) );
1043
1044 return;
1045}
1046