Tagged 0.983 and removed the branch
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
8use Fcntl qw( :DEFAULT :flock :seek );
9
21838116 10# File-wide notes:
11# * All the local($/,$\); are to protect read() and print() from -l.
12
8db25060 13##
14# Setup file and tag signatures. These should never change.
15##
16sub SIG_FILE () { 'DPDB' }
460b1067 17sub SIG_HEADER () { 'h' }
8db25060 18sub SIG_INTERNAL () { 'i' }
19sub SIG_HASH () { 'H' }
20sub SIG_ARRAY () { 'A' }
8db25060 21sub SIG_NULL () { 'N' }
22sub SIG_DATA () { 'D' }
23sub SIG_INDEX () { 'I' }
24sub SIG_BLIST () { 'B' }
7b1e1aa1 25sub SIG_FREE () { 'F' }
8db25060 26sub SIG_SIZE () { 1 }
27
612969fb 28sub new {
29 my $class = shift;
30 my ($args) = @_;
31
32 my $self = bless {
33 long_size => 4,
34 long_pack => 'N',
35 data_size => 4,
36 data_pack => 'N',
251dfd0e 37
612969fb 38 digest => \&Digest::MD5::md5,
39 hash_size => 16,
251dfd0e 40
81d16922 41 ##
d5d7c51d 42 # Maximum number of buckets per list before another level of indexing is
e0098e7f 43 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 44 # files. DO NOT decrease this value below 16, due to risk of recursive
45 # reindex overrun.
81d16922 46 ##
612969fb 47 max_buckets => 16,
460b1067 48
49 fileobj => undef,
612969fb 50 }, $class;
51
e0098e7f 52 if ( defined $args->{pack_size} ) {
53 if ( lc $args->{pack_size} eq 'small' ) {
54 $args->{long_size} = 2;
55 $args->{long_pack} = 'S';
56 }
57 elsif ( lc $args->{pack_size} eq 'medium' ) {
58 $args->{long_size} = 4;
59 $args->{long_pack} = 'N';
60 }
61 elsif ( lc $args->{pack_size} eq 'large' ) {
62 $args->{long_size} = 8;
63 $args->{long_pack} = 'Q';
64 }
65 else {
66 die "Unknown pack_size value: '$args->{pack_size}'\n";
67 }
68 }
69
fde3db1a 70 # Grab the parameters we want to use
71 foreach my $param ( keys %$self ) {
72 next unless exists $args->{$param};
3e9498a1 73 $self->{$param} = $args->{$param};
fde3db1a 74 }
75
e0098e7f 76 if ( $self->{max_buckets} < 16 ) {
77 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
78 $self->{max_buckets} = 16;
79 }
80
260a80b4 81 return $self;
82}
83
460b1067 84sub _fileobj { return $_[0]{fileobj} }
85sub _fh { return $_[0]->_fileobj->{fh} }
86
260a80b4 87sub calculate_sizes {
88 my $self = shift;
89
28394a1a 90 #XXX Does this need to be updated with different hashing algorithms?
e0098e7f 91 $self->{index_size} = (2**8) * $self->{long_size};
28394a1a 92 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3;
e0098e7f 93 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 94
260a80b4 95 return;
1bf65be7 96}
97
fde3db1a 98sub write_file_header {
0d0f3d5d 99 my $self = shift;
0d0f3d5d 100
21838116 101 local($/,$\);
102
460b1067 103 my $fh = $self->_fh;
0d0f3d5d 104
e96daec8 105 my $loc = $self->_request_space( length( SIG_FILE ) + 21 );
460b1067 106 seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET);
260a80b4 107 print( $fh
108 SIG_FILE,
460b1067 109 SIG_HEADER,
110 pack('N', 1), # header version
111 pack('N', 12), # header size
112 pack('N', 0), # file version
260a80b4 113 pack('S', $self->{long_size}),
114 pack('A', $self->{long_pack}),
115 pack('S', $self->{data_size}),
116 pack('A', $self->{data_pack}),
117 pack('S', $self->{max_buckets}),
118 );
0d0f3d5d 119
120 return;
121}
122
fde3db1a 123sub read_file_header {
e064ccd1 124 my $self = shift;
e064ccd1 125
21838116 126 local($/,$\);
127
e96daec8 128 my $fh = $self->_fh;
e064ccd1 129
e96daec8 130 seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET);
42f79e07 131 my $buffer;
460b1067 132 my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 );
133
134 return unless $bytes_read;
135
136 my ($file_signature, $sig_header, $header_version, $size) = unpack(
137 'A4 A N N', $buffer
42f79e07 138 );
e064ccd1 139
460b1067 140 unless ( $file_signature eq SIG_FILE ) {
141 $self->{fileobj}->close;
e96daec8 142 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 143 }
260a80b4 144
460b1067 145 unless ( $sig_header eq SIG_HEADER ) {
146 $self->{fileobj}->close;
e96daec8 147 $self->_throw_error( "Old file version found." );
460b1067 148 }
9b2370e0 149
460b1067 150 my $buffer2;
151 $bytes_read += read( $fh, $buffer2, $size );
152 my ($file_version, @values) = unpack( 'N S A S A S', $buffer2 );
153 if ( @values < 5 || grep { !defined } @values ) {
154 $self->{fileobj}->close;
e96daec8 155 $self->_throw_error("Corrupted file - bad header");
e064ccd1 156 }
157
460b1067 158 #XXX Add warnings if values weren't set right
159 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
160
e064ccd1 161 return $bytes_read;
162}
163
460b1067 164sub setup_fh {
165 my $self = shift;
166 my ($obj) = @_;
70b55428 167
e96daec8 168 my $fh = $self->_fh;
6fde4ed2 169 flock $fh, LOCK_EX;
118ba343 170
260a80b4 171 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 172 unless ( $obj->{base_offset} ) {
e96daec8 173 my $bytes_read = $self->read_file_header;
118ba343 174
260a80b4 175 $self->calculate_sizes;
176
118ba343 177 ##
fde3db1a 178 # File is empty -- write header and master index
118ba343 179 ##
180 if (!$bytes_read) {
e96daec8 181 $self->write_file_header;
118ba343 182
e96daec8 183 $obj->{base_offset} = $self->_request_space( $self->tag_size( $self->{index_size} ) );
118ba343 184
9e4f83a0 185 $self->write_tag(
e96daec8 186 $obj->_base_offset, $obj->_type,
f37c15ab 187 chr(0)x$self->{index_size},
118ba343 188 );
189
190 # Flush the filehandle
191 my $old_fh = select $fh;
192 my $old_af = $|; $| = 1; $| = $old_af;
193 select $old_fh;
194 }
195 else {
196 $obj->{base_offset} = $bytes_read;
197
198 ##
fde3db1a 199 # Get our type from master index header
118ba343 200 ##
e96daec8 201 my $tag = $self->load_tag($obj->_base_offset)
202 or $self->_throw_error("Corrupted file, no master index record");
118ba343 203
e96daec8 204 unless ($obj->_type eq $tag->{signature}) {
205 $self->_throw_error("File type mismatch");
118ba343 206 }
207 }
118ba343 208 }
260a80b4 209 else {
210 $self->calculate_sizes;
211 }
e06824f8 212
673464d9 213 #XXX We have to make sure we don't mess up when autoflush isn't turned on
e96daec8 214 unless ( $self->_fileobj->{inode} ) {
215 my @stats = stat($fh);
216 $self->_fileobj->{inode} = $stats[1];
217 $self->_fileobj->{end} = $stats[7];
70b55428 218 }
219
6fde4ed2 220 flock $fh, LOCK_UN;
221
70b55428 222 return 1;
223}
224
16d1ad9b 225sub tag_size {
226 my $self = shift;
227 my ($size) = @_;
228 return SIG_SIZE + $self->{data_size} + $size;
229}
230
9e4f83a0 231sub write_tag {
20f7b20c 232 ##
233 # Given offset, signature and content, create tag and write to disk
234 ##
d4b1166e 235 my $self = shift;
e96daec8 236 my ($offset, $sig, $content) = @_;
f37c15ab 237 my $size = length( $content );
20f7b20c 238
21838116 239 local($/,$\);
240
e96daec8 241 my $fh = $self->_fh;
d4b1166e 242
f37c15ab 243 if ( defined $offset ) {
e96daec8 244 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
f37c15ab 245 }
246
251dfd0e 247 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 248
f37c15ab 249 return unless defined $offset;
250
20f7b20c 251 return {
252 signature => $sig,
253 size => $size,
8db25060 254 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 255 content => $content
256 };
d4b1166e 257}
258
259sub load_tag {
20f7b20c 260 ##
261 # Given offset, load single tag and return signature, size and data
262 ##
d4b1166e 263 my $self = shift;
e96daec8 264 my ($offset) = @_;
20f7b20c 265
21838116 266 local($/,$\);
267
e06824f8 268# print join(':',map{$_||''}caller(1)), $/;
269
e96daec8 270 my $fh = $self->_fh;
d4b1166e 271
e96daec8 272 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
e5fc7e69 273
75be6413 274 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 275 return if eof $fh;
20f7b20c 276
d4b1166e 277 my $b;
8db25060 278 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 279 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 280
281 my $buffer;
282 read( $fh, $buffer, $size);
283
284 return {
285 signature => $sig,
286 size => $size,
8db25060 287 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 288 content => $buffer
289 };
d4b1166e 290}
291
56ec4340 292sub _get_dbm_object {
293 my $item = shift;
294
295 my $obj = eval {
296 local $SIG{__DIE__};
297 if ($item->isa( 'DBM::Deep' )) {
298 return $item;
299 }
300 return;
301 };
302 return $obj if $obj;
303
304 my $r = Scalar::Util::reftype( $item ) || '';
305 if ( $r eq 'HASH' ) {
306 my $obj = eval {
307 local $SIG{__DIE__};
308 my $obj = tied(%$item);
309 if ($obj->isa( 'DBM::Deep' )) {
310 return $obj;
311 }
312 return;
313 };
314 return $obj if $obj;
315 }
316 elsif ( $r eq 'ARRAY' ) {
317 my $obj = eval {
318 local $SIG{__DIE__};
319 my $obj = tied(@$item);
320 if ($obj->isa( 'DBM::Deep' )) {
321 return $obj;
322 }
323 return;
324 };
325 return $obj if $obj;
326 }
327
328 return;
329}
330
29b01632 331sub _length_needed {
332 my $self = shift;
e96daec8 333 my ($value, $key) = @_;
29b01632 334
335 my $is_dbm_deep = eval {
336 local $SIG{'__DIE__'};
337 $value->isa( 'DBM::Deep' );
338 };
339
f37c15ab 340 my $len = SIG_SIZE + $self->{data_size}
341 + $self->{data_size} + length( $key );
29b01632 342
e96daec8 343 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
f37c15ab 344 return $len + $self->{long_size};
29b01632 345 }
346
347 my $r = Scalar::Util::reftype( $value ) || '';
e96daec8 348 if ( $self->_fileobj->{autobless} ) {
9a187d8c 349 # This is for the bit saying whether or not this thing is blessed.
350 $len += 1;
351 }
352
29b01632 353 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 354 if ( defined $value ) {
355 $len += length( $value );
356 }
357 return $len;
29b01632 358 }
359
f37c15ab 360 $len += $self->{index_size};
29b01632 361
362 # if autobless is enabled, must also take into consideration
f37c15ab 363 # the class name as it is stored after the key.
e96daec8 364 if ( $self->_fileobj->{autobless} ) {
56ec4340 365 my $c = Scalar::Util::blessed($value);
366 if ( defined $c && !$is_dbm_deep ) {
367 $len += $self->{data_size} + length($c);
29b01632 368 }
369 }
370
f37c15ab 371 return $len;
29b01632 372}
373
20f7b20c 374sub add_bucket {
375 ##
376 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
377 # plain (undigested) key and value.
378 ##
d4b1166e 379 my $self = shift;
e96daec8 380 my ($tag, $md5, $plain_key, $value) = @_;
75be6413 381
21838116 382 local($/,$\);
383
eea0d863 384 # This verifies that only supported values will be stored.
385 {
386 my $r = Scalar::Util::reftype( $value );
387 last if !defined $r;
388
389 last if $r eq 'HASH';
390 last if $r eq 'ARRAY';
391
e96daec8 392 $self->_throw_error(
eea0d863 393 "Storage of variables of type '$r' is not supported."
394 );
395 }
396
20f7b20c 397 my $location = 0;
398 my $result = 2;
399
e96daec8 400 my $root = $self->_fileobj;
401 my $fh = $self->_fh;
20f7b20c 402
e96daec8 403 my $actual_length = $self->_length_needed( $value, $plain_key );
20f7b20c 404
21838116 405 #ACID - This is a mutation. Must only find the exact transaction
406 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );
75be6413 407
e96daec8 408# $self->_release_space( $size, $subloc );
386bab6c 409 # Updating a known md5
f9c33187 410#XXX This needs updating to use _release_space
386bab6c 411 if ( $subloc ) {
412 $result = 1;
20f7b20c 413
386bab6c 414 if ($actual_length <= $size) {
415 $location = $subloc;
20f7b20c 416 }
75be6413 417 else {
e96daec8 418 $location = $self->_request_space( $actual_length );
386bab6c 419 seek(
420 $fh,
9a187d8c 421 $tag->{offset} + $offset
422 + $self->{hash_size} + $root->{file_offset},
386bab6c 423 SEEK_SET,
424 );
9a187d8c 425 print( $fh pack($self->{long_pack}, $location ) );
426 print( $fh pack($self->{long_pack}, $actual_length ) );
28394a1a 427 print( $fh pack($self->{long_pack}, $root->transaction_id ) );
75be6413 428 }
75be6413 429 }
386bab6c 430 # Adding a new md5
431 elsif ( defined $offset ) {
e96daec8 432 $location = $self->_request_space( $actual_length );
386bab6c 433
434 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 435 print( $fh $md5 . pack($self->{long_pack}, $location ) );
436 print( $fh pack($self->{long_pack}, $actual_length ) );
28394a1a 437 print( $fh pack($self->{long_pack}, $root->transaction_id ) );
386bab6c 438 }
439 # If bucket didn't fit into list, split into a new index level
f9c33187 440 # split_index() will do the _request_space() call
386bab6c 441 else {
e96daec8 442 $location = $self->split_index( $md5, $tag );
386bab6c 443 }
20f7b20c 444
e96daec8 445 $self->write_value( $location, $plain_key, $value );
d5d7c51d 446
447 return $result;
448}
449
450sub write_value {
451 my $self = shift;
e96daec8 452 my ($location, $key, $value) = @_;
d5d7c51d 453
21838116 454 local($/,$\);
455
e96daec8 456 my $fh = $self->_fh;
457 my $root = $self->_fileobj;
d5d7c51d 458
9d4fa373 459 my $dbm_deep_obj = _get_dbm_object( $value );
e96daec8 460 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) {
461 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 462 }
d5d7c51d 463
464 seek($fh, $location + $root->{file_offset}, SEEK_SET);
465
20f7b20c 466 ##
d5d7c51d 467 # Write signature based on content type, set content length and write
468 # actual value.
20f7b20c 469 ##
9d4fa373 470 my $r = Scalar::Util::reftype( $value ) || '';
471 if ( $dbm_deep_obj ) {
e96daec8 472 $self->write_tag( undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 473 }
474 elsif ($r eq 'HASH') {
9d4fa373 475 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 476 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 477 }
e96daec8 478 $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 479 }
480 elsif ($r eq 'ARRAY') {
9d4fa373 481 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 482 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 483 }
e96daec8 484 $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 485 }
486 elsif (!defined($value)) {
e96daec8 487 $self->write_tag( undef, SIG_NULL, '' );
d5d7c51d 488 }
489 else {
e96daec8 490 $self->write_tag( undef, SIG_DATA, $value );
d5d7c51d 491 }
20f7b20c 492
d5d7c51d 493 ##
494 # Plain key is stored AFTER value, as keys are typically fetched less often.
495 ##
496 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 497
9a187d8c 498 # Internal references don't care about autobless
9d4fa373 499 return 1 if $dbm_deep_obj;
9a187d8c 500
d5d7c51d 501 ##
502 # If value is blessed, preserve class name
503 ##
504 if ( $root->{autobless} ) {
56ec4340 505 my $c = Scalar::Util::blessed($value);
506 if ( defined $c && !$dbm_deep_obj ) {
d5d7c51d 507 print( $fh chr(1) );
56ec4340 508 print( $fh pack($self->{data_pack}, length($c)) . $c );
20f7b20c 509 }
d5d7c51d 510 else {
511 print( $fh chr(0) );
20f7b20c 512 }
d5d7c51d 513 }
20f7b20c 514
d5d7c51d 515 ##
56ec4340 516 # Tie the passed in reference so that changes to it are reflected in the
517 # datafile. The use of $location as the base_offset will act as the
518 # the linkage between parent and child.
519 #
520 # The overall assignment is a hack around the fact that just tying doesn't
521 # store the values. This may not be the wrong thing to do.
d5d7c51d 522 ##
9d4fa373 523 if ($r eq 'HASH') {
524 my %x = %$value;
525 tie %$value, 'DBM::Deep', {
526 base_offset => $location,
460b1067 527 fileobj => $root,
9d4fa373 528 };
529 %$value = %x;
530 }
531 elsif ($r eq 'ARRAY') {
532 my @x = @$value;
533 tie @$value, 'DBM::Deep', {
534 base_offset => $location,
460b1067 535 fileobj => $root,
9d4fa373 536 };
537 @$value = @x;
20f7b20c 538 }
d4b1166e 539
d5d7c51d 540 return 1;
d4b1166e 541}
542
75be6413 543sub split_index {
544 my $self = shift;
e96daec8 545 my ($md5, $tag) = @_;
75be6413 546
21838116 547 local($/,$\);
548
e96daec8 549 my $fh = $self->_fh;
550 my $root = $self->_fileobj;
16d1ad9b 551
552 my $loc = $self->_request_space(
e96daec8 553 $self->tag_size( $self->{index_size} ),
16d1ad9b 554 );
555
7b1e1aa1 556 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 557 print( $fh pack($self->{long_pack}, $loc) );
75be6413 558
9e4f83a0 559 my $index_tag = $self->write_tag(
e96daec8 560 $loc, SIG_INDEX,
f37c15ab 561 chr(0)x$self->{index_size},
75be6413 562 );
563
f9c33187 564 my $newtag_loc = $self->_request_space(
e96daec8 565 $self->tag_size( $self->{bucket_list_size} ),
f9c33187 566 );
75be6413 567
7b1e1aa1 568 my $keys = $tag->{content}
f9c33187 569 . $md5 . pack($self->{long_pack}, $newtag_loc)
28394a1a 570 . pack($self->{long_pack}, 0) # size
571 . pack($self->{long_pack}, 0); # transaction #
75be6413 572
f9c33187 573 my @newloc = ();
75be6413 574 BUCKET:
575 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 576 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 577
f9c33187 578 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
579 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 580
75be6413 581 my $num = ord(substr($key, $tag->{ch} + 1, 1));
582
f9c33187 583 if ($newloc[$num]) {
584 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 585 my $subkeys;
586 read( $fh, $subkeys, $self->{bucket_list_size});
587
f9c33187 588 # This is looking for the first empty spot
7b1e1aa1 589 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 590 { content => $subkeys }, '',
7b1e1aa1 591 );
75be6413 592
f9c33187 593 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
594 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 595
596 next;
75be6413 597 }
75be6413 598
7b1e1aa1 599 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 600
7b1e1aa1 601 my $loc = $self->_request_space(
e96daec8 602 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 603 );
2603d86e 604
7b1e1aa1 605 print( $fh pack($self->{long_pack}, $loc) );
75be6413 606
7b1e1aa1 607 my $blist_tag = $self->write_tag(
e96daec8 608 $loc, SIG_BLIST,
7b1e1aa1 609 chr(0)x$self->{bucket_list_size},
610 );
611
612 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 613 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 614
f9c33187 615 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 616 }
617
618 $self->_release_space(
e96daec8 619 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 620 $tag->{offset} - SIG_SIZE - $self->{data_size},
621 );
75be6413 622
f9c33187 623 return $newtag_loc;
75be6413 624}
625
8db25060 626sub read_from_loc {
627 my $self = shift;
e96daec8 628 my ($subloc) = @_;
8db25060 629
21838116 630 local($/,$\);
631
e96daec8 632 my $fh = $self->_fh;
8db25060 633
634 ##
635 # Found match -- seek to offset and read signature
636 ##
637 my $signature;
e96daec8 638 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
8db25060 639 read( $fh, $signature, SIG_SIZE);
640
641 ##
642 # If value is a hash or array, return new DBM::Deep object with correct offset
643 ##
644 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 645 my $new_obj = DBM::Deep->new({
8db25060 646 type => $signature,
647 base_offset => $subloc,
e96daec8 648 fileobj => $self->_fileobj,
685e40f1 649 });
8db25060 650
460b1067 651 if ($new_obj->_fileobj->{autobless}) {
8db25060 652 ##
653 # Skip over value and plain key to see if object needs
654 # to be re-blessed
655 ##
656 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
657
658 my $size;
c6ea6b6c 659 read( $fh, $size, $self->{data_size});
660 $size = unpack($self->{data_pack}, $size);
8db25060 661 if ($size) { seek($fh, $size, SEEK_CUR); }
662
663 my $bless_bit;
664 read( $fh, $bless_bit, 1);
665 if (ord($bless_bit)) {
666 ##
667 # Yes, object needs to be re-blessed
668 ##
669 my $class_name;
c6ea6b6c 670 read( $fh, $size, $self->{data_size});
671 $size = unpack($self->{data_pack}, $size);
8db25060 672 if ($size) { read( $fh, $class_name, $size); }
685e40f1 673 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 674 }
675 }
676
685e40f1 677 return $new_obj;
8db25060 678 }
679 elsif ( $signature eq SIG_INTERNAL ) {
680 my $size;
681 read( $fh, $size, $self->{data_size});
682 $size = unpack($self->{data_pack}, $size);
683
684 if ( $size ) {
685 my $new_loc;
686 read( $fh, $new_loc, $size );
687 $new_loc = unpack( $self->{long_pack}, $new_loc );
688
e96daec8 689 return $self->read_from_loc( $new_loc );
8db25060 690 }
691 else {
692 return;
693 }
694 }
695 ##
696 # Otherwise return actual value
697 ##
460b1067 698 elsif ( $signature eq SIG_DATA ) {
8db25060 699 my $size;
700 read( $fh, $size, $self->{data_size});
701 $size = unpack($self->{data_pack}, $size);
702
703 my $value = '';
704 if ($size) { read( $fh, $value, $size); }
705 return $value;
706 }
707
708 ##
709 # Key exists, but content is null
710 ##
711 return;
712}
713
9020ee8c 714sub get_bucket_value {
beac1dff 715 ##
716 # Fetch single value given tag and MD5 digested key.
717 ##
718 my $self = shift;
e96daec8 719 my ($tag, $md5) = @_;
9020ee8c 720
21838116 721 #ACID - This is a read. Can find exact or HEAD
9a187d8c 722 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
386bab6c 723 if ( $subloc ) {
e96daec8 724 return $self->read_from_loc( $subloc );
386bab6c 725 }
beac1dff 726 return;
9020ee8c 727}
ab0e4957 728
729sub delete_bucket {
beac1dff 730 ##
731 # Delete single key/value pair given tag and MD5 digested key.
732 ##
733 my $self = shift;
e96daec8 734 my ($tag, $md5) = @_;
ab0e4957 735
21838116 736 local($/,$\);
737
738 #ACID - This is a mutation. Must only find the exact transaction
739 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );
f9c33187 740#XXX This needs _release_space()
386bab6c 741 if ( $subloc ) {
e96daec8 742 my $fh = $self->_fh;
743 seek($fh, $tag->{offset} + $offset + $self->_fileobj->{file_offset}, SEEK_SET);
386bab6c 744 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 745 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 746
ab0e4957 747 return 1;
386bab6c 748 }
beac1dff 749 return;
ab0e4957 750}
751
912d50b1 752sub bucket_exists {
beac1dff 753 ##
754 # Check existence of single key given tag and MD5 digested key.
755 ##
756 my $self = shift;
e96daec8 757 my ($tag, $md5) = @_;
912d50b1 758
21838116 759 #ACID - This is a read. Can find exact or HEAD
9a187d8c 760 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 761 return $subloc && 1;
912d50b1 762}
763
6736c116 764sub find_bucket_list {
beac1dff 765 ##
766 # Locate offset for bucket list, given digested key
767 ##
768 my $self = shift;
e96daec8 769 my ($offset, $md5, $args) = @_;
d0b74c17 770 $args = {} unless $args;
771
21838116 772 local($/,$\);
773
beac1dff 774 ##
775 # Locate offset for bucket list using digest index system
776 ##
e96daec8 777 my $tag = $self->load_tag( $offset )
778 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 779
e5fc7e69 780 my $ch = 0;
8db25060 781 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 782 my $num = ord substr($md5, $ch, 1);
783
784 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 785 $tag = $self->index_lookup( $tag, $num );
d0b74c17 786
787 if (!$tag) {
29b01632 788 return if !$args->{create};
d0b74c17 789
16d1ad9b 790 my $loc = $self->_request_space(
e96daec8 791 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 792 );
793
e96daec8 794 my $fh = $self->_fh;
795 seek($fh, $ref_loc + $self->_fileobj->{file_offset}, SEEK_SET);
16d1ad9b 796 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 797
9e4f83a0 798 $tag = $self->write_tag(
e96daec8 799 $loc, SIG_BLIST,
f37c15ab 800 chr(0)x$self->{bucket_list_size},
d5d7c51d 801 );
802
803 $tag->{ref_loc} = $ref_loc;
804 $tag->{ch} = $ch;
805
806 last;
d0b74c17 807 }
808
16d1ad9b 809 $tag->{ch} = $ch++;
d0b74c17 810 $tag->{ref_loc} = $ref_loc;
beac1dff 811 }
d0b74c17 812
beac1dff 813 return $tag;
6736c116 814}
815
d0b74c17 816sub index_lookup {
817 ##
818 # Given index tag, lookup single entry in index and return .
819 ##
820 my $self = shift;
e96daec8 821 my ($tag, $index) = @_;
d0b74c17 822
823 my $location = unpack(
824 $self->{long_pack},
825 substr(
826 $tag->{content},
827 $index * $self->{long_size},
828 $self->{long_size},
829 ),
830 );
831
832 if (!$location) { return; }
833
e96daec8 834 return $self->load_tag( $location );
d0b74c17 835}
836
6736c116 837sub traverse_index {
beac1dff 838 ##
839 # Scan index and recursively step into deeper levels, looking for next key.
840 ##
6736c116 841 my $self = shift;
842 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 843
21838116 844 local($/,$\);
845
e96daec8 846 my $tag = $self->load_tag( $offset );
6736c116 847
e96daec8 848 my $fh = $self->_fh;
d0b74c17 849
8db25060 850 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 851 my $content = $tag->{content};
e5fc7e69 852 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 853
d5d7c51d 854 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 855 my $subloc = unpack(
856 $self->{long_pack},
e06824f8 857 substr(
858 $content,
859 $idx * $self->{long_size},
860 $self->{long_size},
861 ),
e5fc7e69 862 );
863
beac1dff 864 if ($subloc) {
e5fc7e69 865 my $result = $self->traverse_index(
866 $obj, $subloc, $ch + 1, $force_return_next,
867 );
868
beac1dff 869 if (defined($result)) { return $result; }
870 }
871 } # index loop
d0b74c17 872
beac1dff 873 $obj->{return_next} = 1;
874 } # tag is an index
d0b74c17 875
e5fc7e69 876 else {
beac1dff 877 my $keys = $tag->{content};
878 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 879
beac1dff 880 ##
881 # Iterate through buckets, looking for a key match
882 ##
8db25060 883 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 884 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 885
8db25060 886 # End of bucket list -- return to outer loop
beac1dff 887 if (!$subloc) {
beac1dff 888 $obj->{return_next} = 1;
889 last;
890 }
8db25060 891 # Located previous key -- return next one found
beac1dff 892 elsif ($key eq $obj->{prev_md5}) {
beac1dff 893 $obj->{return_next} = 1;
894 next;
895 }
8db25060 896 # Seek to bucket location and skip over signature
beac1dff 897 elsif ($obj->{return_next}) {
e96daec8 898 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
d0b74c17 899
beac1dff 900 # Skip over value to get to plain key
8db25060 901 my $sig;
902 read( $fh, $sig, SIG_SIZE );
903
beac1dff 904 my $size;
e5fc7e69 905 read( $fh, $size, $self->{data_size});
906 $size = unpack($self->{data_pack}, $size);
beac1dff 907 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 908
beac1dff 909 # Read in plain key and return as scalar
beac1dff 910 my $plain_key;
e5fc7e69 911 read( $fh, $size, $self->{data_size});
912 $size = unpack($self->{data_pack}, $size);
beac1dff 913 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 914
beac1dff 915 return $plain_key;
916 }
8db25060 917 }
d0b74c17 918
beac1dff 919 $obj->{return_next} = 1;
920 } # tag is a bucket list
d0b74c17 921
beac1dff 922 return;
6736c116 923}
924
925sub get_next_key {
beac1dff 926 ##
927 # Locate next key, given digested previous one
928 ##
6736c116 929 my $self = shift;
930 my ($obj) = @_;
d0b74c17 931
beac1dff 932 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
933 $obj->{return_next} = 0;
d0b74c17 934
beac1dff 935 ##
936 # If the previous key was not specifed, start at the top and
937 # return the first one found.
938 ##
939 if (!$obj->{prev_md5}) {
940 $obj->{prev_md5} = chr(0) x $self->{hash_size};
941 $obj->{return_next} = 1;
942 }
d0b74c17 943
beac1dff 944 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 945}
946
75be6413 947# Utilities
948
9cec1360 949sub _get_key_subloc {
75be6413 950 my $self = shift;
951 my ($keys, $idx) = @_;
952
28394a1a 953 my ($key, $subloc, $size, $transaction) = unpack(
954 # This is 'a', not 'A'. Please read the pack() documentation for the
955 # difference between the two and why it's important.
956 "a$self->{hash_size} $self->{long_pack} $self->{long_pack} $self->{long_pack}",
75be6413 957 substr(
958 $keys,
9cec1360 959 ($idx * $self->{bucket_size}),
960 $self->{bucket_size},
75be6413 961 ),
962 );
963
28394a1a 964 return ($key, $subloc, $size, $transaction);
75be6413 965}
966
d608b06e 967sub _find_in_buckets {
968 my $self = shift;
21838116 969 my ($tag, $md5, $exact) = @_;
d608b06e 970
28394a1a 971 my $trans_id = $self->_fileobj->transaction_id;
972
21838116 973 my @zero;
974
d608b06e 975 BUCKET:
976 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
28394a1a 977 my ($key, $subloc, $size, $transaction_id) = $self->_get_key_subloc(
9a187d8c 978 $tag->{content}, $i,
979 );
d608b06e 980
21838116 981 my @rv = ($subloc, $i * $self->{bucket_size}, $size);
982
983 unless ( $subloc ) {
984 return @zero if !$exact && @zero and $trans_id;
985 return @rv;
986 }
987
988 next BUCKET if $key ne $md5;
d608b06e 989
21838116 990 # Save off the HEAD in case we need it.
991 @zero = @rv if $transaction_id == 0;
d608b06e 992
21838116 993 next BUCKET if $transaction_id != $trans_id;
994
995 return @rv;
d608b06e 996 }
997
998 return;
999}
1000
994ccd8e 1001sub _request_space {
1002 my $self = shift;
e96daec8 1003 my ($size) = @_;
994ccd8e 1004
460b1067 1005 my $loc = $self->_fileobj->{end};
1006 $self->_fileobj->{end} += $size;
994ccd8e 1007
1008 return $loc;
1009}
1010
1011sub _release_space {
1012 my $self = shift;
e96daec8 1013 my ($size, $loc) = @_;
994ccd8e 1014
21838116 1015 local($/,$\);
1016
7b1e1aa1 1017 my $next_loc = 0;
1018
e96daec8 1019 my $fh = $self->_fh;
1020 seek( $fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET );
7b1e1aa1 1021 print( $fh SIG_FREE
1022 . pack($self->{long_pack}, $size )
1023 . pack($self->{long_pack}, $next_loc )
1024 );
1025
994ccd8e 1026 return;
1027}
1028
e96daec8 1029sub _throw_error {
1030 die "DBM::Deep: $_[1]\n";
1031}
1032
a20d9a3f 10331;
1034__END__
d5d7c51d 1035
1036# This will be added in later, after more refactoring is done. This is an early
1037# attempt at refactoring on the physical level instead of the virtual level.
1038sub _read_at {
1039 my $self = shift;
e96daec8 1040 my ($spot, $amount, $unpack) = @_;
d5d7c51d 1041
21838116 1042 local($/,$\);
1043
e96daec8 1044 my $fh = $self->_fh;
1045 seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );
d5d7c51d 1046
1047 my $buffer;
1048 my $bytes_read = read( $fh, $buffer, $amount );
1049
1050 if ( $unpack ) {
1051 $buffer = unpack( $unpack, $buffer );
1052 }
1053
1054 if ( wantarray ) {
1055 return ($buffer, $bytes_read);
1056 }
1057 else {
1058 return $buffer;
1059 }
1060}
e96daec8 1061
1062sub _print_at {
1063 my $self = shift;
1064 my ($spot, $data) = @_;
1065
21838116 1066 local($/,$\);
1067
e96daec8 1068 my $fh = $self->_fh;
1069 seek( $fh, $spot, SEEK_SET );
1070 print( $fh $data );
1071
1072 return;
1073}
1074
1075sub get_file_version {
1076 my $self = shift;
1077
21838116 1078 local($/,$\);
1079
e96daec8 1080 my $fh = $self->_fh;
1081
1082 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1083 my $buffer;
1084 my $bytes_read = read( $fh, $buffer, 4 );
1085 unless ( $bytes_read == 4 ) {
1086 $self->_throw_error( "Cannot read file version" );
1087 }
1088
1089 return unpack( 'N', $buffer );
1090}
1091
1092sub write_file_version {
1093 my $self = shift;
1094 my ($new_version) = @_;
1095
21838116 1096 local($/,$\);
1097
e96daec8 1098 my $fh = $self->_fh;
1099
1100 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1101 print( $fh pack( 'N', $new_version ) );
1102
1103 return;
1104}
1105