Commit right before adding the keylist
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
86867f3a 8our $VERSION = q(0.99_01);
9
633df1fd 10use Fcntl qw( :DEFAULT :flock );
359a01ac 11use Scalar::Util ();
a20d9a3f 12
21838116 13# File-wide notes:
20b7f047 14# * To add to bucket_size, make sure you modify the following:
15# - calculate_sizes()
16# - _get_key_subloc()
17# - add_bucket() - where the buckets are printed
21838116 18
8db25060 19##
20# Setup file and tag signatures. These should never change.
21##
22sub SIG_FILE () { 'DPDB' }
460b1067 23sub SIG_HEADER () { 'h' }
8db25060 24sub SIG_INTERNAL () { 'i' }
25sub SIG_HASH () { 'H' }
26sub SIG_ARRAY () { 'A' }
8db25060 27sub SIG_NULL () { 'N' }
28sub SIG_DATA () { 'D' }
29sub SIG_INDEX () { 'I' }
30sub SIG_BLIST () { 'B' }
7b1e1aa1 31sub SIG_FREE () { 'F' }
86867f3a 32sub SIG_KEYS () { 'K' }
8db25060 33sub SIG_SIZE () { 1 }
34
612969fb 35sub new {
36 my $class = shift;
37 my ($args) = @_;
38
39 my $self = bless {
86867f3a 40 long_size => 4,
41 long_pack => 'N',
42 data_size => 4,
43 data_pack => 'N',
251dfd0e 44
86867f3a 45 digest => \&Digest::MD5::md5,
46 hash_size => 16,
251dfd0e 47
81d16922 48 ##
86867f3a 49 # Maximum number of buckets per blist before another level of indexing is
e0098e7f 50 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 51 # files. DO NOT decrease this value below 16, due to risk of recursive
52 # reindex overrun.
81d16922 53 ##
612969fb 54 max_buckets => 16,
460b1067 55
56 fileobj => undef,
359a01ac 57 obj => undef,
612969fb 58 }, $class;
59
e0098e7f 60 if ( defined $args->{pack_size} ) {
61 if ( lc $args->{pack_size} eq 'small' ) {
62 $args->{long_size} = 2;
c9b6d0d8 63 $args->{long_pack} = 'n';
e0098e7f 64 }
65 elsif ( lc $args->{pack_size} eq 'medium' ) {
66 $args->{long_size} = 4;
67 $args->{long_pack} = 'N';
68 }
69 elsif ( lc $args->{pack_size} eq 'large' ) {
70 $args->{long_size} = 8;
71 $args->{long_pack} = 'Q';
72 }
73 else {
74 die "Unknown pack_size value: '$args->{pack_size}'\n";
75 }
76 }
77
fde3db1a 78 # Grab the parameters we want to use
79 foreach my $param ( keys %$self ) {
80 next unless exists $args->{$param};
3e9498a1 81 $self->{$param} = $args->{$param};
fde3db1a 82 }
359a01ac 83 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 84
e0098e7f 85 if ( $self->{max_buckets} < 16 ) {
86 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
87 $self->{max_buckets} = 16;
88 }
89
260a80b4 90 return $self;
91}
92
460b1067 93sub _fileobj { return $_[0]{fileobj} }
460b1067 94
260a80b4 95sub calculate_sizes {
96 my $self = shift;
97
633df1fd 98 # The 2**8 here indicates the number of different characters in the
99 # current hashing algorithm
28394a1a 100 #XXX Does this need to be updated with different hashing algorithms?
e0098e7f 101 $self->{index_size} = (2**8) * $self->{long_size};
28394a1a 102 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3;
e0098e7f 103 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 104
260a80b4 105 return;
1bf65be7 106}
107
fde3db1a 108sub write_file_header {
0d0f3d5d 109 my $self = shift;
0d0f3d5d 110
019404df 111 my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 21 );
0d0f3d5d 112
019404df 113 $self->_fileobj->print_at( $loc,
260a80b4 114 SIG_FILE,
460b1067 115 SIG_HEADER,
116 pack('N', 1), # header version
117 pack('N', 12), # header size
15ba72cc 118 pack('N', 0), # currently running transaction IDs
c9b6d0d8 119 pack('n', $self->{long_size}),
260a80b4 120 pack('A', $self->{long_pack}),
c9b6d0d8 121 pack('n', $self->{data_size}),
260a80b4 122 pack('A', $self->{data_pack}),
c9b6d0d8 123 pack('n', $self->{max_buckets}),
260a80b4 124 );
0d0f3d5d 125
20b7f047 126 $self->_fileobj->set_transaction_offset( 13 );
127
0d0f3d5d 128 return;
129}
130
fde3db1a 131sub read_file_header {
e064ccd1 132 my $self = shift;
e064ccd1 133
7dcefff3 134 my $buffer = $self->_fileobj->read_at( 0, length(SIG_FILE) + 9 );
135 return unless length($buffer);
460b1067 136
137 my ($file_signature, $sig_header, $header_version, $size) = unpack(
138 'A4 A N N', $buffer
42f79e07 139 );
e064ccd1 140
460b1067 141 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 142 $self->_fileobj->close;
e96daec8 143 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 144 }
260a80b4 145
460b1067 146 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 147 $self->_fileobj->close;
e96daec8 148 $self->_throw_error( "Old file version found." );
460b1067 149 }
9b2370e0 150
7dcefff3 151 my $buffer2 = $self->_fileobj->read_at( undef, $size );
c9b6d0d8 152 my ($running_transactions, @values) = unpack( 'N n A n A n', $buffer2 );
15ba72cc 153
154 $self->_fileobj->set_transaction_offset( 13 );
155
460b1067 156 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 157 $self->_fileobj->close;
e96daec8 158 $self->_throw_error("Corrupted file - bad header");
e064ccd1 159 }
160
460b1067 161 #XXX Add warnings if values weren't set right
162 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
163
7dcefff3 164 return length($buffer) + length($buffer2);
e064ccd1 165}
166
460b1067 167sub setup_fh {
168 my $self = shift;
169 my ($obj) = @_;
70b55428 170
7dcefff3 171 # Need to remove use of $fh here
172 my $fh = $self->_fileobj->{fh};
6fde4ed2 173 flock $fh, LOCK_EX;
118ba343 174
260a80b4 175 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 176 unless ( $obj->{base_offset} ) {
e96daec8 177 my $bytes_read = $self->read_file_header;
118ba343 178
260a80b4 179 $self->calculate_sizes;
180
118ba343 181 ##
fde3db1a 182 # File is empty -- write header and master index
118ba343 183 ##
184 if (!$bytes_read) {
aa83bc1e 185 $self->_fileobj->audit( "# Database created on" );
359a01ac 186
e96daec8 187 $self->write_file_header;
118ba343 188
22e20cce 189 $obj->{base_offset} = $self->_fileobj->request_space(
190 $self->tag_size( $self->{index_size} ),
191 );
118ba343 192
9e4f83a0 193 $self->write_tag(
e96daec8 194 $obj->_base_offset, $obj->_type,
f37c15ab 195 chr(0)x$self->{index_size},
118ba343 196 );
197
198 # Flush the filehandle
199 my $old_fh = select $fh;
200 my $old_af = $|; $| = 1; $| = $old_af;
201 select $old_fh;
202 }
203 else {
204 $obj->{base_offset} = $bytes_read;
205
206 ##
fde3db1a 207 # Get our type from master index header
118ba343 208 ##
359a01ac 209 my $tag = $self->load_tag($obj->_base_offset);
210 unless ( $tag ) {
211 flock $fh, LOCK_UN;
212 $self->_throw_error("Corrupted file, no master index record");
213 }
118ba343 214
e96daec8 215 unless ($obj->_type eq $tag->{signature}) {
359a01ac 216 flock $fh, LOCK_UN;
e96daec8 217 $self->_throw_error("File type mismatch");
118ba343 218 }
219 }
118ba343 220 }
260a80b4 221 else {
222 $self->calculate_sizes;
223 }
e06824f8 224
673464d9 225 #XXX We have to make sure we don't mess up when autoflush isn't turned on
7dcefff3 226 $self->_fileobj->set_inode;
70b55428 227
6fde4ed2 228 flock $fh, LOCK_UN;
229
70b55428 230 return 1;
231}
232
16d1ad9b 233sub tag_size {
234 my $self = shift;
235 my ($size) = @_;
236 return SIG_SIZE + $self->{data_size} + $size;
237}
238
9e4f83a0 239sub write_tag {
20f7b20c 240 ##
241 # Given offset, signature and content, create tag and write to disk
242 ##
d4b1166e 243 my $self = shift;
e96daec8 244 my ($offset, $sig, $content) = @_;
f37c15ab 245 my $size = length( $content );
20f7b20c 246
7dcefff3 247 $self->_fileobj->print_at(
248 $offset,
249 $sig, pack($self->{data_pack}, $size), $content,
250 );
20f7b20c 251
f37c15ab 252 return unless defined $offset;
253
20f7b20c 254 return {
255 signature => $sig,
72e315ac 256 #XXX Is this even used?
86867f3a 257 size => $size,
258 offset => $offset + SIG_SIZE + $self->{data_size},
259 content => $content
20f7b20c 260 };
d4b1166e 261}
262
263sub load_tag {
20f7b20c 264 ##
265 # Given offset, load single tag and return signature, size and data
266 ##
d4b1166e 267 my $self = shift;
e96daec8 268 my ($offset) = @_;
20f7b20c 269
7dcefff3 270 my $fileobj = $self->_fileobj;
20f7b20c 271
86867f3a 272 my ($sig, $size) = unpack(
273 "A $self->{data_pack}",
274 $fileobj->read_at( $offset, SIG_SIZE + $self->{data_size} ),
275 );
20f7b20c 276
277 return {
278 signature => $sig,
72e315ac 279 #XXX Is this even used?
86867f3a 280 size => $size,
281 offset => $offset + SIG_SIZE + $self->{data_size},
282 content => $fileobj->read_at( undef, $size ),
20f7b20c 283 };
d4b1166e 284}
285
20f7b20c 286sub add_bucket {
287 ##
288 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
289 # plain (undigested) key and value.
290 ##
d4b1166e 291 my $self = shift;
359a01ac 292 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
c9b6d0d8 293 $deleted ||= 0;
75be6413 294
eea0d863 295 # This verifies that only supported values will be stored.
296 {
297 my $r = Scalar::Util::reftype( $value );
eea0d863 298
86867f3a 299 last if !defined $r;
eea0d863 300 last if $r eq 'HASH';
301 last if $r eq 'ARRAY';
302
e96daec8 303 $self->_throw_error(
86867f3a 304 "Storage of references of type '$r' is not supported."
eea0d863 305 );
306 }
307
019404df 308 my $fileobj = $self->_fileobj;
20f7b20c 309
e96daec8 310 my $actual_length = $self->_length_needed( $value, $plain_key );
20f7b20c 311
21838116 312 #ACID - This is a mutation. Must only find the exact transaction
c9b6d0d8 313 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5, 1 );
314
315 my @transactions;
019404df 316 if ( $fileobj->transaction_id == 0 ) {
317 @transactions = $fileobj->current_transactions;
c9b6d0d8 318 }
75be6413 319
e96daec8 320# $self->_release_space( $size, $subloc );
386bab6c 321 # Updating a known md5
f9c33187 322#XXX This needs updating to use _release_space
86867f3a 323 my $location;
386bab6c 324 if ( $subloc ) {
386bab6c 325 if ($actual_length <= $size) {
326 $location = $subloc;
20f7b20c 327 }
75be6413 328 else {
019404df 329 $location = $fileobj->request_space( $actual_length );
330
331 $fileobj->print_at( $tag->{offset} + $offset + $self->{hash_size},
332 pack($self->{long_pack}, $location ),
333 pack($self->{long_pack}, $actual_length ),
334 pack('n n', $fileobj->transaction_id, $deleted ),
386bab6c 335 );
75be6413 336 }
504185fb 337
338 my $old_value = $self->read_from_loc( $subloc, $orig_key );
339 for ( @transactions ) {
340 my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
341 $fileobj->{transaction_id} = $_;
342 $self->add_bucket( $tag2, $md5, $orig_key, $old_value, undef, $orig_key );
343 $fileobj->{transaction_id} = 0;
344 }
345 $tag = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
75be6413 346 }
386bab6c 347 # Adding a new md5
348 elsif ( defined $offset ) {
019404df 349 $location = $fileobj->request_space( $actual_length );
386bab6c 350
019404df 351 $fileobj->print_at( $tag->{offset} + $offset,
352 $md5,
353 pack($self->{long_pack}, $location ),
354 pack($self->{long_pack}, $actual_length ),
355 pack('n n', $fileobj->transaction_id, $deleted ),
356 );
c9b6d0d8 357
358 for ( @transactions ) {
359 my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
019404df 360 $fileobj->{transaction_id} = $_;
359a01ac 361 $self->add_bucket( $tag2, $md5, '', '', 1, $orig_key );
019404df 362 $fileobj->{transaction_id} = 0;
c9b6d0d8 363 }
22e20cce 364 $tag = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
386bab6c 365 }
366 # If bucket didn't fit into list, split into a new index level
86867f3a 367 # split_index() will do the $self->_fileobj->request_space() call
368 #XXX It also needs to be transactionally aware
386bab6c 369 else {
e96daec8 370 $location = $self->split_index( $md5, $tag );
386bab6c 371 }
20f7b20c 372
359a01ac 373 $self->write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 374
86867f3a 375 return 1;
d5d7c51d 376}
377
378sub write_value {
379 my $self = shift;
359a01ac 380 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 381
7dcefff3 382 my $fileobj = $self->_fileobj;
d5d7c51d 383
9d4fa373 384 my $dbm_deep_obj = _get_dbm_object( $value );
7dcefff3 385 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) {
e96daec8 386 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 387 }
d5d7c51d 388
20f7b20c 389 ##
d5d7c51d 390 # Write signature based on content type, set content length and write
391 # actual value.
20f7b20c 392 ##
9d4fa373 393 my $r = Scalar::Util::reftype( $value ) || '';
394 if ( $dbm_deep_obj ) {
7dcefff3 395 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 396 }
397 elsif ($r eq 'HASH') {
9d4fa373 398 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 399 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 400 }
7dcefff3 401 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 402 }
403 elsif ($r eq 'ARRAY') {
9d4fa373 404 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 405 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 406 }
7dcefff3 407 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 408 }
409 elsif (!defined($value)) {
7dcefff3 410 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 411 }
412 else {
7dcefff3 413 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 414 }
20f7b20c 415
d5d7c51d 416 ##
417 # Plain key is stored AFTER value, as keys are typically fetched less often.
418 ##
7dcefff3 419 $fileobj->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 420
9a187d8c 421 # Internal references don't care about autobless
9d4fa373 422 return 1 if $dbm_deep_obj;
9a187d8c 423
d5d7c51d 424 ##
425 # If value is blessed, preserve class name
426 ##
7dcefff3 427 if ( $fileobj->{autobless} ) {
633df1fd 428 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
7dcefff3 429 $fileobj->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 430 }
d5d7c51d 431 else {
7dcefff3 432 $fileobj->print_at( undef, chr(0) );
20f7b20c 433 }
d5d7c51d 434 }
20f7b20c 435
d5d7c51d 436 ##
56ec4340 437 # Tie the passed in reference so that changes to it are reflected in the
438 # datafile. The use of $location as the base_offset will act as the
439 # the linkage between parent and child.
440 #
441 # The overall assignment is a hack around the fact that just tying doesn't
442 # store the values. This may not be the wrong thing to do.
d5d7c51d 443 ##
9d4fa373 444 if ($r eq 'HASH') {
445 my %x = %$value;
446 tie %$value, 'DBM::Deep', {
447 base_offset => $location,
7dcefff3 448 fileobj => $fileobj,
359a01ac 449 parent => $self->{obj},
450 parent_key => $orig_key,
9d4fa373 451 };
452 %$value = %x;
453 }
454 elsif ($r eq 'ARRAY') {
455 my @x = @$value;
456 tie @$value, 'DBM::Deep', {
457 base_offset => $location,
7dcefff3 458 fileobj => $fileobj,
359a01ac 459 parent => $self->{obj},
460 parent_key => $orig_key,
9d4fa373 461 };
462 @$value = @x;
20f7b20c 463 }
d4b1166e 464
d5d7c51d 465 return 1;
d4b1166e 466}
467
75be6413 468sub split_index {
469 my $self = shift;
e96daec8 470 my ($md5, $tag) = @_;
75be6413 471
019404df 472 my $fileobj = $self->_fileobj;
21838116 473
019404df 474 my $loc = $fileobj->request_space(
e96daec8 475 $self->tag_size( $self->{index_size} ),
16d1ad9b 476 );
477
019404df 478 $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 479
9e4f83a0 480 my $index_tag = $self->write_tag(
e96daec8 481 $loc, SIG_INDEX,
f37c15ab 482 chr(0)x$self->{index_size},
75be6413 483 );
484
019404df 485 my $newtag_loc = $fileobj->request_space(
e96daec8 486 $self->tag_size( $self->{bucket_list_size} ),
f9c33187 487 );
75be6413 488
7b1e1aa1 489 my $keys = $tag->{content}
f9c33187 490 . $md5 . pack($self->{long_pack}, $newtag_loc)
28394a1a 491 . pack($self->{long_pack}, 0) # size
20b7f047 492 . pack($self->{long_pack}, 0); # transaction ID
75be6413 493
f9c33187 494 my @newloc = ();
75be6413 495 BUCKET:
633df1fd 496 # The <= here is deliberate - we have max_buckets+1 keys to iterate
497 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 498 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 499 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 500
f9c33187 501 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
502 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 503
75be6413 504 my $num = ord(substr($key, $tag->{ch} + 1, 1));
505
f9c33187 506 if ($newloc[$num]) {
7dcefff3 507 my $subkeys = $fileobj->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 508
f9c33187 509 # This is looking for the first empty spot
7b1e1aa1 510 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 511 { content => $subkeys }, '',
7b1e1aa1 512 );
75be6413 513
633df1fd 514 $fileobj->print_at(
515 $newloc[$num] + $offset,
516 $key, pack($self->{long_pack}, $old_subloc),
517 );
7b1e1aa1 518
519 next;
75be6413 520 }
75be6413 521
019404df 522 my $loc = $fileobj->request_space(
e96daec8 523 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 524 );
2603d86e 525
019404df 526 $fileobj->print_at(
527 $index_tag->{offset} + ($num * $self->{long_size}),
528 pack($self->{long_pack}, $loc),
529 );
75be6413 530
7b1e1aa1 531 my $blist_tag = $self->write_tag(
e96daec8 532 $loc, SIG_BLIST,
7b1e1aa1 533 chr(0)x$self->{bucket_list_size},
534 );
535
019404df 536 $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 537
f9c33187 538 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 539 }
540
541 $self->_release_space(
e96daec8 542 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 543 $tag->{offset} - SIG_SIZE - $self->{data_size},
544 );
75be6413 545
f9c33187 546 return $newtag_loc;
75be6413 547}
548
8db25060 549sub read_from_loc {
550 my $self = shift;
359a01ac 551 my ($subloc, $orig_key) = @_;
8db25060 552
7dcefff3 553 my $fileobj = $self->_fileobj;
8db25060 554
7dcefff3 555 my $signature = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 556
557 ##
558 # If value is a hash or array, return new DBM::Deep object with correct offset
559 ##
560 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 561 my $new_obj = DBM::Deep->new({
359a01ac 562 type => $signature,
8db25060 563 base_offset => $subloc,
e96daec8 564 fileobj => $self->_fileobj,
359a01ac 565 parent => $self->{obj},
566 parent_key => $orig_key,
685e40f1 567 });
8db25060 568
460b1067 569 if ($new_obj->_fileobj->{autobless}) {
8db25060 570 ##
571 # Skip over value and plain key to see if object needs
572 # to be re-blessed
573 ##
7dcefff3 574 $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 575
7dcefff3 576 my $size = $fileobj->read_at( undef, $self->{data_size} );
c6ea6b6c 577 $size = unpack($self->{data_pack}, $size);
7dcefff3 578 if ($size) { $fileobj->increment_pointer( $size ); }
8db25060 579
7dcefff3 580 my $bless_bit = $fileobj->read_at( undef, 1 );
86867f3a 581 if ( ord($bless_bit) ) {
582 my $size = unpack(
583 $self->{data_pack},
584 $fileobj->read_at( undef, $self->{data_size} ),
585 );
7dcefff3 586
86867f3a 587 if ( $size ) {
588 $new_obj = bless $new_obj, $fileobj->read_at( undef, $size );
589 }
8db25060 590 }
591 }
592
685e40f1 593 return $new_obj;
8db25060 594 }
595 elsif ( $signature eq SIG_INTERNAL ) {
7dcefff3 596 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 597 $size = unpack($self->{data_pack}, $size);
598
599 if ( $size ) {
7dcefff3 600 my $new_loc = $fileobj->read_at( undef, $size );
601 $new_loc = unpack( $self->{long_pack}, $new_loc );
359a01ac 602 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 603 }
604 else {
605 return;
606 }
607 }
608 ##
609 # Otherwise return actual value
610 ##
460b1067 611 elsif ( $signature eq SIG_DATA ) {
7dcefff3 612 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 613 $size = unpack($self->{data_pack}, $size);
614
86867f3a 615 my $value = $size ? $fileobj->read_at( undef, $size ) : '';
8db25060 616 return $value;
617 }
618
619 ##
620 # Key exists, but content is null
621 ##
622 return;
623}
624
9020ee8c 625sub get_bucket_value {
beac1dff 626 ##
627 # Fetch single value given tag and MD5 digested key.
628 ##
629 my $self = shift;
359a01ac 630 my ($tag, $md5, $orig_key) = @_;
9020ee8c 631
21838116 632 #ACID - This is a read. Can find exact or HEAD
94e8af14 633 my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );
634
635 if ( !$subloc ) {
636 #XXX Need to use real key
637# $self->add_bucket( $tag, $md5, $orig_key, undef, undef, $orig_key );
638# return;
639 }
640 elsif ( !$is_deleted ) {
359a01ac 641 return $self->read_from_loc( $subloc, $orig_key );
386bab6c 642 }
94e8af14 643
beac1dff 644 return;
9020ee8c 645}
ab0e4957 646
647sub delete_bucket {
beac1dff 648 ##
649 # Delete single key/value pair given tag and MD5 digested key.
650 ##
651 my $self = shift;
a97c8f67 652 my ($tag, $md5, $orig_key) = @_;
ab0e4957 653
22e20cce 654 #ACID - Although this is a mutation, we must find any transaction.
655 # This is because we need to mark something as deleted that is in the HEAD.
656 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 657
658 return if !$subloc;
659
660 my $fileobj = $self->_fileobj;
661
662 my @transactions;
663 if ( $fileobj->transaction_id == 0 ) {
664 @transactions = $fileobj->current_transactions;
665 }
666
633df1fd 667 if ( $fileobj->transaction_id == 0 ) {
668 my $value = $self->read_from_loc( $subloc, $orig_key );
669
670 for (@transactions) {
633df1fd 671 $fileobj->{transaction_id} = $_;
672 #XXX Need to use real key
22e20cce 673 $self->add_bucket( $tag, $md5, $orig_key, $value, undef, $orig_key );
633df1fd 674 $fileobj->{transaction_id} = 0;
675 }
22e20cce 676 $tag = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
633df1fd 677
22e20cce 678 #XXX This needs _release_space() for the value and anything below
633df1fd 679 $fileobj->print_at(
019404df 680 $tag->{offset} + $offset,
633df1fd 681 substr( $tag->{content}, $offset + $self->{bucket_size} ),
019404df 682 chr(0) x $self->{bucket_size},
683 );
386bab6c 684 }
633df1fd 685 else {
22e20cce 686 $self->add_bucket( $tag, $md5, '', '', 1, $orig_key );
633df1fd 687 }
688
689 return 1;
ab0e4957 690}
691
912d50b1 692sub bucket_exists {
beac1dff 693 ##
694 # Check existence of single key given tag and MD5 digested key.
695 ##
696 my $self = shift;
e96daec8 697 my ($tag, $md5) = @_;
912d50b1 698
21838116 699 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 700 my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );
701 return ($subloc && !$is_deleted) && 1;
912d50b1 702}
703
72e315ac 704sub find_blist {
beac1dff 705 ##
706 # Locate offset for bucket list, given digested key
707 ##
708 my $self = shift;
e96daec8 709 my ($offset, $md5, $args) = @_;
d0b74c17 710 $args = {} unless $args;
711
beac1dff 712 ##
713 # Locate offset for bucket list using digest index system
714 ##
e96daec8 715 my $tag = $self->load_tag( $offset )
716 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 717
e5fc7e69 718 my $ch = 0;
8db25060 719 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 720 my $num = ord substr($md5, $ch, 1);
721
722 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 723 $tag = $self->index_lookup( $tag, $num );
d0b74c17 724
725 if (!$tag) {
29b01632 726 return if !$args->{create};
d0b74c17 727
019404df 728 my $loc = $self->_fileobj->request_space(
e96daec8 729 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 730 );
731
019404df 732 $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 733
9e4f83a0 734 $tag = $self->write_tag(
e96daec8 735 $loc, SIG_BLIST,
f37c15ab 736 chr(0)x$self->{bucket_list_size},
d5d7c51d 737 );
738
739 $tag->{ref_loc} = $ref_loc;
740 $tag->{ch} = $ch;
741
742 last;
d0b74c17 743 }
744
16d1ad9b 745 $tag->{ch} = $ch++;
d0b74c17 746 $tag->{ref_loc} = $ref_loc;
beac1dff 747 }
d0b74c17 748
beac1dff 749 return $tag;
6736c116 750}
751
d0b74c17 752sub index_lookup {
753 ##
754 # Given index tag, lookup single entry in index and return .
755 ##
756 my $self = shift;
e96daec8 757 my ($tag, $index) = @_;
d0b74c17 758
759 my $location = unpack(
760 $self->{long_pack},
761 substr(
762 $tag->{content},
763 $index * $self->{long_size},
764 $self->{long_size},
765 ),
766 );
767
768 if (!$location) { return; }
769
e96daec8 770 return $self->load_tag( $location );
d0b74c17 771}
772
6736c116 773sub traverse_index {
beac1dff 774 ##
775 # Scan index and recursively step into deeper levels, looking for next key.
776 ##
6736c116 777 my $self = shift;
778 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 779
e96daec8 780 my $tag = $self->load_tag( $offset );
6736c116 781
8db25060 782 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 783 my $content = $tag->{content};
e5fc7e69 784 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 785
d5d7c51d 786 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 787 my $subloc = unpack(
788 $self->{long_pack},
e06824f8 789 substr(
790 $content,
791 $idx * $self->{long_size},
792 $self->{long_size},
793 ),
e5fc7e69 794 );
795
beac1dff 796 if ($subloc) {
e5fc7e69 797 my $result = $self->traverse_index(
798 $obj, $subloc, $ch + 1, $force_return_next,
799 );
800
beac1dff 801 if (defined($result)) { return $result; }
802 }
803 } # index loop
d0b74c17 804
beac1dff 805 $obj->{return_next} = 1;
42717e46 806 }
807 # This is the bucket list
e5fc7e69 808 else {
beac1dff 809 my $keys = $tag->{content};
810 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 811
beac1dff 812 ##
813 # Iterate through buckets, looking for a key match
814 ##
42717e46 815 my $transaction_id = $self->_fileobj->transaction_id;
8db25060 816 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
42717e46 817 my ($key, $subloc, $size, $trans_id, $is_deleted) = $self->_get_key_subloc( $keys, $i );
818
819 next if $is_deleted;
820#XXX Need to find all the copies of this key to find out if $transaction_id has it
821#XXX marked as deleted, in use, or what.
822 next if $trans_id && $trans_id != $transaction_id;
d0b74c17 823
8db25060 824 # End of bucket list -- return to outer loop
beac1dff 825 if (!$subloc) {
beac1dff 826 $obj->{return_next} = 1;
827 last;
828 }
8db25060 829 # Located previous key -- return next one found
beac1dff 830 elsif ($key eq $obj->{prev_md5}) {
beac1dff 831 $obj->{return_next} = 1;
832 next;
833 }
8db25060 834 # Seek to bucket location and skip over signature
beac1dff 835 elsif ($obj->{return_next}) {
7dcefff3 836 my $fileobj = $self->_fileobj;
d0b74c17 837
beac1dff 838 # Skip over value to get to plain key
7dcefff3 839 my $sig = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 840
7dcefff3 841 my $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 842 $size = unpack($self->{data_pack}, $size);
7dcefff3 843 if ($size) { $fileobj->increment_pointer( $size ); }
d0b74c17 844
beac1dff 845 # Read in plain key and return as scalar
7dcefff3 846 $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 847 $size = unpack($self->{data_pack}, $size);
86867f3a 848
7dcefff3 849 my $plain_key;
850 if ($size) { $plain_key = $fileobj->read_at( undef, $size); }
beac1dff 851 return $plain_key;
852 }
8db25060 853 }
d0b74c17 854
beac1dff 855 $obj->{return_next} = 1;
42717e46 856 }
d0b74c17 857
beac1dff 858 return;
6736c116 859}
860
861sub get_next_key {
beac1dff 862 ##
863 # Locate next key, given digested previous one
864 ##
6736c116 865 my $self = shift;
866 my ($obj) = @_;
d0b74c17 867
beac1dff 868 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
869 $obj->{return_next} = 0;
d0b74c17 870
beac1dff 871 ##
872 # If the previous key was not specifed, start at the top and
873 # return the first one found.
874 ##
875 if (!$obj->{prev_md5}) {
876 $obj->{prev_md5} = chr(0) x $self->{hash_size};
877 $obj->{return_next} = 1;
878 }
d0b74c17 879
beac1dff 880 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 881}
882
75be6413 883# Utilities
884
9cec1360 885sub _get_key_subloc {
75be6413 886 my $self = shift;
887 my ($keys, $idx) = @_;
888
c9b6d0d8 889 my ($key, $subloc, $size, $transaction_id, $is_deleted) = unpack(
28394a1a 890 # This is 'a', not 'A'. Please read the pack() documentation for the
891 # difference between the two and why it's important.
c9b6d0d8 892 "a$self->{hash_size} $self->{long_pack}2 n2",
75be6413 893 substr(
894 $keys,
9cec1360 895 ($idx * $self->{bucket_size}),
896 $self->{bucket_size},
75be6413 897 ),
898 );
899
c9b6d0d8 900 return ($key, $subloc, $size, $transaction_id, $is_deleted);
75be6413 901}
902
d608b06e 903sub _find_in_buckets {
904 my $self = shift;
21838116 905 my ($tag, $md5, $exact) = @_;
22e20cce 906 $exact ||= 0;
d608b06e 907
28394a1a 908 my $trans_id = $self->_fileobj->transaction_id;
909
21838116 910 my @zero;
911
d608b06e 912 BUCKET:
913 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
c9b6d0d8 914 my ($key, $subloc, $size, $transaction_id, $is_deleted) = $self->_get_key_subloc(
9a187d8c 915 $tag->{content}, $i,
916 );
d608b06e 917
c9b6d0d8 918 my @rv = ($subloc, $i * $self->{bucket_size}, $size, $is_deleted);
21838116 919
920 unless ( $subloc ) {
633df1fd 921 if ( !$exact && @zero && $trans_id ) {
c9b6d0d8 922 @rv = ($zero[2], $zero[0] * $self->{bucket_size},$zero[3],$is_deleted);
20b7f047 923 }
21838116 924 return @rv;
925 }
926
927 next BUCKET if $key ne $md5;
d608b06e 928
21838116 929 # Save off the HEAD in case we need it.
c9b6d0d8 930 @zero = ($i,$key,$subloc,$size,$transaction_id,$is_deleted) if $transaction_id == 0;
d608b06e 931
21838116 932 next BUCKET if $transaction_id != $trans_id;
933
934 return @rv;
d608b06e 935 }
936
937 return;
938}
939
994ccd8e 940sub _release_space {
941 my $self = shift;
e96daec8 942 my ($size, $loc) = @_;
994ccd8e 943
7b1e1aa1 944 my $next_loc = 0;
945
019404df 946 $self->_fileobj->print_at( $loc,
947 SIG_FREE,
948 pack($self->{long_pack}, $size ),
949 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 950 );
951
994ccd8e 952 return;
953}
954
e96daec8 955sub _throw_error {
956 die "DBM::Deep: $_[1]\n";
957}
958
86867f3a 959sub _get_dbm_object {
960 my $item = shift;
961
962 my $obj = eval {
963 local $SIG{__DIE__};
964 if ($item->isa( 'DBM::Deep' )) {
965 return $item;
966 }
967 return;
968 };
969 return $obj if $obj;
970
971 my $r = Scalar::Util::reftype( $item ) || '';
972 if ( $r eq 'HASH' ) {
973 my $obj = eval {
974 local $SIG{__DIE__};
975 my $obj = tied(%$item);
976 if ($obj->isa( 'DBM::Deep' )) {
977 return $obj;
978 }
979 return;
980 };
981 return $obj if $obj;
982 }
983 elsif ( $r eq 'ARRAY' ) {
984 my $obj = eval {
985 local $SIG{__DIE__};
986 my $obj = tied(@$item);
987 if ($obj->isa( 'DBM::Deep' )) {
988 return $obj;
989 }
990 return;
991 };
992 return $obj if $obj;
993 }
994
995 return;
996}
997
998sub _length_needed {
999 my $self = shift;
1000 my ($value, $key) = @_;
1001
1002 my $is_dbm_deep = eval {
1003 local $SIG{'__DIE__'};
1004 $value->isa( 'DBM::Deep' );
1005 };
1006
1007 my $len = SIG_SIZE
1008 + $self->{data_size} # size for value
1009 + $self->{data_size} # size for key
1010 + length( $key ); # length of key
1011
1012 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
1013 # long_size is for the internal reference
1014 return $len + $self->{long_size};
1015 }
1016
1017 if ( $self->_fileobj->{autobless} ) {
1018 # This is for the bit saying whether or not this thing is blessed.
1019 $len += 1;
1020 }
1021
1022 my $r = Scalar::Util::reftype( $value ) || '';
1023 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1024 if ( defined $value ) {
1025 $len += length( $value );
1026 }
1027 return $len;
1028 }
1029
1030 $len += $self->{index_size};
1031
1032 # if autobless is enabled, must also take into consideration
1033 # the class name as it is stored after the key.
1034 if ( $self->_fileobj->{autobless} ) {
1035 my $c = Scalar::Util::blessed($value);
1036 if ( defined $c && !$is_dbm_deep ) {
1037 $len += $self->{data_size} + length($c);
1038 }
1039 }
1040
1041 return $len;
1042}
1043
a20d9a3f 10441;
1045__END__