Fixed bug where overwrites weren't transaction-aware
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
633df1fd 8use Fcntl qw( :DEFAULT :flock );
359a01ac 9use Scalar::Util ();
a20d9a3f 10
21838116 11# File-wide notes:
20b7f047 12# * To add to bucket_size, make sure you modify the following:
13# - calculate_sizes()
14# - _get_key_subloc()
15# - add_bucket() - where the buckets are printed
21838116 16
8db25060 17##
18# Setup file and tag signatures. These should never change.
19##
20sub SIG_FILE () { 'DPDB' }
460b1067 21sub SIG_HEADER () { 'h' }
8db25060 22sub SIG_INTERNAL () { 'i' }
23sub SIG_HASH () { 'H' }
24sub SIG_ARRAY () { 'A' }
8db25060 25sub SIG_NULL () { 'N' }
26sub SIG_DATA () { 'D' }
27sub SIG_INDEX () { 'I' }
28sub SIG_BLIST () { 'B' }
7b1e1aa1 29sub SIG_FREE () { 'F' }
8db25060 30sub SIG_SIZE () { 1 }
31
612969fb 32sub new {
33 my $class = shift;
34 my ($args) = @_;
35
36 my $self = bless {
37 long_size => 4,
38 long_pack => 'N',
39 data_size => 4,
40 data_pack => 'N',
251dfd0e 41
612969fb 42 digest => \&Digest::MD5::md5,
43 hash_size => 16,
251dfd0e 44
81d16922 45 ##
d5d7c51d 46 # Maximum number of buckets per list before another level of indexing is
e0098e7f 47 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 48 # files. DO NOT decrease this value below 16, due to risk of recursive
49 # reindex overrun.
81d16922 50 ##
612969fb 51 max_buckets => 16,
460b1067 52
53 fileobj => undef,
359a01ac 54 obj => undef,
612969fb 55 }, $class;
56
e0098e7f 57 if ( defined $args->{pack_size} ) {
58 if ( lc $args->{pack_size} eq 'small' ) {
59 $args->{long_size} = 2;
c9b6d0d8 60 $args->{long_pack} = 'n';
e0098e7f 61 }
62 elsif ( lc $args->{pack_size} eq 'medium' ) {
63 $args->{long_size} = 4;
64 $args->{long_pack} = 'N';
65 }
66 elsif ( lc $args->{pack_size} eq 'large' ) {
67 $args->{long_size} = 8;
68 $args->{long_pack} = 'Q';
69 }
70 else {
71 die "Unknown pack_size value: '$args->{pack_size}'\n";
72 }
73 }
74
fde3db1a 75 # Grab the parameters we want to use
76 foreach my $param ( keys %$self ) {
77 next unless exists $args->{$param};
3e9498a1 78 $self->{$param} = $args->{$param};
fde3db1a 79 }
359a01ac 80 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 81
e0098e7f 82 if ( $self->{max_buckets} < 16 ) {
83 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
84 $self->{max_buckets} = 16;
85 }
86
260a80b4 87 return $self;
88}
89
460b1067 90sub _fileobj { return $_[0]{fileobj} }
460b1067 91
260a80b4 92sub calculate_sizes {
93 my $self = shift;
94
633df1fd 95 # The 2**8 here indicates the number of different characters in the
96 # current hashing algorithm
28394a1a 97 #XXX Does this need to be updated with different hashing algorithms?
e0098e7f 98 $self->{index_size} = (2**8) * $self->{long_size};
28394a1a 99 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3;
e0098e7f 100 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 101
260a80b4 102 return;
1bf65be7 103}
104
fde3db1a 105sub write_file_header {
0d0f3d5d 106 my $self = shift;
0d0f3d5d 107
019404df 108 my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 21 );
0d0f3d5d 109
019404df 110 $self->_fileobj->print_at( $loc,
260a80b4 111 SIG_FILE,
460b1067 112 SIG_HEADER,
113 pack('N', 1), # header version
114 pack('N', 12), # header size
15ba72cc 115 pack('N', 0), # currently running transaction IDs
c9b6d0d8 116 pack('n', $self->{long_size}),
260a80b4 117 pack('A', $self->{long_pack}),
c9b6d0d8 118 pack('n', $self->{data_size}),
260a80b4 119 pack('A', $self->{data_pack}),
c9b6d0d8 120 pack('n', $self->{max_buckets}),
260a80b4 121 );
0d0f3d5d 122
20b7f047 123 $self->_fileobj->set_transaction_offset( 13 );
124
0d0f3d5d 125 return;
126}
127
fde3db1a 128sub read_file_header {
e064ccd1 129 my $self = shift;
e064ccd1 130
7dcefff3 131 my $buffer = $self->_fileobj->read_at( 0, length(SIG_FILE) + 9 );
132 return unless length($buffer);
460b1067 133
134 my ($file_signature, $sig_header, $header_version, $size) = unpack(
135 'A4 A N N', $buffer
42f79e07 136 );
e064ccd1 137
460b1067 138 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 139 $self->_fileobj->close;
e96daec8 140 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 141 }
260a80b4 142
460b1067 143 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 144 $self->_fileobj->close;
e96daec8 145 $self->_throw_error( "Old file version found." );
460b1067 146 }
9b2370e0 147
7dcefff3 148 my $buffer2 = $self->_fileobj->read_at( undef, $size );
c9b6d0d8 149 my ($running_transactions, @values) = unpack( 'N n A n A n', $buffer2 );
15ba72cc 150
151 $self->_fileobj->set_transaction_offset( 13 );
152
460b1067 153 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 154 $self->_fileobj->close;
e96daec8 155 $self->_throw_error("Corrupted file - bad header");
e064ccd1 156 }
157
460b1067 158 #XXX Add warnings if values weren't set right
159 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
160
7dcefff3 161 return length($buffer) + length($buffer2);
e064ccd1 162}
163
460b1067 164sub setup_fh {
165 my $self = shift;
166 my ($obj) = @_;
70b55428 167
7dcefff3 168 # Need to remove use of $fh here
169 my $fh = $self->_fileobj->{fh};
6fde4ed2 170 flock $fh, LOCK_EX;
118ba343 171
260a80b4 172 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 173 unless ( $obj->{base_offset} ) {
e96daec8 174 my $bytes_read = $self->read_file_header;
118ba343 175
260a80b4 176 $self->calculate_sizes;
177
118ba343 178 ##
fde3db1a 179 # File is empty -- write header and master index
118ba343 180 ##
181 if (!$bytes_read) {
aa83bc1e 182 $self->_fileobj->audit( "# Database created on" );
359a01ac 183
e96daec8 184 $self->write_file_header;
118ba343 185
22e20cce 186 $obj->{base_offset} = $self->_fileobj->request_space(
187 $self->tag_size( $self->{index_size} ),
188 );
118ba343 189
9e4f83a0 190 $self->write_tag(
e96daec8 191 $obj->_base_offset, $obj->_type,
f37c15ab 192 chr(0)x$self->{index_size},
118ba343 193 );
194
195 # Flush the filehandle
196 my $old_fh = select $fh;
197 my $old_af = $|; $| = 1; $| = $old_af;
198 select $old_fh;
199 }
200 else {
201 $obj->{base_offset} = $bytes_read;
202
203 ##
fde3db1a 204 # Get our type from master index header
118ba343 205 ##
359a01ac 206 my $tag = $self->load_tag($obj->_base_offset);
207 unless ( $tag ) {
208 flock $fh, LOCK_UN;
209 $self->_throw_error("Corrupted file, no master index record");
210 }
118ba343 211
e96daec8 212 unless ($obj->_type eq $tag->{signature}) {
359a01ac 213 flock $fh, LOCK_UN;
e96daec8 214 $self->_throw_error("File type mismatch");
118ba343 215 }
216 }
118ba343 217 }
260a80b4 218 else {
219 $self->calculate_sizes;
220 }
e06824f8 221
673464d9 222 #XXX We have to make sure we don't mess up when autoflush isn't turned on
7dcefff3 223 $self->_fileobj->set_inode;
70b55428 224
6fde4ed2 225 flock $fh, LOCK_UN;
226
70b55428 227 return 1;
228}
229
16d1ad9b 230sub tag_size {
231 my $self = shift;
232 my ($size) = @_;
233 return SIG_SIZE + $self->{data_size} + $size;
234}
235
9e4f83a0 236sub write_tag {
20f7b20c 237 ##
238 # Given offset, signature and content, create tag and write to disk
239 ##
d4b1166e 240 my $self = shift;
e96daec8 241 my ($offset, $sig, $content) = @_;
f37c15ab 242 my $size = length( $content );
20f7b20c 243
7dcefff3 244 $self->_fileobj->print_at(
245 $offset,
246 $sig, pack($self->{data_pack}, $size), $content,
247 );
20f7b20c 248
f37c15ab 249 return unless defined $offset;
250
20f7b20c 251 return {
252 signature => $sig,
253 size => $size,
8db25060 254 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 255 content => $content
256 };
d4b1166e 257}
258
259sub load_tag {
20f7b20c 260 ##
261 # Given offset, load single tag and return signature, size and data
262 ##
d4b1166e 263 my $self = shift;
e96daec8 264 my ($offset) = @_;
20f7b20c 265
7dcefff3 266 my $fileobj = $self->_fileobj;
20f7b20c 267
7dcefff3 268 my $s = SIG_SIZE + $self->{data_size};
269 my $b = $fileobj->read_at( $offset, $s );
251dfd0e 270 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 271
7dcefff3 272 my $buffer = $fileobj->read_at( undef, $size );
20f7b20c 273
274 return {
275 signature => $sig,
276 size => $size,
8db25060 277 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 278 content => $buffer
279 };
d4b1166e 280}
281
56ec4340 282sub _get_dbm_object {
283 my $item = shift;
284
285 my $obj = eval {
286 local $SIG{__DIE__};
287 if ($item->isa( 'DBM::Deep' )) {
288 return $item;
289 }
290 return;
291 };
292 return $obj if $obj;
293
294 my $r = Scalar::Util::reftype( $item ) || '';
295 if ( $r eq 'HASH' ) {
296 my $obj = eval {
297 local $SIG{__DIE__};
298 my $obj = tied(%$item);
299 if ($obj->isa( 'DBM::Deep' )) {
300 return $obj;
301 }
302 return;
303 };
304 return $obj if $obj;
305 }
306 elsif ( $r eq 'ARRAY' ) {
307 my $obj = eval {
308 local $SIG{__DIE__};
309 my $obj = tied(@$item);
310 if ($obj->isa( 'DBM::Deep' )) {
311 return $obj;
312 }
313 return;
314 };
315 return $obj if $obj;
316 }
317
318 return;
319}
320
29b01632 321sub _length_needed {
322 my $self = shift;
e96daec8 323 my ($value, $key) = @_;
29b01632 324
325 my $is_dbm_deep = eval {
326 local $SIG{'__DIE__'};
327 $value->isa( 'DBM::Deep' );
328 };
329
633df1fd 330 my $len = SIG_SIZE
331 + $self->{data_size} # size for value
332 + $self->{data_size} # size for key
333 + length( $key ); # length of key
29b01632 334
e96daec8 335 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
633df1fd 336 # long_size is for the internal reference
f37c15ab 337 return $len + $self->{long_size};
29b01632 338 }
339
e96daec8 340 if ( $self->_fileobj->{autobless} ) {
9a187d8c 341 # This is for the bit saying whether or not this thing is blessed.
342 $len += 1;
343 }
344
633df1fd 345 my $r = Scalar::Util::reftype( $value ) || '';
29b01632 346 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 347 if ( defined $value ) {
348 $len += length( $value );
349 }
350 return $len;
29b01632 351 }
352
f37c15ab 353 $len += $self->{index_size};
29b01632 354
355 # if autobless is enabled, must also take into consideration
f37c15ab 356 # the class name as it is stored after the key.
e96daec8 357 if ( $self->_fileobj->{autobless} ) {
56ec4340 358 my $c = Scalar::Util::blessed($value);
359 if ( defined $c && !$is_dbm_deep ) {
360 $len += $self->{data_size} + length($c);
29b01632 361 }
362 }
363
f37c15ab 364 return $len;
29b01632 365}
366
20f7b20c 367sub add_bucket {
368 ##
369 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
370 # plain (undigested) key and value.
371 ##
d4b1166e 372 my $self = shift;
359a01ac 373 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
c9b6d0d8 374 $deleted ||= 0;
75be6413 375
21838116 376 local($/,$\);
377
eea0d863 378 # This verifies that only supported values will be stored.
379 {
380 my $r = Scalar::Util::reftype( $value );
381 last if !defined $r;
382
383 last if $r eq 'HASH';
384 last if $r eq 'ARRAY';
385
e96daec8 386 $self->_throw_error(
eea0d863 387 "Storage of variables of type '$r' is not supported."
388 );
389 }
390
20f7b20c 391 my $location = 0;
392 my $result = 2;
393
019404df 394 my $fileobj = $self->_fileobj;
20f7b20c 395
e96daec8 396 my $actual_length = $self->_length_needed( $value, $plain_key );
20f7b20c 397
21838116 398 #ACID - This is a mutation. Must only find the exact transaction
c9b6d0d8 399 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5, 1 );
400
401 my @transactions;
019404df 402 if ( $fileobj->transaction_id == 0 ) {
403 @transactions = $fileobj->current_transactions;
c9b6d0d8 404 }
75be6413 405
e96daec8 406# $self->_release_space( $size, $subloc );
386bab6c 407 # Updating a known md5
f9c33187 408#XXX This needs updating to use _release_space
386bab6c 409 if ( $subloc ) {
410 $result = 1;
20f7b20c 411
386bab6c 412 if ($actual_length <= $size) {
413 $location = $subloc;
20f7b20c 414 }
75be6413 415 else {
019404df 416 $location = $fileobj->request_space( $actual_length );
417
418 $fileobj->print_at( $tag->{offset} + $offset + $self->{hash_size},
419 pack($self->{long_pack}, $location ),
420 pack($self->{long_pack}, $actual_length ),
421 pack('n n', $fileobj->transaction_id, $deleted ),
386bab6c 422 );
75be6413 423 }
504185fb 424
425 my $old_value = $self->read_from_loc( $subloc, $orig_key );
426 for ( @transactions ) {
427 my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
428 $fileobj->{transaction_id} = $_;
429 $self->add_bucket( $tag2, $md5, $orig_key, $old_value, undef, $orig_key );
430 $fileobj->{transaction_id} = 0;
431 }
432 $tag = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
75be6413 433 }
386bab6c 434 # Adding a new md5
435 elsif ( defined $offset ) {
019404df 436 $location = $fileobj->request_space( $actual_length );
386bab6c 437
019404df 438 $fileobj->print_at( $tag->{offset} + $offset,
439 $md5,
440 pack($self->{long_pack}, $location ),
441 pack($self->{long_pack}, $actual_length ),
442 pack('n n', $fileobj->transaction_id, $deleted ),
443 );
c9b6d0d8 444
445 for ( @transactions ) {
446 my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
019404df 447 $fileobj->{transaction_id} = $_;
359a01ac 448 $self->add_bucket( $tag2, $md5, '', '', 1, $orig_key );
019404df 449 $fileobj->{transaction_id} = 0;
c9b6d0d8 450 }
22e20cce 451 $tag = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
386bab6c 452 }
453 # If bucket didn't fit into list, split into a new index level
019404df 454 # split_index() will do the _fileobj->request_space() call
386bab6c 455 else {
e96daec8 456 $location = $self->split_index( $md5, $tag );
386bab6c 457 }
20f7b20c 458
359a01ac 459 $self->write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 460
461 return $result;
462}
463
464sub write_value {
465 my $self = shift;
359a01ac 466 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 467
7dcefff3 468 my $fileobj = $self->_fileobj;
d5d7c51d 469
9d4fa373 470 my $dbm_deep_obj = _get_dbm_object( $value );
7dcefff3 471 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) {
e96daec8 472 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 473 }
d5d7c51d 474
20f7b20c 475 ##
d5d7c51d 476 # Write signature based on content type, set content length and write
477 # actual value.
20f7b20c 478 ##
9d4fa373 479 my $r = Scalar::Util::reftype( $value ) || '';
480 if ( $dbm_deep_obj ) {
7dcefff3 481 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 482 }
483 elsif ($r eq 'HASH') {
9d4fa373 484 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 485 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 486 }
7dcefff3 487 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 488 }
489 elsif ($r eq 'ARRAY') {
9d4fa373 490 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 491 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 492 }
7dcefff3 493 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 494 }
495 elsif (!defined($value)) {
7dcefff3 496 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 497 }
498 else {
7dcefff3 499 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 500 }
20f7b20c 501
d5d7c51d 502 ##
503 # Plain key is stored AFTER value, as keys are typically fetched less often.
504 ##
7dcefff3 505 $fileobj->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 506
9a187d8c 507 # Internal references don't care about autobless
9d4fa373 508 return 1 if $dbm_deep_obj;
9a187d8c 509
d5d7c51d 510 ##
511 # If value is blessed, preserve class name
512 ##
7dcefff3 513 if ( $fileobj->{autobless} ) {
633df1fd 514 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
7dcefff3 515 $fileobj->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 516 }
d5d7c51d 517 else {
7dcefff3 518 $fileobj->print_at( undef, chr(0) );
20f7b20c 519 }
d5d7c51d 520 }
20f7b20c 521
d5d7c51d 522 ##
56ec4340 523 # Tie the passed in reference so that changes to it are reflected in the
524 # datafile. The use of $location as the base_offset will act as the
525 # the linkage between parent and child.
526 #
527 # The overall assignment is a hack around the fact that just tying doesn't
528 # store the values. This may not be the wrong thing to do.
d5d7c51d 529 ##
9d4fa373 530 if ($r eq 'HASH') {
531 my %x = %$value;
532 tie %$value, 'DBM::Deep', {
533 base_offset => $location,
7dcefff3 534 fileobj => $fileobj,
359a01ac 535 parent => $self->{obj},
536 parent_key => $orig_key,
9d4fa373 537 };
538 %$value = %x;
539 }
540 elsif ($r eq 'ARRAY') {
541 my @x = @$value;
542 tie @$value, 'DBM::Deep', {
543 base_offset => $location,
7dcefff3 544 fileobj => $fileobj,
359a01ac 545 parent => $self->{obj},
546 parent_key => $orig_key,
9d4fa373 547 };
548 @$value = @x;
20f7b20c 549 }
d4b1166e 550
d5d7c51d 551 return 1;
d4b1166e 552}
553
75be6413 554sub split_index {
555 my $self = shift;
e96daec8 556 my ($md5, $tag) = @_;
75be6413 557
019404df 558 my $fileobj = $self->_fileobj;
21838116 559
019404df 560 my $loc = $fileobj->request_space(
e96daec8 561 $self->tag_size( $self->{index_size} ),
16d1ad9b 562 );
563
019404df 564 $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 565
9e4f83a0 566 my $index_tag = $self->write_tag(
e96daec8 567 $loc, SIG_INDEX,
f37c15ab 568 chr(0)x$self->{index_size},
75be6413 569 );
570
019404df 571 my $newtag_loc = $fileobj->request_space(
e96daec8 572 $self->tag_size( $self->{bucket_list_size} ),
f9c33187 573 );
75be6413 574
7b1e1aa1 575 my $keys = $tag->{content}
f9c33187 576 . $md5 . pack($self->{long_pack}, $newtag_loc)
28394a1a 577 . pack($self->{long_pack}, 0) # size
20b7f047 578 . pack($self->{long_pack}, 0); # transaction ID
75be6413 579
f9c33187 580 my @newloc = ();
75be6413 581 BUCKET:
633df1fd 582 # The <= here is deliberate - we have max_buckets+1 keys to iterate
583 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 584 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 585 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 586
f9c33187 587 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
588 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 589
75be6413 590 my $num = ord(substr($key, $tag->{ch} + 1, 1));
591
f9c33187 592 if ($newloc[$num]) {
7dcefff3 593 my $subkeys = $fileobj->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 594
f9c33187 595 # This is looking for the first empty spot
7b1e1aa1 596 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 597 { content => $subkeys }, '',
7b1e1aa1 598 );
75be6413 599
633df1fd 600 $fileobj->print_at(
601 $newloc[$num] + $offset,
602 $key, pack($self->{long_pack}, $old_subloc),
603 );
7b1e1aa1 604
605 next;
75be6413 606 }
75be6413 607
019404df 608 my $loc = $fileobj->request_space(
e96daec8 609 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 610 );
2603d86e 611
019404df 612 $fileobj->print_at(
613 $index_tag->{offset} + ($num * $self->{long_size}),
614 pack($self->{long_pack}, $loc),
615 );
75be6413 616
7b1e1aa1 617 my $blist_tag = $self->write_tag(
e96daec8 618 $loc, SIG_BLIST,
7b1e1aa1 619 chr(0)x$self->{bucket_list_size},
620 );
621
019404df 622 $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 623
f9c33187 624 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 625 }
626
627 $self->_release_space(
e96daec8 628 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 629 $tag->{offset} - SIG_SIZE - $self->{data_size},
630 );
75be6413 631
f9c33187 632 return $newtag_loc;
75be6413 633}
634
8db25060 635sub read_from_loc {
636 my $self = shift;
359a01ac 637 my ($subloc, $orig_key) = @_;
8db25060 638
7dcefff3 639 my $fileobj = $self->_fileobj;
8db25060 640
7dcefff3 641 my $signature = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 642
643 ##
644 # If value is a hash or array, return new DBM::Deep object with correct offset
645 ##
646 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 647 my $new_obj = DBM::Deep->new({
359a01ac 648 type => $signature,
8db25060 649 base_offset => $subloc,
e96daec8 650 fileobj => $self->_fileobj,
359a01ac 651 parent => $self->{obj},
652 parent_key => $orig_key,
685e40f1 653 });
8db25060 654
460b1067 655 if ($new_obj->_fileobj->{autobless}) {
8db25060 656 ##
657 # Skip over value and plain key to see if object needs
658 # to be re-blessed
659 ##
7dcefff3 660 $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 661
7dcefff3 662 my $size = $fileobj->read_at( undef, $self->{data_size} );
c6ea6b6c 663 $size = unpack($self->{data_pack}, $size);
7dcefff3 664 if ($size) { $fileobj->increment_pointer( $size ); }
8db25060 665
7dcefff3 666 my $bless_bit = $fileobj->read_at( undef, 1 );
8db25060 667 if (ord($bless_bit)) {
668 ##
669 # Yes, object needs to be re-blessed
670 ##
7dcefff3 671 my $size = $fileobj->read_at( undef, $self->{data_size} );
c6ea6b6c 672 $size = unpack($self->{data_pack}, $size);
7dcefff3 673
674 my $class_name;
675 if ($size) { $class_name = $fileobj->read_at( undef, $size ); }
676 if (defined $class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 677 }
678 }
679
685e40f1 680 return $new_obj;
8db25060 681 }
682 elsif ( $signature eq SIG_INTERNAL ) {
7dcefff3 683 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 684 $size = unpack($self->{data_pack}, $size);
685
686 if ( $size ) {
7dcefff3 687 my $new_loc = $fileobj->read_at( undef, $size );
688 $new_loc = unpack( $self->{long_pack}, $new_loc );
359a01ac 689 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 690 }
691 else {
692 return;
693 }
694 }
695 ##
696 # Otherwise return actual value
697 ##
460b1067 698 elsif ( $signature eq SIG_DATA ) {
7dcefff3 699 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 700 $size = unpack($self->{data_pack}, $size);
701
702 my $value = '';
7dcefff3 703 if ($size) { $value = $fileobj->read_at( undef, $size ); }
8db25060 704 return $value;
705 }
706
707 ##
708 # Key exists, but content is null
709 ##
710 return;
711}
712
9020ee8c 713sub get_bucket_value {
beac1dff 714 ##
715 # Fetch single value given tag and MD5 digested key.
716 ##
717 my $self = shift;
359a01ac 718 my ($tag, $md5, $orig_key) = @_;
9020ee8c 719
21838116 720 #ACID - This is a read. Can find exact or HEAD
94e8af14 721 my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );
722
723 if ( !$subloc ) {
724 #XXX Need to use real key
725# $self->add_bucket( $tag, $md5, $orig_key, undef, undef, $orig_key );
726# return;
727 }
728 elsif ( !$is_deleted ) {
359a01ac 729 return $self->read_from_loc( $subloc, $orig_key );
386bab6c 730 }
94e8af14 731
beac1dff 732 return;
9020ee8c 733}
ab0e4957 734
735sub delete_bucket {
beac1dff 736 ##
737 # Delete single key/value pair given tag and MD5 digested key.
738 ##
739 my $self = shift;
a97c8f67 740 my ($tag, $md5, $orig_key) = @_;
ab0e4957 741
22e20cce 742 #ACID - Although this is a mutation, we must find any transaction.
743 # This is because we need to mark something as deleted that is in the HEAD.
744 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 745
746 return if !$subloc;
747
748 my $fileobj = $self->_fileobj;
749
750 my @transactions;
751 if ( $fileobj->transaction_id == 0 ) {
752 @transactions = $fileobj->current_transactions;
753 }
754
633df1fd 755 if ( $fileobj->transaction_id == 0 ) {
756 my $value = $self->read_from_loc( $subloc, $orig_key );
757
758 for (@transactions) {
633df1fd 759 $fileobj->{transaction_id} = $_;
760 #XXX Need to use real key
22e20cce 761 $self->add_bucket( $tag, $md5, $orig_key, $value, undef, $orig_key );
633df1fd 762 $fileobj->{transaction_id} = 0;
763 }
22e20cce 764 $tag = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
633df1fd 765
22e20cce 766 #XXX This needs _release_space() for the value and anything below
633df1fd 767 $fileobj->print_at(
019404df 768 $tag->{offset} + $offset,
633df1fd 769 substr( $tag->{content}, $offset + $self->{bucket_size} ),
019404df 770 chr(0) x $self->{bucket_size},
771 );
386bab6c 772 }
633df1fd 773 else {
22e20cce 774 $self->add_bucket( $tag, $md5, '', '', 1, $orig_key );
633df1fd 775 }
776
777 return 1;
ab0e4957 778}
779
912d50b1 780sub bucket_exists {
beac1dff 781 ##
782 # Check existence of single key given tag and MD5 digested key.
783 ##
784 my $self = shift;
e96daec8 785 my ($tag, $md5) = @_;
912d50b1 786
21838116 787 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 788 my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );
789 return ($subloc && !$is_deleted) && 1;
912d50b1 790}
791
6736c116 792sub find_bucket_list {
beac1dff 793 ##
794 # Locate offset for bucket list, given digested key
795 ##
796 my $self = shift;
e96daec8 797 my ($offset, $md5, $args) = @_;
d0b74c17 798 $args = {} unless $args;
799
21838116 800 local($/,$\);
801
beac1dff 802 ##
803 # Locate offset for bucket list using digest index system
804 ##
e96daec8 805 my $tag = $self->load_tag( $offset )
806 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 807
e5fc7e69 808 my $ch = 0;
8db25060 809 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 810 my $num = ord substr($md5, $ch, 1);
811
812 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 813 $tag = $self->index_lookup( $tag, $num );
d0b74c17 814
815 if (!$tag) {
29b01632 816 return if !$args->{create};
d0b74c17 817
019404df 818 my $loc = $self->_fileobj->request_space(
e96daec8 819 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 820 );
821
019404df 822 $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 823
9e4f83a0 824 $tag = $self->write_tag(
e96daec8 825 $loc, SIG_BLIST,
f37c15ab 826 chr(0)x$self->{bucket_list_size},
d5d7c51d 827 );
828
829 $tag->{ref_loc} = $ref_loc;
830 $tag->{ch} = $ch;
831
832 last;
d0b74c17 833 }
834
16d1ad9b 835 $tag->{ch} = $ch++;
d0b74c17 836 $tag->{ref_loc} = $ref_loc;
beac1dff 837 }
d0b74c17 838
beac1dff 839 return $tag;
6736c116 840}
841
d0b74c17 842sub index_lookup {
843 ##
844 # Given index tag, lookup single entry in index and return .
845 ##
846 my $self = shift;
e96daec8 847 my ($tag, $index) = @_;
d0b74c17 848
849 my $location = unpack(
850 $self->{long_pack},
851 substr(
852 $tag->{content},
853 $index * $self->{long_size},
854 $self->{long_size},
855 ),
856 );
857
858 if (!$location) { return; }
859
e96daec8 860 return $self->load_tag( $location );
d0b74c17 861}
862
6736c116 863sub traverse_index {
beac1dff 864 ##
865 # Scan index and recursively step into deeper levels, looking for next key.
866 ##
6736c116 867 my $self = shift;
868 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 869
e96daec8 870 my $tag = $self->load_tag( $offset );
6736c116 871
8db25060 872 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 873 my $content = $tag->{content};
e5fc7e69 874 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 875
d5d7c51d 876 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 877 my $subloc = unpack(
878 $self->{long_pack},
e06824f8 879 substr(
880 $content,
881 $idx * $self->{long_size},
882 $self->{long_size},
883 ),
e5fc7e69 884 );
885
beac1dff 886 if ($subloc) {
e5fc7e69 887 my $result = $self->traverse_index(
888 $obj, $subloc, $ch + 1, $force_return_next,
889 );
890
beac1dff 891 if (defined($result)) { return $result; }
892 }
893 } # index loop
d0b74c17 894
beac1dff 895 $obj->{return_next} = 1;
42717e46 896 }
897 # This is the bucket list
e5fc7e69 898 else {
beac1dff 899 my $keys = $tag->{content};
900 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 901
beac1dff 902 ##
903 # Iterate through buckets, looking for a key match
904 ##
42717e46 905 my $transaction_id = $self->_fileobj->transaction_id;
8db25060 906 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
42717e46 907 my ($key, $subloc, $size, $trans_id, $is_deleted) = $self->_get_key_subloc( $keys, $i );
908
909 next if $is_deleted;
910#XXX Need to find all the copies of this key to find out if $transaction_id has it
911#XXX marked as deleted, in use, or what.
912 next if $trans_id && $trans_id != $transaction_id;
d0b74c17 913
8db25060 914 # End of bucket list -- return to outer loop
beac1dff 915 if (!$subloc) {
beac1dff 916 $obj->{return_next} = 1;
917 last;
918 }
8db25060 919 # Located previous key -- return next one found
beac1dff 920 elsif ($key eq $obj->{prev_md5}) {
beac1dff 921 $obj->{return_next} = 1;
922 next;
923 }
8db25060 924 # Seek to bucket location and skip over signature
beac1dff 925 elsif ($obj->{return_next}) {
7dcefff3 926 my $fileobj = $self->_fileobj;
d0b74c17 927
beac1dff 928 # Skip over value to get to plain key
7dcefff3 929 my $sig = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 930
7dcefff3 931 my $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 932 $size = unpack($self->{data_pack}, $size);
7dcefff3 933 if ($size) { $fileobj->increment_pointer( $size ); }
d0b74c17 934
beac1dff 935 # Read in plain key and return as scalar
7dcefff3 936 $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 937 $size = unpack($self->{data_pack}, $size);
7dcefff3 938 my $plain_key;
939 if ($size) { $plain_key = $fileobj->read_at( undef, $size); }
d0b74c17 940
beac1dff 941 return $plain_key;
942 }
8db25060 943 }
d0b74c17 944
beac1dff 945 $obj->{return_next} = 1;
42717e46 946 }
d0b74c17 947
beac1dff 948 return;
6736c116 949}
950
951sub get_next_key {
beac1dff 952 ##
953 # Locate next key, given digested previous one
954 ##
6736c116 955 my $self = shift;
956 my ($obj) = @_;
d0b74c17 957
beac1dff 958 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
959 $obj->{return_next} = 0;
d0b74c17 960
beac1dff 961 ##
962 # If the previous key was not specifed, start at the top and
963 # return the first one found.
964 ##
965 if (!$obj->{prev_md5}) {
966 $obj->{prev_md5} = chr(0) x $self->{hash_size};
967 $obj->{return_next} = 1;
968 }
d0b74c17 969
beac1dff 970 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 971}
972
75be6413 973# Utilities
974
9cec1360 975sub _get_key_subloc {
75be6413 976 my $self = shift;
977 my ($keys, $idx) = @_;
978
c9b6d0d8 979 my ($key, $subloc, $size, $transaction_id, $is_deleted) = unpack(
28394a1a 980 # This is 'a', not 'A'. Please read the pack() documentation for the
981 # difference between the two and why it's important.
c9b6d0d8 982 "a$self->{hash_size} $self->{long_pack}2 n2",
75be6413 983 substr(
984 $keys,
9cec1360 985 ($idx * $self->{bucket_size}),
986 $self->{bucket_size},
75be6413 987 ),
988 );
989
c9b6d0d8 990 return ($key, $subloc, $size, $transaction_id, $is_deleted);
75be6413 991}
992
d608b06e 993sub _find_in_buckets {
994 my $self = shift;
21838116 995 my ($tag, $md5, $exact) = @_;
22e20cce 996 $exact ||= 0;
d608b06e 997
28394a1a 998 my $trans_id = $self->_fileobj->transaction_id;
999
21838116 1000 my @zero;
1001
d608b06e 1002 BUCKET:
1003 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
c9b6d0d8 1004 my ($key, $subloc, $size, $transaction_id, $is_deleted) = $self->_get_key_subloc(
9a187d8c 1005 $tag->{content}, $i,
1006 );
d608b06e 1007
c9b6d0d8 1008 my @rv = ($subloc, $i * $self->{bucket_size}, $size, $is_deleted);
21838116 1009
1010 unless ( $subloc ) {
633df1fd 1011 if ( !$exact && @zero && $trans_id ) {
c9b6d0d8 1012 @rv = ($zero[2], $zero[0] * $self->{bucket_size},$zero[3],$is_deleted);
20b7f047 1013 }
21838116 1014 return @rv;
1015 }
1016
1017 next BUCKET if $key ne $md5;
d608b06e 1018
21838116 1019 # Save off the HEAD in case we need it.
c9b6d0d8 1020 @zero = ($i,$key,$subloc,$size,$transaction_id,$is_deleted) if $transaction_id == 0;
d608b06e 1021
21838116 1022 next BUCKET if $transaction_id != $trans_id;
1023
1024 return @rv;
d608b06e 1025 }
1026
1027 return;
1028}
1029
994ccd8e 1030sub _release_space {
1031 my $self = shift;
e96daec8 1032 my ($size, $loc) = @_;
994ccd8e 1033
7b1e1aa1 1034 my $next_loc = 0;
1035
019404df 1036 $self->_fileobj->print_at( $loc,
1037 SIG_FREE,
1038 pack($self->{long_pack}, $size ),
1039 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1040 );
1041
994ccd8e 1042 return;
1043}
1044
e96daec8 1045sub _throw_error {
1046 die "DBM::Deep: $_[1]\n";
1047}
1048
a20d9a3f 10491;
1050__END__