Removed final vestiges of misunderstandings
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
9bc79bb6 3#use Sub::Caller qw( load_tag );
1ad1fc2b 4
460b1067 5use 5.6.0;
6
a20d9a3f 7use strict;
8
c3aafc14 9our $VERSION = q(0.99_03);
86867f3a 10
633df1fd 11use Fcntl qw( :DEFAULT :flock );
359a01ac 12use Scalar::Util ();
a20d9a3f 13
21838116 14# File-wide notes:
20b7f047 15# * To add to bucket_size, make sure you modify the following:
16# - calculate_sizes()
17# - _get_key_subloc()
18# - add_bucket() - where the buckets are printed
c3aafc14 19#
83371fe3 20# * Every method in here assumes that the _storage has been appropriately
c3aafc14 21# safeguarded. This can be anything from flock() to some sort of manual
22# mutex. But, it's the caller's responsability to make sure that this has
23# been done.
21838116 24
8db25060 25##
26# Setup file and tag signatures. These should never change.
27##
28sub SIG_FILE () { 'DPDB' }
460b1067 29sub SIG_HEADER () { 'h' }
8db25060 30sub SIG_INTERNAL () { 'i' }
31sub SIG_HASH () { 'H' }
32sub SIG_ARRAY () { 'A' }
8db25060 33sub SIG_NULL () { 'N' }
34sub SIG_DATA () { 'D' }
35sub SIG_INDEX () { 'I' }
36sub SIG_BLIST () { 'B' }
7b1e1aa1 37sub SIG_FREE () { 'F' }
86867f3a 38sub SIG_KEYS () { 'K' }
8db25060 39sub SIG_SIZE () { 1 }
40
fb451ba6 41# This is the transaction ID for the HEAD
42sub HEAD () { 0 }
43
c3aafc14 44################################################################################
45#
46# This is new code. It is a complete rewrite of the engine based on a new API
47#
48################################################################################
49
c3aafc14 50sub read_value {
51 my $self = shift;
40956c06 52 my ($trans_id, $offset, $key, $orig_key) = @_;
c3aafc14 53
40956c06 54 my $dig_key = $self->_apply_digest( $key );
e5a9d386 55 my $tag = $self->find_blist( $offset, $dig_key ) or return;
56 return $self->get_bucket_value( $tag, $dig_key, $orig_key );
c3aafc14 57}
58
c3aafc14 59sub key_exists {
60 my $self = shift;
40956c06 61 my ($trans_id, $offset, $key) = @_;
c3aafc14 62
40956c06 63 my $dig_key = $self->_apply_digest( $key );
e5a9d386 64 # exists() returns the empty string, not undef
65 my $tag = $self->find_blist( $offset, $dig_key ) or return '';
c3aafc14 66 return $self->bucket_exists( $tag, $dig_key, $key );
67}
68
83c43bb5 69sub get_next_key {
c3aafc14 70 my $self = shift;
40956c06 71 my ($trans_id, $offset) = @_;
c3aafc14 72
83c43bb5 73 # If the previous key was not specifed, start at the top and
74 # return the first one found.
75 my $temp;
40956c06 76 if ( @_ > 2 ) {
83c43bb5 77 $temp = {
40956c06 78 prev_md5 => $self->_apply_digest($_[2]),
83c43bb5 79 return_next => 0,
80 };
81 }
82 else {
83 $temp = {
84 prev_md5 => chr(0) x $self->{hash_size},
85 return_next => 1,
86 };
87 }
88
89 return $self->traverse_index( $temp, $offset, 0 );
c3aafc14 90}
91
83371fe3 92sub delete_key {
93 my $self = shift;
40956c06 94 my ($trans_id, $offset, $key, $orig_key) = @_;
83371fe3 95
40956c06 96 my $dig_key = $self->_apply_digest( $key );
83371fe3 97 my $tag = $self->find_blist( $offset, $dig_key ) or return;
98 my $value = $self->get_bucket_value( $tag, $dig_key, $orig_key );
99 $self->delete_bucket( $tag, $dig_key, $orig_key );
100 return $value;
101}
102
83371fe3 103sub write_value {
104 my $self = shift;
40956c06 105 my ($trans_id, $offset, $key, $value, $orig_key) = @_;
83371fe3 106
40956c06 107 my $dig_key = $self->_apply_digest( $key );
83371fe3 108 my $tag = $self->find_blist( $offset, $dig_key, { create => 1 } );
109 return $self->add_bucket( $tag, $dig_key, $key, $value, undef, $orig_key );
110}
111
c3aafc14 112################################################################################
113#
114# Below here is the old code. It will be folded into the code above as it can.
115#
116################################################################################
117
612969fb 118sub new {
119 my $class = shift;
120 my ($args) = @_;
121
122 my $self = bless {
86867f3a 123 long_size => 4,
124 long_pack => 'N',
125 data_size => 4,
126 data_pack => 'N',
251dfd0e 127
86867f3a 128 digest => \&Digest::MD5::md5,
c3aafc14 129 hash_size => 16, # In bytes
251dfd0e 130
81d16922 131 ##
c3aafc14 132 # Number of buckets per blist before another level of indexing is
e0098e7f 133 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 134 # files. DO NOT decrease this value below 16, due to risk of recursive
135 # reindex overrun.
81d16922 136 ##
612969fb 137 max_buckets => 16,
460b1067 138
83371fe3 139 storage => undef,
359a01ac 140 obj => undef,
612969fb 141 }, $class;
142
e0098e7f 143 if ( defined $args->{pack_size} ) {
144 if ( lc $args->{pack_size} eq 'small' ) {
145 $args->{long_size} = 2;
c9b6d0d8 146 $args->{long_pack} = 'n';
e0098e7f 147 }
148 elsif ( lc $args->{pack_size} eq 'medium' ) {
149 $args->{long_size} = 4;
150 $args->{long_pack} = 'N';
151 }
152 elsif ( lc $args->{pack_size} eq 'large' ) {
153 $args->{long_size} = 8;
154 $args->{long_pack} = 'Q';
155 }
156 else {
157 die "Unknown pack_size value: '$args->{pack_size}'\n";
158 }
159 }
160
fde3db1a 161 # Grab the parameters we want to use
162 foreach my $param ( keys %$self ) {
163 next unless exists $args->{$param};
3e9498a1 164 $self->{$param} = $args->{$param};
fde3db1a 165 }
359a01ac 166 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 167
e0098e7f 168 if ( $self->{max_buckets} < 16 ) {
169 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
170 $self->{max_buckets} = 16;
171 }
172
260a80b4 173 return $self;
174}
175
83371fe3 176sub _storage { return $_[0]{storage} }
460b1067 177
40956c06 178sub _apply_digest {
c3aafc14 179 my $self = shift;
180 return $self->{digest}->(@_);
181}
182
260a80b4 183sub calculate_sizes {
184 my $self = shift;
185
633df1fd 186 # The 2**8 here indicates the number of different characters in the
187 # current hashing algorithm
28394a1a 188 #XXX Does this need to be updated with different hashing algorithms?
ea2f6d67 189 $self->{hash_chars_used} = (2**8);
190 $self->{index_size} = $self->{hash_chars_used} * $self->{long_size};
191
192 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
e0098e7f 193 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 194
ea2f6d67 195 $self->{key_size} = $self->{long_size} * 2;
196 $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size};
197
260a80b4 198 return;
1bf65be7 199}
200
fde3db1a 201sub write_file_header {
0d0f3d5d 202 my $self = shift;
0d0f3d5d 203
83371fe3 204 my $loc = $self->_storage->request_space( length( SIG_FILE ) + 33 );
0d0f3d5d 205
83371fe3 206 $self->_storage->print_at( $loc,
260a80b4 207 SIG_FILE,
460b1067 208 SIG_HEADER,
209 pack('N', 1), # header version
eff6a245 210 pack('N', 24), # header size
211 pack('N4', 0, 0, 0, 0), # currently running transaction IDs
c9b6d0d8 212 pack('n', $self->{long_size}),
260a80b4 213 pack('A', $self->{long_pack}),
c9b6d0d8 214 pack('n', $self->{data_size}),
260a80b4 215 pack('A', $self->{data_pack}),
c9b6d0d8 216 pack('n', $self->{max_buckets}),
260a80b4 217 );
0d0f3d5d 218
83371fe3 219 $self->_storage->set_transaction_offset( 13 );
20b7f047 220
0d0f3d5d 221 return;
222}
223
fde3db1a 224sub read_file_header {
e064ccd1 225 my $self = shift;
e064ccd1 226
83371fe3 227 my $buffer = $self->_storage->read_at( 0, length(SIG_FILE) + 9 );
7dcefff3 228 return unless length($buffer);
460b1067 229
230 my ($file_signature, $sig_header, $header_version, $size) = unpack(
231 'A4 A N N', $buffer
42f79e07 232 );
e064ccd1 233
460b1067 234 unless ( $file_signature eq SIG_FILE ) {
83371fe3 235 $self->_storage->close;
e96daec8 236 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 237 }
260a80b4 238
460b1067 239 unless ( $sig_header eq SIG_HEADER ) {
83371fe3 240 $self->_storage->close;
e96daec8 241 $self->_throw_error( "Old file version found." );
460b1067 242 }
9b2370e0 243
83371fe3 244 my $buffer2 = $self->_storage->read_at( undef, $size );
eff6a245 245 my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 );
15ba72cc 246
83371fe3 247 $self->_storage->set_transaction_offset( 13 );
15ba72cc 248
460b1067 249 if ( @values < 5 || grep { !defined } @values ) {
83371fe3 250 $self->_storage->close;
e96daec8 251 $self->_throw_error("Corrupted file - bad header");
e064ccd1 252 }
253
460b1067 254 #XXX Add warnings if values weren't set right
255 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
256
7dcefff3 257 return length($buffer) + length($buffer2);
e064ccd1 258}
259
460b1067 260sub setup_fh {
261 my $self = shift;
262 my ($obj) = @_;
70b55428 263
7dcefff3 264 # Need to remove use of $fh here
83371fe3 265 my $fh = $self->_storage->{fh};
6fde4ed2 266 flock $fh, LOCK_EX;
118ba343 267
260a80b4 268 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 269 unless ( $obj->{base_offset} ) {
e96daec8 270 my $bytes_read = $self->read_file_header;
118ba343 271
260a80b4 272 $self->calculate_sizes;
273
118ba343 274 ##
fde3db1a 275 # File is empty -- write header and master index
118ba343 276 ##
277 if (!$bytes_read) {
83371fe3 278 $self->_storage->audit( "# Database created on" );
359a01ac 279
e96daec8 280 $self->write_file_header;
118ba343 281
83371fe3 282 $obj->{base_offset} = $self->_storage->request_space(
22e20cce 283 $self->tag_size( $self->{index_size} ),
284 );
118ba343 285
9e4f83a0 286 $self->write_tag(
e96daec8 287 $obj->_base_offset, $obj->_type,
f37c15ab 288 chr(0)x$self->{index_size},
118ba343 289 );
290
291 # Flush the filehandle
292 my $old_fh = select $fh;
293 my $old_af = $|; $| = 1; $| = $old_af;
294 select $old_fh;
295 }
296 else {
297 $obj->{base_offset} = $bytes_read;
298
299 ##
fde3db1a 300 # Get our type from master index header
118ba343 301 ##
359a01ac 302 my $tag = $self->load_tag($obj->_base_offset);
303 unless ( $tag ) {
304 flock $fh, LOCK_UN;
305 $self->_throw_error("Corrupted file, no master index record");
306 }
118ba343 307
e96daec8 308 unless ($obj->_type eq $tag->{signature}) {
359a01ac 309 flock $fh, LOCK_UN;
e96daec8 310 $self->_throw_error("File type mismatch");
118ba343 311 }
312 }
118ba343 313 }
260a80b4 314 else {
315 $self->calculate_sizes;
316 }
e06824f8 317
673464d9 318 #XXX We have to make sure we don't mess up when autoflush isn't turned on
83371fe3 319 $self->_storage->set_inode;
70b55428 320
6fde4ed2 321 flock $fh, LOCK_UN;
322
70b55428 323 return 1;
324}
325
16d1ad9b 326sub tag_size {
327 my $self = shift;
328 my ($size) = @_;
329 return SIG_SIZE + $self->{data_size} + $size;
330}
331
9e4f83a0 332sub write_tag {
20f7b20c 333 ##
334 # Given offset, signature and content, create tag and write to disk
335 ##
d4b1166e 336 my $self = shift;
e96daec8 337 my ($offset, $sig, $content) = @_;
f37c15ab 338 my $size = length( $content );
20f7b20c 339
83371fe3 340 $self->_storage->print_at(
7dcefff3 341 $offset,
342 $sig, pack($self->{data_pack}, $size), $content,
343 );
20f7b20c 344
f37c15ab 345 return unless defined $offset;
346
20f7b20c 347 return {
348 signature => $sig,
72e315ac 349 #XXX Is this even used?
86867f3a 350 size => $size,
f5677eab 351 start => $offset,
86867f3a 352 offset => $offset + SIG_SIZE + $self->{data_size},
f5677eab 353 content => $content,
354 is_new => 1,
20f7b20c 355 };
d4b1166e 356}
357
358sub load_tag {
20f7b20c 359 ##
360 # Given offset, load single tag and return signature, size and data
361 ##
d4b1166e 362 my $self = shift;
e96daec8 363 my ($offset) = @_;
9bc79bb6 364 print join(":",map{$_||''}caller) . " - load_tag($offset)\n" if $::DEBUG;
20f7b20c 365
83371fe3 366 my $storage = $self->_storage;
20f7b20c 367
86867f3a 368 my ($sig, $size) = unpack(
369 "A $self->{data_pack}",
83371fe3 370 $storage->read_at( $offset, SIG_SIZE + $self->{data_size} ),
86867f3a 371 );
20f7b20c 372
373 return {
374 signature => $sig,
c3aafc14 375 size => $size, #XXX Is this even used?
fb451ba6 376 start => $offset,
86867f3a 377 offset => $offset + SIG_SIZE + $self->{data_size},
83371fe3 378 content => $storage->read_at( undef, $size ),
20f7b20c 379 };
d4b1166e 380}
381
ea2f6d67 382sub find_keyloc {
383 my $self = shift;
384 my ($tag, $transaction_id) = @_;
83371fe3 385 $transaction_id = $self->_storage->transaction_id
ea2f6d67 386 unless defined $transaction_id;
387
388 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
389 my ($loc, $trans_id, $is_deleted) = unpack(
390 "$self->{long_pack} C C",
391 substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ),
392 );
393
fb451ba6 394 next if $loc != HEAD && $transaction_id != $trans_id;
395 return( $loc, $is_deleted, $i * $self->{key_size} );
ea2f6d67 396 }
397
398 return;
399}
400
20f7b20c 401sub add_bucket {
402 ##
403 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
404 # plain (undigested) key and value.
405 ##
d4b1166e 406 my $self = shift;
359a01ac 407 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
75be6413 408
eea0d863 409 # This verifies that only supported values will be stored.
410 {
411 my $r = Scalar::Util::reftype( $value );
eea0d863 412
86867f3a 413 last if !defined $r;
eea0d863 414 last if $r eq 'HASH';
415 last if $r eq 'ARRAY';
416
e96daec8 417 $self->_throw_error(
86867f3a 418 "Storage of references of type '$r' is not supported."
eea0d863 419 );
420 }
421
83371fe3 422 my $storage = $self->_storage;
20f7b20c 423
21838116 424 #ACID - This is a mutation. Must only find the exact transaction
ea2f6d67 425 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );
c9b6d0d8 426
427 my @transactions;
83371fe3 428 if ( $storage->transaction_id == 0 ) {
429 @transactions = $storage->current_transactions;
c9b6d0d8 430 }
75be6413 431
e96daec8 432# $self->_release_space( $size, $subloc );
f9c33187 433#XXX This needs updating to use _release_space
ea2f6d67 434
86867f3a 435 my $location;
ea2f6d67 436 my $size = $self->_length_needed( $value, $plain_key );
019404df 437
ea2f6d67 438 # Updating a known md5
439 if ( $keyloc ) {
440 my $keytag = $self->load_tag( $keyloc );
441 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
442
13ff93d5 443 if ( $subloc && !$is_deleted && @transactions ) {
ea2f6d67 444 my $old_value = $self->read_from_loc( $subloc, $orig_key );
445 my $old_size = $self->_length_needed( $old_value, $plain_key );
446
447 for my $trans_id ( @transactions ) {
448 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
449 unless ($loc) {
83371fe3 450 my $location2 = $storage->request_space( $old_size );
451 $storage->print_at( $keytag->{offset} + $offset2,
ea2f6d67 452 pack($self->{long_pack}, $location2 ),
453 pack( 'C C', $trans_id, 0 ),
454 );
c3aafc14 455 $self->_write_value( $location2, $plain_key, $old_value, $orig_key );
ea2f6d67 456 }
457 }
75be6413 458 }
504185fb 459
83371fe3 460 $location = $self->_storage->request_space( $size );
ea2f6d67 461 #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
83371fe3 462 $storage->print_at( $keytag->{offset} + $offset,
ea2f6d67 463 pack($self->{long_pack}, $location ),
83371fe3 464 pack( 'C C', $storage->transaction_id, 0 ),
ea2f6d67 465 );
75be6413 466 }
386bab6c 467 # Adding a new md5
ea2f6d67 468 else {
83371fe3 469 my $keyloc = $storage->request_space( $self->tag_size( $self->{keyloc_size} ) );
386bab6c 470
ea2f6d67 471 # The bucket fit into list
472 if ( defined $offset ) {
83371fe3 473 $storage->print_at( $tag->{offset} + $offset,
ea2f6d67 474 $md5, pack( $self->{long_pack}, $keyloc ),
475 );
476 }
477 # If bucket didn't fit into list, split into a new index level
478 else {
479 $self->split_index( $tag, $md5, $keyloc );
480 }
481
482 my $keytag = $self->write_tag(
483 $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
019404df 484 );
c9b6d0d8 485
83371fe3 486 $location = $self->_storage->request_space( $size );
487 $storage->print_at( $keytag->{offset},
ea2f6d67 488 pack( $self->{long_pack}, $location ),
83371fe3 489 pack( 'C C', $storage->transaction_id, 0 ),
ea2f6d67 490 );
491
492 my $offset = 1;
493 for my $trans_id ( @transactions ) {
83371fe3 494 $storage->print_at( $keytag->{offset} + $self->{key_size} * $offset++,
13ff93d5 495 pack( $self->{long_pack}, 0 ),
ea2f6d67 496 pack( 'C C', $trans_id, 1 ),
497 );
c9b6d0d8 498 }
386bab6c 499 }
20f7b20c 500
c3aafc14 501 $self->_write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 502
86867f3a 503 return 1;
d5d7c51d 504}
505
c3aafc14 506sub _write_value {
d5d7c51d 507 my $self = shift;
129ea236 508 my ($key_loc, $location, $key, $value, $orig_key) = @_;
d5d7c51d 509
83371fe3 510 my $storage = $self->_storage;
d5d7c51d 511
9d4fa373 512 my $dbm_deep_obj = _get_dbm_object( $value );
83371fe3 513 if ( $dbm_deep_obj && $dbm_deep_obj->_storage ne $storage ) {
e96daec8 514 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 515 }
d5d7c51d 516
20f7b20c 517 ##
d5d7c51d 518 # Write signature based on content type, set content length and write
519 # actual value.
20f7b20c 520 ##
9d4fa373 521 my $r = Scalar::Util::reftype( $value ) || '';
522 if ( $dbm_deep_obj ) {
7dcefff3 523 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 524 }
525 elsif ($r eq 'HASH') {
9d4fa373 526 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 527 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 528 }
7dcefff3 529 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 530 }
531 elsif ($r eq 'ARRAY') {
9d4fa373 532 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 533 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 534 }
7dcefff3 535 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 536 }
537 elsif (!defined($value)) {
7dcefff3 538 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 539 }
540 else {
7dcefff3 541 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 542 }
20f7b20c 543
d5d7c51d 544 ##
545 # Plain key is stored AFTER value, as keys are typically fetched less often.
546 ##
83371fe3 547 $storage->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 548
9a187d8c 549 # Internal references don't care about autobless
9d4fa373 550 return 1 if $dbm_deep_obj;
9a187d8c 551
d5d7c51d 552 ##
553 # If value is blessed, preserve class name
554 ##
83371fe3 555 if ( $storage->{autobless} ) {
633df1fd 556 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
83371fe3 557 $storage->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 558 }
d5d7c51d 559 else {
83371fe3 560 $storage->print_at( undef, chr(0) );
20f7b20c 561 }
d5d7c51d 562 }
20f7b20c 563
d5d7c51d 564 ##
56ec4340 565 # Tie the passed in reference so that changes to it are reflected in the
566 # datafile. The use of $location as the base_offset will act as the
567 # the linkage between parent and child.
568 #
569 # The overall assignment is a hack around the fact that just tying doesn't
570 # store the values. This may not be the wrong thing to do.
d5d7c51d 571 ##
9d4fa373 572 if ($r eq 'HASH') {
573 my %x = %$value;
574 tie %$value, 'DBM::Deep', {
129ea236 575 base_offset => $key_loc,
83371fe3 576 storage => $storage,
359a01ac 577 parent => $self->{obj},
578 parent_key => $orig_key,
9d4fa373 579 };
580 %$value = %x;
bd01f6ec 581 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
9d4fa373 582 }
583 elsif ($r eq 'ARRAY') {
584 my @x = @$value;
585 tie @$value, 'DBM::Deep', {
129ea236 586 base_offset => $key_loc,
83371fe3 587 storage => $storage,
359a01ac 588 parent => $self->{obj},
589 parent_key => $orig_key,
9d4fa373 590 };
591 @$value = @x;
bd01f6ec 592 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
20f7b20c 593 }
d4b1166e 594
d5d7c51d 595 return 1;
d4b1166e 596}
597
75be6413 598sub split_index {
599 my $self = shift;
ea2f6d67 600 my ($tag, $md5, $keyloc) = @_;
75be6413 601
83371fe3 602 my $storage = $self->_storage;
21838116 603
83371fe3 604 my $loc = $storage->request_space(
e96daec8 605 $self->tag_size( $self->{index_size} ),
16d1ad9b 606 );
607
83371fe3 608 $storage->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 609
9e4f83a0 610 my $index_tag = $self->write_tag(
e96daec8 611 $loc, SIG_INDEX,
f37c15ab 612 chr(0)x$self->{index_size},
75be6413 613 );
614
7b1e1aa1 615 my $keys = $tag->{content}
ea2f6d67 616 . $md5 . pack($self->{long_pack}, $keyloc);
75be6413 617
f9c33187 618 my @newloc = ();
75be6413 619 BUCKET:
633df1fd 620 # The <= here is deliberate - we have max_buckets+1 keys to iterate
621 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 622 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
ea2f6d67 623 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 624
f9c33187 625 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
626 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 627
75be6413 628 my $num = ord(substr($key, $tag->{ch} + 1, 1));
629
f9c33187 630 if ($newloc[$num]) {
83371fe3 631 my $subkeys = $storage->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 632
f9c33187 633 # This is looking for the first empty spot
ea2f6d67 634 my ($subloc, $offset) = $self->_find_in_buckets(
f9c33187 635 { content => $subkeys }, '',
7b1e1aa1 636 );
75be6413 637
83371fe3 638 $storage->print_at(
633df1fd 639 $newloc[$num] + $offset,
640 $key, pack($self->{long_pack}, $old_subloc),
641 );
7b1e1aa1 642
643 next;
75be6413 644 }
75be6413 645
83371fe3 646 my $loc = $storage->request_space(
e96daec8 647 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 648 );
2603d86e 649
83371fe3 650 $storage->print_at(
019404df 651 $index_tag->{offset} + ($num * $self->{long_size}),
652 pack($self->{long_pack}, $loc),
653 );
75be6413 654
7b1e1aa1 655 my $blist_tag = $self->write_tag(
e96daec8 656 $loc, SIG_BLIST,
7b1e1aa1 657 chr(0)x$self->{bucket_list_size},
658 );
659
83371fe3 660 $storage->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 661
f9c33187 662 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 663 }
664
665 $self->_release_space(
e96daec8 666 $self->tag_size( $self->{bucket_list_size} ),
fb451ba6 667 $tag->{start},
7b1e1aa1 668 );
75be6413 669
ea2f6d67 670 return 1;
75be6413 671}
672
8db25060 673sub read_from_loc {
674 my $self = shift;
129ea236 675 my ($key_loc, $subloc, $orig_key) = @_;
8db25060 676
83371fe3 677 my $storage = $self->_storage;
8db25060 678
83371fe3 679 my $signature = $storage->read_at( $subloc, SIG_SIZE );
8db25060 680
681 ##
682 # If value is a hash or array, return new DBM::Deep object with correct offset
683 ##
684 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
c3aafc14 685 #XXX This needs to be a singleton
bd01f6ec 686# my $new_obj;
687# my $is_autobless;
688# if ( $signature eq SIG_HASH ) {
689# $new_obj = {};
690# tie %$new_obj, 'DBM::Deep', {
691# base_offset => $subloc,
83371fe3 692# storage => $self->_storage,
bd01f6ec 693# parent => $self->{obj},
694# parent_key => $orig_key,
695# };
83371fe3 696# $is_autobless = tied(%$new_obj)->_storage->{autobless};
bd01f6ec 697# }
698# else {
699# $new_obj = [];
700# tie @$new_obj, 'DBM::Deep', {
701# base_offset => $subloc,
83371fe3 702# storage => $self->_storage,
bd01f6ec 703# parent => $self->{obj},
704# parent_key => $orig_key,
705# };
83371fe3 706# $is_autobless = tied(@$new_obj)->_storage->{autobless};
bd01f6ec 707# }
708#
709# if ($is_autobless) {
710
685e40f1 711 my $new_obj = DBM::Deep->new({
359a01ac 712 type => $signature,
129ea236 713 base_offset => $key_loc,
83371fe3 714 storage => $self->_storage,
359a01ac 715 parent => $self->{obj},
716 parent_key => $orig_key,
685e40f1 717 });
8db25060 718
83371fe3 719 if ($new_obj->_storage->{autobless}) {
8db25060 720 ##
721 # Skip over value and plain key to see if object needs
722 # to be re-blessed
723 ##
83371fe3 724 $storage->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 725
83371fe3 726 my $size = $storage->read_at( undef, $self->{data_size} );
c6ea6b6c 727 $size = unpack($self->{data_pack}, $size);
83371fe3 728 if ($size) { $storage->increment_pointer( $size ); }
8db25060 729
83371fe3 730 my $bless_bit = $storage->read_at( undef, 1 );
86867f3a 731 if ( ord($bless_bit) ) {
732 my $size = unpack(
733 $self->{data_pack},
83371fe3 734 $storage->read_at( undef, $self->{data_size} ),
86867f3a 735 );
7dcefff3 736
86867f3a 737 if ( $size ) {
83371fe3 738 $new_obj = bless $new_obj, $storage->read_at( undef, $size );
86867f3a 739 }
8db25060 740 }
741 }
742
685e40f1 743 return $new_obj;
8db25060 744 }
745 elsif ( $signature eq SIG_INTERNAL ) {
83371fe3 746 my $size = $storage->read_at( undef, $self->{data_size} );
8db25060 747 $size = unpack($self->{data_pack}, $size);
748
749 if ( $size ) {
83371fe3 750 my $new_loc = $storage->read_at( undef, $size );
7dcefff3 751 $new_loc = unpack( $self->{long_pack}, $new_loc );
129ea236 752 return $self->read_from_loc( $key_loc, $new_loc, $orig_key );
8db25060 753 }
754 else {
755 return;
756 }
757 }
758 ##
759 # Otherwise return actual value
760 ##
460b1067 761 elsif ( $signature eq SIG_DATA ) {
83371fe3 762 my $size = $storage->read_at( undef, $self->{data_size} );
8db25060 763 $size = unpack($self->{data_pack}, $size);
764
83371fe3 765 my $value = $size ? $storage->read_at( undef, $size ) : '';
8db25060 766 return $value;
767 }
768
769 ##
770 # Key exists, but content is null
771 ##
772 return;
773}
774
9020ee8c 775sub get_bucket_value {
beac1dff 776 ##
777 # Fetch single value given tag and MD5 digested key.
778 ##
779 my $self = shift;
359a01ac 780 my ($tag, $md5, $orig_key) = @_;
9020ee8c 781
21838116 782 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 783 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
94e8af14 784
ea2f6d67 785 if ( !$keyloc ) {
94e8af14 786 #XXX Need to use real key
ea2f6d67 787# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key );
94e8af14 788# return;
789 }
ea2f6d67 790# elsif ( !$is_deleted ) {
791 else {
792 my $keytag = $self->load_tag( $keyloc );
793 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 794 if (!$subloc && !$is_deleted) {
ea2f6d67 795 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
796 }
797 if ( $subloc && !$is_deleted ) {
798 return $self->read_from_loc( $subloc, $orig_key );
799 }
386bab6c 800 }
94e8af14 801
beac1dff 802 return;
9020ee8c 803}
ab0e4957 804
805sub delete_bucket {
beac1dff 806 ##
807 # Delete single key/value pair given tag and MD5 digested key.
808 ##
809 my $self = shift;
a97c8f67 810 my ($tag, $md5, $orig_key) = @_;
ab0e4957 811
22e20cce 812 #ACID - Although this is a mutation, we must find any transaction.
813 # This is because we need to mark something as deleted that is in the HEAD.
ea2f6d67 814 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 815
ea2f6d67 816 return if !$keyloc;
633df1fd 817
83371fe3 818 my $storage = $self->_storage;
633df1fd 819
820 my @transactions;
83371fe3 821 if ( $storage->transaction_id == 0 ) {
822 @transactions = $storage->current_transactions;
633df1fd 823 }
824
83371fe3 825 if ( $storage->transaction_id == 0 ) {
ea2f6d67 826 my $keytag = $self->load_tag( $keyloc );
7a960a12 827
ea2f6d67 828 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 829 return if !$subloc || $is_deleted;
830
633df1fd 831 my $value = $self->read_from_loc( $subloc, $orig_key );
832
ea2f6d67 833 my $size = $self->_length_needed( $value, $orig_key );
834
835 for my $trans_id ( @transactions ) {
836 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
837 unless ($loc) {
83371fe3 838 my $location2 = $storage->request_space( $size );
839 $storage->print_at( $keytag->{offset} + $offset2,
ea2f6d67 840 pack($self->{long_pack}, $location2 ),
841 pack( 'C C', $trans_id, 0 ),
842 );
c3aafc14 843 $self->_write_value( $location2, $orig_key, $value, $orig_key );
ea2f6d67 844 }
633df1fd 845 }
846
ea2f6d67 847 $keytag = $self->load_tag( $keyloc );
848 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
83371fe3 849 $storage->print_at( $keytag->{offset} + $offset,
ea2f6d67 850 substr( $keytag->{content}, $offset + $self->{key_size} ),
851 chr(0) x $self->{key_size},
019404df 852 );
386bab6c 853 }
633df1fd 854 else {
ea2f6d67 855 my $keytag = $self->load_tag( $keyloc );
7a960a12 856
ea2f6d67 857 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 858
83371fe3 859 $storage->print_at( $keytag->{offset} + $offset,
13ff93d5 860 pack($self->{long_pack}, 0 ),
83371fe3 861 pack( 'C C', $storage->transaction_id, 1 ),
ea2f6d67 862 );
633df1fd 863 }
864
865 return 1;
ab0e4957 866}
867
912d50b1 868sub bucket_exists {
beac1dff 869 ##
870 # Check existence of single key given tag and MD5 digested key.
871 ##
872 my $self = shift;
e96daec8 873 my ($tag, $md5) = @_;
912d50b1 874
21838116 875 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 876 my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );
877 my $keytag = $self->load_tag( $keyloc );
878 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
13ff93d5 879 if ( !$subloc && !$is_deleted ) {
ea2f6d67 880 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
881 }
c9b6d0d8 882 return ($subloc && !$is_deleted) && 1;
912d50b1 883}
884
72e315ac 885sub find_blist {
beac1dff 886 ##
887 # Locate offset for bucket list, given digested key
888 ##
889 my $self = shift;
e96daec8 890 my ($offset, $md5, $args) = @_;
d0b74c17 891 $args = {} unless $args;
892
beac1dff 893 ##
894 # Locate offset for bucket list using digest index system
895 ##
e96daec8 896 my $tag = $self->load_tag( $offset )
897 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 898
fb451ba6 899 #XXX What happens when $ch >= $self->{hash_size} ??
900 for (my $ch = 0; $tag->{signature} ne SIG_BLIST; $ch++) {
d0b74c17 901 my $num = ord substr($md5, $ch, 1);
902
903 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 904 $tag = $self->index_lookup( $tag, $num );
d0b74c17 905
906 if (!$tag) {
29b01632 907 return if !$args->{create};
d0b74c17 908
83371fe3 909 my $loc = $self->_storage->request_space(
e96daec8 910 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 911 );
912
83371fe3 913 $self->_storage->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 914
9e4f83a0 915 $tag = $self->write_tag(
e96daec8 916 $loc, SIG_BLIST,
f37c15ab 917 chr(0)x$self->{bucket_list_size},
d5d7c51d 918 );
919
920 $tag->{ref_loc} = $ref_loc;
921 $tag->{ch} = $ch;
922
923 last;
d0b74c17 924 }
925
fb451ba6 926 $tag->{ch} = $ch;
d0b74c17 927 $tag->{ref_loc} = $ref_loc;
beac1dff 928 }
d0b74c17 929
beac1dff 930 return $tag;
6736c116 931}
932
d0b74c17 933sub index_lookup {
934 ##
935 # Given index tag, lookup single entry in index and return .
936 ##
937 my $self = shift;
e96daec8 938 my ($tag, $index) = @_;
d0b74c17 939
940 my $location = unpack(
941 $self->{long_pack},
942 substr(
943 $tag->{content},
944 $index * $self->{long_size},
945 $self->{long_size},
946 ),
947 );
948
949 if (!$location) { return; }
950
e96daec8 951 return $self->load_tag( $location );
d0b74c17 952}
953
6736c116 954sub traverse_index {
beac1dff 955 ##
956 # Scan index and recursively step into deeper levels, looking for next key.
957 ##
6736c116 958 my $self = shift;
ea2f6d67 959 my ($xxxx, $offset, $ch, $force_return_next) = @_;
d0b74c17 960
e96daec8 961 my $tag = $self->load_tag( $offset );
6736c116 962
8db25060 963 if ($tag->{signature} ne SIG_BLIST) {
ea2f6d67 964 my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1));
d0b74c17 965
ea2f6d67 966 for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) {
e5fc7e69 967 my $subloc = unpack(
968 $self->{long_pack},
e06824f8 969 substr(
ea2f6d67 970 $tag->{content},
e06824f8 971 $idx * $self->{long_size},
972 $self->{long_size},
973 ),
e5fc7e69 974 );
975
beac1dff 976 if ($subloc) {
e5fc7e69 977 my $result = $self->traverse_index(
ea2f6d67 978 $xxxx, $subloc, $ch + 1, $force_return_next,
e5fc7e69 979 );
980
ea2f6d67 981 if (defined $result) { return $result; }
beac1dff 982 }
983 } # index loop
d0b74c17 984
ea2f6d67 985 $xxxx->{return_next} = 1;
42717e46 986 }
987 # This is the bucket list
e5fc7e69 988 else {
beac1dff 989 my $keys = $tag->{content};
ea2f6d67 990 if ($force_return_next) { $xxxx->{return_next} = 1; }
d0b74c17 991
beac1dff 992 ##
993 # Iterate through buckets, looking for a key match
994 ##
83371fe3 995 my $transaction_id = $self->_storage->transaction_id;
8db25060 996 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
ea2f6d67 997 my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 998
8db25060 999 # End of bucket list -- return to outer loop
ea2f6d67 1000 if (!$keyloc) {
1001 $xxxx->{return_next} = 1;
beac1dff 1002 last;
1003 }
8db25060 1004 # Located previous key -- return next one found
ea2f6d67 1005 elsif ($key eq $xxxx->{prev_md5}) {
1006 $xxxx->{return_next} = 1;
beac1dff 1007 next;
1008 }
8db25060 1009 # Seek to bucket location and skip over signature
ea2f6d67 1010 elsif ($xxxx->{return_next}) {
83371fe3 1011 my $storage = $self->_storage;
d0b74c17 1012
ea2f6d67 1013 my $keytag = $self->load_tag( $keyloc );
1014 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 1015 if ( $subloc == 0 && !$is_deleted ) {
ea2f6d67 1016 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
1017 }
1018 next if $is_deleted;
1019
beac1dff 1020 # Skip over value to get to plain key
83371fe3 1021 my $sig = $storage->read_at( $subloc, SIG_SIZE );
8db25060 1022
83371fe3 1023 my $size = $storage->read_at( undef, $self->{data_size} );
e5fc7e69 1024 $size = unpack($self->{data_pack}, $size);
83371fe3 1025 if ($size) { $storage->increment_pointer( $size ); }
d0b74c17 1026
beac1dff 1027 # Read in plain key and return as scalar
83371fe3 1028 $size = $storage->read_at( undef, $self->{data_size} );
e5fc7e69 1029 $size = unpack($self->{data_pack}, $size);
86867f3a 1030
7dcefff3 1031 my $plain_key;
83371fe3 1032 if ($size) { $plain_key = $storage->read_at( undef, $size); }
beac1dff 1033 return $plain_key;
1034 }
8db25060 1035 }
d0b74c17 1036
ea2f6d67 1037 $xxxx->{return_next} = 1;
42717e46 1038 }
d0b74c17 1039
beac1dff 1040 return;
6736c116 1041}
1042
75be6413 1043# Utilities
1044
9cec1360 1045sub _get_key_subloc {
75be6413 1046 my $self = shift;
1047 my ($keys, $idx) = @_;
1048
ea2f6d67 1049 return unpack(
28394a1a 1050 # This is 'a', not 'A'. Please read the pack() documentation for the
1051 # difference between the two and why it's important.
ea2f6d67 1052 "a$self->{hash_size} $self->{long_pack}",
75be6413 1053 substr(
1054 $keys,
9cec1360 1055 ($idx * $self->{bucket_size}),
1056 $self->{bucket_size},
75be6413 1057 ),
1058 );
75be6413 1059}
1060
d608b06e 1061sub _find_in_buckets {
1062 my $self = shift;
ea2f6d67 1063 my ($tag, $md5) = @_;
21838116 1064
d608b06e 1065 BUCKET:
1066 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
ea2f6d67 1067 my ($key, $subloc) = $self->_get_key_subloc(
9a187d8c 1068 $tag->{content}, $i,
1069 );
d608b06e 1070
fb451ba6 1071 next BUCKET if $subloc && $key ne $md5;
1072 return( $subloc, $i * $self->{bucket_size} );
d608b06e 1073 }
1074
1075 return;
1076}
1077
994ccd8e 1078sub _release_space {
1079 my $self = shift;
e96daec8 1080 my ($size, $loc) = @_;
994ccd8e 1081
7b1e1aa1 1082 my $next_loc = 0;
1083
83371fe3 1084 $self->_storage->print_at( $loc,
019404df 1085 SIG_FREE,
1086 pack($self->{long_pack}, $size ),
1087 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1088 );
1089
994ccd8e 1090 return;
1091}
1092
e96daec8 1093sub _throw_error {
1094 die "DBM::Deep: $_[1]\n";
1095}
1096
86867f3a 1097sub _get_dbm_object {
1098 my $item = shift;
1099
1100 my $obj = eval {
1101 local $SIG{__DIE__};
1102 if ($item->isa( 'DBM::Deep' )) {
1103 return $item;
1104 }
1105 return;
1106 };
1107 return $obj if $obj;
1108
1109 my $r = Scalar::Util::reftype( $item ) || '';
1110 if ( $r eq 'HASH' ) {
1111 my $obj = eval {
1112 local $SIG{__DIE__};
1113 my $obj = tied(%$item);
1114 if ($obj->isa( 'DBM::Deep' )) {
1115 return $obj;
1116 }
1117 return;
1118 };
1119 return $obj if $obj;
1120 }
1121 elsif ( $r eq 'ARRAY' ) {
1122 my $obj = eval {
1123 local $SIG{__DIE__};
1124 my $obj = tied(@$item);
1125 if ($obj->isa( 'DBM::Deep' )) {
1126 return $obj;
1127 }
1128 return;
1129 };
1130 return $obj if $obj;
1131 }
1132
1133 return;
1134}
1135
1136sub _length_needed {
1137 my $self = shift;
1138 my ($value, $key) = @_;
1139
1140 my $is_dbm_deep = eval {
1141 local $SIG{'__DIE__'};
1142 $value->isa( 'DBM::Deep' );
1143 };
1144
1145 my $len = SIG_SIZE
1146 + $self->{data_size} # size for value
1147 + $self->{data_size} # size for key
1148 + length( $key ); # length of key
1149
83371fe3 1150 if ( $is_dbm_deep && $value->_storage eq $self->_storage ) {
86867f3a 1151 # long_size is for the internal reference
1152 return $len + $self->{long_size};
1153 }
1154
83371fe3 1155 if ( $self->_storage->{autobless} ) {
86867f3a 1156 # This is for the bit saying whether or not this thing is blessed.
1157 $len += 1;
1158 }
1159
1160 my $r = Scalar::Util::reftype( $value ) || '';
1161 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1162 if ( defined $value ) {
1163 $len += length( $value );
1164 }
1165 return $len;
1166 }
1167
1168 $len += $self->{index_size};
1169
1170 # if autobless is enabled, must also take into consideration
1171 # the class name as it is stored after the key.
83371fe3 1172 if ( $self->_storage->{autobless} ) {
86867f3a 1173 my $c = Scalar::Util::blessed($value);
1174 if ( defined $c && !$is_dbm_deep ) {
1175 $len += $self->{data_size} + length($c);
1176 }
1177 }
1178
1179 return $len;
1180}
1181
a20d9a3f 11821;
1183__END__