r11705@rob-kinyons-powerbook58: rob | 2006-05-01 13:19:45 -0400
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
86867f3a 8our $VERSION = q(0.99_01);
9
633df1fd 10use Fcntl qw( :DEFAULT :flock );
359a01ac 11use Scalar::Util ();
a20d9a3f 12
21838116 13# File-wide notes:
20b7f047 14# * To add to bucket_size, make sure you modify the following:
15# - calculate_sizes()
16# - _get_key_subloc()
17# - add_bucket() - where the buckets are printed
21838116 18
8db25060 19##
20# Setup file and tag signatures. These should never change.
21##
22sub SIG_FILE () { 'DPDB' }
460b1067 23sub SIG_HEADER () { 'h' }
8db25060 24sub SIG_INTERNAL () { 'i' }
25sub SIG_HASH () { 'H' }
26sub SIG_ARRAY () { 'A' }
8db25060 27sub SIG_NULL () { 'N' }
28sub SIG_DATA () { 'D' }
29sub SIG_INDEX () { 'I' }
30sub SIG_BLIST () { 'B' }
7b1e1aa1 31sub SIG_FREE () { 'F' }
86867f3a 32sub SIG_KEYS () { 'K' }
8db25060 33sub SIG_SIZE () { 1 }
34
612969fb 35sub new {
36 my $class = shift;
37 my ($args) = @_;
38
39 my $self = bless {
86867f3a 40 long_size => 4,
41 long_pack => 'N',
42 data_size => 4,
43 data_pack => 'N',
251dfd0e 44
86867f3a 45 digest => \&Digest::MD5::md5,
46 hash_size => 16,
251dfd0e 47
81d16922 48 ##
86867f3a 49 # Maximum number of buckets per blist before another level of indexing is
e0098e7f 50 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 51 # files. DO NOT decrease this value below 16, due to risk of recursive
52 # reindex overrun.
81d16922 53 ##
612969fb 54 max_buckets => 16,
460b1067 55
56 fileobj => undef,
359a01ac 57 obj => undef,
612969fb 58 }, $class;
59
e0098e7f 60 if ( defined $args->{pack_size} ) {
61 if ( lc $args->{pack_size} eq 'small' ) {
62 $args->{long_size} = 2;
c9b6d0d8 63 $args->{long_pack} = 'n';
e0098e7f 64 }
65 elsif ( lc $args->{pack_size} eq 'medium' ) {
66 $args->{long_size} = 4;
67 $args->{long_pack} = 'N';
68 }
69 elsif ( lc $args->{pack_size} eq 'large' ) {
70 $args->{long_size} = 8;
71 $args->{long_pack} = 'Q';
72 }
73 else {
74 die "Unknown pack_size value: '$args->{pack_size}'\n";
75 }
76 }
77
fde3db1a 78 # Grab the parameters we want to use
79 foreach my $param ( keys %$self ) {
80 next unless exists $args->{$param};
3e9498a1 81 $self->{$param} = $args->{$param};
fde3db1a 82 }
359a01ac 83 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 84
e0098e7f 85 if ( $self->{max_buckets} < 16 ) {
86 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
87 $self->{max_buckets} = 16;
88 }
89
260a80b4 90 return $self;
91}
92
460b1067 93sub _fileobj { return $_[0]{fileobj} }
460b1067 94
260a80b4 95sub calculate_sizes {
96 my $self = shift;
97
633df1fd 98 # The 2**8 here indicates the number of different characters in the
99 # current hashing algorithm
28394a1a 100 #XXX Does this need to be updated with different hashing algorithms?
ea2f6d67 101 $self->{hash_chars_used} = (2**8);
102 $self->{index_size} = $self->{hash_chars_used} * $self->{long_size};
103
104 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
e0098e7f 105 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 106
ea2f6d67 107 $self->{key_size} = $self->{long_size} * 2;
108 $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size};
109
260a80b4 110 return;
1bf65be7 111}
112
fde3db1a 113sub write_file_header {
0d0f3d5d 114 my $self = shift;
0d0f3d5d 115
eff6a245 116 my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 33 );
0d0f3d5d 117
019404df 118 $self->_fileobj->print_at( $loc,
260a80b4 119 SIG_FILE,
460b1067 120 SIG_HEADER,
121 pack('N', 1), # header version
eff6a245 122 pack('N', 24), # header size
123 pack('N4', 0, 0, 0, 0), # currently running transaction IDs
c9b6d0d8 124 pack('n', $self->{long_size}),
260a80b4 125 pack('A', $self->{long_pack}),
c9b6d0d8 126 pack('n', $self->{data_size}),
260a80b4 127 pack('A', $self->{data_pack}),
c9b6d0d8 128 pack('n', $self->{max_buckets}),
260a80b4 129 );
0d0f3d5d 130
20b7f047 131 $self->_fileobj->set_transaction_offset( 13 );
132
0d0f3d5d 133 return;
134}
135
fde3db1a 136sub read_file_header {
e064ccd1 137 my $self = shift;
e064ccd1 138
7dcefff3 139 my $buffer = $self->_fileobj->read_at( 0, length(SIG_FILE) + 9 );
140 return unless length($buffer);
460b1067 141
142 my ($file_signature, $sig_header, $header_version, $size) = unpack(
143 'A4 A N N', $buffer
42f79e07 144 );
e064ccd1 145
460b1067 146 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 147 $self->_fileobj->close;
e96daec8 148 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 149 }
260a80b4 150
460b1067 151 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 152 $self->_fileobj->close;
e96daec8 153 $self->_throw_error( "Old file version found." );
460b1067 154 }
9b2370e0 155
7dcefff3 156 my $buffer2 = $self->_fileobj->read_at( undef, $size );
eff6a245 157 my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 );
15ba72cc 158
159 $self->_fileobj->set_transaction_offset( 13 );
160
460b1067 161 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 162 $self->_fileobj->close;
e96daec8 163 $self->_throw_error("Corrupted file - bad header");
e064ccd1 164 }
165
460b1067 166 #XXX Add warnings if values weren't set right
167 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
168
7dcefff3 169 return length($buffer) + length($buffer2);
e064ccd1 170}
171
460b1067 172sub setup_fh {
173 my $self = shift;
174 my ($obj) = @_;
70b55428 175
7dcefff3 176 # Need to remove use of $fh here
177 my $fh = $self->_fileobj->{fh};
6fde4ed2 178 flock $fh, LOCK_EX;
118ba343 179
260a80b4 180 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 181 unless ( $obj->{base_offset} ) {
e96daec8 182 my $bytes_read = $self->read_file_header;
118ba343 183
260a80b4 184 $self->calculate_sizes;
185
118ba343 186 ##
fde3db1a 187 # File is empty -- write header and master index
118ba343 188 ##
189 if (!$bytes_read) {
aa83bc1e 190 $self->_fileobj->audit( "# Database created on" );
359a01ac 191
e96daec8 192 $self->write_file_header;
118ba343 193
22e20cce 194 $obj->{base_offset} = $self->_fileobj->request_space(
195 $self->tag_size( $self->{index_size} ),
196 );
118ba343 197
9e4f83a0 198 $self->write_tag(
e96daec8 199 $obj->_base_offset, $obj->_type,
f37c15ab 200 chr(0)x$self->{index_size},
118ba343 201 );
202
203 # Flush the filehandle
204 my $old_fh = select $fh;
205 my $old_af = $|; $| = 1; $| = $old_af;
206 select $old_fh;
207 }
208 else {
209 $obj->{base_offset} = $bytes_read;
210
211 ##
fde3db1a 212 # Get our type from master index header
118ba343 213 ##
359a01ac 214 my $tag = $self->load_tag($obj->_base_offset);
215 unless ( $tag ) {
216 flock $fh, LOCK_UN;
217 $self->_throw_error("Corrupted file, no master index record");
218 }
118ba343 219
e96daec8 220 unless ($obj->_type eq $tag->{signature}) {
359a01ac 221 flock $fh, LOCK_UN;
e96daec8 222 $self->_throw_error("File type mismatch");
118ba343 223 }
224 }
118ba343 225 }
260a80b4 226 else {
227 $self->calculate_sizes;
228 }
e06824f8 229
673464d9 230 #XXX We have to make sure we don't mess up when autoflush isn't turned on
7dcefff3 231 $self->_fileobj->set_inode;
70b55428 232
6fde4ed2 233 flock $fh, LOCK_UN;
234
70b55428 235 return 1;
236}
237
16d1ad9b 238sub tag_size {
239 my $self = shift;
240 my ($size) = @_;
241 return SIG_SIZE + $self->{data_size} + $size;
242}
243
9e4f83a0 244sub write_tag {
20f7b20c 245 ##
246 # Given offset, signature and content, create tag and write to disk
247 ##
d4b1166e 248 my $self = shift;
e96daec8 249 my ($offset, $sig, $content) = @_;
f37c15ab 250 my $size = length( $content );
20f7b20c 251
7dcefff3 252 $self->_fileobj->print_at(
253 $offset,
254 $sig, pack($self->{data_pack}, $size), $content,
255 );
20f7b20c 256
f37c15ab 257 return unless defined $offset;
258
20f7b20c 259 return {
260 signature => $sig,
72e315ac 261 #XXX Is this even used?
86867f3a 262 size => $size,
263 offset => $offset + SIG_SIZE + $self->{data_size},
264 content => $content
20f7b20c 265 };
d4b1166e 266}
267
268sub load_tag {
20f7b20c 269 ##
270 # Given offset, load single tag and return signature, size and data
271 ##
d4b1166e 272 my $self = shift;
e96daec8 273 my ($offset) = @_;
20f7b20c 274
7dcefff3 275 my $fileobj = $self->_fileobj;
20f7b20c 276
86867f3a 277 my ($sig, $size) = unpack(
278 "A $self->{data_pack}",
279 $fileobj->read_at( $offset, SIG_SIZE + $self->{data_size} ),
280 );
20f7b20c 281
282 return {
283 signature => $sig,
72e315ac 284 #XXX Is this even used?
86867f3a 285 size => $size,
286 offset => $offset + SIG_SIZE + $self->{data_size},
287 content => $fileobj->read_at( undef, $size ),
20f7b20c 288 };
d4b1166e 289}
290
ea2f6d67 291sub find_keyloc {
292 my $self = shift;
293 my ($tag, $transaction_id) = @_;
294 $transaction_id = $self->_fileobj->transaction_id
295 unless defined $transaction_id;
296
297 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
298 my ($loc, $trans_id, $is_deleted) = unpack(
299 "$self->{long_pack} C C",
300 substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ),
301 );
302
303 if ( $loc == 0 ) {
304 return ( $loc, $is_deleted, $i * $self->{key_size} );
305 }
306
307 next if $transaction_id != $trans_id;
308
309 return ( $loc, $is_deleted, $i * $self->{key_size} );
310 }
311
312 return;
313}
314
20f7b20c 315sub add_bucket {
316 ##
317 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
318 # plain (undigested) key and value.
319 ##
d4b1166e 320 my $self = shift;
359a01ac 321 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
75be6413 322
eea0d863 323 # This verifies that only supported values will be stored.
324 {
325 my $r = Scalar::Util::reftype( $value );
eea0d863 326
86867f3a 327 last if !defined $r;
eea0d863 328 last if $r eq 'HASH';
329 last if $r eq 'ARRAY';
330
e96daec8 331 $self->_throw_error(
86867f3a 332 "Storage of references of type '$r' is not supported."
eea0d863 333 );
334 }
335
019404df 336 my $fileobj = $self->_fileobj;
20f7b20c 337
21838116 338 #ACID - This is a mutation. Must only find the exact transaction
ea2f6d67 339 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );
c9b6d0d8 340
341 my @transactions;
019404df 342 if ( $fileobj->transaction_id == 0 ) {
343 @transactions = $fileobj->current_transactions;
c9b6d0d8 344 }
75be6413 345
e96daec8 346# $self->_release_space( $size, $subloc );
f9c33187 347#XXX This needs updating to use _release_space
ea2f6d67 348
86867f3a 349 my $location;
ea2f6d67 350 my $size = $self->_length_needed( $value, $plain_key );
019404df 351
ea2f6d67 352 # Updating a known md5
353 if ( $keyloc ) {
354 my $keytag = $self->load_tag( $keyloc );
355 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
356
13ff93d5 357 if ( $subloc && !$is_deleted && @transactions ) {
ea2f6d67 358 my $old_value = $self->read_from_loc( $subloc, $orig_key );
359 my $old_size = $self->_length_needed( $old_value, $plain_key );
360
361 for my $trans_id ( @transactions ) {
362 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
363 unless ($loc) {
364 my $location2 = $fileobj->request_space( $old_size );
365 $fileobj->print_at( $keytag->{offset} + $offset2,
366 pack($self->{long_pack}, $location2 ),
367 pack( 'C C', $trans_id, 0 ),
368 );
369 $self->write_value( $location2, $plain_key, $old_value, $orig_key );
370 }
371 }
75be6413 372 }
504185fb 373
ea2f6d67 374 $location = $self->_fileobj->request_space( $size );
375 #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
376 $fileobj->print_at( $keytag->{offset} + $offset,
377 pack($self->{long_pack}, $location ),
378 pack( 'C C', $fileobj->transaction_id, 0 ),
379 );
75be6413 380 }
386bab6c 381 # Adding a new md5
ea2f6d67 382 else {
383 my $keyloc = $fileobj->request_space( $self->tag_size( $self->{keyloc_size} ) );
386bab6c 384
ea2f6d67 385 # The bucket fit into list
386 if ( defined $offset ) {
387 $fileobj->print_at( $tag->{offset} + $offset,
388 $md5, pack( $self->{long_pack}, $keyloc ),
389 );
390 }
391 # If bucket didn't fit into list, split into a new index level
392 else {
393 $self->split_index( $tag, $md5, $keyloc );
394 }
395
396 my $keytag = $self->write_tag(
397 $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
019404df 398 );
c9b6d0d8 399
ea2f6d67 400 $location = $self->_fileobj->request_space( $size );
401 $fileobj->print_at( $keytag->{offset},
402 pack( $self->{long_pack}, $location ),
403 pack( 'C C', $fileobj->transaction_id, 0 ),
404 );
405
406 my $offset = 1;
407 for my $trans_id ( @transactions ) {
408 $fileobj->print_at( $keytag->{offset} + $self->{key_size} * $offset++,
13ff93d5 409 pack( $self->{long_pack}, 0 ),
ea2f6d67 410 pack( 'C C', $trans_id, 1 ),
411 );
c9b6d0d8 412 }
386bab6c 413 }
20f7b20c 414
359a01ac 415 $self->write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 416
86867f3a 417 return 1;
d5d7c51d 418}
419
420sub write_value {
421 my $self = shift;
359a01ac 422 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 423
7dcefff3 424 my $fileobj = $self->_fileobj;
d5d7c51d 425
9d4fa373 426 my $dbm_deep_obj = _get_dbm_object( $value );
7dcefff3 427 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) {
e96daec8 428 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 429 }
d5d7c51d 430
20f7b20c 431 ##
d5d7c51d 432 # Write signature based on content type, set content length and write
433 # actual value.
20f7b20c 434 ##
9d4fa373 435 my $r = Scalar::Util::reftype( $value ) || '';
436 if ( $dbm_deep_obj ) {
7dcefff3 437 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 438 }
439 elsif ($r eq 'HASH') {
9d4fa373 440 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 441 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 442 }
7dcefff3 443 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 444 }
445 elsif ($r eq 'ARRAY') {
9d4fa373 446 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 447 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 448 }
7dcefff3 449 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 450 }
451 elsif (!defined($value)) {
7dcefff3 452 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 453 }
454 else {
7dcefff3 455 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 456 }
20f7b20c 457
d5d7c51d 458 ##
459 # Plain key is stored AFTER value, as keys are typically fetched less often.
460 ##
7dcefff3 461 $fileobj->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 462
9a187d8c 463 # Internal references don't care about autobless
9d4fa373 464 return 1 if $dbm_deep_obj;
9a187d8c 465
d5d7c51d 466 ##
467 # If value is blessed, preserve class name
468 ##
7dcefff3 469 if ( $fileobj->{autobless} ) {
633df1fd 470 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
7dcefff3 471 $fileobj->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 472 }
d5d7c51d 473 else {
7dcefff3 474 $fileobj->print_at( undef, chr(0) );
20f7b20c 475 }
d5d7c51d 476 }
20f7b20c 477
d5d7c51d 478 ##
56ec4340 479 # Tie the passed in reference so that changes to it are reflected in the
480 # datafile. The use of $location as the base_offset will act as the
481 # the linkage between parent and child.
482 #
483 # The overall assignment is a hack around the fact that just tying doesn't
484 # store the values. This may not be the wrong thing to do.
d5d7c51d 485 ##
9d4fa373 486 if ($r eq 'HASH') {
487 my %x = %$value;
488 tie %$value, 'DBM::Deep', {
489 base_offset => $location,
7dcefff3 490 fileobj => $fileobj,
359a01ac 491 parent => $self->{obj},
492 parent_key => $orig_key,
9d4fa373 493 };
494 %$value = %x;
495 }
496 elsif ($r eq 'ARRAY') {
497 my @x = @$value;
498 tie @$value, 'DBM::Deep', {
499 base_offset => $location,
7dcefff3 500 fileobj => $fileobj,
359a01ac 501 parent => $self->{obj},
502 parent_key => $orig_key,
9d4fa373 503 };
504 @$value = @x;
20f7b20c 505 }
d4b1166e 506
d5d7c51d 507 return 1;
d4b1166e 508}
509
75be6413 510sub split_index {
511 my $self = shift;
ea2f6d67 512 my ($tag, $md5, $keyloc) = @_;
75be6413 513
019404df 514 my $fileobj = $self->_fileobj;
21838116 515
019404df 516 my $loc = $fileobj->request_space(
e96daec8 517 $self->tag_size( $self->{index_size} ),
16d1ad9b 518 );
519
019404df 520 $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 521
9e4f83a0 522 my $index_tag = $self->write_tag(
e96daec8 523 $loc, SIG_INDEX,
f37c15ab 524 chr(0)x$self->{index_size},
75be6413 525 );
526
7b1e1aa1 527 my $keys = $tag->{content}
ea2f6d67 528 . $md5 . pack($self->{long_pack}, $keyloc);
75be6413 529
f9c33187 530 my @newloc = ();
75be6413 531 BUCKET:
633df1fd 532 # The <= here is deliberate - we have max_buckets+1 keys to iterate
533 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 534 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
ea2f6d67 535 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 536
f9c33187 537 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
538 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 539
75be6413 540 my $num = ord(substr($key, $tag->{ch} + 1, 1));
541
f9c33187 542 if ($newloc[$num]) {
7dcefff3 543 my $subkeys = $fileobj->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 544
f9c33187 545 # This is looking for the first empty spot
ea2f6d67 546 my ($subloc, $offset) = $self->_find_in_buckets(
f9c33187 547 { content => $subkeys }, '',
7b1e1aa1 548 );
75be6413 549
633df1fd 550 $fileobj->print_at(
551 $newloc[$num] + $offset,
552 $key, pack($self->{long_pack}, $old_subloc),
553 );
7b1e1aa1 554
555 next;
75be6413 556 }
75be6413 557
019404df 558 my $loc = $fileobj->request_space(
e96daec8 559 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 560 );
2603d86e 561
019404df 562 $fileobj->print_at(
563 $index_tag->{offset} + ($num * $self->{long_size}),
564 pack($self->{long_pack}, $loc),
565 );
75be6413 566
7b1e1aa1 567 my $blist_tag = $self->write_tag(
e96daec8 568 $loc, SIG_BLIST,
7b1e1aa1 569 chr(0)x$self->{bucket_list_size},
570 );
571
019404df 572 $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 573
f9c33187 574 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 575 }
576
577 $self->_release_space(
e96daec8 578 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 579 $tag->{offset} - SIG_SIZE - $self->{data_size},
580 );
75be6413 581
ea2f6d67 582 return 1;
75be6413 583}
584
8db25060 585sub read_from_loc {
586 my $self = shift;
359a01ac 587 my ($subloc, $orig_key) = @_;
8db25060 588
7dcefff3 589 my $fileobj = $self->_fileobj;
8db25060 590
7dcefff3 591 my $signature = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 592
593 ##
594 # If value is a hash or array, return new DBM::Deep object with correct offset
595 ##
596 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 597 my $new_obj = DBM::Deep->new({
359a01ac 598 type => $signature,
8db25060 599 base_offset => $subloc,
e96daec8 600 fileobj => $self->_fileobj,
359a01ac 601 parent => $self->{obj},
602 parent_key => $orig_key,
685e40f1 603 });
8db25060 604
460b1067 605 if ($new_obj->_fileobj->{autobless}) {
8db25060 606 ##
607 # Skip over value and plain key to see if object needs
608 # to be re-blessed
609 ##
7dcefff3 610 $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 611
7dcefff3 612 my $size = $fileobj->read_at( undef, $self->{data_size} );
c6ea6b6c 613 $size = unpack($self->{data_pack}, $size);
7dcefff3 614 if ($size) { $fileobj->increment_pointer( $size ); }
8db25060 615
7dcefff3 616 my $bless_bit = $fileobj->read_at( undef, 1 );
86867f3a 617 if ( ord($bless_bit) ) {
618 my $size = unpack(
619 $self->{data_pack},
620 $fileobj->read_at( undef, $self->{data_size} ),
621 );
7dcefff3 622
86867f3a 623 if ( $size ) {
624 $new_obj = bless $new_obj, $fileobj->read_at( undef, $size );
625 }
8db25060 626 }
627 }
628
685e40f1 629 return $new_obj;
8db25060 630 }
631 elsif ( $signature eq SIG_INTERNAL ) {
7dcefff3 632 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 633 $size = unpack($self->{data_pack}, $size);
634
635 if ( $size ) {
7dcefff3 636 my $new_loc = $fileobj->read_at( undef, $size );
637 $new_loc = unpack( $self->{long_pack}, $new_loc );
359a01ac 638 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 639 }
640 else {
641 return;
642 }
643 }
644 ##
645 # Otherwise return actual value
646 ##
460b1067 647 elsif ( $signature eq SIG_DATA ) {
7dcefff3 648 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 649 $size = unpack($self->{data_pack}, $size);
650
86867f3a 651 my $value = $size ? $fileobj->read_at( undef, $size ) : '';
8db25060 652 return $value;
653 }
654
655 ##
656 # Key exists, but content is null
657 ##
658 return;
659}
660
9020ee8c 661sub get_bucket_value {
beac1dff 662 ##
663 # Fetch single value given tag and MD5 digested key.
664 ##
665 my $self = shift;
359a01ac 666 my ($tag, $md5, $orig_key) = @_;
9020ee8c 667
21838116 668 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 669 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
94e8af14 670
ea2f6d67 671 if ( !$keyloc ) {
94e8af14 672 #XXX Need to use real key
ea2f6d67 673# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key );
94e8af14 674# return;
675 }
ea2f6d67 676# elsif ( !$is_deleted ) {
677 else {
678 my $keytag = $self->load_tag( $keyloc );
679 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 680 if (!$subloc && !$is_deleted) {
ea2f6d67 681 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
682 }
683 if ( $subloc && !$is_deleted ) {
684 return $self->read_from_loc( $subloc, $orig_key );
685 }
386bab6c 686 }
94e8af14 687
beac1dff 688 return;
9020ee8c 689}
ab0e4957 690
691sub delete_bucket {
beac1dff 692 ##
693 # Delete single key/value pair given tag and MD5 digested key.
694 ##
695 my $self = shift;
a97c8f67 696 my ($tag, $md5, $orig_key) = @_;
ab0e4957 697
22e20cce 698 #ACID - Although this is a mutation, we must find any transaction.
699 # This is because we need to mark something as deleted that is in the HEAD.
ea2f6d67 700 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 701
ea2f6d67 702 return if !$keyloc;
633df1fd 703
704 my $fileobj = $self->_fileobj;
705
706 my @transactions;
707 if ( $fileobj->transaction_id == 0 ) {
708 @transactions = $fileobj->current_transactions;
709 }
710
633df1fd 711 if ( $fileobj->transaction_id == 0 ) {
ea2f6d67 712 my $keytag = $self->load_tag( $keyloc );
7a960a12 713
ea2f6d67 714 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 715 return if !$subloc || $is_deleted;
716
633df1fd 717 my $value = $self->read_from_loc( $subloc, $orig_key );
718
ea2f6d67 719 my $size = $self->_length_needed( $value, $orig_key );
720
721 for my $trans_id ( @transactions ) {
722 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
723 unless ($loc) {
724 my $location2 = $fileobj->request_space( $size );
725 $fileobj->print_at( $keytag->{offset} + $offset2,
726 pack($self->{long_pack}, $location2 ),
727 pack( 'C C', $trans_id, 0 ),
728 );
729 $self->write_value( $location2, $orig_key, $value, $orig_key );
730 }
633df1fd 731 }
732
ea2f6d67 733 $keytag = $self->load_tag( $keyloc );
734 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
735 $fileobj->print_at( $keytag->{offset} + $offset,
736 substr( $keytag->{content}, $offset + $self->{key_size} ),
737 chr(0) x $self->{key_size},
019404df 738 );
386bab6c 739 }
633df1fd 740 else {
ea2f6d67 741 my $keytag = $self->load_tag( $keyloc );
7a960a12 742
ea2f6d67 743 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 744
ea2f6d67 745 $fileobj->print_at( $keytag->{offset} + $offset,
13ff93d5 746 pack($self->{long_pack}, 0 ),
ea2f6d67 747 pack( 'C C', $fileobj->transaction_id, 1 ),
748 );
633df1fd 749 }
750
751 return 1;
ab0e4957 752}
753
912d50b1 754sub bucket_exists {
beac1dff 755 ##
756 # Check existence of single key given tag and MD5 digested key.
757 ##
758 my $self = shift;
e96daec8 759 my ($tag, $md5) = @_;
912d50b1 760
21838116 761 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 762 my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );
763 my $keytag = $self->load_tag( $keyloc );
764 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
13ff93d5 765 if ( !$subloc && !$is_deleted ) {
ea2f6d67 766 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
767 }
c9b6d0d8 768 return ($subloc && !$is_deleted) && 1;
912d50b1 769}
770
72e315ac 771sub find_blist {
beac1dff 772 ##
773 # Locate offset for bucket list, given digested key
774 ##
775 my $self = shift;
e96daec8 776 my ($offset, $md5, $args) = @_;
d0b74c17 777 $args = {} unless $args;
778
beac1dff 779 ##
780 # Locate offset for bucket list using digest index system
781 ##
e96daec8 782 my $tag = $self->load_tag( $offset )
783 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 784
e5fc7e69 785 my $ch = 0;
8db25060 786 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 787 my $num = ord substr($md5, $ch, 1);
788
789 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 790 $tag = $self->index_lookup( $tag, $num );
d0b74c17 791
792 if (!$tag) {
29b01632 793 return if !$args->{create};
d0b74c17 794
019404df 795 my $loc = $self->_fileobj->request_space(
e96daec8 796 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 797 );
798
019404df 799 $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 800
9e4f83a0 801 $tag = $self->write_tag(
e96daec8 802 $loc, SIG_BLIST,
f37c15ab 803 chr(0)x$self->{bucket_list_size},
d5d7c51d 804 );
805
806 $tag->{ref_loc} = $ref_loc;
807 $tag->{ch} = $ch;
808
809 last;
d0b74c17 810 }
811
16d1ad9b 812 $tag->{ch} = $ch++;
d0b74c17 813 $tag->{ref_loc} = $ref_loc;
beac1dff 814 }
d0b74c17 815
beac1dff 816 return $tag;
6736c116 817}
818
d0b74c17 819sub index_lookup {
820 ##
821 # Given index tag, lookup single entry in index and return .
822 ##
823 my $self = shift;
e96daec8 824 my ($tag, $index) = @_;
d0b74c17 825
826 my $location = unpack(
827 $self->{long_pack},
828 substr(
829 $tag->{content},
830 $index * $self->{long_size},
831 $self->{long_size},
832 ),
833 );
834
835 if (!$location) { return; }
836
e96daec8 837 return $self->load_tag( $location );
d0b74c17 838}
839
6736c116 840sub traverse_index {
beac1dff 841 ##
842 # Scan index and recursively step into deeper levels, looking for next key.
843 ##
6736c116 844 my $self = shift;
ea2f6d67 845 my ($xxxx, $offset, $ch, $force_return_next) = @_;
d0b74c17 846
e96daec8 847 my $tag = $self->load_tag( $offset );
6736c116 848
8db25060 849 if ($tag->{signature} ne SIG_BLIST) {
ea2f6d67 850 my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1));
d0b74c17 851
ea2f6d67 852 for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) {
e5fc7e69 853 my $subloc = unpack(
854 $self->{long_pack},
e06824f8 855 substr(
ea2f6d67 856 $tag->{content},
e06824f8 857 $idx * $self->{long_size},
858 $self->{long_size},
859 ),
e5fc7e69 860 );
861
beac1dff 862 if ($subloc) {
e5fc7e69 863 my $result = $self->traverse_index(
ea2f6d67 864 $xxxx, $subloc, $ch + 1, $force_return_next,
e5fc7e69 865 );
866
ea2f6d67 867 if (defined $result) { return $result; }
beac1dff 868 }
869 } # index loop
d0b74c17 870
ea2f6d67 871 $xxxx->{return_next} = 1;
42717e46 872 }
873 # This is the bucket list
e5fc7e69 874 else {
beac1dff 875 my $keys = $tag->{content};
ea2f6d67 876 if ($force_return_next) { $xxxx->{return_next} = 1; }
d0b74c17 877
beac1dff 878 ##
879 # Iterate through buckets, looking for a key match
880 ##
42717e46 881 my $transaction_id = $self->_fileobj->transaction_id;
8db25060 882 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
ea2f6d67 883 my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 884
8db25060 885 # End of bucket list -- return to outer loop
ea2f6d67 886 if (!$keyloc) {
887 $xxxx->{return_next} = 1;
beac1dff 888 last;
889 }
8db25060 890 # Located previous key -- return next one found
ea2f6d67 891 elsif ($key eq $xxxx->{prev_md5}) {
892 $xxxx->{return_next} = 1;
beac1dff 893 next;
894 }
8db25060 895 # Seek to bucket location and skip over signature
ea2f6d67 896 elsif ($xxxx->{return_next}) {
7dcefff3 897 my $fileobj = $self->_fileobj;
d0b74c17 898
ea2f6d67 899 my $keytag = $self->load_tag( $keyloc );
900 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 901 if ( $subloc == 0 && !$is_deleted ) {
ea2f6d67 902 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
903 }
904 next if $is_deleted;
905
beac1dff 906 # Skip over value to get to plain key
7dcefff3 907 my $sig = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 908
7dcefff3 909 my $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 910 $size = unpack($self->{data_pack}, $size);
7dcefff3 911 if ($size) { $fileobj->increment_pointer( $size ); }
d0b74c17 912
beac1dff 913 # Read in plain key and return as scalar
7dcefff3 914 $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 915 $size = unpack($self->{data_pack}, $size);
86867f3a 916
7dcefff3 917 my $plain_key;
918 if ($size) { $plain_key = $fileobj->read_at( undef, $size); }
beac1dff 919 return $plain_key;
920 }
8db25060 921 }
d0b74c17 922
ea2f6d67 923 $xxxx->{return_next} = 1;
42717e46 924 }
d0b74c17 925
beac1dff 926 return;
6736c116 927}
928
929sub get_next_key {
beac1dff 930 ##
931 # Locate next key, given digested previous one
932 ##
6736c116 933 my $self = shift;
934 my ($obj) = @_;
d0b74c17 935
beac1dff 936 ##
937 # If the previous key was not specifed, start at the top and
938 # return the first one found.
939 ##
ea2f6d67 940 my $temp;
941 if ( @_ > 1 ) {
942 $temp = {
943 prev_md5 => $_[1],
944 return_next => 0,
945 };
946 }
947 else {
948 $temp = {
949 prev_md5 => chr(0) x $self->{hash_size},
950 return_next => 1,
951 };
beac1dff 952 }
d0b74c17 953
ea2f6d67 954 return $self->traverse_index( $temp, $obj->_base_offset, 0 );
6736c116 955}
956
75be6413 957# Utilities
958
9cec1360 959sub _get_key_subloc {
75be6413 960 my $self = shift;
961 my ($keys, $idx) = @_;
962
ea2f6d67 963 return unpack(
28394a1a 964 # This is 'a', not 'A'. Please read the pack() documentation for the
965 # difference between the two and why it's important.
ea2f6d67 966 "a$self->{hash_size} $self->{long_pack}",
75be6413 967 substr(
968 $keys,
9cec1360 969 ($idx * $self->{bucket_size}),
970 $self->{bucket_size},
75be6413 971 ),
972 );
75be6413 973}
974
d608b06e 975sub _find_in_buckets {
976 my $self = shift;
ea2f6d67 977 my ($tag, $md5) = @_;
21838116 978
d608b06e 979 BUCKET:
980 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
ea2f6d67 981 my ($key, $subloc) = $self->_get_key_subloc(
9a187d8c 982 $tag->{content}, $i,
983 );
d608b06e 984
ea2f6d67 985 my @rv = ($subloc, $i * $self->{bucket_size});
21838116 986
987 unless ( $subloc ) {
21838116 988 return @rv;
989 }
990
991 next BUCKET if $key ne $md5;
d608b06e 992
21838116 993 return @rv;
d608b06e 994 }
995
996 return;
997}
998
994ccd8e 999sub _release_space {
1000 my $self = shift;
e96daec8 1001 my ($size, $loc) = @_;
994ccd8e 1002
7b1e1aa1 1003 my $next_loc = 0;
1004
019404df 1005 $self->_fileobj->print_at( $loc,
1006 SIG_FREE,
1007 pack($self->{long_pack}, $size ),
1008 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1009 );
1010
994ccd8e 1011 return;
1012}
1013
e96daec8 1014sub _throw_error {
1015 die "DBM::Deep: $_[1]\n";
1016}
1017
86867f3a 1018sub _get_dbm_object {
1019 my $item = shift;
1020
1021 my $obj = eval {
1022 local $SIG{__DIE__};
1023 if ($item->isa( 'DBM::Deep' )) {
1024 return $item;
1025 }
1026 return;
1027 };
1028 return $obj if $obj;
1029
1030 my $r = Scalar::Util::reftype( $item ) || '';
1031 if ( $r eq 'HASH' ) {
1032 my $obj = eval {
1033 local $SIG{__DIE__};
1034 my $obj = tied(%$item);
1035 if ($obj->isa( 'DBM::Deep' )) {
1036 return $obj;
1037 }
1038 return;
1039 };
1040 return $obj if $obj;
1041 }
1042 elsif ( $r eq 'ARRAY' ) {
1043 my $obj = eval {
1044 local $SIG{__DIE__};
1045 my $obj = tied(@$item);
1046 if ($obj->isa( 'DBM::Deep' )) {
1047 return $obj;
1048 }
1049 return;
1050 };
1051 return $obj if $obj;
1052 }
1053
1054 return;
1055}
1056
1057sub _length_needed {
1058 my $self = shift;
1059 my ($value, $key) = @_;
1060
1061 my $is_dbm_deep = eval {
1062 local $SIG{'__DIE__'};
1063 $value->isa( 'DBM::Deep' );
1064 };
1065
1066 my $len = SIG_SIZE
1067 + $self->{data_size} # size for value
1068 + $self->{data_size} # size for key
1069 + length( $key ); # length of key
1070
1071 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
1072 # long_size is for the internal reference
1073 return $len + $self->{long_size};
1074 }
1075
1076 if ( $self->_fileobj->{autobless} ) {
1077 # This is for the bit saying whether or not this thing is blessed.
1078 $len += 1;
1079 }
1080
1081 my $r = Scalar::Util::reftype( $value ) || '';
1082 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1083 if ( defined $value ) {
1084 $len += length( $value );
1085 }
1086 return $len;
1087 }
1088
1089 $len += $self->{index_size};
1090
1091 # if autobless is enabled, must also take into consideration
1092 # the class name as it is stored after the key.
1093 if ( $self->_fileobj->{autobless} ) {
1094 my $c = Scalar::Util::blessed($value);
1095 if ( defined $c && !$is_dbm_deep ) {
1096 $len += $self->{data_size} + length($c);
1097 }
1098 }
1099
1100 return $len;
1101}
1102
a20d9a3f 11031;
1104__END__