r14235@Rob-Kinyons-PowerBook: rob | 2006-06-14 22:24:47 -0400
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
c3aafc14 8our $VERSION = q(0.99_03);
86867f3a 9
633df1fd 10use Fcntl qw( :DEFAULT :flock );
359a01ac 11use Scalar::Util ();
a20d9a3f 12
21838116 13# File-wide notes:
20b7f047 14# * To add to bucket_size, make sure you modify the following:
15# - calculate_sizes()
16# - _get_key_subloc()
17# - add_bucket() - where the buckets are printed
c3aafc14 18#
83371fe3 19# * Every method in here assumes that the _storage has been appropriately
c3aafc14 20# safeguarded. This can be anything from flock() to some sort of manual
21# mutex. But, it's the caller's responsability to make sure that this has
22# been done.
21838116 23
8db25060 24##
25# Setup file and tag signatures. These should never change.
26##
27sub SIG_FILE () { 'DPDB' }
460b1067 28sub SIG_HEADER () { 'h' }
8db25060 29sub SIG_INTERNAL () { 'i' }
30sub SIG_HASH () { 'H' }
31sub SIG_ARRAY () { 'A' }
8db25060 32sub SIG_NULL () { 'N' }
33sub SIG_DATA () { 'D' }
34sub SIG_INDEX () { 'I' }
35sub SIG_BLIST () { 'B' }
7b1e1aa1 36sub SIG_FREE () { 'F' }
86867f3a 37sub SIG_KEYS () { 'K' }
8db25060 38sub SIG_SIZE () { 1 }
39
fb451ba6 40# This is the transaction ID for the HEAD
41sub HEAD () { 0 }
42
c3aafc14 43################################################################################
44#
45# This is new code. It is a complete rewrite of the engine based on a new API
46#
47################################################################################
48
c3aafc14 49sub read_value {
50 my $self = shift;
40956c06 51 my ($trans_id, $offset, $key, $orig_key) = @_;
c3aafc14 52
40956c06 53 my $dig_key = $self->_apply_digest( $key );
e5a9d386 54 my $tag = $self->find_blist( $offset, $dig_key ) or return;
55 return $self->get_bucket_value( $tag, $dig_key, $orig_key );
c3aafc14 56}
57
c3aafc14 58sub key_exists {
59 my $self = shift;
40956c06 60 my ($trans_id, $offset, $key) = @_;
c3aafc14 61
40956c06 62 my $dig_key = $self->_apply_digest( $key );
e5a9d386 63 # exists() returns the empty string, not undef
64 my $tag = $self->find_blist( $offset, $dig_key ) or return '';
c3aafc14 65 return $self->bucket_exists( $tag, $dig_key, $key );
66}
67
83c43bb5 68sub get_next_key {
c3aafc14 69 my $self = shift;
40956c06 70 my ($trans_id, $offset) = @_;
c3aafc14 71
83c43bb5 72 # If the previous key was not specifed, start at the top and
73 # return the first one found.
74 my $temp;
40956c06 75 if ( @_ > 2 ) {
83c43bb5 76 $temp = {
40956c06 77 prev_md5 => $self->_apply_digest($_[2]),
83c43bb5 78 return_next => 0,
79 };
80 }
81 else {
82 $temp = {
83 prev_md5 => chr(0) x $self->{hash_size},
84 return_next => 1,
85 };
86 }
87
88 return $self->traverse_index( $temp, $offset, 0 );
c3aafc14 89}
90
83371fe3 91sub delete_key {
92 my $self = shift;
40956c06 93 my ($trans_id, $offset, $key, $orig_key) = @_;
83371fe3 94
40956c06 95 my $dig_key = $self->_apply_digest( $key );
83371fe3 96 my $tag = $self->find_blist( $offset, $dig_key ) or return;
97 my $value = $self->get_bucket_value( $tag, $dig_key, $orig_key );
98 $self->delete_bucket( $tag, $dig_key, $orig_key );
99 return $value;
100}
101
83371fe3 102sub write_value {
103 my $self = shift;
40956c06 104 my ($trans_id, $offset, $key, $value, $orig_key) = @_;
83371fe3 105
40956c06 106 my $dig_key = $self->_apply_digest( $key );
83371fe3 107 my $tag = $self->find_blist( $offset, $dig_key, { create => 1 } );
108 return $self->add_bucket( $tag, $dig_key, $key, $value, undef, $orig_key );
109}
110
c3aafc14 111################################################################################
112#
113# Below here is the old code. It will be folded into the code above as it can.
114#
115################################################################################
116
612969fb 117sub new {
118 my $class = shift;
119 my ($args) = @_;
120
121 my $self = bless {
86867f3a 122 long_size => 4,
123 long_pack => 'N',
124 data_size => 4,
125 data_pack => 'N',
251dfd0e 126
86867f3a 127 digest => \&Digest::MD5::md5,
c3aafc14 128 hash_size => 16, # In bytes
251dfd0e 129
81d16922 130 ##
c3aafc14 131 # Number of buckets per blist before another level of indexing is
e0098e7f 132 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 133 # files. DO NOT decrease this value below 16, due to risk of recursive
134 # reindex overrun.
81d16922 135 ##
612969fb 136 max_buckets => 16,
460b1067 137
83371fe3 138 storage => undef,
359a01ac 139 obj => undef,
612969fb 140 }, $class;
141
e0098e7f 142 if ( defined $args->{pack_size} ) {
143 if ( lc $args->{pack_size} eq 'small' ) {
144 $args->{long_size} = 2;
c9b6d0d8 145 $args->{long_pack} = 'n';
e0098e7f 146 }
147 elsif ( lc $args->{pack_size} eq 'medium' ) {
148 $args->{long_size} = 4;
149 $args->{long_pack} = 'N';
150 }
151 elsif ( lc $args->{pack_size} eq 'large' ) {
152 $args->{long_size} = 8;
153 $args->{long_pack} = 'Q';
154 }
155 else {
156 die "Unknown pack_size value: '$args->{pack_size}'\n";
157 }
158 }
159
fde3db1a 160 # Grab the parameters we want to use
161 foreach my $param ( keys %$self ) {
162 next unless exists $args->{$param};
3e9498a1 163 $self->{$param} = $args->{$param};
fde3db1a 164 }
359a01ac 165 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 166
e0098e7f 167 if ( $self->{max_buckets} < 16 ) {
168 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
169 $self->{max_buckets} = 16;
170 }
171
260a80b4 172 return $self;
173}
174
83371fe3 175sub _storage { return $_[0]{storage} }
460b1067 176
40956c06 177sub _apply_digest {
c3aafc14 178 my $self = shift;
179 return $self->{digest}->(@_);
180}
181
260a80b4 182sub calculate_sizes {
183 my $self = shift;
184
633df1fd 185 # The 2**8 here indicates the number of different characters in the
186 # current hashing algorithm
28394a1a 187 #XXX Does this need to be updated with different hashing algorithms?
ea2f6d67 188 $self->{hash_chars_used} = (2**8);
189 $self->{index_size} = $self->{hash_chars_used} * $self->{long_size};
190
191 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
e0098e7f 192 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 193
ea2f6d67 194 $self->{key_size} = $self->{long_size} * 2;
195 $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size};
196
260a80b4 197 return;
1bf65be7 198}
199
fde3db1a 200sub write_file_header {
0d0f3d5d 201 my $self = shift;
0d0f3d5d 202
83371fe3 203 my $loc = $self->_storage->request_space( length( SIG_FILE ) + 33 );
0d0f3d5d 204
83371fe3 205 $self->_storage->print_at( $loc,
260a80b4 206 SIG_FILE,
460b1067 207 SIG_HEADER,
208 pack('N', 1), # header version
eff6a245 209 pack('N', 24), # header size
210 pack('N4', 0, 0, 0, 0), # currently running transaction IDs
c9b6d0d8 211 pack('n', $self->{long_size}),
260a80b4 212 pack('A', $self->{long_pack}),
c9b6d0d8 213 pack('n', $self->{data_size}),
260a80b4 214 pack('A', $self->{data_pack}),
c9b6d0d8 215 pack('n', $self->{max_buckets}),
260a80b4 216 );
0d0f3d5d 217
83371fe3 218 $self->_storage->set_transaction_offset( 13 );
20b7f047 219
0d0f3d5d 220 return;
221}
222
fde3db1a 223sub read_file_header {
e064ccd1 224 my $self = shift;
e064ccd1 225
83371fe3 226 my $buffer = $self->_storage->read_at( 0, length(SIG_FILE) + 9 );
7dcefff3 227 return unless length($buffer);
460b1067 228
229 my ($file_signature, $sig_header, $header_version, $size) = unpack(
230 'A4 A N N', $buffer
42f79e07 231 );
e064ccd1 232
460b1067 233 unless ( $file_signature eq SIG_FILE ) {
83371fe3 234 $self->_storage->close;
e96daec8 235 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 236 }
260a80b4 237
460b1067 238 unless ( $sig_header eq SIG_HEADER ) {
83371fe3 239 $self->_storage->close;
e96daec8 240 $self->_throw_error( "Old file version found." );
460b1067 241 }
9b2370e0 242
83371fe3 243 my $buffer2 = $self->_storage->read_at( undef, $size );
eff6a245 244 my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 );
15ba72cc 245
83371fe3 246 $self->_storage->set_transaction_offset( 13 );
15ba72cc 247
460b1067 248 if ( @values < 5 || grep { !defined } @values ) {
83371fe3 249 $self->_storage->close;
e96daec8 250 $self->_throw_error("Corrupted file - bad header");
e064ccd1 251 }
252
460b1067 253 #XXX Add warnings if values weren't set right
254 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
255
7dcefff3 256 return length($buffer) + length($buffer2);
e064ccd1 257}
258
460b1067 259sub setup_fh {
260 my $self = shift;
261 my ($obj) = @_;
70b55428 262
7dcefff3 263 # Need to remove use of $fh here
83371fe3 264 my $fh = $self->_storage->{fh};
6fde4ed2 265 flock $fh, LOCK_EX;
118ba343 266
260a80b4 267 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 268 unless ( $obj->{base_offset} ) {
e96daec8 269 my $bytes_read = $self->read_file_header;
118ba343 270
260a80b4 271 $self->calculate_sizes;
272
118ba343 273 ##
fde3db1a 274 # File is empty -- write header and master index
118ba343 275 ##
276 if (!$bytes_read) {
83371fe3 277 $self->_storage->audit( "# Database created on" );
359a01ac 278
e96daec8 279 $self->write_file_header;
118ba343 280
83371fe3 281 $obj->{base_offset} = $self->_storage->request_space(
22e20cce 282 $self->tag_size( $self->{index_size} ),
283 );
118ba343 284
9e4f83a0 285 $self->write_tag(
e96daec8 286 $obj->_base_offset, $obj->_type,
f37c15ab 287 chr(0)x$self->{index_size},
118ba343 288 );
289
290 # Flush the filehandle
291 my $old_fh = select $fh;
292 my $old_af = $|; $| = 1; $| = $old_af;
293 select $old_fh;
294 }
295 else {
296 $obj->{base_offset} = $bytes_read;
297
298 ##
fde3db1a 299 # Get our type from master index header
118ba343 300 ##
359a01ac 301 my $tag = $self->load_tag($obj->_base_offset);
302 unless ( $tag ) {
303 flock $fh, LOCK_UN;
304 $self->_throw_error("Corrupted file, no master index record");
305 }
118ba343 306
e96daec8 307 unless ($obj->_type eq $tag->{signature}) {
359a01ac 308 flock $fh, LOCK_UN;
e96daec8 309 $self->_throw_error("File type mismatch");
118ba343 310 }
311 }
118ba343 312 }
260a80b4 313 else {
314 $self->calculate_sizes;
315 }
e06824f8 316
673464d9 317 #XXX We have to make sure we don't mess up when autoflush isn't turned on
83371fe3 318 $self->_storage->set_inode;
70b55428 319
6fde4ed2 320 flock $fh, LOCK_UN;
321
70b55428 322 return 1;
323}
324
16d1ad9b 325sub tag_size {
326 my $self = shift;
327 my ($size) = @_;
328 return SIG_SIZE + $self->{data_size} + $size;
329}
330
9e4f83a0 331sub write_tag {
20f7b20c 332 ##
333 # Given offset, signature and content, create tag and write to disk
334 ##
d4b1166e 335 my $self = shift;
e96daec8 336 my ($offset, $sig, $content) = @_;
f37c15ab 337 my $size = length( $content );
20f7b20c 338
83371fe3 339 $self->_storage->print_at(
7dcefff3 340 $offset,
341 $sig, pack($self->{data_pack}, $size), $content,
342 );
20f7b20c 343
f37c15ab 344 return unless defined $offset;
345
20f7b20c 346 return {
347 signature => $sig,
72e315ac 348 #XXX Is this even used?
86867f3a 349 size => $size,
350 offset => $offset + SIG_SIZE + $self->{data_size},
351 content => $content
20f7b20c 352 };
d4b1166e 353}
354
355sub load_tag {
20f7b20c 356 ##
357 # Given offset, load single tag and return signature, size and data
358 ##
d4b1166e 359 my $self = shift;
e96daec8 360 my ($offset) = @_;
20f7b20c 361
83371fe3 362 my $storage = $self->_storage;
20f7b20c 363
86867f3a 364 my ($sig, $size) = unpack(
365 "A $self->{data_pack}",
83371fe3 366 $storage->read_at( $offset, SIG_SIZE + $self->{data_size} ),
86867f3a 367 );
20f7b20c 368
369 return {
370 signature => $sig,
c3aafc14 371 size => $size, #XXX Is this even used?
fb451ba6 372 start => $offset,
86867f3a 373 offset => $offset + SIG_SIZE + $self->{data_size},
83371fe3 374 content => $storage->read_at( undef, $size ),
20f7b20c 375 };
d4b1166e 376}
377
ea2f6d67 378sub find_keyloc {
379 my $self = shift;
380 my ($tag, $transaction_id) = @_;
83371fe3 381 $transaction_id = $self->_storage->transaction_id
ea2f6d67 382 unless defined $transaction_id;
383
384 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
385 my ($loc, $trans_id, $is_deleted) = unpack(
386 "$self->{long_pack} C C",
387 substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ),
388 );
389
fb451ba6 390 next if $loc != HEAD && $transaction_id != $trans_id;
391 return( $loc, $is_deleted, $i * $self->{key_size} );
ea2f6d67 392 }
393
394 return;
395}
396
20f7b20c 397sub add_bucket {
398 ##
399 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
400 # plain (undigested) key and value.
401 ##
d4b1166e 402 my $self = shift;
359a01ac 403 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
75be6413 404
eea0d863 405 # This verifies that only supported values will be stored.
406 {
407 my $r = Scalar::Util::reftype( $value );
eea0d863 408
86867f3a 409 last if !defined $r;
eea0d863 410 last if $r eq 'HASH';
411 last if $r eq 'ARRAY';
412
e96daec8 413 $self->_throw_error(
86867f3a 414 "Storage of references of type '$r' is not supported."
eea0d863 415 );
416 }
417
83371fe3 418 my $storage = $self->_storage;
20f7b20c 419
21838116 420 #ACID - This is a mutation. Must only find the exact transaction
ea2f6d67 421 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );
c9b6d0d8 422
423 my @transactions;
83371fe3 424 if ( $storage->transaction_id == 0 ) {
425 @transactions = $storage->current_transactions;
c9b6d0d8 426 }
75be6413 427
e96daec8 428# $self->_release_space( $size, $subloc );
f9c33187 429#XXX This needs updating to use _release_space
ea2f6d67 430
86867f3a 431 my $location;
ea2f6d67 432 my $size = $self->_length_needed( $value, $plain_key );
019404df 433
ea2f6d67 434 # Updating a known md5
435 if ( $keyloc ) {
436 my $keytag = $self->load_tag( $keyloc );
437 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
438
13ff93d5 439 if ( $subloc && !$is_deleted && @transactions ) {
ea2f6d67 440 my $old_value = $self->read_from_loc( $subloc, $orig_key );
441 my $old_size = $self->_length_needed( $old_value, $plain_key );
442
443 for my $trans_id ( @transactions ) {
444 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
445 unless ($loc) {
83371fe3 446 my $location2 = $storage->request_space( $old_size );
447 $storage->print_at( $keytag->{offset} + $offset2,
ea2f6d67 448 pack($self->{long_pack}, $location2 ),
449 pack( 'C C', $trans_id, 0 ),
450 );
c3aafc14 451 $self->_write_value( $location2, $plain_key, $old_value, $orig_key );
ea2f6d67 452 }
453 }
75be6413 454 }
504185fb 455
83371fe3 456 $location = $self->_storage->request_space( $size );
ea2f6d67 457 #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
83371fe3 458 $storage->print_at( $keytag->{offset} + $offset,
ea2f6d67 459 pack($self->{long_pack}, $location ),
83371fe3 460 pack( 'C C', $storage->transaction_id, 0 ),
ea2f6d67 461 );
75be6413 462 }
386bab6c 463 # Adding a new md5
ea2f6d67 464 else {
83371fe3 465 my $keyloc = $storage->request_space( $self->tag_size( $self->{keyloc_size} ) );
386bab6c 466
ea2f6d67 467 # The bucket fit into list
468 if ( defined $offset ) {
83371fe3 469 $storage->print_at( $tag->{offset} + $offset,
ea2f6d67 470 $md5, pack( $self->{long_pack}, $keyloc ),
471 );
472 }
473 # If bucket didn't fit into list, split into a new index level
474 else {
475 $self->split_index( $tag, $md5, $keyloc );
476 }
477
478 my $keytag = $self->write_tag(
479 $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
019404df 480 );
c9b6d0d8 481
83371fe3 482 $location = $self->_storage->request_space( $size );
483 $storage->print_at( $keytag->{offset},
ea2f6d67 484 pack( $self->{long_pack}, $location ),
83371fe3 485 pack( 'C C', $storage->transaction_id, 0 ),
ea2f6d67 486 );
487
488 my $offset = 1;
489 for my $trans_id ( @transactions ) {
83371fe3 490 $storage->print_at( $keytag->{offset} + $self->{key_size} * $offset++,
13ff93d5 491 pack( $self->{long_pack}, 0 ),
ea2f6d67 492 pack( 'C C', $trans_id, 1 ),
493 );
c9b6d0d8 494 }
386bab6c 495 }
20f7b20c 496
c3aafc14 497 $self->_write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 498
86867f3a 499 return 1;
d5d7c51d 500}
501
c3aafc14 502sub _write_value {
d5d7c51d 503 my $self = shift;
359a01ac 504 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 505
83371fe3 506 my $storage = $self->_storage;
d5d7c51d 507
9d4fa373 508 my $dbm_deep_obj = _get_dbm_object( $value );
83371fe3 509 if ( $dbm_deep_obj && $dbm_deep_obj->_storage ne $storage ) {
e96daec8 510 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 511 }
d5d7c51d 512
20f7b20c 513 ##
d5d7c51d 514 # Write signature based on content type, set content length and write
515 # actual value.
20f7b20c 516 ##
9d4fa373 517 my $r = Scalar::Util::reftype( $value ) || '';
518 if ( $dbm_deep_obj ) {
7dcefff3 519 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 520 }
521 elsif ($r eq 'HASH') {
9d4fa373 522 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 523 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 524 }
7dcefff3 525 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 526 }
527 elsif ($r eq 'ARRAY') {
9d4fa373 528 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 529 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 530 }
7dcefff3 531 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 532 }
533 elsif (!defined($value)) {
7dcefff3 534 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 535 }
536 else {
7dcefff3 537 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 538 }
20f7b20c 539
d5d7c51d 540 ##
541 # Plain key is stored AFTER value, as keys are typically fetched less often.
542 ##
83371fe3 543 $storage->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 544
9a187d8c 545 # Internal references don't care about autobless
9d4fa373 546 return 1 if $dbm_deep_obj;
9a187d8c 547
d5d7c51d 548 ##
549 # If value is blessed, preserve class name
550 ##
83371fe3 551 if ( $storage->{autobless} ) {
633df1fd 552 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
83371fe3 553 $storage->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 554 }
d5d7c51d 555 else {
83371fe3 556 $storage->print_at( undef, chr(0) );
20f7b20c 557 }
d5d7c51d 558 }
20f7b20c 559
d5d7c51d 560 ##
56ec4340 561 # Tie the passed in reference so that changes to it are reflected in the
562 # datafile. The use of $location as the base_offset will act as the
563 # the linkage between parent and child.
564 #
565 # The overall assignment is a hack around the fact that just tying doesn't
566 # store the values. This may not be the wrong thing to do.
d5d7c51d 567 ##
9d4fa373 568 if ($r eq 'HASH') {
569 my %x = %$value;
570 tie %$value, 'DBM::Deep', {
571 base_offset => $location,
83371fe3 572 storage => $storage,
359a01ac 573 parent => $self->{obj},
574 parent_key => $orig_key,
9d4fa373 575 };
576 %$value = %x;
bd01f6ec 577 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
9d4fa373 578 }
579 elsif ($r eq 'ARRAY') {
580 my @x = @$value;
581 tie @$value, 'DBM::Deep', {
582 base_offset => $location,
83371fe3 583 storage => $storage,
359a01ac 584 parent => $self->{obj},
585 parent_key => $orig_key,
9d4fa373 586 };
587 @$value = @x;
bd01f6ec 588 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
20f7b20c 589 }
d4b1166e 590
d5d7c51d 591 return 1;
d4b1166e 592}
593
75be6413 594sub split_index {
595 my $self = shift;
ea2f6d67 596 my ($tag, $md5, $keyloc) = @_;
75be6413 597
83371fe3 598 my $storage = $self->_storage;
21838116 599
83371fe3 600 my $loc = $storage->request_space(
e96daec8 601 $self->tag_size( $self->{index_size} ),
16d1ad9b 602 );
603
83371fe3 604 $storage->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 605
9e4f83a0 606 my $index_tag = $self->write_tag(
e96daec8 607 $loc, SIG_INDEX,
f37c15ab 608 chr(0)x$self->{index_size},
75be6413 609 );
610
7b1e1aa1 611 my $keys = $tag->{content}
ea2f6d67 612 . $md5 . pack($self->{long_pack}, $keyloc);
75be6413 613
f9c33187 614 my @newloc = ();
75be6413 615 BUCKET:
633df1fd 616 # The <= here is deliberate - we have max_buckets+1 keys to iterate
617 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 618 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
ea2f6d67 619 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 620
f9c33187 621 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
622 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 623
75be6413 624 my $num = ord(substr($key, $tag->{ch} + 1, 1));
625
f9c33187 626 if ($newloc[$num]) {
83371fe3 627 my $subkeys = $storage->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 628
f9c33187 629 # This is looking for the first empty spot
ea2f6d67 630 my ($subloc, $offset) = $self->_find_in_buckets(
f9c33187 631 { content => $subkeys }, '',
7b1e1aa1 632 );
75be6413 633
83371fe3 634 $storage->print_at(
633df1fd 635 $newloc[$num] + $offset,
636 $key, pack($self->{long_pack}, $old_subloc),
637 );
7b1e1aa1 638
639 next;
75be6413 640 }
75be6413 641
83371fe3 642 my $loc = $storage->request_space(
e96daec8 643 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 644 );
2603d86e 645
83371fe3 646 $storage->print_at(
019404df 647 $index_tag->{offset} + ($num * $self->{long_size}),
648 pack($self->{long_pack}, $loc),
649 );
75be6413 650
7b1e1aa1 651 my $blist_tag = $self->write_tag(
e96daec8 652 $loc, SIG_BLIST,
7b1e1aa1 653 chr(0)x$self->{bucket_list_size},
654 );
655
83371fe3 656 $storage->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 657
f9c33187 658 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 659 }
660
661 $self->_release_space(
e96daec8 662 $self->tag_size( $self->{bucket_list_size} ),
fb451ba6 663 $tag->{start},
7b1e1aa1 664 );
75be6413 665
ea2f6d67 666 return 1;
75be6413 667}
668
8db25060 669sub read_from_loc {
670 my $self = shift;
359a01ac 671 my ($subloc, $orig_key) = @_;
8db25060 672
83371fe3 673 my $storage = $self->_storage;
8db25060 674
83371fe3 675 my $signature = $storage->read_at( $subloc, SIG_SIZE );
8db25060 676
677 ##
678 # If value is a hash or array, return new DBM::Deep object with correct offset
679 ##
680 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
c3aafc14 681 #XXX This needs to be a singleton
bd01f6ec 682# my $new_obj;
683# my $is_autobless;
684# if ( $signature eq SIG_HASH ) {
685# $new_obj = {};
686# tie %$new_obj, 'DBM::Deep', {
687# base_offset => $subloc,
83371fe3 688# storage => $self->_storage,
bd01f6ec 689# parent => $self->{obj},
690# parent_key => $orig_key,
691# };
83371fe3 692# $is_autobless = tied(%$new_obj)->_storage->{autobless};
bd01f6ec 693# }
694# else {
695# $new_obj = [];
696# tie @$new_obj, 'DBM::Deep', {
697# base_offset => $subloc,
83371fe3 698# storage => $self->_storage,
bd01f6ec 699# parent => $self->{obj},
700# parent_key => $orig_key,
701# };
83371fe3 702# $is_autobless = tied(@$new_obj)->_storage->{autobless};
bd01f6ec 703# }
704#
705# if ($is_autobless) {
706
685e40f1 707 my $new_obj = DBM::Deep->new({
359a01ac 708 type => $signature,
8db25060 709 base_offset => $subloc,
83371fe3 710 storage => $self->_storage,
359a01ac 711 parent => $self->{obj},
712 parent_key => $orig_key,
685e40f1 713 });
8db25060 714
83371fe3 715 if ($new_obj->_storage->{autobless}) {
8db25060 716 ##
717 # Skip over value and plain key to see if object needs
718 # to be re-blessed
719 ##
83371fe3 720 $storage->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 721
83371fe3 722 my $size = $storage->read_at( undef, $self->{data_size} );
c6ea6b6c 723 $size = unpack($self->{data_pack}, $size);
83371fe3 724 if ($size) { $storage->increment_pointer( $size ); }
8db25060 725
83371fe3 726 my $bless_bit = $storage->read_at( undef, 1 );
86867f3a 727 if ( ord($bless_bit) ) {
728 my $size = unpack(
729 $self->{data_pack},
83371fe3 730 $storage->read_at( undef, $self->{data_size} ),
86867f3a 731 );
7dcefff3 732
86867f3a 733 if ( $size ) {
83371fe3 734 $new_obj = bless $new_obj, $storage->read_at( undef, $size );
86867f3a 735 }
8db25060 736 }
737 }
738
685e40f1 739 return $new_obj;
8db25060 740 }
741 elsif ( $signature eq SIG_INTERNAL ) {
83371fe3 742 my $size = $storage->read_at( undef, $self->{data_size} );
8db25060 743 $size = unpack($self->{data_pack}, $size);
744
745 if ( $size ) {
83371fe3 746 my $new_loc = $storage->read_at( undef, $size );
7dcefff3 747 $new_loc = unpack( $self->{long_pack}, $new_loc );
359a01ac 748 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 749 }
750 else {
751 return;
752 }
753 }
754 ##
755 # Otherwise return actual value
756 ##
460b1067 757 elsif ( $signature eq SIG_DATA ) {
83371fe3 758 my $size = $storage->read_at( undef, $self->{data_size} );
8db25060 759 $size = unpack($self->{data_pack}, $size);
760
83371fe3 761 my $value = $size ? $storage->read_at( undef, $size ) : '';
8db25060 762 return $value;
763 }
764
765 ##
766 # Key exists, but content is null
767 ##
768 return;
769}
770
9020ee8c 771sub get_bucket_value {
beac1dff 772 ##
773 # Fetch single value given tag and MD5 digested key.
774 ##
775 my $self = shift;
359a01ac 776 my ($tag, $md5, $orig_key) = @_;
9020ee8c 777
21838116 778 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 779 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
94e8af14 780
ea2f6d67 781 if ( !$keyloc ) {
94e8af14 782 #XXX Need to use real key
ea2f6d67 783# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key );
94e8af14 784# return;
785 }
ea2f6d67 786# elsif ( !$is_deleted ) {
787 else {
788 my $keytag = $self->load_tag( $keyloc );
789 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 790 if (!$subloc && !$is_deleted) {
ea2f6d67 791 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
792 }
793 if ( $subloc && !$is_deleted ) {
794 return $self->read_from_loc( $subloc, $orig_key );
795 }
386bab6c 796 }
94e8af14 797
beac1dff 798 return;
9020ee8c 799}
ab0e4957 800
801sub delete_bucket {
beac1dff 802 ##
803 # Delete single key/value pair given tag and MD5 digested key.
804 ##
805 my $self = shift;
a97c8f67 806 my ($tag, $md5, $orig_key) = @_;
ab0e4957 807
22e20cce 808 #ACID - Although this is a mutation, we must find any transaction.
809 # This is because we need to mark something as deleted that is in the HEAD.
ea2f6d67 810 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 811
ea2f6d67 812 return if !$keyloc;
633df1fd 813
83371fe3 814 my $storage = $self->_storage;
633df1fd 815
816 my @transactions;
83371fe3 817 if ( $storage->transaction_id == 0 ) {
818 @transactions = $storage->current_transactions;
633df1fd 819 }
820
83371fe3 821 if ( $storage->transaction_id == 0 ) {
ea2f6d67 822 my $keytag = $self->load_tag( $keyloc );
7a960a12 823
ea2f6d67 824 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 825 return if !$subloc || $is_deleted;
826
633df1fd 827 my $value = $self->read_from_loc( $subloc, $orig_key );
828
ea2f6d67 829 my $size = $self->_length_needed( $value, $orig_key );
830
831 for my $trans_id ( @transactions ) {
832 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
833 unless ($loc) {
83371fe3 834 my $location2 = $storage->request_space( $size );
835 $storage->print_at( $keytag->{offset} + $offset2,
ea2f6d67 836 pack($self->{long_pack}, $location2 ),
837 pack( 'C C', $trans_id, 0 ),
838 );
c3aafc14 839 $self->_write_value( $location2, $orig_key, $value, $orig_key );
ea2f6d67 840 }
633df1fd 841 }
842
ea2f6d67 843 $keytag = $self->load_tag( $keyloc );
844 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
83371fe3 845 $storage->print_at( $keytag->{offset} + $offset,
ea2f6d67 846 substr( $keytag->{content}, $offset + $self->{key_size} ),
847 chr(0) x $self->{key_size},
019404df 848 );
386bab6c 849 }
633df1fd 850 else {
ea2f6d67 851 my $keytag = $self->load_tag( $keyloc );
7a960a12 852
ea2f6d67 853 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 854
83371fe3 855 $storage->print_at( $keytag->{offset} + $offset,
13ff93d5 856 pack($self->{long_pack}, 0 ),
83371fe3 857 pack( 'C C', $storage->transaction_id, 1 ),
ea2f6d67 858 );
633df1fd 859 }
860
861 return 1;
ab0e4957 862}
863
912d50b1 864sub bucket_exists {
beac1dff 865 ##
866 # Check existence of single key given tag and MD5 digested key.
867 ##
868 my $self = shift;
e96daec8 869 my ($tag, $md5) = @_;
912d50b1 870
21838116 871 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 872 my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );
873 my $keytag = $self->load_tag( $keyloc );
874 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
13ff93d5 875 if ( !$subloc && !$is_deleted ) {
ea2f6d67 876 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
877 }
c9b6d0d8 878 return ($subloc && !$is_deleted) && 1;
912d50b1 879}
880
72e315ac 881sub find_blist {
beac1dff 882 ##
883 # Locate offset for bucket list, given digested key
884 ##
885 my $self = shift;
e96daec8 886 my ($offset, $md5, $args) = @_;
d0b74c17 887 $args = {} unless $args;
888
beac1dff 889 ##
890 # Locate offset for bucket list using digest index system
891 ##
e96daec8 892 my $tag = $self->load_tag( $offset )
893 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 894
fb451ba6 895 #XXX What happens when $ch >= $self->{hash_size} ??
896 for (my $ch = 0; $tag->{signature} ne SIG_BLIST; $ch++) {
d0b74c17 897 my $num = ord substr($md5, $ch, 1);
898
899 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 900 $tag = $self->index_lookup( $tag, $num );
d0b74c17 901
902 if (!$tag) {
29b01632 903 return if !$args->{create};
d0b74c17 904
83371fe3 905 my $loc = $self->_storage->request_space(
e96daec8 906 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 907 );
908
83371fe3 909 $self->_storage->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 910
9e4f83a0 911 $tag = $self->write_tag(
e96daec8 912 $loc, SIG_BLIST,
f37c15ab 913 chr(0)x$self->{bucket_list_size},
d5d7c51d 914 );
915
916 $tag->{ref_loc} = $ref_loc;
917 $tag->{ch} = $ch;
918
919 last;
d0b74c17 920 }
921
fb451ba6 922 $tag->{ch} = $ch;
d0b74c17 923 $tag->{ref_loc} = $ref_loc;
beac1dff 924 }
d0b74c17 925
beac1dff 926 return $tag;
6736c116 927}
928
d0b74c17 929sub index_lookup {
930 ##
931 # Given index tag, lookup single entry in index and return .
932 ##
933 my $self = shift;
e96daec8 934 my ($tag, $index) = @_;
d0b74c17 935
936 my $location = unpack(
937 $self->{long_pack},
938 substr(
939 $tag->{content},
940 $index * $self->{long_size},
941 $self->{long_size},
942 ),
943 );
944
945 if (!$location) { return; }
946
e96daec8 947 return $self->load_tag( $location );
d0b74c17 948}
949
6736c116 950sub traverse_index {
beac1dff 951 ##
952 # Scan index and recursively step into deeper levels, looking for next key.
953 ##
6736c116 954 my $self = shift;
ea2f6d67 955 my ($xxxx, $offset, $ch, $force_return_next) = @_;
d0b74c17 956
e96daec8 957 my $tag = $self->load_tag( $offset );
6736c116 958
8db25060 959 if ($tag->{signature} ne SIG_BLIST) {
ea2f6d67 960 my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1));
d0b74c17 961
ea2f6d67 962 for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) {
e5fc7e69 963 my $subloc = unpack(
964 $self->{long_pack},
e06824f8 965 substr(
ea2f6d67 966 $tag->{content},
e06824f8 967 $idx * $self->{long_size},
968 $self->{long_size},
969 ),
e5fc7e69 970 );
971
beac1dff 972 if ($subloc) {
e5fc7e69 973 my $result = $self->traverse_index(
ea2f6d67 974 $xxxx, $subloc, $ch + 1, $force_return_next,
e5fc7e69 975 );
976
ea2f6d67 977 if (defined $result) { return $result; }
beac1dff 978 }
979 } # index loop
d0b74c17 980
ea2f6d67 981 $xxxx->{return_next} = 1;
42717e46 982 }
983 # This is the bucket list
e5fc7e69 984 else {
beac1dff 985 my $keys = $tag->{content};
ea2f6d67 986 if ($force_return_next) { $xxxx->{return_next} = 1; }
d0b74c17 987
beac1dff 988 ##
989 # Iterate through buckets, looking for a key match
990 ##
83371fe3 991 my $transaction_id = $self->_storage->transaction_id;
8db25060 992 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
ea2f6d67 993 my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 994
8db25060 995 # End of bucket list -- return to outer loop
ea2f6d67 996 if (!$keyloc) {
997 $xxxx->{return_next} = 1;
beac1dff 998 last;
999 }
8db25060 1000 # Located previous key -- return next one found
ea2f6d67 1001 elsif ($key eq $xxxx->{prev_md5}) {
1002 $xxxx->{return_next} = 1;
beac1dff 1003 next;
1004 }
8db25060 1005 # Seek to bucket location and skip over signature
ea2f6d67 1006 elsif ($xxxx->{return_next}) {
83371fe3 1007 my $storage = $self->_storage;
d0b74c17 1008
ea2f6d67 1009 my $keytag = $self->load_tag( $keyloc );
1010 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 1011 if ( $subloc == 0 && !$is_deleted ) {
ea2f6d67 1012 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
1013 }
1014 next if $is_deleted;
1015
beac1dff 1016 # Skip over value to get to plain key
83371fe3 1017 my $sig = $storage->read_at( $subloc, SIG_SIZE );
8db25060 1018
83371fe3 1019 my $size = $storage->read_at( undef, $self->{data_size} );
e5fc7e69 1020 $size = unpack($self->{data_pack}, $size);
83371fe3 1021 if ($size) { $storage->increment_pointer( $size ); }
d0b74c17 1022
beac1dff 1023 # Read in plain key and return as scalar
83371fe3 1024 $size = $storage->read_at( undef, $self->{data_size} );
e5fc7e69 1025 $size = unpack($self->{data_pack}, $size);
86867f3a 1026
7dcefff3 1027 my $plain_key;
83371fe3 1028 if ($size) { $plain_key = $storage->read_at( undef, $size); }
beac1dff 1029 return $plain_key;
1030 }
8db25060 1031 }
d0b74c17 1032
ea2f6d67 1033 $xxxx->{return_next} = 1;
42717e46 1034 }
d0b74c17 1035
beac1dff 1036 return;
6736c116 1037}
1038
75be6413 1039# Utilities
1040
9cec1360 1041sub _get_key_subloc {
75be6413 1042 my $self = shift;
1043 my ($keys, $idx) = @_;
1044
ea2f6d67 1045 return unpack(
28394a1a 1046 # This is 'a', not 'A'. Please read the pack() documentation for the
1047 # difference between the two and why it's important.
ea2f6d67 1048 "a$self->{hash_size} $self->{long_pack}",
75be6413 1049 substr(
1050 $keys,
9cec1360 1051 ($idx * $self->{bucket_size}),
1052 $self->{bucket_size},
75be6413 1053 ),
1054 );
75be6413 1055}
1056
d608b06e 1057sub _find_in_buckets {
1058 my $self = shift;
ea2f6d67 1059 my ($tag, $md5) = @_;
21838116 1060
d608b06e 1061 BUCKET:
1062 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
ea2f6d67 1063 my ($key, $subloc) = $self->_get_key_subloc(
9a187d8c 1064 $tag->{content}, $i,
1065 );
d608b06e 1066
fb451ba6 1067 next BUCKET if $subloc && $key ne $md5;
1068 return( $subloc, $i * $self->{bucket_size} );
d608b06e 1069 }
1070
1071 return;
1072}
1073
994ccd8e 1074sub _release_space {
1075 my $self = shift;
e96daec8 1076 my ($size, $loc) = @_;
994ccd8e 1077
7b1e1aa1 1078 my $next_loc = 0;
1079
83371fe3 1080 $self->_storage->print_at( $loc,
019404df 1081 SIG_FREE,
1082 pack($self->{long_pack}, $size ),
1083 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1084 );
1085
994ccd8e 1086 return;
1087}
1088
e96daec8 1089sub _throw_error {
1090 die "DBM::Deep: $_[1]\n";
1091}
1092
86867f3a 1093sub _get_dbm_object {
1094 my $item = shift;
1095
1096 my $obj = eval {
1097 local $SIG{__DIE__};
1098 if ($item->isa( 'DBM::Deep' )) {
1099 return $item;
1100 }
1101 return;
1102 };
1103 return $obj if $obj;
1104
1105 my $r = Scalar::Util::reftype( $item ) || '';
1106 if ( $r eq 'HASH' ) {
1107 my $obj = eval {
1108 local $SIG{__DIE__};
1109 my $obj = tied(%$item);
1110 if ($obj->isa( 'DBM::Deep' )) {
1111 return $obj;
1112 }
1113 return;
1114 };
1115 return $obj if $obj;
1116 }
1117 elsif ( $r eq 'ARRAY' ) {
1118 my $obj = eval {
1119 local $SIG{__DIE__};
1120 my $obj = tied(@$item);
1121 if ($obj->isa( 'DBM::Deep' )) {
1122 return $obj;
1123 }
1124 return;
1125 };
1126 return $obj if $obj;
1127 }
1128
1129 return;
1130}
1131
1132sub _length_needed {
1133 my $self = shift;
1134 my ($value, $key) = @_;
1135
1136 my $is_dbm_deep = eval {
1137 local $SIG{'__DIE__'};
1138 $value->isa( 'DBM::Deep' );
1139 };
1140
1141 my $len = SIG_SIZE
1142 + $self->{data_size} # size for value
1143 + $self->{data_size} # size for key
1144 + length( $key ); # length of key
1145
83371fe3 1146 if ( $is_dbm_deep && $value->_storage eq $self->_storage ) {
86867f3a 1147 # long_size is for the internal reference
1148 return $len + $self->{long_size};
1149 }
1150
83371fe3 1151 if ( $self->_storage->{autobless} ) {
86867f3a 1152 # This is for the bit saying whether or not this thing is blessed.
1153 $len += 1;
1154 }
1155
1156 my $r = Scalar::Util::reftype( $value ) || '';
1157 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1158 if ( defined $value ) {
1159 $len += length( $value );
1160 }
1161 return $len;
1162 }
1163
1164 $len += $self->{index_size};
1165
1166 # if autobless is enabled, must also take into consideration
1167 # the class name as it is stored after the key.
83371fe3 1168 if ( $self->_storage->{autobless} ) {
86867f3a 1169 my $c = Scalar::Util::blessed($value);
1170 if ( defined $c && !$is_dbm_deep ) {
1171 $len += $self->{data_size} + length($c);
1172 }
1173 }
1174
1175 return $len;
1176}
1177
a20d9a3f 11781;
1179__END__