r14236@Rob-Kinyons-PowerBook: rob | 2006-06-14 23:07:31 -0400
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
6
c3aafc14 7our $VERSION = q(0.99_03);
86867f3a 8
633df1fd 9use Fcntl qw( :DEFAULT :flock );
359a01ac 10use Scalar::Util ();
a20d9a3f 11
21838116 12# File-wide notes:
20b7f047 13# * To add to bucket_size, make sure you modify the following:
14# - calculate_sizes()
15# - _get_key_subloc()
16# - add_bucket() - where the buckets are printed
c3aafc14 17#
83371fe3 18# * Every method in here assumes that the _storage has been appropriately
c3aafc14 19# safeguarded. This can be anything from flock() to some sort of manual
20# mutex. But, it's the caller's responsability to make sure that this has
21# been done.
21838116 22
8db25060 23##
24# Setup file and tag signatures. These should never change.
25##
26sub SIG_FILE () { 'DPDB' }
460b1067 27sub SIG_HEADER () { 'h' }
8db25060 28sub SIG_INTERNAL () { 'i' }
29sub SIG_HASH () { 'H' }
30sub SIG_ARRAY () { 'A' }
8db25060 31sub SIG_NULL () { 'N' }
32sub SIG_DATA () { 'D' }
33sub SIG_INDEX () { 'I' }
34sub SIG_BLIST () { 'B' }
7b1e1aa1 35sub SIG_FREE () { 'F' }
86867f3a 36sub SIG_KEYS () { 'K' }
8db25060 37sub SIG_SIZE () { 1 }
38
fb451ba6 39# This is the transaction ID for the HEAD
40sub HEAD () { 0 }
41
c3aafc14 42################################################################################
43#
44# This is new code. It is a complete rewrite of the engine based on a new API
45#
46################################################################################
47
c3aafc14 48sub read_value {
49 my $self = shift;
40956c06 50 my ($trans_id, $offset, $key, $orig_key) = @_;
c3aafc14 51
40956c06 52 my $dig_key = $self->_apply_digest( $key );
e5a9d386 53 my $tag = $self->find_blist( $offset, $dig_key ) or return;
54 return $self->get_bucket_value( $tag, $dig_key, $orig_key );
c3aafc14 55}
56
c3aafc14 57sub key_exists {
58 my $self = shift;
40956c06 59 my ($trans_id, $offset, $key) = @_;
c3aafc14 60
40956c06 61 my $dig_key = $self->_apply_digest( $key );
e5a9d386 62 # exists() returns the empty string, not undef
63 my $tag = $self->find_blist( $offset, $dig_key ) or return '';
c3aafc14 64 return $self->bucket_exists( $tag, $dig_key, $key );
65}
66
83c43bb5 67sub get_next_key {
c3aafc14 68 my $self = shift;
40956c06 69 my ($trans_id, $offset) = @_;
c3aafc14 70
83c43bb5 71 # If the previous key was not specifed, start at the top and
72 # return the first one found.
73 my $temp;
40956c06 74 if ( @_ > 2 ) {
83c43bb5 75 $temp = {
40956c06 76 prev_md5 => $self->_apply_digest($_[2]),
83c43bb5 77 return_next => 0,
78 };
79 }
80 else {
81 $temp = {
82 prev_md5 => chr(0) x $self->{hash_size},
83 return_next => 1,
84 };
85 }
86
87 return $self->traverse_index( $temp, $offset, 0 );
c3aafc14 88}
89
83371fe3 90sub delete_key {
91 my $self = shift;
40956c06 92 my ($trans_id, $offset, $key, $orig_key) = @_;
83371fe3 93
40956c06 94 my $dig_key = $self->_apply_digest( $key );
83371fe3 95 my $tag = $self->find_blist( $offset, $dig_key ) or return;
96 my $value = $self->get_bucket_value( $tag, $dig_key, $orig_key );
97 $self->delete_bucket( $tag, $dig_key, $orig_key );
98 return $value;
99}
100
83371fe3 101sub write_value {
102 my $self = shift;
40956c06 103 my ($trans_id, $offset, $key, $value, $orig_key) = @_;
83371fe3 104
40956c06 105 my $dig_key = $self->_apply_digest( $key );
83371fe3 106 my $tag = $self->find_blist( $offset, $dig_key, { create => 1 } );
107 return $self->add_bucket( $tag, $dig_key, $key, $value, undef, $orig_key );
108}
109
c3aafc14 110################################################################################
111#
112# Below here is the old code. It will be folded into the code above as it can.
113#
114################################################################################
115
612969fb 116sub new {
117 my $class = shift;
118 my ($args) = @_;
119
120 my $self = bless {
86867f3a 121 long_size => 4,
122 long_pack => 'N',
123 data_size => 4,
124 data_pack => 'N',
251dfd0e 125
86867f3a 126 digest => \&Digest::MD5::md5,
c3aafc14 127 hash_size => 16, # In bytes
251dfd0e 128
81d16922 129 ##
c3aafc14 130 # Number of buckets per blist before another level of indexing is
e0098e7f 131 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 132 # files. DO NOT decrease this value below 16, due to risk of recursive
133 # reindex overrun.
81d16922 134 ##
612969fb 135 max_buckets => 16,
460b1067 136
83371fe3 137 storage => undef,
359a01ac 138 obj => undef,
612969fb 139 }, $class;
140
e0098e7f 141 if ( defined $args->{pack_size} ) {
142 if ( lc $args->{pack_size} eq 'small' ) {
143 $args->{long_size} = 2;
c9b6d0d8 144 $args->{long_pack} = 'n';
e0098e7f 145 }
146 elsif ( lc $args->{pack_size} eq 'medium' ) {
147 $args->{long_size} = 4;
148 $args->{long_pack} = 'N';
149 }
150 elsif ( lc $args->{pack_size} eq 'large' ) {
151 $args->{long_size} = 8;
152 $args->{long_pack} = 'Q';
153 }
154 else {
155 die "Unknown pack_size value: '$args->{pack_size}'\n";
156 }
157 }
158
fde3db1a 159 # Grab the parameters we want to use
160 foreach my $param ( keys %$self ) {
161 next unless exists $args->{$param};
3e9498a1 162 $self->{$param} = $args->{$param};
fde3db1a 163 }
359a01ac 164 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 165
e0098e7f 166 if ( $self->{max_buckets} < 16 ) {
167 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
168 $self->{max_buckets} = 16;
169 }
170
260a80b4 171 return $self;
172}
173
83371fe3 174sub _storage { return $_[0]{storage} }
460b1067 175
40956c06 176sub _apply_digest {
c3aafc14 177 my $self = shift;
178 return $self->{digest}->(@_);
179}
180
260a80b4 181sub calculate_sizes {
182 my $self = shift;
183
633df1fd 184 # The 2**8 here indicates the number of different characters in the
185 # current hashing algorithm
28394a1a 186 #XXX Does this need to be updated with different hashing algorithms?
ea2f6d67 187 $self->{hash_chars_used} = (2**8);
188 $self->{index_size} = $self->{hash_chars_used} * $self->{long_size};
189
190 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
e0098e7f 191 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 192
ea2f6d67 193 $self->{key_size} = $self->{long_size} * 2;
194 $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size};
195
260a80b4 196 return;
1bf65be7 197}
198
fde3db1a 199sub write_file_header {
0d0f3d5d 200 my $self = shift;
0d0f3d5d 201
83371fe3 202 my $loc = $self->_storage->request_space( length( SIG_FILE ) + 33 );
0d0f3d5d 203
83371fe3 204 $self->_storage->print_at( $loc,
260a80b4 205 SIG_FILE,
460b1067 206 SIG_HEADER,
207 pack('N', 1), # header version
eff6a245 208 pack('N', 24), # header size
209 pack('N4', 0, 0, 0, 0), # currently running transaction IDs
c9b6d0d8 210 pack('n', $self->{long_size}),
260a80b4 211 pack('A', $self->{long_pack}),
c9b6d0d8 212 pack('n', $self->{data_size}),
260a80b4 213 pack('A', $self->{data_pack}),
c9b6d0d8 214 pack('n', $self->{max_buckets}),
260a80b4 215 );
0d0f3d5d 216
83371fe3 217 $self->_storage->set_transaction_offset( 13 );
20b7f047 218
0d0f3d5d 219 return;
220}
221
fde3db1a 222sub read_file_header {
e064ccd1 223 my $self = shift;
e064ccd1 224
83371fe3 225 my $buffer = $self->_storage->read_at( 0, length(SIG_FILE) + 9 );
7dcefff3 226 return unless length($buffer);
460b1067 227
228 my ($file_signature, $sig_header, $header_version, $size) = unpack(
229 'A4 A N N', $buffer
42f79e07 230 );
e064ccd1 231
460b1067 232 unless ( $file_signature eq SIG_FILE ) {
83371fe3 233 $self->_storage->close;
e96daec8 234 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 235 }
260a80b4 236
460b1067 237 unless ( $sig_header eq SIG_HEADER ) {
83371fe3 238 $self->_storage->close;
e96daec8 239 $self->_throw_error( "Old file version found." );
460b1067 240 }
9b2370e0 241
83371fe3 242 my $buffer2 = $self->_storage->read_at( undef, $size );
eff6a245 243 my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 );
15ba72cc 244
83371fe3 245 $self->_storage->set_transaction_offset( 13 );
15ba72cc 246
460b1067 247 if ( @values < 5 || grep { !defined } @values ) {
83371fe3 248 $self->_storage->close;
e96daec8 249 $self->_throw_error("Corrupted file - bad header");
e064ccd1 250 }
251
460b1067 252 #XXX Add warnings if values weren't set right
253 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
254
7dcefff3 255 return length($buffer) + length($buffer2);
e064ccd1 256}
257
460b1067 258sub setup_fh {
259 my $self = shift;
260 my ($obj) = @_;
70b55428 261
7dcefff3 262 # Need to remove use of $fh here
83371fe3 263 my $fh = $self->_storage->{fh};
6fde4ed2 264 flock $fh, LOCK_EX;
118ba343 265
260a80b4 266 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 267 unless ( $obj->{base_offset} ) {
e96daec8 268 my $bytes_read = $self->read_file_header;
118ba343 269
260a80b4 270 $self->calculate_sizes;
271
118ba343 272 ##
fde3db1a 273 # File is empty -- write header and master index
118ba343 274 ##
275 if (!$bytes_read) {
83371fe3 276 $self->_storage->audit( "# Database created on" );
359a01ac 277
e96daec8 278 $self->write_file_header;
118ba343 279
83371fe3 280 $obj->{base_offset} = $self->_storage->request_space(
22e20cce 281 $self->tag_size( $self->{index_size} ),
282 );
118ba343 283
9e4f83a0 284 $self->write_tag(
e96daec8 285 $obj->_base_offset, $obj->_type,
f37c15ab 286 chr(0)x$self->{index_size},
118ba343 287 );
288
289 # Flush the filehandle
290 my $old_fh = select $fh;
291 my $old_af = $|; $| = 1; $| = $old_af;
292 select $old_fh;
293 }
294 else {
295 $obj->{base_offset} = $bytes_read;
296
297 ##
fde3db1a 298 # Get our type from master index header
118ba343 299 ##
359a01ac 300 my $tag = $self->load_tag($obj->_base_offset);
301 unless ( $tag ) {
302 flock $fh, LOCK_UN;
303 $self->_throw_error("Corrupted file, no master index record");
304 }
118ba343 305
e96daec8 306 unless ($obj->_type eq $tag->{signature}) {
359a01ac 307 flock $fh, LOCK_UN;
e96daec8 308 $self->_throw_error("File type mismatch");
118ba343 309 }
310 }
118ba343 311 }
260a80b4 312 else {
313 $self->calculate_sizes;
314 }
e06824f8 315
673464d9 316 #XXX We have to make sure we don't mess up when autoflush isn't turned on
83371fe3 317 $self->_storage->set_inode;
70b55428 318
6fde4ed2 319 flock $fh, LOCK_UN;
320
70b55428 321 return 1;
322}
323
16d1ad9b 324sub tag_size {
325 my $self = shift;
326 my ($size) = @_;
327 return SIG_SIZE + $self->{data_size} + $size;
328}
329
9e4f83a0 330sub write_tag {
20f7b20c 331 ##
332 # Given offset, signature and content, create tag and write to disk
333 ##
d4b1166e 334 my $self = shift;
e96daec8 335 my ($offset, $sig, $content) = @_;
f37c15ab 336 my $size = length( $content );
20f7b20c 337
83371fe3 338 $self->_storage->print_at(
7dcefff3 339 $offset,
340 $sig, pack($self->{data_pack}, $size), $content,
341 );
20f7b20c 342
f37c15ab 343 return unless defined $offset;
344
20f7b20c 345 return {
346 signature => $sig,
72e315ac 347 #XXX Is this even used?
86867f3a 348 size => $size,
f5677eab 349 start => $offset,
86867f3a 350 offset => $offset + SIG_SIZE + $self->{data_size},
f5677eab 351 content => $content,
352 is_new => 1,
20f7b20c 353 };
d4b1166e 354}
355
356sub load_tag {
20f7b20c 357 ##
358 # Given offset, load single tag and return signature, size and data
359 ##
d4b1166e 360 my $self = shift;
e96daec8 361 my ($offset) = @_;
20f7b20c 362
83371fe3 363 my $storage = $self->_storage;
20f7b20c 364
86867f3a 365 my ($sig, $size) = unpack(
366 "A $self->{data_pack}",
83371fe3 367 $storage->read_at( $offset, SIG_SIZE + $self->{data_size} ),
86867f3a 368 );
20f7b20c 369
370 return {
371 signature => $sig,
c3aafc14 372 size => $size, #XXX Is this even used?
fb451ba6 373 start => $offset,
86867f3a 374 offset => $offset + SIG_SIZE + $self->{data_size},
83371fe3 375 content => $storage->read_at( undef, $size ),
20f7b20c 376 };
d4b1166e 377}
378
ea2f6d67 379sub find_keyloc {
380 my $self = shift;
381 my ($tag, $transaction_id) = @_;
83371fe3 382 $transaction_id = $self->_storage->transaction_id
ea2f6d67 383 unless defined $transaction_id;
384
385 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
386 my ($loc, $trans_id, $is_deleted) = unpack(
387 "$self->{long_pack} C C",
388 substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ),
389 );
390
fb451ba6 391 next if $loc != HEAD && $transaction_id != $trans_id;
392 return( $loc, $is_deleted, $i * $self->{key_size} );
ea2f6d67 393 }
394
395 return;
396}
397
20f7b20c 398sub add_bucket {
399 ##
400 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
401 # plain (undigested) key and value.
402 ##
d4b1166e 403 my $self = shift;
359a01ac 404 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
75be6413 405
eea0d863 406 # This verifies that only supported values will be stored.
407 {
408 my $r = Scalar::Util::reftype( $value );
eea0d863 409
86867f3a 410 last if !defined $r;
eea0d863 411 last if $r eq 'HASH';
412 last if $r eq 'ARRAY';
413
e96daec8 414 $self->_throw_error(
86867f3a 415 "Storage of references of type '$r' is not supported."
eea0d863 416 );
417 }
418
83371fe3 419 my $storage = $self->_storage;
20f7b20c 420
21838116 421 #ACID - This is a mutation. Must only find the exact transaction
ea2f6d67 422 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );
c9b6d0d8 423
424 my @transactions;
83371fe3 425 if ( $storage->transaction_id == 0 ) {
426 @transactions = $storage->current_transactions;
c9b6d0d8 427 }
75be6413 428
e96daec8 429# $self->_release_space( $size, $subloc );
f9c33187 430#XXX This needs updating to use _release_space
ea2f6d67 431
86867f3a 432 my $location;
ea2f6d67 433 my $size = $self->_length_needed( $value, $plain_key );
019404df 434
ea2f6d67 435 # Updating a known md5
436 if ( $keyloc ) {
437 my $keytag = $self->load_tag( $keyloc );
438 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
439
13ff93d5 440 if ( $subloc && !$is_deleted && @transactions ) {
ea2f6d67 441 my $old_value = $self->read_from_loc( $subloc, $orig_key );
442 my $old_size = $self->_length_needed( $old_value, $plain_key );
443
444 for my $trans_id ( @transactions ) {
445 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
446 unless ($loc) {
83371fe3 447 my $location2 = $storage->request_space( $old_size );
448 $storage->print_at( $keytag->{offset} + $offset2,
ea2f6d67 449 pack($self->{long_pack}, $location2 ),
450 pack( 'C C', $trans_id, 0 ),
451 );
c3aafc14 452 $self->_write_value( $location2, $plain_key, $old_value, $orig_key );
ea2f6d67 453 }
454 }
75be6413 455 }
504185fb 456
83371fe3 457 $location = $self->_storage->request_space( $size );
ea2f6d67 458 #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
83371fe3 459 $storage->print_at( $keytag->{offset} + $offset,
ea2f6d67 460 pack($self->{long_pack}, $location ),
83371fe3 461 pack( 'C C', $storage->transaction_id, 0 ),
ea2f6d67 462 );
75be6413 463 }
386bab6c 464 # Adding a new md5
ea2f6d67 465 else {
83371fe3 466 my $keyloc = $storage->request_space( $self->tag_size( $self->{keyloc_size} ) );
386bab6c 467
ea2f6d67 468 # The bucket fit into list
469 if ( defined $offset ) {
83371fe3 470 $storage->print_at( $tag->{offset} + $offset,
ea2f6d67 471 $md5, pack( $self->{long_pack}, $keyloc ),
472 );
473 }
474 # If bucket didn't fit into list, split into a new index level
475 else {
476 $self->split_index( $tag, $md5, $keyloc );
477 }
478
479 my $keytag = $self->write_tag(
480 $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
019404df 481 );
c9b6d0d8 482
83371fe3 483 $location = $self->_storage->request_space( $size );
484 $storage->print_at( $keytag->{offset},
ea2f6d67 485 pack( $self->{long_pack}, $location ),
83371fe3 486 pack( 'C C', $storage->transaction_id, 0 ),
ea2f6d67 487 );
488
489 my $offset = 1;
490 for my $trans_id ( @transactions ) {
83371fe3 491 $storage->print_at( $keytag->{offset} + $self->{key_size} * $offset++,
13ff93d5 492 pack( $self->{long_pack}, 0 ),
ea2f6d67 493 pack( 'C C', $trans_id, 1 ),
494 );
c9b6d0d8 495 }
386bab6c 496 }
20f7b20c 497
c3aafc14 498 $self->_write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 499
86867f3a 500 return 1;
d5d7c51d 501}
502
c3aafc14 503sub _write_value {
d5d7c51d 504 my $self = shift;
129ea236 505 my ($key_loc, $location, $key, $value, $orig_key) = @_;
d5d7c51d 506
83371fe3 507 my $storage = $self->_storage;
d5d7c51d 508
9d4fa373 509 my $dbm_deep_obj = _get_dbm_object( $value );
83371fe3 510 if ( $dbm_deep_obj && $dbm_deep_obj->_storage ne $storage ) {
e96daec8 511 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 512 }
d5d7c51d 513
20f7b20c 514 ##
d5d7c51d 515 # Write signature based on content type, set content length and write
516 # actual value.
20f7b20c 517 ##
9d4fa373 518 my $r = Scalar::Util::reftype( $value ) || '';
519 if ( $dbm_deep_obj ) {
7dcefff3 520 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 521 }
522 elsif ($r eq 'HASH') {
9d4fa373 523 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 524 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 525 }
7dcefff3 526 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 527 }
528 elsif ($r eq 'ARRAY') {
9d4fa373 529 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 530 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 531 }
7dcefff3 532 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 533 }
534 elsif (!defined($value)) {
7dcefff3 535 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 536 }
537 else {
7dcefff3 538 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 539 }
20f7b20c 540
d5d7c51d 541 ##
542 # Plain key is stored AFTER value, as keys are typically fetched less often.
543 ##
83371fe3 544 $storage->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 545
9a187d8c 546 # Internal references don't care about autobless
9d4fa373 547 return 1 if $dbm_deep_obj;
9a187d8c 548
d5d7c51d 549 ##
550 # If value is blessed, preserve class name
551 ##
83371fe3 552 if ( $storage->{autobless} ) {
633df1fd 553 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
83371fe3 554 $storage->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 555 }
d5d7c51d 556 else {
83371fe3 557 $storage->print_at( undef, chr(0) );
20f7b20c 558 }
d5d7c51d 559 }
20f7b20c 560
d5d7c51d 561 ##
56ec4340 562 # Tie the passed in reference so that changes to it are reflected in the
563 # datafile. The use of $location as the base_offset will act as the
564 # the linkage between parent and child.
565 #
566 # The overall assignment is a hack around the fact that just tying doesn't
567 # store the values. This may not be the wrong thing to do.
d5d7c51d 568 ##
9d4fa373 569 if ($r eq 'HASH') {
570 my %x = %$value;
571 tie %$value, 'DBM::Deep', {
129ea236 572 base_offset => $key_loc,
83371fe3 573 storage => $storage,
359a01ac 574 parent => $self->{obj},
575 parent_key => $orig_key,
9d4fa373 576 };
577 %$value = %x;
bd01f6ec 578 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
9d4fa373 579 }
580 elsif ($r eq 'ARRAY') {
581 my @x = @$value;
582 tie @$value, 'DBM::Deep', {
129ea236 583 base_offset => $key_loc,
83371fe3 584 storage => $storage,
359a01ac 585 parent => $self->{obj},
586 parent_key => $orig_key,
9d4fa373 587 };
588 @$value = @x;
bd01f6ec 589 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
20f7b20c 590 }
d4b1166e 591
d5d7c51d 592 return 1;
d4b1166e 593}
594
75be6413 595sub split_index {
596 my $self = shift;
ea2f6d67 597 my ($tag, $md5, $keyloc) = @_;
75be6413 598
83371fe3 599 my $storage = $self->_storage;
21838116 600
83371fe3 601 my $loc = $storage->request_space(
e96daec8 602 $self->tag_size( $self->{index_size} ),
16d1ad9b 603 );
604
83371fe3 605 $storage->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 606
9e4f83a0 607 my $index_tag = $self->write_tag(
e96daec8 608 $loc, SIG_INDEX,
f37c15ab 609 chr(0)x$self->{index_size},
75be6413 610 );
611
7b1e1aa1 612 my $keys = $tag->{content}
ea2f6d67 613 . $md5 . pack($self->{long_pack}, $keyloc);
75be6413 614
f9c33187 615 my @newloc = ();
75be6413 616 BUCKET:
633df1fd 617 # The <= here is deliberate - we have max_buckets+1 keys to iterate
618 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 619 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
ea2f6d67 620 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 621
f9c33187 622 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
623 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 624
75be6413 625 my $num = ord(substr($key, $tag->{ch} + 1, 1));
626
f9c33187 627 if ($newloc[$num]) {
83371fe3 628 my $subkeys = $storage->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 629
f9c33187 630 # This is looking for the first empty spot
ea2f6d67 631 my ($subloc, $offset) = $self->_find_in_buckets(
f9c33187 632 { content => $subkeys }, '',
7b1e1aa1 633 );
75be6413 634
83371fe3 635 $storage->print_at(
633df1fd 636 $newloc[$num] + $offset,
637 $key, pack($self->{long_pack}, $old_subloc),
638 );
7b1e1aa1 639
640 next;
75be6413 641 }
75be6413 642
83371fe3 643 my $loc = $storage->request_space(
e96daec8 644 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 645 );
2603d86e 646
83371fe3 647 $storage->print_at(
019404df 648 $index_tag->{offset} + ($num * $self->{long_size}),
649 pack($self->{long_pack}, $loc),
650 );
75be6413 651
7b1e1aa1 652 my $blist_tag = $self->write_tag(
e96daec8 653 $loc, SIG_BLIST,
7b1e1aa1 654 chr(0)x$self->{bucket_list_size},
655 );
656
83371fe3 657 $storage->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 658
f9c33187 659 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 660 }
661
662 $self->_release_space(
e96daec8 663 $self->tag_size( $self->{bucket_list_size} ),
fb451ba6 664 $tag->{start},
7b1e1aa1 665 );
75be6413 666
ea2f6d67 667 return 1;
75be6413 668}
669
8db25060 670sub read_from_loc {
671 my $self = shift;
129ea236 672 my ($key_loc, $subloc, $orig_key) = @_;
8db25060 673
83371fe3 674 my $storage = $self->_storage;
8db25060 675
83371fe3 676 my $signature = $storage->read_at( $subloc, SIG_SIZE );
8db25060 677
678 ##
679 # If value is a hash or array, return new DBM::Deep object with correct offset
680 ##
681 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
c3aafc14 682 #XXX This needs to be a singleton
bd01f6ec 683# my $new_obj;
684# my $is_autobless;
685# if ( $signature eq SIG_HASH ) {
686# $new_obj = {};
687# tie %$new_obj, 'DBM::Deep', {
688# base_offset => $subloc,
83371fe3 689# storage => $self->_storage,
bd01f6ec 690# parent => $self->{obj},
691# parent_key => $orig_key,
692# };
83371fe3 693# $is_autobless = tied(%$new_obj)->_storage->{autobless};
bd01f6ec 694# }
695# else {
696# $new_obj = [];
697# tie @$new_obj, 'DBM::Deep', {
698# base_offset => $subloc,
83371fe3 699# storage => $self->_storage,
bd01f6ec 700# parent => $self->{obj},
701# parent_key => $orig_key,
702# };
83371fe3 703# $is_autobless = tied(@$new_obj)->_storage->{autobless};
bd01f6ec 704# }
705#
706# if ($is_autobless) {
707
685e40f1 708 my $new_obj = DBM::Deep->new({
359a01ac 709 type => $signature,
129ea236 710 base_offset => $key_loc,
83371fe3 711 storage => $self->_storage,
359a01ac 712 parent => $self->{obj},
713 parent_key => $orig_key,
685e40f1 714 });
8db25060 715
83371fe3 716 if ($new_obj->_storage->{autobless}) {
8db25060 717 ##
718 # Skip over value and plain key to see if object needs
719 # to be re-blessed
720 ##
83371fe3 721 $storage->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 722
83371fe3 723 my $size = $storage->read_at( undef, $self->{data_size} );
c6ea6b6c 724 $size = unpack($self->{data_pack}, $size);
83371fe3 725 if ($size) { $storage->increment_pointer( $size ); }
8db25060 726
83371fe3 727 my $bless_bit = $storage->read_at( undef, 1 );
86867f3a 728 if ( ord($bless_bit) ) {
729 my $size = unpack(
730 $self->{data_pack},
83371fe3 731 $storage->read_at( undef, $self->{data_size} ),
86867f3a 732 );
7dcefff3 733
86867f3a 734 if ( $size ) {
83371fe3 735 $new_obj = bless $new_obj, $storage->read_at( undef, $size );
86867f3a 736 }
8db25060 737 }
738 }
739
685e40f1 740 return $new_obj;
8db25060 741 }
742 elsif ( $signature eq SIG_INTERNAL ) {
83371fe3 743 my $size = $storage->read_at( undef, $self->{data_size} );
8db25060 744 $size = unpack($self->{data_pack}, $size);
745
746 if ( $size ) {
83371fe3 747 my $new_loc = $storage->read_at( undef, $size );
7dcefff3 748 $new_loc = unpack( $self->{long_pack}, $new_loc );
129ea236 749 return $self->read_from_loc( $key_loc, $new_loc, $orig_key );
8db25060 750 }
751 else {
752 return;
753 }
754 }
755 ##
756 # Otherwise return actual value
757 ##
460b1067 758 elsif ( $signature eq SIG_DATA ) {
83371fe3 759 my $size = $storage->read_at( undef, $self->{data_size} );
8db25060 760 $size = unpack($self->{data_pack}, $size);
761
83371fe3 762 my $value = $size ? $storage->read_at( undef, $size ) : '';
8db25060 763 return $value;
764 }
765
766 ##
767 # Key exists, but content is null
768 ##
769 return;
770}
771
9020ee8c 772sub get_bucket_value {
beac1dff 773 ##
774 # Fetch single value given tag and MD5 digested key.
775 ##
776 my $self = shift;
359a01ac 777 my ($tag, $md5, $orig_key) = @_;
9020ee8c 778
21838116 779 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 780 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
94e8af14 781
ea2f6d67 782 if ( !$keyloc ) {
94e8af14 783 #XXX Need to use real key
ea2f6d67 784# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key );
94e8af14 785# return;
786 }
ea2f6d67 787# elsif ( !$is_deleted ) {
788 else {
789 my $keytag = $self->load_tag( $keyloc );
790 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 791 if (!$subloc && !$is_deleted) {
ea2f6d67 792 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
793 }
794 if ( $subloc && !$is_deleted ) {
795 return $self->read_from_loc( $subloc, $orig_key );
796 }
386bab6c 797 }
94e8af14 798
beac1dff 799 return;
9020ee8c 800}
ab0e4957 801
802sub delete_bucket {
beac1dff 803 ##
804 # Delete single key/value pair given tag and MD5 digested key.
805 ##
806 my $self = shift;
a97c8f67 807 my ($tag, $md5, $orig_key) = @_;
ab0e4957 808
22e20cce 809 #ACID - Although this is a mutation, we must find any transaction.
810 # This is because we need to mark something as deleted that is in the HEAD.
ea2f6d67 811 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 812
ea2f6d67 813 return if !$keyloc;
633df1fd 814
83371fe3 815 my $storage = $self->_storage;
633df1fd 816
817 my @transactions;
83371fe3 818 if ( $storage->transaction_id == 0 ) {
819 @transactions = $storage->current_transactions;
633df1fd 820 }
821
83371fe3 822 if ( $storage->transaction_id == 0 ) {
ea2f6d67 823 my $keytag = $self->load_tag( $keyloc );
7a960a12 824
ea2f6d67 825 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 826 return if !$subloc || $is_deleted;
827
633df1fd 828 my $value = $self->read_from_loc( $subloc, $orig_key );
829
ea2f6d67 830 my $size = $self->_length_needed( $value, $orig_key );
831
832 for my $trans_id ( @transactions ) {
833 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
834 unless ($loc) {
83371fe3 835 my $location2 = $storage->request_space( $size );
836 $storage->print_at( $keytag->{offset} + $offset2,
ea2f6d67 837 pack($self->{long_pack}, $location2 ),
838 pack( 'C C', $trans_id, 0 ),
839 );
c3aafc14 840 $self->_write_value( $location2, $orig_key, $value, $orig_key );
ea2f6d67 841 }
633df1fd 842 }
843
ea2f6d67 844 $keytag = $self->load_tag( $keyloc );
845 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
83371fe3 846 $storage->print_at( $keytag->{offset} + $offset,
ea2f6d67 847 substr( $keytag->{content}, $offset + $self->{key_size} ),
848 chr(0) x $self->{key_size},
019404df 849 );
386bab6c 850 }
633df1fd 851 else {
ea2f6d67 852 my $keytag = $self->load_tag( $keyloc );
7a960a12 853
ea2f6d67 854 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 855
83371fe3 856 $storage->print_at( $keytag->{offset} + $offset,
13ff93d5 857 pack($self->{long_pack}, 0 ),
83371fe3 858 pack( 'C C', $storage->transaction_id, 1 ),
ea2f6d67 859 );
633df1fd 860 }
861
862 return 1;
ab0e4957 863}
864
912d50b1 865sub bucket_exists {
beac1dff 866 ##
867 # Check existence of single key given tag and MD5 digested key.
868 ##
869 my $self = shift;
e96daec8 870 my ($tag, $md5) = @_;
912d50b1 871
21838116 872 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 873 my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );
874 my $keytag = $self->load_tag( $keyloc );
875 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
13ff93d5 876 if ( !$subloc && !$is_deleted ) {
ea2f6d67 877 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
878 }
c9b6d0d8 879 return ($subloc && !$is_deleted) && 1;
912d50b1 880}
881
72e315ac 882sub find_blist {
beac1dff 883 ##
884 # Locate offset for bucket list, given digested key
885 ##
886 my $self = shift;
e96daec8 887 my ($offset, $md5, $args) = @_;
d0b74c17 888 $args = {} unless $args;
889
beac1dff 890 ##
891 # Locate offset for bucket list using digest index system
892 ##
e96daec8 893 my $tag = $self->load_tag( $offset )
894 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 895
fb451ba6 896 #XXX What happens when $ch >= $self->{hash_size} ??
897 for (my $ch = 0; $tag->{signature} ne SIG_BLIST; $ch++) {
d0b74c17 898 my $num = ord substr($md5, $ch, 1);
899
900 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 901 $tag = $self->index_lookup( $tag, $num );
d0b74c17 902
903 if (!$tag) {
29b01632 904 return if !$args->{create};
d0b74c17 905
83371fe3 906 my $loc = $self->_storage->request_space(
e96daec8 907 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 908 );
909
83371fe3 910 $self->_storage->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 911
9e4f83a0 912 $tag = $self->write_tag(
e96daec8 913 $loc, SIG_BLIST,
f37c15ab 914 chr(0)x$self->{bucket_list_size},
d5d7c51d 915 );
916
917 $tag->{ref_loc} = $ref_loc;
918 $tag->{ch} = $ch;
919
920 last;
d0b74c17 921 }
922
fb451ba6 923 $tag->{ch} = $ch;
d0b74c17 924 $tag->{ref_loc} = $ref_loc;
beac1dff 925 }
d0b74c17 926
beac1dff 927 return $tag;
6736c116 928}
929
d0b74c17 930sub index_lookup {
931 ##
932 # Given index tag, lookup single entry in index and return .
933 ##
934 my $self = shift;
e96daec8 935 my ($tag, $index) = @_;
d0b74c17 936
937 my $location = unpack(
938 $self->{long_pack},
939 substr(
940 $tag->{content},
941 $index * $self->{long_size},
942 $self->{long_size},
943 ),
944 );
945
946 if (!$location) { return; }
947
e96daec8 948 return $self->load_tag( $location );
d0b74c17 949}
950
6736c116 951sub traverse_index {
beac1dff 952 ##
953 # Scan index and recursively step into deeper levels, looking for next key.
954 ##
6736c116 955 my $self = shift;
ea2f6d67 956 my ($xxxx, $offset, $ch, $force_return_next) = @_;
d0b74c17 957
e96daec8 958 my $tag = $self->load_tag( $offset );
6736c116 959
8db25060 960 if ($tag->{signature} ne SIG_BLIST) {
ea2f6d67 961 my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1));
d0b74c17 962
ea2f6d67 963 for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) {
e5fc7e69 964 my $subloc = unpack(
965 $self->{long_pack},
e06824f8 966 substr(
ea2f6d67 967 $tag->{content},
e06824f8 968 $idx * $self->{long_size},
969 $self->{long_size},
970 ),
e5fc7e69 971 );
972
beac1dff 973 if ($subloc) {
e5fc7e69 974 my $result = $self->traverse_index(
ea2f6d67 975 $xxxx, $subloc, $ch + 1, $force_return_next,
e5fc7e69 976 );
977
ea2f6d67 978 if (defined $result) { return $result; }
beac1dff 979 }
980 } # index loop
d0b74c17 981
ea2f6d67 982 $xxxx->{return_next} = 1;
42717e46 983 }
984 # This is the bucket list
e5fc7e69 985 else {
beac1dff 986 my $keys = $tag->{content};
ea2f6d67 987 if ($force_return_next) { $xxxx->{return_next} = 1; }
d0b74c17 988
beac1dff 989 ##
990 # Iterate through buckets, looking for a key match
991 ##
83371fe3 992 my $transaction_id = $self->_storage->transaction_id;
8db25060 993 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
ea2f6d67 994 my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 995
8db25060 996 # End of bucket list -- return to outer loop
ea2f6d67 997 if (!$keyloc) {
998 $xxxx->{return_next} = 1;
beac1dff 999 last;
1000 }
8db25060 1001 # Located previous key -- return next one found
ea2f6d67 1002 elsif ($key eq $xxxx->{prev_md5}) {
1003 $xxxx->{return_next} = 1;
beac1dff 1004 next;
1005 }
8db25060 1006 # Seek to bucket location and skip over signature
ea2f6d67 1007 elsif ($xxxx->{return_next}) {
83371fe3 1008 my $storage = $self->_storage;
d0b74c17 1009
ea2f6d67 1010 my $keytag = $self->load_tag( $keyloc );
1011 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 1012 if ( $subloc == 0 && !$is_deleted ) {
ea2f6d67 1013 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
1014 }
1015 next if $is_deleted;
1016
beac1dff 1017 # Skip over value to get to plain key
83371fe3 1018 my $sig = $storage->read_at( $subloc, SIG_SIZE );
8db25060 1019
83371fe3 1020 my $size = $storage->read_at( undef, $self->{data_size} );
e5fc7e69 1021 $size = unpack($self->{data_pack}, $size);
83371fe3 1022 if ($size) { $storage->increment_pointer( $size ); }
d0b74c17 1023
beac1dff 1024 # Read in plain key and return as scalar
83371fe3 1025 $size = $storage->read_at( undef, $self->{data_size} );
e5fc7e69 1026 $size = unpack($self->{data_pack}, $size);
86867f3a 1027
7dcefff3 1028 my $plain_key;
83371fe3 1029 if ($size) { $plain_key = $storage->read_at( undef, $size); }
beac1dff 1030 return $plain_key;
1031 }
8db25060 1032 }
d0b74c17 1033
ea2f6d67 1034 $xxxx->{return_next} = 1;
42717e46 1035 }
d0b74c17 1036
beac1dff 1037 return;
6736c116 1038}
1039
75be6413 1040# Utilities
1041
9cec1360 1042sub _get_key_subloc {
75be6413 1043 my $self = shift;
1044 my ($keys, $idx) = @_;
1045
ea2f6d67 1046 return unpack(
28394a1a 1047 # This is 'a', not 'A'. Please read the pack() documentation for the
1048 # difference between the two and why it's important.
ea2f6d67 1049 "a$self->{hash_size} $self->{long_pack}",
75be6413 1050 substr(
1051 $keys,
9cec1360 1052 ($idx * $self->{bucket_size}),
1053 $self->{bucket_size},
75be6413 1054 ),
1055 );
75be6413 1056}
1057
d608b06e 1058sub _find_in_buckets {
1059 my $self = shift;
ea2f6d67 1060 my ($tag, $md5) = @_;
21838116 1061
d608b06e 1062 BUCKET:
1063 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
ea2f6d67 1064 my ($key, $subloc) = $self->_get_key_subloc(
9a187d8c 1065 $tag->{content}, $i,
1066 );
d608b06e 1067
fb451ba6 1068 next BUCKET if $subloc && $key ne $md5;
1069 return( $subloc, $i * $self->{bucket_size} );
d608b06e 1070 }
1071
1072 return;
1073}
1074
994ccd8e 1075sub _release_space {
1076 my $self = shift;
e96daec8 1077 my ($size, $loc) = @_;
994ccd8e 1078
7b1e1aa1 1079 my $next_loc = 0;
1080
83371fe3 1081 $self->_storage->print_at( $loc,
019404df 1082 SIG_FREE,
1083 pack($self->{long_pack}, $size ),
1084 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1085 );
1086
994ccd8e 1087 return;
1088}
1089
e96daec8 1090sub _throw_error {
1091 die "DBM::Deep: $_[1]\n";
1092}
1093
86867f3a 1094sub _get_dbm_object {
1095 my $item = shift;
1096
1097 my $obj = eval {
1098 local $SIG{__DIE__};
1099 if ($item->isa( 'DBM::Deep' )) {
1100 return $item;
1101 }
1102 return;
1103 };
1104 return $obj if $obj;
1105
1106 my $r = Scalar::Util::reftype( $item ) || '';
1107 if ( $r eq 'HASH' ) {
1108 my $obj = eval {
1109 local $SIG{__DIE__};
1110 my $obj = tied(%$item);
1111 if ($obj->isa( 'DBM::Deep' )) {
1112 return $obj;
1113 }
1114 return;
1115 };
1116 return $obj if $obj;
1117 }
1118 elsif ( $r eq 'ARRAY' ) {
1119 my $obj = eval {
1120 local $SIG{__DIE__};
1121 my $obj = tied(@$item);
1122 if ($obj->isa( 'DBM::Deep' )) {
1123 return $obj;
1124 }
1125 return;
1126 };
1127 return $obj if $obj;
1128 }
1129
1130 return;
1131}
1132
1133sub _length_needed {
1134 my $self = shift;
1135 my ($value, $key) = @_;
1136
1137 my $is_dbm_deep = eval {
1138 local $SIG{'__DIE__'};
1139 $value->isa( 'DBM::Deep' );
1140 };
1141
1142 my $len = SIG_SIZE
1143 + $self->{data_size} # size for value
1144 + $self->{data_size} # size for key
1145 + length( $key ); # length of key
1146
83371fe3 1147 if ( $is_dbm_deep && $value->_storage eq $self->_storage ) {
86867f3a 1148 # long_size is for the internal reference
1149 return $len + $self->{long_size};
1150 }
1151
83371fe3 1152 if ( $self->_storage->{autobless} ) {
86867f3a 1153 # This is for the bit saying whether or not this thing is blessed.
1154 $len += 1;
1155 }
1156
1157 my $r = Scalar::Util::reftype( $value ) || '';
1158 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1159 if ( defined $value ) {
1160 $len += length( $value );
1161 }
1162 return $len;
1163 }
1164
1165 $len += $self->{index_size};
1166
1167 # if autobless is enabled, must also take into consideration
1168 # the class name as it is stored after the key.
83371fe3 1169 if ( $self->_storage->{autobless} ) {
86867f3a 1170 my $c = Scalar::Util::blessed($value);
1171 if ( defined $c && !$is_dbm_deep ) {
1172 $len += $self->{data_size} + length($c);
1173 }
1174 }
1175
1176 return $len;
1177}
1178
a20d9a3f 11791;
1180__END__