r14236@Rob-Kinyons-PowerBook: rob | 2006-06-14 23:07:31 -0400
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
6
c3aafc14 7our $VERSION = q(0.99_03);
86867f3a 8
633df1fd 9use Fcntl qw( :DEFAULT :flock );
359a01ac 10use Scalar::Util ();
a20d9a3f 11
21838116 12# File-wide notes:
20b7f047 13# * To add to bucket_size, make sure you modify the following:
14# - calculate_sizes()
15# - _get_key_subloc()
16# - add_bucket() - where the buckets are printed
c3aafc14 17#
83371fe3 18# * Every method in here assumes that the _storage has been appropriately
c3aafc14 19# safeguarded. This can be anything from flock() to some sort of manual
20# mutex. But, it's the caller's responsibility to make sure that this has
21# been done.
21838116 22
##
# File and tag signatures. These are written into the data file, so they
# should never change.
##
sub SIG_FILE     () { 'DPDB' }   # first bytes of the file header
sub SIG_HEADER   () { 'h'    }   # follows SIG_FILE; its absence means an old file
sub SIG_INTERNAL () { 'i'    }   # value is a reference to another DBM::Deep object
sub SIG_HASH     () { 'H'    }   # value is a hash
sub SIG_ARRAY    () { 'A'    }   # value is an array
sub SIG_NULL     () { 'N'    }   # value is undef
sub SIG_DATA     () { 'D'    }   # value is a plain scalar
sub SIG_INDEX    () { 'I'    }   # index tag
sub SIG_BLIST    () { 'B'    }   # bucket-list tag
sub SIG_FREE     () { 'F'    }   # presumably marks released space -- not used in this part of the file
sub SIG_KEYS     () { 'K'    }   # key-location tag
sub SIG_SIZE     () { 1      }   # width of a signature, in bytes

# This is the transaction ID for the HEAD
sub HEAD () { 0 }
41
c3aafc14 42################################################################################
43#
44# This is new code. It is a complete rewrite of the engine based on a new API
45#
46################################################################################
47
##
# Look up $key in the structure whose index starts at $offset and return
# its stored value. Returns nothing when no bucket list exists for the
# key. $trans_id is accepted but not used here.
##
sub read_value {
    my $self = shift;
    my ($trans_id, $offset, $key, $orig_key) = @_;

    my $digest = $self->_apply_digest( $key );

    my $blist = $self->find_blist( $offset, $digest );
    return unless $blist;

    return $self->get_bucket_value( $blist, $digest, $orig_key );
}
56
##
# Report whether $key is present in the structure whose index starts at
# $offset. $trans_id is accepted but not used here.
##
sub key_exists {
    my $self = shift;
    my ($trans_id, $offset, $key) = @_;

    my $digest = $self->_apply_digest( $key );

    my $blist = $self->find_blist( $offset, $digest );

    # exists() returns the empty string, not undef
    return '' unless $blist;

    return $self->bucket_exists( $blist, $digest, $key );
}
66
##
# Return the key that follows the given one during iteration.
#
# Called as get_next_key( $trans_id, $offset [, $prev_key] ). When no
# previous key is supplied, iteration starts at the top and the first key
# found is returned.
##
sub get_next_key {
    my $self = shift;
    my ($trans_id, $offset) = @_;

    # A third argument, when present, is the previous key.
    my $state = @_ > 2
        ? { prev_md5 => $self->_apply_digest($_[2]),    return_next => 0 }
        : { prev_md5 => chr(0) x $self->{hash_size},    return_next => 1 };

    return $self->traverse_index( $state, $offset, 0 );
}
89
##
# Remove $key from the structure whose index starts at $offset and return
# the value it held. Returns nothing when no bucket list exists for the
# key. $trans_id is accepted but not used here.
##
sub delete_key {
    my $self = shift;
    my ($trans_id, $offset, $key, $orig_key) = @_;

    my $digest = $self->_apply_digest( $key );

    my $blist = $self->find_blist( $offset, $digest );
    return unless $blist;

    # Fetch the value first so that it can still be handed back afterwards.
    my $old_value = $self->get_bucket_value( $blist, $digest, $orig_key );
    $self->delete_bucket( $blist, $digest, $orig_key );

    return $old_value;
}
100
##
# Store $value under $key in the structure whose index starts at $offset,
# creating the bucket list if it does not exist yet. Returns whatever
# add_bucket() returns. $trans_id is accepted but not used here.
##
sub write_value {
    my $self = shift;
    my ($trans_id, $offset, $key, $value, $orig_key) = @_;

    my $digest = $self->_apply_digest( $key );
    my $blist  = $self->find_blist( $offset, $digest, { create => 1 } );

    return $self->add_bucket( $blist, $digest, $key, $value, undef, $orig_key );
}
109
c3aafc14 110################################################################################
111#
112# Below here is the old code. It will be folded into the code above as it can.
113#
114################################################################################
115
##
# Constructor. Takes a hashref of arguments; any argument whose name
# matches one of the defaults below overrides that default.
#
# The optional pack_size argument ('small', 'medium' or 'large', in any
# letter case) selects long_size/long_pack; an unknown value dies. Note
# that the selected values are stored back into the caller's $args.
##
sub new {
    my $class = shift;
    my ($args) = @_;

    my %defaults = (
        long_size => 4,
        long_pack => 'N',
        data_size => 4,
        data_pack => 'N',

        digest    => \&Digest::MD5::md5,
        hash_size => 16, # In bytes

        ##
        # Number of buckets per blist before another level of indexing is
        # done. Increase this value for slightly greater speed, but larger
        # database files. DO NOT decrease this value below 16, due to risk
        # of recursive reindex overrun.
        ##
        max_buckets => 16,

        storage => undef,
        obj     => undef,
    );
    my $self = bless { %defaults }, $class;

    if ( defined $args->{pack_size} ) {
        my %width_for = (
            small  => [ 2, 'n' ],
            medium => [ 4, 'N' ],
            large  => [ 8, 'Q' ],
        );

        my $picked = $width_for{ lc $args->{pack_size} }
            or die "Unknown pack_size value: '$args->{pack_size}'\n";

        @{$args}{qw( long_size long_pack )} = @{$picked};
    }

    # Grab the parameters we want to use
    for my $param ( grep { exists $args->{$_} } keys %{$self} ) {
        $self->{$param} = $args->{$param};
    }

    # Hold the owning object weakly.
    Scalar::Util::weaken( $self->{obj} ) if $self->{obj};

    if ( $self->{max_buckets} < 16 ) {
        warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
        $self->{max_buckets} = 16;
    }

    return $self;
}
173
# Accessor for the storage object this engine reads from and writes to.
sub _storage {
    my $self = $_[0];
    return $self->{storage};
}
460b1067 175
# Run the configured digest routine over the given arguments and return
# its result.
sub _apply_digest {
    my ($self, @input) = @_;

    my $digester = $self->{digest};
    return $digester->( @input );
}
180
##
# Derive the on-disk structure sizes from long_size, hash_size and
# max_buckets and store them on the object. Returns nothing.
##
sub calculate_sizes {
    my $self = shift;

    my $long    = $self->{long_size};
    my $buckets = $self->{max_buckets};

    # The 2**8 here indicates the number of different characters in the
    # current hashing algorithm
    #XXX Does this need to be updated with different hashing algorithms?
    $self->{hash_chars_used} = 2 ** 8;

    # One long per possible character.
    $self->{index_size} = $self->{hash_chars_used} * $long;

    # A bucket holds the digest plus room for two longs.
    $self->{bucket_size}      = $self->{hash_size} + $long * 2;
    $self->{bucket_list_size} = $buckets * $self->{bucket_size};

    $self->{key_size}    = $long * 2;
    $self->{keyloc_size} = $buckets * $self->{key_size};

    return;
}
198
##
# Reserve space for the file header and write it: the file signature, the
# header signature, the header version and size, four transaction-ID
# slots, and then the long/data sizes and pack codes and max_buckets.
# Returns nothing.
##
sub write_file_header {
    my $self = shift;

    my $storage = $self->_storage;

    my $loc = $storage->request_space( length( SIG_FILE ) + 33 );

    my @header = (
        SIG_FILE,
        SIG_HEADER,
        pack('N', 1),            # header version
        pack('N', 24),           # header size
        pack('N4', 0, 0, 0, 0),  # currently running transaction IDs
        pack('n', $self->{long_size}),
        pack('A', $self->{long_pack}),
        pack('n', $self->{data_size}),
        pack('A', $self->{data_pack}),
        pack('n', $self->{max_buckets}),
    );

    $storage->print_at( $loc, @header );

    # 13 = the two signatures (4 + 1) plus the version and size (4 + 4),
    # i.e. where the transaction-ID slots begin.
    $storage->set_transaction_offset( 13 );

    return;
}
221
##
# Read and validate the file header, then load long_size, long_pack,
# data_size, data_pack and max_buckets from it.
#
# Returns nothing when the read at the start of the file yields no data,
# otherwise the total number of header bytes read. On a bad signature or
# a corrupt header the storage is closed and an error is thrown.
##
sub read_file_header {
    my $self = shift;

    my $storage = $self->_storage;

    # Fixed leading part: both signatures, the header version and size.
    my $lead = $storage->read_at( 0, length(SIG_FILE) + 9 );
    return unless length($lead);

    my ($file_signature, $sig_header, $header_version, $size)
        = unpack( 'A4 A N N', $lead );

    if ( $file_signature ne SIG_FILE ) {
        $storage->close;
        $self->_throw_error( "Signature not found -- file is not a Deep DB" );
    }

    if ( $sig_header ne SIG_HEADER ) {
        $storage->close;
        $self->_throw_error( "Old file version found." );
    }

    # The remainder of the header; the first four longs are the
    # transaction-ID slots and are not needed here.
    my $rest = $storage->read_at( undef, $size );
    my (undef, undef, undef, undef, @values)
        = unpack( 'N4 n A n A n', $rest );

    $storage->set_transaction_offset( 13 );

    if ( @values < 5 || grep { !defined } @values ) {
        $storage->close;
        $self->_throw_error("Corrupted file - bad header");
    }

    #XXX Add warnings if values weren't set right
    @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;

    return length($lead) + length($rest);
}
257
##
# Prepare the engine for use with $obj while holding an exclusive lock on
# the storage's filehandle.
#
# If $obj has no base_offset yet, the file header is read. An empty file
# gets a fresh header and master index; otherwise the master index tag is
# loaded and its signature is checked against the type of $obj.
# Always returns 1.
##
sub setup_fh {
    my $self = shift;
    my ($obj) = @_;

    my $storage = $self->_storage;

    # Need to remove use of $fh here
    my $fh = $storage->{fh};
    flock $fh, LOCK_EX;

    #XXX The duplication of calculate_sizes needs to go away
    if ( $obj->{base_offset} ) {
        $self->calculate_sizes;
    }
    else {
        my $bytes_read = $self->read_file_header;

        $self->calculate_sizes;

        if ( $bytes_read ) {
            # The master index follows directly after the header.
            $obj->{base_offset} = $bytes_read;

            ##
            # Get our type from master index header
            ##
            my $tag = $self->load_tag($obj->_base_offset);
            unless ( $tag ) {
                flock $fh, LOCK_UN;
                $self->_throw_error("Corrupted file, no master index record");
            }

            unless ($obj->_type eq $tag->{signature}) {
                flock $fh, LOCK_UN;
                $self->_throw_error("File type mismatch");
            }
        }
        else {
            ##
            # File is empty -- write header and master index
            ##
            $storage->audit( "# Database created on" );

            $self->write_file_header;

            $obj->{base_offset} = $storage->request_space(
                $self->tag_size( $self->{index_size} ),
            );

            $self->write_tag(
                $obj->_base_offset, $obj->_type,
                chr(0)x$self->{index_size},
            );

            # Flush the filehandle: briefly switching autoflush on for
            # $fh pushes out anything buffered, then the previous
            # setting and the previously selected handle are restored.
            my $old_fh = select $fh;
            my $old_af = $|; $| = 1; $| = $old_af;
            select $old_fh;
        }
    }

    #XXX We have to make sure we don't mess up when autoflush isn't turned on
    $storage->set_inode;

    flock $fh, LOCK_UN;

    return 1;
}
323
# Total number of bytes a tag occupies for $size bytes of content: the
# signature, the length field and the content itself.
sub tag_size {
    my ($self, $size) = @_;

    my $overhead = SIG_SIZE + $self->{data_size};
    return $overhead + $size;
}
329
##
# Given offset, signature and content, create tag and write to disk.
#
# Returns a hashref describing the tag (signature, size, content, and the
# offset at which the content starts), or nothing when $offset is
# undefined.
##
sub write_tag {
    my ($self, $offset, $sig, $content) = @_;

    my $size = length( $content );

    $self->_storage->print_at(
        $offset,
        $sig, pack($self->{data_pack}, $size), $content,
    );

    return unless defined $offset;

    my %tag = (
        signature => $sig,
        #XXX Is this even used?
        size      => $size,
        # Skip past the signature and the length field.
        offset    => $offset + SIG_SIZE + $self->{data_size},
        content   => $content,
    );

    return \%tag;
}
353
##
# Given offset, load single tag and return signature, size and data.
#
# The result is a hashref that also carries 'start' (the offset of the
# tag itself) and 'offset' (where its content begins).
##
sub load_tag {
    my ($self, $offset) = @_;

    my $storage = $self->_storage;

    # The signature and the length field come first.
    my $head = $storage->read_at( $offset, SIG_SIZE + $self->{data_size} );
    my ($sig, $size) = unpack( "A $self->{data_pack}", $head );

    return {
        signature => $sig,
        size      => $size,    #XXX Is this even used?
        start     => $offset,
        offset    => $offset + SIG_SIZE + $self->{data_size},
        # Continues reading where the header read stopped.
        content   => $storage->read_at( undef, $size ),
    };
}
376
##
# Scan the slots of a key-location tag and return the first usable one
# for a transaction.
#
# $transaction_id defaults to the storage's current transaction. A slot
# is skipped only when its location is non-zero and its transaction ID
# differs from the requested one. Returns ( $loc, $is_deleted, $offset )
# where $offset is the slot's byte offset within the tag content, or
# nothing if no slot qualifies.
##
sub find_keyloc {
    my $self = shift;
    my ($tag, $transaction_id) = @_;

    $transaction_id = $self->_storage->transaction_id
        unless defined $transaction_id;

    my $width    = $self->{key_size};
    my $template = "$self->{long_pack} C C";

    for my $slot ( 0 .. $self->{max_buckets} - 1 ) {
        my $rel = $slot * $width;

        my ($loc, $trans_id, $is_deleted) = unpack(
            $template,
            substr( $tag->{content}, $rel, $width ),
        );

        next if $loc != HEAD && $transaction_id != $trans_id;

        return( $loc, $is_deleted, $rel );
    }

    return;
}
395
##
# Adds one key/value pair to bucket list, given the bucket-list tag, MD5
# digest of key, plain (undigested) key and value.
#
# Parameters: ( $tag, $md5, $plain_key, $value, $deleted, $orig_key ).
# $deleted is accepted but not used. Throws if $value is a reference of
# any type other than HASH or ARRAY. Returns 1.
#
# When running as the HEAD (transaction ID 0) while other transactions
# are open, the previous value of an existing key is first copied into
# each of those transactions that has no entry of its own.
##
sub add_bucket {
    my $self = shift;
    my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;

    # This verifies that only supported values will be stored.
    {
        my $r = Scalar::Util::reftype( $value );

        last if !defined $r;
        last if $r eq 'HASH';
        last if $r eq 'ARRAY';

        $self->_throw_error(
            "Storage of references of type '$r' is not supported."
        );
    }

    my $storage = $self->_storage;

    #ACID - This is a mutation. Must only find the exact transaction
    my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );

    my @transactions;
    if ( $storage->transaction_id == 0 ) {
        @transactions = $storage->current_transactions;
    }

#    $self->_release_space( $size, $subloc );
#XXX This needs updating to use _release_space

    my $location;
    my $size = $self->_length_needed( $value, $plain_key );

    # NOTE(review): read_from_loc() and _write_value() both take the
    # key-location as their first argument. The calls below used to omit
    # it, which shifted every argument by one position. $keyloc is passed
    # here on the strength of the parameter name -- confirm that this is
    # the intended value.

    # Updating a known md5
    if ( $keyloc ) {
        my $keytag = $self->load_tag( $keyloc );
        my ($subloc, $is_deleted, $slot_offset) = $self->find_keyloc( $keytag );

        if ( $subloc && !$is_deleted && @transactions ) {
            my $old_value = $self->read_from_loc( $keyloc, $subloc, $orig_key );
            my $old_size  = $self->_length_needed( $old_value, $plain_key );

            # Preserve the old value for every open transaction that does
            # not have its own copy yet.
            for my $trans_id ( @transactions ) {
                my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
                unless ($loc) {
                    my $location2 = $storage->request_space( $old_size );
                    $storage->print_at( $keytag->{offset} + $offset2,
                        pack($self->{long_pack}, $location2 ),
                        pack( 'C C', $trans_id, 0 ),
                    );
                    $self->_write_value( $keyloc, $location2, $plain_key, $old_value, $orig_key );
                }
            }
        }

        $location = $storage->request_space( $size );
        #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
        $storage->print_at( $keytag->{offset} + $slot_offset,
            pack($self->{long_pack}, $location ),
            pack( 'C C', $storage->transaction_id, 0 ),
        );
    }
    # Adding a new md5
    else {
        # Assigned to the outer $keyloc (rather than a fresh lexical) so
        # that it is still available for the final _write_value() call.
        $keyloc = $storage->request_space( $self->tag_size( $self->{keyloc_size} ) );

        # The bucket fit into list
        if ( defined $offset ) {
            $storage->print_at( $tag->{offset} + $offset,
                $md5, pack( $self->{long_pack}, $keyloc ),
            );
        }
        # If bucket didn't fit into list, split into a new index level
        else {
            $self->split_index( $tag, $md5, $keyloc );
        }

        my $keytag = $self->write_tag(
            $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
        );

        # Slot 0 belongs to the transaction doing the write.
        $location = $storage->request_space( $size );
        $storage->print_at( $keytag->{offset},
            pack( $self->{long_pack}, $location ),
            pack( 'C C', $storage->transaction_id, 0 ),
        );

        # The following slots mark the key as deleted for every other
        # open transaction.
        my $slot = 1;
        for my $trans_id ( @transactions ) {
            $storage->print_at( $keytag->{offset} + $self->{key_size} * $slot++,
                pack( $self->{long_pack}, 0 ),
                pack( 'C C', $trans_id, 1 ),
            );
        }
    }

    $self->_write_value( $keyloc, $location, $plain_key, $value, $orig_key );

    return 1;
}
500
##
# Write a single value, followed by its plain key, at $location.
#
# Parameters: ( $key_loc, $location, $key, $value, $orig_key ).
# Throws when $value is a DBM::Deep object backed by a different storage,
# or a hash/array that is already tied. Returns 1.
##
sub _write_value {
    my $self = shift;
    my ($key_loc, $location, $key, $value, $orig_key) = @_;

    my $storage = $self->_storage;

    my $dbm_deep_obj = _get_dbm_object( $value );
    if ( $dbm_deep_obj && $dbm_deep_obj->_storage ne $storage ) {
        $self->_throw_error( "Cannot cross-reference. Use export() instead" );
    }

    my $r        = Scalar::Util::reftype( $value ) || '';
    my $is_hash  = $r eq 'HASH';
    my $is_array = $r eq 'ARRAY';

    ##
    # Write signature based on content type, set content length and write
    # actual value.
    ##
    if ( $dbm_deep_obj ) {
        # Only the other object's base offset is stored.
        $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
    }
    elsif ( $is_hash ) {
        $self->_throw_error( "Cannot store something that is tied" )
            if tied %{$value};
        $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
    }
    elsif ( $is_array ) {
        $self->_throw_error( "Cannot store something that is tied" )
            if tied @{$value};
        $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
    }
    elsif ( !defined($value) ) {
        $self->write_tag( $location, SIG_NULL, '' );
    }
    else {
        $self->write_tag( $location, SIG_DATA, $value );
    }

    ##
    # Plain key is stored AFTER value, as keys are typically fetched less often.
    ##
    $storage->print_at( undef, pack($self->{data_pack}, length($key)) . $key );

    # Internal references don't care about autobless
    return 1 if $dbm_deep_obj;

    ##
    # If value is blessed, preserve class name
    ##
    if ( $storage->{autobless} ) {
        my $class = Scalar::Util::blessed($value);
        if ( defined $class ) {
            $storage->print_at( undef, chr(1), pack($self->{data_pack}, length($class)) . $class );
        }
        else {
            $storage->print_at( undef, chr(0) );
        }
    }

    # Plain scalars and undef are finished here.
    return 1 unless $is_hash || $is_array;

    ##
    # Tie the passed in reference so that changes to it are reflected in the
    # datafile. The base_offset acts as the linkage between parent and child.
    #
    # The overall assignment is a hack around the fact that just tying doesn't
    # store the values. This may not be the wrong thing to do.
    ##
    my $tie_args = {
        base_offset => $key_loc,
        storage     => $storage,
        parent      => $self->{obj},
        parent_key  => $orig_key,
    };

    if ( $is_hash ) {
        my %saved = %$value;
        tie %$value, 'DBM::Deep', $tie_args;
        %$value = %saved;
        bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
    }
    else {
        my @saved = @$value;
        tie @$value, 'DBM::Deep', $tie_args;
        @$value = @saved;
        bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
    }

    return 1;
}
592
##
# Replace a full bucket list with a new index level.
#
# A new index tag is written and the parent reference ($tag->{ref_loc})
# is pointed at it. The existing buckets, plus the new ( $md5, $keyloc )
# pair, are then spread over fresh bucket lists chosen by the character
# of each digest at position $tag->{ch} + 1. Finally the space of the old
# bucket list is released. Returns 1.
##
sub split_index {
    my $self = shift;
    my ($tag, $md5, $keyloc) = @_;

    my $storage = $self->_storage;

    my $index_loc = $storage->request_space(
        $self->tag_size( $self->{index_size} ),
    );

    $storage->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $index_loc) );

    my $index_tag = $self->write_tag(
        $index_loc, SIG_INDEX,
        chr(0)x$self->{index_size},
    );

    # The old buckets followed by the one that did not fit.
    my $keys = $tag->{content}
             . $md5 . pack($self->{long_pack}, $keyloc);

    # Content offset of the bucket list already created for a character.
    my @blist_at = ();

    # The upper bound is deliberately max_buckets itself - we have
    # max_buckets+1 keys to iterate through, unlike every other loop that
    # uses max_buckets as a stop.
    for my $i ( 0 .. $self->{max_buckets} ) {
        my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );

        die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
        die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;

        my $num = ord(substr($key, $tag->{ch} + 1, 1));

        if ( my $existing = $blist_at[$num] ) {
            my $subkeys = $storage->read_at( $existing, $self->{bucket_list_size} );

            # This is looking for the first empty spot
            my ($subloc, $offset) = $self->_find_in_buckets(
                { content => $subkeys }, '',
            );

            $storage->print_at(
                $existing + $offset,
                $key, pack($self->{long_pack}, $old_subloc),
            );

            next;
        }

        # First key for this character: create its bucket list and hook
        # it into the new index.
        my $blist_loc = $storage->request_space(
            $self->tag_size( $self->{bucket_list_size} ),
        );

        $storage->print_at(
            $index_tag->{offset} + ($num * $self->{long_size}),
            pack($self->{long_pack}, $blist_loc),
        );

        my $blist_tag = $self->write_tag(
            $blist_loc, SIG_BLIST,
            chr(0)x$self->{bucket_list_size},
        );

        $storage->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );

        $blist_at[$num] = $blist_tag->{offset};
    }

    $self->_release_space(
        $self->tag_size( $self->{bucket_list_size} ),
        $tag->{start},
    );

    return 1;
}
667
##
# Read the value stored at $subloc.
#
# Parameters: ( $key_loc, $subloc, $orig_key ).
#   * hash/array  -> a new DBM::Deep object (re-blessed into the stored
#                    class when the storage has autobless set)
#   * internal    -> the value at the referenced location, read recursively
#   * data        -> the stored scalar ('' for zero length)
#   * otherwise   -> nothing (key exists, but content is null)
##
sub read_from_loc {
    my $self = shift;
    my ($key_loc, $subloc, $orig_key) = @_;

    my $storage = $self->_storage;

    my $signature = $storage->read_at( $subloc, SIG_SIZE );

    ##
    # If value is a hash or array, return new DBM::Deep object with correct offset
    ##
    if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
        #XXX This needs to be a singleton
        my $new_obj = DBM::Deep->new({
            type        => $signature,
            base_offset => $key_loc,
            storage     => $storage,
            parent      => $self->{obj},
            parent_key  => $orig_key,
        });

        return $new_obj unless $new_obj->_storage->{autobless};

        ##
        # Skip over value and plain key to see if object needs
        # to be re-blessed
        ##
        $storage->increment_pointer( $self->{data_size} + $self->{index_size} );

        my $key_len = unpack(
            $self->{data_pack},
            $storage->read_at( undef, $self->{data_size} ),
        );
        $storage->increment_pointer( $key_len ) if $key_len;

        my $bless_bit = $storage->read_at( undef, 1 );
        if ( ord($bless_bit) ) {
            my $class_len = unpack(
                $self->{data_pack},
                $storage->read_at( undef, $self->{data_size} ),
            );

            if ( $class_len ) {
                $new_obj = bless $new_obj, $storage->read_at( undef, $class_len );
            }
        }

        return $new_obj;
    }

    # A reference to another structure: follow the stored location.
    if ( $signature eq SIG_INTERNAL ) {
        my $size = unpack(
            $self->{data_pack},
            $storage->read_at( undef, $self->{data_size} ),
        );
        return unless $size;

        my $new_loc = unpack(
            $self->{long_pack},
            $storage->read_at( undef, $size ),
        );
        return $self->read_from_loc( $key_loc, $new_loc, $orig_key );
    }

    ##
    # Otherwise return actual value
    ##
    if ( $signature eq SIG_DATA ) {
        my $size = unpack(
            $self->{data_pack},
            $storage->read_at( undef, $self->{data_size} ),
        );

        my $value = $size ? $storage->read_at( undef, $size ) : '';
        return $value;
    }

    ##
    # Key exists, but content is null
    ##
    return;
}
769
##
# Fetch single value given tag and MD5 digested key.
#
# Parameters: ( $tag, $md5, $orig_key ). Returns the stored value, or
# nothing when the key is unknown or marked as deleted.
##
sub get_bucket_value {
    my $self = shift;
    my ($tag, $md5, $orig_key) = @_;

    #ACID - This is a read. Can find exact or HEAD
    my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );

    #XXX Need to use real key
    return unless $keyloc;

    my $keytag = $self->load_tag( $keyloc );

    # Prefer the entry of the current transaction and fall back to the
    # HEAD (transaction ID 0) when there is none.
    my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
    if ( !$subloc && !$is_deleted ) {
        ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
    }

    return unless $subloc && !$is_deleted;

    # NOTE(review): read_from_loc() takes the key-location as its first
    # argument. It used to be omitted here, which made the plain key get
    # used as the read offset. $keyloc is passed on the strength of the
    # parameter name -- confirm that this is the intended value.
    return $self->read_from_loc( $keyloc, $subloc, $orig_key );
}
ab0e4957 799
##
# Delete single key/value pair given tag and MD5 digested key.
#
# Parameters: ( $tag, $md5, $orig_key ). Returns nothing when the key is
# unknown (or, in the HEAD, already deleted), otherwise 1.
#
# In the HEAD (transaction ID 0) the value is first copied into every
# open transaction that has no entry of its own, and then the HEAD's slot
# is removed. Inside a transaction the slot is only marked as deleted.
##
sub delete_bucket {
    my $self = shift;
    my ($tag, $md5, $orig_key) = @_;

    #ACID - Although this is a mutation, we must find any transaction.
    # This is because we need to mark something as deleted that is in the HEAD.
    my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );

    return if !$keyloc;

    my $storage = $self->_storage;

    if ( $storage->transaction_id == 0 ) {
        my @transactions = $storage->current_transactions;

        my $keytag = $self->load_tag( $keyloc );

        my ($subloc, $is_deleted, $slot_offset) = $self->find_keyloc( $keytag );
        return if !$subloc || $is_deleted;

        # NOTE(review): read_from_loc() and _write_value() both take the
        # key-location as their first argument. The calls below used to
        # omit it, which shifted every argument by one position. $keyloc
        # is passed on the strength of the parameter name -- confirm that
        # this is the intended value.
        my $value = $self->read_from_loc( $keyloc, $subloc, $orig_key );

        my $size = $self->_length_needed( $value, $orig_key );

        for my $trans_id ( @transactions ) {
            my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
            unless ($loc) {
                my $location2 = $storage->request_space( $size );
                $storage->print_at( $keytag->{offset} + $offset2,
                    pack($self->{long_pack}, $location2 ),
                    pack( 'C C', $trans_id, 0 ),
                );
                $self->_write_value( $keyloc, $location2, $orig_key, $value, $orig_key );
            }
        }

        # Reload the tag, as the loop above may have written to it, then
        # move the slots that follow the HEAD's slot down by one and
        # zero-fill the freed slot at the end.
        $keytag = $self->load_tag( $keyloc );
        ($subloc, $is_deleted, $slot_offset) = $self->find_keyloc( $keytag );
        $storage->print_at( $keytag->{offset} + $slot_offset,
            substr( $keytag->{content}, $slot_offset + $self->{key_size} ),
            chr(0) x $self->{key_size},
        );
    }
    else {
        my $keytag = $self->load_tag( $keyloc );

        my ($subloc, $is_deleted, $slot_offset) = $self->find_keyloc( $keytag );

        # Mark the key as deleted for this transaction only.
        $storage->print_at( $keytag->{offset} + $slot_offset,
            pack($self->{long_pack}, 0 ),
            pack( 'C C', $storage->transaction_id, 1 ),
        );
    }

    return 1;
}
862
##
# Check existence of single key given tag and MD5 digested key.
#
# Returns 1 when the key has a live (non-deleted) entry for the current
# transaction or, failing that, for the HEAD; a false value otherwise.
##
sub bucket_exists {
    my $self = shift;
    my ($tag, $md5) = @_;

    #ACID - This is a read. Can find exact or HEAD
    my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );

    # An unknown digest has no key-location tag to load. Bail out the
    # same way get_bucket_value() and delete_bucket() do, returning the
    # empty string because exists() returns that rather than undef.
    return '' if !$keyloc;

    my $keytag = $self->load_tag( $keyloc );
    my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
    if ( !$subloc && !$is_deleted ) {
        ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
    }

    return ($subloc && !$is_deleted) && 1;
}
879
##
# Locate the bucket list for a digested key.
#
# Parameters: ( $offset, $md5, $args ). Starting at the tag at $offset,
# one character of the digest is consumed per index level until a bucket
# list tag is reached. If an index entry is empty, nothing is returned
# unless $args->{create} is true, in which case a new bucket list is
# allocated and linked in.
#
# The returned tag carries 'ch' (the digest position used to reach it)
# and 'ref_loc' (the location of the index entry pointing at it).
##
sub find_blist {
    my $self = shift;
    my ($offset, $md5, $args) = @_;
    $args ||= {};

    ##
    # Locate offset for bucket list using digest index system
    ##
    my $tag = $self->load_tag( $offset )
        or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );

    #XXX What happens when $ch >= $self->{hash_size} ??
    my $ch = 0;
    while ( $tag->{signature} ne SIG_BLIST ) {
        my $num = ord substr($md5, $ch, 1);

        # Where the entry for this character lives within the index.
        my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
        $tag = $self->index_lookup( $tag, $num );

        unless ( $tag ) {
            return if !$args->{create};

            my $storage = $self->_storage;

            my $loc = $storage->request_space(
                $self->tag_size( $self->{bucket_list_size} ),
            );

            $storage->print_at( $ref_loc, pack($self->{long_pack}, $loc) );

            $tag = $self->write_tag(
                $loc, SIG_BLIST,
                chr(0)x$self->{bucket_list_size},
            );

            $tag->{ref_loc} = $ref_loc;
            $tag->{ch}      = $ch;

            last;
        }

        $tag->{ch}      = $ch;
        $tag->{ref_loc} = $ref_loc;

        $ch++;
    }

    return $tag;
}
927
##
# Given index tag, lookup single entry in index and return the tag it
# points at. Returns nothing when the entry is empty (zero).
##
sub index_lookup {
    my ($self, $tag, $index) = @_;

    my $width = $self->{long_size};
    my $entry = substr( $tag->{content}, $index * $width, $width );

    my $location = unpack( $self->{long_pack}, $entry );
    return unless $location;

    return $self->load_tag( $location );
}
948
sub traverse_index {
    ##
    # Scan index and recursively step into deeper levels, looking for next key.
    #
    # Arguments:
    #   $state  - iteration state hashref shared across the whole recursion.
    #             {prev_md5} is the digest of the key returned last time;
    #             {return_next} is set once that key has been passed, meaning
    #             the next live key encountered is the one to return.
    #   $offset - location of the tag to load (index or bucket list)
    #   $ch     - position within {prev_md5} of the digest character that
    #             selects the starting slot at this index level
    #   $force_return_next - when true, every bucket list reached sets
    #             {return_next} before it is scanned
    #
    # Returns the plain key read from storage, or nothing when this subtree
    # has no further key to offer.
    ##
    my $self = shift;
    my ($state, $offset, $ch, $force_return_next) = @_;

    my $tag = $self->load_tag( $offset );

    if ( $tag->{signature} ne SIG_BLIST ) {
        # Index tag. Until the previous key has been passed, start at the slot
        # its digest selects at this depth; afterwards scan from the first slot.
        my $start = $state->{return_next}
            ? 0
            : ord( substr( $state->{prev_md5}, $ch, 1 ) );

        for my $idx ( $start .. $self->{hash_chars_used} - 1 ) {
            my $subloc = unpack(
                $self->{long_pack},
                substr(
                    $tag->{content},
                    $idx * $self->{long_size},
                    $self->{long_size},
                ),
            );
            next if !$subloc;

            my $result = $self->traverse_index(
                $state, $subloc, $ch + 1, $force_return_next,
            );
            return $result if defined $result;
        } # index loop

        # Nothing found below this index: whatever comes next is fair game.
        $state->{return_next} = 1;
    }
    # This is the bucket list
    else {
        my $keys = $tag->{content};
        if ($force_return_next) { $state->{return_next} = 1; }

        ##
        # Iterate through buckets, looking for a key match
        ##
        BUCKET:
        for my $i ( 0 .. $self->{max_buckets} - 1 ) {
            my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );

            # End of bucket list -- return to outer loop
            if ( !$keyloc ) {
                $state->{return_next} = 1;
                last BUCKET;
            }

            # Located previous key -- return next one found
            if ( $key eq $state->{prev_md5} ) {
                $state->{return_next} = 1;
                next BUCKET;
            }

            # Still ahead of the previous key: keep scanning.
            next BUCKET if !$state->{return_next};

            my $storage = $self->_storage;

            my $keytag = $self->load_tag( $keyloc );
            my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
            if ( $subloc == 0 && !$is_deleted ) {
                # Retry with an explicit second argument of 0.
                # NOTE(review): presumably the HEAD transaction id -- confirm
                # against find_keyloc().
                ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
            }
            next BUCKET if $is_deleted;

            # Read (and discard) the signature; this positions the storage
            # pointer so the following reads can pass undef as the location.
            $storage->read_at( $subloc, SIG_SIZE );

            # Skip over value to get to plain key
            my $size = $storage->read_at( undef, $self->{data_size} );
            $size = unpack( $self->{data_pack}, $size );
            if ($size) { $storage->increment_pointer( $size ); }

            # Read in plain key and return as scalar
            $size = $storage->read_at( undef, $self->{data_size} );
            $size = unpack( $self->{data_pack}, $size );

            my $plain_key;
            if ($size) { $plain_key = $storage->read_at( undef, $size ); }
            return $plain_key;
        }

        $state->{return_next} = 1;
    }

    return;
}
1037
75be6413 1038# Utilities
1039
sub _get_key_subloc {
    ##
    # Decode bucket number $idx from the packed bucket-list string $keys.
    #
    # Returns whatever unpack() yields for a fixed-width key of {hash_size}
    # bytes followed by one {long_pack} field: in list context, the pair
    # (key, location).
    ##
    my ($self, $keys, $idx) = @_;

    my $width  = $self->{bucket_size};
    my $bucket = substr( $keys, $idx * $width, $width );

    # This is 'a', not 'A'. 'A' strips trailing spaces and NULs on unpack,
    # which would corrupt a binary digest; 'a' returns the bytes untouched.
    my $template = join ' ', "a$self->{hash_size}", $self->{long_pack};

    return unpack( $template, $bucket );
}
1055
sub _find_in_buckets {
    ##
    # Walk the buckets of $tag in order and stop at the first one that is
    # either empty (zero location) or holds the digest $md5.
    #
    # Returns (location, byte offset of that bucket within the tag content),
    # where location is zero for an empty bucket. Returns nothing when every
    # bucket is occupied by some other key.
    ##
    my ($self, $tag, $md5) = @_;

    my $slot = 0;
    while ( $slot < $self->{max_buckets} ) {
        my ($key, $subloc) = $self->_get_key_subloc( $tag->{content}, $slot );

        # Either a free slot or the key we are after.
        if ( !$subloc || $key eq $md5 ) {
            return ( $subloc, $slot * $self->{bucket_size} );
        }

        $slot++;
    }

    return;
}
1072
sub _release_space {
    ##
    # Overwrite the start of the region at $loc with a free-space record:
    # the SIG_FREE signature, the packed $size, and a packed next-location
    # field that this routine always writes as zero.
    #
    # Returns nothing.
    ##
    my ($self, $size, $loc) = @_;

    my $pack_long = $self->{long_pack};

    my @record = (
        SIG_FREE,
        pack( $pack_long, $size ),
        pack( $pack_long, 0 ),      # next location: always zero here
    );

    $self->_storage->print_at( $loc, @record );

    return;
}
1087
sub _throw_error {
    ##
    # Die with the given message, prefixed with "DBM::Deep: ". The trailing
    # newline keeps Perl from appending file and line information.
    # The invocant (first argument) is ignored.
    ##
    my (undef, $message) = @_;

    die "DBM::Deep: $message\n";
}
1091
sub _get_dbm_object {
    ##
    # Resolve $item to the DBM::Deep object behind it, if there is one.
    #
    #   * a blessed DBM::Deep (or subclass) instance is returned as-is;
    #   * a hash or array reference tied to a DBM::Deep instance yields the
    #     tied object;
    #   * anything else yields nothing.
    #
    # Blessedness is checked before calling ->isa, so no eval is needed:
    # the caller's $@ is left alone, and a plain string that happens to be a
    # DBM::Deep class name is not mistaken for an object.
    ##
    my $item = shift;

    if ( defined Scalar::Util::blessed( $item ) && $item->isa( 'DBM::Deep' ) ) {
        return $item;
    }

    # reftype() sees through blessing, so blessed hashes/arrays that are not
    # DBM::Deep objects themselves are still checked for a tie.
    my $r = Scalar::Util::reftype( $item ) || '';

    my $tied = $r eq 'HASH'  ? tied( %$item )
             : $r eq 'ARRAY' ? tied( @$item )
             :                 undef;

    if ( defined Scalar::Util::blessed( $tied ) && $tied->isa( 'DBM::Deep' ) ) {
        return $tied;
    }

    return;
}
1130
sub _length_needed {
    ##
    # Compute how many bytes are needed to store $value under $key.
    #
    # Every record needs the signature, two size fields (value and key) and
    # the key itself. On top of that:
    #   * a DBM::Deep object sharing this engine's storage needs only a
    #     long_size internal reference;
    #   * with autobless enabled, one extra byte records whether the value
    #     is blessed;
    #   * a non-hash, non-array value adds its own length (nothing for undef);
    #   * a hash or array adds index_size, plus -- under autobless, for
    #     blessed values that are not DBM::Deep objects -- a size field and
    #     the class name.
    #
    # Returns the length as a number.
    ##
    my $self = shift;
    my ($value, $key) = @_;

    # Check blessedness before calling ->isa. Calling ->isa on a plain string
    # treats it as a class name, so a string value such as 'DBM::Deep' would
    # otherwise be taken for an object and have ->_storage called on it below.
    my $is_dbm_deep = defined Scalar::Util::blessed( $value )
                   && $value->isa( 'DBM::Deep' );

    my $len = SIG_SIZE
            + $self->{data_size}    # size for value
            + $self->{data_size}    # size for key
            + length( $key );       # length of key

    if ( $is_dbm_deep && $value->_storage eq $self->_storage ) {
        # long_size is for the internal reference
        return $len + $self->{long_size};
    }

    if ( $self->_storage->{autobless} ) {
        # This is for the bit saying whether or not this thing is blessed.
        $len += 1;
    }

    my $r = Scalar::Util::reftype( $value ) || '';
    unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
        if ( defined $value ) {
            $len += length( $value );
        }
        return $len;
    }

    $len += $self->{index_size};

    # if autobless is enabled, must also take into consideration
    # the class name as it is stored after the key.
    if ( $self->_storage->{autobless} ) {
        my $c = Scalar::Util::blessed( $value );
        if ( defined $c && !$is_dbm_deep ) {
            $len += $self->{data_size} + length( $c );
        }
    }

    return $len;
}
1176
a20d9a3f 11771;
1178__END__