r13599@rob-kinyons-powerbook58: rob | 2006-05-25 14:21:08 -0400
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
c3aafc14 8our $VERSION = q(0.99_03);
86867f3a 9
633df1fd 10use Fcntl qw( :DEFAULT :flock );
359a01ac 11use Scalar::Util ();
a20d9a3f 12
21838116 13# File-wide notes:
20b7f047 14# * To add to bucket_size, make sure you modify the following:
15# - calculate_sizes()
16# - _get_key_subloc()
17# - add_bucket() - where the buckets are printed
c3aafc14 18#
19# * Every method in here assumes that the _fileobj has been appropriately
20# safeguarded. This can be anything from flock() to some sort of manual
21# mutex. But, it's the caller's responsability to make sure that this has
22# been done.
21838116 23
8db25060 24##
25# Setup file and tag signatures. These should never change.
26##
27sub SIG_FILE () { 'DPDB' }
460b1067 28sub SIG_HEADER () { 'h' }
8db25060 29sub SIG_INTERNAL () { 'i' }
30sub SIG_HASH () { 'H' }
31sub SIG_ARRAY () { 'A' }
8db25060 32sub SIG_NULL () { 'N' }
33sub SIG_DATA () { 'D' }
34sub SIG_INDEX () { 'I' }
35sub SIG_BLIST () { 'B' }
7b1e1aa1 36sub SIG_FREE () { 'F' }
86867f3a 37sub SIG_KEYS () { 'K' }
8db25060 38sub SIG_SIZE () { 1 }
39
c3aafc14 40################################################################################
41#
42# This is new code. It is a complete rewrite of the engine based on a new API
43#
44################################################################################
45
46sub write_value {
47 my $self = shift;
48 my ($offset, $key, $value, $orig_key) = @_;
49
50 my $dig_key = $self->apply_digest( $key );
51 my $tag = $self->find_blist( $offset, $dig_key, { create => 1 } );
52 return $self->add_bucket( $tag, $dig_key, $key, $value, undef, $orig_key );
53}
54
55sub read_value {
56 my $self = shift;
e5a9d386 57 my ($offset, $key, $orig_key) = @_;
c3aafc14 58
59 my $dig_key = $self->apply_digest( $key );
e5a9d386 60 my $tag = $self->find_blist( $offset, $dig_key ) or return;
61 return $self->get_bucket_value( $tag, $dig_key, $orig_key );
c3aafc14 62}
63
64sub delete_key {
65 my $self = shift;
e5a9d386 66 my ($offset, $key, $orig_key) = @_;
c3aafc14 67
68 my $dig_key = $self->apply_digest( $key );
e5a9d386 69 my $tag = $self->find_blist( $offset, $dig_key ) or return;
70 my $value = $self->get_bucket_value( $tag, $dig_key, $orig_key );
71 $self->delete_bucket( $tag, $dig_key, $orig_key );
72 return $value;
c3aafc14 73}
74
75sub key_exists {
76 my $self = shift;
77 my ($offset, $key) = @_;
78
79 my $dig_key = $self->apply_digest( $key );
e5a9d386 80 # exists() returns the empty string, not undef
81 my $tag = $self->find_blist( $offset, $dig_key ) or return '';
c3aafc14 82 return $self->bucket_exists( $tag, $dig_key, $key );
83}
84
83c43bb5 85sub get_next_key {
c3aafc14 86 my $self = shift;
83c43bb5 87 my ($offset) = @_;
c3aafc14 88
83c43bb5 89 # If the previous key was not specifed, start at the top and
90 # return the first one found.
91 my $temp;
92 if ( @_ > 1 ) {
93 $temp = {
94 prev_md5 => $self->apply_digest($_[1]),
95 return_next => 0,
96 };
97 }
98 else {
99 $temp = {
100 prev_md5 => chr(0) x $self->{hash_size},
101 return_next => 1,
102 };
103 }
104
105 return $self->traverse_index( $temp, $offset, 0 );
c3aafc14 106}
107
108################################################################################
109#
110# Below here is the old code. It will be folded into the code above as it can.
111#
112################################################################################
113
612969fb 114sub new {
115 my $class = shift;
116 my ($args) = @_;
117
118 my $self = bless {
86867f3a 119 long_size => 4,
120 long_pack => 'N',
121 data_size => 4,
122 data_pack => 'N',
251dfd0e 123
86867f3a 124 digest => \&Digest::MD5::md5,
c3aafc14 125 hash_size => 16, # In bytes
251dfd0e 126
81d16922 127 ##
c3aafc14 128 # Number of buckets per blist before another level of indexing is
e0098e7f 129 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 130 # files. DO NOT decrease this value below 16, due to risk of recursive
131 # reindex overrun.
81d16922 132 ##
612969fb 133 max_buckets => 16,
460b1067 134
135 fileobj => undef,
359a01ac 136 obj => undef,
612969fb 137 }, $class;
138
e0098e7f 139 if ( defined $args->{pack_size} ) {
140 if ( lc $args->{pack_size} eq 'small' ) {
141 $args->{long_size} = 2;
c9b6d0d8 142 $args->{long_pack} = 'n';
e0098e7f 143 }
144 elsif ( lc $args->{pack_size} eq 'medium' ) {
145 $args->{long_size} = 4;
146 $args->{long_pack} = 'N';
147 }
148 elsif ( lc $args->{pack_size} eq 'large' ) {
149 $args->{long_size} = 8;
150 $args->{long_pack} = 'Q';
151 }
152 else {
153 die "Unknown pack_size value: '$args->{pack_size}'\n";
154 }
155 }
156
fde3db1a 157 # Grab the parameters we want to use
158 foreach my $param ( keys %$self ) {
159 next unless exists $args->{$param};
3e9498a1 160 $self->{$param} = $args->{$param};
fde3db1a 161 }
359a01ac 162 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 163
e0098e7f 164 if ( $self->{max_buckets} < 16 ) {
165 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
166 $self->{max_buckets} = 16;
167 }
168
260a80b4 169 return $self;
170}
171
460b1067 172sub _fileobj { return $_[0]{fileobj} }
460b1067 173
c3aafc14 174sub apply_digest {
175 my $self = shift;
176 return $self->{digest}->(@_);
177}
178
260a80b4 179sub calculate_sizes {
180 my $self = shift;
181
633df1fd 182 # The 2**8 here indicates the number of different characters in the
183 # current hashing algorithm
28394a1a 184 #XXX Does this need to be updated with different hashing algorithms?
ea2f6d67 185 $self->{hash_chars_used} = (2**8);
186 $self->{index_size} = $self->{hash_chars_used} * $self->{long_size};
187
188 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
e0098e7f 189 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 190
ea2f6d67 191 $self->{key_size} = $self->{long_size} * 2;
192 $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size};
193
260a80b4 194 return;
1bf65be7 195}
196
fde3db1a 197sub write_file_header {
0d0f3d5d 198 my $self = shift;
0d0f3d5d 199
eff6a245 200 my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 33 );
0d0f3d5d 201
019404df 202 $self->_fileobj->print_at( $loc,
260a80b4 203 SIG_FILE,
460b1067 204 SIG_HEADER,
205 pack('N', 1), # header version
eff6a245 206 pack('N', 24), # header size
207 pack('N4', 0, 0, 0, 0), # currently running transaction IDs
c9b6d0d8 208 pack('n', $self->{long_size}),
260a80b4 209 pack('A', $self->{long_pack}),
c9b6d0d8 210 pack('n', $self->{data_size}),
260a80b4 211 pack('A', $self->{data_pack}),
c9b6d0d8 212 pack('n', $self->{max_buckets}),
260a80b4 213 );
0d0f3d5d 214
20b7f047 215 $self->_fileobj->set_transaction_offset( 13 );
216
0d0f3d5d 217 return;
218}
219
fde3db1a 220sub read_file_header {
e064ccd1 221 my $self = shift;
e064ccd1 222
7dcefff3 223 my $buffer = $self->_fileobj->read_at( 0, length(SIG_FILE) + 9 );
224 return unless length($buffer);
460b1067 225
226 my ($file_signature, $sig_header, $header_version, $size) = unpack(
227 'A4 A N N', $buffer
42f79e07 228 );
e064ccd1 229
460b1067 230 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 231 $self->_fileobj->close;
e96daec8 232 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 233 }
260a80b4 234
460b1067 235 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 236 $self->_fileobj->close;
e96daec8 237 $self->_throw_error( "Old file version found." );
460b1067 238 }
9b2370e0 239
7dcefff3 240 my $buffer2 = $self->_fileobj->read_at( undef, $size );
eff6a245 241 my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 );
15ba72cc 242
243 $self->_fileobj->set_transaction_offset( 13 );
244
460b1067 245 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 246 $self->_fileobj->close;
e96daec8 247 $self->_throw_error("Corrupted file - bad header");
e064ccd1 248 }
249
460b1067 250 #XXX Add warnings if values weren't set right
251 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
252
7dcefff3 253 return length($buffer) + length($buffer2);
e064ccd1 254}
255
460b1067 256sub setup_fh {
257 my $self = shift;
258 my ($obj) = @_;
70b55428 259
7dcefff3 260 # Need to remove use of $fh here
261 my $fh = $self->_fileobj->{fh};
6fde4ed2 262 flock $fh, LOCK_EX;
118ba343 263
260a80b4 264 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 265 unless ( $obj->{base_offset} ) {
e96daec8 266 my $bytes_read = $self->read_file_header;
118ba343 267
260a80b4 268 $self->calculate_sizes;
269
118ba343 270 ##
fde3db1a 271 # File is empty -- write header and master index
118ba343 272 ##
273 if (!$bytes_read) {
aa83bc1e 274 $self->_fileobj->audit( "# Database created on" );
359a01ac 275
e96daec8 276 $self->write_file_header;
118ba343 277
22e20cce 278 $obj->{base_offset} = $self->_fileobj->request_space(
279 $self->tag_size( $self->{index_size} ),
280 );
118ba343 281
9e4f83a0 282 $self->write_tag(
e96daec8 283 $obj->_base_offset, $obj->_type,
f37c15ab 284 chr(0)x$self->{index_size},
118ba343 285 );
286
287 # Flush the filehandle
288 my $old_fh = select $fh;
289 my $old_af = $|; $| = 1; $| = $old_af;
290 select $old_fh;
291 }
292 else {
293 $obj->{base_offset} = $bytes_read;
294
295 ##
fde3db1a 296 # Get our type from master index header
118ba343 297 ##
359a01ac 298 my $tag = $self->load_tag($obj->_base_offset);
299 unless ( $tag ) {
300 flock $fh, LOCK_UN;
301 $self->_throw_error("Corrupted file, no master index record");
302 }
118ba343 303
e96daec8 304 unless ($obj->_type eq $tag->{signature}) {
359a01ac 305 flock $fh, LOCK_UN;
e96daec8 306 $self->_throw_error("File type mismatch");
118ba343 307 }
308 }
118ba343 309 }
260a80b4 310 else {
311 $self->calculate_sizes;
312 }
e06824f8 313
673464d9 314 #XXX We have to make sure we don't mess up when autoflush isn't turned on
7dcefff3 315 $self->_fileobj->set_inode;
70b55428 316
6fde4ed2 317 flock $fh, LOCK_UN;
318
70b55428 319 return 1;
320}
321
16d1ad9b 322sub tag_size {
323 my $self = shift;
324 my ($size) = @_;
325 return SIG_SIZE + $self->{data_size} + $size;
326}
327
9e4f83a0 328sub write_tag {
20f7b20c 329 ##
330 # Given offset, signature and content, create tag and write to disk
331 ##
d4b1166e 332 my $self = shift;
e96daec8 333 my ($offset, $sig, $content) = @_;
f37c15ab 334 my $size = length( $content );
20f7b20c 335
7dcefff3 336 $self->_fileobj->print_at(
337 $offset,
338 $sig, pack($self->{data_pack}, $size), $content,
339 );
20f7b20c 340
f37c15ab 341 return unless defined $offset;
342
20f7b20c 343 return {
344 signature => $sig,
72e315ac 345 #XXX Is this even used?
86867f3a 346 size => $size,
347 offset => $offset + SIG_SIZE + $self->{data_size},
348 content => $content
20f7b20c 349 };
d4b1166e 350}
351
352sub load_tag {
20f7b20c 353 ##
354 # Given offset, load single tag and return signature, size and data
355 ##
d4b1166e 356 my $self = shift;
e96daec8 357 my ($offset) = @_;
20f7b20c 358
7dcefff3 359 my $fileobj = $self->_fileobj;
20f7b20c 360
86867f3a 361 my ($sig, $size) = unpack(
362 "A $self->{data_pack}",
363 $fileobj->read_at( $offset, SIG_SIZE + $self->{data_size} ),
364 );
20f7b20c 365
366 return {
367 signature => $sig,
c3aafc14 368 size => $size, #XXX Is this even used?
86867f3a 369 offset => $offset + SIG_SIZE + $self->{data_size},
370 content => $fileobj->read_at( undef, $size ),
20f7b20c 371 };
d4b1166e 372}
373
ea2f6d67 374sub find_keyloc {
375 my $self = shift;
376 my ($tag, $transaction_id) = @_;
377 $transaction_id = $self->_fileobj->transaction_id
378 unless defined $transaction_id;
379
380 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
381 my ($loc, $trans_id, $is_deleted) = unpack(
382 "$self->{long_pack} C C",
383 substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ),
384 );
385
386 if ( $loc == 0 ) {
387 return ( $loc, $is_deleted, $i * $self->{key_size} );
388 }
389
390 next if $transaction_id != $trans_id;
391
392 return ( $loc, $is_deleted, $i * $self->{key_size} );
393 }
394
395 return;
396}
397
20f7b20c 398sub add_bucket {
399 ##
400 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
401 # plain (undigested) key and value.
402 ##
d4b1166e 403 my $self = shift;
359a01ac 404 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
75be6413 405
eea0d863 406 # This verifies that only supported values will be stored.
407 {
408 my $r = Scalar::Util::reftype( $value );
eea0d863 409
86867f3a 410 last if !defined $r;
eea0d863 411 last if $r eq 'HASH';
412 last if $r eq 'ARRAY';
413
e96daec8 414 $self->_throw_error(
86867f3a 415 "Storage of references of type '$r' is not supported."
eea0d863 416 );
417 }
418
019404df 419 my $fileobj = $self->_fileobj;
20f7b20c 420
21838116 421 #ACID - This is a mutation. Must only find the exact transaction
ea2f6d67 422 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );
c9b6d0d8 423
424 my @transactions;
019404df 425 if ( $fileobj->transaction_id == 0 ) {
426 @transactions = $fileobj->current_transactions;
c9b6d0d8 427 }
75be6413 428
e96daec8 429# $self->_release_space( $size, $subloc );
f9c33187 430#XXX This needs updating to use _release_space
ea2f6d67 431
86867f3a 432 my $location;
ea2f6d67 433 my $size = $self->_length_needed( $value, $plain_key );
019404df 434
ea2f6d67 435 # Updating a known md5
436 if ( $keyloc ) {
437 my $keytag = $self->load_tag( $keyloc );
438 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
439
13ff93d5 440 if ( $subloc && !$is_deleted && @transactions ) {
ea2f6d67 441 my $old_value = $self->read_from_loc( $subloc, $orig_key );
442 my $old_size = $self->_length_needed( $old_value, $plain_key );
443
444 for my $trans_id ( @transactions ) {
445 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
446 unless ($loc) {
447 my $location2 = $fileobj->request_space( $old_size );
448 $fileobj->print_at( $keytag->{offset} + $offset2,
449 pack($self->{long_pack}, $location2 ),
450 pack( 'C C', $trans_id, 0 ),
451 );
c3aafc14 452 $self->_write_value( $location2, $plain_key, $old_value, $orig_key );
ea2f6d67 453 }
454 }
75be6413 455 }
504185fb 456
ea2f6d67 457 $location = $self->_fileobj->request_space( $size );
458 #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
459 $fileobj->print_at( $keytag->{offset} + $offset,
460 pack($self->{long_pack}, $location ),
461 pack( 'C C', $fileobj->transaction_id, 0 ),
462 );
75be6413 463 }
386bab6c 464 # Adding a new md5
ea2f6d67 465 else {
466 my $keyloc = $fileobj->request_space( $self->tag_size( $self->{keyloc_size} ) );
386bab6c 467
ea2f6d67 468 # The bucket fit into list
469 if ( defined $offset ) {
470 $fileobj->print_at( $tag->{offset} + $offset,
471 $md5, pack( $self->{long_pack}, $keyloc ),
472 );
473 }
474 # If bucket didn't fit into list, split into a new index level
475 else {
476 $self->split_index( $tag, $md5, $keyloc );
477 }
478
479 my $keytag = $self->write_tag(
480 $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
019404df 481 );
c9b6d0d8 482
ea2f6d67 483 $location = $self->_fileobj->request_space( $size );
484 $fileobj->print_at( $keytag->{offset},
485 pack( $self->{long_pack}, $location ),
486 pack( 'C C', $fileobj->transaction_id, 0 ),
487 );
488
489 my $offset = 1;
490 for my $trans_id ( @transactions ) {
491 $fileobj->print_at( $keytag->{offset} + $self->{key_size} * $offset++,
13ff93d5 492 pack( $self->{long_pack}, 0 ),
ea2f6d67 493 pack( 'C C', $trans_id, 1 ),
494 );
c9b6d0d8 495 }
386bab6c 496 }
20f7b20c 497
c3aafc14 498 $self->_write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 499
86867f3a 500 return 1;
d5d7c51d 501}
502
c3aafc14 503sub _write_value {
d5d7c51d 504 my $self = shift;
359a01ac 505 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 506
7dcefff3 507 my $fileobj = $self->_fileobj;
d5d7c51d 508
9d4fa373 509 my $dbm_deep_obj = _get_dbm_object( $value );
7dcefff3 510 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) {
e96daec8 511 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 512 }
d5d7c51d 513
20f7b20c 514 ##
d5d7c51d 515 # Write signature based on content type, set content length and write
516 # actual value.
20f7b20c 517 ##
9d4fa373 518 my $r = Scalar::Util::reftype( $value ) || '';
519 if ( $dbm_deep_obj ) {
7dcefff3 520 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 521 }
522 elsif ($r eq 'HASH') {
9d4fa373 523 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 524 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 525 }
7dcefff3 526 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 527 }
528 elsif ($r eq 'ARRAY') {
9d4fa373 529 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 530 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 531 }
7dcefff3 532 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 533 }
534 elsif (!defined($value)) {
7dcefff3 535 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 536 }
537 else {
7dcefff3 538 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 539 }
20f7b20c 540
d5d7c51d 541 ##
542 # Plain key is stored AFTER value, as keys are typically fetched less often.
543 ##
7dcefff3 544 $fileobj->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 545
9a187d8c 546 # Internal references don't care about autobless
9d4fa373 547 return 1 if $dbm_deep_obj;
9a187d8c 548
d5d7c51d 549 ##
550 # If value is blessed, preserve class name
551 ##
7dcefff3 552 if ( $fileobj->{autobless} ) {
633df1fd 553 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
7dcefff3 554 $fileobj->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 555 }
d5d7c51d 556 else {
7dcefff3 557 $fileobj->print_at( undef, chr(0) );
20f7b20c 558 }
d5d7c51d 559 }
20f7b20c 560
d5d7c51d 561 ##
56ec4340 562 # Tie the passed in reference so that changes to it are reflected in the
563 # datafile. The use of $location as the base_offset will act as the
564 # the linkage between parent and child.
565 #
566 # The overall assignment is a hack around the fact that just tying doesn't
567 # store the values. This may not be the wrong thing to do.
d5d7c51d 568 ##
9d4fa373 569 if ($r eq 'HASH') {
570 my %x = %$value;
571 tie %$value, 'DBM::Deep', {
572 base_offset => $location,
7dcefff3 573 fileobj => $fileobj,
359a01ac 574 parent => $self->{obj},
575 parent_key => $orig_key,
9d4fa373 576 };
577 %$value = %x;
578 }
579 elsif ($r eq 'ARRAY') {
580 my @x = @$value;
581 tie @$value, 'DBM::Deep', {
582 base_offset => $location,
7dcefff3 583 fileobj => $fileobj,
359a01ac 584 parent => $self->{obj},
585 parent_key => $orig_key,
9d4fa373 586 };
587 @$value = @x;
20f7b20c 588 }
d4b1166e 589
d5d7c51d 590 return 1;
d4b1166e 591}
592
75be6413 593sub split_index {
594 my $self = shift;
ea2f6d67 595 my ($tag, $md5, $keyloc) = @_;
75be6413 596
019404df 597 my $fileobj = $self->_fileobj;
21838116 598
019404df 599 my $loc = $fileobj->request_space(
e96daec8 600 $self->tag_size( $self->{index_size} ),
16d1ad9b 601 );
602
019404df 603 $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 604
9e4f83a0 605 my $index_tag = $self->write_tag(
e96daec8 606 $loc, SIG_INDEX,
f37c15ab 607 chr(0)x$self->{index_size},
75be6413 608 );
609
7b1e1aa1 610 my $keys = $tag->{content}
ea2f6d67 611 . $md5 . pack($self->{long_pack}, $keyloc);
75be6413 612
f9c33187 613 my @newloc = ();
75be6413 614 BUCKET:
633df1fd 615 # The <= here is deliberate - we have max_buckets+1 keys to iterate
616 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 617 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
ea2f6d67 618 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 619
f9c33187 620 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
621 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 622
75be6413 623 my $num = ord(substr($key, $tag->{ch} + 1, 1));
624
f9c33187 625 if ($newloc[$num]) {
7dcefff3 626 my $subkeys = $fileobj->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 627
f9c33187 628 # This is looking for the first empty spot
ea2f6d67 629 my ($subloc, $offset) = $self->_find_in_buckets(
f9c33187 630 { content => $subkeys }, '',
7b1e1aa1 631 );
75be6413 632
633df1fd 633 $fileobj->print_at(
634 $newloc[$num] + $offset,
635 $key, pack($self->{long_pack}, $old_subloc),
636 );
7b1e1aa1 637
638 next;
75be6413 639 }
75be6413 640
019404df 641 my $loc = $fileobj->request_space(
e96daec8 642 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 643 );
2603d86e 644
019404df 645 $fileobj->print_at(
646 $index_tag->{offset} + ($num * $self->{long_size}),
647 pack($self->{long_pack}, $loc),
648 );
75be6413 649
7b1e1aa1 650 my $blist_tag = $self->write_tag(
e96daec8 651 $loc, SIG_BLIST,
7b1e1aa1 652 chr(0)x$self->{bucket_list_size},
653 );
654
019404df 655 $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 656
f9c33187 657 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 658 }
659
660 $self->_release_space(
e96daec8 661 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 662 $tag->{offset} - SIG_SIZE - $self->{data_size},
663 );
75be6413 664
ea2f6d67 665 return 1;
75be6413 666}
667
8db25060 668sub read_from_loc {
669 my $self = shift;
359a01ac 670 my ($subloc, $orig_key) = @_;
8db25060 671
7dcefff3 672 my $fileobj = $self->_fileobj;
8db25060 673
7dcefff3 674 my $signature = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 675
676 ##
677 # If value is a hash or array, return new DBM::Deep object with correct offset
678 ##
679 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
c3aafc14 680 #XXX This needs to be a singleton
685e40f1 681 my $new_obj = DBM::Deep->new({
359a01ac 682 type => $signature,
8db25060 683 base_offset => $subloc,
e96daec8 684 fileobj => $self->_fileobj,
359a01ac 685 parent => $self->{obj},
686 parent_key => $orig_key,
685e40f1 687 });
8db25060 688
460b1067 689 if ($new_obj->_fileobj->{autobless}) {
8db25060 690 ##
691 # Skip over value and plain key to see if object needs
692 # to be re-blessed
693 ##
7dcefff3 694 $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 695
7dcefff3 696 my $size = $fileobj->read_at( undef, $self->{data_size} );
c6ea6b6c 697 $size = unpack($self->{data_pack}, $size);
7dcefff3 698 if ($size) { $fileobj->increment_pointer( $size ); }
8db25060 699
7dcefff3 700 my $bless_bit = $fileobj->read_at( undef, 1 );
86867f3a 701 if ( ord($bless_bit) ) {
702 my $size = unpack(
703 $self->{data_pack},
704 $fileobj->read_at( undef, $self->{data_size} ),
705 );
7dcefff3 706
86867f3a 707 if ( $size ) {
708 $new_obj = bless $new_obj, $fileobj->read_at( undef, $size );
709 }
8db25060 710 }
711 }
712
685e40f1 713 return $new_obj;
8db25060 714 }
715 elsif ( $signature eq SIG_INTERNAL ) {
7dcefff3 716 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 717 $size = unpack($self->{data_pack}, $size);
718
719 if ( $size ) {
7dcefff3 720 my $new_loc = $fileobj->read_at( undef, $size );
721 $new_loc = unpack( $self->{long_pack}, $new_loc );
359a01ac 722 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 723 }
724 else {
725 return;
726 }
727 }
728 ##
729 # Otherwise return actual value
730 ##
460b1067 731 elsif ( $signature eq SIG_DATA ) {
7dcefff3 732 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 733 $size = unpack($self->{data_pack}, $size);
734
86867f3a 735 my $value = $size ? $fileobj->read_at( undef, $size ) : '';
8db25060 736 return $value;
737 }
738
739 ##
740 # Key exists, but content is null
741 ##
742 return;
743}
744
9020ee8c 745sub get_bucket_value {
beac1dff 746 ##
747 # Fetch single value given tag and MD5 digested key.
748 ##
749 my $self = shift;
359a01ac 750 my ($tag, $md5, $orig_key) = @_;
9020ee8c 751
21838116 752 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 753 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
94e8af14 754
ea2f6d67 755 if ( !$keyloc ) {
94e8af14 756 #XXX Need to use real key
ea2f6d67 757# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key );
94e8af14 758# return;
759 }
ea2f6d67 760# elsif ( !$is_deleted ) {
761 else {
762 my $keytag = $self->load_tag( $keyloc );
763 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 764 if (!$subloc && !$is_deleted) {
ea2f6d67 765 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
766 }
767 if ( $subloc && !$is_deleted ) {
768 return $self->read_from_loc( $subloc, $orig_key );
769 }
386bab6c 770 }
94e8af14 771
beac1dff 772 return;
9020ee8c 773}
ab0e4957 774
775sub delete_bucket {
beac1dff 776 ##
777 # Delete single key/value pair given tag and MD5 digested key.
778 ##
779 my $self = shift;
a97c8f67 780 my ($tag, $md5, $orig_key) = @_;
ab0e4957 781
22e20cce 782 #ACID - Although this is a mutation, we must find any transaction.
783 # This is because we need to mark something as deleted that is in the HEAD.
ea2f6d67 784 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 785
ea2f6d67 786 return if !$keyloc;
633df1fd 787
788 my $fileobj = $self->_fileobj;
789
790 my @transactions;
791 if ( $fileobj->transaction_id == 0 ) {
792 @transactions = $fileobj->current_transactions;
793 }
794
633df1fd 795 if ( $fileobj->transaction_id == 0 ) {
ea2f6d67 796 my $keytag = $self->load_tag( $keyloc );
7a960a12 797
ea2f6d67 798 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 799 return if !$subloc || $is_deleted;
800
633df1fd 801 my $value = $self->read_from_loc( $subloc, $orig_key );
802
ea2f6d67 803 my $size = $self->_length_needed( $value, $orig_key );
804
805 for my $trans_id ( @transactions ) {
806 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
807 unless ($loc) {
808 my $location2 = $fileobj->request_space( $size );
809 $fileobj->print_at( $keytag->{offset} + $offset2,
810 pack($self->{long_pack}, $location2 ),
811 pack( 'C C', $trans_id, 0 ),
812 );
c3aafc14 813 $self->_write_value( $location2, $orig_key, $value, $orig_key );
ea2f6d67 814 }
633df1fd 815 }
816
ea2f6d67 817 $keytag = $self->load_tag( $keyloc );
818 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
819 $fileobj->print_at( $keytag->{offset} + $offset,
820 substr( $keytag->{content}, $offset + $self->{key_size} ),
821 chr(0) x $self->{key_size},
019404df 822 );
386bab6c 823 }
633df1fd 824 else {
ea2f6d67 825 my $keytag = $self->load_tag( $keyloc );
7a960a12 826
ea2f6d67 827 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 828
ea2f6d67 829 $fileobj->print_at( $keytag->{offset} + $offset,
13ff93d5 830 pack($self->{long_pack}, 0 ),
ea2f6d67 831 pack( 'C C', $fileobj->transaction_id, 1 ),
832 );
633df1fd 833 }
834
835 return 1;
ab0e4957 836}
837
912d50b1 838sub bucket_exists {
beac1dff 839 ##
840 # Check existence of single key given tag and MD5 digested key.
841 ##
842 my $self = shift;
e96daec8 843 my ($tag, $md5) = @_;
912d50b1 844
21838116 845 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 846 my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );
847 my $keytag = $self->load_tag( $keyloc );
848 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
13ff93d5 849 if ( !$subloc && !$is_deleted ) {
ea2f6d67 850 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
851 }
c9b6d0d8 852 return ($subloc && !$is_deleted) && 1;
912d50b1 853}
854
72e315ac 855sub find_blist {
beac1dff 856 ##
857 # Locate offset for bucket list, given digested key
858 ##
859 my $self = shift;
e96daec8 860 my ($offset, $md5, $args) = @_;
d0b74c17 861 $args = {} unless $args;
862
beac1dff 863 ##
864 # Locate offset for bucket list using digest index system
865 ##
e96daec8 866 my $tag = $self->load_tag( $offset )
867 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 868
e5fc7e69 869 my $ch = 0;
8db25060 870 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 871 my $num = ord substr($md5, $ch, 1);
872
873 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 874 $tag = $self->index_lookup( $tag, $num );
d0b74c17 875
876 if (!$tag) {
29b01632 877 return if !$args->{create};
d0b74c17 878
019404df 879 my $loc = $self->_fileobj->request_space(
e96daec8 880 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 881 );
882
019404df 883 $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 884
9e4f83a0 885 $tag = $self->write_tag(
e96daec8 886 $loc, SIG_BLIST,
f37c15ab 887 chr(0)x$self->{bucket_list_size},
d5d7c51d 888 );
889
890 $tag->{ref_loc} = $ref_loc;
891 $tag->{ch} = $ch;
892
893 last;
d0b74c17 894 }
895
16d1ad9b 896 $tag->{ch} = $ch++;
d0b74c17 897 $tag->{ref_loc} = $ref_loc;
beac1dff 898 }
d0b74c17 899
beac1dff 900 return $tag;
6736c116 901}
902
d0b74c17 903sub index_lookup {
904 ##
905 # Given index tag, lookup single entry in index and return .
906 ##
907 my $self = shift;
e96daec8 908 my ($tag, $index) = @_;
d0b74c17 909
910 my $location = unpack(
911 $self->{long_pack},
912 substr(
913 $tag->{content},
914 $index * $self->{long_size},
915 $self->{long_size},
916 ),
917 );
918
919 if (!$location) { return; }
920
e96daec8 921 return $self->load_tag( $location );
d0b74c17 922}
923
6736c116 924sub traverse_index {
beac1dff 925 ##
926 # Scan index and recursively step into deeper levels, looking for next key.
927 ##
6736c116 928 my $self = shift;
ea2f6d67 929 my ($xxxx, $offset, $ch, $force_return_next) = @_;
d0b74c17 930
e96daec8 931 my $tag = $self->load_tag( $offset );
6736c116 932
8db25060 933 if ($tag->{signature} ne SIG_BLIST) {
ea2f6d67 934 my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1));
d0b74c17 935
ea2f6d67 936 for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) {
e5fc7e69 937 my $subloc = unpack(
938 $self->{long_pack},
e06824f8 939 substr(
ea2f6d67 940 $tag->{content},
e06824f8 941 $idx * $self->{long_size},
942 $self->{long_size},
943 ),
e5fc7e69 944 );
945
beac1dff 946 if ($subloc) {
e5fc7e69 947 my $result = $self->traverse_index(
ea2f6d67 948 $xxxx, $subloc, $ch + 1, $force_return_next,
e5fc7e69 949 );
950
ea2f6d67 951 if (defined $result) { return $result; }
beac1dff 952 }
953 } # index loop
d0b74c17 954
ea2f6d67 955 $xxxx->{return_next} = 1;
42717e46 956 }
957 # This is the bucket list
e5fc7e69 958 else {
beac1dff 959 my $keys = $tag->{content};
ea2f6d67 960 if ($force_return_next) { $xxxx->{return_next} = 1; }
d0b74c17 961
beac1dff 962 ##
963 # Iterate through buckets, looking for a key match
964 ##
42717e46 965 my $transaction_id = $self->_fileobj->transaction_id;
8db25060 966 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
ea2f6d67 967 my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 968
8db25060 969 # End of bucket list -- return to outer loop
ea2f6d67 970 if (!$keyloc) {
971 $xxxx->{return_next} = 1;
beac1dff 972 last;
973 }
8db25060 974 # Located previous key -- return next one found
ea2f6d67 975 elsif ($key eq $xxxx->{prev_md5}) {
976 $xxxx->{return_next} = 1;
beac1dff 977 next;
978 }
8db25060 979 # Seek to bucket location and skip over signature
ea2f6d67 980 elsif ($xxxx->{return_next}) {
7dcefff3 981 my $fileobj = $self->_fileobj;
d0b74c17 982
ea2f6d67 983 my $keytag = $self->load_tag( $keyloc );
984 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 985 if ( $subloc == 0 && !$is_deleted ) {
ea2f6d67 986 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
987 }
988 next if $is_deleted;
989
beac1dff 990 # Skip over value to get to plain key
7dcefff3 991 my $sig = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 992
7dcefff3 993 my $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 994 $size = unpack($self->{data_pack}, $size);
7dcefff3 995 if ($size) { $fileobj->increment_pointer( $size ); }
d0b74c17 996
beac1dff 997 # Read in plain key and return as scalar
7dcefff3 998 $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 999 $size = unpack($self->{data_pack}, $size);
86867f3a 1000
7dcefff3 1001 my $plain_key;
1002 if ($size) { $plain_key = $fileobj->read_at( undef, $size); }
beac1dff 1003 return $plain_key;
1004 }
8db25060 1005 }
d0b74c17 1006
ea2f6d67 1007 $xxxx->{return_next} = 1;
42717e46 1008 }
d0b74c17 1009
beac1dff 1010 return;
6736c116 1011}
1012
75be6413 1013# Utilities
1014
9cec1360 1015sub _get_key_subloc {
75be6413 1016 my $self = shift;
1017 my ($keys, $idx) = @_;
1018
ea2f6d67 1019 return unpack(
28394a1a 1020 # This is 'a', not 'A'. Please read the pack() documentation for the
1021 # difference between the two and why it's important.
ea2f6d67 1022 "a$self->{hash_size} $self->{long_pack}",
75be6413 1023 substr(
1024 $keys,
9cec1360 1025 ($idx * $self->{bucket_size}),
1026 $self->{bucket_size},
75be6413 1027 ),
1028 );
75be6413 1029}
1030
d608b06e 1031sub _find_in_buckets {
1032 my $self = shift;
ea2f6d67 1033 my ($tag, $md5) = @_;
21838116 1034
d608b06e 1035 BUCKET:
1036 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
ea2f6d67 1037 my ($key, $subloc) = $self->_get_key_subloc(
9a187d8c 1038 $tag->{content}, $i,
1039 );
d608b06e 1040
ea2f6d67 1041 my @rv = ($subloc, $i * $self->{bucket_size});
21838116 1042
1043 unless ( $subloc ) {
21838116 1044 return @rv;
1045 }
1046
1047 next BUCKET if $key ne $md5;
d608b06e 1048
21838116 1049 return @rv;
d608b06e 1050 }
1051
1052 return;
1053}
1054
994ccd8e 1055sub _release_space {
1056 my $self = shift;
e96daec8 1057 my ($size, $loc) = @_;
994ccd8e 1058
7b1e1aa1 1059 my $next_loc = 0;
1060
019404df 1061 $self->_fileobj->print_at( $loc,
1062 SIG_FREE,
1063 pack($self->{long_pack}, $size ),
1064 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1065 );
1066
994ccd8e 1067 return;
1068}
1069
e96daec8 1070sub _throw_error {
1071 die "DBM::Deep: $_[1]\n";
1072}
1073
86867f3a 1074sub _get_dbm_object {
1075 my $item = shift;
1076
1077 my $obj = eval {
1078 local $SIG{__DIE__};
1079 if ($item->isa( 'DBM::Deep' )) {
1080 return $item;
1081 }
1082 return;
1083 };
1084 return $obj if $obj;
1085
1086 my $r = Scalar::Util::reftype( $item ) || '';
1087 if ( $r eq 'HASH' ) {
1088 my $obj = eval {
1089 local $SIG{__DIE__};
1090 my $obj = tied(%$item);
1091 if ($obj->isa( 'DBM::Deep' )) {
1092 return $obj;
1093 }
1094 return;
1095 };
1096 return $obj if $obj;
1097 }
1098 elsif ( $r eq 'ARRAY' ) {
1099 my $obj = eval {
1100 local $SIG{__DIE__};
1101 my $obj = tied(@$item);
1102 if ($obj->isa( 'DBM::Deep' )) {
1103 return $obj;
1104 }
1105 return;
1106 };
1107 return $obj if $obj;
1108 }
1109
1110 return;
1111}
1112
1113sub _length_needed {
1114 my $self = shift;
1115 my ($value, $key) = @_;
1116
1117 my $is_dbm_deep = eval {
1118 local $SIG{'__DIE__'};
1119 $value->isa( 'DBM::Deep' );
1120 };
1121
1122 my $len = SIG_SIZE
1123 + $self->{data_size} # size for value
1124 + $self->{data_size} # size for key
1125 + length( $key ); # length of key
1126
1127 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
1128 # long_size is for the internal reference
1129 return $len + $self->{long_size};
1130 }
1131
1132 if ( $self->_fileobj->{autobless} ) {
1133 # This is for the bit saying whether or not this thing is blessed.
1134 $len += 1;
1135 }
1136
1137 my $r = Scalar::Util::reftype( $value ) || '';
1138 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1139 if ( defined $value ) {
1140 $len += length( $value );
1141 }
1142 return $len;
1143 }
1144
1145 $len += $self->{index_size};
1146
1147 # if autobless is enabled, must also take into consideration
1148 # the class name as it is stored after the key.
1149 if ( $self->_fileobj->{autobless} ) {
1150 my $c = Scalar::Util::blessed($value);
1151 if ( defined $c && !$is_dbm_deep ) {
1152 $len += $self->{data_size} + length($c);
1153 }
1154 }
1155
1156 return $len;
1157}
1158
a20d9a3f 11591;
1160__END__