r13607@rob-kinyons-powerbook58: rob | 2006-05-25 15:12:31 -0400
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
c3aafc14 8our $VERSION = q(0.99_03);
86867f3a 9
633df1fd 10use Fcntl qw( :DEFAULT :flock );
359a01ac 11use Scalar::Util ();
a20d9a3f 12
21838116 13# File-wide notes:
20b7f047 14# * To add to bucket_size, make sure you modify the following:
15# - calculate_sizes()
16# - _get_key_subloc()
17# - add_bucket() - where the buckets are printed
c3aafc14 18#
19# * Every method in here assumes that the _fileobj has been appropriately
20# safeguarded. This can be anything from flock() to some sort of manual
21# mutex. But, it's the caller's responsability to make sure that this has
22# been done.
21838116 23
8db25060 24##
25# Setup file and tag signatures. These should never change.
26##
27sub SIG_FILE () { 'DPDB' }
460b1067 28sub SIG_HEADER () { 'h' }
8db25060 29sub SIG_INTERNAL () { 'i' }
30sub SIG_HASH () { 'H' }
31sub SIG_ARRAY () { 'A' }
8db25060 32sub SIG_NULL () { 'N' }
33sub SIG_DATA () { 'D' }
34sub SIG_INDEX () { 'I' }
35sub SIG_BLIST () { 'B' }
7b1e1aa1 36sub SIG_FREE () { 'F' }
86867f3a 37sub SIG_KEYS () { 'K' }
8db25060 38sub SIG_SIZE () { 1 }
39
c3aafc14 40################################################################################
41#
42# This is new code. It is a complete rewrite of the engine based on a new API
43#
44################################################################################
45
46sub write_value {
47 my $self = shift;
48 my ($offset, $key, $value, $orig_key) = @_;
49
50 my $dig_key = $self->apply_digest( $key );
51 my $tag = $self->find_blist( $offset, $dig_key, { create => 1 } );
52 return $self->add_bucket( $tag, $dig_key, $key, $value, undef, $orig_key );
53}
54
55sub read_value {
56 my $self = shift;
e5a9d386 57 my ($offset, $key, $orig_key) = @_;
c3aafc14 58
59 my $dig_key = $self->apply_digest( $key );
e5a9d386 60 my $tag = $self->find_blist( $offset, $dig_key ) or return;
61 return $self->get_bucket_value( $tag, $dig_key, $orig_key );
c3aafc14 62}
63
64sub delete_key {
65 my $self = shift;
e5a9d386 66 my ($offset, $key, $orig_key) = @_;
c3aafc14 67
68 my $dig_key = $self->apply_digest( $key );
e5a9d386 69 my $tag = $self->find_blist( $offset, $dig_key ) or return;
70 my $value = $self->get_bucket_value( $tag, $dig_key, $orig_key );
71 $self->delete_bucket( $tag, $dig_key, $orig_key );
72 return $value;
c3aafc14 73}
74
75sub key_exists {
76 my $self = shift;
77 my ($offset, $key) = @_;
78
79 my $dig_key = $self->apply_digest( $key );
e5a9d386 80 # exists() returns the empty string, not undef
81 my $tag = $self->find_blist( $offset, $dig_key ) or return '';
c3aafc14 82 return $self->bucket_exists( $tag, $dig_key, $key );
83}
84
83c43bb5 85sub get_next_key {
c3aafc14 86 my $self = shift;
83c43bb5 87 my ($offset) = @_;
c3aafc14 88
83c43bb5 89 # If the previous key was not specifed, start at the top and
90 # return the first one found.
91 my $temp;
92 if ( @_ > 1 ) {
93 $temp = {
94 prev_md5 => $self->apply_digest($_[1]),
95 return_next => 0,
96 };
97 }
98 else {
99 $temp = {
100 prev_md5 => chr(0) x $self->{hash_size},
101 return_next => 1,
102 };
103 }
104
105 return $self->traverse_index( $temp, $offset, 0 );
c3aafc14 106}
107
108################################################################################
109#
110# Below here is the old code. It will be folded into the code above as it can.
111#
112################################################################################
113
612969fb 114sub new {
115 my $class = shift;
116 my ($args) = @_;
117
118 my $self = bless {
86867f3a 119 long_size => 4,
120 long_pack => 'N',
121 data_size => 4,
122 data_pack => 'N',
251dfd0e 123
86867f3a 124 digest => \&Digest::MD5::md5,
c3aafc14 125 hash_size => 16, # In bytes
251dfd0e 126
81d16922 127 ##
c3aafc14 128 # Number of buckets per blist before another level of indexing is
e0098e7f 129 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 130 # files. DO NOT decrease this value below 16, due to risk of recursive
131 # reindex overrun.
81d16922 132 ##
612969fb 133 max_buckets => 16,
460b1067 134
135 fileobj => undef,
359a01ac 136 obj => undef,
612969fb 137 }, $class;
138
e0098e7f 139 if ( defined $args->{pack_size} ) {
140 if ( lc $args->{pack_size} eq 'small' ) {
141 $args->{long_size} = 2;
c9b6d0d8 142 $args->{long_pack} = 'n';
e0098e7f 143 }
144 elsif ( lc $args->{pack_size} eq 'medium' ) {
145 $args->{long_size} = 4;
146 $args->{long_pack} = 'N';
147 }
148 elsif ( lc $args->{pack_size} eq 'large' ) {
149 $args->{long_size} = 8;
150 $args->{long_pack} = 'Q';
151 }
152 else {
153 die "Unknown pack_size value: '$args->{pack_size}'\n";
154 }
155 }
156
fde3db1a 157 # Grab the parameters we want to use
158 foreach my $param ( keys %$self ) {
159 next unless exists $args->{$param};
3e9498a1 160 $self->{$param} = $args->{$param};
fde3db1a 161 }
359a01ac 162 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 163
e0098e7f 164 if ( $self->{max_buckets} < 16 ) {
165 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
166 $self->{max_buckets} = 16;
167 }
168
260a80b4 169 return $self;
170}
171
460b1067 172sub _fileobj { return $_[0]{fileobj} }
460b1067 173
c3aafc14 174sub apply_digest {
175 my $self = shift;
176 return $self->{digest}->(@_);
177}
178
260a80b4 179sub calculate_sizes {
180 my $self = shift;
181
633df1fd 182 # The 2**8 here indicates the number of different characters in the
183 # current hashing algorithm
28394a1a 184 #XXX Does this need to be updated with different hashing algorithms?
ea2f6d67 185 $self->{hash_chars_used} = (2**8);
186 $self->{index_size} = $self->{hash_chars_used} * $self->{long_size};
187
188 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
e0098e7f 189 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 190
ea2f6d67 191 $self->{key_size} = $self->{long_size} * 2;
192 $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size};
193
260a80b4 194 return;
1bf65be7 195}
196
fde3db1a 197sub write_file_header {
0d0f3d5d 198 my $self = shift;
0d0f3d5d 199
eff6a245 200 my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 33 );
0d0f3d5d 201
019404df 202 $self->_fileobj->print_at( $loc,
260a80b4 203 SIG_FILE,
460b1067 204 SIG_HEADER,
205 pack('N', 1), # header version
eff6a245 206 pack('N', 24), # header size
207 pack('N4', 0, 0, 0, 0), # currently running transaction IDs
c9b6d0d8 208 pack('n', $self->{long_size}),
260a80b4 209 pack('A', $self->{long_pack}),
c9b6d0d8 210 pack('n', $self->{data_size}),
260a80b4 211 pack('A', $self->{data_pack}),
c9b6d0d8 212 pack('n', $self->{max_buckets}),
260a80b4 213 );
0d0f3d5d 214
20b7f047 215 $self->_fileobj->set_transaction_offset( 13 );
216
0d0f3d5d 217 return;
218}
219
fde3db1a 220sub read_file_header {
e064ccd1 221 my $self = shift;
e064ccd1 222
7dcefff3 223 my $buffer = $self->_fileobj->read_at( 0, length(SIG_FILE) + 9 );
224 return unless length($buffer);
460b1067 225
226 my ($file_signature, $sig_header, $header_version, $size) = unpack(
227 'A4 A N N', $buffer
42f79e07 228 );
e064ccd1 229
460b1067 230 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 231 $self->_fileobj->close;
e96daec8 232 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 233 }
260a80b4 234
460b1067 235 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 236 $self->_fileobj->close;
e96daec8 237 $self->_throw_error( "Old file version found." );
460b1067 238 }
9b2370e0 239
7dcefff3 240 my $buffer2 = $self->_fileobj->read_at( undef, $size );
eff6a245 241 my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 );
15ba72cc 242
243 $self->_fileobj->set_transaction_offset( 13 );
244
460b1067 245 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 246 $self->_fileobj->close;
e96daec8 247 $self->_throw_error("Corrupted file - bad header");
e064ccd1 248 }
249
460b1067 250 #XXX Add warnings if values weren't set right
251 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
252
7dcefff3 253 return length($buffer) + length($buffer2);
e064ccd1 254}
255
460b1067 256sub setup_fh {
257 my $self = shift;
258 my ($obj) = @_;
70b55428 259
7dcefff3 260 # Need to remove use of $fh here
261 my $fh = $self->_fileobj->{fh};
6fde4ed2 262 flock $fh, LOCK_EX;
118ba343 263
260a80b4 264 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 265 unless ( $obj->{base_offset} ) {
e96daec8 266 my $bytes_read = $self->read_file_header;
118ba343 267
260a80b4 268 $self->calculate_sizes;
269
118ba343 270 ##
fde3db1a 271 # File is empty -- write header and master index
118ba343 272 ##
273 if (!$bytes_read) {
aa83bc1e 274 $self->_fileobj->audit( "# Database created on" );
359a01ac 275
e96daec8 276 $self->write_file_header;
118ba343 277
22e20cce 278 $obj->{base_offset} = $self->_fileobj->request_space(
279 $self->tag_size( $self->{index_size} ),
280 );
118ba343 281
9e4f83a0 282 $self->write_tag(
e96daec8 283 $obj->_base_offset, $obj->_type,
f37c15ab 284 chr(0)x$self->{index_size},
118ba343 285 );
286
287 # Flush the filehandle
288 my $old_fh = select $fh;
289 my $old_af = $|; $| = 1; $| = $old_af;
290 select $old_fh;
291 }
292 else {
293 $obj->{base_offset} = $bytes_read;
294
295 ##
fde3db1a 296 # Get our type from master index header
118ba343 297 ##
359a01ac 298 my $tag = $self->load_tag($obj->_base_offset);
299 unless ( $tag ) {
300 flock $fh, LOCK_UN;
301 $self->_throw_error("Corrupted file, no master index record");
302 }
118ba343 303
e96daec8 304 unless ($obj->_type eq $tag->{signature}) {
359a01ac 305 flock $fh, LOCK_UN;
e96daec8 306 $self->_throw_error("File type mismatch");
118ba343 307 }
308 }
118ba343 309 }
260a80b4 310 else {
311 $self->calculate_sizes;
312 }
e06824f8 313
673464d9 314 #XXX We have to make sure we don't mess up when autoflush isn't turned on
7dcefff3 315 $self->_fileobj->set_inode;
70b55428 316
6fde4ed2 317 flock $fh, LOCK_UN;
318
70b55428 319 return 1;
320}
321
16d1ad9b 322sub tag_size {
323 my $self = shift;
324 my ($size) = @_;
325 return SIG_SIZE + $self->{data_size} + $size;
326}
327
9e4f83a0 328sub write_tag {
20f7b20c 329 ##
330 # Given offset, signature and content, create tag and write to disk
331 ##
d4b1166e 332 my $self = shift;
e96daec8 333 my ($offset, $sig, $content) = @_;
f37c15ab 334 my $size = length( $content );
20f7b20c 335
7dcefff3 336 $self->_fileobj->print_at(
337 $offset,
338 $sig, pack($self->{data_pack}, $size), $content,
339 );
20f7b20c 340
f37c15ab 341 return unless defined $offset;
342
20f7b20c 343 return {
344 signature => $sig,
72e315ac 345 #XXX Is this even used?
86867f3a 346 size => $size,
347 offset => $offset + SIG_SIZE + $self->{data_size},
348 content => $content
20f7b20c 349 };
d4b1166e 350}
351
352sub load_tag {
20f7b20c 353 ##
354 # Given offset, load single tag and return signature, size and data
355 ##
d4b1166e 356 my $self = shift;
e96daec8 357 my ($offset) = @_;
20f7b20c 358
7dcefff3 359 my $fileobj = $self->_fileobj;
20f7b20c 360
86867f3a 361 my ($sig, $size) = unpack(
362 "A $self->{data_pack}",
363 $fileobj->read_at( $offset, SIG_SIZE + $self->{data_size} ),
364 );
20f7b20c 365
366 return {
367 signature => $sig,
c3aafc14 368 size => $size, #XXX Is this even used?
86867f3a 369 offset => $offset + SIG_SIZE + $self->{data_size},
370 content => $fileobj->read_at( undef, $size ),
20f7b20c 371 };
d4b1166e 372}
373
ea2f6d67 374sub find_keyloc {
375 my $self = shift;
376 my ($tag, $transaction_id) = @_;
377 $transaction_id = $self->_fileobj->transaction_id
378 unless defined $transaction_id;
379
380 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
381 my ($loc, $trans_id, $is_deleted) = unpack(
382 "$self->{long_pack} C C",
383 substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ),
384 );
385
386 if ( $loc == 0 ) {
387 return ( $loc, $is_deleted, $i * $self->{key_size} );
388 }
389
390 next if $transaction_id != $trans_id;
391
392 return ( $loc, $is_deleted, $i * $self->{key_size} );
393 }
394
395 return;
396}
397
20f7b20c 398sub add_bucket {
399 ##
400 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
401 # plain (undigested) key and value.
402 ##
d4b1166e 403 my $self = shift;
359a01ac 404 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
75be6413 405
eea0d863 406 # This verifies that only supported values will be stored.
407 {
408 my $r = Scalar::Util::reftype( $value );
eea0d863 409
86867f3a 410 last if !defined $r;
eea0d863 411 last if $r eq 'HASH';
412 last if $r eq 'ARRAY';
413
e96daec8 414 $self->_throw_error(
86867f3a 415 "Storage of references of type '$r' is not supported."
eea0d863 416 );
417 }
418
019404df 419 my $fileobj = $self->_fileobj;
20f7b20c 420
21838116 421 #ACID - This is a mutation. Must only find the exact transaction
ea2f6d67 422 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );
c9b6d0d8 423
424 my @transactions;
019404df 425 if ( $fileobj->transaction_id == 0 ) {
426 @transactions = $fileobj->current_transactions;
c9b6d0d8 427 }
75be6413 428
e96daec8 429# $self->_release_space( $size, $subloc );
f9c33187 430#XXX This needs updating to use _release_space
ea2f6d67 431
86867f3a 432 my $location;
ea2f6d67 433 my $size = $self->_length_needed( $value, $plain_key );
019404df 434
ea2f6d67 435 # Updating a known md5
436 if ( $keyloc ) {
437 my $keytag = $self->load_tag( $keyloc );
438 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
439
13ff93d5 440 if ( $subloc && !$is_deleted && @transactions ) {
ea2f6d67 441 my $old_value = $self->read_from_loc( $subloc, $orig_key );
442 my $old_size = $self->_length_needed( $old_value, $plain_key );
443
444 for my $trans_id ( @transactions ) {
445 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
446 unless ($loc) {
447 my $location2 = $fileobj->request_space( $old_size );
448 $fileobj->print_at( $keytag->{offset} + $offset2,
449 pack($self->{long_pack}, $location2 ),
450 pack( 'C C', $trans_id, 0 ),
451 );
c3aafc14 452 $self->_write_value( $location2, $plain_key, $old_value, $orig_key );
ea2f6d67 453 }
454 }
75be6413 455 }
504185fb 456
ea2f6d67 457 $location = $self->_fileobj->request_space( $size );
458 #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
459 $fileobj->print_at( $keytag->{offset} + $offset,
460 pack($self->{long_pack}, $location ),
461 pack( 'C C', $fileobj->transaction_id, 0 ),
462 );
75be6413 463 }
386bab6c 464 # Adding a new md5
ea2f6d67 465 else {
466 my $keyloc = $fileobj->request_space( $self->tag_size( $self->{keyloc_size} ) );
386bab6c 467
ea2f6d67 468 # The bucket fit into list
469 if ( defined $offset ) {
470 $fileobj->print_at( $tag->{offset} + $offset,
471 $md5, pack( $self->{long_pack}, $keyloc ),
472 );
473 }
474 # If bucket didn't fit into list, split into a new index level
475 else {
476 $self->split_index( $tag, $md5, $keyloc );
477 }
478
479 my $keytag = $self->write_tag(
480 $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
019404df 481 );
c9b6d0d8 482
ea2f6d67 483 $location = $self->_fileobj->request_space( $size );
484 $fileobj->print_at( $keytag->{offset},
485 pack( $self->{long_pack}, $location ),
486 pack( 'C C', $fileobj->transaction_id, 0 ),
487 );
488
489 my $offset = 1;
490 for my $trans_id ( @transactions ) {
491 $fileobj->print_at( $keytag->{offset} + $self->{key_size} * $offset++,
13ff93d5 492 pack( $self->{long_pack}, 0 ),
ea2f6d67 493 pack( 'C C', $trans_id, 1 ),
494 );
c9b6d0d8 495 }
386bab6c 496 }
20f7b20c 497
c3aafc14 498 $self->_write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 499
86867f3a 500 return 1;
d5d7c51d 501}
502
c3aafc14 503sub _write_value {
d5d7c51d 504 my $self = shift;
359a01ac 505 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 506
7dcefff3 507 my $fileobj = $self->_fileobj;
d5d7c51d 508
9d4fa373 509 my $dbm_deep_obj = _get_dbm_object( $value );
7dcefff3 510 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) {
e96daec8 511 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 512 }
d5d7c51d 513
20f7b20c 514 ##
d5d7c51d 515 # Write signature based on content type, set content length and write
516 # actual value.
20f7b20c 517 ##
9d4fa373 518 my $r = Scalar::Util::reftype( $value ) || '';
519 if ( $dbm_deep_obj ) {
7dcefff3 520 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 521 }
522 elsif ($r eq 'HASH') {
9d4fa373 523 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 524 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 525 }
7dcefff3 526 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 527 }
528 elsif ($r eq 'ARRAY') {
9d4fa373 529 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 530 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 531 }
7dcefff3 532 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 533 }
534 elsif (!defined($value)) {
7dcefff3 535 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 536 }
537 else {
7dcefff3 538 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 539 }
20f7b20c 540
d5d7c51d 541 ##
542 # Plain key is stored AFTER value, as keys are typically fetched less often.
543 ##
7dcefff3 544 $fileobj->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 545
9a187d8c 546 # Internal references don't care about autobless
9d4fa373 547 return 1 if $dbm_deep_obj;
9a187d8c 548
d5d7c51d 549 ##
550 # If value is blessed, preserve class name
551 ##
7dcefff3 552 if ( $fileobj->{autobless} ) {
633df1fd 553 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
7dcefff3 554 $fileobj->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 555 }
d5d7c51d 556 else {
7dcefff3 557 $fileobj->print_at( undef, chr(0) );
20f7b20c 558 }
d5d7c51d 559 }
20f7b20c 560
d5d7c51d 561 ##
56ec4340 562 # Tie the passed in reference so that changes to it are reflected in the
563 # datafile. The use of $location as the base_offset will act as the
564 # the linkage between parent and child.
565 #
566 # The overall assignment is a hack around the fact that just tying doesn't
567 # store the values. This may not be the wrong thing to do.
d5d7c51d 568 ##
9d4fa373 569 if ($r eq 'HASH') {
570 my %x = %$value;
571 tie %$value, 'DBM::Deep', {
572 base_offset => $location,
7dcefff3 573 fileobj => $fileobj,
359a01ac 574 parent => $self->{obj},
575 parent_key => $orig_key,
9d4fa373 576 };
577 %$value = %x;
bd01f6ec 578 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
9d4fa373 579 }
580 elsif ($r eq 'ARRAY') {
581 my @x = @$value;
582 tie @$value, 'DBM::Deep', {
583 base_offset => $location,
7dcefff3 584 fileobj => $fileobj,
359a01ac 585 parent => $self->{obj},
586 parent_key => $orig_key,
9d4fa373 587 };
588 @$value = @x;
bd01f6ec 589 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
20f7b20c 590 }
d4b1166e 591
d5d7c51d 592 return 1;
d4b1166e 593}
594
75be6413 595sub split_index {
596 my $self = shift;
ea2f6d67 597 my ($tag, $md5, $keyloc) = @_;
75be6413 598
019404df 599 my $fileobj = $self->_fileobj;
21838116 600
019404df 601 my $loc = $fileobj->request_space(
e96daec8 602 $self->tag_size( $self->{index_size} ),
16d1ad9b 603 );
604
019404df 605 $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 606
9e4f83a0 607 my $index_tag = $self->write_tag(
e96daec8 608 $loc, SIG_INDEX,
f37c15ab 609 chr(0)x$self->{index_size},
75be6413 610 );
611
7b1e1aa1 612 my $keys = $tag->{content}
ea2f6d67 613 . $md5 . pack($self->{long_pack}, $keyloc);
75be6413 614
f9c33187 615 my @newloc = ();
75be6413 616 BUCKET:
633df1fd 617 # The <= here is deliberate - we have max_buckets+1 keys to iterate
618 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 619 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
ea2f6d67 620 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 621
f9c33187 622 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
623 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 624
75be6413 625 my $num = ord(substr($key, $tag->{ch} + 1, 1));
626
f9c33187 627 if ($newloc[$num]) {
7dcefff3 628 my $subkeys = $fileobj->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 629
f9c33187 630 # This is looking for the first empty spot
ea2f6d67 631 my ($subloc, $offset) = $self->_find_in_buckets(
f9c33187 632 { content => $subkeys }, '',
7b1e1aa1 633 );
75be6413 634
633df1fd 635 $fileobj->print_at(
636 $newloc[$num] + $offset,
637 $key, pack($self->{long_pack}, $old_subloc),
638 );
7b1e1aa1 639
640 next;
75be6413 641 }
75be6413 642
019404df 643 my $loc = $fileobj->request_space(
e96daec8 644 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 645 );
2603d86e 646
019404df 647 $fileobj->print_at(
648 $index_tag->{offset} + ($num * $self->{long_size}),
649 pack($self->{long_pack}, $loc),
650 );
75be6413 651
7b1e1aa1 652 my $blist_tag = $self->write_tag(
e96daec8 653 $loc, SIG_BLIST,
7b1e1aa1 654 chr(0)x$self->{bucket_list_size},
655 );
656
019404df 657 $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 658
f9c33187 659 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 660 }
661
662 $self->_release_space(
e96daec8 663 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 664 $tag->{offset} - SIG_SIZE - $self->{data_size},
665 );
75be6413 666
ea2f6d67 667 return 1;
75be6413 668}
669
8db25060 670sub read_from_loc {
671 my $self = shift;
359a01ac 672 my ($subloc, $orig_key) = @_;
8db25060 673
7dcefff3 674 my $fileobj = $self->_fileobj;
8db25060 675
7dcefff3 676 my $signature = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 677
678 ##
679 # If value is a hash or array, return new DBM::Deep object with correct offset
680 ##
681 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
c3aafc14 682 #XXX This needs to be a singleton
bd01f6ec 683# my $new_obj;
684# my $is_autobless;
685# if ( $signature eq SIG_HASH ) {
686# $new_obj = {};
687# tie %$new_obj, 'DBM::Deep', {
688# base_offset => $subloc,
689# fileobj => $self->_fileobj,
690# parent => $self->{obj},
691# parent_key => $orig_key,
692# };
693# $is_autobless = tied(%$new_obj)->_fileobj->{autobless};
694# }
695# else {
696# $new_obj = [];
697# tie @$new_obj, 'DBM::Deep', {
698# base_offset => $subloc,
699# fileobj => $self->_fileobj,
700# parent => $self->{obj},
701# parent_key => $orig_key,
702# };
703# $is_autobless = tied(@$new_obj)->_fileobj->{autobless};
704# }
705#
706# if ($is_autobless) {
707
685e40f1 708 my $new_obj = DBM::Deep->new({
359a01ac 709 type => $signature,
8db25060 710 base_offset => $subloc,
e96daec8 711 fileobj => $self->_fileobj,
359a01ac 712 parent => $self->{obj},
713 parent_key => $orig_key,
685e40f1 714 });
8db25060 715
460b1067 716 if ($new_obj->_fileobj->{autobless}) {
8db25060 717 ##
718 # Skip over value and plain key to see if object needs
719 # to be re-blessed
720 ##
7dcefff3 721 $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 722
7dcefff3 723 my $size = $fileobj->read_at( undef, $self->{data_size} );
c6ea6b6c 724 $size = unpack($self->{data_pack}, $size);
7dcefff3 725 if ($size) { $fileobj->increment_pointer( $size ); }
8db25060 726
7dcefff3 727 my $bless_bit = $fileobj->read_at( undef, 1 );
86867f3a 728 if ( ord($bless_bit) ) {
729 my $size = unpack(
730 $self->{data_pack},
731 $fileobj->read_at( undef, $self->{data_size} ),
732 );
7dcefff3 733
86867f3a 734 if ( $size ) {
735 $new_obj = bless $new_obj, $fileobj->read_at( undef, $size );
736 }
8db25060 737 }
738 }
739
685e40f1 740 return $new_obj;
8db25060 741 }
742 elsif ( $signature eq SIG_INTERNAL ) {
7dcefff3 743 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 744 $size = unpack($self->{data_pack}, $size);
745
746 if ( $size ) {
7dcefff3 747 my $new_loc = $fileobj->read_at( undef, $size );
748 $new_loc = unpack( $self->{long_pack}, $new_loc );
359a01ac 749 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 750 }
751 else {
752 return;
753 }
754 }
755 ##
756 # Otherwise return actual value
757 ##
460b1067 758 elsif ( $signature eq SIG_DATA ) {
7dcefff3 759 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 760 $size = unpack($self->{data_pack}, $size);
761
86867f3a 762 my $value = $size ? $fileobj->read_at( undef, $size ) : '';
8db25060 763 return $value;
764 }
765
766 ##
767 # Key exists, but content is null
768 ##
769 return;
770}
771
9020ee8c 772sub get_bucket_value {
beac1dff 773 ##
774 # Fetch single value given tag and MD5 digested key.
775 ##
776 my $self = shift;
359a01ac 777 my ($tag, $md5, $orig_key) = @_;
9020ee8c 778
21838116 779 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 780 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
94e8af14 781
ea2f6d67 782 if ( !$keyloc ) {
94e8af14 783 #XXX Need to use real key
ea2f6d67 784# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key );
94e8af14 785# return;
786 }
ea2f6d67 787# elsif ( !$is_deleted ) {
788 else {
789 my $keytag = $self->load_tag( $keyloc );
790 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 791 if (!$subloc && !$is_deleted) {
ea2f6d67 792 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
793 }
794 if ( $subloc && !$is_deleted ) {
795 return $self->read_from_loc( $subloc, $orig_key );
796 }
386bab6c 797 }
94e8af14 798
beac1dff 799 return;
9020ee8c 800}
ab0e4957 801
802sub delete_bucket {
beac1dff 803 ##
804 # Delete single key/value pair given tag and MD5 digested key.
805 ##
806 my $self = shift;
a97c8f67 807 my ($tag, $md5, $orig_key) = @_;
ab0e4957 808
22e20cce 809 #ACID - Although this is a mutation, we must find any transaction.
810 # This is because we need to mark something as deleted that is in the HEAD.
ea2f6d67 811 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 812
ea2f6d67 813 return if !$keyloc;
633df1fd 814
815 my $fileobj = $self->_fileobj;
816
817 my @transactions;
818 if ( $fileobj->transaction_id == 0 ) {
819 @transactions = $fileobj->current_transactions;
820 }
821
633df1fd 822 if ( $fileobj->transaction_id == 0 ) {
ea2f6d67 823 my $keytag = $self->load_tag( $keyloc );
7a960a12 824
ea2f6d67 825 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 826 return if !$subloc || $is_deleted;
827
633df1fd 828 my $value = $self->read_from_loc( $subloc, $orig_key );
829
ea2f6d67 830 my $size = $self->_length_needed( $value, $orig_key );
831
832 for my $trans_id ( @transactions ) {
833 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
834 unless ($loc) {
835 my $location2 = $fileobj->request_space( $size );
836 $fileobj->print_at( $keytag->{offset} + $offset2,
837 pack($self->{long_pack}, $location2 ),
838 pack( 'C C', $trans_id, 0 ),
839 );
c3aafc14 840 $self->_write_value( $location2, $orig_key, $value, $orig_key );
ea2f6d67 841 }
633df1fd 842 }
843
ea2f6d67 844 $keytag = $self->load_tag( $keyloc );
845 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
846 $fileobj->print_at( $keytag->{offset} + $offset,
847 substr( $keytag->{content}, $offset + $self->{key_size} ),
848 chr(0) x $self->{key_size},
019404df 849 );
386bab6c 850 }
633df1fd 851 else {
ea2f6d67 852 my $keytag = $self->load_tag( $keyloc );
7a960a12 853
ea2f6d67 854 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 855
ea2f6d67 856 $fileobj->print_at( $keytag->{offset} + $offset,
13ff93d5 857 pack($self->{long_pack}, 0 ),
ea2f6d67 858 pack( 'C C', $fileobj->transaction_id, 1 ),
859 );
633df1fd 860 }
861
862 return 1;
ab0e4957 863}
864
912d50b1 865sub bucket_exists {
beac1dff 866 ##
867 # Check existence of single key given tag and MD5 digested key.
868 ##
869 my $self = shift;
e96daec8 870 my ($tag, $md5) = @_;
912d50b1 871
21838116 872 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 873 my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );
874 my $keytag = $self->load_tag( $keyloc );
875 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
13ff93d5 876 if ( !$subloc && !$is_deleted ) {
ea2f6d67 877 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
878 }
c9b6d0d8 879 return ($subloc && !$is_deleted) && 1;
912d50b1 880}
881
72e315ac 882sub find_blist {
beac1dff 883 ##
884 # Locate offset for bucket list, given digested key
885 ##
886 my $self = shift;
e96daec8 887 my ($offset, $md5, $args) = @_;
d0b74c17 888 $args = {} unless $args;
889
beac1dff 890 ##
891 # Locate offset for bucket list using digest index system
892 ##
e96daec8 893 my $tag = $self->load_tag( $offset )
894 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 895
e5fc7e69 896 my $ch = 0;
8db25060 897 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 898 my $num = ord substr($md5, $ch, 1);
899
900 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 901 $tag = $self->index_lookup( $tag, $num );
d0b74c17 902
903 if (!$tag) {
29b01632 904 return if !$args->{create};
d0b74c17 905
019404df 906 my $loc = $self->_fileobj->request_space(
e96daec8 907 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 908 );
909
019404df 910 $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 911
9e4f83a0 912 $tag = $self->write_tag(
e96daec8 913 $loc, SIG_BLIST,
f37c15ab 914 chr(0)x$self->{bucket_list_size},
d5d7c51d 915 );
916
917 $tag->{ref_loc} = $ref_loc;
918 $tag->{ch} = $ch;
919
920 last;
d0b74c17 921 }
922
16d1ad9b 923 $tag->{ch} = $ch++;
d0b74c17 924 $tag->{ref_loc} = $ref_loc;
beac1dff 925 }
d0b74c17 926
beac1dff 927 return $tag;
6736c116 928}
929
d0b74c17 930sub index_lookup {
931 ##
932 # Given index tag, lookup single entry in index and return .
933 ##
934 my $self = shift;
e96daec8 935 my ($tag, $index) = @_;
d0b74c17 936
937 my $location = unpack(
938 $self->{long_pack},
939 substr(
940 $tag->{content},
941 $index * $self->{long_size},
942 $self->{long_size},
943 ),
944 );
945
946 if (!$location) { return; }
947
e96daec8 948 return $self->load_tag( $location );
d0b74c17 949}
950
6736c116 951sub traverse_index {
beac1dff 952 ##
953 # Scan index and recursively step into deeper levels, looking for next key.
954 ##
6736c116 955 my $self = shift;
ea2f6d67 956 my ($xxxx, $offset, $ch, $force_return_next) = @_;
d0b74c17 957
e96daec8 958 my $tag = $self->load_tag( $offset );
6736c116 959
8db25060 960 if ($tag->{signature} ne SIG_BLIST) {
ea2f6d67 961 my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1));
d0b74c17 962
ea2f6d67 963 for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) {
e5fc7e69 964 my $subloc = unpack(
965 $self->{long_pack},
e06824f8 966 substr(
ea2f6d67 967 $tag->{content},
e06824f8 968 $idx * $self->{long_size},
969 $self->{long_size},
970 ),
e5fc7e69 971 );
972
beac1dff 973 if ($subloc) {
e5fc7e69 974 my $result = $self->traverse_index(
ea2f6d67 975 $xxxx, $subloc, $ch + 1, $force_return_next,
e5fc7e69 976 );
977
ea2f6d67 978 if (defined $result) { return $result; }
beac1dff 979 }
980 } # index loop
d0b74c17 981
ea2f6d67 982 $xxxx->{return_next} = 1;
42717e46 983 }
984 # This is the bucket list
e5fc7e69 985 else {
beac1dff 986 my $keys = $tag->{content};
ea2f6d67 987 if ($force_return_next) { $xxxx->{return_next} = 1; }
d0b74c17 988
beac1dff 989 ##
990 # Iterate through buckets, looking for a key match
991 ##
42717e46 992 my $transaction_id = $self->_fileobj->transaction_id;
8db25060 993 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
ea2f6d67 994 my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 995
8db25060 996 # End of bucket list -- return to outer loop
ea2f6d67 997 if (!$keyloc) {
998 $xxxx->{return_next} = 1;
beac1dff 999 last;
1000 }
8db25060 1001 # Located previous key -- return next one found
ea2f6d67 1002 elsif ($key eq $xxxx->{prev_md5}) {
1003 $xxxx->{return_next} = 1;
beac1dff 1004 next;
1005 }
8db25060 1006 # Seek to bucket location and skip over signature
ea2f6d67 1007 elsif ($xxxx->{return_next}) {
7dcefff3 1008 my $fileobj = $self->_fileobj;
d0b74c17 1009
ea2f6d67 1010 my $keytag = $self->load_tag( $keyloc );
1011 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 1012 if ( $subloc == 0 && !$is_deleted ) {
ea2f6d67 1013 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
1014 }
1015 next if $is_deleted;
1016
beac1dff 1017 # Skip over value to get to plain key
7dcefff3 1018 my $sig = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 1019
7dcefff3 1020 my $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 1021 $size = unpack($self->{data_pack}, $size);
7dcefff3 1022 if ($size) { $fileobj->increment_pointer( $size ); }
d0b74c17 1023
beac1dff 1024 # Read in plain key and return as scalar
7dcefff3 1025 $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 1026 $size = unpack($self->{data_pack}, $size);
86867f3a 1027
7dcefff3 1028 my $plain_key;
1029 if ($size) { $plain_key = $fileobj->read_at( undef, $size); }
beac1dff 1030 return $plain_key;
1031 }
8db25060 1032 }
d0b74c17 1033
ea2f6d67 1034 $xxxx->{return_next} = 1;
42717e46 1035 }
d0b74c17 1036
beac1dff 1037 return;
6736c116 1038}
1039
75be6413 1040# Utilities
1041
9cec1360 1042sub _get_key_subloc {
75be6413 1043 my $self = shift;
1044 my ($keys, $idx) = @_;
1045
ea2f6d67 1046 return unpack(
28394a1a 1047 # This is 'a', not 'A'. Please read the pack() documentation for the
1048 # difference between the two and why it's important.
ea2f6d67 1049 "a$self->{hash_size} $self->{long_pack}",
75be6413 1050 substr(
1051 $keys,
9cec1360 1052 ($idx * $self->{bucket_size}),
1053 $self->{bucket_size},
75be6413 1054 ),
1055 );
75be6413 1056}
1057
d608b06e 1058sub _find_in_buckets {
1059 my $self = shift;
ea2f6d67 1060 my ($tag, $md5) = @_;
21838116 1061
d608b06e 1062 BUCKET:
1063 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
ea2f6d67 1064 my ($key, $subloc) = $self->_get_key_subloc(
9a187d8c 1065 $tag->{content}, $i,
1066 );
d608b06e 1067
ea2f6d67 1068 my @rv = ($subloc, $i * $self->{bucket_size});
21838116 1069
1070 unless ( $subloc ) {
21838116 1071 return @rv;
1072 }
1073
1074 next BUCKET if $key ne $md5;
d608b06e 1075
21838116 1076 return @rv;
d608b06e 1077 }
1078
1079 return;
1080}
1081
994ccd8e 1082sub _release_space {
1083 my $self = shift;
e96daec8 1084 my ($size, $loc) = @_;
994ccd8e 1085
7b1e1aa1 1086 my $next_loc = 0;
1087
019404df 1088 $self->_fileobj->print_at( $loc,
1089 SIG_FREE,
1090 pack($self->{long_pack}, $size ),
1091 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1092 );
1093
994ccd8e 1094 return;
1095}
1096
e96daec8 1097sub _throw_error {
1098 die "DBM::Deep: $_[1]\n";
1099}
1100
86867f3a 1101sub _get_dbm_object {
1102 my $item = shift;
1103
1104 my $obj = eval {
1105 local $SIG{__DIE__};
1106 if ($item->isa( 'DBM::Deep' )) {
1107 return $item;
1108 }
1109 return;
1110 };
1111 return $obj if $obj;
1112
1113 my $r = Scalar::Util::reftype( $item ) || '';
1114 if ( $r eq 'HASH' ) {
1115 my $obj = eval {
1116 local $SIG{__DIE__};
1117 my $obj = tied(%$item);
1118 if ($obj->isa( 'DBM::Deep' )) {
1119 return $obj;
1120 }
1121 return;
1122 };
1123 return $obj if $obj;
1124 }
1125 elsif ( $r eq 'ARRAY' ) {
1126 my $obj = eval {
1127 local $SIG{__DIE__};
1128 my $obj = tied(@$item);
1129 if ($obj->isa( 'DBM::Deep' )) {
1130 return $obj;
1131 }
1132 return;
1133 };
1134 return $obj if $obj;
1135 }
1136
1137 return;
1138}
1139
1140sub _length_needed {
1141 my $self = shift;
1142 my ($value, $key) = @_;
1143
1144 my $is_dbm_deep = eval {
1145 local $SIG{'__DIE__'};
1146 $value->isa( 'DBM::Deep' );
1147 };
1148
1149 my $len = SIG_SIZE
1150 + $self->{data_size} # size for value
1151 + $self->{data_size} # size for key
1152 + length( $key ); # length of key
1153
1154 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
1155 # long_size is for the internal reference
1156 return $len + $self->{long_size};
1157 }
1158
1159 if ( $self->_fileobj->{autobless} ) {
1160 # This is for the bit saying whether or not this thing is blessed.
1161 $len += 1;
1162 }
1163
1164 my $r = Scalar::Util::reftype( $value ) || '';
1165 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1166 if ( defined $value ) {
1167 $len += length( $value );
1168 }
1169 return $len;
1170 }
1171
1172 $len += $self->{index_size};
1173
1174 # if autobless is enabled, must also take into consideration
1175 # the class name as it is stored after the key.
1176 if ( $self->_fileobj->{autobless} ) {
1177 my $c = Scalar::Util::blessed($value);
1178 if ( defined $c && !$is_dbm_deep ) {
1179 $len += $self->{data_size} + length($c);
1180 }
1181 }
1182
1183 return $len;
1184}
1185
a20d9a3f 11861;
1187__END__