r13306@rob-kinyons-powerbook58: rob | 2006-05-18 16:15:49 -0400
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
c3aafc14 8our $VERSION = q(0.99_03);
86867f3a 9
633df1fd 10use Fcntl qw( :DEFAULT :flock );
359a01ac 11use Scalar::Util ();
a20d9a3f 12
21838116 13# File-wide notes:
20b7f047 14# * To add to bucket_size, make sure you modify the following:
15# - calculate_sizes()
16# - _get_key_subloc()
17# - add_bucket() - where the buckets are printed
c3aafc14 18#
19# * Every method in here assumes that the _fileobj has been appropriately
20# safeguarded. This can be anything from flock() to some sort of manual
21# mutex. But, it's the caller's responsability to make sure that this has
22# been done.
21838116 23
8db25060 24##
25# Setup file and tag signatures. These should never change.
26##
27sub SIG_FILE () { 'DPDB' }
460b1067 28sub SIG_HEADER () { 'h' }
8db25060 29sub SIG_INTERNAL () { 'i' }
30sub SIG_HASH () { 'H' }
31sub SIG_ARRAY () { 'A' }
8db25060 32sub SIG_NULL () { 'N' }
33sub SIG_DATA () { 'D' }
34sub SIG_INDEX () { 'I' }
35sub SIG_BLIST () { 'B' }
7b1e1aa1 36sub SIG_FREE () { 'F' }
86867f3a 37sub SIG_KEYS () { 'K' }
8db25060 38sub SIG_SIZE () { 1 }
39
c3aafc14 40################################################################################
41#
42# This is new code. It is a complete rewrite of the engine based on a new API
43#
44################################################################################
45
46sub write_value {
47 my $self = shift;
48 my ($offset, $key, $value, $orig_key) = @_;
49
50 my $dig_key = $self->apply_digest( $key );
51 my $tag = $self->find_blist( $offset, $dig_key, { create => 1 } );
52 return $self->add_bucket( $tag, $dig_key, $key, $value, undef, $orig_key );
53}
54
55sub read_value {
56 my $self = shift;
e5a9d386 57 my ($offset, $key, $orig_key) = @_;
c3aafc14 58
59 my $dig_key = $self->apply_digest( $key );
e5a9d386 60 my $tag = $self->find_blist( $offset, $dig_key ) or return;
61 return $self->get_bucket_value( $tag, $dig_key, $orig_key );
c3aafc14 62}
63
64sub delete_key {
65 my $self = shift;
e5a9d386 66 my ($offset, $key, $orig_key) = @_;
c3aafc14 67
68 my $dig_key = $self->apply_digest( $key );
e5a9d386 69 my $tag = $self->find_blist( $offset, $dig_key ) or return;
70 my $value = $self->get_bucket_value( $tag, $dig_key, $orig_key );
71 $self->delete_bucket( $tag, $dig_key, $orig_key );
72 return $value;
c3aafc14 73}
74
75sub key_exists {
76 my $self = shift;
77 my ($offset, $key) = @_;
78
79 my $dig_key = $self->apply_digest( $key );
e5a9d386 80 # exists() returns the empty string, not undef
81 my $tag = $self->find_blist( $offset, $dig_key ) or return '';
c3aafc14 82 return $self->bucket_exists( $tag, $dig_key, $key );
83}
84
85sub XXXget_next_key {
86 my $self = shift;
87 my ($offset, $prev_key) = @_;
88
89# my $dig_key = $self->apply_digest( $key );
90}
91
92################################################################################
93#
94# Below here is the old code. It will be folded into the code above as it can.
95#
96################################################################################
97
612969fb 98sub new {
99 my $class = shift;
100 my ($args) = @_;
101
102 my $self = bless {
86867f3a 103 long_size => 4,
104 long_pack => 'N',
105 data_size => 4,
106 data_pack => 'N',
251dfd0e 107
86867f3a 108 digest => \&Digest::MD5::md5,
c3aafc14 109 hash_size => 16, # In bytes
251dfd0e 110
81d16922 111 ##
c3aafc14 112 # Number of buckets per blist before another level of indexing is
e0098e7f 113 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 114 # files. DO NOT decrease this value below 16, due to risk of recursive
115 # reindex overrun.
81d16922 116 ##
612969fb 117 max_buckets => 16,
460b1067 118
119 fileobj => undef,
359a01ac 120 obj => undef,
612969fb 121 }, $class;
122
e0098e7f 123 if ( defined $args->{pack_size} ) {
124 if ( lc $args->{pack_size} eq 'small' ) {
125 $args->{long_size} = 2;
c9b6d0d8 126 $args->{long_pack} = 'n';
e0098e7f 127 }
128 elsif ( lc $args->{pack_size} eq 'medium' ) {
129 $args->{long_size} = 4;
130 $args->{long_pack} = 'N';
131 }
132 elsif ( lc $args->{pack_size} eq 'large' ) {
133 $args->{long_size} = 8;
134 $args->{long_pack} = 'Q';
135 }
136 else {
137 die "Unknown pack_size value: '$args->{pack_size}'\n";
138 }
139 }
140
fde3db1a 141 # Grab the parameters we want to use
142 foreach my $param ( keys %$self ) {
143 next unless exists $args->{$param};
3e9498a1 144 $self->{$param} = $args->{$param};
fde3db1a 145 }
359a01ac 146 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 147
e0098e7f 148 if ( $self->{max_buckets} < 16 ) {
149 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
150 $self->{max_buckets} = 16;
151 }
152
260a80b4 153 return $self;
154}
155
460b1067 156sub _fileobj { return $_[0]{fileobj} }
460b1067 157
c3aafc14 158sub apply_digest {
159 my $self = shift;
160 return $self->{digest}->(@_);
161}
162
260a80b4 163sub calculate_sizes {
164 my $self = shift;
165
633df1fd 166 # The 2**8 here indicates the number of different characters in the
167 # current hashing algorithm
28394a1a 168 #XXX Does this need to be updated with different hashing algorithms?
ea2f6d67 169 $self->{hash_chars_used} = (2**8);
170 $self->{index_size} = $self->{hash_chars_used} * $self->{long_size};
171
172 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
e0098e7f 173 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 174
ea2f6d67 175 $self->{key_size} = $self->{long_size} * 2;
176 $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size};
177
260a80b4 178 return;
1bf65be7 179}
180
fde3db1a 181sub write_file_header {
0d0f3d5d 182 my $self = shift;
0d0f3d5d 183
eff6a245 184 my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 33 );
0d0f3d5d 185
019404df 186 $self->_fileobj->print_at( $loc,
260a80b4 187 SIG_FILE,
460b1067 188 SIG_HEADER,
189 pack('N', 1), # header version
eff6a245 190 pack('N', 24), # header size
191 pack('N4', 0, 0, 0, 0), # currently running transaction IDs
c9b6d0d8 192 pack('n', $self->{long_size}),
260a80b4 193 pack('A', $self->{long_pack}),
c9b6d0d8 194 pack('n', $self->{data_size}),
260a80b4 195 pack('A', $self->{data_pack}),
c9b6d0d8 196 pack('n', $self->{max_buckets}),
260a80b4 197 );
0d0f3d5d 198
20b7f047 199 $self->_fileobj->set_transaction_offset( 13 );
200
0d0f3d5d 201 return;
202}
203
fde3db1a 204sub read_file_header {
e064ccd1 205 my $self = shift;
e064ccd1 206
7dcefff3 207 my $buffer = $self->_fileobj->read_at( 0, length(SIG_FILE) + 9 );
208 return unless length($buffer);
460b1067 209
210 my ($file_signature, $sig_header, $header_version, $size) = unpack(
211 'A4 A N N', $buffer
42f79e07 212 );
e064ccd1 213
460b1067 214 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 215 $self->_fileobj->close;
e96daec8 216 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 217 }
260a80b4 218
460b1067 219 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 220 $self->_fileobj->close;
e96daec8 221 $self->_throw_error( "Old file version found." );
460b1067 222 }
9b2370e0 223
7dcefff3 224 my $buffer2 = $self->_fileobj->read_at( undef, $size );
eff6a245 225 my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 );
15ba72cc 226
227 $self->_fileobj->set_transaction_offset( 13 );
228
460b1067 229 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 230 $self->_fileobj->close;
e96daec8 231 $self->_throw_error("Corrupted file - bad header");
e064ccd1 232 }
233
460b1067 234 #XXX Add warnings if values weren't set right
235 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
236
7dcefff3 237 return length($buffer) + length($buffer2);
e064ccd1 238}
239
460b1067 240sub setup_fh {
241 my $self = shift;
242 my ($obj) = @_;
70b55428 243
7dcefff3 244 # Need to remove use of $fh here
245 my $fh = $self->_fileobj->{fh};
6fde4ed2 246 flock $fh, LOCK_EX;
118ba343 247
260a80b4 248 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 249 unless ( $obj->{base_offset} ) {
e96daec8 250 my $bytes_read = $self->read_file_header;
118ba343 251
260a80b4 252 $self->calculate_sizes;
253
118ba343 254 ##
fde3db1a 255 # File is empty -- write header and master index
118ba343 256 ##
257 if (!$bytes_read) {
aa83bc1e 258 $self->_fileobj->audit( "# Database created on" );
359a01ac 259
e96daec8 260 $self->write_file_header;
118ba343 261
22e20cce 262 $obj->{base_offset} = $self->_fileobj->request_space(
263 $self->tag_size( $self->{index_size} ),
264 );
118ba343 265
9e4f83a0 266 $self->write_tag(
e96daec8 267 $obj->_base_offset, $obj->_type,
f37c15ab 268 chr(0)x$self->{index_size},
118ba343 269 );
270
271 # Flush the filehandle
272 my $old_fh = select $fh;
273 my $old_af = $|; $| = 1; $| = $old_af;
274 select $old_fh;
275 }
276 else {
277 $obj->{base_offset} = $bytes_read;
278
279 ##
fde3db1a 280 # Get our type from master index header
118ba343 281 ##
359a01ac 282 my $tag = $self->load_tag($obj->_base_offset);
283 unless ( $tag ) {
284 flock $fh, LOCK_UN;
285 $self->_throw_error("Corrupted file, no master index record");
286 }
118ba343 287
e96daec8 288 unless ($obj->_type eq $tag->{signature}) {
359a01ac 289 flock $fh, LOCK_UN;
e96daec8 290 $self->_throw_error("File type mismatch");
118ba343 291 }
292 }
118ba343 293 }
260a80b4 294 else {
295 $self->calculate_sizes;
296 }
e06824f8 297
673464d9 298 #XXX We have to make sure we don't mess up when autoflush isn't turned on
7dcefff3 299 $self->_fileobj->set_inode;
70b55428 300
6fde4ed2 301 flock $fh, LOCK_UN;
302
70b55428 303 return 1;
304}
305
16d1ad9b 306sub tag_size {
307 my $self = shift;
308 my ($size) = @_;
309 return SIG_SIZE + $self->{data_size} + $size;
310}
311
9e4f83a0 312sub write_tag {
20f7b20c 313 ##
314 # Given offset, signature and content, create tag and write to disk
315 ##
d4b1166e 316 my $self = shift;
e96daec8 317 my ($offset, $sig, $content) = @_;
f37c15ab 318 my $size = length( $content );
20f7b20c 319
7dcefff3 320 $self->_fileobj->print_at(
321 $offset,
322 $sig, pack($self->{data_pack}, $size), $content,
323 );
20f7b20c 324
f37c15ab 325 return unless defined $offset;
326
20f7b20c 327 return {
328 signature => $sig,
72e315ac 329 #XXX Is this even used?
86867f3a 330 size => $size,
331 offset => $offset + SIG_SIZE + $self->{data_size},
332 content => $content
20f7b20c 333 };
d4b1166e 334}
335
336sub load_tag {
20f7b20c 337 ##
338 # Given offset, load single tag and return signature, size and data
339 ##
d4b1166e 340 my $self = shift;
e96daec8 341 my ($offset) = @_;
20f7b20c 342
7dcefff3 343 my $fileobj = $self->_fileobj;
20f7b20c 344
86867f3a 345 my ($sig, $size) = unpack(
346 "A $self->{data_pack}",
347 $fileobj->read_at( $offset, SIG_SIZE + $self->{data_size} ),
348 );
20f7b20c 349
350 return {
351 signature => $sig,
c3aafc14 352 size => $size, #XXX Is this even used?
86867f3a 353 offset => $offset + SIG_SIZE + $self->{data_size},
354 content => $fileobj->read_at( undef, $size ),
20f7b20c 355 };
d4b1166e 356}
357
ea2f6d67 358sub find_keyloc {
359 my $self = shift;
360 my ($tag, $transaction_id) = @_;
361 $transaction_id = $self->_fileobj->transaction_id
362 unless defined $transaction_id;
363
364 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
365 my ($loc, $trans_id, $is_deleted) = unpack(
366 "$self->{long_pack} C C",
367 substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ),
368 );
369
370 if ( $loc == 0 ) {
371 return ( $loc, $is_deleted, $i * $self->{key_size} );
372 }
373
374 next if $transaction_id != $trans_id;
375
376 return ( $loc, $is_deleted, $i * $self->{key_size} );
377 }
378
379 return;
380}
381
20f7b20c 382sub add_bucket {
383 ##
384 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
385 # plain (undigested) key and value.
386 ##
d4b1166e 387 my $self = shift;
359a01ac 388 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
75be6413 389
eea0d863 390 # This verifies that only supported values will be stored.
391 {
392 my $r = Scalar::Util::reftype( $value );
eea0d863 393
86867f3a 394 last if !defined $r;
eea0d863 395 last if $r eq 'HASH';
396 last if $r eq 'ARRAY';
397
e96daec8 398 $self->_throw_error(
86867f3a 399 "Storage of references of type '$r' is not supported."
eea0d863 400 );
401 }
402
019404df 403 my $fileobj = $self->_fileobj;
20f7b20c 404
21838116 405 #ACID - This is a mutation. Must only find the exact transaction
ea2f6d67 406 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );
c9b6d0d8 407
408 my @transactions;
019404df 409 if ( $fileobj->transaction_id == 0 ) {
410 @transactions = $fileobj->current_transactions;
c9b6d0d8 411 }
75be6413 412
e96daec8 413# $self->_release_space( $size, $subloc );
f9c33187 414#XXX This needs updating to use _release_space
ea2f6d67 415
86867f3a 416 my $location;
ea2f6d67 417 my $size = $self->_length_needed( $value, $plain_key );
019404df 418
ea2f6d67 419 # Updating a known md5
420 if ( $keyloc ) {
421 my $keytag = $self->load_tag( $keyloc );
422 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
423
13ff93d5 424 if ( $subloc && !$is_deleted && @transactions ) {
ea2f6d67 425 my $old_value = $self->read_from_loc( $subloc, $orig_key );
426 my $old_size = $self->_length_needed( $old_value, $plain_key );
427
428 for my $trans_id ( @transactions ) {
429 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
430 unless ($loc) {
431 my $location2 = $fileobj->request_space( $old_size );
432 $fileobj->print_at( $keytag->{offset} + $offset2,
433 pack($self->{long_pack}, $location2 ),
434 pack( 'C C', $trans_id, 0 ),
435 );
c3aafc14 436 $self->_write_value( $location2, $plain_key, $old_value, $orig_key );
ea2f6d67 437 }
438 }
75be6413 439 }
504185fb 440
ea2f6d67 441 $location = $self->_fileobj->request_space( $size );
442 #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
443 $fileobj->print_at( $keytag->{offset} + $offset,
444 pack($self->{long_pack}, $location ),
445 pack( 'C C', $fileobj->transaction_id, 0 ),
446 );
75be6413 447 }
386bab6c 448 # Adding a new md5
ea2f6d67 449 else {
450 my $keyloc = $fileobj->request_space( $self->tag_size( $self->{keyloc_size} ) );
386bab6c 451
ea2f6d67 452 # The bucket fit into list
453 if ( defined $offset ) {
454 $fileobj->print_at( $tag->{offset} + $offset,
455 $md5, pack( $self->{long_pack}, $keyloc ),
456 );
457 }
458 # If bucket didn't fit into list, split into a new index level
459 else {
460 $self->split_index( $tag, $md5, $keyloc );
461 }
462
463 my $keytag = $self->write_tag(
464 $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
019404df 465 );
c9b6d0d8 466
ea2f6d67 467 $location = $self->_fileobj->request_space( $size );
468 $fileobj->print_at( $keytag->{offset},
469 pack( $self->{long_pack}, $location ),
470 pack( 'C C', $fileobj->transaction_id, 0 ),
471 );
472
473 my $offset = 1;
474 for my $trans_id ( @transactions ) {
475 $fileobj->print_at( $keytag->{offset} + $self->{key_size} * $offset++,
13ff93d5 476 pack( $self->{long_pack}, 0 ),
ea2f6d67 477 pack( 'C C', $trans_id, 1 ),
478 );
c9b6d0d8 479 }
386bab6c 480 }
20f7b20c 481
c3aafc14 482 $self->_write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 483
86867f3a 484 return 1;
d5d7c51d 485}
486
c3aafc14 487sub _write_value {
d5d7c51d 488 my $self = shift;
359a01ac 489 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 490
7dcefff3 491 my $fileobj = $self->_fileobj;
d5d7c51d 492
9d4fa373 493 my $dbm_deep_obj = _get_dbm_object( $value );
7dcefff3 494 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) {
e96daec8 495 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 496 }
d5d7c51d 497
20f7b20c 498 ##
d5d7c51d 499 # Write signature based on content type, set content length and write
500 # actual value.
20f7b20c 501 ##
9d4fa373 502 my $r = Scalar::Util::reftype( $value ) || '';
503 if ( $dbm_deep_obj ) {
7dcefff3 504 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 505 }
506 elsif ($r eq 'HASH') {
9d4fa373 507 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 508 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 509 }
7dcefff3 510 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 511 }
512 elsif ($r eq 'ARRAY') {
9d4fa373 513 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 514 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 515 }
7dcefff3 516 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 517 }
518 elsif (!defined($value)) {
7dcefff3 519 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 520 }
521 else {
7dcefff3 522 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 523 }
20f7b20c 524
d5d7c51d 525 ##
526 # Plain key is stored AFTER value, as keys are typically fetched less often.
527 ##
7dcefff3 528 $fileobj->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 529
9a187d8c 530 # Internal references don't care about autobless
9d4fa373 531 return 1 if $dbm_deep_obj;
9a187d8c 532
d5d7c51d 533 ##
534 # If value is blessed, preserve class name
535 ##
7dcefff3 536 if ( $fileobj->{autobless} ) {
633df1fd 537 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
7dcefff3 538 $fileobj->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 539 }
d5d7c51d 540 else {
7dcefff3 541 $fileobj->print_at( undef, chr(0) );
20f7b20c 542 }
d5d7c51d 543 }
20f7b20c 544
d5d7c51d 545 ##
56ec4340 546 # Tie the passed in reference so that changes to it are reflected in the
547 # datafile. The use of $location as the base_offset will act as the
548 # the linkage between parent and child.
549 #
550 # The overall assignment is a hack around the fact that just tying doesn't
551 # store the values. This may not be the wrong thing to do.
d5d7c51d 552 ##
9d4fa373 553 if ($r eq 'HASH') {
554 my %x = %$value;
555 tie %$value, 'DBM::Deep', {
556 base_offset => $location,
7dcefff3 557 fileobj => $fileobj,
359a01ac 558 parent => $self->{obj},
559 parent_key => $orig_key,
9d4fa373 560 };
561 %$value = %x;
562 }
563 elsif ($r eq 'ARRAY') {
564 my @x = @$value;
565 tie @$value, 'DBM::Deep', {
566 base_offset => $location,
7dcefff3 567 fileobj => $fileobj,
359a01ac 568 parent => $self->{obj},
569 parent_key => $orig_key,
9d4fa373 570 };
571 @$value = @x;
20f7b20c 572 }
d4b1166e 573
d5d7c51d 574 return 1;
d4b1166e 575}
576
75be6413 577sub split_index {
578 my $self = shift;
ea2f6d67 579 my ($tag, $md5, $keyloc) = @_;
75be6413 580
019404df 581 my $fileobj = $self->_fileobj;
21838116 582
019404df 583 my $loc = $fileobj->request_space(
e96daec8 584 $self->tag_size( $self->{index_size} ),
16d1ad9b 585 );
586
019404df 587 $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 588
9e4f83a0 589 my $index_tag = $self->write_tag(
e96daec8 590 $loc, SIG_INDEX,
f37c15ab 591 chr(0)x$self->{index_size},
75be6413 592 );
593
7b1e1aa1 594 my $keys = $tag->{content}
ea2f6d67 595 . $md5 . pack($self->{long_pack}, $keyloc);
75be6413 596
f9c33187 597 my @newloc = ();
75be6413 598 BUCKET:
633df1fd 599 # The <= here is deliberate - we have max_buckets+1 keys to iterate
600 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 601 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
ea2f6d67 602 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 603
f9c33187 604 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
605 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 606
75be6413 607 my $num = ord(substr($key, $tag->{ch} + 1, 1));
608
f9c33187 609 if ($newloc[$num]) {
7dcefff3 610 my $subkeys = $fileobj->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 611
f9c33187 612 # This is looking for the first empty spot
ea2f6d67 613 my ($subloc, $offset) = $self->_find_in_buckets(
f9c33187 614 { content => $subkeys }, '',
7b1e1aa1 615 );
75be6413 616
633df1fd 617 $fileobj->print_at(
618 $newloc[$num] + $offset,
619 $key, pack($self->{long_pack}, $old_subloc),
620 );
7b1e1aa1 621
622 next;
75be6413 623 }
75be6413 624
019404df 625 my $loc = $fileobj->request_space(
e96daec8 626 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 627 );
2603d86e 628
019404df 629 $fileobj->print_at(
630 $index_tag->{offset} + ($num * $self->{long_size}),
631 pack($self->{long_pack}, $loc),
632 );
75be6413 633
7b1e1aa1 634 my $blist_tag = $self->write_tag(
e96daec8 635 $loc, SIG_BLIST,
7b1e1aa1 636 chr(0)x$self->{bucket_list_size},
637 );
638
019404df 639 $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 640
f9c33187 641 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 642 }
643
644 $self->_release_space(
e96daec8 645 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 646 $tag->{offset} - SIG_SIZE - $self->{data_size},
647 );
75be6413 648
ea2f6d67 649 return 1;
75be6413 650}
651
8db25060 652sub read_from_loc {
653 my $self = shift;
359a01ac 654 my ($subloc, $orig_key) = @_;
8db25060 655
7dcefff3 656 my $fileobj = $self->_fileobj;
8db25060 657
7dcefff3 658 my $signature = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 659
660 ##
661 # If value is a hash or array, return new DBM::Deep object with correct offset
662 ##
663 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
c3aafc14 664 #XXX This needs to be a singleton
685e40f1 665 my $new_obj = DBM::Deep->new({
359a01ac 666 type => $signature,
8db25060 667 base_offset => $subloc,
e96daec8 668 fileobj => $self->_fileobj,
359a01ac 669 parent => $self->{obj},
670 parent_key => $orig_key,
685e40f1 671 });
8db25060 672
460b1067 673 if ($new_obj->_fileobj->{autobless}) {
8db25060 674 ##
675 # Skip over value and plain key to see if object needs
676 # to be re-blessed
677 ##
7dcefff3 678 $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 679
7dcefff3 680 my $size = $fileobj->read_at( undef, $self->{data_size} );
c6ea6b6c 681 $size = unpack($self->{data_pack}, $size);
7dcefff3 682 if ($size) { $fileobj->increment_pointer( $size ); }
8db25060 683
7dcefff3 684 my $bless_bit = $fileobj->read_at( undef, 1 );
86867f3a 685 if ( ord($bless_bit) ) {
686 my $size = unpack(
687 $self->{data_pack},
688 $fileobj->read_at( undef, $self->{data_size} ),
689 );
7dcefff3 690
86867f3a 691 if ( $size ) {
692 $new_obj = bless $new_obj, $fileobj->read_at( undef, $size );
693 }
8db25060 694 }
695 }
696
685e40f1 697 return $new_obj;
8db25060 698 }
699 elsif ( $signature eq SIG_INTERNAL ) {
7dcefff3 700 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 701 $size = unpack($self->{data_pack}, $size);
702
703 if ( $size ) {
7dcefff3 704 my $new_loc = $fileobj->read_at( undef, $size );
705 $new_loc = unpack( $self->{long_pack}, $new_loc );
359a01ac 706 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 707 }
708 else {
709 return;
710 }
711 }
712 ##
713 # Otherwise return actual value
714 ##
460b1067 715 elsif ( $signature eq SIG_DATA ) {
7dcefff3 716 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 717 $size = unpack($self->{data_pack}, $size);
718
86867f3a 719 my $value = $size ? $fileobj->read_at( undef, $size ) : '';
8db25060 720 return $value;
721 }
722
723 ##
724 # Key exists, but content is null
725 ##
726 return;
727}
728
9020ee8c 729sub get_bucket_value {
beac1dff 730 ##
731 # Fetch single value given tag and MD5 digested key.
732 ##
733 my $self = shift;
359a01ac 734 my ($tag, $md5, $orig_key) = @_;
9020ee8c 735
21838116 736 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 737 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
94e8af14 738
ea2f6d67 739 if ( !$keyloc ) {
94e8af14 740 #XXX Need to use real key
ea2f6d67 741# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key );
94e8af14 742# return;
743 }
ea2f6d67 744# elsif ( !$is_deleted ) {
745 else {
746 my $keytag = $self->load_tag( $keyloc );
747 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 748 if (!$subloc && !$is_deleted) {
ea2f6d67 749 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
750 }
751 if ( $subloc && !$is_deleted ) {
752 return $self->read_from_loc( $subloc, $orig_key );
753 }
386bab6c 754 }
94e8af14 755
beac1dff 756 return;
9020ee8c 757}
ab0e4957 758
759sub delete_bucket {
beac1dff 760 ##
761 # Delete single key/value pair given tag and MD5 digested key.
762 ##
763 my $self = shift;
a97c8f67 764 my ($tag, $md5, $orig_key) = @_;
ab0e4957 765
22e20cce 766 #ACID - Although this is a mutation, we must find any transaction.
767 # This is because we need to mark something as deleted that is in the HEAD.
ea2f6d67 768 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 769
ea2f6d67 770 return if !$keyloc;
633df1fd 771
772 my $fileobj = $self->_fileobj;
773
774 my @transactions;
775 if ( $fileobj->transaction_id == 0 ) {
776 @transactions = $fileobj->current_transactions;
777 }
778
633df1fd 779 if ( $fileobj->transaction_id == 0 ) {
ea2f6d67 780 my $keytag = $self->load_tag( $keyloc );
7a960a12 781
ea2f6d67 782 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 783 return if !$subloc || $is_deleted;
784
633df1fd 785 my $value = $self->read_from_loc( $subloc, $orig_key );
786
ea2f6d67 787 my $size = $self->_length_needed( $value, $orig_key );
788
789 for my $trans_id ( @transactions ) {
790 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
791 unless ($loc) {
792 my $location2 = $fileobj->request_space( $size );
793 $fileobj->print_at( $keytag->{offset} + $offset2,
794 pack($self->{long_pack}, $location2 ),
795 pack( 'C C', $trans_id, 0 ),
796 );
c3aafc14 797 $self->_write_value( $location2, $orig_key, $value, $orig_key );
ea2f6d67 798 }
633df1fd 799 }
800
ea2f6d67 801 $keytag = $self->load_tag( $keyloc );
802 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
803 $fileobj->print_at( $keytag->{offset} + $offset,
804 substr( $keytag->{content}, $offset + $self->{key_size} ),
805 chr(0) x $self->{key_size},
019404df 806 );
386bab6c 807 }
633df1fd 808 else {
ea2f6d67 809 my $keytag = $self->load_tag( $keyloc );
7a960a12 810
ea2f6d67 811 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 812
ea2f6d67 813 $fileobj->print_at( $keytag->{offset} + $offset,
13ff93d5 814 pack($self->{long_pack}, 0 ),
ea2f6d67 815 pack( 'C C', $fileobj->transaction_id, 1 ),
816 );
633df1fd 817 }
818
819 return 1;
ab0e4957 820}
821
912d50b1 822sub bucket_exists {
beac1dff 823 ##
824 # Check existence of single key given tag and MD5 digested key.
825 ##
826 my $self = shift;
e96daec8 827 my ($tag, $md5) = @_;
912d50b1 828
21838116 829 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 830 my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );
831 my $keytag = $self->load_tag( $keyloc );
832 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
13ff93d5 833 if ( !$subloc && !$is_deleted ) {
ea2f6d67 834 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
835 }
c9b6d0d8 836 return ($subloc && !$is_deleted) && 1;
912d50b1 837}
838
72e315ac 839sub find_blist {
beac1dff 840 ##
841 # Locate offset for bucket list, given digested key
842 ##
843 my $self = shift;
e96daec8 844 my ($offset, $md5, $args) = @_;
d0b74c17 845 $args = {} unless $args;
846
beac1dff 847 ##
848 # Locate offset for bucket list using digest index system
849 ##
e96daec8 850 my $tag = $self->load_tag( $offset )
851 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 852
e5fc7e69 853 my $ch = 0;
8db25060 854 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 855 my $num = ord substr($md5, $ch, 1);
856
857 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 858 $tag = $self->index_lookup( $tag, $num );
d0b74c17 859
860 if (!$tag) {
29b01632 861 return if !$args->{create};
d0b74c17 862
019404df 863 my $loc = $self->_fileobj->request_space(
e96daec8 864 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 865 );
866
019404df 867 $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 868
9e4f83a0 869 $tag = $self->write_tag(
e96daec8 870 $loc, SIG_BLIST,
f37c15ab 871 chr(0)x$self->{bucket_list_size},
d5d7c51d 872 );
873
874 $tag->{ref_loc} = $ref_loc;
875 $tag->{ch} = $ch;
876
877 last;
d0b74c17 878 }
879
16d1ad9b 880 $tag->{ch} = $ch++;
d0b74c17 881 $tag->{ref_loc} = $ref_loc;
beac1dff 882 }
d0b74c17 883
beac1dff 884 return $tag;
6736c116 885}
886
d0b74c17 887sub index_lookup {
888 ##
889 # Given index tag, lookup single entry in index and return .
890 ##
891 my $self = shift;
e96daec8 892 my ($tag, $index) = @_;
d0b74c17 893
894 my $location = unpack(
895 $self->{long_pack},
896 substr(
897 $tag->{content},
898 $index * $self->{long_size},
899 $self->{long_size},
900 ),
901 );
902
903 if (!$location) { return; }
904
e96daec8 905 return $self->load_tag( $location );
d0b74c17 906}
907
6736c116 908sub traverse_index {
beac1dff 909 ##
910 # Scan index and recursively step into deeper levels, looking for next key.
911 ##
6736c116 912 my $self = shift;
ea2f6d67 913 my ($xxxx, $offset, $ch, $force_return_next) = @_;
d0b74c17 914
e96daec8 915 my $tag = $self->load_tag( $offset );
6736c116 916
8db25060 917 if ($tag->{signature} ne SIG_BLIST) {
ea2f6d67 918 my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1));
d0b74c17 919
ea2f6d67 920 for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) {
e5fc7e69 921 my $subloc = unpack(
922 $self->{long_pack},
e06824f8 923 substr(
ea2f6d67 924 $tag->{content},
e06824f8 925 $idx * $self->{long_size},
926 $self->{long_size},
927 ),
e5fc7e69 928 );
929
beac1dff 930 if ($subloc) {
e5fc7e69 931 my $result = $self->traverse_index(
ea2f6d67 932 $xxxx, $subloc, $ch + 1, $force_return_next,
e5fc7e69 933 );
934
ea2f6d67 935 if (defined $result) { return $result; }
beac1dff 936 }
937 } # index loop
d0b74c17 938
ea2f6d67 939 $xxxx->{return_next} = 1;
42717e46 940 }
941 # This is the bucket list
e5fc7e69 942 else {
beac1dff 943 my $keys = $tag->{content};
ea2f6d67 944 if ($force_return_next) { $xxxx->{return_next} = 1; }
d0b74c17 945
beac1dff 946 ##
947 # Iterate through buckets, looking for a key match
948 ##
42717e46 949 my $transaction_id = $self->_fileobj->transaction_id;
8db25060 950 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
ea2f6d67 951 my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 952
8db25060 953 # End of bucket list -- return to outer loop
ea2f6d67 954 if (!$keyloc) {
955 $xxxx->{return_next} = 1;
beac1dff 956 last;
957 }
8db25060 958 # Located previous key -- return next one found
ea2f6d67 959 elsif ($key eq $xxxx->{prev_md5}) {
960 $xxxx->{return_next} = 1;
beac1dff 961 next;
962 }
8db25060 963 # Seek to bucket location and skip over signature
ea2f6d67 964 elsif ($xxxx->{return_next}) {
7dcefff3 965 my $fileobj = $self->_fileobj;
d0b74c17 966
ea2f6d67 967 my $keytag = $self->load_tag( $keyloc );
968 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 969 if ( $subloc == 0 && !$is_deleted ) {
ea2f6d67 970 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
971 }
972 next if $is_deleted;
973
beac1dff 974 # Skip over value to get to plain key
7dcefff3 975 my $sig = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 976
7dcefff3 977 my $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 978 $size = unpack($self->{data_pack}, $size);
7dcefff3 979 if ($size) { $fileobj->increment_pointer( $size ); }
d0b74c17 980
beac1dff 981 # Read in plain key and return as scalar
7dcefff3 982 $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 983 $size = unpack($self->{data_pack}, $size);
86867f3a 984
7dcefff3 985 my $plain_key;
986 if ($size) { $plain_key = $fileobj->read_at( undef, $size); }
beac1dff 987 return $plain_key;
988 }
8db25060 989 }
d0b74c17 990
ea2f6d67 991 $xxxx->{return_next} = 1;
42717e46 992 }
d0b74c17 993
beac1dff 994 return;
6736c116 995}
996
997sub get_next_key {
beac1dff 998 ##
999 # Locate next key, given digested previous one
1000 ##
6736c116 1001 my $self = shift;
1002 my ($obj) = @_;
d0b74c17 1003
beac1dff 1004 ##
1005 # If the previous key was not specifed, start at the top and
1006 # return the first one found.
1007 ##
ea2f6d67 1008 my $temp;
1009 if ( @_ > 1 ) {
1010 $temp = {
1011 prev_md5 => $_[1],
1012 return_next => 0,
1013 };
1014 }
1015 else {
1016 $temp = {
1017 prev_md5 => chr(0) x $self->{hash_size},
1018 return_next => 1,
1019 };
beac1dff 1020 }
d0b74c17 1021
ea2f6d67 1022 return $self->traverse_index( $temp, $obj->_base_offset, 0 );
6736c116 1023}
1024
75be6413 1025# Utilities
1026
9cec1360 1027sub _get_key_subloc {
75be6413 1028 my $self = shift;
1029 my ($keys, $idx) = @_;
1030
ea2f6d67 1031 return unpack(
28394a1a 1032 # This is 'a', not 'A'. Please read the pack() documentation for the
1033 # difference between the two and why it's important.
ea2f6d67 1034 "a$self->{hash_size} $self->{long_pack}",
75be6413 1035 substr(
1036 $keys,
9cec1360 1037 ($idx * $self->{bucket_size}),
1038 $self->{bucket_size},
75be6413 1039 ),
1040 );
75be6413 1041}
1042
d608b06e 1043sub _find_in_buckets {
1044 my $self = shift;
ea2f6d67 1045 my ($tag, $md5) = @_;
21838116 1046
d608b06e 1047 BUCKET:
1048 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
ea2f6d67 1049 my ($key, $subloc) = $self->_get_key_subloc(
9a187d8c 1050 $tag->{content}, $i,
1051 );
d608b06e 1052
ea2f6d67 1053 my @rv = ($subloc, $i * $self->{bucket_size});
21838116 1054
1055 unless ( $subloc ) {
21838116 1056 return @rv;
1057 }
1058
1059 next BUCKET if $key ne $md5;
d608b06e 1060
21838116 1061 return @rv;
d608b06e 1062 }
1063
1064 return;
1065}
1066
994ccd8e 1067sub _release_space {
1068 my $self = shift;
e96daec8 1069 my ($size, $loc) = @_;
994ccd8e 1070
7b1e1aa1 1071 my $next_loc = 0;
1072
019404df 1073 $self->_fileobj->print_at( $loc,
1074 SIG_FREE,
1075 pack($self->{long_pack}, $size ),
1076 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1077 );
1078
994ccd8e 1079 return;
1080}
1081
e96daec8 1082sub _throw_error {
1083 die "DBM::Deep: $_[1]\n";
1084}
1085
86867f3a 1086sub _get_dbm_object {
1087 my $item = shift;
1088
1089 my $obj = eval {
1090 local $SIG{__DIE__};
1091 if ($item->isa( 'DBM::Deep' )) {
1092 return $item;
1093 }
1094 return;
1095 };
1096 return $obj if $obj;
1097
1098 my $r = Scalar::Util::reftype( $item ) || '';
1099 if ( $r eq 'HASH' ) {
1100 my $obj = eval {
1101 local $SIG{__DIE__};
1102 my $obj = tied(%$item);
1103 if ($obj->isa( 'DBM::Deep' )) {
1104 return $obj;
1105 }
1106 return;
1107 };
1108 return $obj if $obj;
1109 }
1110 elsif ( $r eq 'ARRAY' ) {
1111 my $obj = eval {
1112 local $SIG{__DIE__};
1113 my $obj = tied(@$item);
1114 if ($obj->isa( 'DBM::Deep' )) {
1115 return $obj;
1116 }
1117 return;
1118 };
1119 return $obj if $obj;
1120 }
1121
1122 return;
1123}
1124
1125sub _length_needed {
1126 my $self = shift;
1127 my ($value, $key) = @_;
1128
1129 my $is_dbm_deep = eval {
1130 local $SIG{'__DIE__'};
1131 $value->isa( 'DBM::Deep' );
1132 };
1133
1134 my $len = SIG_SIZE
1135 + $self->{data_size} # size for value
1136 + $self->{data_size} # size for key
1137 + length( $key ); # length of key
1138
1139 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
1140 # long_size is for the internal reference
1141 return $len + $self->{long_size};
1142 }
1143
1144 if ( $self->_fileobj->{autobless} ) {
1145 # This is for the bit saying whether or not this thing is blessed.
1146 $len += 1;
1147 }
1148
1149 my $r = Scalar::Util::reftype( $value ) || '';
1150 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1151 if ( defined $value ) {
1152 $len += length( $value );
1153 }
1154 return $len;
1155 }
1156
1157 $len += $self->{index_size};
1158
1159 # if autobless is enabled, must also take into consideration
1160 # the class name as it is stored after the key.
1161 if ( $self->_fileobj->{autobless} ) {
1162 my $c = Scalar::Util::blessed($value);
1163 if ( defined $c && !$is_dbm_deep ) {
1164 $len += $self->{data_size} + length($c);
1165 }
1166 }
1167
1168 return $len;
1169}
1170
a20d9a3f 11711;
1172__END__