r13304@rob-kinyons-powerbook58: rob | 2006-05-18 15:10:48 -0400
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
c3aafc14 8our $VERSION = q(0.99_03);
86867f3a 9
633df1fd 10use Fcntl qw( :DEFAULT :flock );
359a01ac 11use Scalar::Util ();
a20d9a3f 12
21838116 13# File-wide notes:
20b7f047 14# * To add to bucket_size, make sure you modify the following:
15# - calculate_sizes()
16# - _get_key_subloc()
17# - add_bucket() - where the buckets are printed
c3aafc14 18#
19# * Every method in here assumes that the _fileobj has been appropriately
20# safeguarded. This can be anything from flock() to some sort of manual
21# mutex. But, it's the caller's responsability to make sure that this has
22# been done.
21838116 23
8db25060 24##
25# Setup file and tag signatures. These should never change.
26##
27sub SIG_FILE () { 'DPDB' }
460b1067 28sub SIG_HEADER () { 'h' }
8db25060 29sub SIG_INTERNAL () { 'i' }
30sub SIG_HASH () { 'H' }
31sub SIG_ARRAY () { 'A' }
8db25060 32sub SIG_NULL () { 'N' }
33sub SIG_DATA () { 'D' }
34sub SIG_INDEX () { 'I' }
35sub SIG_BLIST () { 'B' }
7b1e1aa1 36sub SIG_FREE () { 'F' }
86867f3a 37sub SIG_KEYS () { 'K' }
8db25060 38sub SIG_SIZE () { 1 }
39
c3aafc14 40################################################################################
41#
42# This is new code. It is a complete rewrite of the engine based on a new API
43#
44################################################################################
45
46sub write_value {
47 my $self = shift;
48 my ($offset, $key, $value, $orig_key) = @_;
49
50 my $dig_key = $self->apply_digest( $key );
51 my $tag = $self->find_blist( $offset, $dig_key, { create => 1 } );
52 return $self->add_bucket( $tag, $dig_key, $key, $value, undef, $orig_key );
53}
54
55sub read_value {
56 my $self = shift;
57 my ($offset, $key) = @_;
58
59 my $dig_key = $self->apply_digest( $key );
60 my $tag = $self->find_blist( $offset, $dig_key );
61 return $self->get_bucket_value( $tag, $dig_key, $key );
62}
63
64sub delete_key {
65 my $self = shift;
66 my ($offset, $key) = @_;
67
68 my $dig_key = $self->apply_digest( $key );
69 my $tag = $self->find_blist( $offset, $dig_key );
70 return $self->delete_bucket( $tag, $dig_key, $key );
71}
72
73sub key_exists {
74 my $self = shift;
75 my ($offset, $key) = @_;
76
77 my $dig_key = $self->apply_digest( $key );
78 my $tag = $self->find_blist( $offset, $dig_key );
79 return $self->bucket_exists( $tag, $dig_key, $key );
80}
81
82sub XXXget_next_key {
83 my $self = shift;
84 my ($offset, $prev_key) = @_;
85
86# my $dig_key = $self->apply_digest( $key );
87}
88
89################################################################################
90#
91# Below here is the old code. It will be folded into the code above as it can.
92#
93################################################################################
94
612969fb 95sub new {
96 my $class = shift;
97 my ($args) = @_;
98
99 my $self = bless {
86867f3a 100 long_size => 4,
101 long_pack => 'N',
102 data_size => 4,
103 data_pack => 'N',
251dfd0e 104
86867f3a 105 digest => \&Digest::MD5::md5,
c3aafc14 106 hash_size => 16, # In bytes
251dfd0e 107
81d16922 108 ##
c3aafc14 109 # Number of buckets per blist before another level of indexing is
e0098e7f 110 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 111 # files. DO NOT decrease this value below 16, due to risk of recursive
112 # reindex overrun.
81d16922 113 ##
612969fb 114 max_buckets => 16,
460b1067 115
116 fileobj => undef,
359a01ac 117 obj => undef,
612969fb 118 }, $class;
119
e0098e7f 120 if ( defined $args->{pack_size} ) {
121 if ( lc $args->{pack_size} eq 'small' ) {
122 $args->{long_size} = 2;
c9b6d0d8 123 $args->{long_pack} = 'n';
e0098e7f 124 }
125 elsif ( lc $args->{pack_size} eq 'medium' ) {
126 $args->{long_size} = 4;
127 $args->{long_pack} = 'N';
128 }
129 elsif ( lc $args->{pack_size} eq 'large' ) {
130 $args->{long_size} = 8;
131 $args->{long_pack} = 'Q';
132 }
133 else {
134 die "Unknown pack_size value: '$args->{pack_size}'\n";
135 }
136 }
137
fde3db1a 138 # Grab the parameters we want to use
139 foreach my $param ( keys %$self ) {
140 next unless exists $args->{$param};
3e9498a1 141 $self->{$param} = $args->{$param};
fde3db1a 142 }
359a01ac 143 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 144
e0098e7f 145 if ( $self->{max_buckets} < 16 ) {
146 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
147 $self->{max_buckets} = 16;
148 }
149
260a80b4 150 return $self;
151}
152
460b1067 153sub _fileobj { return $_[0]{fileobj} }
460b1067 154
c3aafc14 155sub apply_digest {
156 my $self = shift;
157 return $self->{digest}->(@_);
158}
159
260a80b4 160sub calculate_sizes {
161 my $self = shift;
162
633df1fd 163 # The 2**8 here indicates the number of different characters in the
164 # current hashing algorithm
28394a1a 165 #XXX Does this need to be updated with different hashing algorithms?
ea2f6d67 166 $self->{hash_chars_used} = (2**8);
167 $self->{index_size} = $self->{hash_chars_used} * $self->{long_size};
168
169 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2;
e0098e7f 170 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 171
ea2f6d67 172 $self->{key_size} = $self->{long_size} * 2;
173 $self->{keyloc_size} = $self->{max_buckets} * $self->{key_size};
174
260a80b4 175 return;
1bf65be7 176}
177
fde3db1a 178sub write_file_header {
0d0f3d5d 179 my $self = shift;
0d0f3d5d 180
eff6a245 181 my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 33 );
0d0f3d5d 182
019404df 183 $self->_fileobj->print_at( $loc,
260a80b4 184 SIG_FILE,
460b1067 185 SIG_HEADER,
186 pack('N', 1), # header version
eff6a245 187 pack('N', 24), # header size
188 pack('N4', 0, 0, 0, 0), # currently running transaction IDs
c9b6d0d8 189 pack('n', $self->{long_size}),
260a80b4 190 pack('A', $self->{long_pack}),
c9b6d0d8 191 pack('n', $self->{data_size}),
260a80b4 192 pack('A', $self->{data_pack}),
c9b6d0d8 193 pack('n', $self->{max_buckets}),
260a80b4 194 );
0d0f3d5d 195
20b7f047 196 $self->_fileobj->set_transaction_offset( 13 );
197
0d0f3d5d 198 return;
199}
200
fde3db1a 201sub read_file_header {
e064ccd1 202 my $self = shift;
e064ccd1 203
7dcefff3 204 my $buffer = $self->_fileobj->read_at( 0, length(SIG_FILE) + 9 );
205 return unless length($buffer);
460b1067 206
207 my ($file_signature, $sig_header, $header_version, $size) = unpack(
208 'A4 A N N', $buffer
42f79e07 209 );
e064ccd1 210
460b1067 211 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 212 $self->_fileobj->close;
e96daec8 213 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 214 }
260a80b4 215
460b1067 216 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 217 $self->_fileobj->close;
e96daec8 218 $self->_throw_error( "Old file version found." );
460b1067 219 }
9b2370e0 220
7dcefff3 221 my $buffer2 = $self->_fileobj->read_at( undef, $size );
eff6a245 222 my ($a1, $a2, $a3, $a4, @values) = unpack( 'N4 n A n A n', $buffer2 );
15ba72cc 223
224 $self->_fileobj->set_transaction_offset( 13 );
225
460b1067 226 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 227 $self->_fileobj->close;
e96daec8 228 $self->_throw_error("Corrupted file - bad header");
e064ccd1 229 }
230
460b1067 231 #XXX Add warnings if values weren't set right
232 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
233
7dcefff3 234 return length($buffer) + length($buffer2);
e064ccd1 235}
236
460b1067 237sub setup_fh {
238 my $self = shift;
239 my ($obj) = @_;
70b55428 240
7dcefff3 241 # Need to remove use of $fh here
242 my $fh = $self->_fileobj->{fh};
6fde4ed2 243 flock $fh, LOCK_EX;
118ba343 244
260a80b4 245 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 246 unless ( $obj->{base_offset} ) {
e96daec8 247 my $bytes_read = $self->read_file_header;
118ba343 248
260a80b4 249 $self->calculate_sizes;
250
118ba343 251 ##
fde3db1a 252 # File is empty -- write header and master index
118ba343 253 ##
254 if (!$bytes_read) {
aa83bc1e 255 $self->_fileobj->audit( "# Database created on" );
359a01ac 256
e96daec8 257 $self->write_file_header;
118ba343 258
22e20cce 259 $obj->{base_offset} = $self->_fileobj->request_space(
260 $self->tag_size( $self->{index_size} ),
261 );
118ba343 262
9e4f83a0 263 $self->write_tag(
e96daec8 264 $obj->_base_offset, $obj->_type,
f37c15ab 265 chr(0)x$self->{index_size},
118ba343 266 );
267
268 # Flush the filehandle
269 my $old_fh = select $fh;
270 my $old_af = $|; $| = 1; $| = $old_af;
271 select $old_fh;
272 }
273 else {
274 $obj->{base_offset} = $bytes_read;
275
276 ##
fde3db1a 277 # Get our type from master index header
118ba343 278 ##
359a01ac 279 my $tag = $self->load_tag($obj->_base_offset);
280 unless ( $tag ) {
281 flock $fh, LOCK_UN;
282 $self->_throw_error("Corrupted file, no master index record");
283 }
118ba343 284
e96daec8 285 unless ($obj->_type eq $tag->{signature}) {
359a01ac 286 flock $fh, LOCK_UN;
e96daec8 287 $self->_throw_error("File type mismatch");
118ba343 288 }
289 }
118ba343 290 }
260a80b4 291 else {
292 $self->calculate_sizes;
293 }
e06824f8 294
673464d9 295 #XXX We have to make sure we don't mess up when autoflush isn't turned on
7dcefff3 296 $self->_fileobj->set_inode;
70b55428 297
6fde4ed2 298 flock $fh, LOCK_UN;
299
70b55428 300 return 1;
301}
302
16d1ad9b 303sub tag_size {
304 my $self = shift;
305 my ($size) = @_;
306 return SIG_SIZE + $self->{data_size} + $size;
307}
308
9e4f83a0 309sub write_tag {
20f7b20c 310 ##
311 # Given offset, signature and content, create tag and write to disk
312 ##
d4b1166e 313 my $self = shift;
e96daec8 314 my ($offset, $sig, $content) = @_;
f37c15ab 315 my $size = length( $content );
20f7b20c 316
7dcefff3 317 $self->_fileobj->print_at(
318 $offset,
319 $sig, pack($self->{data_pack}, $size), $content,
320 );
20f7b20c 321
f37c15ab 322 return unless defined $offset;
323
20f7b20c 324 return {
325 signature => $sig,
72e315ac 326 #XXX Is this even used?
86867f3a 327 size => $size,
328 offset => $offset + SIG_SIZE + $self->{data_size},
329 content => $content
20f7b20c 330 };
d4b1166e 331}
332
333sub load_tag {
20f7b20c 334 ##
335 # Given offset, load single tag and return signature, size and data
336 ##
d4b1166e 337 my $self = shift;
e96daec8 338 my ($offset) = @_;
20f7b20c 339
7dcefff3 340 my $fileobj = $self->_fileobj;
20f7b20c 341
86867f3a 342 my ($sig, $size) = unpack(
343 "A $self->{data_pack}",
344 $fileobj->read_at( $offset, SIG_SIZE + $self->{data_size} ),
345 );
20f7b20c 346
347 return {
348 signature => $sig,
c3aafc14 349 size => $size, #XXX Is this even used?
86867f3a 350 offset => $offset + SIG_SIZE + $self->{data_size},
351 content => $fileobj->read_at( undef, $size ),
20f7b20c 352 };
d4b1166e 353}
354
ea2f6d67 355sub find_keyloc {
356 my $self = shift;
357 my ($tag, $transaction_id) = @_;
358 $transaction_id = $self->_fileobj->transaction_id
359 unless defined $transaction_id;
360
361 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
362 my ($loc, $trans_id, $is_deleted) = unpack(
363 "$self->{long_pack} C C",
364 substr( $tag->{content}, $i * $self->{key_size}, $self->{key_size} ),
365 );
366
367 if ( $loc == 0 ) {
368 return ( $loc, $is_deleted, $i * $self->{key_size} );
369 }
370
371 next if $transaction_id != $trans_id;
372
373 return ( $loc, $is_deleted, $i * $self->{key_size} );
374 }
375
376 return;
377}
378
20f7b20c 379sub add_bucket {
380 ##
381 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
382 # plain (undigested) key and value.
383 ##
d4b1166e 384 my $self = shift;
359a01ac 385 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
75be6413 386
eea0d863 387 # This verifies that only supported values will be stored.
388 {
389 my $r = Scalar::Util::reftype( $value );
eea0d863 390
86867f3a 391 last if !defined $r;
eea0d863 392 last if $r eq 'HASH';
393 last if $r eq 'ARRAY';
394
e96daec8 395 $self->_throw_error(
86867f3a 396 "Storage of references of type '$r' is not supported."
eea0d863 397 );
398 }
399
019404df 400 my $fileobj = $self->_fileobj;
20f7b20c 401
21838116 402 #ACID - This is a mutation. Must only find the exact transaction
ea2f6d67 403 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5, 1 );
c9b6d0d8 404
405 my @transactions;
019404df 406 if ( $fileobj->transaction_id == 0 ) {
407 @transactions = $fileobj->current_transactions;
c9b6d0d8 408 }
75be6413 409
e96daec8 410# $self->_release_space( $size, $subloc );
f9c33187 411#XXX This needs updating to use _release_space
ea2f6d67 412
86867f3a 413 my $location;
ea2f6d67 414 my $size = $self->_length_needed( $value, $plain_key );
019404df 415
ea2f6d67 416 # Updating a known md5
417 if ( $keyloc ) {
418 my $keytag = $self->load_tag( $keyloc );
419 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
420
13ff93d5 421 if ( $subloc && !$is_deleted && @transactions ) {
ea2f6d67 422 my $old_value = $self->read_from_loc( $subloc, $orig_key );
423 my $old_size = $self->_length_needed( $old_value, $plain_key );
424
425 for my $trans_id ( @transactions ) {
426 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
427 unless ($loc) {
428 my $location2 = $fileobj->request_space( $old_size );
429 $fileobj->print_at( $keytag->{offset} + $offset2,
430 pack($self->{long_pack}, $location2 ),
431 pack( 'C C', $trans_id, 0 ),
432 );
c3aafc14 433 $self->_write_value( $location2, $plain_key, $old_value, $orig_key );
ea2f6d67 434 }
435 }
75be6413 436 }
504185fb 437
ea2f6d67 438 $location = $self->_fileobj->request_space( $size );
439 #XXX This needs to be transactionally-aware in terms of which keytag->{offset} to use
440 $fileobj->print_at( $keytag->{offset} + $offset,
441 pack($self->{long_pack}, $location ),
442 pack( 'C C', $fileobj->transaction_id, 0 ),
443 );
75be6413 444 }
386bab6c 445 # Adding a new md5
ea2f6d67 446 else {
447 my $keyloc = $fileobj->request_space( $self->tag_size( $self->{keyloc_size} ) );
386bab6c 448
ea2f6d67 449 # The bucket fit into list
450 if ( defined $offset ) {
451 $fileobj->print_at( $tag->{offset} + $offset,
452 $md5, pack( $self->{long_pack}, $keyloc ),
453 );
454 }
455 # If bucket didn't fit into list, split into a new index level
456 else {
457 $self->split_index( $tag, $md5, $keyloc );
458 }
459
460 my $keytag = $self->write_tag(
461 $keyloc, SIG_KEYS, chr(0)x$self->{keyloc_size},
019404df 462 );
c9b6d0d8 463
ea2f6d67 464 $location = $self->_fileobj->request_space( $size );
465 $fileobj->print_at( $keytag->{offset},
466 pack( $self->{long_pack}, $location ),
467 pack( 'C C', $fileobj->transaction_id, 0 ),
468 );
469
470 my $offset = 1;
471 for my $trans_id ( @transactions ) {
472 $fileobj->print_at( $keytag->{offset} + $self->{key_size} * $offset++,
13ff93d5 473 pack( $self->{long_pack}, 0 ),
ea2f6d67 474 pack( 'C C', $trans_id, 1 ),
475 );
c9b6d0d8 476 }
386bab6c 477 }
20f7b20c 478
c3aafc14 479 $self->_write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 480
86867f3a 481 return 1;
d5d7c51d 482}
483
c3aafc14 484sub _write_value {
d5d7c51d 485 my $self = shift;
359a01ac 486 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 487
7dcefff3 488 my $fileobj = $self->_fileobj;
d5d7c51d 489
9d4fa373 490 my $dbm_deep_obj = _get_dbm_object( $value );
7dcefff3 491 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) {
e96daec8 492 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 493 }
d5d7c51d 494
20f7b20c 495 ##
d5d7c51d 496 # Write signature based on content type, set content length and write
497 # actual value.
20f7b20c 498 ##
9d4fa373 499 my $r = Scalar::Util::reftype( $value ) || '';
500 if ( $dbm_deep_obj ) {
7dcefff3 501 $self->write_tag( $location, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 502 }
503 elsif ($r eq 'HASH') {
9d4fa373 504 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 505 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 506 }
7dcefff3 507 $self->write_tag( $location, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 508 }
509 elsif ($r eq 'ARRAY') {
9d4fa373 510 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 511 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 512 }
7dcefff3 513 $self->write_tag( $location, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 514 }
515 elsif (!defined($value)) {
7dcefff3 516 $self->write_tag( $location, SIG_NULL, '' );
d5d7c51d 517 }
518 else {
7dcefff3 519 $self->write_tag( $location, SIG_DATA, $value );
d5d7c51d 520 }
20f7b20c 521
d5d7c51d 522 ##
523 # Plain key is stored AFTER value, as keys are typically fetched less often.
524 ##
7dcefff3 525 $fileobj->print_at( undef, pack($self->{data_pack}, length($key)) . $key );
20f7b20c 526
9a187d8c 527 # Internal references don't care about autobless
9d4fa373 528 return 1 if $dbm_deep_obj;
9a187d8c 529
d5d7c51d 530 ##
531 # If value is blessed, preserve class name
532 ##
7dcefff3 533 if ( $fileobj->{autobless} ) {
633df1fd 534 if ( defined( my $c = Scalar::Util::blessed($value) ) ) {
7dcefff3 535 $fileobj->print_at( undef, chr(1), pack($self->{data_pack}, length($c)) . $c );
20f7b20c 536 }
d5d7c51d 537 else {
7dcefff3 538 $fileobj->print_at( undef, chr(0) );
20f7b20c 539 }
d5d7c51d 540 }
20f7b20c 541
d5d7c51d 542 ##
56ec4340 543 # Tie the passed in reference so that changes to it are reflected in the
544 # datafile. The use of $location as the base_offset will act as the
545 # the linkage between parent and child.
546 #
547 # The overall assignment is a hack around the fact that just tying doesn't
548 # store the values. This may not be the wrong thing to do.
d5d7c51d 549 ##
9d4fa373 550 if ($r eq 'HASH') {
551 my %x = %$value;
552 tie %$value, 'DBM::Deep', {
553 base_offset => $location,
7dcefff3 554 fileobj => $fileobj,
359a01ac 555 parent => $self->{obj},
556 parent_key => $orig_key,
9d4fa373 557 };
558 %$value = %x;
559 }
560 elsif ($r eq 'ARRAY') {
561 my @x = @$value;
562 tie @$value, 'DBM::Deep', {
563 base_offset => $location,
7dcefff3 564 fileobj => $fileobj,
359a01ac 565 parent => $self->{obj},
566 parent_key => $orig_key,
9d4fa373 567 };
568 @$value = @x;
20f7b20c 569 }
d4b1166e 570
d5d7c51d 571 return 1;
d4b1166e 572}
573
75be6413 574sub split_index {
575 my $self = shift;
ea2f6d67 576 my ($tag, $md5, $keyloc) = @_;
75be6413 577
019404df 578 my $fileobj = $self->_fileobj;
21838116 579
019404df 580 my $loc = $fileobj->request_space(
e96daec8 581 $self->tag_size( $self->{index_size} ),
16d1ad9b 582 );
583
019404df 584 $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 585
9e4f83a0 586 my $index_tag = $self->write_tag(
e96daec8 587 $loc, SIG_INDEX,
f37c15ab 588 chr(0)x$self->{index_size},
75be6413 589 );
590
7b1e1aa1 591 my $keys = $tag->{content}
ea2f6d67 592 . $md5 . pack($self->{long_pack}, $keyloc);
75be6413 593
f9c33187 594 my @newloc = ();
75be6413 595 BUCKET:
633df1fd 596 # The <= here is deliberate - we have max_buckets+1 keys to iterate
597 # through, unlike every other loop that uses max_buckets as a stop.
75be6413 598 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
ea2f6d67 599 my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );
75be6413 600
f9c33187 601 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
602 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 603
75be6413 604 my $num = ord(substr($key, $tag->{ch} + 1, 1));
605
f9c33187 606 if ($newloc[$num]) {
7dcefff3 607 my $subkeys = $fileobj->read_at( $newloc[$num], $self->{bucket_list_size} );
75be6413 608
f9c33187 609 # This is looking for the first empty spot
ea2f6d67 610 my ($subloc, $offset) = $self->_find_in_buckets(
f9c33187 611 { content => $subkeys }, '',
7b1e1aa1 612 );
75be6413 613
633df1fd 614 $fileobj->print_at(
615 $newloc[$num] + $offset,
616 $key, pack($self->{long_pack}, $old_subloc),
617 );
7b1e1aa1 618
619 next;
75be6413 620 }
75be6413 621
019404df 622 my $loc = $fileobj->request_space(
e96daec8 623 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 624 );
2603d86e 625
019404df 626 $fileobj->print_at(
627 $index_tag->{offset} + ($num * $self->{long_size}),
628 pack($self->{long_pack}, $loc),
629 );
75be6413 630
7b1e1aa1 631 my $blist_tag = $self->write_tag(
e96daec8 632 $loc, SIG_BLIST,
7b1e1aa1 633 chr(0)x$self->{bucket_list_size},
634 );
635
019404df 636 $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 637
f9c33187 638 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 639 }
640
641 $self->_release_space(
e96daec8 642 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 643 $tag->{offset} - SIG_SIZE - $self->{data_size},
644 );
75be6413 645
ea2f6d67 646 return 1;
75be6413 647}
648
8db25060 649sub read_from_loc {
650 my $self = shift;
359a01ac 651 my ($subloc, $orig_key) = @_;
8db25060 652
7dcefff3 653 my $fileobj = $self->_fileobj;
8db25060 654
7dcefff3 655 my $signature = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 656
657 ##
658 # If value is a hash or array, return new DBM::Deep object with correct offset
659 ##
660 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
c3aafc14 661 #XXX This needs to be a singleton
685e40f1 662 my $new_obj = DBM::Deep->new({
359a01ac 663 type => $signature,
8db25060 664 base_offset => $subloc,
e96daec8 665 fileobj => $self->_fileobj,
359a01ac 666 parent => $self->{obj},
667 parent_key => $orig_key,
685e40f1 668 });
8db25060 669
460b1067 670 if ($new_obj->_fileobj->{autobless}) {
8db25060 671 ##
672 # Skip over value and plain key to see if object needs
673 # to be re-blessed
674 ##
7dcefff3 675 $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} );
8db25060 676
7dcefff3 677 my $size = $fileobj->read_at( undef, $self->{data_size} );
c6ea6b6c 678 $size = unpack($self->{data_pack}, $size);
7dcefff3 679 if ($size) { $fileobj->increment_pointer( $size ); }
8db25060 680
7dcefff3 681 my $bless_bit = $fileobj->read_at( undef, 1 );
86867f3a 682 if ( ord($bless_bit) ) {
683 my $size = unpack(
684 $self->{data_pack},
685 $fileobj->read_at( undef, $self->{data_size} ),
686 );
7dcefff3 687
86867f3a 688 if ( $size ) {
689 $new_obj = bless $new_obj, $fileobj->read_at( undef, $size );
690 }
8db25060 691 }
692 }
693
685e40f1 694 return $new_obj;
8db25060 695 }
696 elsif ( $signature eq SIG_INTERNAL ) {
7dcefff3 697 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 698 $size = unpack($self->{data_pack}, $size);
699
700 if ( $size ) {
7dcefff3 701 my $new_loc = $fileobj->read_at( undef, $size );
702 $new_loc = unpack( $self->{long_pack}, $new_loc );
359a01ac 703 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 704 }
705 else {
706 return;
707 }
708 }
709 ##
710 # Otherwise return actual value
711 ##
460b1067 712 elsif ( $signature eq SIG_DATA ) {
7dcefff3 713 my $size = $fileobj->read_at( undef, $self->{data_size} );
8db25060 714 $size = unpack($self->{data_pack}, $size);
715
86867f3a 716 my $value = $size ? $fileobj->read_at( undef, $size ) : '';
8db25060 717 return $value;
718 }
719
720 ##
721 # Key exists, but content is null
722 ##
723 return;
724}
725
9020ee8c 726sub get_bucket_value {
beac1dff 727 ##
728 # Fetch single value given tag and MD5 digested key.
729 ##
730 my $self = shift;
359a01ac 731 my ($tag, $md5, $orig_key) = @_;
9020ee8c 732
21838116 733 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 734 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
94e8af14 735
ea2f6d67 736 if ( !$keyloc ) {
94e8af14 737 #XXX Need to use real key
ea2f6d67 738# $self->add_bucket( $tag, $md5, $orig_key, undef, $orig_key );
94e8af14 739# return;
740 }
ea2f6d67 741# elsif ( !$is_deleted ) {
742 else {
743 my $keytag = $self->load_tag( $keyloc );
744 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 745 if (!$subloc && !$is_deleted) {
ea2f6d67 746 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
747 }
748 if ( $subloc && !$is_deleted ) {
749 return $self->read_from_loc( $subloc, $orig_key );
750 }
386bab6c 751 }
94e8af14 752
beac1dff 753 return;
9020ee8c 754}
ab0e4957 755
756sub delete_bucket {
beac1dff 757 ##
758 # Delete single key/value pair given tag and MD5 digested key.
759 ##
760 my $self = shift;
a97c8f67 761 my ($tag, $md5, $orig_key) = @_;
ab0e4957 762
22e20cce 763 #ACID - Although this is a mutation, we must find any transaction.
764 # This is because we need to mark something as deleted that is in the HEAD.
ea2f6d67 765 my ($keyloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
633df1fd 766
ea2f6d67 767 return if !$keyloc;
633df1fd 768
769 my $fileobj = $self->_fileobj;
770
771 my @transactions;
772 if ( $fileobj->transaction_id == 0 ) {
773 @transactions = $fileobj->current_transactions;
774 }
775
633df1fd 776 if ( $fileobj->transaction_id == 0 ) {
ea2f6d67 777 my $keytag = $self->load_tag( $keyloc );
7a960a12 778
ea2f6d67 779 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 780 return if !$subloc || $is_deleted;
781
633df1fd 782 my $value = $self->read_from_loc( $subloc, $orig_key );
783
ea2f6d67 784 my $size = $self->_length_needed( $value, $orig_key );
785
786 for my $trans_id ( @transactions ) {
787 my ($loc, $is_deleted, $offset2) = $self->find_keyloc( $keytag, $trans_id );
788 unless ($loc) {
789 my $location2 = $fileobj->request_space( $size );
790 $fileobj->print_at( $keytag->{offset} + $offset2,
791 pack($self->{long_pack}, $location2 ),
792 pack( 'C C', $trans_id, 0 ),
793 );
c3aafc14 794 $self->_write_value( $location2, $orig_key, $value, $orig_key );
ea2f6d67 795 }
633df1fd 796 }
797
ea2f6d67 798 $keytag = $self->load_tag( $keyloc );
799 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
800 $fileobj->print_at( $keytag->{offset} + $offset,
801 substr( $keytag->{content}, $offset + $self->{key_size} ),
802 chr(0) x $self->{key_size},
019404df 803 );
386bab6c 804 }
633df1fd 805 else {
ea2f6d67 806 my $keytag = $self->load_tag( $keyloc );
7a960a12 807
ea2f6d67 808 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
7a960a12 809
ea2f6d67 810 $fileobj->print_at( $keytag->{offset} + $offset,
13ff93d5 811 pack($self->{long_pack}, 0 ),
ea2f6d67 812 pack( 'C C', $fileobj->transaction_id, 1 ),
813 );
633df1fd 814 }
815
816 return 1;
ab0e4957 817}
818
912d50b1 819sub bucket_exists {
beac1dff 820 ##
821 # Check existence of single key given tag and MD5 digested key.
822 ##
823 my $self = shift;
e96daec8 824 my ($tag, $md5) = @_;
912d50b1 825
21838116 826 #ACID - This is a read. Can find exact or HEAD
ea2f6d67 827 my ($keyloc) = $self->_find_in_buckets( $tag, $md5 );
828 my $keytag = $self->load_tag( $keyloc );
829 my ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag );
13ff93d5 830 if ( !$subloc && !$is_deleted ) {
ea2f6d67 831 ($subloc, $is_deleted, $offset) = $self->find_keyloc( $keytag, 0 );
832 }
c9b6d0d8 833 return ($subloc && !$is_deleted) && 1;
912d50b1 834}
835
72e315ac 836sub find_blist {
beac1dff 837 ##
838 # Locate offset for bucket list, given digested key
839 ##
840 my $self = shift;
e96daec8 841 my ($offset, $md5, $args) = @_;
d0b74c17 842 $args = {} unless $args;
843
beac1dff 844 ##
845 # Locate offset for bucket list using digest index system
846 ##
e96daec8 847 my $tag = $self->load_tag( $offset )
848 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 849
e5fc7e69 850 my $ch = 0;
8db25060 851 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 852 my $num = ord substr($md5, $ch, 1);
853
854 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 855 $tag = $self->index_lookup( $tag, $num );
d0b74c17 856
857 if (!$tag) {
29b01632 858 return if !$args->{create};
d0b74c17 859
019404df 860 my $loc = $self->_fileobj->request_space(
e96daec8 861 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 862 );
863
019404df 864 $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 865
9e4f83a0 866 $tag = $self->write_tag(
e96daec8 867 $loc, SIG_BLIST,
f37c15ab 868 chr(0)x$self->{bucket_list_size},
d5d7c51d 869 );
870
871 $tag->{ref_loc} = $ref_loc;
872 $tag->{ch} = $ch;
873
874 last;
d0b74c17 875 }
876
16d1ad9b 877 $tag->{ch} = $ch++;
d0b74c17 878 $tag->{ref_loc} = $ref_loc;
beac1dff 879 }
d0b74c17 880
beac1dff 881 return $tag;
6736c116 882}
883
d0b74c17 884sub index_lookup {
885 ##
886 # Given index tag, lookup single entry in index and return .
887 ##
888 my $self = shift;
e96daec8 889 my ($tag, $index) = @_;
d0b74c17 890
891 my $location = unpack(
892 $self->{long_pack},
893 substr(
894 $tag->{content},
895 $index * $self->{long_size},
896 $self->{long_size},
897 ),
898 );
899
900 if (!$location) { return; }
901
e96daec8 902 return $self->load_tag( $location );
d0b74c17 903}
904
6736c116 905sub traverse_index {
beac1dff 906 ##
907 # Scan index and recursively step into deeper levels, looking for next key.
908 ##
6736c116 909 my $self = shift;
ea2f6d67 910 my ($xxxx, $offset, $ch, $force_return_next) = @_;
d0b74c17 911
e96daec8 912 my $tag = $self->load_tag( $offset );
6736c116 913
8db25060 914 if ($tag->{signature} ne SIG_BLIST) {
ea2f6d67 915 my $start = $xxxx->{return_next} ? 0 : ord(substr($xxxx->{prev_md5}, $ch, 1));
d0b74c17 916
ea2f6d67 917 for (my $idx = $start; $idx < $self->{hash_chars_used}; $idx++) {
e5fc7e69 918 my $subloc = unpack(
919 $self->{long_pack},
e06824f8 920 substr(
ea2f6d67 921 $tag->{content},
e06824f8 922 $idx * $self->{long_size},
923 $self->{long_size},
924 ),
e5fc7e69 925 );
926
beac1dff 927 if ($subloc) {
e5fc7e69 928 my $result = $self->traverse_index(
ea2f6d67 929 $xxxx, $subloc, $ch + 1, $force_return_next,
e5fc7e69 930 );
931
ea2f6d67 932 if (defined $result) { return $result; }
beac1dff 933 }
934 } # index loop
d0b74c17 935
ea2f6d67 936 $xxxx->{return_next} = 1;
42717e46 937 }
938 # This is the bucket list
e5fc7e69 939 else {
beac1dff 940 my $keys = $tag->{content};
ea2f6d67 941 if ($force_return_next) { $xxxx->{return_next} = 1; }
d0b74c17 942
beac1dff 943 ##
944 # Iterate through buckets, looking for a key match
945 ##
42717e46 946 my $transaction_id = $self->_fileobj->transaction_id;
8db25060 947 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
ea2f6d67 948 my ($key, $keyloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 949
8db25060 950 # End of bucket list -- return to outer loop
ea2f6d67 951 if (!$keyloc) {
952 $xxxx->{return_next} = 1;
beac1dff 953 last;
954 }
8db25060 955 # Located previous key -- return next one found
ea2f6d67 956 elsif ($key eq $xxxx->{prev_md5}) {
957 $xxxx->{return_next} = 1;
beac1dff 958 next;
959 }
8db25060 960 # Seek to bucket location and skip over signature
ea2f6d67 961 elsif ($xxxx->{return_next}) {
7dcefff3 962 my $fileobj = $self->_fileobj;
d0b74c17 963
ea2f6d67 964 my $keytag = $self->load_tag( $keyloc );
965 my ($subloc, $is_deleted) = $self->find_keyloc( $keytag );
13ff93d5 966 if ( $subloc == 0 && !$is_deleted ) {
ea2f6d67 967 ($subloc, $is_deleted) = $self->find_keyloc( $keytag, 0 );
968 }
969 next if $is_deleted;
970
beac1dff 971 # Skip over value to get to plain key
7dcefff3 972 my $sig = $fileobj->read_at( $subloc, SIG_SIZE );
8db25060 973
7dcefff3 974 my $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 975 $size = unpack($self->{data_pack}, $size);
7dcefff3 976 if ($size) { $fileobj->increment_pointer( $size ); }
d0b74c17 977
beac1dff 978 # Read in plain key and return as scalar
7dcefff3 979 $size = $fileobj->read_at( undef, $self->{data_size} );
e5fc7e69 980 $size = unpack($self->{data_pack}, $size);
86867f3a 981
7dcefff3 982 my $plain_key;
983 if ($size) { $plain_key = $fileobj->read_at( undef, $size); }
beac1dff 984 return $plain_key;
985 }
8db25060 986 }
d0b74c17 987
ea2f6d67 988 $xxxx->{return_next} = 1;
42717e46 989 }
d0b74c17 990
beac1dff 991 return;
6736c116 992}
993
994sub get_next_key {
beac1dff 995 ##
996 # Locate next key, given digested previous one
997 ##
6736c116 998 my $self = shift;
999 my ($obj) = @_;
d0b74c17 1000
beac1dff 1001 ##
1002 # If the previous key was not specifed, start at the top and
1003 # return the first one found.
1004 ##
ea2f6d67 1005 my $temp;
1006 if ( @_ > 1 ) {
1007 $temp = {
1008 prev_md5 => $_[1],
1009 return_next => 0,
1010 };
1011 }
1012 else {
1013 $temp = {
1014 prev_md5 => chr(0) x $self->{hash_size},
1015 return_next => 1,
1016 };
beac1dff 1017 }
d0b74c17 1018
ea2f6d67 1019 return $self->traverse_index( $temp, $obj->_base_offset, 0 );
6736c116 1020}
1021
75be6413 1022# Utilities
1023
9cec1360 1024sub _get_key_subloc {
75be6413 1025 my $self = shift;
1026 my ($keys, $idx) = @_;
1027
ea2f6d67 1028 return unpack(
28394a1a 1029 # This is 'a', not 'A'. Please read the pack() documentation for the
1030 # difference between the two and why it's important.
ea2f6d67 1031 "a$self->{hash_size} $self->{long_pack}",
75be6413 1032 substr(
1033 $keys,
9cec1360 1034 ($idx * $self->{bucket_size}),
1035 $self->{bucket_size},
75be6413 1036 ),
1037 );
75be6413 1038}
1039
d608b06e 1040sub _find_in_buckets {
1041 my $self = shift;
ea2f6d67 1042 my ($tag, $md5) = @_;
21838116 1043
d608b06e 1044 BUCKET:
1045 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
ea2f6d67 1046 my ($key, $subloc) = $self->_get_key_subloc(
9a187d8c 1047 $tag->{content}, $i,
1048 );
d608b06e 1049
ea2f6d67 1050 my @rv = ($subloc, $i * $self->{bucket_size});
21838116 1051
1052 unless ( $subloc ) {
21838116 1053 return @rv;
1054 }
1055
1056 next BUCKET if $key ne $md5;
d608b06e 1057
21838116 1058 return @rv;
d608b06e 1059 }
1060
1061 return;
1062}
1063
994ccd8e 1064sub _release_space {
1065 my $self = shift;
e96daec8 1066 my ($size, $loc) = @_;
994ccd8e 1067
7b1e1aa1 1068 my $next_loc = 0;
1069
019404df 1070 $self->_fileobj->print_at( $loc,
1071 SIG_FREE,
1072 pack($self->{long_pack}, $size ),
1073 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1074 );
1075
994ccd8e 1076 return;
1077}
1078
e96daec8 1079sub _throw_error {
1080 die "DBM::Deep: $_[1]\n";
1081}
1082
86867f3a 1083sub _get_dbm_object {
1084 my $item = shift;
1085
1086 my $obj = eval {
1087 local $SIG{__DIE__};
1088 if ($item->isa( 'DBM::Deep' )) {
1089 return $item;
1090 }
1091 return;
1092 };
1093 return $obj if $obj;
1094
1095 my $r = Scalar::Util::reftype( $item ) || '';
1096 if ( $r eq 'HASH' ) {
1097 my $obj = eval {
1098 local $SIG{__DIE__};
1099 my $obj = tied(%$item);
1100 if ($obj->isa( 'DBM::Deep' )) {
1101 return $obj;
1102 }
1103 return;
1104 };
1105 return $obj if $obj;
1106 }
1107 elsif ( $r eq 'ARRAY' ) {
1108 my $obj = eval {
1109 local $SIG{__DIE__};
1110 my $obj = tied(@$item);
1111 if ($obj->isa( 'DBM::Deep' )) {
1112 return $obj;
1113 }
1114 return;
1115 };
1116 return $obj if $obj;
1117 }
1118
1119 return;
1120}
1121
1122sub _length_needed {
1123 my $self = shift;
1124 my ($value, $key) = @_;
1125
1126 my $is_dbm_deep = eval {
1127 local $SIG{'__DIE__'};
1128 $value->isa( 'DBM::Deep' );
1129 };
1130
1131 my $len = SIG_SIZE
1132 + $self->{data_size} # size for value
1133 + $self->{data_size} # size for key
1134 + length( $key ); # length of key
1135
1136 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
1137 # long_size is for the internal reference
1138 return $len + $self->{long_size};
1139 }
1140
1141 if ( $self->_fileobj->{autobless} ) {
1142 # This is for the bit saying whether or not this thing is blessed.
1143 $len += 1;
1144 }
1145
1146 my $r = Scalar::Util::reftype( $value ) || '';
1147 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
1148 if ( defined $value ) {
1149 $len += length( $value );
1150 }
1151 return $len;
1152 }
1153
1154 $len += $self->{index_size};
1155
1156 # if autobless is enabled, must also take into consideration
1157 # the class name as it is stored after the key.
1158 if ( $self->_fileobj->{autobless} ) {
1159 my $c = Scalar::Util::blessed($value);
1160 if ( defined $c && !$is_dbm_deep ) {
1161 $len += $self->{data_size} + length($c);
1162 }
1163 }
1164
1165 return $len;
1166}
1167
a20d9a3f 11681;
1169__END__