Got auditing all the way brought over. Need to add array tests, but those can wait
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
8use Fcntl qw( :DEFAULT :flock :seek );
359a01ac 9use Scalar::Util ();
a20d9a3f 10
21838116 11# File-wide notes:
12# * All the local($/,$\); are to protect read() and print() from -l.
20b7f047 13# * To add to bucket_size, make sure you modify the following:
14# - calculate_sizes()
15# - _get_key_subloc()
16# - add_bucket() - where the buckets are printed
21838116 17
8db25060 18##
19# Setup file and tag signatures. These should never change.
20##
21sub SIG_FILE () { 'DPDB' }
460b1067 22sub SIG_HEADER () { 'h' }
8db25060 23sub SIG_INTERNAL () { 'i' }
24sub SIG_HASH () { 'H' }
25sub SIG_ARRAY () { 'A' }
8db25060 26sub SIG_NULL () { 'N' }
27sub SIG_DATA () { 'D' }
28sub SIG_INDEX () { 'I' }
29sub SIG_BLIST () { 'B' }
7b1e1aa1 30sub SIG_FREE () { 'F' }
8db25060 31sub SIG_SIZE () { 1 }
32
612969fb 33sub new {
34 my $class = shift;
35 my ($args) = @_;
36
37 my $self = bless {
38 long_size => 4,
39 long_pack => 'N',
40 data_size => 4,
41 data_pack => 'N',
251dfd0e 42
612969fb 43 digest => \&Digest::MD5::md5,
44 hash_size => 16,
251dfd0e 45
81d16922 46 ##
d5d7c51d 47 # Maximum number of buckets per list before another level of indexing is
e0098e7f 48 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 49 # files. DO NOT decrease this value below 16, due to risk of recursive
50 # reindex overrun.
81d16922 51 ##
612969fb 52 max_buckets => 16,
460b1067 53
54 fileobj => undef,
359a01ac 55 obj => undef,
612969fb 56 }, $class;
57
e0098e7f 58 if ( defined $args->{pack_size} ) {
59 if ( lc $args->{pack_size} eq 'small' ) {
60 $args->{long_size} = 2;
c9b6d0d8 61 $args->{long_pack} = 'n';
e0098e7f 62 }
63 elsif ( lc $args->{pack_size} eq 'medium' ) {
64 $args->{long_size} = 4;
65 $args->{long_pack} = 'N';
66 }
67 elsif ( lc $args->{pack_size} eq 'large' ) {
68 $args->{long_size} = 8;
69 $args->{long_pack} = 'Q';
70 }
71 else {
72 die "Unknown pack_size value: '$args->{pack_size}'\n";
73 }
74 }
75
fde3db1a 76 # Grab the parameters we want to use
77 foreach my $param ( keys %$self ) {
78 next unless exists $args->{$param};
3e9498a1 79 $self->{$param} = $args->{$param};
fde3db1a 80 }
359a01ac 81 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 82
e0098e7f 83 if ( $self->{max_buckets} < 16 ) {
84 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
85 $self->{max_buckets} = 16;
86 }
87
260a80b4 88 return $self;
89}
90
460b1067 91sub _fileobj { return $_[0]{fileobj} }
92sub _fh { return $_[0]->_fileobj->{fh} }
93
260a80b4 94sub calculate_sizes {
95 my $self = shift;
96
28394a1a 97 #XXX Does this need to be updated with different hashing algorithms?
e0098e7f 98 $self->{index_size} = (2**8) * $self->{long_size};
28394a1a 99 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3;
e0098e7f 100 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 101
260a80b4 102 return;
1bf65be7 103}
104
fde3db1a 105sub write_file_header {
0d0f3d5d 106 my $self = shift;
0d0f3d5d 107
21838116 108 local($/,$\);
109
460b1067 110 my $fh = $self->_fh;
0d0f3d5d 111
e96daec8 112 my $loc = $self->_request_space( length( SIG_FILE ) + 21 );
460b1067 113 seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET);
260a80b4 114 print( $fh
115 SIG_FILE,
460b1067 116 SIG_HEADER,
117 pack('N', 1), # header version
118 pack('N', 12), # header size
15ba72cc 119 pack('N', 0), # currently running transaction IDs
c9b6d0d8 120 pack('n', $self->{long_size}),
260a80b4 121 pack('A', $self->{long_pack}),
c9b6d0d8 122 pack('n', $self->{data_size}),
260a80b4 123 pack('A', $self->{data_pack}),
c9b6d0d8 124 pack('n', $self->{max_buckets}),
260a80b4 125 );
0d0f3d5d 126
20b7f047 127 $self->_fileobj->set_transaction_offset( 13 );
128
0d0f3d5d 129 return;
130}
131
fde3db1a 132sub read_file_header {
e064ccd1 133 my $self = shift;
e064ccd1 134
21838116 135 local($/,$\);
136
e96daec8 137 my $fh = $self->_fh;
e064ccd1 138
e96daec8 139 seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET);
42f79e07 140 my $buffer;
460b1067 141 my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 );
142
143 return unless $bytes_read;
144
145 my ($file_signature, $sig_header, $header_version, $size) = unpack(
146 'A4 A N N', $buffer
42f79e07 147 );
e064ccd1 148
460b1067 149 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 150 $self->_fileobj->close;
e96daec8 151 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 152 }
260a80b4 153
460b1067 154 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 155 $self->_fileobj->close;
e96daec8 156 $self->_throw_error( "Old file version found." );
460b1067 157 }
9b2370e0 158
460b1067 159 my $buffer2;
160 $bytes_read += read( $fh, $buffer2, $size );
c9b6d0d8 161 my ($running_transactions, @values) = unpack( 'N n A n A n', $buffer2 );
15ba72cc 162
163 $self->_fileobj->set_transaction_offset( 13 );
164
460b1067 165 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 166 $self->_fileobj->close;
e96daec8 167 $self->_throw_error("Corrupted file - bad header");
e064ccd1 168 }
169
460b1067 170 #XXX Add warnings if values weren't set right
171 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
172
e064ccd1 173 return $bytes_read;
174}
175
460b1067 176sub setup_fh {
177 my $self = shift;
178 my ($obj) = @_;
70b55428 179
359a01ac 180 local($/,$\);
181
e96daec8 182 my $fh = $self->_fh;
6fde4ed2 183 flock $fh, LOCK_EX;
118ba343 184
260a80b4 185 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 186 unless ( $obj->{base_offset} ) {
e96daec8 187 my $bytes_read = $self->read_file_header;
118ba343 188
260a80b4 189 $self->calculate_sizes;
190
118ba343 191 ##
fde3db1a 192 # File is empty -- write header and master index
118ba343 193 ##
194 if (!$bytes_read) {
359a01ac 195 if ( my $afh = $self->_fileobj->{audit_fh} ) {
196 flock( $afh, LOCK_EX );
197 print( $afh "# Database created on " . localtime(time) . "\n" );
198 flock( $afh, LOCK_UN );
199 }
200
e96daec8 201 $self->write_file_header;
118ba343 202
e96daec8 203 $obj->{base_offset} = $self->_request_space( $self->tag_size( $self->{index_size} ) );
118ba343 204
9e4f83a0 205 $self->write_tag(
e96daec8 206 $obj->_base_offset, $obj->_type,
f37c15ab 207 chr(0)x$self->{index_size},
118ba343 208 );
209
210 # Flush the filehandle
211 my $old_fh = select $fh;
212 my $old_af = $|; $| = 1; $| = $old_af;
213 select $old_fh;
214 }
215 else {
216 $obj->{base_offset} = $bytes_read;
217
218 ##
fde3db1a 219 # Get our type from master index header
118ba343 220 ##
359a01ac 221 my $tag = $self->load_tag($obj->_base_offset);
222 unless ( $tag ) {
223 flock $fh, LOCK_UN;
224 $self->_throw_error("Corrupted file, no master index record");
225 }
118ba343 226
e96daec8 227 unless ($obj->_type eq $tag->{signature}) {
359a01ac 228 flock $fh, LOCK_UN;
e96daec8 229 $self->_throw_error("File type mismatch");
118ba343 230 }
231 }
118ba343 232 }
260a80b4 233 else {
234 $self->calculate_sizes;
235 }
e06824f8 236
673464d9 237 #XXX We have to make sure we don't mess up when autoflush isn't turned on
e96daec8 238 unless ( $self->_fileobj->{inode} ) {
239 my @stats = stat($fh);
240 $self->_fileobj->{inode} = $stats[1];
241 $self->_fileobj->{end} = $stats[7];
70b55428 242 }
243
6fde4ed2 244 flock $fh, LOCK_UN;
245
70b55428 246 return 1;
247}
248
16d1ad9b 249sub tag_size {
250 my $self = shift;
251 my ($size) = @_;
252 return SIG_SIZE + $self->{data_size} + $size;
253}
254
9e4f83a0 255sub write_tag {
20f7b20c 256 ##
257 # Given offset, signature and content, create tag and write to disk
258 ##
d4b1166e 259 my $self = shift;
e96daec8 260 my ($offset, $sig, $content) = @_;
f37c15ab 261 my $size = length( $content );
20f7b20c 262
21838116 263 local($/,$\);
264
e96daec8 265 my $fh = $self->_fh;
d4b1166e 266
f37c15ab 267 if ( defined $offset ) {
e96daec8 268 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
f37c15ab 269 }
270
251dfd0e 271 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 272
f37c15ab 273 return unless defined $offset;
274
20f7b20c 275 return {
276 signature => $sig,
277 size => $size,
8db25060 278 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 279 content => $content
280 };
d4b1166e 281}
282
283sub load_tag {
20f7b20c 284 ##
285 # Given offset, load single tag and return signature, size and data
286 ##
d4b1166e 287 my $self = shift;
e96daec8 288 my ($offset) = @_;
20f7b20c 289
21838116 290 local($/,$\);
291
e06824f8 292# print join(':',map{$_||''}caller(1)), $/;
293
e96daec8 294 my $fh = $self->_fh;
d4b1166e 295
e96daec8 296 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
e5fc7e69 297
75be6413 298 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 299 return if eof $fh;
20f7b20c 300
d4b1166e 301 my $b;
8db25060 302 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 303 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 304
305 my $buffer;
306 read( $fh, $buffer, $size);
307
308 return {
309 signature => $sig,
310 size => $size,
8db25060 311 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 312 content => $buffer
313 };
d4b1166e 314}
315
56ec4340 316sub _get_dbm_object {
317 my $item = shift;
318
319 my $obj = eval {
320 local $SIG{__DIE__};
321 if ($item->isa( 'DBM::Deep' )) {
322 return $item;
323 }
324 return;
325 };
326 return $obj if $obj;
327
328 my $r = Scalar::Util::reftype( $item ) || '';
329 if ( $r eq 'HASH' ) {
330 my $obj = eval {
331 local $SIG{__DIE__};
332 my $obj = tied(%$item);
333 if ($obj->isa( 'DBM::Deep' )) {
334 return $obj;
335 }
336 return;
337 };
338 return $obj if $obj;
339 }
340 elsif ( $r eq 'ARRAY' ) {
341 my $obj = eval {
342 local $SIG{__DIE__};
343 my $obj = tied(@$item);
344 if ($obj->isa( 'DBM::Deep' )) {
345 return $obj;
346 }
347 return;
348 };
349 return $obj if $obj;
350 }
351
352 return;
353}
354
29b01632 355sub _length_needed {
356 my $self = shift;
e96daec8 357 my ($value, $key) = @_;
29b01632 358
359 my $is_dbm_deep = eval {
360 local $SIG{'__DIE__'};
361 $value->isa( 'DBM::Deep' );
362 };
363
f37c15ab 364 my $len = SIG_SIZE + $self->{data_size}
365 + $self->{data_size} + length( $key );
29b01632 366
e96daec8 367 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
f37c15ab 368 return $len + $self->{long_size};
29b01632 369 }
370
371 my $r = Scalar::Util::reftype( $value ) || '';
e96daec8 372 if ( $self->_fileobj->{autobless} ) {
9a187d8c 373 # This is for the bit saying whether or not this thing is blessed.
374 $len += 1;
375 }
376
29b01632 377 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 378 if ( defined $value ) {
379 $len += length( $value );
380 }
381 return $len;
29b01632 382 }
383
f37c15ab 384 $len += $self->{index_size};
29b01632 385
386 # if autobless is enabled, must also take into consideration
f37c15ab 387 # the class name as it is stored after the key.
e96daec8 388 if ( $self->_fileobj->{autobless} ) {
56ec4340 389 my $c = Scalar::Util::blessed($value);
390 if ( defined $c && !$is_dbm_deep ) {
391 $len += $self->{data_size} + length($c);
29b01632 392 }
393 }
394
f37c15ab 395 return $len;
29b01632 396}
397
20f7b20c 398sub add_bucket {
399 ##
400 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
401 # plain (undigested) key and value.
402 ##
d4b1166e 403 my $self = shift;
359a01ac 404 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
c9b6d0d8 405 $deleted ||= 0;
75be6413 406
21838116 407 local($/,$\);
408
eea0d863 409 # This verifies that only supported values will be stored.
410 {
411 my $r = Scalar::Util::reftype( $value );
412 last if !defined $r;
413
414 last if $r eq 'HASH';
415 last if $r eq 'ARRAY';
416
e96daec8 417 $self->_throw_error(
eea0d863 418 "Storage of variables of type '$r' is not supported."
419 );
420 }
421
20f7b20c 422 my $location = 0;
423 my $result = 2;
424
e96daec8 425 my $root = $self->_fileobj;
426 my $fh = $self->_fh;
20f7b20c 427
e96daec8 428 my $actual_length = $self->_length_needed( $value, $plain_key );
20f7b20c 429
21838116 430 #ACID - This is a mutation. Must only find the exact transaction
c9b6d0d8 431 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5, 1 );
432
433 my @transactions;
434 if ( $self->_fileobj->transaction_id == 0 ) {
435 @transactions = $self->_fileobj->current_transactions;
436 }
75be6413 437
e96daec8 438# $self->_release_space( $size, $subloc );
386bab6c 439 # Updating a known md5
f9c33187 440#XXX This needs updating to use _release_space
386bab6c 441 if ( $subloc ) {
442 $result = 1;
20f7b20c 443
386bab6c 444 if ($actual_length <= $size) {
445 $location = $subloc;
20f7b20c 446 }
75be6413 447 else {
e96daec8 448 $location = $self->_request_space( $actual_length );
386bab6c 449 seek(
450 $fh,
9a187d8c 451 $tag->{offset} + $offset
452 + $self->{hash_size} + $root->{file_offset},
386bab6c 453 SEEK_SET,
454 );
9a187d8c 455 print( $fh pack($self->{long_pack}, $location ) );
456 print( $fh pack($self->{long_pack}, $actual_length ) );
c9b6d0d8 457 print( $fh pack('n n', $root->transaction_id, $deleted ) );
75be6413 458 }
75be6413 459 }
386bab6c 460 # Adding a new md5
461 elsif ( defined $offset ) {
e96daec8 462 $location = $self->_request_space( $actual_length );
386bab6c 463
464 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 465 print( $fh $md5 . pack($self->{long_pack}, $location ) );
466 print( $fh pack($self->{long_pack}, $actual_length ) );
c9b6d0d8 467 print( $fh pack('n n', $root->transaction_id, $deleted ) );
468
469 for ( @transactions ) {
470 my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
471 $self->_fileobj->{transaction_id} = $_;
359a01ac 472 $self->add_bucket( $tag2, $md5, '', '', 1, $orig_key );
c9b6d0d8 473 $self->_fileobj->{transaction_id} = 0;
474 }
386bab6c 475 }
476 # If bucket didn't fit into list, split into a new index level
f9c33187 477 # split_index() will do the _request_space() call
386bab6c 478 else {
e96daec8 479 $location = $self->split_index( $md5, $tag );
386bab6c 480 }
20f7b20c 481
359a01ac 482 $self->write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 483
484 return $result;
485}
486
487sub write_value {
488 my $self = shift;
359a01ac 489 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 490
21838116 491 local($/,$\);
492
e96daec8 493 my $fh = $self->_fh;
494 my $root = $self->_fileobj;
d5d7c51d 495
9d4fa373 496 my $dbm_deep_obj = _get_dbm_object( $value );
e96daec8 497 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) {
498 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 499 }
d5d7c51d 500
501 seek($fh, $location + $root->{file_offset}, SEEK_SET);
502
20f7b20c 503 ##
d5d7c51d 504 # Write signature based on content type, set content length and write
505 # actual value.
20f7b20c 506 ##
9d4fa373 507 my $r = Scalar::Util::reftype( $value ) || '';
508 if ( $dbm_deep_obj ) {
e96daec8 509 $self->write_tag( undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 510 }
511 elsif ($r eq 'HASH') {
9d4fa373 512 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 513 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 514 }
e96daec8 515 $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 516 }
517 elsif ($r eq 'ARRAY') {
9d4fa373 518 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 519 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 520 }
e96daec8 521 $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 522 }
523 elsif (!defined($value)) {
e96daec8 524 $self->write_tag( undef, SIG_NULL, '' );
d5d7c51d 525 }
526 else {
e96daec8 527 $self->write_tag( undef, SIG_DATA, $value );
d5d7c51d 528 }
20f7b20c 529
d5d7c51d 530 ##
531 # Plain key is stored AFTER value, as keys are typically fetched less often.
532 ##
533 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 534
9a187d8c 535 # Internal references don't care about autobless
9d4fa373 536 return 1 if $dbm_deep_obj;
9a187d8c 537
d5d7c51d 538 ##
539 # If value is blessed, preserve class name
540 ##
541 if ( $root->{autobless} ) {
56ec4340 542 my $c = Scalar::Util::blessed($value);
543 if ( defined $c && !$dbm_deep_obj ) {
d5d7c51d 544 print( $fh chr(1) );
56ec4340 545 print( $fh pack($self->{data_pack}, length($c)) . $c );
20f7b20c 546 }
d5d7c51d 547 else {
548 print( $fh chr(0) );
20f7b20c 549 }
d5d7c51d 550 }
20f7b20c 551
d5d7c51d 552 ##
56ec4340 553 # Tie the passed in reference so that changes to it are reflected in the
554 # datafile. The use of $location as the base_offset will act as the
555 # the linkage between parent and child.
556 #
557 # The overall assignment is a hack around the fact that just tying doesn't
558 # store the values. This may not be the wrong thing to do.
d5d7c51d 559 ##
9d4fa373 560 if ($r eq 'HASH') {
561 my %x = %$value;
562 tie %$value, 'DBM::Deep', {
563 base_offset => $location,
460b1067 564 fileobj => $root,
359a01ac 565 parent => $self->{obj},
566 parent_key => $orig_key,
9d4fa373 567 };
568 %$value = %x;
569 }
570 elsif ($r eq 'ARRAY') {
571 my @x = @$value;
572 tie @$value, 'DBM::Deep', {
573 base_offset => $location,
460b1067 574 fileobj => $root,
359a01ac 575 parent => $self->{obj},
576 parent_key => $orig_key,
9d4fa373 577 };
578 @$value = @x;
20f7b20c 579 }
d4b1166e 580
d5d7c51d 581 return 1;
d4b1166e 582}
583
75be6413 584sub split_index {
585 my $self = shift;
e96daec8 586 my ($md5, $tag) = @_;
75be6413 587
21838116 588 local($/,$\);
589
e96daec8 590 my $fh = $self->_fh;
591 my $root = $self->_fileobj;
16d1ad9b 592
593 my $loc = $self->_request_space(
e96daec8 594 $self->tag_size( $self->{index_size} ),
16d1ad9b 595 );
596
7b1e1aa1 597 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 598 print( $fh pack($self->{long_pack}, $loc) );
75be6413 599
9e4f83a0 600 my $index_tag = $self->write_tag(
e96daec8 601 $loc, SIG_INDEX,
f37c15ab 602 chr(0)x$self->{index_size},
75be6413 603 );
604
f9c33187 605 my $newtag_loc = $self->_request_space(
e96daec8 606 $self->tag_size( $self->{bucket_list_size} ),
f9c33187 607 );
75be6413 608
7b1e1aa1 609 my $keys = $tag->{content}
f9c33187 610 . $md5 . pack($self->{long_pack}, $newtag_loc)
28394a1a 611 . pack($self->{long_pack}, 0) # size
20b7f047 612 . pack($self->{long_pack}, 0); # transaction ID
75be6413 613
f9c33187 614 my @newloc = ();
75be6413 615 BUCKET:
616 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 617 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 618
f9c33187 619 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
620 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 621
75be6413 622 my $num = ord(substr($key, $tag->{ch} + 1, 1));
623
f9c33187 624 if ($newloc[$num]) {
625 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 626 my $subkeys;
627 read( $fh, $subkeys, $self->{bucket_list_size});
628
f9c33187 629 # This is looking for the first empty spot
7b1e1aa1 630 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 631 { content => $subkeys }, '',
7b1e1aa1 632 );
75be6413 633
f9c33187 634 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
635 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 636
637 next;
75be6413 638 }
75be6413 639
7b1e1aa1 640 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 641
7b1e1aa1 642 my $loc = $self->_request_space(
e96daec8 643 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 644 );
2603d86e 645
7b1e1aa1 646 print( $fh pack($self->{long_pack}, $loc) );
75be6413 647
7b1e1aa1 648 my $blist_tag = $self->write_tag(
e96daec8 649 $loc, SIG_BLIST,
7b1e1aa1 650 chr(0)x$self->{bucket_list_size},
651 );
652
653 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 654 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 655
f9c33187 656 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 657 }
658
659 $self->_release_space(
e96daec8 660 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 661 $tag->{offset} - SIG_SIZE - $self->{data_size},
662 );
75be6413 663
f9c33187 664 return $newtag_loc;
75be6413 665}
666
8db25060 667sub read_from_loc {
668 my $self = shift;
359a01ac 669 my ($subloc, $orig_key) = @_;
8db25060 670
21838116 671 local($/,$\);
672
e96daec8 673 my $fh = $self->_fh;
8db25060 674
675 ##
676 # Found match -- seek to offset and read signature
677 ##
678 my $signature;
e96daec8 679 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
8db25060 680 read( $fh, $signature, SIG_SIZE);
681
682 ##
683 # If value is a hash or array, return new DBM::Deep object with correct offset
684 ##
685 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 686 my $new_obj = DBM::Deep->new({
359a01ac 687 type => $signature,
8db25060 688 base_offset => $subloc,
e96daec8 689 fileobj => $self->_fileobj,
359a01ac 690 parent => $self->{obj},
691 parent_key => $orig_key,
685e40f1 692 });
8db25060 693
460b1067 694 if ($new_obj->_fileobj->{autobless}) {
8db25060 695 ##
696 # Skip over value and plain key to see if object needs
697 # to be re-blessed
698 ##
699 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
700
701 my $size;
c6ea6b6c 702 read( $fh, $size, $self->{data_size});
703 $size = unpack($self->{data_pack}, $size);
8db25060 704 if ($size) { seek($fh, $size, SEEK_CUR); }
705
706 my $bless_bit;
707 read( $fh, $bless_bit, 1);
708 if (ord($bless_bit)) {
709 ##
710 # Yes, object needs to be re-blessed
711 ##
712 my $class_name;
c6ea6b6c 713 read( $fh, $size, $self->{data_size});
714 $size = unpack($self->{data_pack}, $size);
8db25060 715 if ($size) { read( $fh, $class_name, $size); }
685e40f1 716 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 717 }
718 }
719
685e40f1 720 return $new_obj;
8db25060 721 }
722 elsif ( $signature eq SIG_INTERNAL ) {
723 my $size;
724 read( $fh, $size, $self->{data_size});
725 $size = unpack($self->{data_pack}, $size);
726
727 if ( $size ) {
728 my $new_loc;
729 read( $fh, $new_loc, $size );
730 $new_loc = unpack( $self->{long_pack}, $new_loc );
731
359a01ac 732 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 733 }
734 else {
735 return;
736 }
737 }
738 ##
739 # Otherwise return actual value
740 ##
460b1067 741 elsif ( $signature eq SIG_DATA ) {
8db25060 742 my $size;
743 read( $fh, $size, $self->{data_size});
744 $size = unpack($self->{data_pack}, $size);
745
746 my $value = '';
747 if ($size) { read( $fh, $value, $size); }
748 return $value;
749 }
750
751 ##
752 # Key exists, but content is null
753 ##
754 return;
755}
756
9020ee8c 757sub get_bucket_value {
beac1dff 758 ##
759 # Fetch single value given tag and MD5 digested key.
760 ##
761 my $self = shift;
359a01ac 762 my ($tag, $md5, $orig_key) = @_;
9020ee8c 763
21838116 764 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 765 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5 );
766 if ( $subloc && !$is_deleted ) {
359a01ac 767 return $self->read_from_loc( $subloc, $orig_key );
386bab6c 768 }
beac1dff 769 return;
9020ee8c 770}
ab0e4957 771
772sub delete_bucket {
beac1dff 773 ##
774 # Delete single key/value pair given tag and MD5 digested key.
775 ##
776 my $self = shift;
a97c8f67 777 my ($tag, $md5, $orig_key) = @_;
ab0e4957 778
21838116 779 local($/,$\);
780
781 #ACID - This is a mutation. Must only find the exact transaction
782 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );
f9c33187 783#XXX This needs _release_space()
386bab6c 784 if ( $subloc ) {
e96daec8 785 my $fh = $self->_fh;
786 seek($fh, $tag->{offset} + $offset + $self->_fileobj->{file_offset}, SEEK_SET);
386bab6c 787 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 788 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 789
ab0e4957 790 return 1;
386bab6c 791 }
beac1dff 792 return;
ab0e4957 793}
794
912d50b1 795sub bucket_exists {
beac1dff 796 ##
797 # Check existence of single key given tag and MD5 digested key.
798 ##
799 my $self = shift;
e96daec8 800 my ($tag, $md5) = @_;
912d50b1 801
21838116 802 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 803 my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );
804 return ($subloc && !$is_deleted) && 1;
912d50b1 805}
806
6736c116 807sub find_bucket_list {
beac1dff 808 ##
809 # Locate offset for bucket list, given digested key
810 ##
811 my $self = shift;
e96daec8 812 my ($offset, $md5, $args) = @_;
d0b74c17 813 $args = {} unless $args;
814
21838116 815 local($/,$\);
816
beac1dff 817 ##
818 # Locate offset for bucket list using digest index system
819 ##
e96daec8 820 my $tag = $self->load_tag( $offset )
821 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 822
e5fc7e69 823 my $ch = 0;
8db25060 824 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 825 my $num = ord substr($md5, $ch, 1);
826
827 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 828 $tag = $self->index_lookup( $tag, $num );
d0b74c17 829
830 if (!$tag) {
29b01632 831 return if !$args->{create};
d0b74c17 832
16d1ad9b 833 my $loc = $self->_request_space(
e96daec8 834 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 835 );
836
e96daec8 837 my $fh = $self->_fh;
838 seek($fh, $ref_loc + $self->_fileobj->{file_offset}, SEEK_SET);
16d1ad9b 839 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 840
9e4f83a0 841 $tag = $self->write_tag(
e96daec8 842 $loc, SIG_BLIST,
f37c15ab 843 chr(0)x$self->{bucket_list_size},
d5d7c51d 844 );
845
846 $tag->{ref_loc} = $ref_loc;
847 $tag->{ch} = $ch;
848
849 last;
d0b74c17 850 }
851
16d1ad9b 852 $tag->{ch} = $ch++;
d0b74c17 853 $tag->{ref_loc} = $ref_loc;
beac1dff 854 }
d0b74c17 855
beac1dff 856 return $tag;
6736c116 857}
858
d0b74c17 859sub index_lookup {
860 ##
861 # Given index tag, lookup single entry in index and return .
862 ##
863 my $self = shift;
e96daec8 864 my ($tag, $index) = @_;
d0b74c17 865
866 my $location = unpack(
867 $self->{long_pack},
868 substr(
869 $tag->{content},
870 $index * $self->{long_size},
871 $self->{long_size},
872 ),
873 );
874
875 if (!$location) { return; }
876
e96daec8 877 return $self->load_tag( $location );
d0b74c17 878}
879
6736c116 880sub traverse_index {
beac1dff 881 ##
882 # Scan index and recursively step into deeper levels, looking for next key.
883 ##
6736c116 884 my $self = shift;
885 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 886
21838116 887 local($/,$\);
888
e96daec8 889 my $tag = $self->load_tag( $offset );
6736c116 890
e96daec8 891 my $fh = $self->_fh;
d0b74c17 892
8db25060 893 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 894 my $content = $tag->{content};
e5fc7e69 895 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 896
d5d7c51d 897 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 898 my $subloc = unpack(
899 $self->{long_pack},
e06824f8 900 substr(
901 $content,
902 $idx * $self->{long_size},
903 $self->{long_size},
904 ),
e5fc7e69 905 );
906
beac1dff 907 if ($subloc) {
e5fc7e69 908 my $result = $self->traverse_index(
909 $obj, $subloc, $ch + 1, $force_return_next,
910 );
911
beac1dff 912 if (defined($result)) { return $result; }
913 }
914 } # index loop
d0b74c17 915
beac1dff 916 $obj->{return_next} = 1;
917 } # tag is an index
d0b74c17 918
e5fc7e69 919 else {
beac1dff 920 my $keys = $tag->{content};
921 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 922
beac1dff 923 ##
924 # Iterate through buckets, looking for a key match
925 ##
8db25060 926 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 927 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 928
8db25060 929 # End of bucket list -- return to outer loop
beac1dff 930 if (!$subloc) {
beac1dff 931 $obj->{return_next} = 1;
932 last;
933 }
8db25060 934 # Located previous key -- return next one found
beac1dff 935 elsif ($key eq $obj->{prev_md5}) {
beac1dff 936 $obj->{return_next} = 1;
937 next;
938 }
8db25060 939 # Seek to bucket location and skip over signature
beac1dff 940 elsif ($obj->{return_next}) {
e96daec8 941 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
d0b74c17 942
beac1dff 943 # Skip over value to get to plain key
8db25060 944 my $sig;
945 read( $fh, $sig, SIG_SIZE );
946
beac1dff 947 my $size;
e5fc7e69 948 read( $fh, $size, $self->{data_size});
949 $size = unpack($self->{data_pack}, $size);
beac1dff 950 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 951
beac1dff 952 # Read in plain key and return as scalar
beac1dff 953 my $plain_key;
e5fc7e69 954 read( $fh, $size, $self->{data_size});
955 $size = unpack($self->{data_pack}, $size);
beac1dff 956 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 957
beac1dff 958 return $plain_key;
959 }
8db25060 960 }
d0b74c17 961
beac1dff 962 $obj->{return_next} = 1;
963 } # tag is a bucket list
d0b74c17 964
beac1dff 965 return;
6736c116 966}
967
968sub get_next_key {
beac1dff 969 ##
970 # Locate next key, given digested previous one
971 ##
6736c116 972 my $self = shift;
973 my ($obj) = @_;
d0b74c17 974
beac1dff 975 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
976 $obj->{return_next} = 0;
d0b74c17 977
beac1dff 978 ##
979 # If the previous key was not specifed, start at the top and
980 # return the first one found.
981 ##
982 if (!$obj->{prev_md5}) {
983 $obj->{prev_md5} = chr(0) x $self->{hash_size};
984 $obj->{return_next} = 1;
985 }
d0b74c17 986
beac1dff 987 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 988}
989
75be6413 990# Utilities
991
9cec1360 992sub _get_key_subloc {
75be6413 993 my $self = shift;
994 my ($keys, $idx) = @_;
995
c9b6d0d8 996 my ($key, $subloc, $size, $transaction_id, $is_deleted) = unpack(
28394a1a 997 # This is 'a', not 'A'. Please read the pack() documentation for the
998 # difference between the two and why it's important.
c9b6d0d8 999 "a$self->{hash_size} $self->{long_pack}2 n2",
75be6413 1000 substr(
1001 $keys,
9cec1360 1002 ($idx * $self->{bucket_size}),
1003 $self->{bucket_size},
75be6413 1004 ),
1005 );
1006
c9b6d0d8 1007 return ($key, $subloc, $size, $transaction_id, $is_deleted);
75be6413 1008}
1009
d608b06e 1010sub _find_in_buckets {
1011 my $self = shift;
21838116 1012 my ($tag, $md5, $exact) = @_;
d608b06e 1013
28394a1a 1014 my $trans_id = $self->_fileobj->transaction_id;
1015
21838116 1016 my @zero;
1017
d608b06e 1018 BUCKET:
1019 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
c9b6d0d8 1020 my ($key, $subloc, $size, $transaction_id, $is_deleted) = $self->_get_key_subloc(
9a187d8c 1021 $tag->{content}, $i,
1022 );
d608b06e 1023
c9b6d0d8 1024 my @rv = ($subloc, $i * $self->{bucket_size}, $size, $is_deleted);
21838116 1025
1026 unless ( $subloc ) {
20b7f047 1027 if ( !$exact && @zero and $trans_id ) {
c9b6d0d8 1028 @rv = ($zero[2], $zero[0] * $self->{bucket_size},$zero[3],$is_deleted);
20b7f047 1029 }
21838116 1030 return @rv;
1031 }
1032
1033 next BUCKET if $key ne $md5;
d608b06e 1034
21838116 1035 # Save off the HEAD in case we need it.
c9b6d0d8 1036 @zero = ($i,$key,$subloc,$size,$transaction_id,$is_deleted) if $transaction_id == 0;
d608b06e 1037
21838116 1038 next BUCKET if $transaction_id != $trans_id;
1039
1040 return @rv;
d608b06e 1041 }
1042
1043 return;
1044}
1045
994ccd8e 1046sub _request_space {
1047 my $self = shift;
e96daec8 1048 my ($size) = @_;
994ccd8e 1049
460b1067 1050 my $loc = $self->_fileobj->{end};
1051 $self->_fileobj->{end} += $size;
994ccd8e 1052
1053 return $loc;
1054}
1055
1056sub _release_space {
1057 my $self = shift;
e96daec8 1058 my ($size, $loc) = @_;
994ccd8e 1059
21838116 1060 local($/,$\);
1061
7b1e1aa1 1062 my $next_loc = 0;
1063
e96daec8 1064 my $fh = $self->_fh;
1065 seek( $fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET );
7b1e1aa1 1066 print( $fh SIG_FREE
1067 . pack($self->{long_pack}, $size )
1068 . pack($self->{long_pack}, $next_loc )
1069 );
1070
994ccd8e 1071 return;
1072}
1073
e96daec8 1074sub _throw_error {
1075 die "DBM::Deep: $_[1]\n";
1076}
1077
a20d9a3f 10781;
1079__END__
d5d7c51d 1080
1081# This will be added in later, after more refactoring is done. This is an early
1082# attempt at refactoring on the physical level instead of the virtual level.
1083sub _read_at {
1084 my $self = shift;
e96daec8 1085 my ($spot, $amount, $unpack) = @_;
d5d7c51d 1086
21838116 1087 local($/,$\);
1088
e96daec8 1089 my $fh = $self->_fh;
1090 seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );
d5d7c51d 1091
1092 my $buffer;
1093 my $bytes_read = read( $fh, $buffer, $amount );
1094
1095 if ( $unpack ) {
1096 $buffer = unpack( $unpack, $buffer );
1097 }
1098
1099 if ( wantarray ) {
1100 return ($buffer, $bytes_read);
1101 }
1102 else {
1103 return $buffer;
1104 }
1105}
e96daec8 1106
1107sub _print_at {
1108 my $self = shift;
1109 my ($spot, $data) = @_;
1110
21838116 1111 local($/,$\);
1112
e96daec8 1113 my $fh = $self->_fh;
1114 seek( $fh, $spot, SEEK_SET );
1115 print( $fh $data );
1116
1117 return;
1118}
1119
1120sub get_file_version {
1121 my $self = shift;
1122
21838116 1123 local($/,$\);
1124
e96daec8 1125 my $fh = $self->_fh;
1126
1127 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1128 my $buffer;
1129 my $bytes_read = read( $fh, $buffer, 4 );
1130 unless ( $bytes_read == 4 ) {
1131 $self->_throw_error( "Cannot read file version" );
1132 }
1133
1134 return unpack( 'N', $buffer );
1135}
1136
1137sub write_file_version {
1138 my $self = shift;
1139 my ($new_version) = @_;
1140
21838116 1141 local($/,$\);
1142
e96daec8 1143 my $fh = $self->_fh;
1144
1145 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1146 print( $fh pack( 'N', $new_version ) );
1147
1148 return;
1149}
1150