Removed only physical access to the in ::Deep
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
8use Fcntl qw( :DEFAULT :flock :seek );
359a01ac 9use Scalar::Util ();
a20d9a3f 10
21838116 11# File-wide notes:
12# * All the local($/,$\); are to protect read() and print() from -l.
20b7f047 13# * To add to bucket_size, make sure you modify the following:
14# - calculate_sizes()
15# - _get_key_subloc()
16# - add_bucket() - where the buckets are printed
21838116 17
8db25060 18##
19# Setup file and tag signatures. These should never change.
20##
21sub SIG_FILE () { 'DPDB' }
460b1067 22sub SIG_HEADER () { 'h' }
8db25060 23sub SIG_INTERNAL () { 'i' }
24sub SIG_HASH () { 'H' }
25sub SIG_ARRAY () { 'A' }
8db25060 26sub SIG_NULL () { 'N' }
27sub SIG_DATA () { 'D' }
28sub SIG_INDEX () { 'I' }
29sub SIG_BLIST () { 'B' }
7b1e1aa1 30sub SIG_FREE () { 'F' }
8db25060 31sub SIG_SIZE () { 1 }
32
612969fb 33sub new {
34 my $class = shift;
35 my ($args) = @_;
36
37 my $self = bless {
38 long_size => 4,
39 long_pack => 'N',
40 data_size => 4,
41 data_pack => 'N',
251dfd0e 42
612969fb 43 digest => \&Digest::MD5::md5,
44 hash_size => 16,
251dfd0e 45
81d16922 46 ##
d5d7c51d 47 # Maximum number of buckets per list before another level of indexing is
e0098e7f 48 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 49 # files. DO NOT decrease this value below 16, due to risk of recursive
50 # reindex overrun.
81d16922 51 ##
612969fb 52 max_buckets => 16,
460b1067 53
54 fileobj => undef,
359a01ac 55 obj => undef,
612969fb 56 }, $class;
57
e0098e7f 58 if ( defined $args->{pack_size} ) {
59 if ( lc $args->{pack_size} eq 'small' ) {
60 $args->{long_size} = 2;
c9b6d0d8 61 $args->{long_pack} = 'n';
e0098e7f 62 }
63 elsif ( lc $args->{pack_size} eq 'medium' ) {
64 $args->{long_size} = 4;
65 $args->{long_pack} = 'N';
66 }
67 elsif ( lc $args->{pack_size} eq 'large' ) {
68 $args->{long_size} = 8;
69 $args->{long_pack} = 'Q';
70 }
71 else {
72 die "Unknown pack_size value: '$args->{pack_size}'\n";
73 }
74 }
75
fde3db1a 76 # Grab the parameters we want to use
77 foreach my $param ( keys %$self ) {
78 next unless exists $args->{$param};
3e9498a1 79 $self->{$param} = $args->{$param};
fde3db1a 80 }
359a01ac 81 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 82
e0098e7f 83 if ( $self->{max_buckets} < 16 ) {
84 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
85 $self->{max_buckets} = 16;
86 }
87
260a80b4 88 return $self;
89}
90
460b1067 91sub _fileobj { return $_[0]{fileobj} }
92sub _fh { return $_[0]->_fileobj->{fh} }
93
260a80b4 94sub calculate_sizes {
95 my $self = shift;
96
28394a1a 97 #XXX Does this need to be updated with different hashing algorithms?
e0098e7f 98 $self->{index_size} = (2**8) * $self->{long_size};
28394a1a 99 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3;
e0098e7f 100 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 101
260a80b4 102 return;
1bf65be7 103}
104
fde3db1a 105sub write_file_header {
0d0f3d5d 106 my $self = shift;
0d0f3d5d 107
019404df 108 my $loc = $self->_fileobj->request_space( length( SIG_FILE ) + 21 );
0d0f3d5d 109
019404df 110 $self->_fileobj->print_at( $loc,
260a80b4 111 SIG_FILE,
460b1067 112 SIG_HEADER,
113 pack('N', 1), # header version
114 pack('N', 12), # header size
15ba72cc 115 pack('N', 0), # currently running transaction IDs
c9b6d0d8 116 pack('n', $self->{long_size}),
260a80b4 117 pack('A', $self->{long_pack}),
c9b6d0d8 118 pack('n', $self->{data_size}),
260a80b4 119 pack('A', $self->{data_pack}),
c9b6d0d8 120 pack('n', $self->{max_buckets}),
260a80b4 121 );
0d0f3d5d 122
20b7f047 123 $self->_fileobj->set_transaction_offset( 13 );
124
0d0f3d5d 125 return;
126}
127
fde3db1a 128sub read_file_header {
e064ccd1 129 my $self = shift;
e064ccd1 130
21838116 131 local($/,$\);
132
e96daec8 133 my $fh = $self->_fh;
e064ccd1 134
e96daec8 135 seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET);
42f79e07 136 my $buffer;
460b1067 137 my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 );
138
139 return unless $bytes_read;
140
141 my ($file_signature, $sig_header, $header_version, $size) = unpack(
142 'A4 A N N', $buffer
42f79e07 143 );
e064ccd1 144
460b1067 145 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 146 $self->_fileobj->close;
e96daec8 147 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 148 }
260a80b4 149
460b1067 150 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 151 $self->_fileobj->close;
e96daec8 152 $self->_throw_error( "Old file version found." );
460b1067 153 }
9b2370e0 154
460b1067 155 my $buffer2;
156 $bytes_read += read( $fh, $buffer2, $size );
c9b6d0d8 157 my ($running_transactions, @values) = unpack( 'N n A n A n', $buffer2 );
15ba72cc 158
159 $self->_fileobj->set_transaction_offset( 13 );
160
460b1067 161 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 162 $self->_fileobj->close;
e96daec8 163 $self->_throw_error("Corrupted file - bad header");
e064ccd1 164 }
165
460b1067 166 #XXX Add warnings if values weren't set right
167 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
168
e064ccd1 169 return $bytes_read;
170}
171
460b1067 172sub setup_fh {
173 my $self = shift;
174 my ($obj) = @_;
70b55428 175
359a01ac 176 local($/,$\);
177
e96daec8 178 my $fh = $self->_fh;
6fde4ed2 179 flock $fh, LOCK_EX;
118ba343 180
260a80b4 181 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 182 unless ( $obj->{base_offset} ) {
e96daec8 183 my $bytes_read = $self->read_file_header;
118ba343 184
260a80b4 185 $self->calculate_sizes;
186
118ba343 187 ##
fde3db1a 188 # File is empty -- write header and master index
118ba343 189 ##
190 if (!$bytes_read) {
aa83bc1e 191 $self->_fileobj->audit( "# Database created on" );
359a01ac 192
e96daec8 193 $self->write_file_header;
118ba343 194
019404df 195 $obj->{base_offset} = $self->_fileobj->request_space( $self->tag_size( $self->{index_size} ) );
118ba343 196
9e4f83a0 197 $self->write_tag(
e96daec8 198 $obj->_base_offset, $obj->_type,
f37c15ab 199 chr(0)x$self->{index_size},
118ba343 200 );
201
202 # Flush the filehandle
203 my $old_fh = select $fh;
204 my $old_af = $|; $| = 1; $| = $old_af;
205 select $old_fh;
206 }
207 else {
208 $obj->{base_offset} = $bytes_read;
209
210 ##
fde3db1a 211 # Get our type from master index header
118ba343 212 ##
359a01ac 213 my $tag = $self->load_tag($obj->_base_offset);
214 unless ( $tag ) {
215 flock $fh, LOCK_UN;
216 $self->_throw_error("Corrupted file, no master index record");
217 }
118ba343 218
e96daec8 219 unless ($obj->_type eq $tag->{signature}) {
359a01ac 220 flock $fh, LOCK_UN;
e96daec8 221 $self->_throw_error("File type mismatch");
118ba343 222 }
223 }
118ba343 224 }
260a80b4 225 else {
226 $self->calculate_sizes;
227 }
e06824f8 228
673464d9 229 #XXX We have to make sure we don't mess up when autoflush isn't turned on
e96daec8 230 unless ( $self->_fileobj->{inode} ) {
231 my @stats = stat($fh);
232 $self->_fileobj->{inode} = $stats[1];
233 $self->_fileobj->{end} = $stats[7];
70b55428 234 }
235
6fde4ed2 236 flock $fh, LOCK_UN;
237
70b55428 238 return 1;
239}
240
16d1ad9b 241sub tag_size {
242 my $self = shift;
243 my ($size) = @_;
244 return SIG_SIZE + $self->{data_size} + $size;
245}
246
9e4f83a0 247sub write_tag {
20f7b20c 248 ##
249 # Given offset, signature and content, create tag and write to disk
250 ##
d4b1166e 251 my $self = shift;
e96daec8 252 my ($offset, $sig, $content) = @_;
f37c15ab 253 my $size = length( $content );
20f7b20c 254
21838116 255 local($/,$\);
256
e96daec8 257 my $fh = $self->_fh;
d4b1166e 258
f37c15ab 259 if ( defined $offset ) {
e96daec8 260 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
f37c15ab 261 }
262
251dfd0e 263 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 264
f37c15ab 265 return unless defined $offset;
266
20f7b20c 267 return {
268 signature => $sig,
269 size => $size,
8db25060 270 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 271 content => $content
272 };
d4b1166e 273}
274
275sub load_tag {
20f7b20c 276 ##
277 # Given offset, load single tag and return signature, size and data
278 ##
d4b1166e 279 my $self = shift;
e96daec8 280 my ($offset) = @_;
20f7b20c 281
21838116 282 local($/,$\);
283
e96daec8 284 my $fh = $self->_fh;
d4b1166e 285
e96daec8 286 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
e5fc7e69 287
75be6413 288 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 289 return if eof $fh;
20f7b20c 290
d4b1166e 291 my $b;
8db25060 292 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 293 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 294
295 my $buffer;
296 read( $fh, $buffer, $size);
297
298 return {
299 signature => $sig,
300 size => $size,
8db25060 301 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 302 content => $buffer
303 };
d4b1166e 304}
305
56ec4340 306sub _get_dbm_object {
307 my $item = shift;
308
309 my $obj = eval {
310 local $SIG{__DIE__};
311 if ($item->isa( 'DBM::Deep' )) {
312 return $item;
313 }
314 return;
315 };
316 return $obj if $obj;
317
318 my $r = Scalar::Util::reftype( $item ) || '';
319 if ( $r eq 'HASH' ) {
320 my $obj = eval {
321 local $SIG{__DIE__};
322 my $obj = tied(%$item);
323 if ($obj->isa( 'DBM::Deep' )) {
324 return $obj;
325 }
326 return;
327 };
328 return $obj if $obj;
329 }
330 elsif ( $r eq 'ARRAY' ) {
331 my $obj = eval {
332 local $SIG{__DIE__};
333 my $obj = tied(@$item);
334 if ($obj->isa( 'DBM::Deep' )) {
335 return $obj;
336 }
337 return;
338 };
339 return $obj if $obj;
340 }
341
342 return;
343}
344
29b01632 345sub _length_needed {
346 my $self = shift;
e96daec8 347 my ($value, $key) = @_;
29b01632 348
349 my $is_dbm_deep = eval {
350 local $SIG{'__DIE__'};
351 $value->isa( 'DBM::Deep' );
352 };
353
f37c15ab 354 my $len = SIG_SIZE + $self->{data_size}
355 + $self->{data_size} + length( $key );
29b01632 356
e96daec8 357 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
f37c15ab 358 return $len + $self->{long_size};
29b01632 359 }
360
361 my $r = Scalar::Util::reftype( $value ) || '';
e96daec8 362 if ( $self->_fileobj->{autobless} ) {
9a187d8c 363 # This is for the bit saying whether or not this thing is blessed.
364 $len += 1;
365 }
366
29b01632 367 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 368 if ( defined $value ) {
369 $len += length( $value );
370 }
371 return $len;
29b01632 372 }
373
f37c15ab 374 $len += $self->{index_size};
29b01632 375
376 # if autobless is enabled, must also take into consideration
f37c15ab 377 # the class name as it is stored after the key.
e96daec8 378 if ( $self->_fileobj->{autobless} ) {
56ec4340 379 my $c = Scalar::Util::blessed($value);
380 if ( defined $c && !$is_dbm_deep ) {
381 $len += $self->{data_size} + length($c);
29b01632 382 }
383 }
384
f37c15ab 385 return $len;
29b01632 386}
387
20f7b20c 388sub add_bucket {
389 ##
390 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
391 # plain (undigested) key and value.
392 ##
d4b1166e 393 my $self = shift;
359a01ac 394 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
c9b6d0d8 395 $deleted ||= 0;
75be6413 396
21838116 397 local($/,$\);
398
eea0d863 399 # This verifies that only supported values will be stored.
400 {
401 my $r = Scalar::Util::reftype( $value );
402 last if !defined $r;
403
404 last if $r eq 'HASH';
405 last if $r eq 'ARRAY';
406
e96daec8 407 $self->_throw_error(
eea0d863 408 "Storage of variables of type '$r' is not supported."
409 );
410 }
411
20f7b20c 412 my $location = 0;
413 my $result = 2;
414
019404df 415 my $fileobj = $self->_fileobj;
20f7b20c 416
e96daec8 417 my $actual_length = $self->_length_needed( $value, $plain_key );
20f7b20c 418
21838116 419 #ACID - This is a mutation. Must only find the exact transaction
c9b6d0d8 420 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5, 1 );
421
422 my @transactions;
019404df 423 if ( $fileobj->transaction_id == 0 ) {
424 @transactions = $fileobj->current_transactions;
c9b6d0d8 425 }
75be6413 426
e96daec8 427# $self->_release_space( $size, $subloc );
386bab6c 428 # Updating a known md5
f9c33187 429#XXX This needs updating to use _release_space
386bab6c 430 if ( $subloc ) {
431 $result = 1;
20f7b20c 432
386bab6c 433 if ($actual_length <= $size) {
434 $location = $subloc;
20f7b20c 435 }
75be6413 436 else {
019404df 437 $location = $fileobj->request_space( $actual_length );
438
439 $fileobj->print_at( $tag->{offset} + $offset + $self->{hash_size},
440 pack($self->{long_pack}, $location ),
441 pack($self->{long_pack}, $actual_length ),
442 pack('n n', $fileobj->transaction_id, $deleted ),
386bab6c 443 );
75be6413 444 }
75be6413 445 }
386bab6c 446 # Adding a new md5
447 elsif ( defined $offset ) {
019404df 448 $location = $fileobj->request_space( $actual_length );
386bab6c 449
019404df 450 $fileobj->print_at( $tag->{offset} + $offset,
451 $md5,
452 pack($self->{long_pack}, $location ),
453 pack($self->{long_pack}, $actual_length ),
454 pack('n n', $fileobj->transaction_id, $deleted ),
455 );
c9b6d0d8 456
457 for ( @transactions ) {
458 my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
019404df 459 $fileobj->{transaction_id} = $_;
359a01ac 460 $self->add_bucket( $tag2, $md5, '', '', 1, $orig_key );
019404df 461 $fileobj->{transaction_id} = 0;
c9b6d0d8 462 }
386bab6c 463 }
464 # If bucket didn't fit into list, split into a new index level
019404df 465 # split_index() will do the _fileobj->request_space() call
386bab6c 466 else {
e96daec8 467 $location = $self->split_index( $md5, $tag );
386bab6c 468 }
20f7b20c 469
359a01ac 470 $self->write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 471
472 return $result;
473}
474
475sub write_value {
476 my $self = shift;
359a01ac 477 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 478
21838116 479 local($/,$\);
480
e96daec8 481 my $fh = $self->_fh;
482 my $root = $self->_fileobj;
d5d7c51d 483
9d4fa373 484 my $dbm_deep_obj = _get_dbm_object( $value );
e96daec8 485 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) {
486 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 487 }
d5d7c51d 488
489 seek($fh, $location + $root->{file_offset}, SEEK_SET);
490
20f7b20c 491 ##
d5d7c51d 492 # Write signature based on content type, set content length and write
493 # actual value.
20f7b20c 494 ##
9d4fa373 495 my $r = Scalar::Util::reftype( $value ) || '';
496 if ( $dbm_deep_obj ) {
e96daec8 497 $self->write_tag( undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 498 }
499 elsif ($r eq 'HASH') {
9d4fa373 500 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 501 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 502 }
e96daec8 503 $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 504 }
505 elsif ($r eq 'ARRAY') {
9d4fa373 506 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 507 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 508 }
e96daec8 509 $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 510 }
511 elsif (!defined($value)) {
e96daec8 512 $self->write_tag( undef, SIG_NULL, '' );
d5d7c51d 513 }
514 else {
e96daec8 515 $self->write_tag( undef, SIG_DATA, $value );
d5d7c51d 516 }
20f7b20c 517
d5d7c51d 518 ##
519 # Plain key is stored AFTER value, as keys are typically fetched less often.
520 ##
521 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 522
9a187d8c 523 # Internal references don't care about autobless
9d4fa373 524 return 1 if $dbm_deep_obj;
9a187d8c 525
d5d7c51d 526 ##
527 # If value is blessed, preserve class name
528 ##
529 if ( $root->{autobless} ) {
56ec4340 530 my $c = Scalar::Util::blessed($value);
531 if ( defined $c && !$dbm_deep_obj ) {
d5d7c51d 532 print( $fh chr(1) );
56ec4340 533 print( $fh pack($self->{data_pack}, length($c)) . $c );
20f7b20c 534 }
d5d7c51d 535 else {
536 print( $fh chr(0) );
20f7b20c 537 }
d5d7c51d 538 }
20f7b20c 539
d5d7c51d 540 ##
56ec4340 541 # Tie the passed in reference so that changes to it are reflected in the
542 # datafile. The use of $location as the base_offset will act as the
543 # the linkage between parent and child.
544 #
545 # The overall assignment is a hack around the fact that just tying doesn't
546 # store the values. This may not be the wrong thing to do.
d5d7c51d 547 ##
9d4fa373 548 if ($r eq 'HASH') {
549 my %x = %$value;
550 tie %$value, 'DBM::Deep', {
551 base_offset => $location,
460b1067 552 fileobj => $root,
359a01ac 553 parent => $self->{obj},
554 parent_key => $orig_key,
9d4fa373 555 };
556 %$value = %x;
557 }
558 elsif ($r eq 'ARRAY') {
559 my @x = @$value;
560 tie @$value, 'DBM::Deep', {
561 base_offset => $location,
460b1067 562 fileobj => $root,
359a01ac 563 parent => $self->{obj},
564 parent_key => $orig_key,
9d4fa373 565 };
566 @$value = @x;
20f7b20c 567 }
d4b1166e 568
d5d7c51d 569 return 1;
d4b1166e 570}
571
75be6413 572sub split_index {
573 my $self = shift;
e96daec8 574 my ($md5, $tag) = @_;
75be6413 575
019404df 576 my $fileobj = $self->_fileobj;
21838116 577
019404df 578 my $loc = $fileobj->request_space(
e96daec8 579 $self->tag_size( $self->{index_size} ),
16d1ad9b 580 );
581
019404df 582 $fileobj->print_at( $tag->{ref_loc}, pack($self->{long_pack}, $loc) );
75be6413 583
9e4f83a0 584 my $index_tag = $self->write_tag(
e96daec8 585 $loc, SIG_INDEX,
f37c15ab 586 chr(0)x$self->{index_size},
75be6413 587 );
588
019404df 589 my $newtag_loc = $fileobj->request_space(
e96daec8 590 $self->tag_size( $self->{bucket_list_size} ),
f9c33187 591 );
75be6413 592
7b1e1aa1 593 my $keys = $tag->{content}
f9c33187 594 . $md5 . pack($self->{long_pack}, $newtag_loc)
28394a1a 595 . pack($self->{long_pack}, 0) # size
20b7f047 596 . pack($self->{long_pack}, 0); # transaction ID
75be6413 597
f9c33187 598 my @newloc = ();
75be6413 599 BUCKET:
600 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 601 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 602
f9c33187 603 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
604 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 605
75be6413 606 my $num = ord(substr($key, $tag->{ch} + 1, 1));
607
f9c33187 608 if ($newloc[$num]) {
019404df 609 local($/,$\);
610
611 my $fh = $self->_fh;
612
613 seek($fh, $newloc[$num] + $fileobj->{file_offset}, SEEK_SET);
75be6413 614 my $subkeys;
615 read( $fh, $subkeys, $self->{bucket_list_size});
616
f9c33187 617 # This is looking for the first empty spot
7b1e1aa1 618 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 619 { content => $subkeys }, '',
7b1e1aa1 620 );
75be6413 621
019404df 622 $fileobj->print_at( $newloc[$num] + $offset, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 623
624 next;
75be6413 625 }
75be6413 626
019404df 627 my $loc = $fileobj->request_space(
e96daec8 628 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 629 );
2603d86e 630
019404df 631 $fileobj->print_at(
632 $index_tag->{offset} + ($num * $self->{long_size}),
633 pack($self->{long_pack}, $loc),
634 );
75be6413 635
7b1e1aa1 636 my $blist_tag = $self->write_tag(
e96daec8 637 $loc, SIG_BLIST,
7b1e1aa1 638 chr(0)x$self->{bucket_list_size},
639 );
640
019404df 641 $fileobj->print_at( $blist_tag->{offset}, $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 642
f9c33187 643 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 644 }
645
646 $self->_release_space(
e96daec8 647 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 648 $tag->{offset} - SIG_SIZE - $self->{data_size},
649 );
75be6413 650
f9c33187 651 return $newtag_loc;
75be6413 652}
653
8db25060 654sub read_from_loc {
655 my $self = shift;
359a01ac 656 my ($subloc, $orig_key) = @_;
8db25060 657
21838116 658 local($/,$\);
659
e96daec8 660 my $fh = $self->_fh;
8db25060 661
662 ##
663 # Found match -- seek to offset and read signature
664 ##
665 my $signature;
e96daec8 666 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
8db25060 667 read( $fh, $signature, SIG_SIZE);
668
669 ##
670 # If value is a hash or array, return new DBM::Deep object with correct offset
671 ##
672 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 673 my $new_obj = DBM::Deep->new({
359a01ac 674 type => $signature,
8db25060 675 base_offset => $subloc,
e96daec8 676 fileobj => $self->_fileobj,
359a01ac 677 parent => $self->{obj},
678 parent_key => $orig_key,
685e40f1 679 });
8db25060 680
460b1067 681 if ($new_obj->_fileobj->{autobless}) {
8db25060 682 ##
683 # Skip over value and plain key to see if object needs
684 # to be re-blessed
685 ##
686 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
687
688 my $size;
c6ea6b6c 689 read( $fh, $size, $self->{data_size});
690 $size = unpack($self->{data_pack}, $size);
8db25060 691 if ($size) { seek($fh, $size, SEEK_CUR); }
692
693 my $bless_bit;
694 read( $fh, $bless_bit, 1);
695 if (ord($bless_bit)) {
696 ##
697 # Yes, object needs to be re-blessed
698 ##
699 my $class_name;
c6ea6b6c 700 read( $fh, $size, $self->{data_size});
701 $size = unpack($self->{data_pack}, $size);
8db25060 702 if ($size) { read( $fh, $class_name, $size); }
685e40f1 703 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 704 }
705 }
706
685e40f1 707 return $new_obj;
8db25060 708 }
709 elsif ( $signature eq SIG_INTERNAL ) {
710 my $size;
711 read( $fh, $size, $self->{data_size});
712 $size = unpack($self->{data_pack}, $size);
713
714 if ( $size ) {
715 my $new_loc;
716 read( $fh, $new_loc, $size );
717 $new_loc = unpack( $self->{long_pack}, $new_loc );
718
359a01ac 719 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 720 }
721 else {
722 return;
723 }
724 }
725 ##
726 # Otherwise return actual value
727 ##
460b1067 728 elsif ( $signature eq SIG_DATA ) {
8db25060 729 my $size;
730 read( $fh, $size, $self->{data_size});
731 $size = unpack($self->{data_pack}, $size);
732
733 my $value = '';
734 if ($size) { read( $fh, $value, $size); }
735 return $value;
736 }
737
738 ##
739 # Key exists, but content is null
740 ##
741 return;
742}
743
9020ee8c 744sub get_bucket_value {
beac1dff 745 ##
746 # Fetch single value given tag and MD5 digested key.
747 ##
748 my $self = shift;
359a01ac 749 my ($tag, $md5, $orig_key) = @_;
9020ee8c 750
21838116 751 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 752 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5 );
753 if ( $subloc && !$is_deleted ) {
359a01ac 754 return $self->read_from_loc( $subloc, $orig_key );
386bab6c 755 }
beac1dff 756 return;
9020ee8c 757}
ab0e4957 758
759sub delete_bucket {
beac1dff 760 ##
761 # Delete single key/value pair given tag and MD5 digested key.
762 ##
763 my $self = shift;
a97c8f67 764 my ($tag, $md5, $orig_key) = @_;
ab0e4957 765
21838116 766 #ACID - This is a mutation. Must only find the exact transaction
767 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );
019404df 768#XXX This needs _release_space() for the value and anything below
386bab6c 769 if ( $subloc ) {
019404df 770 $self->_fileobj->print_at(
771 $tag->{offset} + $offset,
772 substr($tag->{content}, $offset + $self->{bucket_size} ),
773 chr(0) x $self->{bucket_size},
774 );
d0b74c17 775
ab0e4957 776 return 1;
386bab6c 777 }
beac1dff 778 return;
ab0e4957 779}
780
912d50b1 781sub bucket_exists {
beac1dff 782 ##
783 # Check existence of single key given tag and MD5 digested key.
784 ##
785 my $self = shift;
e96daec8 786 my ($tag, $md5) = @_;
912d50b1 787
21838116 788 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 789 my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );
790 return ($subloc && !$is_deleted) && 1;
912d50b1 791}
792
6736c116 793sub find_bucket_list {
beac1dff 794 ##
795 # Locate offset for bucket list, given digested key
796 ##
797 my $self = shift;
e96daec8 798 my ($offset, $md5, $args) = @_;
d0b74c17 799 $args = {} unless $args;
800
21838116 801 local($/,$\);
802
beac1dff 803 ##
804 # Locate offset for bucket list using digest index system
805 ##
e96daec8 806 my $tag = $self->load_tag( $offset )
807 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 808
e5fc7e69 809 my $ch = 0;
8db25060 810 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 811 my $num = ord substr($md5, $ch, 1);
812
813 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 814 $tag = $self->index_lookup( $tag, $num );
d0b74c17 815
816 if (!$tag) {
29b01632 817 return if !$args->{create};
d0b74c17 818
019404df 819 my $loc = $self->_fileobj->request_space(
e96daec8 820 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 821 );
822
019404df 823 $self->_fileobj->print_at( $ref_loc, pack($self->{long_pack}, $loc) );
d0b74c17 824
9e4f83a0 825 $tag = $self->write_tag(
e96daec8 826 $loc, SIG_BLIST,
f37c15ab 827 chr(0)x$self->{bucket_list_size},
d5d7c51d 828 );
829
830 $tag->{ref_loc} = $ref_loc;
831 $tag->{ch} = $ch;
832
833 last;
d0b74c17 834 }
835
16d1ad9b 836 $tag->{ch} = $ch++;
d0b74c17 837 $tag->{ref_loc} = $ref_loc;
beac1dff 838 }
d0b74c17 839
beac1dff 840 return $tag;
6736c116 841}
842
d0b74c17 843sub index_lookup {
844 ##
845 # Given index tag, lookup single entry in index and return .
846 ##
847 my $self = shift;
e96daec8 848 my ($tag, $index) = @_;
d0b74c17 849
850 my $location = unpack(
851 $self->{long_pack},
852 substr(
853 $tag->{content},
854 $index * $self->{long_size},
855 $self->{long_size},
856 ),
857 );
858
859 if (!$location) { return; }
860
e96daec8 861 return $self->load_tag( $location );
d0b74c17 862}
863
6736c116 864sub traverse_index {
beac1dff 865 ##
866 # Scan index and recursively step into deeper levels, looking for next key.
867 ##
6736c116 868 my $self = shift;
869 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 870
21838116 871 local($/,$\);
872
e96daec8 873 my $tag = $self->load_tag( $offset );
6736c116 874
e96daec8 875 my $fh = $self->_fh;
d0b74c17 876
8db25060 877 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 878 my $content = $tag->{content};
e5fc7e69 879 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 880
d5d7c51d 881 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 882 my $subloc = unpack(
883 $self->{long_pack},
e06824f8 884 substr(
885 $content,
886 $idx * $self->{long_size},
887 $self->{long_size},
888 ),
e5fc7e69 889 );
890
beac1dff 891 if ($subloc) {
e5fc7e69 892 my $result = $self->traverse_index(
893 $obj, $subloc, $ch + 1, $force_return_next,
894 );
895
beac1dff 896 if (defined($result)) { return $result; }
897 }
898 } # index loop
d0b74c17 899
beac1dff 900 $obj->{return_next} = 1;
901 } # tag is an index
d0b74c17 902
e5fc7e69 903 else {
beac1dff 904 my $keys = $tag->{content};
905 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 906
beac1dff 907 ##
908 # Iterate through buckets, looking for a key match
909 ##
8db25060 910 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 911 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 912
8db25060 913 # End of bucket list -- return to outer loop
beac1dff 914 if (!$subloc) {
beac1dff 915 $obj->{return_next} = 1;
916 last;
917 }
8db25060 918 # Located previous key -- return next one found
beac1dff 919 elsif ($key eq $obj->{prev_md5}) {
beac1dff 920 $obj->{return_next} = 1;
921 next;
922 }
8db25060 923 # Seek to bucket location and skip over signature
beac1dff 924 elsif ($obj->{return_next}) {
e96daec8 925 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
d0b74c17 926
beac1dff 927 # Skip over value to get to plain key
8db25060 928 my $sig;
929 read( $fh, $sig, SIG_SIZE );
930
beac1dff 931 my $size;
e5fc7e69 932 read( $fh, $size, $self->{data_size});
933 $size = unpack($self->{data_pack}, $size);
beac1dff 934 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 935
beac1dff 936 # Read in plain key and return as scalar
beac1dff 937 my $plain_key;
e5fc7e69 938 read( $fh, $size, $self->{data_size});
939 $size = unpack($self->{data_pack}, $size);
beac1dff 940 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 941
beac1dff 942 return $plain_key;
943 }
8db25060 944 }
d0b74c17 945
beac1dff 946 $obj->{return_next} = 1;
947 } # tag is a bucket list
d0b74c17 948
beac1dff 949 return;
6736c116 950}
951
952sub get_next_key {
beac1dff 953 ##
954 # Locate next key, given digested previous one
955 ##
6736c116 956 my $self = shift;
957 my ($obj) = @_;
d0b74c17 958
beac1dff 959 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
960 $obj->{return_next} = 0;
d0b74c17 961
beac1dff 962 ##
963 # If the previous key was not specifed, start at the top and
964 # return the first one found.
965 ##
966 if (!$obj->{prev_md5}) {
967 $obj->{prev_md5} = chr(0) x $self->{hash_size};
968 $obj->{return_next} = 1;
969 }
d0b74c17 970
beac1dff 971 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 972}
973
75be6413 974# Utilities
975
9cec1360 976sub _get_key_subloc {
75be6413 977 my $self = shift;
978 my ($keys, $idx) = @_;
979
c9b6d0d8 980 my ($key, $subloc, $size, $transaction_id, $is_deleted) = unpack(
28394a1a 981 # This is 'a', not 'A'. Please read the pack() documentation for the
982 # difference between the two and why it's important.
c9b6d0d8 983 "a$self->{hash_size} $self->{long_pack}2 n2",
75be6413 984 substr(
985 $keys,
9cec1360 986 ($idx * $self->{bucket_size}),
987 $self->{bucket_size},
75be6413 988 ),
989 );
990
c9b6d0d8 991 return ($key, $subloc, $size, $transaction_id, $is_deleted);
75be6413 992}
993
d608b06e 994sub _find_in_buckets {
995 my $self = shift;
21838116 996 my ($tag, $md5, $exact) = @_;
d608b06e 997
28394a1a 998 my $trans_id = $self->_fileobj->transaction_id;
999
21838116 1000 my @zero;
1001
d608b06e 1002 BUCKET:
1003 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
c9b6d0d8 1004 my ($key, $subloc, $size, $transaction_id, $is_deleted) = $self->_get_key_subloc(
9a187d8c 1005 $tag->{content}, $i,
1006 );
d608b06e 1007
c9b6d0d8 1008 my @rv = ($subloc, $i * $self->{bucket_size}, $size, $is_deleted);
21838116 1009
1010 unless ( $subloc ) {
20b7f047 1011 if ( !$exact && @zero and $trans_id ) {
c9b6d0d8 1012 @rv = ($zero[2], $zero[0] * $self->{bucket_size},$zero[3],$is_deleted);
20b7f047 1013 }
21838116 1014 return @rv;
1015 }
1016
1017 next BUCKET if $key ne $md5;
d608b06e 1018
21838116 1019 # Save off the HEAD in case we need it.
c9b6d0d8 1020 @zero = ($i,$key,$subloc,$size,$transaction_id,$is_deleted) if $transaction_id == 0;
d608b06e 1021
21838116 1022 next BUCKET if $transaction_id != $trans_id;
1023
1024 return @rv;
d608b06e 1025 }
1026
1027 return;
1028}
1029
994ccd8e 1030sub _release_space {
1031 my $self = shift;
e96daec8 1032 my ($size, $loc) = @_;
994ccd8e 1033
7b1e1aa1 1034 my $next_loc = 0;
1035
019404df 1036 $self->_fileobj->print_at( $loc,
1037 SIG_FREE,
1038 pack($self->{long_pack}, $size ),
1039 pack($self->{long_pack}, $next_loc ),
7b1e1aa1 1040 );
1041
994ccd8e 1042 return;
1043}
1044
e96daec8 1045sub _throw_error {
1046 die "DBM::Deep: $_[1]\n";
1047}
1048
a20d9a3f 10491;
1050__END__
d5d7c51d 1051
1052# This will be added in later, after more refactoring is done. This is an early
1053# attempt at refactoring on the physical level instead of the virtual level.
1054sub _read_at {
1055 my $self = shift;
e96daec8 1056 my ($spot, $amount, $unpack) = @_;
d5d7c51d 1057
21838116 1058 local($/,$\);
1059
e96daec8 1060 my $fh = $self->_fh;
1061 seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );
d5d7c51d 1062
1063 my $buffer;
1064 my $bytes_read = read( $fh, $buffer, $amount );
1065
1066 if ( $unpack ) {
1067 $buffer = unpack( $unpack, $buffer );
1068 }
1069
1070 if ( wantarray ) {
1071 return ($buffer, $bytes_read);
1072 }
1073 else {
1074 return $buffer;
1075 }
1076}
e96daec8 1077
e96daec8 1078sub get_file_version {
1079 my $self = shift;
1080
21838116 1081 local($/,$\);
1082
e96daec8 1083 my $fh = $self->_fh;
1084
1085 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1086 my $buffer;
1087 my $bytes_read = read( $fh, $buffer, 4 );
1088 unless ( $bytes_read == 4 ) {
1089 $self->_throw_error( "Cannot read file version" );
1090 }
1091
1092 return unpack( 'N', $buffer );
1093}
1094
1095sub write_file_version {
1096 my $self = shift;
1097 my ($new_version) = @_;
1098
21838116 1099 local($/,$\);
1100
e96daec8 1101 my $fh = $self->_fh;
1102
1103 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1104 print( $fh pack( 'N', $new_version ) );
1105
1106 return;
1107}
1108