Added Test::Deep and removed Test::Class dependency
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
8use Fcntl qw( :DEFAULT :flock :seek );
9
21838116 10# File-wide notes:
11# * All the local($/,$\); are to protect read() and print() from -l.
20b7f047 12# * To add to bucket_size, make sure you modify the following:
13# - calculate_sizes()
14# - _get_key_subloc()
15# - add_bucket() - where the buckets are printed
21838116 16
8db25060 17##
18# Setup file and tag signatures. These should never change.
19##
20sub SIG_FILE () { 'DPDB' }
460b1067 21sub SIG_HEADER () { 'h' }
8db25060 22sub SIG_INTERNAL () { 'i' }
23sub SIG_HASH () { 'H' }
24sub SIG_ARRAY () { 'A' }
8db25060 25sub SIG_NULL () { 'N' }
26sub SIG_DATA () { 'D' }
27sub SIG_INDEX () { 'I' }
28sub SIG_BLIST () { 'B' }
7b1e1aa1 29sub SIG_FREE () { 'F' }
8db25060 30sub SIG_SIZE () { 1 }
31
612969fb 32sub new {
33 my $class = shift;
34 my ($args) = @_;
35
36 my $self = bless {
37 long_size => 4,
38 long_pack => 'N',
39 data_size => 4,
40 data_pack => 'N',
251dfd0e 41
612969fb 42 digest => \&Digest::MD5::md5,
43 hash_size => 16,
251dfd0e 44
81d16922 45 ##
d5d7c51d 46 # Maximum number of buckets per list before another level of indexing is
e0098e7f 47 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 48 # files. DO NOT decrease this value below 16, due to risk of recursive
49 # reindex overrun.
81d16922 50 ##
612969fb 51 max_buckets => 16,
460b1067 52
53 fileobj => undef,
612969fb 54 }, $class;
55
e0098e7f 56 if ( defined $args->{pack_size} ) {
57 if ( lc $args->{pack_size} eq 'small' ) {
58 $args->{long_size} = 2;
c9b6d0d8 59 $args->{long_pack} = 'n';
e0098e7f 60 }
61 elsif ( lc $args->{pack_size} eq 'medium' ) {
62 $args->{long_size} = 4;
63 $args->{long_pack} = 'N';
64 }
65 elsif ( lc $args->{pack_size} eq 'large' ) {
66 $args->{long_size} = 8;
67 $args->{long_pack} = 'Q';
68 }
69 else {
70 die "Unknown pack_size value: '$args->{pack_size}'\n";
71 }
72 }
73
fde3db1a 74 # Grab the parameters we want to use
75 foreach my $param ( keys %$self ) {
76 next unless exists $args->{$param};
3e9498a1 77 $self->{$param} = $args->{$param};
fde3db1a 78 }
79
e0098e7f 80 if ( $self->{max_buckets} < 16 ) {
81 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
82 $self->{max_buckets} = 16;
83 }
84
260a80b4 85 return $self;
86}
87
460b1067 88sub _fileobj { return $_[0]{fileobj} }
89sub _fh { return $_[0]->_fileobj->{fh} }
90
260a80b4 91sub calculate_sizes {
92 my $self = shift;
93
28394a1a 94 #XXX Does this need to be updated with different hashing algorithms?
e0098e7f 95 $self->{index_size} = (2**8) * $self->{long_size};
28394a1a 96 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3;
e0098e7f 97 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 98
260a80b4 99 return;
1bf65be7 100}
101
fde3db1a 102sub write_file_header {
0d0f3d5d 103 my $self = shift;
0d0f3d5d 104
21838116 105 local($/,$\);
106
460b1067 107 my $fh = $self->_fh;
0d0f3d5d 108
e96daec8 109 my $loc = $self->_request_space( length( SIG_FILE ) + 21 );
460b1067 110 seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET);
260a80b4 111 print( $fh
112 SIG_FILE,
460b1067 113 SIG_HEADER,
114 pack('N', 1), # header version
115 pack('N', 12), # header size
15ba72cc 116 pack('N', 0), # currently running transaction IDs
c9b6d0d8 117 pack('n', $self->{long_size}),
260a80b4 118 pack('A', $self->{long_pack}),
c9b6d0d8 119 pack('n', $self->{data_size}),
260a80b4 120 pack('A', $self->{data_pack}),
c9b6d0d8 121 pack('n', $self->{max_buckets}),
260a80b4 122 );
0d0f3d5d 123
20b7f047 124 $self->_fileobj->set_transaction_offset( 13 );
125
0d0f3d5d 126 return;
127}
128
fde3db1a 129sub read_file_header {
e064ccd1 130 my $self = shift;
e064ccd1 131
21838116 132 local($/,$\);
133
e96daec8 134 my $fh = $self->_fh;
e064ccd1 135
e96daec8 136 seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET);
42f79e07 137 my $buffer;
460b1067 138 my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 );
139
140 return unless $bytes_read;
141
142 my ($file_signature, $sig_header, $header_version, $size) = unpack(
143 'A4 A N N', $buffer
42f79e07 144 );
e064ccd1 145
460b1067 146 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 147 $self->_fileobj->close;
e96daec8 148 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 149 }
260a80b4 150
460b1067 151 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 152 $self->_fileobj->close;
e96daec8 153 $self->_throw_error( "Old file version found." );
460b1067 154 }
9b2370e0 155
460b1067 156 my $buffer2;
157 $bytes_read += read( $fh, $buffer2, $size );
c9b6d0d8 158 my ($running_transactions, @values) = unpack( 'N n A n A n', $buffer2 );
15ba72cc 159
160 $self->_fileobj->set_transaction_offset( 13 );
161
460b1067 162 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 163 $self->_fileobj->close;
e96daec8 164 $self->_throw_error("Corrupted file - bad header");
e064ccd1 165 }
166
460b1067 167 #XXX Add warnings if values weren't set right
168 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
169
e064ccd1 170 return $bytes_read;
171}
172
460b1067 173sub setup_fh {
174 my $self = shift;
175 my ($obj) = @_;
70b55428 176
e96daec8 177 my $fh = $self->_fh;
6fde4ed2 178 flock $fh, LOCK_EX;
118ba343 179
260a80b4 180 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 181 unless ( $obj->{base_offset} ) {
e96daec8 182 my $bytes_read = $self->read_file_header;
118ba343 183
260a80b4 184 $self->calculate_sizes;
185
118ba343 186 ##
fde3db1a 187 # File is empty -- write header and master index
118ba343 188 ##
189 if (!$bytes_read) {
e96daec8 190 $self->write_file_header;
118ba343 191
e96daec8 192 $obj->{base_offset} = $self->_request_space( $self->tag_size( $self->{index_size} ) );
118ba343 193
9e4f83a0 194 $self->write_tag(
e96daec8 195 $obj->_base_offset, $obj->_type,
f37c15ab 196 chr(0)x$self->{index_size},
118ba343 197 );
198
199 # Flush the filehandle
200 my $old_fh = select $fh;
201 my $old_af = $|; $| = 1; $| = $old_af;
202 select $old_fh;
203 }
204 else {
205 $obj->{base_offset} = $bytes_read;
206
207 ##
fde3db1a 208 # Get our type from master index header
118ba343 209 ##
e96daec8 210 my $tag = $self->load_tag($obj->_base_offset)
211 or $self->_throw_error("Corrupted file, no master index record");
118ba343 212
e96daec8 213 unless ($obj->_type eq $tag->{signature}) {
214 $self->_throw_error("File type mismatch");
118ba343 215 }
216 }
118ba343 217 }
260a80b4 218 else {
219 $self->calculate_sizes;
220 }
e06824f8 221
673464d9 222 #XXX We have to make sure we don't mess up when autoflush isn't turned on
e96daec8 223 unless ( $self->_fileobj->{inode} ) {
224 my @stats = stat($fh);
225 $self->_fileobj->{inode} = $stats[1];
226 $self->_fileobj->{end} = $stats[7];
70b55428 227 }
228
6fde4ed2 229 flock $fh, LOCK_UN;
230
70b55428 231 return 1;
232}
233
16d1ad9b 234sub tag_size {
235 my $self = shift;
236 my ($size) = @_;
237 return SIG_SIZE + $self->{data_size} + $size;
238}
239
9e4f83a0 240sub write_tag {
20f7b20c 241 ##
242 # Given offset, signature and content, create tag and write to disk
243 ##
d4b1166e 244 my $self = shift;
e96daec8 245 my ($offset, $sig, $content) = @_;
f37c15ab 246 my $size = length( $content );
20f7b20c 247
21838116 248 local($/,$\);
249
e96daec8 250 my $fh = $self->_fh;
d4b1166e 251
f37c15ab 252 if ( defined $offset ) {
e96daec8 253 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
f37c15ab 254 }
255
251dfd0e 256 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 257
f37c15ab 258 return unless defined $offset;
259
20f7b20c 260 return {
261 signature => $sig,
262 size => $size,
8db25060 263 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 264 content => $content
265 };
d4b1166e 266}
267
268sub load_tag {
20f7b20c 269 ##
270 # Given offset, load single tag and return signature, size and data
271 ##
d4b1166e 272 my $self = shift;
e96daec8 273 my ($offset) = @_;
20f7b20c 274
21838116 275 local($/,$\);
276
e06824f8 277# print join(':',map{$_||''}caller(1)), $/;
278
e96daec8 279 my $fh = $self->_fh;
d4b1166e 280
e96daec8 281 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
e5fc7e69 282
75be6413 283 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 284 return if eof $fh;
20f7b20c 285
d4b1166e 286 my $b;
8db25060 287 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 288 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 289
290 my $buffer;
291 read( $fh, $buffer, $size);
292
293 return {
294 signature => $sig,
295 size => $size,
8db25060 296 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 297 content => $buffer
298 };
d4b1166e 299}
300
56ec4340 301sub _get_dbm_object {
302 my $item = shift;
303
304 my $obj = eval {
305 local $SIG{__DIE__};
306 if ($item->isa( 'DBM::Deep' )) {
307 return $item;
308 }
309 return;
310 };
311 return $obj if $obj;
312
313 my $r = Scalar::Util::reftype( $item ) || '';
314 if ( $r eq 'HASH' ) {
315 my $obj = eval {
316 local $SIG{__DIE__};
317 my $obj = tied(%$item);
318 if ($obj->isa( 'DBM::Deep' )) {
319 return $obj;
320 }
321 return;
322 };
323 return $obj if $obj;
324 }
325 elsif ( $r eq 'ARRAY' ) {
326 my $obj = eval {
327 local $SIG{__DIE__};
328 my $obj = tied(@$item);
329 if ($obj->isa( 'DBM::Deep' )) {
330 return $obj;
331 }
332 return;
333 };
334 return $obj if $obj;
335 }
336
337 return;
338}
339
29b01632 340sub _length_needed {
341 my $self = shift;
e96daec8 342 my ($value, $key) = @_;
29b01632 343
344 my $is_dbm_deep = eval {
345 local $SIG{'__DIE__'};
346 $value->isa( 'DBM::Deep' );
347 };
348
f37c15ab 349 my $len = SIG_SIZE + $self->{data_size}
350 + $self->{data_size} + length( $key );
29b01632 351
e96daec8 352 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
f37c15ab 353 return $len + $self->{long_size};
29b01632 354 }
355
356 my $r = Scalar::Util::reftype( $value ) || '';
e96daec8 357 if ( $self->_fileobj->{autobless} ) {
9a187d8c 358 # This is for the bit saying whether or not this thing is blessed.
359 $len += 1;
360 }
361
29b01632 362 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 363 if ( defined $value ) {
364 $len += length( $value );
365 }
366 return $len;
29b01632 367 }
368
f37c15ab 369 $len += $self->{index_size};
29b01632 370
371 # if autobless is enabled, must also take into consideration
f37c15ab 372 # the class name as it is stored after the key.
e96daec8 373 if ( $self->_fileobj->{autobless} ) {
56ec4340 374 my $c = Scalar::Util::blessed($value);
375 if ( defined $c && !$is_dbm_deep ) {
376 $len += $self->{data_size} + length($c);
29b01632 377 }
378 }
379
f37c15ab 380 return $len;
29b01632 381}
382
20f7b20c 383sub add_bucket {
384 ##
385 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
386 # plain (undigested) key and value.
387 ##
d4b1166e 388 my $self = shift;
c9b6d0d8 389 my ($tag, $md5, $plain_key, $value, $deleted) = @_;
390 $deleted ||= 0;
75be6413 391
21838116 392 local($/,$\);
393
eea0d863 394 # This verifies that only supported values will be stored.
395 {
396 my $r = Scalar::Util::reftype( $value );
397 last if !defined $r;
398
399 last if $r eq 'HASH';
400 last if $r eq 'ARRAY';
401
e96daec8 402 $self->_throw_error(
eea0d863 403 "Storage of variables of type '$r' is not supported."
404 );
405 }
406
20f7b20c 407 my $location = 0;
408 my $result = 2;
409
e96daec8 410 my $root = $self->_fileobj;
411 my $fh = $self->_fh;
20f7b20c 412
e96daec8 413 my $actual_length = $self->_length_needed( $value, $plain_key );
20f7b20c 414
21838116 415 #ACID - This is a mutation. Must only find the exact transaction
c9b6d0d8 416 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5, 1 );
417
418 my @transactions;
419 if ( $self->_fileobj->transaction_id == 0 ) {
420 @transactions = $self->_fileobj->current_transactions;
421 }
75be6413 422
e96daec8 423# $self->_release_space( $size, $subloc );
386bab6c 424 # Updating a known md5
f9c33187 425#XXX This needs updating to use _release_space
386bab6c 426 if ( $subloc ) {
427 $result = 1;
20f7b20c 428
386bab6c 429 if ($actual_length <= $size) {
430 $location = $subloc;
20f7b20c 431 }
75be6413 432 else {
e96daec8 433 $location = $self->_request_space( $actual_length );
386bab6c 434 seek(
435 $fh,
9a187d8c 436 $tag->{offset} + $offset
437 + $self->{hash_size} + $root->{file_offset},
386bab6c 438 SEEK_SET,
439 );
9a187d8c 440 print( $fh pack($self->{long_pack}, $location ) );
441 print( $fh pack($self->{long_pack}, $actual_length ) );
c9b6d0d8 442 print( $fh pack('n n', $root->transaction_id, $deleted ) );
75be6413 443 }
75be6413 444 }
386bab6c 445 # Adding a new md5
446 elsif ( defined $offset ) {
e96daec8 447 $location = $self->_request_space( $actual_length );
386bab6c 448
449 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 450 print( $fh $md5 . pack($self->{long_pack}, $location ) );
451 print( $fh pack($self->{long_pack}, $actual_length ) );
c9b6d0d8 452 print( $fh pack('n n', $root->transaction_id, $deleted ) );
453
454 for ( @transactions ) {
455 my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
456 $self->_fileobj->{transaction_id} = $_;
457 $self->add_bucket( $tag2, $md5, '', '', 1 );
458 $self->_fileobj->{transaction_id} = 0;
459 }
386bab6c 460 }
461 # If bucket didn't fit into list, split into a new index level
f9c33187 462 # split_index() will do the _request_space() call
386bab6c 463 else {
e96daec8 464 $location = $self->split_index( $md5, $tag );
386bab6c 465 }
20f7b20c 466
e96daec8 467 $self->write_value( $location, $plain_key, $value );
d5d7c51d 468
469 return $result;
470}
471
472sub write_value {
473 my $self = shift;
e96daec8 474 my ($location, $key, $value) = @_;
d5d7c51d 475
21838116 476 local($/,$\);
477
e96daec8 478 my $fh = $self->_fh;
479 my $root = $self->_fileobj;
d5d7c51d 480
9d4fa373 481 my $dbm_deep_obj = _get_dbm_object( $value );
e96daec8 482 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) {
483 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 484 }
d5d7c51d 485
486 seek($fh, $location + $root->{file_offset}, SEEK_SET);
487
20f7b20c 488 ##
d5d7c51d 489 # Write signature based on content type, set content length and write
490 # actual value.
20f7b20c 491 ##
9d4fa373 492 my $r = Scalar::Util::reftype( $value ) || '';
493 if ( $dbm_deep_obj ) {
e96daec8 494 $self->write_tag( undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 495 }
496 elsif ($r eq 'HASH') {
9d4fa373 497 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 498 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 499 }
e96daec8 500 $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 501 }
502 elsif ($r eq 'ARRAY') {
9d4fa373 503 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 504 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 505 }
e96daec8 506 $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 507 }
508 elsif (!defined($value)) {
e96daec8 509 $self->write_tag( undef, SIG_NULL, '' );
d5d7c51d 510 }
511 else {
e96daec8 512 $self->write_tag( undef, SIG_DATA, $value );
d5d7c51d 513 }
20f7b20c 514
d5d7c51d 515 ##
516 # Plain key is stored AFTER value, as keys are typically fetched less often.
517 ##
518 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 519
9a187d8c 520 # Internal references don't care about autobless
9d4fa373 521 return 1 if $dbm_deep_obj;
9a187d8c 522
d5d7c51d 523 ##
524 # If value is blessed, preserve class name
525 ##
526 if ( $root->{autobless} ) {
56ec4340 527 my $c = Scalar::Util::blessed($value);
528 if ( defined $c && !$dbm_deep_obj ) {
d5d7c51d 529 print( $fh chr(1) );
56ec4340 530 print( $fh pack($self->{data_pack}, length($c)) . $c );
20f7b20c 531 }
d5d7c51d 532 else {
533 print( $fh chr(0) );
20f7b20c 534 }
d5d7c51d 535 }
20f7b20c 536
d5d7c51d 537 ##
56ec4340 538 # Tie the passed in reference so that changes to it are reflected in the
539 # datafile. The use of $location as the base_offset will act as the
540 # the linkage between parent and child.
541 #
542 # The overall assignment is a hack around the fact that just tying doesn't
543 # store the values. This may not be the wrong thing to do.
d5d7c51d 544 ##
9d4fa373 545 if ($r eq 'HASH') {
546 my %x = %$value;
547 tie %$value, 'DBM::Deep', {
548 base_offset => $location,
460b1067 549 fileobj => $root,
9d4fa373 550 };
551 %$value = %x;
552 }
553 elsif ($r eq 'ARRAY') {
554 my @x = @$value;
555 tie @$value, 'DBM::Deep', {
556 base_offset => $location,
460b1067 557 fileobj => $root,
9d4fa373 558 };
559 @$value = @x;
20f7b20c 560 }
d4b1166e 561
d5d7c51d 562 return 1;
d4b1166e 563}
564
75be6413 565sub split_index {
566 my $self = shift;
e96daec8 567 my ($md5, $tag) = @_;
75be6413 568
21838116 569 local($/,$\);
570
e96daec8 571 my $fh = $self->_fh;
572 my $root = $self->_fileobj;
16d1ad9b 573
574 my $loc = $self->_request_space(
e96daec8 575 $self->tag_size( $self->{index_size} ),
16d1ad9b 576 );
577
7b1e1aa1 578 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 579 print( $fh pack($self->{long_pack}, $loc) );
75be6413 580
9e4f83a0 581 my $index_tag = $self->write_tag(
e96daec8 582 $loc, SIG_INDEX,
f37c15ab 583 chr(0)x$self->{index_size},
75be6413 584 );
585
f9c33187 586 my $newtag_loc = $self->_request_space(
e96daec8 587 $self->tag_size( $self->{bucket_list_size} ),
f9c33187 588 );
75be6413 589
7b1e1aa1 590 my $keys = $tag->{content}
f9c33187 591 . $md5 . pack($self->{long_pack}, $newtag_loc)
28394a1a 592 . pack($self->{long_pack}, 0) # size
20b7f047 593 . pack($self->{long_pack}, 0); # transaction ID
75be6413 594
f9c33187 595 my @newloc = ();
75be6413 596 BUCKET:
597 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 598 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 599
f9c33187 600 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
601 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 602
75be6413 603 my $num = ord(substr($key, $tag->{ch} + 1, 1));
604
f9c33187 605 if ($newloc[$num]) {
606 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 607 my $subkeys;
608 read( $fh, $subkeys, $self->{bucket_list_size});
609
f9c33187 610 # This is looking for the first empty spot
7b1e1aa1 611 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 612 { content => $subkeys }, '',
7b1e1aa1 613 );
75be6413 614
f9c33187 615 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
616 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 617
618 next;
75be6413 619 }
75be6413 620
7b1e1aa1 621 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 622
7b1e1aa1 623 my $loc = $self->_request_space(
e96daec8 624 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 625 );
2603d86e 626
7b1e1aa1 627 print( $fh pack($self->{long_pack}, $loc) );
75be6413 628
7b1e1aa1 629 my $blist_tag = $self->write_tag(
e96daec8 630 $loc, SIG_BLIST,
7b1e1aa1 631 chr(0)x$self->{bucket_list_size},
632 );
633
634 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 635 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 636
f9c33187 637 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 638 }
639
640 $self->_release_space(
e96daec8 641 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 642 $tag->{offset} - SIG_SIZE - $self->{data_size},
643 );
75be6413 644
f9c33187 645 return $newtag_loc;
75be6413 646}
647
8db25060 648sub read_from_loc {
649 my $self = shift;
e96daec8 650 my ($subloc) = @_;
8db25060 651
21838116 652 local($/,$\);
653
e96daec8 654 my $fh = $self->_fh;
8db25060 655
656 ##
657 # Found match -- seek to offset and read signature
658 ##
659 my $signature;
e96daec8 660 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
8db25060 661 read( $fh, $signature, SIG_SIZE);
662
663 ##
664 # If value is a hash or array, return new DBM::Deep object with correct offset
665 ##
666 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 667 my $new_obj = DBM::Deep->new({
8db25060 668 type => $signature,
669 base_offset => $subloc,
e96daec8 670 fileobj => $self->_fileobj,
685e40f1 671 });
8db25060 672
460b1067 673 if ($new_obj->_fileobj->{autobless}) {
8db25060 674 ##
675 # Skip over value and plain key to see if object needs
676 # to be re-blessed
677 ##
678 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
679
680 my $size;
c6ea6b6c 681 read( $fh, $size, $self->{data_size});
682 $size = unpack($self->{data_pack}, $size);
8db25060 683 if ($size) { seek($fh, $size, SEEK_CUR); }
684
685 my $bless_bit;
686 read( $fh, $bless_bit, 1);
687 if (ord($bless_bit)) {
688 ##
689 # Yes, object needs to be re-blessed
690 ##
691 my $class_name;
c6ea6b6c 692 read( $fh, $size, $self->{data_size});
693 $size = unpack($self->{data_pack}, $size);
8db25060 694 if ($size) { read( $fh, $class_name, $size); }
685e40f1 695 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 696 }
697 }
698
685e40f1 699 return $new_obj;
8db25060 700 }
701 elsif ( $signature eq SIG_INTERNAL ) {
702 my $size;
703 read( $fh, $size, $self->{data_size});
704 $size = unpack($self->{data_pack}, $size);
705
706 if ( $size ) {
707 my $new_loc;
708 read( $fh, $new_loc, $size );
709 $new_loc = unpack( $self->{long_pack}, $new_loc );
710
e96daec8 711 return $self->read_from_loc( $new_loc );
8db25060 712 }
713 else {
714 return;
715 }
716 }
717 ##
718 # Otherwise return actual value
719 ##
460b1067 720 elsif ( $signature eq SIG_DATA ) {
8db25060 721 my $size;
722 read( $fh, $size, $self->{data_size});
723 $size = unpack($self->{data_pack}, $size);
724
725 my $value = '';
726 if ($size) { read( $fh, $value, $size); }
727 return $value;
728 }
729
730 ##
731 # Key exists, but content is null
732 ##
733 return;
734}
735
9020ee8c 736sub get_bucket_value {
beac1dff 737 ##
738 # Fetch single value given tag and MD5 digested key.
739 ##
740 my $self = shift;
e96daec8 741 my ($tag, $md5) = @_;
9020ee8c 742
21838116 743 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 744 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5 );
745 if ( $subloc && !$is_deleted ) {
e96daec8 746 return $self->read_from_loc( $subloc );
386bab6c 747 }
beac1dff 748 return;
9020ee8c 749}
ab0e4957 750
751sub delete_bucket {
beac1dff 752 ##
753 # Delete single key/value pair given tag and MD5 digested key.
754 ##
755 my $self = shift;
e96daec8 756 my ($tag, $md5) = @_;
ab0e4957 757
21838116 758 local($/,$\);
759
760 #ACID - This is a mutation. Must only find the exact transaction
761 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );
f9c33187 762#XXX This needs _release_space()
386bab6c 763 if ( $subloc ) {
e96daec8 764 my $fh = $self->_fh;
765 seek($fh, $tag->{offset} + $offset + $self->_fileobj->{file_offset}, SEEK_SET);
386bab6c 766 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 767 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 768
ab0e4957 769 return 1;
386bab6c 770 }
beac1dff 771 return;
ab0e4957 772}
773
912d50b1 774sub bucket_exists {
beac1dff 775 ##
776 # Check existence of single key given tag and MD5 digested key.
777 ##
778 my $self = shift;
e96daec8 779 my ($tag, $md5) = @_;
912d50b1 780
21838116 781 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 782 my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );
783 return ($subloc && !$is_deleted) && 1;
912d50b1 784}
785
6736c116 786sub find_bucket_list {
beac1dff 787 ##
788 # Locate offset for bucket list, given digested key
789 ##
790 my $self = shift;
e96daec8 791 my ($offset, $md5, $args) = @_;
d0b74c17 792 $args = {} unless $args;
793
21838116 794 local($/,$\);
795
beac1dff 796 ##
797 # Locate offset for bucket list using digest index system
798 ##
e96daec8 799 my $tag = $self->load_tag( $offset )
800 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 801
e5fc7e69 802 my $ch = 0;
8db25060 803 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 804 my $num = ord substr($md5, $ch, 1);
805
806 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 807 $tag = $self->index_lookup( $tag, $num );
d0b74c17 808
809 if (!$tag) {
29b01632 810 return if !$args->{create};
d0b74c17 811
16d1ad9b 812 my $loc = $self->_request_space(
e96daec8 813 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 814 );
815
e96daec8 816 my $fh = $self->_fh;
817 seek($fh, $ref_loc + $self->_fileobj->{file_offset}, SEEK_SET);
16d1ad9b 818 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 819
9e4f83a0 820 $tag = $self->write_tag(
e96daec8 821 $loc, SIG_BLIST,
f37c15ab 822 chr(0)x$self->{bucket_list_size},
d5d7c51d 823 );
824
825 $tag->{ref_loc} = $ref_loc;
826 $tag->{ch} = $ch;
827
828 last;
d0b74c17 829 }
830
16d1ad9b 831 $tag->{ch} = $ch++;
d0b74c17 832 $tag->{ref_loc} = $ref_loc;
beac1dff 833 }
d0b74c17 834
beac1dff 835 return $tag;
6736c116 836}
837
d0b74c17 838sub index_lookup {
839 ##
840 # Given index tag, lookup single entry in index and return .
841 ##
842 my $self = shift;
e96daec8 843 my ($tag, $index) = @_;
d0b74c17 844
845 my $location = unpack(
846 $self->{long_pack},
847 substr(
848 $tag->{content},
849 $index * $self->{long_size},
850 $self->{long_size},
851 ),
852 );
853
854 if (!$location) { return; }
855
e96daec8 856 return $self->load_tag( $location );
d0b74c17 857}
858
6736c116 859sub traverse_index {
beac1dff 860 ##
861 # Scan index and recursively step into deeper levels, looking for next key.
862 ##
6736c116 863 my $self = shift;
864 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 865
21838116 866 local($/,$\);
867
e96daec8 868 my $tag = $self->load_tag( $offset );
6736c116 869
e96daec8 870 my $fh = $self->_fh;
d0b74c17 871
8db25060 872 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 873 my $content = $tag->{content};
e5fc7e69 874 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 875
d5d7c51d 876 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 877 my $subloc = unpack(
878 $self->{long_pack},
e06824f8 879 substr(
880 $content,
881 $idx * $self->{long_size},
882 $self->{long_size},
883 ),
e5fc7e69 884 );
885
beac1dff 886 if ($subloc) {
e5fc7e69 887 my $result = $self->traverse_index(
888 $obj, $subloc, $ch + 1, $force_return_next,
889 );
890
beac1dff 891 if (defined($result)) { return $result; }
892 }
893 } # index loop
d0b74c17 894
beac1dff 895 $obj->{return_next} = 1;
896 } # tag is an index
d0b74c17 897
e5fc7e69 898 else {
beac1dff 899 my $keys = $tag->{content};
900 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 901
beac1dff 902 ##
903 # Iterate through buckets, looking for a key match
904 ##
8db25060 905 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 906 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 907
8db25060 908 # End of bucket list -- return to outer loop
beac1dff 909 if (!$subloc) {
beac1dff 910 $obj->{return_next} = 1;
911 last;
912 }
8db25060 913 # Located previous key -- return next one found
beac1dff 914 elsif ($key eq $obj->{prev_md5}) {
beac1dff 915 $obj->{return_next} = 1;
916 next;
917 }
8db25060 918 # Seek to bucket location and skip over signature
beac1dff 919 elsif ($obj->{return_next}) {
e96daec8 920 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
d0b74c17 921
beac1dff 922 # Skip over value to get to plain key
8db25060 923 my $sig;
924 read( $fh, $sig, SIG_SIZE );
925
beac1dff 926 my $size;
e5fc7e69 927 read( $fh, $size, $self->{data_size});
928 $size = unpack($self->{data_pack}, $size);
beac1dff 929 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 930
beac1dff 931 # Read in plain key and return as scalar
beac1dff 932 my $plain_key;
e5fc7e69 933 read( $fh, $size, $self->{data_size});
934 $size = unpack($self->{data_pack}, $size);
beac1dff 935 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 936
beac1dff 937 return $plain_key;
938 }
8db25060 939 }
d0b74c17 940
beac1dff 941 $obj->{return_next} = 1;
942 } # tag is a bucket list
d0b74c17 943
beac1dff 944 return;
6736c116 945}
946
947sub get_next_key {
beac1dff 948 ##
949 # Locate next key, given digested previous one
950 ##
6736c116 951 my $self = shift;
952 my ($obj) = @_;
d0b74c17 953
beac1dff 954 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
955 $obj->{return_next} = 0;
d0b74c17 956
beac1dff 957 ##
958 # If the previous key was not specifed, start at the top and
959 # return the first one found.
960 ##
961 if (!$obj->{prev_md5}) {
962 $obj->{prev_md5} = chr(0) x $self->{hash_size};
963 $obj->{return_next} = 1;
964 }
d0b74c17 965
beac1dff 966 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 967}
968
75be6413 969# Utilities
970
9cec1360 971sub _get_key_subloc {
75be6413 972 my $self = shift;
973 my ($keys, $idx) = @_;
974
c9b6d0d8 975 my ($key, $subloc, $size, $transaction_id, $is_deleted) = unpack(
28394a1a 976 # This is 'a', not 'A'. Please read the pack() documentation for the
977 # difference between the two and why it's important.
c9b6d0d8 978 "a$self->{hash_size} $self->{long_pack}2 n2",
75be6413 979 substr(
980 $keys,
9cec1360 981 ($idx * $self->{bucket_size}),
982 $self->{bucket_size},
75be6413 983 ),
984 );
985
c9b6d0d8 986 return ($key, $subloc, $size, $transaction_id, $is_deleted);
75be6413 987}
988
d608b06e 989sub _find_in_buckets {
990 my $self = shift;
21838116 991 my ($tag, $md5, $exact) = @_;
d608b06e 992
28394a1a 993 my $trans_id = $self->_fileobj->transaction_id;
994
21838116 995 my @zero;
996
d608b06e 997 BUCKET:
998 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
c9b6d0d8 999 my ($key, $subloc, $size, $transaction_id, $is_deleted) = $self->_get_key_subloc(
9a187d8c 1000 $tag->{content}, $i,
1001 );
d608b06e 1002
c9b6d0d8 1003 my @rv = ($subloc, $i * $self->{bucket_size}, $size, $is_deleted);
21838116 1004
1005 unless ( $subloc ) {
20b7f047 1006 if ( !$exact && @zero and $trans_id ) {
c9b6d0d8 1007 @rv = ($zero[2], $zero[0] * $self->{bucket_size},$zero[3],$is_deleted);
20b7f047 1008 }
21838116 1009 return @rv;
1010 }
1011
1012 next BUCKET if $key ne $md5;
d608b06e 1013
21838116 1014 # Save off the HEAD in case we need it.
c9b6d0d8 1015 @zero = ($i,$key,$subloc,$size,$transaction_id,$is_deleted) if $transaction_id == 0;
d608b06e 1016
21838116 1017 next BUCKET if $transaction_id != $trans_id;
1018
1019 return @rv;
d608b06e 1020 }
1021
1022 return;
1023}
1024
994ccd8e 1025sub _request_space {
1026 my $self = shift;
e96daec8 1027 my ($size) = @_;
994ccd8e 1028
460b1067 1029 my $loc = $self->_fileobj->{end};
1030 $self->_fileobj->{end} += $size;
994ccd8e 1031
1032 return $loc;
1033}
1034
1035sub _release_space {
1036 my $self = shift;
e96daec8 1037 my ($size, $loc) = @_;
994ccd8e 1038
21838116 1039 local($/,$\);
1040
7b1e1aa1 1041 my $next_loc = 0;
1042
e96daec8 1043 my $fh = $self->_fh;
1044 seek( $fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET );
7b1e1aa1 1045 print( $fh SIG_FREE
1046 . pack($self->{long_pack}, $size )
1047 . pack($self->{long_pack}, $next_loc )
1048 );
1049
994ccd8e 1050 return;
1051}
1052
e96daec8 1053sub _throw_error {
1054 die "DBM::Deep: $_[1]\n";
1055}
1056
a20d9a3f 10571;
1058__END__
d5d7c51d 1059
1060# This will be added in later, after more refactoring is done. This is an early
1061# attempt at refactoring on the physical level instead of the virtual level.
1062sub _read_at {
1063 my $self = shift;
e96daec8 1064 my ($spot, $amount, $unpack) = @_;
d5d7c51d 1065
21838116 1066 local($/,$\);
1067
e96daec8 1068 my $fh = $self->_fh;
1069 seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );
d5d7c51d 1070
1071 my $buffer;
1072 my $bytes_read = read( $fh, $buffer, $amount );
1073
1074 if ( $unpack ) {
1075 $buffer = unpack( $unpack, $buffer );
1076 }
1077
1078 if ( wantarray ) {
1079 return ($buffer, $bytes_read);
1080 }
1081 else {
1082 return $buffer;
1083 }
1084}
e96daec8 1085
1086sub _print_at {
1087 my $self = shift;
1088 my ($spot, $data) = @_;
1089
21838116 1090 local($/,$\);
1091
e96daec8 1092 my $fh = $self->_fh;
1093 seek( $fh, $spot, SEEK_SET );
1094 print( $fh $data );
1095
1096 return;
1097}
1098
1099sub get_file_version {
1100 my $self = shift;
1101
21838116 1102 local($/,$\);
1103
e96daec8 1104 my $fh = $self->_fh;
1105
1106 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1107 my $buffer;
1108 my $bytes_read = read( $fh, $buffer, 4 );
1109 unless ( $bytes_read == 4 ) {
1110 $self->_throw_error( "Cannot read file version" );
1111 }
1112
1113 return unpack( 'N', $buffer );
1114}
1115
1116sub write_file_version {
1117 my $self = shift;
1118 my ($new_version) = @_;
1119
21838116 1120 local($/,$\);
1121
e96daec8 1122 my $fh = $self->_fh;
1123
1124 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1125 print( $fh pack( 'N', $new_version ) );
1126
1127 return;
1128}
1129