Adding transactions further - settled on MVCC
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
8use Fcntl qw( :DEFAULT :flock :seek );
9
21838116 10# File-wide notes:
11# * All the local($/,$\); are to protect read() and print() from -l.
12
8db25060 13##
14# Setup file and tag signatures. These should never change.
15##
16sub SIG_FILE () { 'DPDB' }
460b1067 17sub SIG_HEADER () { 'h' }
8db25060 18sub SIG_INTERNAL () { 'i' }
19sub SIG_HASH () { 'H' }
20sub SIG_ARRAY () { 'A' }
8db25060 21sub SIG_NULL () { 'N' }
22sub SIG_DATA () { 'D' }
23sub SIG_INDEX () { 'I' }
24sub SIG_BLIST () { 'B' }
7b1e1aa1 25sub SIG_FREE () { 'F' }
8db25060 26sub SIG_SIZE () { 1 }
27
612969fb 28sub new {
29 my $class = shift;
30 my ($args) = @_;
31
32 my $self = bless {
33 long_size => 4,
34 long_pack => 'N',
35 data_size => 4,
36 data_pack => 'N',
251dfd0e 37
612969fb 38 digest => \&Digest::MD5::md5,
39 hash_size => 16,
251dfd0e 40
81d16922 41 ##
d5d7c51d 42 # Maximum number of buckets per list before another level of indexing is
e0098e7f 43 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 44 # files. DO NOT decrease this value below 16, due to risk of recursive
45 # reindex overrun.
81d16922 46 ##
612969fb 47 max_buckets => 16,
460b1067 48
49 fileobj => undef,
612969fb 50 }, $class;
51
e0098e7f 52 if ( defined $args->{pack_size} ) {
53 if ( lc $args->{pack_size} eq 'small' ) {
54 $args->{long_size} = 2;
55 $args->{long_pack} = 'S';
56 }
57 elsif ( lc $args->{pack_size} eq 'medium' ) {
58 $args->{long_size} = 4;
59 $args->{long_pack} = 'N';
60 }
61 elsif ( lc $args->{pack_size} eq 'large' ) {
62 $args->{long_size} = 8;
63 $args->{long_pack} = 'Q';
64 }
65 else {
66 die "Unknown pack_size value: '$args->{pack_size}'\n";
67 }
68 }
69
fde3db1a 70 # Grab the parameters we want to use
71 foreach my $param ( keys %$self ) {
72 next unless exists $args->{$param};
3e9498a1 73 $self->{$param} = $args->{$param};
fde3db1a 74 }
75
e0098e7f 76 if ( $self->{max_buckets} < 16 ) {
77 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
78 $self->{max_buckets} = 16;
79 }
80
260a80b4 81 return $self;
82}
83
460b1067 84sub _fileobj { return $_[0]{fileobj} }
85sub _fh { return $_[0]->_fileobj->{fh} }
86
260a80b4 87sub calculate_sizes {
88 my $self = shift;
89
28394a1a 90 #XXX Does this need to be updated with different hashing algorithms?
e0098e7f 91 $self->{index_size} = (2**8) * $self->{long_size};
28394a1a 92 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3;
e0098e7f 93 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 94
260a80b4 95 return;
1bf65be7 96}
97
fde3db1a 98sub write_file_header {
0d0f3d5d 99 my $self = shift;
0d0f3d5d 100
21838116 101 local($/,$\);
102
460b1067 103 my $fh = $self->_fh;
0d0f3d5d 104
e96daec8 105 my $loc = $self->_request_space( length( SIG_FILE ) + 21 );
460b1067 106 seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET);
260a80b4 107 print( $fh
108 SIG_FILE,
460b1067 109 SIG_HEADER,
110 pack('N', 1), # header version
111 pack('N', 12), # header size
15ba72cc 112 pack('N', 0), # currently running transaction IDs
260a80b4 113 pack('S', $self->{long_size}),
114 pack('A', $self->{long_pack}),
115 pack('S', $self->{data_size}),
116 pack('A', $self->{data_pack}),
117 pack('S', $self->{max_buckets}),
118 );
0d0f3d5d 119
120 return;
121}
122
fde3db1a 123sub read_file_header {
e064ccd1 124 my $self = shift;
e064ccd1 125
21838116 126 local($/,$\);
127
e96daec8 128 my $fh = $self->_fh;
e064ccd1 129
e96daec8 130 seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET);
42f79e07 131 my $buffer;
460b1067 132 my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 );
133
134 return unless $bytes_read;
135
136 my ($file_signature, $sig_header, $header_version, $size) = unpack(
137 'A4 A N N', $buffer
42f79e07 138 );
e064ccd1 139
460b1067 140 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 141 $self->_fileobj->close;
e96daec8 142 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 143 }
260a80b4 144
460b1067 145 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 146 $self->_fileobj->close;
e96daec8 147 $self->_throw_error( "Old file version found." );
460b1067 148 }
9b2370e0 149
460b1067 150 my $buffer2;
151 $bytes_read += read( $fh, $buffer2, $size );
15ba72cc 152 my ($running_transactions, @values) = unpack( 'N S A S A S', $buffer2 );
153
154 $self->_fileobj->set_transaction_offset( 13 );
155
460b1067 156 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 157 $self->_fileobj->close;
e96daec8 158 $self->_throw_error("Corrupted file - bad header");
e064ccd1 159 }
160
460b1067 161 #XXX Add warnings if values weren't set right
162 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
163
e064ccd1 164 return $bytes_read;
165}
166
460b1067 167sub setup_fh {
168 my $self = shift;
169 my ($obj) = @_;
70b55428 170
e96daec8 171 my $fh = $self->_fh;
6fde4ed2 172 flock $fh, LOCK_EX;
118ba343 173
260a80b4 174 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 175 unless ( $obj->{base_offset} ) {
e96daec8 176 my $bytes_read = $self->read_file_header;
118ba343 177
260a80b4 178 $self->calculate_sizes;
179
118ba343 180 ##
fde3db1a 181 # File is empty -- write header and master index
118ba343 182 ##
183 if (!$bytes_read) {
e96daec8 184 $self->write_file_header;
118ba343 185
e96daec8 186 $obj->{base_offset} = $self->_request_space( $self->tag_size( $self->{index_size} ) );
118ba343 187
9e4f83a0 188 $self->write_tag(
e96daec8 189 $obj->_base_offset, $obj->_type,
f37c15ab 190 chr(0)x$self->{index_size},
118ba343 191 );
192
193 # Flush the filehandle
194 my $old_fh = select $fh;
195 my $old_af = $|; $| = 1; $| = $old_af;
196 select $old_fh;
197 }
198 else {
199 $obj->{base_offset} = $bytes_read;
200
201 ##
fde3db1a 202 # Get our type from master index header
118ba343 203 ##
e96daec8 204 my $tag = $self->load_tag($obj->_base_offset)
205 or $self->_throw_error("Corrupted file, no master index record");
118ba343 206
e96daec8 207 unless ($obj->_type eq $tag->{signature}) {
208 $self->_throw_error("File type mismatch");
118ba343 209 }
210 }
118ba343 211 }
260a80b4 212 else {
213 $self->calculate_sizes;
214 }
e06824f8 215
673464d9 216 #XXX We have to make sure we don't mess up when autoflush isn't turned on
e96daec8 217 unless ( $self->_fileobj->{inode} ) {
218 my @stats = stat($fh);
219 $self->_fileobj->{inode} = $stats[1];
220 $self->_fileobj->{end} = $stats[7];
70b55428 221 }
222
6fde4ed2 223 flock $fh, LOCK_UN;
224
70b55428 225 return 1;
226}
227
16d1ad9b 228sub tag_size {
229 my $self = shift;
230 my ($size) = @_;
231 return SIG_SIZE + $self->{data_size} + $size;
232}
233
9e4f83a0 234sub write_tag {
20f7b20c 235 ##
236 # Given offset, signature and content, create tag and write to disk
237 ##
d4b1166e 238 my $self = shift;
e96daec8 239 my ($offset, $sig, $content) = @_;
f37c15ab 240 my $size = length( $content );
20f7b20c 241
21838116 242 local($/,$\);
243
e96daec8 244 my $fh = $self->_fh;
d4b1166e 245
f37c15ab 246 if ( defined $offset ) {
e96daec8 247 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
f37c15ab 248 }
249
251dfd0e 250 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 251
f37c15ab 252 return unless defined $offset;
253
20f7b20c 254 return {
255 signature => $sig,
256 size => $size,
8db25060 257 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 258 content => $content
259 };
d4b1166e 260}
261
262sub load_tag {
20f7b20c 263 ##
264 # Given offset, load single tag and return signature, size and data
265 ##
d4b1166e 266 my $self = shift;
e96daec8 267 my ($offset) = @_;
20f7b20c 268
21838116 269 local($/,$\);
270
e06824f8 271# print join(':',map{$_||''}caller(1)), $/;
272
e96daec8 273 my $fh = $self->_fh;
d4b1166e 274
e96daec8 275 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
e5fc7e69 276
75be6413 277 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 278 return if eof $fh;
20f7b20c 279
d4b1166e 280 my $b;
8db25060 281 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 282 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 283
284 my $buffer;
285 read( $fh, $buffer, $size);
286
287 return {
288 signature => $sig,
289 size => $size,
8db25060 290 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 291 content => $buffer
292 };
d4b1166e 293}
294
56ec4340 295sub _get_dbm_object {
296 my $item = shift;
297
298 my $obj = eval {
299 local $SIG{__DIE__};
300 if ($item->isa( 'DBM::Deep' )) {
301 return $item;
302 }
303 return;
304 };
305 return $obj if $obj;
306
307 my $r = Scalar::Util::reftype( $item ) || '';
308 if ( $r eq 'HASH' ) {
309 my $obj = eval {
310 local $SIG{__DIE__};
311 my $obj = tied(%$item);
312 if ($obj->isa( 'DBM::Deep' )) {
313 return $obj;
314 }
315 return;
316 };
317 return $obj if $obj;
318 }
319 elsif ( $r eq 'ARRAY' ) {
320 my $obj = eval {
321 local $SIG{__DIE__};
322 my $obj = tied(@$item);
323 if ($obj->isa( 'DBM::Deep' )) {
324 return $obj;
325 }
326 return;
327 };
328 return $obj if $obj;
329 }
330
331 return;
332}
333
29b01632 334sub _length_needed {
335 my $self = shift;
e96daec8 336 my ($value, $key) = @_;
29b01632 337
338 my $is_dbm_deep = eval {
339 local $SIG{'__DIE__'};
340 $value->isa( 'DBM::Deep' );
341 };
342
f37c15ab 343 my $len = SIG_SIZE + $self->{data_size}
344 + $self->{data_size} + length( $key );
29b01632 345
e96daec8 346 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
f37c15ab 347 return $len + $self->{long_size};
29b01632 348 }
349
350 my $r = Scalar::Util::reftype( $value ) || '';
e96daec8 351 if ( $self->_fileobj->{autobless} ) {
9a187d8c 352 # This is for the bit saying whether or not this thing is blessed.
353 $len += 1;
354 }
355
29b01632 356 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 357 if ( defined $value ) {
358 $len += length( $value );
359 }
360 return $len;
29b01632 361 }
362
f37c15ab 363 $len += $self->{index_size};
29b01632 364
365 # if autobless is enabled, must also take into consideration
f37c15ab 366 # the class name as it is stored after the key.
e96daec8 367 if ( $self->_fileobj->{autobless} ) {
56ec4340 368 my $c = Scalar::Util::blessed($value);
369 if ( defined $c && !$is_dbm_deep ) {
370 $len += $self->{data_size} + length($c);
29b01632 371 }
372 }
373
f37c15ab 374 return $len;
29b01632 375}
376
20f7b20c 377sub add_bucket {
378 ##
379 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
380 # plain (undigested) key and value.
381 ##
d4b1166e 382 my $self = shift;
e96daec8 383 my ($tag, $md5, $plain_key, $value) = @_;
75be6413 384
21838116 385 local($/,$\);
386
eea0d863 387 # This verifies that only supported values will be stored.
388 {
389 my $r = Scalar::Util::reftype( $value );
390 last if !defined $r;
391
392 last if $r eq 'HASH';
393 last if $r eq 'ARRAY';
394
e96daec8 395 $self->_throw_error(
eea0d863 396 "Storage of variables of type '$r' is not supported."
397 );
398 }
399
20f7b20c 400 my $location = 0;
401 my $result = 2;
402
e96daec8 403 my $root = $self->_fileobj;
404 my $fh = $self->_fh;
20f7b20c 405
e96daec8 406 my $actual_length = $self->_length_needed( $value, $plain_key );
20f7b20c 407
21838116 408 #ACID - This is a mutation. Must only find the exact transaction
409 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );
75be6413 410
e96daec8 411# $self->_release_space( $size, $subloc );
386bab6c 412 # Updating a known md5
f9c33187 413#XXX This needs updating to use _release_space
386bab6c 414 if ( $subloc ) {
415 $result = 1;
20f7b20c 416
386bab6c 417 if ($actual_length <= $size) {
418 $location = $subloc;
20f7b20c 419 }
75be6413 420 else {
e96daec8 421 $location = $self->_request_space( $actual_length );
386bab6c 422 seek(
423 $fh,
9a187d8c 424 $tag->{offset} + $offset
425 + $self->{hash_size} + $root->{file_offset},
386bab6c 426 SEEK_SET,
427 );
9a187d8c 428 print( $fh pack($self->{long_pack}, $location ) );
429 print( $fh pack($self->{long_pack}, $actual_length ) );
28394a1a 430 print( $fh pack($self->{long_pack}, $root->transaction_id ) );
75be6413 431 }
75be6413 432 }
386bab6c 433 # Adding a new md5
434 elsif ( defined $offset ) {
e96daec8 435 $location = $self->_request_space( $actual_length );
386bab6c 436
437 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 438 print( $fh $md5 . pack($self->{long_pack}, $location ) );
439 print( $fh pack($self->{long_pack}, $actual_length ) );
28394a1a 440 print( $fh pack($self->{long_pack}, $root->transaction_id ) );
386bab6c 441 }
442 # If bucket didn't fit into list, split into a new index level
f9c33187 443 # split_index() will do the _request_space() call
386bab6c 444 else {
e96daec8 445 $location = $self->split_index( $md5, $tag );
386bab6c 446 }
20f7b20c 447
e96daec8 448 $self->write_value( $location, $plain_key, $value );
d5d7c51d 449
450 return $result;
451}
452
453sub write_value {
454 my $self = shift;
e96daec8 455 my ($location, $key, $value) = @_;
d5d7c51d 456
21838116 457 local($/,$\);
458
e96daec8 459 my $fh = $self->_fh;
460 my $root = $self->_fileobj;
d5d7c51d 461
9d4fa373 462 my $dbm_deep_obj = _get_dbm_object( $value );
e96daec8 463 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) {
464 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 465 }
d5d7c51d 466
467 seek($fh, $location + $root->{file_offset}, SEEK_SET);
468
20f7b20c 469 ##
d5d7c51d 470 # Write signature based on content type, set content length and write
471 # actual value.
20f7b20c 472 ##
9d4fa373 473 my $r = Scalar::Util::reftype( $value ) || '';
474 if ( $dbm_deep_obj ) {
e96daec8 475 $self->write_tag( undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 476 }
477 elsif ($r eq 'HASH') {
9d4fa373 478 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 479 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 480 }
e96daec8 481 $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 482 }
483 elsif ($r eq 'ARRAY') {
9d4fa373 484 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 485 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 486 }
e96daec8 487 $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 488 }
489 elsif (!defined($value)) {
e96daec8 490 $self->write_tag( undef, SIG_NULL, '' );
d5d7c51d 491 }
492 else {
e96daec8 493 $self->write_tag( undef, SIG_DATA, $value );
d5d7c51d 494 }
20f7b20c 495
d5d7c51d 496 ##
497 # Plain key is stored AFTER value, as keys are typically fetched less often.
498 ##
499 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 500
9a187d8c 501 # Internal references don't care about autobless
9d4fa373 502 return 1 if $dbm_deep_obj;
9a187d8c 503
d5d7c51d 504 ##
505 # If value is blessed, preserve class name
506 ##
507 if ( $root->{autobless} ) {
56ec4340 508 my $c = Scalar::Util::blessed($value);
509 if ( defined $c && !$dbm_deep_obj ) {
d5d7c51d 510 print( $fh chr(1) );
56ec4340 511 print( $fh pack($self->{data_pack}, length($c)) . $c );
20f7b20c 512 }
d5d7c51d 513 else {
514 print( $fh chr(0) );
20f7b20c 515 }
d5d7c51d 516 }
20f7b20c 517
d5d7c51d 518 ##
56ec4340 519 # Tie the passed in reference so that changes to it are reflected in the
520 # datafile. The use of $location as the base_offset will act as the
521 # the linkage between parent and child.
522 #
523 # The overall assignment is a hack around the fact that just tying doesn't
524 # store the values. This may not be the wrong thing to do.
d5d7c51d 525 ##
9d4fa373 526 if ($r eq 'HASH') {
527 my %x = %$value;
528 tie %$value, 'DBM::Deep', {
529 base_offset => $location,
460b1067 530 fileobj => $root,
9d4fa373 531 };
532 %$value = %x;
533 }
534 elsif ($r eq 'ARRAY') {
535 my @x = @$value;
536 tie @$value, 'DBM::Deep', {
537 base_offset => $location,
460b1067 538 fileobj => $root,
9d4fa373 539 };
540 @$value = @x;
20f7b20c 541 }
d4b1166e 542
d5d7c51d 543 return 1;
d4b1166e 544}
545
75be6413 546sub split_index {
547 my $self = shift;
e96daec8 548 my ($md5, $tag) = @_;
75be6413 549
21838116 550 local($/,$\);
551
e96daec8 552 my $fh = $self->_fh;
553 my $root = $self->_fileobj;
16d1ad9b 554
555 my $loc = $self->_request_space(
e96daec8 556 $self->tag_size( $self->{index_size} ),
16d1ad9b 557 );
558
7b1e1aa1 559 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 560 print( $fh pack($self->{long_pack}, $loc) );
75be6413 561
9e4f83a0 562 my $index_tag = $self->write_tag(
e96daec8 563 $loc, SIG_INDEX,
f37c15ab 564 chr(0)x$self->{index_size},
75be6413 565 );
566
f9c33187 567 my $newtag_loc = $self->_request_space(
e96daec8 568 $self->tag_size( $self->{bucket_list_size} ),
f9c33187 569 );
75be6413 570
7b1e1aa1 571 my $keys = $tag->{content}
f9c33187 572 . $md5 . pack($self->{long_pack}, $newtag_loc)
28394a1a 573 . pack($self->{long_pack}, 0) # size
574 . pack($self->{long_pack}, 0); # transaction #
75be6413 575
f9c33187 576 my @newloc = ();
75be6413 577 BUCKET:
578 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 579 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 580
f9c33187 581 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
582 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 583
75be6413 584 my $num = ord(substr($key, $tag->{ch} + 1, 1));
585
f9c33187 586 if ($newloc[$num]) {
587 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 588 my $subkeys;
589 read( $fh, $subkeys, $self->{bucket_list_size});
590
f9c33187 591 # This is looking for the first empty spot
7b1e1aa1 592 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 593 { content => $subkeys }, '',
7b1e1aa1 594 );
75be6413 595
f9c33187 596 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
597 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 598
599 next;
75be6413 600 }
75be6413 601
7b1e1aa1 602 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 603
7b1e1aa1 604 my $loc = $self->_request_space(
e96daec8 605 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 606 );
2603d86e 607
7b1e1aa1 608 print( $fh pack($self->{long_pack}, $loc) );
75be6413 609
7b1e1aa1 610 my $blist_tag = $self->write_tag(
e96daec8 611 $loc, SIG_BLIST,
7b1e1aa1 612 chr(0)x$self->{bucket_list_size},
613 );
614
615 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 616 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 617
f9c33187 618 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 619 }
620
621 $self->_release_space(
e96daec8 622 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 623 $tag->{offset} - SIG_SIZE - $self->{data_size},
624 );
75be6413 625
f9c33187 626 return $newtag_loc;
75be6413 627}
628
8db25060 629sub read_from_loc {
630 my $self = shift;
e96daec8 631 my ($subloc) = @_;
8db25060 632
21838116 633 local($/,$\);
634
e96daec8 635 my $fh = $self->_fh;
8db25060 636
637 ##
638 # Found match -- seek to offset and read signature
639 ##
640 my $signature;
e96daec8 641 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
8db25060 642 read( $fh, $signature, SIG_SIZE);
643
644 ##
645 # If value is a hash or array, return new DBM::Deep object with correct offset
646 ##
647 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 648 my $new_obj = DBM::Deep->new({
8db25060 649 type => $signature,
650 base_offset => $subloc,
e96daec8 651 fileobj => $self->_fileobj,
685e40f1 652 });
8db25060 653
460b1067 654 if ($new_obj->_fileobj->{autobless}) {
8db25060 655 ##
656 # Skip over value and plain key to see if object needs
657 # to be re-blessed
658 ##
659 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
660
661 my $size;
c6ea6b6c 662 read( $fh, $size, $self->{data_size});
663 $size = unpack($self->{data_pack}, $size);
8db25060 664 if ($size) { seek($fh, $size, SEEK_CUR); }
665
666 my $bless_bit;
667 read( $fh, $bless_bit, 1);
668 if (ord($bless_bit)) {
669 ##
670 # Yes, object needs to be re-blessed
671 ##
672 my $class_name;
c6ea6b6c 673 read( $fh, $size, $self->{data_size});
674 $size = unpack($self->{data_pack}, $size);
8db25060 675 if ($size) { read( $fh, $class_name, $size); }
685e40f1 676 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 677 }
678 }
679
685e40f1 680 return $new_obj;
8db25060 681 }
682 elsif ( $signature eq SIG_INTERNAL ) {
683 my $size;
684 read( $fh, $size, $self->{data_size});
685 $size = unpack($self->{data_pack}, $size);
686
687 if ( $size ) {
688 my $new_loc;
689 read( $fh, $new_loc, $size );
690 $new_loc = unpack( $self->{long_pack}, $new_loc );
691
e96daec8 692 return $self->read_from_loc( $new_loc );
8db25060 693 }
694 else {
695 return;
696 }
697 }
698 ##
699 # Otherwise return actual value
700 ##
460b1067 701 elsif ( $signature eq SIG_DATA ) {
8db25060 702 my $size;
703 read( $fh, $size, $self->{data_size});
704 $size = unpack($self->{data_pack}, $size);
705
706 my $value = '';
707 if ($size) { read( $fh, $value, $size); }
708 return $value;
709 }
710
711 ##
712 # Key exists, but content is null
713 ##
714 return;
715}
716
9020ee8c 717sub get_bucket_value {
beac1dff 718 ##
719 # Fetch single value given tag and MD5 digested key.
720 ##
721 my $self = shift;
e96daec8 722 my ($tag, $md5) = @_;
9020ee8c 723
21838116 724 #ACID - This is a read. Can find exact or HEAD
9a187d8c 725 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
386bab6c 726 if ( $subloc ) {
e96daec8 727 return $self->read_from_loc( $subloc );
386bab6c 728 }
beac1dff 729 return;
9020ee8c 730}
ab0e4957 731
732sub delete_bucket {
beac1dff 733 ##
734 # Delete single key/value pair given tag and MD5 digested key.
735 ##
736 my $self = shift;
e96daec8 737 my ($tag, $md5) = @_;
ab0e4957 738
21838116 739 local($/,$\);
740
741 #ACID - This is a mutation. Must only find the exact transaction
742 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );
f9c33187 743#XXX This needs _release_space()
386bab6c 744 if ( $subloc ) {
e96daec8 745 my $fh = $self->_fh;
746 seek($fh, $tag->{offset} + $offset + $self->_fileobj->{file_offset}, SEEK_SET);
386bab6c 747 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 748 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 749
ab0e4957 750 return 1;
386bab6c 751 }
beac1dff 752 return;
ab0e4957 753}
754
912d50b1 755sub bucket_exists {
beac1dff 756 ##
757 # Check existence of single key given tag and MD5 digested key.
758 ##
759 my $self = shift;
e96daec8 760 my ($tag, $md5) = @_;
912d50b1 761
21838116 762 #ACID - This is a read. Can find exact or HEAD
9a187d8c 763 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );
d5d7c51d 764 return $subloc && 1;
912d50b1 765}
766
6736c116 767sub find_bucket_list {
beac1dff 768 ##
769 # Locate offset for bucket list, given digested key
770 ##
771 my $self = shift;
e96daec8 772 my ($offset, $md5, $args) = @_;
d0b74c17 773 $args = {} unless $args;
774
21838116 775 local($/,$\);
776
beac1dff 777 ##
778 # Locate offset for bucket list using digest index system
779 ##
e96daec8 780 my $tag = $self->load_tag( $offset )
781 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 782
e5fc7e69 783 my $ch = 0;
8db25060 784 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 785 my $num = ord substr($md5, $ch, 1);
786
787 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 788 $tag = $self->index_lookup( $tag, $num );
d0b74c17 789
790 if (!$tag) {
29b01632 791 return if !$args->{create};
d0b74c17 792
16d1ad9b 793 my $loc = $self->_request_space(
e96daec8 794 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 795 );
796
e96daec8 797 my $fh = $self->_fh;
798 seek($fh, $ref_loc + $self->_fileobj->{file_offset}, SEEK_SET);
16d1ad9b 799 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 800
9e4f83a0 801 $tag = $self->write_tag(
e96daec8 802 $loc, SIG_BLIST,
f37c15ab 803 chr(0)x$self->{bucket_list_size},
d5d7c51d 804 );
805
806 $tag->{ref_loc} = $ref_loc;
807 $tag->{ch} = $ch;
808
809 last;
d0b74c17 810 }
811
16d1ad9b 812 $tag->{ch} = $ch++;
d0b74c17 813 $tag->{ref_loc} = $ref_loc;
beac1dff 814 }
d0b74c17 815
beac1dff 816 return $tag;
6736c116 817}
818
d0b74c17 819sub index_lookup {
820 ##
821 # Given index tag, lookup single entry in index and return .
822 ##
823 my $self = shift;
e96daec8 824 my ($tag, $index) = @_;
d0b74c17 825
826 my $location = unpack(
827 $self->{long_pack},
828 substr(
829 $tag->{content},
830 $index * $self->{long_size},
831 $self->{long_size},
832 ),
833 );
834
835 if (!$location) { return; }
836
e96daec8 837 return $self->load_tag( $location );
d0b74c17 838}
839
6736c116 840sub traverse_index {
beac1dff 841 ##
842 # Scan index and recursively step into deeper levels, looking for next key.
843 ##
6736c116 844 my $self = shift;
845 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 846
21838116 847 local($/,$\);
848
e96daec8 849 my $tag = $self->load_tag( $offset );
6736c116 850
e96daec8 851 my $fh = $self->_fh;
d0b74c17 852
8db25060 853 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 854 my $content = $tag->{content};
e5fc7e69 855 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 856
d5d7c51d 857 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 858 my $subloc = unpack(
859 $self->{long_pack},
e06824f8 860 substr(
861 $content,
862 $idx * $self->{long_size},
863 $self->{long_size},
864 ),
e5fc7e69 865 );
866
beac1dff 867 if ($subloc) {
e5fc7e69 868 my $result = $self->traverse_index(
869 $obj, $subloc, $ch + 1, $force_return_next,
870 );
871
beac1dff 872 if (defined($result)) { return $result; }
873 }
874 } # index loop
d0b74c17 875
beac1dff 876 $obj->{return_next} = 1;
877 } # tag is an index
d0b74c17 878
e5fc7e69 879 else {
beac1dff 880 my $keys = $tag->{content};
881 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 882
beac1dff 883 ##
884 # Iterate through buckets, looking for a key match
885 ##
8db25060 886 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 887 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 888
8db25060 889 # End of bucket list -- return to outer loop
beac1dff 890 if (!$subloc) {
beac1dff 891 $obj->{return_next} = 1;
892 last;
893 }
8db25060 894 # Located previous key -- return next one found
beac1dff 895 elsif ($key eq $obj->{prev_md5}) {
beac1dff 896 $obj->{return_next} = 1;
897 next;
898 }
8db25060 899 # Seek to bucket location and skip over signature
beac1dff 900 elsif ($obj->{return_next}) {
e96daec8 901 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
d0b74c17 902
beac1dff 903 # Skip over value to get to plain key
8db25060 904 my $sig;
905 read( $fh, $sig, SIG_SIZE );
906
beac1dff 907 my $size;
e5fc7e69 908 read( $fh, $size, $self->{data_size});
909 $size = unpack($self->{data_pack}, $size);
beac1dff 910 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 911
beac1dff 912 # Read in plain key and return as scalar
beac1dff 913 my $plain_key;
e5fc7e69 914 read( $fh, $size, $self->{data_size});
915 $size = unpack($self->{data_pack}, $size);
beac1dff 916 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 917
beac1dff 918 return $plain_key;
919 }
8db25060 920 }
d0b74c17 921
beac1dff 922 $obj->{return_next} = 1;
923 } # tag is a bucket list
d0b74c17 924
beac1dff 925 return;
6736c116 926}
927
928sub get_next_key {
beac1dff 929 ##
930 # Locate next key, given digested previous one
931 ##
6736c116 932 my $self = shift;
933 my ($obj) = @_;
d0b74c17 934
beac1dff 935 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
936 $obj->{return_next} = 0;
d0b74c17 937
beac1dff 938 ##
939 # If the previous key was not specifed, start at the top and
940 # return the first one found.
941 ##
942 if (!$obj->{prev_md5}) {
943 $obj->{prev_md5} = chr(0) x $self->{hash_size};
944 $obj->{return_next} = 1;
945 }
d0b74c17 946
beac1dff 947 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 948}
949
75be6413 950# Utilities
951
9cec1360 952sub _get_key_subloc {
75be6413 953 my $self = shift;
954 my ($keys, $idx) = @_;
955
28394a1a 956 my ($key, $subloc, $size, $transaction) = unpack(
957 # This is 'a', not 'A'. Please read the pack() documentation for the
958 # difference between the two and why it's important.
959 "a$self->{hash_size} $self->{long_pack} $self->{long_pack} $self->{long_pack}",
75be6413 960 substr(
961 $keys,
9cec1360 962 ($idx * $self->{bucket_size}),
963 $self->{bucket_size},
75be6413 964 ),
965 );
966
28394a1a 967 return ($key, $subloc, $size, $transaction);
75be6413 968}
969
d608b06e 970sub _find_in_buckets {
971 my $self = shift;
21838116 972 my ($tag, $md5, $exact) = @_;
d608b06e 973
28394a1a 974 my $trans_id = $self->_fileobj->transaction_id;
975
21838116 976 my @zero;
977
d608b06e 978 BUCKET:
979 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
28394a1a 980 my ($key, $subloc, $size, $transaction_id) = $self->_get_key_subloc(
9a187d8c 981 $tag->{content}, $i,
982 );
d608b06e 983
21838116 984 my @rv = ($subloc, $i * $self->{bucket_size}, $size);
985
986 unless ( $subloc ) {
987 return @zero if !$exact && @zero and $trans_id;
988 return @rv;
989 }
990
991 next BUCKET if $key ne $md5;
d608b06e 992
21838116 993 # Save off the HEAD in case we need it.
994 @zero = @rv if $transaction_id == 0;
d608b06e 995
21838116 996 next BUCKET if $transaction_id != $trans_id;
997
998 return @rv;
d608b06e 999 }
1000
1001 return;
1002}
1003
994ccd8e 1004sub _request_space {
1005 my $self = shift;
e96daec8 1006 my ($size) = @_;
994ccd8e 1007
460b1067 1008 my $loc = $self->_fileobj->{end};
1009 $self->_fileobj->{end} += $size;
994ccd8e 1010
1011 return $loc;
1012}
1013
1014sub _release_space {
1015 my $self = shift;
e96daec8 1016 my ($size, $loc) = @_;
994ccd8e 1017
21838116 1018 local($/,$\);
1019
7b1e1aa1 1020 my $next_loc = 0;
1021
e96daec8 1022 my $fh = $self->_fh;
1023 seek( $fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET );
7b1e1aa1 1024 print( $fh SIG_FREE
1025 . pack($self->{long_pack}, $size )
1026 . pack($self->{long_pack}, $next_loc )
1027 );
1028
994ccd8e 1029 return;
1030}
1031
e96daec8 1032sub _throw_error {
1033 die "DBM::Deep: $_[1]\n";
1034}
1035
a20d9a3f 10361;
1037__END__
d5d7c51d 1038
1039# This will be added in later, after more refactoring is done. This is an early
1040# attempt at refactoring on the physical level instead of the virtual level.
1041sub _read_at {
1042 my $self = shift;
e96daec8 1043 my ($spot, $amount, $unpack) = @_;
d5d7c51d 1044
21838116 1045 local($/,$\);
1046
e96daec8 1047 my $fh = $self->_fh;
1048 seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );
d5d7c51d 1049
1050 my $buffer;
1051 my $bytes_read = read( $fh, $buffer, $amount );
1052
1053 if ( $unpack ) {
1054 $buffer = unpack( $unpack, $buffer );
1055 }
1056
1057 if ( wantarray ) {
1058 return ($buffer, $bytes_read);
1059 }
1060 else {
1061 return $buffer;
1062 }
1063}
e96daec8 1064
1065sub _print_at {
1066 my $self = shift;
1067 my ($spot, $data) = @_;
1068
21838116 1069 local($/,$\);
1070
e96daec8 1071 my $fh = $self->_fh;
1072 seek( $fh, $spot, SEEK_SET );
1073 print( $fh $data );
1074
1075 return;
1076}
1077
1078sub get_file_version {
1079 my $self = shift;
1080
21838116 1081 local($/,$\);
1082
e96daec8 1083 my $fh = $self->_fh;
1084
1085 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1086 my $buffer;
1087 my $bytes_read = read( $fh, $buffer, 4 );
1088 unless ( $bytes_read == 4 ) {
1089 $self->_throw_error( "Cannot read file version" );
1090 }
1091
1092 return unpack( 'N', $buffer );
1093}
1094
1095sub write_file_version {
1096 my $self = shift;
1097 my ($new_version) = @_;
1098
21838116 1099 local($/,$\);
1100
e96daec8 1101 my $fh = $self->_fh;
1102
1103 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1104 print( $fh pack( 'N', $new_version ) );
1105
1106 return;
1107}
1108