All auditing now goes through a method on ::File
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Engine.pm
CommitLineData
a20d9a3f 1package DBM::Deep::Engine;
2
460b1067 3use 5.6.0;
4
a20d9a3f 5use strict;
460b1067 6use warnings;
a20d9a3f 7
8use Fcntl qw( :DEFAULT :flock :seek );
359a01ac 9use Scalar::Util ();
a20d9a3f 10
21838116 11# File-wide notes:
12# * All the local($/,$\); are to protect read() and print() from -l.
20b7f047 13# * To add to bucket_size, make sure you modify the following:
14# - calculate_sizes()
15# - _get_key_subloc()
16# - add_bucket() - where the buckets are printed
21838116 17
8db25060 18##
19# Setup file and tag signatures. These should never change.
20##
21sub SIG_FILE () { 'DPDB' }
460b1067 22sub SIG_HEADER () { 'h' }
8db25060 23sub SIG_INTERNAL () { 'i' }
24sub SIG_HASH () { 'H' }
25sub SIG_ARRAY () { 'A' }
8db25060 26sub SIG_NULL () { 'N' }
27sub SIG_DATA () { 'D' }
28sub SIG_INDEX () { 'I' }
29sub SIG_BLIST () { 'B' }
7b1e1aa1 30sub SIG_FREE () { 'F' }
8db25060 31sub SIG_SIZE () { 1 }
32
612969fb 33sub new {
34 my $class = shift;
35 my ($args) = @_;
36
37 my $self = bless {
38 long_size => 4,
39 long_pack => 'N',
40 data_size => 4,
41 data_pack => 'N',
251dfd0e 42
612969fb 43 digest => \&Digest::MD5::md5,
44 hash_size => 16,
251dfd0e 45
81d16922 46 ##
d5d7c51d 47 # Maximum number of buckets per list before another level of indexing is
e0098e7f 48 # done. Increase this value for slightly greater speed, but larger database
d5d7c51d 49 # files. DO NOT decrease this value below 16, due to risk of recursive
50 # reindex overrun.
81d16922 51 ##
612969fb 52 max_buckets => 16,
460b1067 53
54 fileobj => undef,
359a01ac 55 obj => undef,
612969fb 56 }, $class;
57
e0098e7f 58 if ( defined $args->{pack_size} ) {
59 if ( lc $args->{pack_size} eq 'small' ) {
60 $args->{long_size} = 2;
c9b6d0d8 61 $args->{long_pack} = 'n';
e0098e7f 62 }
63 elsif ( lc $args->{pack_size} eq 'medium' ) {
64 $args->{long_size} = 4;
65 $args->{long_pack} = 'N';
66 }
67 elsif ( lc $args->{pack_size} eq 'large' ) {
68 $args->{long_size} = 8;
69 $args->{long_pack} = 'Q';
70 }
71 else {
72 die "Unknown pack_size value: '$args->{pack_size}'\n";
73 }
74 }
75
fde3db1a 76 # Grab the parameters we want to use
77 foreach my $param ( keys %$self ) {
78 next unless exists $args->{$param};
3e9498a1 79 $self->{$param} = $args->{$param};
fde3db1a 80 }
359a01ac 81 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
fde3db1a 82
e0098e7f 83 if ( $self->{max_buckets} < 16 ) {
84 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
85 $self->{max_buckets} = 16;
86 }
87
260a80b4 88 return $self;
89}
90
460b1067 91sub _fileobj { return $_[0]{fileobj} }
92sub _fh { return $_[0]->_fileobj->{fh} }
93
260a80b4 94sub calculate_sizes {
95 my $self = shift;
96
28394a1a 97 #XXX Does this need to be updated with different hashing algorithms?
e0098e7f 98 $self->{index_size} = (2**8) * $self->{long_size};
28394a1a 99 $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 3;
e0098e7f 100 $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size};
612969fb 101
260a80b4 102 return;
1bf65be7 103}
104
fde3db1a 105sub write_file_header {
0d0f3d5d 106 my $self = shift;
0d0f3d5d 107
21838116 108 local($/,$\);
109
460b1067 110 my $fh = $self->_fh;
0d0f3d5d 111
e96daec8 112 my $loc = $self->_request_space( length( SIG_FILE ) + 21 );
460b1067 113 seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET);
260a80b4 114 print( $fh
115 SIG_FILE,
460b1067 116 SIG_HEADER,
117 pack('N', 1), # header version
118 pack('N', 12), # header size
15ba72cc 119 pack('N', 0), # currently running transaction IDs
c9b6d0d8 120 pack('n', $self->{long_size}),
260a80b4 121 pack('A', $self->{long_pack}),
c9b6d0d8 122 pack('n', $self->{data_size}),
260a80b4 123 pack('A', $self->{data_pack}),
c9b6d0d8 124 pack('n', $self->{max_buckets}),
260a80b4 125 );
0d0f3d5d 126
20b7f047 127 $self->_fileobj->set_transaction_offset( 13 );
128
0d0f3d5d 129 return;
130}
131
fde3db1a 132sub read_file_header {
e064ccd1 133 my $self = shift;
e064ccd1 134
21838116 135 local($/,$\);
136
e96daec8 137 my $fh = $self->_fh;
e064ccd1 138
e96daec8 139 seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET);
42f79e07 140 my $buffer;
460b1067 141 my $bytes_read = read( $fh, $buffer, length(SIG_FILE) + 9 );
142
143 return unless $bytes_read;
144
145 my ($file_signature, $sig_header, $header_version, $size) = unpack(
146 'A4 A N N', $buffer
42f79e07 147 );
e064ccd1 148
460b1067 149 unless ( $file_signature eq SIG_FILE ) {
15ba72cc 150 $self->_fileobj->close;
e96daec8 151 $self->_throw_error( "Signature not found -- file is not a Deep DB" );
460b1067 152 }
260a80b4 153
460b1067 154 unless ( $sig_header eq SIG_HEADER ) {
15ba72cc 155 $self->_fileobj->close;
e96daec8 156 $self->_throw_error( "Old file version found." );
460b1067 157 }
9b2370e0 158
460b1067 159 my $buffer2;
160 $bytes_read += read( $fh, $buffer2, $size );
c9b6d0d8 161 my ($running_transactions, @values) = unpack( 'N n A n A n', $buffer2 );
15ba72cc 162
163 $self->_fileobj->set_transaction_offset( 13 );
164
460b1067 165 if ( @values < 5 || grep { !defined } @values ) {
15ba72cc 166 $self->_fileobj->close;
e96daec8 167 $self->_throw_error("Corrupted file - bad header");
e064ccd1 168 }
169
460b1067 170 #XXX Add warnings if values weren't set right
171 @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;
172
e064ccd1 173 return $bytes_read;
174}
175
460b1067 176sub setup_fh {
177 my $self = shift;
178 my ($obj) = @_;
70b55428 179
359a01ac 180 local($/,$\);
181
e96daec8 182 my $fh = $self->_fh;
6fde4ed2 183 flock $fh, LOCK_EX;
118ba343 184
260a80b4 185 #XXX The duplication of calculate_sizes needs to go away
6fde4ed2 186 unless ( $obj->{base_offset} ) {
e96daec8 187 my $bytes_read = $self->read_file_header;
118ba343 188
260a80b4 189 $self->calculate_sizes;
190
118ba343 191 ##
fde3db1a 192 # File is empty -- write header and master index
118ba343 193 ##
194 if (!$bytes_read) {
aa83bc1e 195 $self->_fileobj->audit( "# Database created on" );
359a01ac 196
e96daec8 197 $self->write_file_header;
118ba343 198
e96daec8 199 $obj->{base_offset} = $self->_request_space( $self->tag_size( $self->{index_size} ) );
118ba343 200
9e4f83a0 201 $self->write_tag(
e96daec8 202 $obj->_base_offset, $obj->_type,
f37c15ab 203 chr(0)x$self->{index_size},
118ba343 204 );
205
206 # Flush the filehandle
207 my $old_fh = select $fh;
208 my $old_af = $|; $| = 1; $| = $old_af;
209 select $old_fh;
210 }
211 else {
212 $obj->{base_offset} = $bytes_read;
213
214 ##
fde3db1a 215 # Get our type from master index header
118ba343 216 ##
359a01ac 217 my $tag = $self->load_tag($obj->_base_offset);
218 unless ( $tag ) {
219 flock $fh, LOCK_UN;
220 $self->_throw_error("Corrupted file, no master index record");
221 }
118ba343 222
e96daec8 223 unless ($obj->_type eq $tag->{signature}) {
359a01ac 224 flock $fh, LOCK_UN;
e96daec8 225 $self->_throw_error("File type mismatch");
118ba343 226 }
227 }
118ba343 228 }
260a80b4 229 else {
230 $self->calculate_sizes;
231 }
e06824f8 232
673464d9 233 #XXX We have to make sure we don't mess up when autoflush isn't turned on
e96daec8 234 unless ( $self->_fileobj->{inode} ) {
235 my @stats = stat($fh);
236 $self->_fileobj->{inode} = $stats[1];
237 $self->_fileobj->{end} = $stats[7];
70b55428 238 }
239
6fde4ed2 240 flock $fh, LOCK_UN;
241
70b55428 242 return 1;
243}
244
16d1ad9b 245sub tag_size {
246 my $self = shift;
247 my ($size) = @_;
248 return SIG_SIZE + $self->{data_size} + $size;
249}
250
9e4f83a0 251sub write_tag {
20f7b20c 252 ##
253 # Given offset, signature and content, create tag and write to disk
254 ##
d4b1166e 255 my $self = shift;
e96daec8 256 my ($offset, $sig, $content) = @_;
f37c15ab 257 my $size = length( $content );
20f7b20c 258
21838116 259 local($/,$\);
260
e96daec8 261 my $fh = $self->_fh;
d4b1166e 262
f37c15ab 263 if ( defined $offset ) {
e96daec8 264 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
f37c15ab 265 }
266
251dfd0e 267 print( $fh $sig . pack($self->{data_pack}, $size) . $content );
20f7b20c 268
f37c15ab 269 return unless defined $offset;
270
20f7b20c 271 return {
272 signature => $sig,
273 size => $size,
8db25060 274 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 275 content => $content
276 };
d4b1166e 277}
278
279sub load_tag {
20f7b20c 280 ##
281 # Given offset, load single tag and return signature, size and data
282 ##
d4b1166e 283 my $self = shift;
e96daec8 284 my ($offset) = @_;
20f7b20c 285
21838116 286 local($/,$\);
287
e06824f8 288# print join(':',map{$_||''}caller(1)), $/;
289
e96daec8 290 my $fh = $self->_fh;
d4b1166e 291
e96daec8 292 seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
e5fc7e69 293
75be6413 294 #XXX I'm not sure this check will work if autoflush isn't enabled ...
e5fc7e69 295 return if eof $fh;
20f7b20c 296
d4b1166e 297 my $b;
8db25060 298 read( $fh, $b, SIG_SIZE + $self->{data_size} );
251dfd0e 299 my ($sig, $size) = unpack( "A $self->{data_pack}", $b );
20f7b20c 300
301 my $buffer;
302 read( $fh, $buffer, $size);
303
304 return {
305 signature => $sig,
306 size => $size,
8db25060 307 offset => $offset + SIG_SIZE + $self->{data_size},
20f7b20c 308 content => $buffer
309 };
d4b1166e 310}
311
56ec4340 312sub _get_dbm_object {
313 my $item = shift;
314
315 my $obj = eval {
316 local $SIG{__DIE__};
317 if ($item->isa( 'DBM::Deep' )) {
318 return $item;
319 }
320 return;
321 };
322 return $obj if $obj;
323
324 my $r = Scalar::Util::reftype( $item ) || '';
325 if ( $r eq 'HASH' ) {
326 my $obj = eval {
327 local $SIG{__DIE__};
328 my $obj = tied(%$item);
329 if ($obj->isa( 'DBM::Deep' )) {
330 return $obj;
331 }
332 return;
333 };
334 return $obj if $obj;
335 }
336 elsif ( $r eq 'ARRAY' ) {
337 my $obj = eval {
338 local $SIG{__DIE__};
339 my $obj = tied(@$item);
340 if ($obj->isa( 'DBM::Deep' )) {
341 return $obj;
342 }
343 return;
344 };
345 return $obj if $obj;
346 }
347
348 return;
349}
350
29b01632 351sub _length_needed {
352 my $self = shift;
e96daec8 353 my ($value, $key) = @_;
29b01632 354
355 my $is_dbm_deep = eval {
356 local $SIG{'__DIE__'};
357 $value->isa( 'DBM::Deep' );
358 };
359
f37c15ab 360 my $len = SIG_SIZE + $self->{data_size}
361 + $self->{data_size} + length( $key );
29b01632 362
e96daec8 363 if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
f37c15ab 364 return $len + $self->{long_size};
29b01632 365 }
366
367 my $r = Scalar::Util::reftype( $value ) || '';
e96daec8 368 if ( $self->_fileobj->{autobless} ) {
9a187d8c 369 # This is for the bit saying whether or not this thing is blessed.
370 $len += 1;
371 }
372
29b01632 373 unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
f37c15ab 374 if ( defined $value ) {
375 $len += length( $value );
376 }
377 return $len;
29b01632 378 }
379
f37c15ab 380 $len += $self->{index_size};
29b01632 381
382 # if autobless is enabled, must also take into consideration
f37c15ab 383 # the class name as it is stored after the key.
e96daec8 384 if ( $self->_fileobj->{autobless} ) {
56ec4340 385 my $c = Scalar::Util::blessed($value);
386 if ( defined $c && !$is_dbm_deep ) {
387 $len += $self->{data_size} + length($c);
29b01632 388 }
389 }
390
f37c15ab 391 return $len;
29b01632 392}
393
20f7b20c 394sub add_bucket {
395 ##
396 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
397 # plain (undigested) key and value.
398 ##
d4b1166e 399 my $self = shift;
359a01ac 400 my ($tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
c9b6d0d8 401 $deleted ||= 0;
75be6413 402
21838116 403 local($/,$\);
404
eea0d863 405 # This verifies that only supported values will be stored.
406 {
407 my $r = Scalar::Util::reftype( $value );
408 last if !defined $r;
409
410 last if $r eq 'HASH';
411 last if $r eq 'ARRAY';
412
e96daec8 413 $self->_throw_error(
eea0d863 414 "Storage of variables of type '$r' is not supported."
415 );
416 }
417
20f7b20c 418 my $location = 0;
419 my $result = 2;
420
e96daec8 421 my $root = $self->_fileobj;
422 my $fh = $self->_fh;
20f7b20c 423
e96daec8 424 my $actual_length = $self->_length_needed( $value, $plain_key );
20f7b20c 425
21838116 426 #ACID - This is a mutation. Must only find the exact transaction
c9b6d0d8 427 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5, 1 );
428
429 my @transactions;
430 if ( $self->_fileobj->transaction_id == 0 ) {
431 @transactions = $self->_fileobj->current_transactions;
432 }
75be6413 433
e96daec8 434# $self->_release_space( $size, $subloc );
386bab6c 435 # Updating a known md5
f9c33187 436#XXX This needs updating to use _release_space
386bab6c 437 if ( $subloc ) {
438 $result = 1;
20f7b20c 439
386bab6c 440 if ($actual_length <= $size) {
441 $location = $subloc;
20f7b20c 442 }
75be6413 443 else {
e96daec8 444 $location = $self->_request_space( $actual_length );
386bab6c 445 seek(
446 $fh,
9a187d8c 447 $tag->{offset} + $offset
448 + $self->{hash_size} + $root->{file_offset},
386bab6c 449 SEEK_SET,
450 );
9a187d8c 451 print( $fh pack($self->{long_pack}, $location ) );
452 print( $fh pack($self->{long_pack}, $actual_length ) );
c9b6d0d8 453 print( $fh pack('n n', $root->transaction_id, $deleted ) );
75be6413 454 }
75be6413 455 }
386bab6c 456 # Adding a new md5
457 elsif ( defined $offset ) {
e96daec8 458 $location = $self->_request_space( $actual_length );
386bab6c 459
460 seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
9a187d8c 461 print( $fh $md5 . pack($self->{long_pack}, $location ) );
462 print( $fh pack($self->{long_pack}, $actual_length ) );
c9b6d0d8 463 print( $fh pack('n n', $root->transaction_id, $deleted ) );
464
465 for ( @transactions ) {
466 my $tag2 = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );
467 $self->_fileobj->{transaction_id} = $_;
359a01ac 468 $self->add_bucket( $tag2, $md5, '', '', 1, $orig_key );
c9b6d0d8 469 $self->_fileobj->{transaction_id} = 0;
470 }
386bab6c 471 }
472 # If bucket didn't fit into list, split into a new index level
f9c33187 473 # split_index() will do the _request_space() call
386bab6c 474 else {
e96daec8 475 $location = $self->split_index( $md5, $tag );
386bab6c 476 }
20f7b20c 477
359a01ac 478 $self->write_value( $location, $plain_key, $value, $orig_key );
d5d7c51d 479
480 return $result;
481}
482
483sub write_value {
484 my $self = shift;
359a01ac 485 my ($location, $key, $value, $orig_key) = @_;
d5d7c51d 486
21838116 487 local($/,$\);
488
e96daec8 489 my $fh = $self->_fh;
490 my $root = $self->_fileobj;
d5d7c51d 491
9d4fa373 492 my $dbm_deep_obj = _get_dbm_object( $value );
e96daec8 493 if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) {
494 $self->_throw_error( "Cannot cross-reference. Use export() instead" );
9d4fa373 495 }
d5d7c51d 496
497 seek($fh, $location + $root->{file_offset}, SEEK_SET);
498
20f7b20c 499 ##
d5d7c51d 500 # Write signature based on content type, set content length and write
501 # actual value.
20f7b20c 502 ##
9d4fa373 503 my $r = Scalar::Util::reftype( $value ) || '';
504 if ( $dbm_deep_obj ) {
e96daec8 505 $self->write_tag( undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) );
f37c15ab 506 }
507 elsif ($r eq 'HASH') {
9d4fa373 508 if ( !$dbm_deep_obj && tied %{$value} ) {
e96daec8 509 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 510 }
e96daec8 511 $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} );
f37c15ab 512 }
513 elsif ($r eq 'ARRAY') {
9d4fa373 514 if ( !$dbm_deep_obj && tied @{$value} ) {
e96daec8 515 $self->_throw_error( "Cannot store something that is tied" );
019ab3a1 516 }
e96daec8 517 $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} );
f37c15ab 518 }
519 elsif (!defined($value)) {
e96daec8 520 $self->write_tag( undef, SIG_NULL, '' );
d5d7c51d 521 }
522 else {
e96daec8 523 $self->write_tag( undef, SIG_DATA, $value );
d5d7c51d 524 }
20f7b20c 525
d5d7c51d 526 ##
527 # Plain key is stored AFTER value, as keys are typically fetched less often.
528 ##
529 print( $fh pack($self->{data_pack}, length($key)) . $key );
20f7b20c 530
9a187d8c 531 # Internal references don't care about autobless
9d4fa373 532 return 1 if $dbm_deep_obj;
9a187d8c 533
d5d7c51d 534 ##
535 # If value is blessed, preserve class name
536 ##
537 if ( $root->{autobless} ) {
56ec4340 538 my $c = Scalar::Util::blessed($value);
539 if ( defined $c && !$dbm_deep_obj ) {
d5d7c51d 540 print( $fh chr(1) );
56ec4340 541 print( $fh pack($self->{data_pack}, length($c)) . $c );
20f7b20c 542 }
d5d7c51d 543 else {
544 print( $fh chr(0) );
20f7b20c 545 }
d5d7c51d 546 }
20f7b20c 547
d5d7c51d 548 ##
56ec4340 549 # Tie the passed in reference so that changes to it are reflected in the
550 # datafile. The use of $location as the base_offset will act as the
551 # the linkage between parent and child.
552 #
553 # The overall assignment is a hack around the fact that just tying doesn't
554 # store the values. This may not be the wrong thing to do.
d5d7c51d 555 ##
9d4fa373 556 if ($r eq 'HASH') {
557 my %x = %$value;
558 tie %$value, 'DBM::Deep', {
559 base_offset => $location,
460b1067 560 fileobj => $root,
359a01ac 561 parent => $self->{obj},
562 parent_key => $orig_key,
9d4fa373 563 };
564 %$value = %x;
565 }
566 elsif ($r eq 'ARRAY') {
567 my @x = @$value;
568 tie @$value, 'DBM::Deep', {
569 base_offset => $location,
460b1067 570 fileobj => $root,
359a01ac 571 parent => $self->{obj},
572 parent_key => $orig_key,
9d4fa373 573 };
574 @$value = @x;
20f7b20c 575 }
d4b1166e 576
d5d7c51d 577 return 1;
d4b1166e 578}
579
75be6413 580sub split_index {
581 my $self = shift;
e96daec8 582 my ($md5, $tag) = @_;
75be6413 583
21838116 584 local($/,$\);
585
e96daec8 586 my $fh = $self->_fh;
587 my $root = $self->_fileobj;
16d1ad9b 588
589 my $loc = $self->_request_space(
e96daec8 590 $self->tag_size( $self->{index_size} ),
16d1ad9b 591 );
592
7b1e1aa1 593 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
16d1ad9b 594 print( $fh pack($self->{long_pack}, $loc) );
75be6413 595
9e4f83a0 596 my $index_tag = $self->write_tag(
e96daec8 597 $loc, SIG_INDEX,
f37c15ab 598 chr(0)x$self->{index_size},
75be6413 599 );
600
f9c33187 601 my $newtag_loc = $self->_request_space(
e96daec8 602 $self->tag_size( $self->{bucket_list_size} ),
f9c33187 603 );
75be6413 604
7b1e1aa1 605 my $keys = $tag->{content}
f9c33187 606 . $md5 . pack($self->{long_pack}, $newtag_loc)
28394a1a 607 . pack($self->{long_pack}, 0) # size
20b7f047 608 . pack($self->{long_pack}, 0); # transaction ID
75be6413 609
f9c33187 610 my @newloc = ();
75be6413 611 BUCKET:
612 for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
9a187d8c 613 my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );
75be6413 614
f9c33187 615 die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
616 die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;
75be6413 617
75be6413 618 my $num = ord(substr($key, $tag->{ch} + 1, 1));
619
f9c33187 620 if ($newloc[$num]) {
621 seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
75be6413 622 my $subkeys;
623 read( $fh, $subkeys, $self->{bucket_list_size});
624
f9c33187 625 # This is looking for the first empty spot
7b1e1aa1 626 my ($subloc, $offset, $size) = $self->_find_in_buckets(
f9c33187 627 { content => $subkeys }, '',
7b1e1aa1 628 );
75be6413 629
f9c33187 630 seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
631 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 632
633 next;
75be6413 634 }
75be6413 635
7b1e1aa1 636 seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);
2603d86e 637
7b1e1aa1 638 my $loc = $self->_request_space(
e96daec8 639 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 640 );
2603d86e 641
7b1e1aa1 642 print( $fh pack($self->{long_pack}, $loc) );
75be6413 643
7b1e1aa1 644 my $blist_tag = $self->write_tag(
e96daec8 645 $loc, SIG_BLIST,
7b1e1aa1 646 chr(0)x$self->{bucket_list_size},
647 );
648
649 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
f9c33187 650 print( $fh $key . pack($self->{long_pack}, $old_subloc) );
7b1e1aa1 651
f9c33187 652 $newloc[$num] = $blist_tag->{offset};
7b1e1aa1 653 }
654
655 $self->_release_space(
e96daec8 656 $self->tag_size( $self->{bucket_list_size} ),
7b1e1aa1 657 $tag->{offset} - SIG_SIZE - $self->{data_size},
658 );
75be6413 659
f9c33187 660 return $newtag_loc;
75be6413 661}
662
8db25060 663sub read_from_loc {
664 my $self = shift;
359a01ac 665 my ($subloc, $orig_key) = @_;
8db25060 666
21838116 667 local($/,$\);
668
e96daec8 669 my $fh = $self->_fh;
8db25060 670
671 ##
672 # Found match -- seek to offset and read signature
673 ##
674 my $signature;
e96daec8 675 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
8db25060 676 read( $fh, $signature, SIG_SIZE);
677
678 ##
679 # If value is a hash or array, return new DBM::Deep object with correct offset
680 ##
681 if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
685e40f1 682 my $new_obj = DBM::Deep->new({
359a01ac 683 type => $signature,
8db25060 684 base_offset => $subloc,
e96daec8 685 fileobj => $self->_fileobj,
359a01ac 686 parent => $self->{obj},
687 parent_key => $orig_key,
685e40f1 688 });
8db25060 689
460b1067 690 if ($new_obj->_fileobj->{autobless}) {
8db25060 691 ##
692 # Skip over value and plain key to see if object needs
693 # to be re-blessed
694 ##
695 seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);
696
697 my $size;
c6ea6b6c 698 read( $fh, $size, $self->{data_size});
699 $size = unpack($self->{data_pack}, $size);
8db25060 700 if ($size) { seek($fh, $size, SEEK_CUR); }
701
702 my $bless_bit;
703 read( $fh, $bless_bit, 1);
704 if (ord($bless_bit)) {
705 ##
706 # Yes, object needs to be re-blessed
707 ##
708 my $class_name;
c6ea6b6c 709 read( $fh, $size, $self->{data_size});
710 $size = unpack($self->{data_pack}, $size);
8db25060 711 if ($size) { read( $fh, $class_name, $size); }
685e40f1 712 if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
8db25060 713 }
714 }
715
685e40f1 716 return $new_obj;
8db25060 717 }
718 elsif ( $signature eq SIG_INTERNAL ) {
719 my $size;
720 read( $fh, $size, $self->{data_size});
721 $size = unpack($self->{data_pack}, $size);
722
723 if ( $size ) {
724 my $new_loc;
725 read( $fh, $new_loc, $size );
726 $new_loc = unpack( $self->{long_pack}, $new_loc );
727
359a01ac 728 return $self->read_from_loc( $new_loc, $orig_key );
8db25060 729 }
730 else {
731 return;
732 }
733 }
734 ##
735 # Otherwise return actual value
736 ##
460b1067 737 elsif ( $signature eq SIG_DATA ) {
8db25060 738 my $size;
739 read( $fh, $size, $self->{data_size});
740 $size = unpack($self->{data_pack}, $size);
741
742 my $value = '';
743 if ($size) { read( $fh, $value, $size); }
744 return $value;
745 }
746
747 ##
748 # Key exists, but content is null
749 ##
750 return;
751}
752
9020ee8c 753sub get_bucket_value {
beac1dff 754 ##
755 # Fetch single value given tag and MD5 digested key.
756 ##
757 my $self = shift;
359a01ac 758 my ($tag, $md5, $orig_key) = @_;
9020ee8c 759
21838116 760 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 761 my ($subloc, $offset, $size,$is_deleted) = $self->_find_in_buckets( $tag, $md5 );
762 if ( $subloc && !$is_deleted ) {
359a01ac 763 return $self->read_from_loc( $subloc, $orig_key );
386bab6c 764 }
beac1dff 765 return;
9020ee8c 766}
ab0e4957 767
768sub delete_bucket {
beac1dff 769 ##
770 # Delete single key/value pair given tag and MD5 digested key.
771 ##
772 my $self = shift;
a97c8f67 773 my ($tag, $md5, $orig_key) = @_;
ab0e4957 774
21838116 775 local($/,$\);
776
777 #ACID - This is a mutation. Must only find the exact transaction
778 my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );
f9c33187 779#XXX This needs _release_space()
386bab6c 780 if ( $subloc ) {
e96daec8 781 my $fh = $self->_fh;
782 seek($fh, $tag->{offset} + $offset + $self->_fileobj->{file_offset}, SEEK_SET);
386bab6c 783 print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) );
251dfd0e 784 print( $fh chr(0) x $self->{bucket_size} );
d0b74c17 785
ab0e4957 786 return 1;
386bab6c 787 }
beac1dff 788 return;
ab0e4957 789}
790
912d50b1 791sub bucket_exists {
beac1dff 792 ##
793 # Check existence of single key given tag and MD5 digested key.
794 ##
795 my $self = shift;
e96daec8 796 my ($tag, $md5) = @_;
912d50b1 797
21838116 798 #ACID - This is a read. Can find exact or HEAD
c9b6d0d8 799 my ($subloc, $offset, $size, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );
800 return ($subloc && !$is_deleted) && 1;
912d50b1 801}
802
6736c116 803sub find_bucket_list {
beac1dff 804 ##
805 # Locate offset for bucket list, given digested key
806 ##
807 my $self = shift;
e96daec8 808 my ($offset, $md5, $args) = @_;
d0b74c17 809 $args = {} unless $args;
810
21838116 811 local($/,$\);
812
beac1dff 813 ##
814 # Locate offset for bucket list using digest index system
815 ##
e96daec8 816 my $tag = $self->load_tag( $offset )
817 or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );
d0b74c17 818
e5fc7e69 819 my $ch = 0;
8db25060 820 while ($tag->{signature} ne SIG_BLIST) {
d0b74c17 821 my $num = ord substr($md5, $ch, 1);
822
823 my $ref_loc = $tag->{offset} + ($num * $self->{long_size});
e96daec8 824 $tag = $self->index_lookup( $tag, $num );
d0b74c17 825
826 if (!$tag) {
29b01632 827 return if !$args->{create};
d0b74c17 828
16d1ad9b 829 my $loc = $self->_request_space(
e96daec8 830 $self->tag_size( $self->{bucket_list_size} ),
16d1ad9b 831 );
832
e96daec8 833 my $fh = $self->_fh;
834 seek($fh, $ref_loc + $self->_fileobj->{file_offset}, SEEK_SET);
16d1ad9b 835 print( $fh pack($self->{long_pack}, $loc) );
d0b74c17 836
9e4f83a0 837 $tag = $self->write_tag(
e96daec8 838 $loc, SIG_BLIST,
f37c15ab 839 chr(0)x$self->{bucket_list_size},
d5d7c51d 840 );
841
842 $tag->{ref_loc} = $ref_loc;
843 $tag->{ch} = $ch;
844
845 last;
d0b74c17 846 }
847
16d1ad9b 848 $tag->{ch} = $ch++;
d0b74c17 849 $tag->{ref_loc} = $ref_loc;
beac1dff 850 }
d0b74c17 851
beac1dff 852 return $tag;
6736c116 853}
854
d0b74c17 855sub index_lookup {
856 ##
857 # Given index tag, lookup single entry in index and return .
858 ##
859 my $self = shift;
e96daec8 860 my ($tag, $index) = @_;
d0b74c17 861
862 my $location = unpack(
863 $self->{long_pack},
864 substr(
865 $tag->{content},
866 $index * $self->{long_size},
867 $self->{long_size},
868 ),
869 );
870
871 if (!$location) { return; }
872
e96daec8 873 return $self->load_tag( $location );
d0b74c17 874}
875
6736c116 876sub traverse_index {
beac1dff 877 ##
878 # Scan index and recursively step into deeper levels, looking for next key.
879 ##
6736c116 880 my $self = shift;
881 my ($obj, $offset, $ch, $force_return_next) = @_;
d0b74c17 882
21838116 883 local($/,$\);
884
e96daec8 885 my $tag = $self->load_tag( $offset );
6736c116 886
e96daec8 887 my $fh = $self->_fh;
d0b74c17 888
8db25060 889 if ($tag->{signature} ne SIG_BLIST) {
beac1dff 890 my $content = $tag->{content};
e5fc7e69 891 my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));
d0b74c17 892
d5d7c51d 893 for (my $idx = $start; $idx < (2**8); $idx++) {
e5fc7e69 894 my $subloc = unpack(
895 $self->{long_pack},
e06824f8 896 substr(
897 $content,
898 $idx * $self->{long_size},
899 $self->{long_size},
900 ),
e5fc7e69 901 );
902
beac1dff 903 if ($subloc) {
e5fc7e69 904 my $result = $self->traverse_index(
905 $obj, $subloc, $ch + 1, $force_return_next,
906 );
907
beac1dff 908 if (defined($result)) { return $result; }
909 }
910 } # index loop
d0b74c17 911
beac1dff 912 $obj->{return_next} = 1;
913 } # tag is an index
d0b74c17 914
e5fc7e69 915 else {
beac1dff 916 my $keys = $tag->{content};
917 if ($force_return_next) { $obj->{return_next} = 1; }
d0b74c17 918
beac1dff 919 ##
920 # Iterate through buckets, looking for a key match
921 ##
8db25060 922 for (my $i = 0; $i < $self->{max_buckets}; $i++) {
9cec1360 923 my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );
d0b74c17 924
8db25060 925 # End of bucket list -- return to outer loop
beac1dff 926 if (!$subloc) {
beac1dff 927 $obj->{return_next} = 1;
928 last;
929 }
8db25060 930 # Located previous key -- return next one found
beac1dff 931 elsif ($key eq $obj->{prev_md5}) {
beac1dff 932 $obj->{return_next} = 1;
933 next;
934 }
8db25060 935 # Seek to bucket location and skip over signature
beac1dff 936 elsif ($obj->{return_next}) {
e96daec8 937 seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
d0b74c17 938
beac1dff 939 # Skip over value to get to plain key
8db25060 940 my $sig;
941 read( $fh, $sig, SIG_SIZE );
942
beac1dff 943 my $size;
e5fc7e69 944 read( $fh, $size, $self->{data_size});
945 $size = unpack($self->{data_pack}, $size);
beac1dff 946 if ($size) { seek($fh, $size, SEEK_CUR); }
d0b74c17 947
beac1dff 948 # Read in plain key and return as scalar
beac1dff 949 my $plain_key;
e5fc7e69 950 read( $fh, $size, $self->{data_size});
951 $size = unpack($self->{data_pack}, $size);
beac1dff 952 if ($size) { read( $fh, $plain_key, $size); }
d0b74c17 953
beac1dff 954 return $plain_key;
955 }
8db25060 956 }
d0b74c17 957
beac1dff 958 $obj->{return_next} = 1;
959 } # tag is a bucket list
d0b74c17 960
beac1dff 961 return;
6736c116 962}
963
964sub get_next_key {
beac1dff 965 ##
966 # Locate next key, given digested previous one
967 ##
6736c116 968 my $self = shift;
969 my ($obj) = @_;
d0b74c17 970
beac1dff 971 $obj->{prev_md5} = $_[1] ? $_[1] : undef;
972 $obj->{return_next} = 0;
d0b74c17 973
beac1dff 974 ##
975 # If the previous key was not specifed, start at the top and
976 # return the first one found.
977 ##
978 if (!$obj->{prev_md5}) {
979 $obj->{prev_md5} = chr(0) x $self->{hash_size};
980 $obj->{return_next} = 1;
981 }
d0b74c17 982
beac1dff 983 return $self->traverse_index( $obj, $obj->_base_offset, 0 );
6736c116 984}
985
75be6413 986# Utilities
987
9cec1360 988sub _get_key_subloc {
75be6413 989 my $self = shift;
990 my ($keys, $idx) = @_;
991
c9b6d0d8 992 my ($key, $subloc, $size, $transaction_id, $is_deleted) = unpack(
28394a1a 993 # This is 'a', not 'A'. Please read the pack() documentation for the
994 # difference between the two and why it's important.
c9b6d0d8 995 "a$self->{hash_size} $self->{long_pack}2 n2",
75be6413 996 substr(
997 $keys,
9cec1360 998 ($idx * $self->{bucket_size}),
999 $self->{bucket_size},
75be6413 1000 ),
1001 );
1002
c9b6d0d8 1003 return ($key, $subloc, $size, $transaction_id, $is_deleted);
75be6413 1004}
1005
d608b06e 1006sub _find_in_buckets {
1007 my $self = shift;
21838116 1008 my ($tag, $md5, $exact) = @_;
d608b06e 1009
28394a1a 1010 my $trans_id = $self->_fileobj->transaction_id;
1011
21838116 1012 my @zero;
1013
d608b06e 1014 BUCKET:
1015 for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) {
c9b6d0d8 1016 my ($key, $subloc, $size, $transaction_id, $is_deleted) = $self->_get_key_subloc(
9a187d8c 1017 $tag->{content}, $i,
1018 );
d608b06e 1019
c9b6d0d8 1020 my @rv = ($subloc, $i * $self->{bucket_size}, $size, $is_deleted);
21838116 1021
1022 unless ( $subloc ) {
20b7f047 1023 if ( !$exact && @zero and $trans_id ) {
c9b6d0d8 1024 @rv = ($zero[2], $zero[0] * $self->{bucket_size},$zero[3],$is_deleted);
20b7f047 1025 }
21838116 1026 return @rv;
1027 }
1028
1029 next BUCKET if $key ne $md5;
d608b06e 1030
21838116 1031 # Save off the HEAD in case we need it.
c9b6d0d8 1032 @zero = ($i,$key,$subloc,$size,$transaction_id,$is_deleted) if $transaction_id == 0;
d608b06e 1033
21838116 1034 next BUCKET if $transaction_id != $trans_id;
1035
1036 return @rv;
d608b06e 1037 }
1038
1039 return;
1040}
1041
994ccd8e 1042sub _request_space {
1043 my $self = shift;
e96daec8 1044 my ($size) = @_;
994ccd8e 1045
460b1067 1046 my $loc = $self->_fileobj->{end};
1047 $self->_fileobj->{end} += $size;
994ccd8e 1048
1049 return $loc;
1050}
1051
1052sub _release_space {
1053 my $self = shift;
e96daec8 1054 my ($size, $loc) = @_;
994ccd8e 1055
21838116 1056 local($/,$\);
1057
7b1e1aa1 1058 my $next_loc = 0;
1059
e96daec8 1060 my $fh = $self->_fh;
1061 seek( $fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET );
7b1e1aa1 1062 print( $fh SIG_FREE
1063 . pack($self->{long_pack}, $size )
1064 . pack($self->{long_pack}, $next_loc )
1065 );
1066
994ccd8e 1067 return;
1068}
1069
e96daec8 1070sub _throw_error {
1071 die "DBM::Deep: $_[1]\n";
1072}
1073
a20d9a3f 10741;
1075__END__
d5d7c51d 1076
1077# This will be added in later, after more refactoring is done. This is an early
1078# attempt at refactoring on the physical level instead of the virtual level.
1079sub _read_at {
1080 my $self = shift;
e96daec8 1081 my ($spot, $amount, $unpack) = @_;
d5d7c51d 1082
21838116 1083 local($/,$\);
1084
e96daec8 1085 my $fh = $self->_fh;
1086 seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );
d5d7c51d 1087
1088 my $buffer;
1089 my $bytes_read = read( $fh, $buffer, $amount );
1090
1091 if ( $unpack ) {
1092 $buffer = unpack( $unpack, $buffer );
1093 }
1094
1095 if ( wantarray ) {
1096 return ($buffer, $bytes_read);
1097 }
1098 else {
1099 return $buffer;
1100 }
1101}
e96daec8 1102
1103sub _print_at {
1104 my $self = shift;
1105 my ($spot, $data) = @_;
1106
21838116 1107 local($/,$\);
1108
e96daec8 1109 my $fh = $self->_fh;
1110 seek( $fh, $spot, SEEK_SET );
1111 print( $fh $data );
1112
1113 return;
1114}
1115
1116sub get_file_version {
1117 my $self = shift;
1118
21838116 1119 local($/,$\);
1120
e96daec8 1121 my $fh = $self->_fh;
1122
1123 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1124 my $buffer;
1125 my $bytes_read = read( $fh, $buffer, 4 );
1126 unless ( $bytes_read == 4 ) {
1127 $self->_throw_error( "Cannot read file version" );
1128 }
1129
1130 return unpack( 'N', $buffer );
1131}
1132
1133sub write_file_version {
1134 my $self = shift;
1135 my ($new_version) = @_;
1136
21838116 1137 local($/,$\);
1138
e96daec8 1139 my $fh = $self->_fh;
1140
1141 seek( $fh, 13 + $self->_fileobj->{file_offset}, SEEK_SET );
1142 print( $fh pack( 'N', $new_version ) );
1143
1144 return;
1145}
1146