1 package DBM::Deep::Engine3;
7 our $VERSION = q(0.99_03);
13 # * Every method in here assumes that the storage has been appropriately
14 # safeguarded. This can be anything from flock() to some sort of manual
15 # mutex. But, it's the caller's responsability to make sure that this has
18 # Setup file and tag signatures. These should never change.
19 sub SIG_FILE () { 'DPDB' }
20 sub SIG_HEADER () { 'h' }
21 sub SIG_INTERNAL () { 'i' }
22 sub SIG_HASH () { 'H' }
23 sub SIG_ARRAY () { 'A' }
24 sub SIG_NULL () { 'N' }
25 sub SIG_DATA () { 'D' }
26 sub SIG_INDEX () { 'I' }
27 sub SIG_BLIST () { 'B' }
28 sub SIG_FREE () { 'F' }
29 sub SIG_KEYS () { 'K' }
32 ################################################################################
34 # Please refer to the pack() documentation for further information
36 1 => 'C', # Unsigned char value
37 2 => 'n', # Unsigned short in "network" (big-endian) order
38 4 => 'N', # Unsigned long in "network" (big-endian) order
39 8 => 'Q', # Usigned quad (no order specified, presumably machine-dependent)
50 hash_size => 16, # In bytes
52 num_txns => 16, # HEAD plus 15 running txns
58 if ( defined $args->{pack_size} ) {
59 if ( lc $args->{pack_size} eq 'small' ) {
60 $args->{byte_size} = 2;
62 elsif ( lc $args->{pack_size} eq 'medium' ) {
63 $args->{byte_size} = 4;
65 elsif ( lc $args->{pack_size} eq 'large' ) {
66 $args->{byte_size} = 8;
69 die "Unknown pack_size value: '$args->{pack_size}'\n";
73 # Grab the parameters we want to use
74 foreach my $param ( keys %$self ) {
75 next unless exists $args->{$param};
76 $self->{$param} = $args->{$param};
78 Scalar::Util::weaken( $self->{obj} ) if $self->{obj};
80 $self->{byte_pack} = $StP{ $self->byte_size };
83 # Number of buckets per blist before another level of indexing is
84 # done. Increase this value for slightly greater speed, but larger database
85 # files. DO NOT decrease this value below 16, due to risk of recursive
88 if ( $self->{max_buckets} < 16 ) {
89 warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
90 $self->{max_buckets} = 16;
93 if ( !$self->{digest} ) {
95 $self->{digest} = \&Digest::MD5::md5;
99 $self->{chains_loc} = 15;
104 ################################################################################
108 my ($trans_id, $base_offset, $key) = @_;
109 print "read_value( $trans_id, $base_offset, $key )\n" if $DEBUG;
111 # This will be a Reference sector
112 my $sector = $self->_load_sector( $base_offset )
113 or die "How did read_value fail (no sector for '$base_offset')?!\n";
115 my $key_md5 = $self->_apply_digest( $key );
117 # XXX What should happen if this fails?
118 my $blist = $sector->get_bucket_list({
121 }) or die "How did read_value fail (no blist)?!\n";
123 my $value_sector = $blist->get_data_for( $key_md5 );
124 if ( !$value_sector ) {
126 $value_sector = DBM::Deep::Engine::Sector::Null->new({
131 $blist->write_md5( $key_md5, $key, $value_sector->offset );
134 return $value_sector->data;
139 my ($trans_id, $base_offset) = @_;
140 print "get_classname( $trans_id, $base_offset )\n" if $DEBUG;
142 # This will be a Reference sector
143 my $sector = $self->_load_sector( $base_offset )
144 or die "How did read_value fail (no sector for '$base_offset')?!\n";
146 return $sector->get_classname;
151 my ($trans_id, $base_offset, $key) = @_;
152 print "key_exists( $trans_id, $base_offset, $key )\n" if $DEBUG;
154 # This will be a Reference sector
155 my $sector = $self->_load_sector( $base_offset )
156 or die "How did key_exists fail (no sector for '$base_offset')?!\n";
158 my $key_md5 = $self->_apply_digest( $key );
160 # XXX What should happen if this fails?
161 my $blist = $sector->get_bucket_list({
163 }) or die "How did key_exists fail (no blist)?!\n";
165 # exists() returns 1 or '' for true/false.
166 return $blist->has_md5( $key_md5 ) ? 1 : '';
171 my ($trans_id, $base_offset, $key) = @_;
172 print "delete_key( $trans_id, $base_offset, $key )\n" if $DEBUG;
174 my $sector = $self->_load_sector( $base_offset )
175 or die "How did delete_key fail (no sector for '$base_offset')?!\n";
177 my $key_md5 = $self->_apply_digest( $key );
179 # XXX What should happen if this fails?
180 my $blist = $sector->get_bucket_list({
182 }) or die "How did delete_key fail (no blist)?!\n";
184 return $blist->delete_md5( $key_md5 );
189 my ($trans_id, $base_offset, $key, $value) = @_;
190 print "write_value( $trans_id, $base_offset, $key, $value )\n" if $DEBUG;
192 # This will be a Reference sector
193 my $sector = $self->_load_sector( $base_offset )
194 or die "How did write_value fail (no sector for '$base_offset')?!\n";
196 my $key_md5 = $self->_apply_digest( $key );
198 # XXX What should happen if this fails?
199 my $blist = $sector->get_bucket_list({
202 }) or die "How did write_value fail (no blist)?!\n";
204 my $r = Scalar::Util::reftype( $value ) || '';
207 last if $r eq 'HASH';
208 last if $r eq 'ARRAY';
210 DBM::Deep->_throw_error(
211 "Storage of references of type '$r' is not supported."
216 if ( !defined $value ) {
217 $class = 'DBM::Deep::Engine::Sector::Null';
219 elsif ( $r eq 'ARRAY' || $r eq 'HASH' ) {
220 if ( $r eq 'ARRAY' && tied(@$value) ) {
221 DBM::Deep->_throw_error( "Cannot store something that is tied." );
223 if ( $r eq 'HASH' && tied(%$value) ) {
224 DBM::Deep->_throw_error( "Cannot store something that is tied." );
226 $class = 'DBM::Deep::Engine::Sector::Reference';
227 $type = substr( $r, 0, 1 );
230 $class = 'DBM::Deep::Engine::Sector::Scalar';
233 if ( $blist->has_md5( $key_md5 ) ) {
234 $blist->get_data_for( $key_md5 )->free;
237 my $value_sector = $class->new({
243 $blist->write_md5( $key_md5, $key, $value_sector->offset );
245 # This code is to make sure we write all the values in the $value to the disk
246 # and to make sure all changes to $value after the assignment are reflected
247 # on disk. This may be counter-intuitive at first, but it is correct dwimmery.
248 # NOTE - simply tying $value won't perform a STORE on each value. Hence, the
249 # copy to a temp value.
250 if ( $r eq 'ARRAY' ) {
252 tie @$value, 'DBM::Deep', {
253 base_offset => $value_sector->offset,
254 storage => $self->storage,
257 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
259 elsif ( $r eq 'HASH' ) {
261 tie %$value, 'DBM::Deep', {
262 base_offset => $value_sector->offset,
263 storage => $self->storage,
267 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
275 my ($trans_id, $base_offset, $prev_key) = @_;
276 print "get_next_key( $trans_id, $base_offset )\n" if $DEBUG;
278 # XXX Need to add logic about resetting the iterator if any key in the reference has changed
279 unless ( $prev_key ) {
280 $self->{iterator} = DBM::Deep::Engine::Iterator->new({
281 base_offset => $base_offset,
282 trans_id => $trans_id,
287 return $self->iterator->get_next_key;
290 ################################################################################
296 # We're opening the file.
297 unless ( $obj->_base_offset ) {
298 my $bytes_read = $self->_read_file_header;
300 # Creating a new file
301 unless ( $bytes_read ) {
302 $self->_write_file_header;
304 # 1) Create Array/Hash entry
305 my $initial_reference = DBM::Deep::Engine::Sector::Reference->new({
309 $obj->{base_offset} = $initial_reference->offset;
311 $self->storage->flush;
313 # Reading from an existing file
315 $obj->{base_offset} = $bytes_read;
316 my $initial_reference = DBM::Deep::Engine::Sector::Reference->new({
318 offset => $obj->_base_offset,
320 unless ( $initial_reference ) {
321 DBM::Deep->_throw_error("Corrupted file, no master index record");
324 unless ($obj->_type eq $initial_reference->type) {
325 DBM::Deep->_throw_error("File type mismatch");
333 ################################################################################
335 sub _write_file_header {
338 my $header_fixed = length( SIG_FILE ) + 1 + 4 + 4;
339 my $header_var = 1 + 1 + 2 * $self->byte_size;
341 my $loc = $self->storage->request_space( $header_fixed + $header_var );
343 $self->storage->print_at( $loc,
346 pack('N', 1), # header version - at this point, we're at 9 bytes
347 pack('N', $header_var), # header size
348 # --- Above is $header_fixed. Below is $header_var
349 pack('C', $self->byte_size),
350 pack('C', $self->max_buckets),
351 pack($StP{$self->byte_size}, 0), # Start of free chain (blist size)
352 pack($StP{$self->byte_size}, 0), # Start of free chain (data size)
355 $self->set_chains_loc( $header_fixed + 2 );
357 # $self->storage->set_transaction_offset( $header_fixed );
362 sub _read_file_header {
365 my $header_fixed = length( SIG_FILE ) + 1 + 4 + 4;
367 my $buffer = $self->storage->read_at( 0, $header_fixed );
368 return unless length($buffer);
370 my ($file_signature, $sig_header, $header_version, $size) = unpack(
374 unless ( $file_signature eq SIG_FILE ) {
375 $self->storage->close;
376 DBM::Deep->_throw_error( "Signature not found -- file is not a Deep DB" );
379 unless ( $sig_header eq SIG_HEADER ) {
380 $self->storage->close;
381 DBM::Deep->_throw_error( "Old file version found." );
384 my $buffer2 = $self->storage->read_at( undef, $size );
385 my @values = unpack( 'C C', $buffer2 );
387 $self->set_chains_loc( $header_fixed + 2 );
389 # The transaction offset is the first thing after the fixed header section
390 #$self->storage->set_transaction_offset( $header_fixed );
392 if ( @values < 2 || grep { !defined } @values ) {
393 $self->storage->close;
394 DBM::Deep->_throw_error("Corrupted file - bad header");
397 #XXX Add warnings if values weren't set right
398 @{$self}{qw(byte_size max_buckets)} = @values;
400 my $header_var = 1 + 1 + 2 * $self->byte_size;
401 unless ( $size eq $header_var ) {
402 $self->storage->close;
403 DBM::Deep->_throw_error( "Unexpected size found ($size <-> $header_var)." );
406 return length($buffer) + length($buffer2);
413 my $type = $self->storage->read_at( $offset, 1 );
414 return if $type eq chr(0);
416 if ( $type eq $self->SIG_ARRAY || $type eq $self->SIG_HASH ) {
417 return DBM::Deep::Engine::Sector::Reference->new({
423 elsif ( $type eq $self->SIG_BLIST ) {
424 return DBM::Deep::Engine::Sector::BucketList->new({
430 elsif ( $type eq $self->SIG_NULL ) {
431 return DBM::Deep::Engine::Sector::Null->new({
437 elsif ( $type eq $self->SIG_DATA ) {
438 return DBM::Deep::Engine::Sector::Scalar->new({
444 # This was deleted from under us, so just return and let the caller figure it out.
445 elsif ( $type eq $self->SIG_FREE ) {
449 die "'$offset': Don't know what to do with type '$type'\n";
454 return $self->{digest}->(@_);
457 sub _add_free_sector {
459 my ($offset, $size) = @_;
463 if ( $size == 256 ) {
464 $chains_offset = $self->byte_size;
471 my $old_head = $self->storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size );
473 $self->storage->print_at( $self->chains_loc + $chains_offset,
474 pack( $StP{$self->byte_size}, $offset ),
477 # Record the old head in the new sector after the signature
478 $self->storage->print_at( $offset + 1, $old_head );
481 sub _request_sector {
487 if ( $size == 256 ) {
488 $chains_offset = $self->byte_size;
495 my $old_head = $self->storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size );
496 my $loc = unpack( $StP{$self->byte_size}, $old_head );
498 # We don't have any free sectors of the right size, so allocate a new one.
500 return $self->storage->request_space( $size );
503 my $new_head = $self->storage->read_at( $loc + 1, $self->byte_size );
504 $self->storage->print_at( $self->chains_loc + $chains_offset, $new_head );
509 ################################################################################
511 sub storage { $_[0]{storage} }
512 sub byte_size { $_[0]{byte_size} }
513 sub hash_size { $_[0]{hash_size} }
514 sub num_txns { $_[0]{num_txns} }
515 sub max_buckets { $_[0]{max_buckets} }
516 sub iterator { $_[0]{iterator} }
517 sub blank_md5 { chr(0) x $_[0]->hash_size }
519 sub chains_loc { $_[0]{chains_loc} }
520 sub set_chains_loc { $_[0]{chains_loc} = $_[1] }
522 ################################################################################
524 package DBM::Deep::Engine::Iterator;
532 engine => $args->{engine},
533 base_offset => $args->{base_offset},
534 trans_id => $args->{trans_id},
537 Scalar::Util::weaken( $self->{engine} );
544 $self->{breadcrumbs} = [];
550 my $crumbs = $self->{breadcrumbs};
552 unless ( @$crumbs ) {
553 # This will be a Reference sector
554 my $sector = $self->{engine}->_load_sector( $self->{base_offset} )
555 # or die "Iterator: How did this fail (no ref sector for '$self->{base_offset}')?!\n";
556 # If no sector is found, thist must have been deleted from under us.
558 push @$crumbs, [ $sector->get_blist_loc, 0 ];
563 my ($offset, $idx) = @{ $crumbs->[-1] };
569 my $sector = $self->{engine}->_load_sector( $offset )
570 or die "Iterator: How did this fail (no blist sector for '$offset')?!\n";
572 my $key_sector = $sector->get_key_for( $idx );
573 unless ( $key_sector ) {
579 $key = $key_sector->data;
586 package DBM::Deep::Engine::Sector;
589 my $self = bless $_[1], $_[0];
590 Scalar::Util::weaken( $self->{engine} );
596 sub engine { $_[0]{engine} }
597 sub offset { $_[0]{offset} }
598 sub type { $_[0]{type} }
603 $self->engine->storage->print_at( $self->offset,
604 $self->engine->SIG_FREE,
605 chr(0) x ($self->size - 1),
608 $self->engine->_add_free_sector(
609 $self->offset, $self->size,
615 package DBM::Deep::Engine::Sector::Data;
617 our @ISA = qw( DBM::Deep::Engine::Sector );
620 sub size { return 256 }
622 package DBM::Deep::Engine::Sector::Scalar;
624 our @ISA = qw( DBM::Deep::Engine::Sector::Data );
629 my $chain_loc = $self->chain_loc;
631 $self->SUPER::free();
634 $self->engine->_load_sector( $chain_loc )->free;
640 sub type { $_[0]{engine}->SIG_DATA }
644 my $engine = $self->engine;
646 unless ( $self->offset ) {
647 my $data_section = $self->size - 3 - 1 * $engine->byte_size;
649 my $data = delete $self->{data};
651 $self->{offset} = $engine->_request_sector( $self->size );
653 my $dlen = length $data;
655 my $curr_offset = $self->offset;
656 while ( $continue ) {
660 my ($leftover, $this_len, $chunk);
661 if ( $dlen > $data_section ) {
663 $this_len = $data_section;
664 $chunk = substr( $data, 0, $this_len );
666 $dlen -= $data_section;
667 $next_offset = $engine->_request_sector( $self->size );
668 $data = substr( $data, $this_len );
671 $leftover = $data_section - $dlen;
678 $engine->storage->print_at( $curr_offset,
679 $self->type, # Sector type
680 pack( $StP{1}, 0 ), # Recycled counter
681 pack( $StP{$engine->byte_size}, $next_offset ), # Chain loc
682 pack( $StP{1}, $this_len ), # Data length
683 $chunk, # Data to be stored in this sector
684 chr(0) x $leftover, # Zero-fill the rest
687 $curr_offset = $next_offset;
697 my $buffer = $self->engine->storage->read_at(
698 $self->offset + 2 + $self->engine->byte_size, 1
701 return unpack( $StP{1}, $buffer );
706 my $chain_loc = $self->engine->storage->read_at(
707 $self->offset + 2, $self->engine->byte_size,
709 return unpack( $StP{$self->engine->byte_size}, $chain_loc );
717 my $chain_loc = $self->chain_loc;
719 $data .= $self->engine->storage->read_at(
720 $self->offset + 2 + $self->engine->byte_size + 1, $self->data_length,
723 last unless $chain_loc;
725 $self = $self->engine->_load_sector( $chain_loc );
731 package DBM::Deep::Engine::Sector::Null;
733 our @ISA = qw( DBM::Deep::Engine::Sector::Data );
735 sub type { $_[0]{engine}->SIG_NULL }
736 sub data_length { 0 }
742 my $engine = $self->engine;
744 unless ( $self->offset ) {
745 my $leftover = $self->size - 3 - 1 * $engine->byte_size;
747 $self->{offset} = $engine->_request_sector( $self->size );
748 $engine->storage->print_at( $self->offset,
749 $self->type, # Sector type
750 pack( $StP{1}, 0 ), # Recycled counter
751 pack( $StP{$engine->byte_size}, 0 ), # Chain loc
752 pack( $StP{1}, $self->data_length ), # Data length
753 chr(0) x $leftover, # Zero-fill the rest
760 package DBM::Deep::Engine::Sector::Reference;
762 our @ISA = qw( DBM::Deep::Engine::Sector::Data );
767 my $engine = $self->engine;
769 unless ( $self->offset ) {
770 my $classname = Scalar::Util::blessed( delete $self->{data} );
771 my $class_len = length( defined $classname ? $classname : '' );
772 my $leftover = $self->size - 4 - 2 * $engine->byte_size - $class_len;
774 $self->{offset} = $engine->_request_sector( $self->size );
775 $engine->storage->print_at( $self->offset,
776 $self->type, # Sector type
777 pack( $StP{1}, 0 ), # Recycled counter
778 pack( $StP{$engine->byte_size}, 0 ), # Chain loc
779 pack( $StP{$engine->byte_size}, 0 ), # Index/BList loc
780 pack( $StP{1}, (defined($classname) ? 1 : 0) ), # Blessedness
781 pack( $StP{1}, $class_len ), # Classname length
782 (defined($classname) ? $classname : ''), # Classname
783 chr(0) x $leftover, # Zero-fill the rest
789 $self->{type} = $engine->storage->read_at( $self->offset, 1 );
797 my $engine = $self->engine;
798 my $blist_loc = $engine->storage->read_at( $self->offset + 2 + $engine->byte_size, $engine->byte_size );
799 return unpack( $StP{$engine->byte_size}, $blist_loc );
802 sub get_bucket_list {
807 # XXX Add in check here for recycling?
809 my $engine = $self->engine;
811 my $blist_loc = $self->get_blist_loc;
813 # There's no index or blist yet
814 unless ( $blist_loc ) {
815 return unless $args->{create};
817 my $blist = DBM::Deep::Engine::Sector::BucketList->new({
820 $engine->storage->print_at( $self->offset + 2 + $engine->byte_size,
821 pack( $StP{$engine->byte_size}, $blist->offset ),
826 return DBM::Deep::Engine::Sector::BucketList->new({
828 offset => $blist_loc,
835 my $is_blessed = $self->engine->storage->read_at(
836 $self->offset + 2 + 2 * $self->engine->byte_size, 1,
838 $is_blessed = unpack ( $StP{1}, $is_blessed );
840 return unless $is_blessed;
842 my $classname_len = $self->engine->storage->read_at( undef, 1 );
843 $classname_len = unpack( $StP{1}, $classname_len );
844 return $self->engine->storage->read_at( undef, $classname_len );
850 my $new_obj = DBM::Deep->new({
852 base_offset => $self->offset,
853 storage => $self->engine->storage,
856 if ( $self->engine->storage->{autobless} ) {
857 my $classname = $self->get_classname;
858 if ( defined $classname ) {
859 bless $new_obj, $classname;
866 package DBM::Deep::Engine::Sector::BucketList;
868 our @ISA = qw( DBM::Deep::Engine::Sector );
870 sub idx_for_txn { return $_[1] + 1 }
875 my $engine = $self->engine;
877 unless ( $self->offset ) {
878 my $leftover = $self->size - $self->base_size;
880 $self->{offset} = $engine->_request_sector( $self->size );
881 $engine->storage->print_at( $self->offset,
882 $engine->SIG_BLIST, # Sector type
883 pack( $StP{1}, 0 ), # Recycled counter
884 chr(0) x $leftover, # Zero-fill the data
891 sub base_size { 2 } # Sig + recycled counter
895 my $e = $self->engine;
896 return $self->base_size + $e->max_buckets * $self->bucket_size; # Base + numbuckets * bucketsize
901 my $e = $self->engine;
903 my $locs_size = (1 + $e->num_txns ) * $e->byte_size;
904 return $e->hash_size + $locs_size;
909 my ($found, $idx) = $self->find_md5( @_ );
917 foreach my $idx ( 0 .. $self->engine->max_buckets - 1 ) {
918 my $potential = $self->engine->storage->read_at(
919 $self->offset + $self->base_size + $idx * $self->bucket_size, $self->engine->hash_size,
922 return (undef, $idx) if $potential eq $self->engine->blank_md5;
923 return (1, $idx) if $md5 eq $potential;
931 my ($md5, $key, $value_loc) = @_;
933 my $engine = $self->engine;
934 my ($found, $idx) = $self->find_md5( $md5 );
935 my $spot = $self->offset + $self->base_size + $idx * $self->bucket_size;
938 my $key_sector = DBM::Deep::Engine::Sector::Scalar->new({
939 engine => $self->engine,
943 $engine->storage->print_at( $spot,
945 pack( $StP{$self->engine->byte_size}, $key_sector->offset ),
949 $engine->storage->print_at( $spot + $self->engine->hash_size + $self->engine->byte_size,
950 pack( $StP{$engine->byte_size}, $value_loc ), # The pointer to the data in the HEAD
958 my $engine = $self->engine;
959 my ($found, $idx) = $self->find_md5( $md5 );
960 return undef unless $found;
962 # Save the location so that we can free the data
963 my $location = $self->get_data_location_for( $idx );
964 my $key_sector = $self->get_key_for( $idx );
966 my $spot = $self->offset + $self->base_size + $idx * $self->bucket_size;
967 $engine->storage->print_at( $spot,
968 $engine->storage->read_at(
969 $spot + $self->bucket_size,
970 $self->bucket_size * ( $engine->num_txns - $idx - 1 ),
972 chr(0) x $self->bucket_size,
977 my $data_sector = $self->engine->_load_sector( $location );
978 my $data = $data_sector->data;
984 sub get_data_location_for {
988 my $location = $self->engine->storage->read_at(
989 $self->offset + $self->base_size + $idx * $self->bucket_size + $self->engine->hash_size + $self->engine->byte_size,
990 $self->engine->byte_size,
992 return unpack( $StP{$self->engine->byte_size}, $location );
999 my ($found, $idx) = $self->find_md5( $md5 );
1000 return unless $found;
1001 my $location = $self->get_data_location_for( $idx );
1002 return $self->engine->_load_sector( $location );
1009 my $location = $self->engine->storage->read_at(
1010 $self->offset + $self->base_size + $idx * $self->bucket_size + $self->engine->hash_size,
1011 $self->engine->byte_size,
1013 $location = unpack( $StP{$self->engine->byte_size}, $location );
1014 return unless $location;
1015 return $self->engine->_load_sector( $location );