1 package DBM::Deep::09830;
7 # Multi-level database module for storing hash trees, arrays and simple
8 # key/value pairs into FTP-able, cross-platform binary database files.
10 # Type `perldoc DBM::Deep` for complete documentation.
14 # tie %db, 'DBM::Deep', 'my_database.db'; # standard tie() method
16 # my $db = new DBM::Deep( 'my_database.db' ); # preferred OO method
18 # $db->{my_scalar} = 'hello world';
19 # $db->{my_hash} = { larry => 'genius', hashes => 'fast' };
20 # $db->{my_array} = [ 1, 2, 3, time() ];
21 # $db->{my_complex} = [ 'hello', { perl => 'rules' }, 42, 99 ];
22 # push @{$db->{my_array}}, 'another value';
23 # my @key_list = keys %{$db->{my_hash}};
24 # print "This module " . $db->{my_complex}->[1]->{perl} . "!\n";
27 # (c) 2002-2006 Joseph Huckaby. All Rights Reserved.
28 # This program is free software; you can redistribute it and/or
29 # modify it under the same terms as Perl itself.
34 use Fcntl qw( :DEFAULT :flock :seek );
38 use vars qw( $VERSION );
42 # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file.
43 # (Perl must be compiled with largefile support for files > 2 GB)
45 # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 EB (exabytes) per file.
46 # (Perl must be compiled with largefile and 64-bit long support)
52 # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value.
53 # Upgrading this is possible (see above) but probably not necessary. If you need
54 # more than 4 GB for a single key or value, this module is really not for you :-)
56 #my $DATA_LENGTH_SIZE = 4;
57 #my $DATA_LENGTH_PACK = 'N';
58 our ($LONG_SIZE, $LONG_PACK, $DATA_LENGTH_SIZE, $DATA_LENGTH_PACK);
61 # Maximum number of buckets per list before another level of indexing is done.
62 # Increase this value for slightly greater speed, but larger database files.
63 # DO NOT decrease this value below 16, due to risk of recursive reindex overrun.
68 # Better not adjust anything below here, unless you're me :-)
72 # Setup digest function for keys
74 our ($DIGEST_FUNC, $HASH_SIZE);
75 #my $DIGEST_FUNC = \&Digest::MD5::md5;
78 # Precalculate index and bucket sizes based on values above.
81 my ($INDEX_SIZE, $BUCKET_SIZE, $BUCKET_LIST_SIZE);
88 # Setup file and tag signatures. These should never change.
90 sub SIG_FILE () { 'DPDB' } # written at the start of a new file; checked when an existing file is opened
91 sub SIG_HASH () { 'H' } # tag signature of a hash node (also exposed as TYPE_HASH)
92 sub SIG_ARRAY () { 'A' } # tag signature of an array node (also exposed as TYPE_ARRAY)
93 sub SIG_NULL () { 'N' } # content signature written when the stored value is undef
94 sub SIG_DATA () { 'D' } # content signature written for a plain scalar value
95 sub SIG_INDEX () { 'I' } # tag signature of an index level
96 sub SIG_BLIST () { 'B' } # tag signature of a bucket list
100 # Setup constants for users to pass to new()
102 sub TYPE_HASH () { SIG_HASH } # same value as SIG_HASH; compared against the 'type' argument and the master index signature
103 sub TYPE_ARRAY () { SIG_ARRAY } # same value as SIG_ARRAY; selects the Array implementation in new()
109 if (scalar(@_) > 1) {
111 $proto->_throw_error( "Odd number of parameters to " . (caller(1))[2] );
115 elsif ( ref $_[0] ) {
116 unless ( eval { local $SIG{'__DIE__'}; %{$_[0]} || 1 } ) {
117 $proto->_throw_error( "Not a hashref in args to " . (caller(1))[2] );
122 $args = { file => shift };
130 # Class constructor method for Perl OO interface.
131 # Calls tie() and returns blessed reference to tied hash or array,
132 # providing a hybrid OO/tie interface.
135 my $args = $class->_get_args( @_ );
138 # Check if we want a tied hash or array.
141 if (defined($args->{type}) && $args->{type} eq TYPE_ARRAY) {
142 $class = 'DBM::Deep::09830::Array';
143 #require DBM::Deep::09830::Array;
144 tie @$self, $class, %$args;
147 $class = 'DBM::Deep::09830::Hash';
148 #require DBM::Deep::09830::Hash;
149 tie %$self, $class, %$args;
152 return bless $self, $class;
157 # Setup $self and bless into this class.
162 # These are the defaults to be optionally overridden below
165 base_offset => length(SIG_FILE),
168 foreach my $param ( keys %$self ) {
169 next unless exists $args->{$param};
170 $self->{$param} = delete $args->{$param}
173 # locking implicitly enables autoflush
174 if ($args->{locking}) { $args->{autoflush} = 1; }
176 $self->{root} = exists $args->{root}
178 : DBM::Deep::09830::_::Root->new( $args );
180 if (!defined($self->_fh)) { $self->_open(); }
187 #require DBM::Deep::09830::Hash;
188 return DBM::Deep::09830::Hash->TIEHASH( @_ );
193 #require DBM::Deep::09830::Array;
194 return DBM::Deep::09830::Array->TIEARRAY( @_ );
197 #XXX Unneeded now ...
203 # Open a fh to the database, create if nonexistent.
204 # Make sure file signature matches DBM::Deep spec.
206 my $self = $_[0]->_get_self;
210 if (defined($self->_fh)) { $self->_close(); }
212 my $flags = O_RDWR | O_CREAT | O_BINARY;
215 sysopen( $fh, $self->_root->{file}, $flags )
216 or $self->_throw_error( "Cannot sysopen file: " . $self->_root->{file} . ": $!" );
218 $self->_root->{fh} = $fh;
220 if ($self->_root->{autoflush}) {
221 my $old = select $fh;
226 seek($fh, 0 + $self->_root->{file_offset}, SEEK_SET);
229 my $bytes_read = read( $fh, $signature, length(SIG_FILE));
232 # File is empty -- write signature and master index
235 seek($fh, 0 + $self->_root->{file_offset}, SEEK_SET);
236 print( $fh SIG_FILE);
237 $self->_create_tag($self->_base_offset, $self->_type, chr(0) x $INDEX_SIZE);
239 my $plain_key = "[base]";
240 print( $fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key );
242 # Flush the filehandle
243 my $old_fh = select $fh;
244 my $old_af = $|; $| = 1; $| = $old_af;
247 my @stats = stat($fh);
248 $self->_root->{inode} = $stats[1];
249 $self->_root->{end} = $stats[7];
255 # Check signature was valid
257 unless ($signature eq SIG_FILE) {
259 return $self->_throw_error("Signature not found -- file is not a Deep DB");
262 my @stats = stat($fh);
263 $self->_root->{inode} = $stats[1];
264 $self->_root->{end} = $stats[7];
267 # Get our type from master index signature
269 my $tag = $self->_load_tag($self->_base_offset);
271 #XXX We probably also want to store the hash algorithm name and not assume anything
272 #XXX The cool thing would be to allow a different hashing algorithm at every level
275 return $self->_throw_error("Corrupted file, no master index record");
277 if ($self->{type} ne $tag->{signature}) {
278 return $self->_throw_error("File type mismatch");
288 my $self = $_[0]->_get_self;
289 close $self->_root->{fh} if $self->_root->{fh};
290 $self->_root->{fh} = undef;
295 # Given offset, signature and content, create tag and write to disk
297 my ($self, $offset, $sig, $content) = @_;
298 my $size = length($content);
304 seek($fh, $offset + $self->_root->{file_offset}, SEEK_SET);
305 print( $fh $sig . pack($DATA_LENGTH_PACK, $size) . $content );
307 if ($offset == $self->_root->{end}) {
308 $self->_root->{end} += SIG_SIZE + $DATA_LENGTH_SIZE + $size;
314 offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE,
321 # Given offset, load single tag and return signature, size and data
330 seek($fh, $offset + $self->_root->{file_offset}, SEEK_SET);
331 if (eof $fh) { return undef; }
334 read( $fh, $b, SIG_SIZE + $DATA_LENGTH_SIZE );
335 my ($sig, $size) = unpack( "A $DATA_LENGTH_PACK", $b );
338 read( $fh, $buffer, $size);
343 offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE,
350 # Given index tag, look up single entry in index and return the tag it points to (nothing if the entry is empty).
353 my ($tag, $index) = @_;
355 my $location = unpack($LONG_PACK, substr($tag->{content}, $index * $LONG_SIZE, $LONG_SIZE) );
356 if (!$location) { return; }
358 return $self->_load_tag( $location );
363 # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
364 # plain (undigested) key and value.
367 my ($tag, $md5, $plain_key, $value) = @_;
368 my $keys = $tag->{content};
374 # This verifies that only supported values will be stored.
376 my $r = Scalar::Util::reftype( $value );
379 last if $r eq 'HASH';
380 last if $r eq 'ARRAY';
383 "Storage of variables of type '$r' is not supported."
387 my $root = $self->_root;
389 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep::09830' ) };
390 my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
395 # Iterate through buckets, seeing if this is a new entry or a replace.
397 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
398 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
401 # Found empty bucket (end of list). Populate and exit loop.
405 $location = $internal_ref
406 ? $value->_base_offset
409 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET);
410 print( $fh $md5 . pack($LONG_PACK, $location) );
414 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
417 # Found existing bucket with same key. Replace with new value.
422 $location = $value->_base_offset;
423 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET);
424 print( $fh $md5 . pack($LONG_PACK, $location) );
428 seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET);
430 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
433 # If value is a hash, array, or raw value with equal or less size, we can
434 # reuse the same content area of the database. Otherwise, we have to create
435 # a new content area at the EOF.
438 my $r = Scalar::Util::reftype( $value ) || '';
439 if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
440 $actual_length = $INDEX_SIZE;
442 # if autobless is enabled, must also take into consideration
443 # the class name, as it is stored along with key/value.
444 if ( $root->{autobless} ) {
445 my $value_class = Scalar::Util::blessed($value);
446 if ( defined $value_class && !$value->isa('DBM::Deep::09830') ) {
447 $actual_length += length($value_class);
451 else { $actual_length = length($value); }
453 if ($actual_length <= ($size || 0)) {
457 $location = $root->{end};
458 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $HASH_SIZE + $root->{file_offset}, SEEK_SET);
459 print( $fh pack($LONG_PACK, $location) );
467 # If this is an internal reference, return now.
468 # No need to write value or plain key
475 # If bucket didn't fit into list, split into a new index level
478 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
479 print( $fh pack($LONG_PACK, $root->{end}) );
481 my $index_tag = $self->_create_tag($root->{end}, SIG_INDEX, chr(0) x $INDEX_SIZE);
484 $keys .= $md5 . pack($LONG_PACK, 0);
486 for (my $i=0; $i<=$MAX_BUCKETS; $i++) {
487 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
489 my $old_subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
490 my $num = ord(substr($key, $tag->{ch} + 1, 1));
492 if ($offsets[$num]) {
493 my $offset = $offsets[$num] + SIG_SIZE + $DATA_LENGTH_SIZE;
494 seek($fh, $offset + $root->{file_offset}, SEEK_SET);
496 read( $fh, $subkeys, $BUCKET_LIST_SIZE);
498 for (my $k=0; $k<$MAX_BUCKETS; $k++) {
499 my $subloc = unpack($LONG_PACK, substr($subkeys, ($k * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
501 seek($fh, $offset + ($k * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET);
502 print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) );
508 $offsets[$num] = $root->{end};
509 seek($fh, $index_tag->{offset} + ($num * $LONG_SIZE) + $root->{file_offset}, SEEK_SET);
510 print( $fh pack($LONG_PACK, $root->{end}) );
512 my $blist_tag = $self->_create_tag($root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE);
514 seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
515 print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) );
520 $location ||= $root->{end};
521 } # re-index bucket list
524 # Seek to content area and store signature, value and plaintext key
528 seek($fh, $location + $root->{file_offset}, SEEK_SET);
531 # Write signature based on content type, set content length and write actual value.
533 my $r = Scalar::Util::reftype($value) || '';
535 if ( !$internal_ref && tied %{$value} ) {
536 return $self->_throw_error("Cannot store a tied value");
538 print( $fh TYPE_HASH );
539 print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE );
540 $content_length = $INDEX_SIZE;
542 elsif ($r eq 'ARRAY') {
543 if ( !$internal_ref && tied @{$value} ) {
544 return $self->_throw_error("Cannot store a tied value");
546 print( $fh TYPE_ARRAY );
547 print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE );
548 $content_length = $INDEX_SIZE;
550 elsif (!defined($value)) {
551 print( $fh SIG_NULL );
552 print( $fh pack($DATA_LENGTH_PACK, 0) );
556 print( $fh SIG_DATA );
557 print( $fh pack($DATA_LENGTH_PACK, length($value)) . $value );
558 $content_length = length($value);
562 # Plain key is stored AFTER value, as keys are typically fetched less often.
564 print( $fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key );
567 # If value is blessed, preserve class name
569 if ( $root->{autobless} ) {
570 my $value_class = Scalar::Util::blessed($value);
571 if ( defined $value_class && $value_class ne 'DBM::Deep::09830' ) {
573 # Blessed ref -- will restore later
576 print( $fh pack($DATA_LENGTH_PACK, length($value_class)) . $value_class );
577 $content_length += 1;
578 $content_length += $DATA_LENGTH_SIZE + length($value_class);
582 $content_length += 1;
587 # If this is a new content area, advance EOF counter
589 if ($location == $root->{end}) {
590 $root->{end} += SIG_SIZE;
591 $root->{end} += $DATA_LENGTH_SIZE + $content_length;
592 $root->{end} += $DATA_LENGTH_SIZE + length($plain_key);
596 # If content is a hash or array, create new child DBM::Deep object and
597 # pass each key or element to it.
601 tie %$value, 'DBM::Deep::09830', {
603 base_offset => $location,
608 elsif ($r eq 'ARRAY') {
610 tie @$value, 'DBM::Deep::09830', {
612 base_offset => $location,
621 return $self->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file");
624 sub _get_bucket_value {
626 # Fetch single value given tag and MD5 digested key.
629 my ($tag, $md5) = @_;
630 my $keys = $tag->{content};
637 # Iterate through buckets, looking for a key match
640 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
641 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
642 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
646 # Hit end of list, no match
651 if ( $md5 ne $key ) {
656 # Found match -- seek to offset and read signature
659 seek($fh, $subloc + $self->_root->{file_offset}, SEEK_SET);
660 read( $fh, $signature, SIG_SIZE);
663 # If value is a hash or array, return new DBM::Deep object with correct offset
665 if (($signature eq TYPE_HASH) || ($signature eq TYPE_ARRAY)) {
666 my $obj = DBM::Deep::09830->new(
668 base_offset => $subloc,
672 if ($self->_root->{autobless}) {
674 # Skip over value and plain key to see if object needs
677 seek($fh, $DATA_LENGTH_SIZE + $INDEX_SIZE, SEEK_CUR);
680 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
681 if ($size) { seek($fh, $size, SEEK_CUR); }
684 read( $fh, $bless_bit, 1);
685 if (ord($bless_bit)) {
687 # Yes, object needs to be re-blessed
690 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
691 if ($size) { read( $fh, $class_name, $size); }
692 if ($class_name) { $obj = bless( $obj, $class_name ); }
700 # Otherwise return actual value
702 elsif ($signature eq SIG_DATA) {
705 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
706 if ($size) { read( $fh, $value, $size); }
711 # Key exists, but content is null
721 # Delete single key/value pair given tag and MD5 digested key.
724 my ($tag, $md5) = @_;
725 my $keys = $tag->{content};
732 # Iterate through buckets, looking for a key match
735 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
736 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
737 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
741 # Hit end of list, no match
746 if ( $md5 ne $key ) {
751 # Matched key -- delete bucket and return
753 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $self->_root->{file_offset}, SEEK_SET);
754 print( $fh substr($keys, ($i+1) * $BUCKET_SIZE ) );
755 print( $fh chr(0) x $BUCKET_SIZE );
765 # Check existence of single key given tag and MD5 digested key.
768 my ($tag, $md5) = @_;
769 my $keys = $tag->{content};
772 # Iterate through buckets, looking for a key match
775 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
776 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
777 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
781 # Hit end of list, no match
786 if ( $md5 ne $key ) {
791 # Matched key -- return true
799 sub _find_bucket_list {
801 # Locate offset for bucket list, given digested key
807 # Locate offset for bucket list using digest index system
810 my $tag = $self->_load_tag($self->_base_offset);
811 if (!$tag) { return; }
813 while ($tag->{signature} ne SIG_BLIST) {
814 $tag = $self->_index_lookup($tag, ord(substr($md5, $ch, 1)));
815 if (!$tag) { return; }
822 sub _traverse_index {
824 # Scan index and recursively step into deeper levels, looking for next key.
826 my ($self, $offset, $ch, $force_return_next) = @_;
827 $force_return_next = undef unless $force_return_next;
831 my $tag = $self->_load_tag( $offset );
835 if ($tag->{signature} ne SIG_BLIST) {
836 my $content = $tag->{content};
838 if ($self->{return_next}) { $start = 0; }
839 else { $start = ord(substr($self->{prev_md5}, $ch, 1)); }
841 for (my $index = $start; $index < 256; $index++) {
842 my $subloc = unpack($LONG_PACK, substr($content, $index * $LONG_SIZE, $LONG_SIZE) );
844 my $result = $self->_traverse_index( $subloc, $ch + 1, $force_return_next );
845 if (defined($result)) { return $result; }
849 $self->{return_next} = 1;
852 elsif ($tag->{signature} eq SIG_BLIST) {
853 my $keys = $tag->{content};
854 if ($force_return_next) { $self->{return_next} = 1; }
857 # Iterate through buckets, looking for a key match
859 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
860 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
861 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
865 # End of bucket list -- return to outer loop
867 $self->{return_next} = 1;
870 elsif ($key eq $self->{prev_md5}) {
872 # Located previous key -- return next one found
874 $self->{return_next} = 1;
877 elsif ($self->{return_next}) {
879 # Seek to bucket location and skip over signature
881 seek($fh, $subloc + SIG_SIZE + $self->_root->{file_offset}, SEEK_SET);
884 # Skip over value to get to plain key
887 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
888 if ($size) { seek($fh, $size, SEEK_CUR); }
891 # Read in plain key and return as scalar
894 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
895 if ($size) { read( $fh, $plain_key, $size); }
901 $self->{return_next} = 1;
902 } # tag is a bucket list
909 # Locate next key, given digested previous one
911 my $self = $_[0]->_get_self;
913 $self->{prev_md5} = $_[1] ? $_[1] : undef;
914 $self->{return_next} = 0;
917 # If the previous key was not specified, start at the top and
918 # return the first one found.
920 if (!$self->{prev_md5}) {
921 $self->{prev_md5} = chr(0) x $HASH_SIZE;
922 $self->{return_next} = 1;
925 return $self->_traverse_index( $self->_base_offset, 0 );
930 # If db locking is set, flock() the db file. If called multiple
931 # times before unlock(), then the same number of unlocks() must
932 # be called before the lock is released.
934 my $self = $_[0]->_get_self;
936 $type = LOCK_EX unless defined $type;
938 if (!defined($self->_fh)) { return; }
940 if ($self->_root->{locking}) {
941 if (!$self->_root->{locked}) {
942 flock($self->_fh, $type);
944 # refresh end counter in case file has changed size
945 my @stats = stat($self->_root->{file});
946 $self->_root->{end} = $stats[7];
948 # double-check file inode, in case another process
949 # has optimize()d our file while we were waiting.
950 if ($stats[1] != $self->_root->{inode}) {
951 $self->_open(); # re-open
952 flock($self->_fh, $type); # re-lock
953 $self->_root->{end} = (stat($self->_fh))[7]; # re-end
956 $self->_root->{locked}++;
966 # If db locking is set, unlock the db file. See note in lock()
967 # regarding calling lock() multiple times.
969 my $self = $_[0]->_get_self;
971 if (!defined($self->_fh)) { return; }
973 if ($self->_root->{locking} && $self->_root->{locked} > 0) {
974 $self->_root->{locked}--;
975 if (!$self->_root->{locked}) { flock($self->_fh, LOCK_UN); }
984 my $self = shift->_get_self;
985 my ($spot, $value) = @_;
990 elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep::09830' ) } ) {
991 my $type = $value->_type;
992 ${$spot} = $type eq TYPE_HASH ? {} : [];
993 $value->_copy_node( ${$spot} );
996 my $r = Scalar::Util::reftype( $value );
997 my $c = Scalar::Util::blessed( $value );
998 if ( $r eq 'ARRAY' ) {
999 ${$spot} = [ @{$value} ];
1002 ${$spot} = { %{$value} };
1004 ${$spot} = bless ${$spot}, $c
1013 # Copy single level of keys or elements to new DB handle.
1014 # Recurse for nested structures
1016 my $self = shift->_get_self;
1019 if ($self->_type eq TYPE_HASH) {
1020 my $key = $self->first_key();
1022 my $value = $self->get($key);
1023 $self->_copy_value( \$db_temp->{$key}, $value );
1024 $key = $self->next_key($key);
1028 my $length = $self->length();
1029 for (my $index = 0; $index < $length; $index++) {
1030 my $value = $self->get($index);
1031 $self->_copy_value( \$db_temp->[$index], $value );
1040 # Recursively export into standard Perl hashes and arrays.
1042 my $self = $_[0]->_get_self;
1045 if ($self->_type eq TYPE_HASH) { $temp = {}; }
1046 elsif ($self->_type eq TYPE_ARRAY) { $temp = []; }
1049 $self->_copy_node( $temp );
1057 # Recursively import Perl hash/array structure
1059 #XXX This use of ref() seems to be ok
1060 if (!ref($_[0])) { return; } # Perl calls import() on use -- ignore
1062 my $self = $_[0]->_get_self;
1065 #XXX This use of ref() seems to be ok
1066 if (!ref($struct)) {
1068 # struct is not a reference, so just import based on our type
1072 if ($self->_type eq TYPE_HASH) { $struct = {@_}; }
1073 elsif ($self->_type eq TYPE_ARRAY) { $struct = [@_]; }
1076 my $r = Scalar::Util::reftype($struct) || '';
1077 if ($r eq "HASH" && $self->_type eq TYPE_HASH) {
1078 foreach my $key (keys %$struct) { $self->put($key, $struct->{$key}); }
1080 elsif ($r eq "ARRAY" && $self->_type eq TYPE_ARRAY) {
1081 $self->push( @$struct );
1084 return $self->_throw_error("Cannot import: type mismatch");
1092 # Rebuild entire database into new file, then move
1093 # it back on top of original.
1095 my $self = $_[0]->_get_self;
1097 #XXX Need to create a new test for this
1098 # if ($self->_root->{links} > 1) {
1099 # return $self->_throw_error("Cannot optimize: reference count is greater than 1");
1102 my $db_temp = DBM::Deep::09830->new(
1103 file => $self->_root->{file} . '.tmp',
1104 type => $self->_type
1107 return $self->_throw_error("Cannot optimize: failed to open temp file: $!");
1111 $self->_copy_node( $db_temp );
1115 # Attempt to copy user, group and permissions over to new file
1117 my @stats = stat($self->_fh);
1118 my $perms = $stats[2] & 07777;
1119 my $uid = $stats[4];
1120 my $gid = $stats[5];
1121 chown( $uid, $gid, $self->_root->{file} . '.tmp' );
1122 chmod( $perms, $self->_root->{file} . '.tmp' );
1124 # q.v. perlport for more information on this variable
1125 if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) {
1127 # Potential race condition when optimizing on Win32 with locking.
1128 # The Windows filesystem requires that the filehandle be closed
1129 # before it is overwritten with rename(). This could be redone
1136 if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) {
1137 unlink $self->_root->{file} . '.tmp';
1139 return $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!");
1151 # Make copy of object and return
1153 my $self = $_[0]->_get_self;
1155 return DBM::Deep::09830->new(
1156 type => $self->_type,
1157 base_offset => $self->_base_offset,
1158 root => $self->_root
1163 my %is_legal_filter = map {
1166 store_key store_value
1167 fetch_key fetch_value
1172 # Setup filter function for storing or fetching the key or value
1174 my $self = $_[0]->_get_self;
1175 my $type = lc $_[1];
1176 my $func = $_[2] ? $_[2] : undef;
1178 if ( $is_legal_filter{$type} ) {
1179 $self->_root->{"filter_$type"} = $func;
1193 # Get access to the root structure
1195 my $self = $_[0]->_get_self;
1196 return $self->{root};
1201 # Get access to the raw fh
1203 #XXX It will be useful, though, when we split out HASH and ARRAY
1204 my $self = $_[0]->_get_self;
1205 return $self->_root->{fh};
1210 # Get type of current node (TYPE_HASH or TYPE_ARRAY)
1212 my $self = $_[0]->_get_self;
1213 return $self->{type};
1218 # Get base_offset of current node (TYPE_HASH or TYPE_ARRAY)
1220 my $self = $_[0]->_get_self;
1221 return $self->{base_offset};
1226 # Get last error string, or undef if no error
1229 ? ( $_[0]->_get_self->{root}->{error} or undef )
1239 # Store error string in self
1241 my $error_text = $_[1];
1243 if ( Scalar::Util::blessed $_[0] ) {
1244 my $self = $_[0]->_get_self;
1245 $self->_root->{error} = $error_text;
1247 unless ($self->_root->{debug}) {
1248 die "DBM::Deep::09830: $error_text\n";
1251 warn "DBM::Deep::09830: $error_text\n";
1255 die "DBM::Deep::09830: $error_text\n";
1263 my $self = $_[0]->_get_self;
1265 undef $self->_root->{error};
1268 sub _precalc_sizes {
1270 # Precalculate index, bucket and bucket list sizes
1273 #XXX I don't like this ...
1274 set_pack() unless defined $LONG_SIZE;
1276 $INDEX_SIZE = 256 * $LONG_SIZE;
1277 $BUCKET_SIZE = $HASH_SIZE + $LONG_SIZE;
1278 $BUCKET_LIST_SIZE = $MAX_BUCKETS * $BUCKET_SIZE;
1283 # Set pack/unpack modes (see file header for more)
1285 my ($long_s, $long_p, $data_s, $data_p) = @_;
1287 $LONG_SIZE = $long_s ? $long_s : 4;
1288 $LONG_PACK = $long_p ? $long_p : 'N';
1290 $DATA_LENGTH_SIZE = $data_s ? $data_s : 4;
1291 $DATA_LENGTH_PACK = $data_p ? $data_p : 'N';
1298 # Set key digest function (default is MD5)
1300 my ($digest_func, $hash_size) = @_;
1302 $DIGEST_FUNC = $digest_func ? $digest_func : \&Digest::MD5::md5;
1303 $HASH_SIZE = $hash_size ? $hash_size : 16;
1310 (O_WRONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0);
1315 # (O_RDONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0);
1319 # tie() methods (hashes and arrays)
1324 # Store single hash key/value or array element in database.
1326 my $self = $_[0]->_get_self;
1331 # User may be storing a hash, in which case we do not want it run
1332 # through the filtering system
1333 my $value = ($self->_root->{filter_store_value} && !ref($_[2]))
1334 ? $self->_root->{filter_store_value}->($_[2])
1337 my $md5 = $DIGEST_FUNC->($key);
1340 # Make sure file is open
1342 if (!defined($self->_fh) && !$self->_open()) {
1346 if ( $^O ne 'MSWin32' && !_is_writable( $self->_fh ) ) {
1347 $self->_throw_error( 'Cannot write to a readonly filehandle' );
1351 # Request exclusive lock for writing
1353 $self->lock( LOCK_EX );
1355 my $fh = $self->_fh;
1358 # Locate offset for bucket list using digest index system
1360 my $tag = $self->_load_tag($self->_base_offset);
1362 $tag = $self->_create_tag($self->_base_offset, SIG_INDEX, chr(0) x $INDEX_SIZE);
1366 while ($tag->{signature} ne SIG_BLIST) {
1367 my $num = ord(substr($md5, $ch, 1));
1369 my $ref_loc = $tag->{offset} + ($num * $LONG_SIZE);
1370 my $new_tag = $self->_index_lookup($tag, $num);
1373 seek($fh, $ref_loc + $self->_root->{file_offset}, SEEK_SET);
1374 print( $fh pack($LONG_PACK, $self->_root->{end}) );
1376 $tag = $self->_create_tag($self->_root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE);
1378 $tag->{ref_loc} = $ref_loc;
1386 $tag->{ref_loc} = $ref_loc;
1393 # Add key/value to bucket list
1395 my $result = $self->_add_bucket( $tag, $md5, $key, $value );
1404 # Fetch single value or element given plain key or array index
1406 my $self = shift->_get_self;
1410 # Make sure file is open
1412 if (!defined($self->_fh)) { $self->_open(); }
1414 my $md5 = $DIGEST_FUNC->($key);
1417 # Request shared lock for reading
1419 $self->lock( LOCK_SH );
1421 my $tag = $self->_find_bucket_list( $md5 );
1428 # Get value from bucket list
1430 my $result = $self->_get_bucket_value( $tag, $md5 );
1434 #XXX What is ref() checking here?
1435 #YYY Filters only apply on scalar values, so the ref check is making
1436 #YYY sure the fetched bucket is a scalar, not a child hash or array.
1437 return ($result && !ref($result) && $self->_root->{filter_fetch_value})
1438 ? $self->_root->{filter_fetch_value}->($result)
1444 # Delete single key/value pair or element given plain key or array index
1446 my $self = $_[0]->_get_self;
1449 my $md5 = $DIGEST_FUNC->($key);
1452 # Make sure file is open
1454 if (!defined($self->_fh)) { $self->_open(); }
1457 # Request exclusive lock for writing
1459 $self->lock( LOCK_EX );
1461 my $tag = $self->_find_bucket_list( $md5 );
1470 my $value = $self->_get_bucket_value( $tag, $md5 );
1471 if ($value && !ref($value) && $self->_root->{filter_fetch_value}) {
1472 $value = $self->_root->{filter_fetch_value}->($value);
1475 my $result = $self->_delete_bucket( $tag, $md5 );
1478 # If this object is an array and the key deleted was on the end of the stack,
1479 # decrement the length variable.
1489 # Check if a single key or element exists given plain key or array index
1491 my $self = $_[0]->_get_self;
1494 my $md5 = $DIGEST_FUNC->($key);
1497 # Make sure file is open
1499 if (!defined($self->_fh)) { $self->_open(); }
1502 # Request shared lock for reading
1504 $self->lock( LOCK_SH );
1506 my $tag = $self->_find_bucket_list( $md5 );
1509 # For some reason, the built-in exists() function returns '' for false
1517 # Check if bucket exists and return 1 or ''
1519 my $result = $self->_bucket_exists( $tag, $md5 ) || '';
1528 # Clear all keys from hash, or all elements from array.
1530 my $self = $_[0]->_get_self;
1533 # Make sure file is open
1535 if (!defined($self->_fh)) { $self->_open(); }
1538 # Request exclusive lock for writing
1540 $self->lock( LOCK_EX );
1542 my $fh = $self->_fh;
1544 seek($fh, $self->_base_offset + $self->_root->{file_offset}, SEEK_SET);
1550 $self->_create_tag($self->_base_offset, $self->_type, chr(0) x $INDEX_SIZE);
1558 # Public method aliases
1560 sub put { (shift)->STORE( @_ ) } # OO alias: forwards all arguments to STORE
1561 sub store { (shift)->STORE( @_ ) } # OO alias: forwards all arguments to STORE
1562 sub get { (shift)->FETCH( @_ ) } # OO alias: forwards all arguments to FETCH
1563 sub fetch { (shift)->FETCH( @_ ) } # OO alias: forwards all arguments to FETCH
1564 sub delete { (shift)->DELETE( @_ ) } # OO alias: forwards all arguments to DELETE
1565 sub exists { (shift)->EXISTS( @_ ) } # OO alias: forwards all arguments to EXISTS
1566 sub clear { (shift)->CLEAR( @_ ) } # OO alias: forwards all arguments to CLEAR
1568 package DBM::Deep::09830::_::Root;
1582 filter_store_key => undef,
1583 filter_store_value => undef,
1584 filter_fetch_key => undef,
1585 filter_fetch_value => undef,
1591 if ( $self->{fh} && !$self->{file_offset} ) {
1592 $self->{file_offset} = tell( $self->{fh} );
1600 return unless $self;
1602 close $self->{fh} if $self->{fh};
1607 package DBM::Deep::09830::Array;
1611 # This is to allow DBM::Deep::Array to handle negative indices on
1612 # its own. Otherwise, Perl would intercept the call to negative
1613 # indices for us. This was causing bugs for negative index handling.
1614 use vars qw( $NEGATIVE_INDICES );
1615 $NEGATIVE_INDICES = 1;
1617 use base 'DBM::Deep::09830';
1619 use Scalar::Util ();
1622 eval { local $SIG{'__DIE__'}; tied( @{$_[0]} ) } || $_[0]
1627 # Tied array constructor method, called by Perl's tie() function.
1630 my $args = $class->_get_args( @_ );
1632 $args->{type} = $class->TYPE_ARRAY;
1634 return $class->_init($args);
1638 my $self = $_[0]->_get_self;
1641 $self->lock( $self->LOCK_SH );
1643 if ( $key =~ /^-?\d+$/ ) {
1645 $key += $self->FETCHSIZE;
1646 unless ( $key >= 0 ) {
1652 $key = pack($DBM::Deep::09830::LONG_PACK, $key);
1655 my $rv = $self->SUPER::FETCH( $key );
1663 my $self = shift->_get_self;
1664 my ($key, $value) = @_;
1666 $self->lock( $self->LOCK_EX );
1672 if ( $key =~ /^\-?\d+$/ ) {
1675 $size = $self->FETCHSIZE;
1678 die( "Modification of non-creatable array value attempted, subscript $orig" );
1682 $key = pack($DBM::Deep::09830::LONG_PACK, $key);
1685 my $rv = $self->SUPER::STORE( $key, $value );
1687 if ( $numeric_idx && $rv == 2 ) {
1688 $size = $self->FETCHSIZE unless defined $size;
1689 if ( $orig >= $size ) {
1690 $self->STORESIZE( $orig + 1 );
1700 my $self = $_[0]->_get_self;
1703 $self->lock( $self->LOCK_SH );
1705 if ( $key =~ /^\-?\d+$/ ) {
1707 $key += $self->FETCHSIZE;
1708 unless ( $key >= 0 ) {
1714 $key = pack($DBM::Deep::09830::LONG_PACK, $key);
1717 my $rv = $self->SUPER::EXISTS( $key );
1725 my $self = $_[0]->_get_self;
1728 my $unpacked_key = $key;
1730 $self->lock( $self->LOCK_EX );
1732 my $size = $self->FETCHSIZE;
1733 if ( $key =~ /^-?\d+$/ ) {
1736 unless ( $key >= 0 ) {
1742 $key = pack($DBM::Deep::09830::LONG_PACK, $key);
1745 my $rv = $self->SUPER::DELETE( $key );
1747 if ($rv && $unpacked_key == $size - 1) {
1748 $self->STORESIZE( $unpacked_key );
1758 # Return the length of the array
1760 my $self = shift->_get_self;
1762 $self->lock( $self->LOCK_SH );
1764 my $SAVE_FILTER = $self->_root->{filter_fetch_value};
1765 $self->_root->{filter_fetch_value} = undef;
1767 my $packed_size = $self->FETCH('length');
1769 $self->_root->{filter_fetch_value} = $SAVE_FILTER;
1774 return int(unpack($DBM::Deep::09830::LONG_PACK, $packed_size));
1782 # Set the length of the array
1784 my $self = $_[0]->_get_self;
1785 my $new_length = $_[1];
1787 $self->lock( $self->LOCK_EX );
1789 my $SAVE_FILTER = $self->_root->{filter_store_value};
1790 $self->_root->{filter_store_value} = undef;
1792 my $result = $self->STORE('length', pack($DBM::Deep::09830::LONG_PACK, $new_length));
1794 $self->_root->{filter_store_value} = $SAVE_FILTER;
1803 # Remove and return the last element on the array
1805 my $self = $_[0]->_get_self;
1807 $self->lock( $self->LOCK_EX );
1809 my $length = $self->FETCHSIZE();
1812 my $content = $self->FETCH( $length - 1 );
1813 $self->DELETE( $length - 1 );
1827 # Add new element(s) to the end of the array
1829 my $self = shift->_get_self;
1831 $self->lock( $self->LOCK_EX );
1833 my $length = $self->FETCHSIZE();
1835 while (my $content = shift @_) {
1836 $self->STORE( $length, $content );
1847 # Remove and return first element on the array.
1848 # Shift over remaining elements to take up space.
1850 my $self = $_[0]->_get_self;
1852 $self->lock( $self->LOCK_EX );
1854 my $length = $self->FETCHSIZE();
1857 my $content = $self->FETCH( 0 );
1860 # Shift elements over and remove last one.
1862 for (my $i = 0; $i < $length - 1; $i++) {
1863 $self->STORE( $i, $self->FETCH($i + 1) );
1865 $self->DELETE( $length - 1 );
1879 # Insert new element(s) at beginning of array.
1880 # Shift over other elements to make space.
1882 my $self = shift->_get_self;
1883 my @new_elements = @_;
1885 $self->lock( $self->LOCK_EX );
1887 my $length = $self->FETCHSIZE();
1888 my $new_size = scalar @new_elements;
1891 for (my $i = $length - 1; $i >= 0; $i--) {
1892 $self->STORE( $i + $new_size, $self->FETCH($i) );
1896 for (my $i = 0; $i < $new_size; $i++) {
1897 $self->STORE( $i, $new_elements[$i] );
1902 return $length + $new_size;
1907 # Splices section of array with optional new section.
1908 # Returns deleted section, or last element deleted in scalar context.
1910 my $self = shift->_get_self;
1912 $self->lock( $self->LOCK_EX );
1914 my $length = $self->FETCHSIZE();
1917 # Calculate offset and length of splice
1920 $offset = 0 unless defined $offset;
1921 if ($offset < 0) { $offset += $length; }
1924 if (scalar @_) { $splice_length = shift; }
1925 else { $splice_length = $length - $offset; }
1926 if ($splice_length < 0) { $splice_length += ($length - $offset); }
1929 # Set up array with new elements, and copy out old elements for return
1931 my @new_elements = @_;
1932 my $new_size = scalar @new_elements;
1934 my @old_elements = map {
1936 } $offset .. ($offset + $splice_length - 1);
1939 # Adjust array length, and shift elements to accommodate new section.
1941 if ( $new_size != $splice_length ) {
1942 if ($new_size > $splice_length) {
1943 for (my $i = $length - 1; $i >= $offset + $splice_length; $i--) {
1944 $self->STORE( $i + ($new_size - $splice_length), $self->FETCH($i) );
1948 for (my $i = $offset + $splice_length; $i < $length; $i++) {
1949 $self->STORE( $i + ($new_size - $splice_length), $self->FETCH($i) );
1951 for (my $i = 0; $i < $splice_length - $new_size; $i++) {
1952 $self->DELETE( $length - 1 );
1959 # Insert new elements into array
1961 for (my $i = $offset; $i < $offset + $new_size; $i++) {
1962 $self->STORE( $i, shift @new_elements );
1968 # Return deleted section, or last element in scalar context.
1970 return wantarray ? @old_elements : $old_elements[-1];
1975 # Perl will call EXTEND() when the array is likely to grow.
1976 # We don't care, but include it for compatibility.
1981 # Public method aliases
1983 *length = *FETCHSIZE;
1987 *unshift = *UNSHIFT;
1990 package DBM::Deep::09830::Hash;
1994 use base 'DBM::Deep::09830';
1997 eval { local $SIG{'__DIE__'}; tied( %{$_[0]} ) } || $_[0]
2002 # Tied hash constructor method, called by Perl's tie() function.
2005 my $args = $class->_get_args( @_ );
2007 $args->{type} = $class->TYPE_HASH;
2009 return $class->_init($args);
2013 my $self = shift->_get_self;
2014 my $key = ($self->_root->{filter_store_key})
2015 ? $self->_root->{filter_store_key}->($_[0])
2018 return $self->SUPER::FETCH( $key );
2022 my $self = shift->_get_self;
2023 my $key = ($self->_root->{filter_store_key})
2024 ? $self->_root->{filter_store_key}->($_[0])
2028 return $self->SUPER::STORE( $key, $value );
2032 my $self = shift->_get_self;
2033 my $key = ($self->_root->{filter_store_key})
2034 ? $self->_root->{filter_store_key}->($_[0])
2037 return $self->SUPER::EXISTS( $key );
2041 my $self = shift->_get_self;
2042 my $key = ($self->_root->{filter_store_key})
2043 ? $self->_root->{filter_store_key}->($_[0])
2046 return $self->SUPER::DELETE( $key );
2051 # Locate and return first key (in no particular order)
2053 my $self = $_[0]->_get_self;
2056 # Make sure file is open
2058 if (!defined($self->_fh)) { $self->_open(); }
2061 # Request shared lock for reading
2063 $self->lock( $self->LOCK_SH );
2065 my $result = $self->_get_next_key();
2069 return ($result && $self->_root->{filter_fetch_key})
2070 ? $self->_root->{filter_fetch_key}->($result)
2076 # Return next key (in no particular order), given previous one
2078 my $self = $_[0]->_get_self;
2080 my $prev_key = ($self->_root->{filter_store_key})
2081 ? $self->_root->{filter_store_key}->($_[1])
2084 my $prev_md5 = $DBM::Deep::09830::DIGEST_FUNC->($prev_key);
2087 # Make sure file is open
2089 if (!defined($self->_fh)) { $self->_open(); }
2092 # Request shared lock for reading
2094 $self->lock( $self->LOCK_SH );
2096 my $result = $self->_get_next_key( $prev_md5 );
2100 return ($result && $self->_root->{filter_fetch_key})
2101 ? $self->_root->{filter_fetch_key}->($result)
2106 # Public method aliases
2108 *first_key = *FIRSTKEY;
2109 *next_key = *NEXTKEY;