lib/DBM/Deep.pm

   1 package DBM::Deep;
   2
   3 ##
   4 # DBM::Deep
   5 #
   6 # Description:
   7 #       Multi-level database module for storing hash trees, arrays and simple
   8 #       key/value pairs into FTP-able, cross-platform binary database files.
   9 #
  10 #       Type `perldoc DBM::Deep` for complete documentation.
  11 #
  12 # Usage Examples:
  13 #       my %db;
  14 #       tie %db, 'DBM::Deep', 'my_database.db'; # standard tie() method
  15 #
  16 #       my $db = new DBM::Deep( 'my_database.db' ); # preferred OO method
  17 #
  18 #       $db->{my_scalar} = 'hello world';
  19 #       $db->{my_hash} = { larry => 'genius', hashes => 'fast' };
  20 #       $db->{my_array} = [ 1, 2, 3, time() ];
  21 #       $db->{my_complex} = [ 'hello', { perl => 'rules' }, 42, 99 ];
  22 #       push @{$db->{my_array}}, 'another value';
  23 #       my @key_list = keys %{$db->{my_hash}};
  24 #       print "This module " . $db->{my_complex}->[1]->{perl} . "!\n";
  25 #
  26 # Copyright:
  27 #       (c) 2002-2006 Joseph Huckaby.  All Rights Reserved.
  28 #       This program is free software; you can redistribute it and/or
  29 #       modify it under the same terms as Perl itself.
  30 ##
  31
  32 use strict;
  33
  34 use Fcntl qw( :DEFAULT :flock :seek );
  35 use Digest::MD5 ();
  36 use Scalar::Util ();
  37
  38 use DBM::Deep::Engine;
  39
  40 use vars qw( $VERSION );
  41 $VERSION = q(0.99_01);
  42
  43 ##
  44 # Set to 4 and 'N' for 32-bit offset tags (default).  Theoretical limit of 4 GB per file.
  45 #       (Perl must be compiled with largefile support for files > 2 GB)
  46 #
  47 # Set to 8 and 'Q' for 64-bit offsets.  Theoretical limit of 16 XB per file.
  48 #       (Perl must be compiled with largefile and 64-bit long support)
  49 ##
  50 #my $LONG_SIZE = 4;
  51 #my $LONG_PACK = 'N';
  52
  53 ##
  54 # Set to 4 and 'N' for 32-bit data length prefixes.  Limit of 4 GB for each key/value.
  55 # Upgrading this is possible (see above) but probably not necessary.  If you need
  56 # more than 4 GB for a single key or value, this module is really not for you :-)
  57 ##
  58 #my $DATA_LENGTH_SIZE = 4;
  59 #my $DATA_LENGTH_PACK = 'N';
  60 our ($LONG_SIZE, $LONG_PACK, $DATA_LENGTH_SIZE, $DATA_LENGTH_PACK);
  61
  62 ##
  63 # Maximum number of buckets per list before another level of indexing is done.
  64 # Increase this value for slightly greater speed, but larger database files.
  65 # DO NOT decrease this value below 16, due to risk of recursive reindex overrun.
  66 ##
  67 my $MAX_BUCKETS = 16;
  68
  69 ##
  70 # Better not adjust anything below here, unless you're me :-)
  71 ##
  72
  73 ##
  74 # Setup digest function for keys
  75 ##
  76 our ($DIGEST_FUNC, $HASH_SIZE);
  77 #my $DIGEST_FUNC = \&Digest::MD5::md5;
  78
  79 ##
  80 # Precalculate index and bucket sizes based on values above.
  81 ##
  82 #my $HASH_SIZE = 16;
  83 our ($INDEX_SIZE, $BUCKET_SIZE, $BUCKET_LIST_SIZE);
  84
  85 set_digest();
  86 #set_pack();
  87 #_precalc_sizes();
  88
  89 ##
  90 # Setup file and tag signatures.  These should never change.
  91 ##
  92 sub SIG_FILE   () { 'DPDB' }
  93 sub SIG_HASH   () { 'H' }
  94 sub SIG_ARRAY  () { 'A' }
  95 sub SIG_SCALAR () { 'S' }
  96 sub SIG_NULL   () { 'N' }
  97 sub SIG_DATA   () { 'D' }
  98 sub SIG_INDEX  () { 'I' }
  99 sub SIG_BLIST  () { 'B' }
 100 sub SIG_SIZE   () {  1  }
 101
 102 ##
 103 # Setup constants for users to pass to new()
 104 ##
 105 sub TYPE_HASH   () { SIG_HASH   }
 106 sub TYPE_ARRAY  () { SIG_ARRAY  }
 107 sub TYPE_SCALAR () { SIG_SCALAR }
 108
 109 sub _get_args {
 110     my $proto = shift;
 111
 112     my $args;
 113     if (scalar(@_) > 1) {
 114         if ( @_ % 2 ) {
 115             $proto->_throw_error( "Odd number of parameters to " . (caller(1))[2] );
 116         }
 117         $args = {@_};
 118     }
 119         elsif ( ref $_[0] ) {
 120         unless ( eval { local $SIG{'__DIE__'}; %{$_[0]} || 1 } ) {
 121             $proto->_throw_error( "Not a hashref in args to " . (caller(1))[2] );
 122         }
 123         $args = $_[0];
 124     }
 125         else {
 126         $args = { file => shift };
 127     }
 128
 129     return $args;
 130 }
 131
 132 sub new {
 133         ##
 134         # Class constructor method for Perl OO interface.
 135         # Calls tie() and returns blessed reference to tied hash or array,
 136         # providing a hybrid OO/tie interface.
 137         ##
 138         my $class = shift;
 139         my $args = $class->_get_args( @_ );
 140
 141         ##
 142         # Check if we want a tied hash or array.
 143         ##
 144         my $self;
 145         if (defined($args->{type}) && $args->{type} eq TYPE_ARRAY) {
 146         $class = 'DBM::Deep::Array';
 147         require DBM::Deep::Array;
 148                 tie @$self, $class, %$args;
 149         }
 150         else {
 151         $class = 'DBM::Deep::Hash';
 152         require DBM::Deep::Hash;
 153                 tie %$self, $class, %$args;
 154         }
 155
 156         return bless $self, $class;
 157 }
 158
 159 sub _init {
 160     ##
 161     # Setup $self and bless into this class.
 162     ##
 163     my $class = shift;
 164     my $args = shift;
 165
 166     # These are the defaults to be optionally overridden below
 167     my $self = bless {
 168         type        => TYPE_HASH,
 169         base_offset => length(SIG_FILE),
 170         engine      => 'DBM::Deep::Engine',
 171     }, $class;
 172
 173     foreach my $param ( keys %$self ) {
 174         next unless exists $args->{$param};
 175         $self->{$param} = delete $args->{$param}
 176     }
 177
 178     # locking implicitly enables autoflush
 179     if ($args->{locking}) { $args->{autoflush} = 1; }
 180
 181     $self->{root} = exists $args->{root}
 182         ? $args->{root}
 183         : DBM::Deep::_::Root->new( $args );
 184
 185     if (!defined($self->_fh)) { $self->{engine}->open( $self ); }
 186
 187     return $self;
 188 }
 189
 190 sub TIEHASH {
 191     shift;
 192     require DBM::Deep::Hash;
 193     return DBM::Deep::Hash->TIEHASH( @_ );
 194 }
 195
 196 sub TIEARRAY {
 197     shift;
 198     require DBM::Deep::Array;
 199     return DBM::Deep::Array->TIEARRAY( @_ );
 200 }
 201
 202 #XXX Unneeded now ...
 203 #sub DESTROY {
 204 #}
 205
 206 sub _close {
 207         ##
 208         # Close database fh
 209         ##
 210     my $self = $_[0]->_get_self;
 211     close $self->_root->{fh} if $self->_root->{fh};
 212     $self->_root->{fh} = undef;
 213 }
 214
 215 sub _create_tag {
 216         ##
 217         # Given offset, signature and content, create tag and write to disk
 218         ##
 219         my ($self, $offset, $sig, $content) = @_;
 220         my $size = length($content);
 221
 222     my $fh = $self->_fh;
 223
 224         seek($fh, $offset + $self->_root->{file_offset}, SEEK_SET);
 225         print( $fh $sig . pack($DATA_LENGTH_PACK, $size) . $content );
 226
 227         if ($offset == $self->_root->{end}) {
 228                 $self->_root->{end} += SIG_SIZE + $DATA_LENGTH_SIZE + $size;
 229         }
 230
 231         return {
 232                 signature => $sig,
 233                 size => $size,
 234                 offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE,
 235                 content => $content
 236         };
 237 }
 238
 239 sub _load_tag {
 240         ##
 241         # Given offset, load single tag and return signature, size and data
 242         ##
 243         my $self = shift;
 244         my $offset = shift;
 245
 246     my $fh = $self->_fh;
 247
 248         seek($fh, $offset + $self->_root->{file_offset}, SEEK_SET);
 249         if (eof $fh) { return undef; }
 250
 251     my $b;
 252     read( $fh, $b, SIG_SIZE + $DATA_LENGTH_SIZE );
 253     my ($sig, $size) = unpack( "A $DATA_LENGTH_PACK", $b );
 254
 255         my $buffer;
 256         read( $fh, $buffer, $size);
 257
 258         return {
 259                 signature => $sig,
 260                 size => $size,
 261                 offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE,
 262                 content => $buffer
 263         };
 264 }
 265
 266 sub _index_lookup {
 267         ##
 268         # Given index tag, lookup single entry in index and return .
 269         ##
 270         my $self = shift;
 271         my ($tag, $index) = @_;
 272
 273         my $location = unpack($LONG_PACK, substr($tag->{content}, $index * $LONG_SIZE, $LONG_SIZE) );
 274         if (!$location) { return; }
 275
 276         return $self->_load_tag( $location );
 277 }
 278
 279 sub _add_bucket {
 280         ##
 281         # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
 282         # plain (undigested) key and value.
 283         ##
 284         my $self = shift;
 285         my ($tag, $md5, $plain_key, $value) = @_;
 286         my $keys = $tag->{content};
 287         my $location = 0;
 288         my $result = 2;
 289
 290     my $root = $self->_root;
 291
 292     my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) };
 293         my $internal_ref = $is_dbm_deep && ($value->_root eq $root);
 294
 295     my $fh = $self->_fh;
 296
 297         ##
 298         # Iterate through buckets, seeing if this is a new entry or a replace.
 299         ##
 300         for (my $i=0; $i<$MAX_BUCKETS; $i++) {
 301                 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
 302                 if (!$subloc) {
 303                         ##
 304                         # Found empty bucket (end of list).  Populate and exit loop.
 305                         ##
 306                         $result = 2;
 307
 308             $location = $internal_ref
 309                 ? $value->_base_offset
 310                 : $root->{end};
 311
 312                         seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET);
 313                         print( $fh $md5 . pack($LONG_PACK, $location) );
 314                         last;
 315                 }
 316
 317                 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
 318                 if ($md5 eq $key) {
 319                         ##
 320                         # Found existing bucket with same key.  Replace with new value.
 321                         ##
 322                         $result = 1;
 323
 324                         if ($internal_ref) {
 325                                 $location = $value->_base_offset;
 326                                 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET);
 327                                 print( $fh $md5 . pack($LONG_PACK, $location) );
 328                 return $result;
 329                         }
 330
 331             seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET);
 332             my $size;
 333             read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
 334
 335             ##
 336             # If value is a hash, array, or raw value with equal or less size, we can
 337             # reuse the same content area of the database.  Otherwise, we have to create
 338             # a new content area at the EOF.
 339             ##
 340             my $actual_length;
 341             my $r = Scalar::Util::reftype( $value ) || '';
 342             if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
 343                 $actual_length = $INDEX_SIZE;
 344
 345                 # if autobless is enabled, must also take into consideration
 346                 # the class name, as it is stored along with key/value.
 347                 if ( $root->{autobless} ) {
 348                     my $value_class = Scalar::Util::blessed($value);
 349                     if ( defined $value_class && !$value->isa('DBM::Deep') ) {
 350                         $actual_length += length($value_class);
 351                     }
 352                 }
 353             }
 354             else { $actual_length = length($value); }
 355
 356             if ($actual_length <= $size) {
 357                 $location = $subloc;
 358             }
 359             else {
 360                 $location = $root->{end};
 361                 seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $HASH_SIZE + $root->{file_offset}, SEEK_SET);
 362                 print( $fh pack($LONG_PACK, $location) );
 363             }
 364
 365                         last;
 366                 }
 367         }
 368
 369         ##
 370         # If this is an internal reference, return now.
 371         # No need to write value or plain key
 372         ##
 373         if ($internal_ref) {
 374         return $result;
 375     }
 376
 377         ##
 378         # If bucket didn't fit into list, split into a new index level
 379         ##
 380         if (!$location) {
 381                 seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
 382                 print( $fh pack($LONG_PACK, $root->{end}) );
 383
 384                 my $index_tag = $self->_create_tag($root->{end}, SIG_INDEX, chr(0) x $INDEX_SIZE);
 385                 my @offsets = ();
 386
 387                 $keys .= $md5 . pack($LONG_PACK, 0);
 388
 389                 for (my $i=0; $i<=$MAX_BUCKETS; $i++) {
 390                         my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
 391                         if ($key) {
 392                                 my $old_subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
 393                                 my $num = ord(substr($key, $tag->{ch} + 1, 1));
 394
 395                                 if ($offsets[$num]) {
 396                                         my $offset = $offsets[$num] + SIG_SIZE + $DATA_LENGTH_SIZE;
 397                                         seek($fh, $offset + $root->{file_offset}, SEEK_SET);
 398                                         my $subkeys;
 399                                         read( $fh, $subkeys, $BUCKET_LIST_SIZE);
 400
 401                                         for (my $k=0; $k<$MAX_BUCKETS; $k++) {
 402                                                 my $subloc = unpack($LONG_PACK, substr($subkeys, ($k * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
 403                                                 if (!$subloc) {
 404                                                         seek($fh, $offset + ($k * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET);
 405                                                         print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) );
 406                                                         last;
 407                                                 }
 408                                         } # k loop
 409                                 }
 410                                 else {
 411                                         $offsets[$num] = $root->{end};
 412                                         seek($fh, $index_tag->{offset} + ($num * $LONG_SIZE) + $root->{file_offset}, SEEK_SET);
 413                                         print( $fh pack($LONG_PACK, $root->{end}) );
 414
 415                                         my $blist_tag = $self->_create_tag($root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE);
 416
 417                                         seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
 418                                         print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) );
 419                                 }
 420                         } # key is real
 421                 } # i loop
 422
 423                 $location ||= $root->{end};
 424         } # re-index bucket list
 425
 426         ##
 427         # Seek to content area and store signature, value and plaintext key
 428         ##
 429         if ($location) {
 430                 my $content_length;
 431                 seek($fh, $location + $root->{file_offset}, SEEK_SET);
 432
 433                 ##
 434                 # Write signature based on content type, set content length and write actual value.
 435                 ##
 436         my $r = Scalar::Util::reftype($value) || '';
 437                 if ($r eq 'HASH') {
 438                         print( $fh TYPE_HASH );
 439                         print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE );
 440                         $content_length = $INDEX_SIZE;
 441                 }
 442                 elsif ($r eq 'ARRAY') {
 443                         print( $fh TYPE_ARRAY );
 444                         print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE );
 445                         $content_length = $INDEX_SIZE;
 446                 }
 447                 elsif (!defined($value)) {
 448                         print( $fh SIG_NULL );
 449                         print( $fh pack($DATA_LENGTH_PACK, 0) );
 450                         $content_length = 0;
 451                 }
 452                 else {
 453                         print( $fh SIG_DATA );
 454                         print( $fh pack($DATA_LENGTH_PACK, length($value)) . $value );
 455                         $content_length = length($value);
 456                 }
 457
 458                 ##
 459                 # Plain key is stored AFTER value, as keys are typically fetched less often.
 460                 ##
 461                 print( $fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key );
 462
 463                 ##
 464                 # If value is blessed, preserve class name
 465                 ##
 466                 if ( $root->{autobless} ) {
 467             my $value_class = Scalar::Util::blessed($value);
 468             if ( defined $value_class && $value_class ne 'DBM::Deep' ) {
 469                 ##
 470                 # Blessed ref -- will restore later
 471                 ##
 472                 print( $fh chr(1) );
 473                 print( $fh pack($DATA_LENGTH_PACK, length($value_class)) . $value_class );
 474                 $content_length += 1;
 475                 $content_length += $DATA_LENGTH_SIZE + length($value_class);
 476             }
 477             else {
 478                 print( $fh chr(0) );
 479                 $content_length += 1;
 480             }
 481         }
 482
 483                 ##
 484                 # If this is a new content area, advance EOF counter
 485                 ##
 486                 if ($location == $root->{end}) {
 487                         $root->{end} += SIG_SIZE;
 488                         $root->{end} += $DATA_LENGTH_SIZE + $content_length;
 489                         $root->{end} += $DATA_LENGTH_SIZE + length($plain_key);
 490                 }
 491
 492                 ##
 493                 # If content is a hash or array, create new child DBM::Deep object and
 494                 # pass each key or element to it.
 495                 ##
 496                 if ($r eq 'HASH') {
 497                         my $branch = DBM::Deep->new(
 498                                 type => TYPE_HASH,
 499                                 base_offset => $location,
 500                                 root => $root,
 501                         );
 502                         foreach my $key (keys %{$value}) {
 503                 $branch->STORE( $key, $value->{$key} );
 504                         }
 505                 }
 506                 elsif ($r eq 'ARRAY') {
 507                         my $branch = DBM::Deep->new(
 508                                 type => TYPE_ARRAY,
 509                                 base_offset => $location,
 510                                 root => $root,
 511                         );
 512                         my $index = 0;
 513                         foreach my $element (@{$value}) {
 514                 $branch->STORE( $index, $element );
 515                                 $index++;
 516                         }
 517                 }
 518
 519                 return $result;
 520         }
 521
 522         return $self->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file");
 523 }
 524
 525 sub _get_bucket_value {
 526         ##
 527         # Fetch single value given tag and MD5 digested key.
 528         ##
 529         my $self = shift;
 530         my ($tag, $md5) = @_;
 531         my $keys = $tag->{content};
 532
 533     my $fh = $self->_fh;
 534
 535         ##
 536         # Iterate through buckets, looking for a key match
 537         ##
 538     BUCKET:
 539         for (my $i=0; $i<$MAX_BUCKETS; $i++) {
 540                 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
 541                 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
 542
 543                 if (!$subloc) {
 544                         ##
 545                         # Hit end of list, no match
 546                         ##
 547                         return;
 548                 }
 549
 550         if ( $md5 ne $key ) {
 551             next BUCKET;
 552         }
 553
 554         ##
 555         # Found match -- seek to offset and read signature
 556         ##
 557         my $signature;
 558         seek($fh, $subloc + $self->_root->{file_offset}, SEEK_SET);
 559         read( $fh, $signature, SIG_SIZE);
 560
 561         ##
 562         # If value is a hash or array, return new DBM::Deep object with correct offset
 563         ##
 564         if (($signature eq TYPE_HASH) || ($signature eq TYPE_ARRAY)) {
 565             my $obj = DBM::Deep->new(
 566                 type => $signature,
 567                 base_offset => $subloc,
 568                 root => $self->_root
 569             );
 570
 571             if ($self->_root->{autobless}) {
 572                 ##
 573                 # Skip over value and plain key to see if object needs
 574                 # to be re-blessed
 575                 ##
 576                 seek($fh, $DATA_LENGTH_SIZE + $INDEX_SIZE, SEEK_CUR);
 577
 578                 my $size;
 579                 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
 580                 if ($size) { seek($fh, $size, SEEK_CUR); }
 581
 582                 my $bless_bit;
 583                 read( $fh, $bless_bit, 1);
 584                 if (ord($bless_bit)) {
 585                     ##
 586                     # Yes, object needs to be re-blessed
 587                     ##
 588                     my $class_name;
 589                     read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
 590                     if ($size) { read( $fh, $class_name, $size); }
 591                     if ($class_name) { $obj = bless( $obj, $class_name ); }
 592                 }
 593             }
 594
 595             return $obj;
 596         }
 597
 598         ##
 599         # Otherwise return actual value
 600         ##
 601         elsif ($signature eq SIG_DATA) {
 602             my $size;
 603             my $value = '';
 604             read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
 605             if ($size) { read( $fh, $value, $size); }
 606             return $value;
 607         }
 608
 609         ##
 610         # Key exists, but content is null
 611         ##
 612         else { return; }
 613         } # i loop
 614
 615         return;
 616 }
 617
 618 sub _delete_bucket {
 619         ##
 620         # Delete single key/value pair given tag and MD5 digested key.
 621         ##
 622         my $self = shift;
 623         my ($tag, $md5) = @_;
 624         my $keys = $tag->{content};
 625
 626     my $fh = $self->_fh;
 627
 628         ##
 629         # Iterate through buckets, looking for a key match
 630         ##
 631     BUCKET:
 632         for (my $i=0; $i<$MAX_BUCKETS; $i++) {
 633                 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
 634                 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
 635
 636                 if (!$subloc) {
 637                         ##
 638                         # Hit end of list, no match
 639                         ##
 640                         return;
 641                 }
 642
 643         if ( $md5 ne $key ) {
 644             next BUCKET;
 645         }
 646
 647         ##
 648         # Matched key -- delete bucket and return
 649         ##
 650         seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $self->_root->{file_offset}, SEEK_SET);
 651         print( $fh substr($keys, ($i+1) * $BUCKET_SIZE ) );
 652         print( $fh chr(0) x $BUCKET_SIZE );
 653
 654         return 1;
 655         } # i loop
 656
 657         return;
 658 }
 659
 660 sub _bucket_exists {
 661         ##
 662         # Check existence of single key given tag and MD5 digested key.
 663         ##
 664         my $self = shift;
 665         my ($tag, $md5) = @_;
 666         my $keys = $tag->{content};
 667
 668         ##
 669         # Iterate through buckets, looking for a key match
 670         ##
 671     BUCKET:
 672         for (my $i=0; $i<$MAX_BUCKETS; $i++) {
 673                 my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
 674                 my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
 675
 676                 if (!$subloc) {
 677                         ##
 678                         # Hit end of list, no match
 679                         ##
 680                         return;
 681                 }
 682
 683         if ( $md5 ne $key ) {
 684             next BUCKET;
 685         }
 686
 687         ##
 688         # Matched key -- return true
 689         ##
 690         return 1;
 691         } # i loop
 692
 693         return;
 694 }
 695
 696 sub _find_bucket_list {
 697         ##
 698         # Locate offset for bucket list, given digested key
 699         ##
 700         my $self = shift;
 701         my $md5 = shift;
 702
 703         ##
 704         # Locate offset for bucket list using digest index system
 705         ##
 706         my $ch = 0;
 707         my $tag = $self->_load_tag($self->_base_offset);
 708         if (!$tag) { return; }
 709
 710         while ($tag->{signature} ne SIG_BLIST) {
 711                 $tag = $self->_index_lookup($tag, ord(substr($md5, $ch, 1)));
 712                 if (!$tag) { return; }
 713                 $ch++;
 714         }
 715
 716         return $tag;
 717 }
 718
 719 sub _traverse_index {
 720         ##
 721         # Scan index and recursively step into deeper levels, looking for next key.
 722         ##
 723     my ($self, $offset, $ch, $force_return_next) = @_;
 724     $force_return_next = undef unless $force_return_next;
 725
 726         my $tag = $self->_load_tag( $offset );
 727
 728     my $fh = $self->_fh;
 729
 730         if ($tag->{signature} ne SIG_BLIST) {
 731                 my $content = $tag->{content};
 732                 my $start;
 733                 if ($self->{return_next}) { $start = 0; }
 734                 else { $start = ord(substr($self->{prev_md5}, $ch, 1)); }
 735
 736                 for (my $index = $start; $index < 256; $index++) {
 737                         my $subloc = unpack($LONG_PACK, substr($content, $index * $LONG_SIZE, $LONG_SIZE) );
 738                         if ($subloc) {
 739                                 my $result = $self->_traverse_index( $subloc, $ch + 1, $force_return_next );
 740                                 if (defined($result)) { return $result; }
 741                         }
 742                 } # index loop
 743
 744                 $self->{return_next} = 1;
 745         } # tag is an index
 746
 747         elsif ($tag->{signature} eq SIG_BLIST) {
 748                 my $keys = $tag->{content};
 749                 if ($force_return_next) { $self->{return_next} = 1; }
 750
 751                 ##
 752                 # Iterate through buckets, looking for a key match
 753                 ##
 754                 for (my $i=0; $i<$MAX_BUCKETS; $i++) {
 755                         my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
 756                         my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
 757
 758                         if (!$subloc) {
 759                                 ##
 760                                 # End of bucket list -- return to outer loop
 761                                 ##
 762                                 $self->{return_next} = 1;
 763                                 last;
 764                         }
 765                         elsif ($key eq $self->{prev_md5}) {
 766                                 ##
 767                                 # Located previous key -- return next one found
 768                                 ##
 769                                 $self->{return_next} = 1;
 770                                 next;
 771                         }
 772                         elsif ($self->{return_next}) {
 773                                 ##
 774                                 # Seek to bucket location and skip over signature
 775                                 ##
 776                                 seek($fh, $subloc + SIG_SIZE + $self->_root->{file_offset}, SEEK_SET);
 777
 778                                 ##
 779                                 # Skip over value to get to plain key
 780                                 ##
 781                                 my $size;
 782                                 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
 783                                 if ($size) { seek($fh, $size, SEEK_CUR); }
 784
 785                                 ##
 786                                 # Read in plain key and return as scalar
 787                                 ##
 788                                 my $plain_key;
 789                                 read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
 790                                 if ($size) { read( $fh, $plain_key, $size); }
 791
 792                                 return $plain_key;
 793                         }
 794                 } # bucket loop
 795
 796                 $self->{return_next} = 1;
 797         } # tag is a bucket list
 798
 799         return;
 800 }
 801
 802 sub _get_next_key {
 803         ##
 804         # Locate next key, given digested previous one
 805         ##
 806     my $self = $_[0]->_get_self;
 807
 808         $self->{prev_md5} = $_[1] ? $_[1] : undef;
 809         $self->{return_next} = 0;
 810
 811         ##
 812         # If the previous key was not specifed, start at the top and
 813         # return the first one found.
 814         ##
 815         if (!$self->{prev_md5}) {
 816                 $self->{prev_md5} = chr(0) x $HASH_SIZE;
 817                 $self->{return_next} = 1;
 818         }
 819
 820         return $self->_traverse_index( $self->_base_offset, 0 );
 821 }
 822
 823 sub lock {
 824         ##
 825         # If db locking is set, flock() the db file.  If called multiple
 826         # times before unlock(), then the same number of unlocks() must
 827         # be called before the lock is released.
 828         ##
 829     my $self = $_[0]->_get_self;
 830         my $type = $_[1];
 831     $type = LOCK_EX unless defined $type;
 832
 833         if (!defined($self->_fh)) { return; }
 834
 835         if ($self->_root->{locking}) {
 836                 if (!$self->_root->{locked}) {
 837                         flock($self->_fh, $type);
 838
 839                         # refresh end counter in case file has changed size
 840                         my @stats = stat($self->_root->{file});
 841                         $self->_root->{end} = $stats[7];
 842
 843                         # double-check file inode, in case another process
 844                         # has optimize()d our file while we were waiting.
 845                         if ($stats[1] != $self->_root->{inode}) {
 846                                 $self->{engine}->open($self); # re-open
 847                                 flock($self->_fh, $type); # re-lock
 848                                 $self->_root->{end} = (stat($self->_fh))[7]; # re-end
 849                         }
 850                 }
 851                 $self->_root->{locked}++;
 852
 853         return 1;
 854         }
 855
 856     return;
 857 }
 858
 859 sub unlock {
 860         ##
 861         # If db locking is set, unlock the db file.  See note in lock()
 862         # regarding calling lock() multiple times.
 863         ##
 864     my $self = $_[0]->_get_self;
 865
 866         if (!defined($self->_fh)) { return; }
 867
 868         if ($self->_root->{locking} && $self->_root->{locked} > 0) {
 869                 $self->_root->{locked}--;
 870                 if (!$self->_root->{locked}) { flock($self->_fh, LOCK_UN); }
 871
 872         return 1;
 873         }
 874
 875     return;
 876 }
 877
 878 sub _copy_value {
 879     my $self = shift->_get_self;
 880     my ($spot, $value) = @_;
 881
 882     if ( !ref $value ) {
 883         ${$spot} = $value;
 884     }
 885     elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep' ) } ) {
 886         my $type = $value->_type;
 887         ${$spot} = $type eq TYPE_HASH ? {} : [];
 888         $value->_copy_node( ${$spot} );
 889     }
 890     else {
 891         my $r = Scalar::Util::reftype( $value );
 892         my $c = Scalar::Util::blessed( $value );
 893         if ( $r eq 'ARRAY' ) {
 894             ${$spot} = [ @{$value} ];
 895         }
 896         else {
 897             ${$spot} = { %{$value} };
 898         }
 899         ${$spot} = bless ${$spot}, $c
 900             if defined $c;
 901     }
 902
 903     return 1;
 904 }
 905
 906 sub _copy_node {
 907         ##
 908         # Copy single level of keys or elements to new DB handle.
 909         # Recurse for nested structures
 910         ##
 911     my $self = shift->_get_self;
 912         my ($db_temp) = @_;
 913
 914         if ($self->_type eq TYPE_HASH) {
 915                 my $key = $self->first_key();
 916                 while ($key) {
 917                         my $value = $self->get($key);
 918             $self->_copy_value( \$db_temp->{$key}, $value );
 919                         $key = $self->next_key($key);
 920                 }
 921         }
 922         else {
 923                 my $length = $self->length();
 924                 for (my $index = 0; $index < $length; $index++) {
 925                         my $value = $self->get($index);
 926             $self->_copy_value( \$db_temp->[$index], $value );
 927                 }
 928         }
 929
 930     return 1;
 931 }
 932
 933 sub export {
 934         ##
 935         # Recursively export into standard Perl hashes and arrays.
 936         ##
 937     my $self = $_[0]->_get_self;
 938
 939         my $temp;
 940         if ($self->_type eq TYPE_HASH) { $temp = {}; }
 941         elsif ($self->_type eq TYPE_ARRAY) { $temp = []; }
 942
 943         $self->lock();
 944         $self->_copy_node( $temp );
 945         $self->unlock();
 946
 947         return $temp;
 948 }
 949
 950 sub import {
 951         ##
 952         # Recursively import Perl hash/array structure
 953         ##
 954     #XXX This use of ref() seems to be ok
 955         if (!ref($_[0])) { return; } # Perl calls import() on use -- ignore
 956
 957     my $self = $_[0]->_get_self;
 958         my $struct = $_[1];
 959
 960     #XXX This use of ref() seems to be ok
 961         if (!ref($struct)) {
 962                 ##
 963                 # struct is not a reference, so just import based on our type
 964                 ##
 965                 shift @_;
 966
 967                 if ($self->_type eq TYPE_HASH) { $struct = {@_}; }
 968                 elsif ($self->_type eq TYPE_ARRAY) { $struct = [@_]; }
 969         }
 970
 971     my $r = Scalar::Util::reftype($struct) || '';
 972         if ($r eq "HASH" && $self->_type eq TYPE_HASH) {
 973                 foreach my $key (keys %$struct) { $self->put($key, $struct->{$key}); }
 974         }
 975         elsif ($r eq "ARRAY" && $self->_type eq TYPE_ARRAY) {
 976                 $self->push( @$struct );
 977         }
 978         else {
 979                 return $self->_throw_error("Cannot import: type mismatch");
 980         }
 981
 982         return 1;
 983 }
 984
 985 sub optimize {
 986         ##
 987         # Rebuild entire database into new file, then move
 988         # it back on top of original.
 989         ##
 990     my $self = $_[0]->_get_self;
 991
 992 #XXX Need to create a new test for this
 993 #       if ($self->_root->{links} > 1) {
 994 #               return $self->_throw_error("Cannot optimize: reference count is greater than 1");
 995 #       }
 996
 997         my $db_temp = DBM::Deep->new(
 998                 file => $self->_root->{file} . '.tmp',
 999                 type => $self->_type
1000         );
1001         if (!$db_temp) {
1002                 return $self->_throw_error("Cannot optimize: failed to open temp file: $!");
1003         }
1004
1005         $self->lock();
1006         $self->_copy_node( $db_temp );
1007         undef $db_temp;
1008
1009         ##
1010         # Attempt to copy user, group and permissions over to new file
1011         ##
1012         my @stats = stat($self->_fh);
1013         my $perms = $stats[2] & 07777;
1014         my $uid = $stats[4];
1015         my $gid = $stats[5];
1016         chown( $uid, $gid, $self->_root->{file} . '.tmp' );
1017         chmod( $perms, $self->_root->{file} . '.tmp' );
1018
1019     # q.v. perlport for more information on this variable
1020     if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) {
1021                 ##
1022                 # Potential race condition when optmizing on Win32 with locking.
1023                 # The Windows filesystem requires that the filehandle be closed
1024                 # before it is overwritten with rename().  This could be redone
1025                 # with a soft copy.
1026                 ##
1027                 $self->unlock();
1028                 $self->_close();
1029         }
1030
1031         if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) {
1032                 unlink $self->_root->{file} . '.tmp';
1033                 $self->unlock();
1034                 return $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!");
1035         }
1036
1037         $self->unlock();
1038         $self->_close();
1039         $self->{engine}->open($self);
1040
1041         return 1;
1042 }
1043
sub clone {
    ##
    # Return a new DBM::Deep object built from this one's type, base
    # offset and root structure.
    ##
    my ($obj) = @_;
    my $self = $obj->_get_self;

    my %args = (
        type        => $self->_type,
        base_offset => $self->_base_offset,
        root        => $self->_root,
    );

    return DBM::Deep->new( %args );
}
1056
{
    # The only filter slots that may be set
    my %is_legal_filter = map { $_ => 1 }
        qw( store_key store_value fetch_key fetch_value );

    sub set_filter {
        ##
        # Install (or, with a false function, remove) the filter used
        # when storing or fetching a key or a value.
        #
        # Takes the filter type (case-insensitive) and a code reference.
        # Returns 1 on success, nothing for an unknown filter type.
        ##
        my ($obj, $which, $code) = @_;
        my $self = $obj->_get_self;
        my $type = lc $which;

        return unless $is_legal_filter{$type};

        $self->_root->{"filter_$type"} = $code ? $code : undef;
        return 1;
    }
}
1081
1082 ##
1083 # Accessor methods
1084 ##
1085
sub _root {
    ##
    # Accessor: the root structure of this object
    ##
    my ($obj) = @_;
    return $obj->_get_self->{root};
}
1093
sub _fh {
    ##
    # Accessor: the raw filehandle, as kept in the root structure
    ##
    #XXX It will be useful, though, when we split out HASH and ARRAY
    my ($obj) = @_;
    return $obj->_get_self->_root->{fh};
}
1102
sub _type {
    ##
    # Accessor: the type of the current node (TYPE_HASH or TYPE_ARRAY)
    ##
    my ($obj) = @_;
    return $obj->_get_self->{type};
}
1110
sub _base_offset {
    ##
    # Accessor: the base_offset of the current node
    ##
    my ($obj) = @_;
    return $obj->_get_self->{base_offset};
}
1118
1119 ##
1120 # Utility methods
1121 ##
1122
sub _throw_error {
    ##
    # Die with the given message, prefixed with "DBM::Deep: ".  The
    # trailing newline keeps Perl from appending file and line number.
    ##
    my (undef, $message) = @_;
    die "DBM::Deep: " . $message . "\n";
}
1126
sub _precalc_sizes {
    ##
    # Derive the index, bucket and bucket list sizes from the current
    # pack and digest settings.
    ##

    #XXX I don't like this ...
    if ( !defined $LONG_SIZE ) {
        set_pack();
    }

    $INDEX_SIZE       = $LONG_SIZE * 256;
    $BUCKET_SIZE      = $LONG_SIZE + $HASH_SIZE;
    $BUCKET_LIST_SIZE = $BUCKET_SIZE * $MAX_BUCKETS;
}
1139
sub set_pack {
    ##
    # Choose the pack/unpack modes (see file header for more).
    # Arguments, all optional: offset size, offset pack template, data
    # length size, data length pack template.  Any false argument falls
    # back to 4 / 'N'.  The derived sizes are recalculated afterwards.
    ##
    my ($long_size, $long_pack, $data_size, $data_pack) = @_;

    $LONG_SIZE        = $long_size || 4;
    $LONG_PACK        = $long_pack || 'N';

    $DATA_LENGTH_SIZE = $data_size || 4;
    $DATA_LENGTH_PACK = $data_pack || 'N';

    _precalc_sizes();
}
1154
sub set_digest {
    ##
    # Choose the key digest function and the size of the digests it
    # produces.  Defaults: Digest::MD5::md5 and 16.  The derived sizes
    # are recalculated afterwards.
    ##
    my ($digest_func, $hash_size) = @_;

    $DIGEST_FUNC = $digest_func || \&Digest::MD5::md5;
    $HASH_SIZE   = $hash_size   || 16;

    _precalc_sizes();
}
1166
sub _is_writable {
    ##
    # Report whether the filehandle's status flags (fcntl F_GETFL)
    # contain O_WRONLY or O_RDWR.  The result is the masked flag value,
    # so it is true for a writable handle and 0 otherwise.
    ##
    my ($fh) = @_;

    my $write_mask = O_WRONLY | O_RDWR;
    my $flags      = fcntl( $fh, F_GETFL, my $slush = 0 );

    return $write_mask & $flags;
}
1171
1172 #sub _is_readable {
1173 #    my $fh = shift;
1174 #    (O_RDONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0);
1175 #}
1176
1177 ##
1178 # tie() methods (hashes and arrays)
1179 ##
1180
sub STORE {
    ##
    # Store a single hash key/value pair or array element in the
    # database.  Takes the key and the value; returns whatever
    # _add_bucket() returns.  Throws when the filehandle is read-only.
    ##
    my $self = $_[0]->_get_self;
    my $key  = $_[1];

    # Only plain scalars go through the store filter; a hash or array
    # being stored must bypass the filtering system.
    my $value;
    if ( $self->_root->{filter_store_value} && !ref($_[2]) ) {
        $value = $self->_root->{filter_store_value}->($_[2]);
    }
    else {
        $value = $_[2];
    }

    my $md5 = $DIGEST_FUNC->($key);

    if ( !_is_writable( $self->_fh ) ) {
        $self->_throw_error( 'Cannot write to a readonly filehandle' );
    }

    ##
    # Writing needs an exclusive lock
    ##
    $self->lock( LOCK_EX );

    my $fh = $self->_fh;

    ##
    # Walk the digest index, one digest byte per level, until a bucket
    # list is reached (or has to be created).
    ##
    my $tag = $self->_load_tag($self->_base_offset);
    unless ($tag) {
        $tag = $self->_create_tag($self->_base_offset, SIG_INDEX, chr(0) x $INDEX_SIZE);
    }

    for ( my $depth = 0; $tag->{signature} ne SIG_BLIST; $depth++ ) {
        my $byte = ord(substr($md5, $depth, 1));

        my $ref_loc = $tag->{offset} + ($byte * $LONG_SIZE);
        my $child   = $self->_index_lookup($tag, $byte);

        if (!$child) {
            # Empty slot: point it at the end of the file and create a
            # new bucket list there.
            seek($fh, $ref_loc + $self->_root->{file_offset}, SEEK_SET);
            print {$fh} pack($LONG_PACK, $self->_root->{end});

            $tag = $self->_create_tag($self->_root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE);

            $tag->{ref_loc} = $ref_loc;
            $tag->{ch}      = $depth;

            last;
        }

        # Descend one level, remembering where we came from
        $tag = $child;

        $tag->{ref_loc} = $ref_loc;
        $tag->{ch}      = $depth;
    }

    ##
    # Put the key/value into the bucket list
    ##
    my $result = $self->_add_bucket( $tag, $md5, $key, $value );

    $self->unlock();

    return $result;
}
1251
sub FETCH {
    ##
    # Fetch a single value or element, given its plain key or array
    # index.  Returns nothing when no bucket list exists for the key.
    ##
    my $self = shift->_get_self;
    my $key  = shift;

    my $md5 = $DIGEST_FUNC->($key);

    ##
    # Reading only needs a shared lock
    ##
    $self->lock( LOCK_SH );

    my $tag = $self->_find_bucket_list( $md5 );
    unless ($tag) {
        $self->unlock();
        return;
    }

    ##
    # Pull the value out of the bucket list
    ##
    my $result = $self->_get_bucket_value( $tag, $md5 );

    $self->unlock();

    #XXX What is ref() checking here?
    #YYY Filters only apply on scalar values, so the ref check is making
    #YYY sure the fetched bucket is a scalar, not a child hash or array.
    if ( $result && !ref($result) && $self->_root->{filter_fetch_value} ) {
        return $self->_root->{filter_fetch_value}->($result);
    }

    return $result;
}
1286
sub DELETE {
    ##
    # Delete a single key/value pair or element, given its plain key or
    # array index.  Returns the value that was stored under the key (run
    # through the fetch filter when it is a true plain scalar), or
    # nothing when no bucket list exists for the key.
    ##
    my ($obj, $key) = @_;
    my $self = $obj->_get_self;

    my $md5 = $DIGEST_FUNC->($key);

    ##
    # Writing needs an exclusive lock
    ##
    $self->lock( LOCK_EX );

    my $tag = $self->_find_bucket_list( $md5 );
    unless ($tag) {
        $self->unlock();
        return;
    }

    ##
    # Remember the current value, then remove the bucket
    ##
    my $old_value = $self->_get_bucket_value( $tag, $md5 );
    if ( $old_value && !ref($old_value) && $self->_root->{filter_fetch_value} ) {
        $old_value = $self->_root->{filter_fetch_value}->($old_value);
    }

    # The outcome of the deletion itself is not reported to the caller
    my $deleted = $self->_delete_bucket( $tag, $md5 );

    $self->unlock();

    return $old_value;
}
1326
sub EXISTS {
    ##
    # Check whether a single key or element exists, given its plain key
    # or array index.  Returns the true result of _bucket_exists(), or ''
    # for false, mirroring the built-in exists().
    ##
    my ($obj, $key) = @_;
    my $self = $obj->_get_self;

    my $md5 = $DIGEST_FUNC->($key);

    ##
    # Reading only needs a shared lock
    ##
    $self->lock( LOCK_SH );

    # Without a bucket list the key cannot exist
    my $found = '';
    if ( my $tag = $self->_find_bucket_list( $md5 ) ) {
        $found = $self->_bucket_exists( $tag, $md5 ) || '';
    }

    $self->unlock();

    return $found;
}
1360
sub CLEAR {
    ##
    # Remove all keys from a hash, or all elements from an array, by
    # writing a fresh zero-filled tag of this node's type at its base
    # offset.  Returns 1, or nothing when the base offset is at end of
    # file.
    ##
    my ($obj) = @_;
    my $self = $obj->_get_self;

    ##
    # Writing needs an exclusive lock
    ##
    $self->lock( LOCK_EX );

    my $fh = $self->_fh;
    seek($fh, $self->_base_offset + $self->_root->{file_offset}, SEEK_SET);

    my $at_eof = eof $fh;
    if ($at_eof) {
        $self->unlock();
        return;
    }

    $self->_create_tag($self->_base_offset, $self->_type, chr(0) x $INDEX_SIZE);

    $self->unlock();

    return 1;
}
1386
1387 ##
1388 # Public method aliases
1389 ##
# Each alias forwards all of its arguments to the matching tie() method.
sub put    { my $self = shift; return $self->STORE( @_ ); }
sub store  { my $self = shift; return $self->STORE( @_ ); }
sub get    { my $self = shift; return $self->FETCH( @_ ); }
sub fetch  { my $self = shift; return $self->FETCH( @_ ); }
sub delete { my $self = shift; return $self->DELETE( @_ ); }
sub exists { my $self = shift; return $self->EXISTS( @_ ); }
sub clear  { my $self = shift; return $self->CLEAR( @_ ); }
1397
package DBM::Deep::_::Root;

##
# Holds the state shared by every handle on one database file.
##

sub new {
    ##
    # Build a root object from a hash reference of options; anything
    # passed in overrides the defaults below.  When a filehandle is given
    # without a (true) file_offset, the handle's current position is used
    # as the offset.
    ##
    my ($class, $args) = @_;

    my %defaults = (
        file               => undef,
        fh                 => undef,
        file_offset        => 0,
        end                => 0,
        autoflush          => undef,
        locking            => undef,
        debug              => undef,
        filter_store_key   => undef,
        filter_store_value => undef,
        filter_fetch_key   => undef,
        filter_fetch_value => undef,
        autobless          => undef,
        locked             => 0,
    );

    my $self = bless { %defaults, %$args }, $class;

    unless ( !$self->{fh} || $self->{file_offset} ) {
        $self->{file_offset} = tell( $self->{fh} );
    }

    return $self;
}

sub DESTROY {
    ##
    # Close the filehandle, if there is one, when the root goes away.
    ##
    my ($self) = @_;

    if ( $self && $self->{fh} ) {
        close $self->{fh};
    }

    return;
}
1436
1437 1;
1438
1439 __END__
1440
1441 =head1 NAME
1442
1443 DBM::Deep - A pure perl multi-level hash/array DBM
1444
1445 =head1 SYNOPSIS
1446
1447   use DBM::Deep;
1448   my $db = DBM::Deep->new( "foo.db" );
1449
1450   $db->{key} = 'value'; # tie() style
1451   print $db->{key};
1452
1453   $db->put('key' => 'value'); # OO style
1454   print $db->get('key');
1455
1456   # true multi-level support
1457   $db->{my_complex} = [
1458         'hello', { perl => 'rules' },
1459         42, 99,
1460   ];
1461
1462 =head1 DESCRIPTION
1463
1464 A unique flat-file database module, written in pure perl.  True
1465 multi-level hash/array support (unlike MLDBM, which is faked), hybrid
1466 OO / tie() interface, cross-platform FTPable files, and quite fast.  Can
1467 handle millions of keys and unlimited hash levels without significant
1468 slow-down.  Written from the ground-up in pure perl -- this is NOT a
1469 wrapper around a C-based DBM.  Out-of-the-box compatibility with Unix,
1470 Mac OS X and Windows.
1471
1472 =head1 INSTALLATION
1473
1474 Hopefully you are using Perl's excellent CPAN module, which will download
1475 and install the module for you.  If not, get the tarball, and run these
1476 commands:
1477
1478         tar zxf DBM-Deep-*
1479         cd DBM-Deep-*
1480         perl Makefile.PL
1481         make
1482         make test
1483         make install
1484
1485 =head1 SETUP
1486
1487 Construction can be done OO-style (which is the recommended way), or using
1488 Perl's tie() function.  Both are examined here.
1489
1490 =head2 OO CONSTRUCTION
1491
1492 The recommended way to construct a DBM::Deep object is to use the new()
1493 method, which gets you a blessed, tied hash or array reference.
1494
1495         my $db = DBM::Deep->new( "foo.db" );
1496
1497 This opens a new database handle, mapped to the file "foo.db".  If this
1498 file does not exist, it will automatically be created.  DB files are
1499 opened in "r+" (read/write) mode, and the type of object returned is a
1500 hash, unless otherwise specified (see L<OPTIONS> below).
1501
1502 You can pass a number of options to the constructor to specify things like
1503 locking, autoflush, etc.  This is done by passing an inline hash:
1504
1505         my $db = DBM::Deep->new(
1506                 file => "foo.db",
1507                 locking => 1,
1508                 autoflush => 1
1509         );
1510
1511 Notice that the filename is now specified I<inside> the hash with
1512 the "file" parameter, as opposed to being the sole argument to the
1513 constructor.  This is required if any options are specified.
1514 See L<OPTIONS> below for the complete list.
1515
1516
1517
1518 You can also start with an array instead of a hash.  For this, you must
1519 specify the C<type> parameter:
1520
1521         my $db = DBM::Deep->new(
1522                 file => "foo.db",
1523                 type => DBM::Deep->TYPE_ARRAY
1524         );
1525
1526 B<Note:> Specifying the C<type> parameter only takes effect when beginning
1527 a new DB file.  If you create a DBM::Deep object with an existing file, the
1528 C<type> will be loaded from the file header, and an error will be thrown if
1529 the wrong type is passed in.
1530
1531 =head2 TIE CONSTRUCTION
1532
1533 Alternately, you can create a DBM::Deep handle by using Perl's built-in
1534 tie() function.  The object returned from tie() can be used to call methods,
1535 such as lock() and unlock(), but cannot be used to assign to the DBM::Deep
1536 file (as expected with most tie'd objects).
1537
1538         my %hash;
1539         my $db = tie %hash, "DBM::Deep", "foo.db";
1540
1541         my @array;
1542         my $db = tie @array, "DBM::Deep", "bar.db";
1543
1544 As with the OO constructor, you can replace the DB filename parameter with
1545 a hash containing one or more options (see L<OPTIONS> just below for the
1546 complete list).
1547
1548         tie %hash, "DBM::Deep", {
1549                 file => "foo.db",
1550                 locking => 1,
1551                 autoflush => 1
1552         };
1553
1554 =head2 OPTIONS
1555
1556 There are a number of options that can be passed in when constructing your
1557 DBM::Deep objects.  These apply to both the OO- and tie- based approaches.
1558
1559 =over
1560
1561 =item * file
1562
1563 Filename of the DB file to link the handle to.  You can pass a full absolute
1564 filesystem path, partial path, or a plain filename if the file is in the
1565 current working directory.  This is a required parameter (though q.v. fh).
1566
1567 =item * fh
1568
1569 If you want, you can pass in the fh instead of the file. This is most useful for doing
1570 something like:
1571
1572   my $db = DBM::Deep->new( { fh => \*DATA } );
1573
1574 You are responsible for making sure that the fh has been opened appropriately for your
1575 needs. If you open it read-only and attempt to write, an exception will be thrown. If you
1576 open it write-only or append-only, an exception will be thrown immediately as DBM::Deep
1577 needs to read from the fh.
1578
1579 =item * file_offset
1580
1581 This is the offset within the file that the DBM::Deep db starts. Most of the time, you will
1582 not need to set this. However, it's there if you want it.
1583
1584 If you pass in fh and do not set this, it will be set appropriately.
1585
1586 =item * type
1587
1588 This parameter specifies what type of object to create, a hash or array.  Use
1589 one of these two constants: C<DBM::Deep-E<gt>TYPE_HASH> or C<DBM::Deep-E<gt>TYPE_ARRAY>.
1590 This only takes effect when beginning a new file.  This is an optional
1591 parameter, and defaults to C<DBM::Deep-E<gt>TYPE_HASH>.
1592
1593 =item * locking
1594
1595 Specifies whether locking is to be enabled.  DBM::Deep uses Perl's Fcntl flock()
1596 function to lock the database in exclusive mode for writes, and shared mode for
1597 reads.  Pass any true value to enable.  This affects the base DB handle I<and
1598 any child hashes or arrays> that use the same DB file.  This is an optional
1599 parameter, and defaults to 0 (disabled).  See L<LOCKING> below for more.
1600
1601 =item * autoflush
1602
1603 Specifies whether autoflush is to be enabled on the underlying filehandle.
1604 This obviously slows down write operations, but is required if you may have
1605 multiple processes accessing the same DB file (also consider enabling I<locking>).
1606 Pass any true value to enable.  This is an optional parameter, and defaults to 0
1607 (disabled).
1608
1609 =item * autobless
1610
1611 If I<autobless> mode is enabled, DBM::Deep will preserve blessed hashes, and
1612 restore them when fetched.  This is an B<experimental> feature, and does have
1613 side-effects.  Basically, when hashes are re-blessed into their original
1614 classes, they are no longer blessed into the DBM::Deep class!  So you won't be
1615 able to call any DBM::Deep methods on them.  You have been warned.
1616 This is an optional parameter, and defaults to 0 (disabled).
1617
1618 =item * filter_*
1619
1620 See L<FILTERS> below.
1621
1622 =item * debug
1623
1624 Setting I<debug> mode will make all errors non-fatal, dump them out to
1625 STDERR, and continue on.  This is for debugging purposes only, and probably
1626 not what you want.  This is an optional parameter, and defaults to 0 (disabled).
1627
1628 B<NOTE>: This parameter is considered deprecated and should not be used anymore.
1629
1630 =back
1631
1632 =head1 TIE INTERFACE
1633
1634 With DBM::Deep you can access your databases using Perl's standard hash/array
1635 syntax.  Because all DBM::Deep objects are I<tied> to hashes or arrays, you can
1636 treat them as such.  DBM::Deep will intercept all reads/writes and direct them
1637 to the right place -- the DB file.  This has nothing to do with the
1638 L<TIE CONSTRUCTION> section above.  This simply tells you how to use DBM::Deep
1639 using regular hashes and arrays, rather than calling functions like C<get()>
1640 and C<put()> (although those work too).  It is entirely up to you how you want
1641 to access your databases.
1642
1643 =head2 HASHES
1644
1645 You can treat any DBM::Deep object like a normal Perl hash reference.  Add keys,
1646 or even nested hashes (or arrays) using standard Perl syntax:
1647
1648         my $db = DBM::Deep->new( "foo.db" );
1649
1650         $db->{mykey} = "myvalue";
1651         $db->{myhash} = {};
1652         $db->{myhash}->{subkey} = "subvalue";
1653
1654         print $db->{myhash}->{subkey} . "\n";
1655
1656 You can even step through hash keys using the normal Perl C<keys()> function:
1657
1658         foreach my $key (keys %$db) {
1659                 print "$key: " . $db->{$key} . "\n";
1660         }
1661
1662 Remember that Perl's C<keys()> function extracts I<every> key from the hash and
1663 pushes them onto an array, all before the loop even begins.  If you have an
1664 extra large hash, this may exhaust Perl's memory.  Instead, consider using
1665 Perl's C<each()> function, which pulls keys/values one at a time, using very
1666 little memory:
1667
1668         while (my ($key, $value) = each %$db) {
1669                 print "$key: $value\n";
1670         }
1671
1672 Please note that when using C<each()>, you should always pass a direct
1673 hash reference, not a lookup.  Meaning, you should B<never> do this:
1674
1675         # NEVER DO THIS
1676         while (my ($key, $value) = each %{$db->{foo}}) { # BAD
1677
1678 This causes an infinite loop, because for each iteration, Perl is calling
1679 FETCH() on the $db handle, resulting in a "new" hash for foo every time, so
1680 it effectively keeps returning the first key over and over again. Instead,
1681 assign a temporary variable to C<< $db->{foo} >>, then pass that to each().
1682
1683 =head2 ARRAYS
1684
1685 As with hashes, you can treat any DBM::Deep object like a normal Perl array
1686 reference.  This includes inserting, removing and manipulating elements,
1687 and the C<push()>, C<pop()>, C<shift()>, C<unshift()> and C<splice()> functions.
1688 The object must have first been created using type C<DBM::Deep-E<gt>TYPE_ARRAY>,
1689 or simply be a nested array reference inside a hash.  Example:
1690
1691         my $db = DBM::Deep->new(
1692                 file => "foo-array.db",
1693                 type => DBM::Deep->TYPE_ARRAY
1694         );
1695
1696         $db->[0] = "foo";
1697         push @$db, "bar", "baz";
1698         unshift @$db, "bah";
1699
1700         my $last_elem = pop @$db; # baz
1701         my $first_elem = shift @$db; # bah
1702         my $second_elem = $db->[1]; # bar
1703
1704         my $num_elements = scalar @$db;
1705
1706 =head1 OO INTERFACE
1707
1708 In addition to the I<tie()> interface, you can also use a standard OO interface
1709 to manipulate all aspects of DBM::Deep databases.  Each type of object (hash or
1710 array) has its own methods, but both types share the following common methods:
1711 C<put()>, C<get()>, C<exists()>, C<delete()> and C<clear()>.
1712
1713 =over
1714
1715 =item * new() / clone()
1716
1717 These are the constructor and copy-functions.
1718
1719 =item * put() / store()
1720
1721 Stores a new hash key/value pair, or sets an array element value.  Takes two
1722 arguments, the hash key or array index, and the new value.  The value can be
1723 a scalar, hash ref or array ref.  Returns true on success, false on failure.
1724
1725         $db->put("foo", "bar"); # for hashes
1726         $db->put(1, "bar"); # for arrays
1727
1728 =item * get() / fetch()
1729
1730 Fetches the value of a hash key or array element.  Takes one argument: the hash
1731 key or array index.  Returns a scalar, hash ref or array ref, depending on the
1732 data type stored.
1733
1734         my $value = $db->get("foo"); # for hashes
1735         my $value = $db->get(1); # for arrays
1736
1737 =item * exists()
1738
1739 Checks if a hash key or array index exists.  Takes one argument: the hash key
1740 or array index.  Returns true if it exists, false if not.
1741
1742         if ($db->exists("foo")) { print "yay!\n"; } # for hashes
1743         if ($db->exists(1)) { print "yay!\n"; } # for arrays
1744
1745 =item * delete()
1746
1747 Deletes one hash key/value pair or array element.  Takes one argument: the hash
1748 key or array index.  Returns the deleted value, or undef if not found.  For arrays,
1749 the remaining elements located after the deleted element are NOT moved over.
1750 The deleted element is essentially just undefined, which is exactly how Perl's
1751 internal arrays work.  Please note that the space occupied by the deleted
1752 key/value or element is B<not> reused again -- see L<UNUSED SPACE RECOVERY>
1753 below for details and workarounds.
1754
1755         $db->delete("foo"); # for hashes
1756         $db->delete(1); # for arrays
1757
1758 =item * clear()
1759
1760 Deletes B<all> hash keys or array elements.  Takes no arguments.  No return
1761 value.  Please note that the space occupied by the deleted keys/values or
1762 elements is B<not> reused again -- see L<UNUSED SPACE RECOVERY> below for
1763 details and workarounds.
1764
1765         $db->clear(); # hashes or arrays
1766
1767 =item * lock() / unlock()
1768
1769 q.v. Locking.
1770
1771 =item * optimize()
1772
1773 Recover lost disk space.
1774
1775 =item * import() / export()
1776
1777 Data going in and out.
1778
1779 =item * set_digest() / set_pack() / set_filter()
1780
1781 q.v. adjusting the internal parameters.
1782
1783 =back
1784
1785 =head2 HASHES
1786
1787 For hashes, DBM::Deep supports all the common methods described above, and the
1788 following additional methods: C<first_key()> and C<next_key()>.
1789
1790 =over
1791
1792 =item * first_key()
1793
1794 Returns the "first" key in the hash.  As with built-in Perl hashes, keys are
1795 fetched in an undefined order (which appears random).  Takes no arguments,
1796 returns the key as a scalar value.
1797
1798         my $key = $db->first_key();
1799
1800 =item * next_key()
1801
1802 Returns the "next" key in the hash, given the previous one as the sole argument.
1803 Returns undef if there are no more keys to be fetched.
1804
1805         $key = $db->next_key($key);
1806
1807 =back
1808
1809 Here are some examples of using hashes:
1810
1811         my $db = DBM::Deep->new( "foo.db" );
1812
1813         $db->put("foo", "bar");
1814         print "foo: " . $db->get("foo") . "\n";
1815
1816         $db->put("baz", {}); # new child hash ref
1817         $db->get("baz")->put("buz", "biz");
1818         print "buz: " . $db->get("baz")->get("buz") . "\n";
1819
1820         my $key = $db->first_key();
1821         while ($key) {
1822                 print "$key: " . $db->get($key) . "\n";
1823                 $key = $db->next_key($key);
1824         }
1825
1826         if ($db->exists("foo")) { $db->delete("foo"); }
1827
1828 =head2 ARRAYS
1829
1830 For arrays, DBM::Deep supports all the common methods described above, and the
1831 following additional methods: C<length()>, C<push()>, C<pop()>, C<shift()>,
1832 C<unshift()> and C<splice()>.
1833
1834 =over
1835
1836 =item * length()
1837
1838 Returns the number of elements in the array.  Takes no arguments.
1839
1840         my $len = $db->length();
1841
1842 =item * push()
1843
1844 Adds one or more elements onto the end of the array.  Accepts scalars, hash
1845 refs or array refs.  No return value.
1846
1847         $db->push("foo", "bar", {});
1848
1849 =item * pop()
1850
1851 Fetches the last element in the array, and deletes it.  Takes no arguments.
1852 Returns the element value, or undef if the array is empty.
1853
1854         my $elem = $db->pop();
1855
1856 =item * shift()
1857
1858 Fetches the first element in the array, deletes it, then shifts all the
1859 remaining elements over to take up the space.  Returns the element value.  This
1860 method is not recommended with large arrays -- see L<LARGE ARRAYS> below for
1861 details.
1862
1863         my $elem = $db->shift();
1864
1865 =item * unshift()
1866
1867 Inserts one or more elements onto the beginning of the array, shifting all
1868 existing elements over to make room.  Accepts scalars, hash refs or array refs.
1869 No return value.  This method is not recommended with large arrays -- see
1870 L<LARGE ARRAYS> below for details.
1871
1872         $db->unshift("foo", "bar", {});
1873
1874 =item * splice()
1875
1876 Performs exactly like Perl's built-in function of the same name.  See
1877 L<perlfunc/splice> for usage -- it is too complicated to document here.  This method is
1878 not recommended with large arrays -- see L<LARGE ARRAYS> below for details.
1879
1880 =back
1881
1882 Here are some examples of using arrays:
1883
1884         my $db = DBM::Deep->new(
1885                 file => "foo.db",
1886                 type => DBM::Deep->TYPE_ARRAY
1887         );
1888
1889         $db->push("bar", "baz");
1890         $db->unshift("foo");
1891         $db->put(3, "buz");
1892
1893         my $len = $db->length();
1894         print "length: $len\n"; # 4
1895
1896         for (my $k=0; $k<$len; $k++) {
1897                 print "$k: " . $db->get($k) . "\n";
1898         }
1899
1900         $db->splice(1, 2, "biz", "baf");
1901
1902         while (my $elem = shift @$db) {
1903                 print "shifted: $elem\n";
1904         }
1905
1906 =head1 LOCKING
1907
1908 Enable automatic file locking by passing a true value to the C<locking>
1909 parameter when constructing your DBM::Deep object (see L<SETUP> above).
1910
1911         my $db = DBM::Deep->new(
1912                 file => "foo.db",
1913                 locking => 1
1914         );
1915
1916 This causes DBM::Deep to C<flock()> the underlying filehandle with exclusive
1917 mode for writes, and shared mode for reads.  This is required if you have
1918 multiple processes accessing the same database file, to avoid file corruption.
1919 Please note that C<flock()> does NOT work for files over NFS.  See L<DB OVER
1920 NFS> below for more.
1921
1922 =head2 EXPLICIT LOCKING
1923
1924 You can explicitly lock a database, so it remains locked for multiple
1925 transactions.  This is done by calling the C<lock()> method, and passing an
1926 optional lock mode argument (defaults to exclusive mode).  This is particularly
1927 useful for things like counters, where the current value needs to be fetched,
1928 then incremented, then stored again.
1929
1930         $db->lock();
1931         my $counter = $db->get("counter");
1932         $counter++;
1933         $db->put("counter", $counter);
1934         $db->unlock();
1935
1936         # or...
1937
1938         $db->lock();
1939         $db->{counter}++;
1940         $db->unlock();
1941
1942 You can pass C<lock()> an optional argument, which specifies which mode to use
1943 (exclusive or shared).  Use one of these two constants: C<DBM::Deep-E<gt>LOCK_EX>
1944 or C<DBM::Deep-E<gt>LOCK_SH>.  These are passed directly to C<flock()>, and are the
1945 same as the constants defined in Perl's C<Fcntl> module.
1946
1947         $db->lock( DBM::Deep->LOCK_SH );
1948         # something here
1949         $db->unlock();
1950
1951 =head1 IMPORTING/EXPORTING
1952
1953 You can import existing complex structures by calling the C<import()> method,
1954 and export an entire database into an in-memory structure using the C<export()>
1955 method.  Both are examined here.
1956
1957 =head2 IMPORTING
1958
1959 Say you have an existing hash with nested hashes/arrays inside it.  Instead of
1960 walking the structure and adding keys/elements to the database as you go,
1961 simply pass a reference to the C<import()> method.  This recursively adds
1962 everything to an existing DBM::Deep object for you.  Here is an example:
1963
1964         my $struct = {
1965                 key1 => "value1",
1966                 key2 => "value2",
1967                 array1 => [ "elem0", "elem1", "elem2" ],
1968                 hash1 => {
1969                         subkey1 => "subvalue1",
1970                         subkey2 => "subvalue2"
1971                 }
1972         };
1973
1974         my $db = DBM::Deep->new( "foo.db" );
1975         $db->import( $struct );
1976
1977         print $db->{key1} . "\n"; # prints "value1"
1978
1979 This recursively imports the entire C<$struct> object into C<$db>, including
1980 all nested hashes and arrays.  If the DBM::Deep object contains existing data,
1981 keys are merged with the existing ones, replacing if they already exist.
1982 The C<import()> method can be called on any database level (not just the base
1983 level), and works with both hash and array DB types.
1984
1985 B<Note:> Make sure your existing structure has no circular references in it.
1986 These will cause an infinite loop when importing.
1987
1988 =head2 EXPORTING
1989
1990 Calling the C<export()> method on an existing DBM::Deep object will return
1991 a reference to a new in-memory copy of the database.  The export is done
1992 recursively, so all nested hashes/arrays are all exported to standard Perl
1993 objects.  Here is an example:
1994
1995         my $db = DBM::Deep->new( "foo.db" );
1996
1997         $db->{key1} = "value1";
1998         $db->{key2} = "value2";
1999         $db->{hash1} = {};
2000         $db->{hash1}->{subkey1} = "subvalue1";
2001         $db->{hash1}->{subkey2} = "subvalue2";
2002
2003         my $struct = $db->export();
2004
2005         print $struct->{key1} . "\n"; # prints "value1"
2006
2007 This makes a complete copy of the database in memory, and returns a reference
2008 to it.  The C<export()> method can be called on any database level (not just
2009 the base level), and works with both hash and array DB types.  Be careful of
2010 large databases -- you can store a lot more data in a DBM::Deep object than an
2011 in-memory Perl structure.
2012
2013 B<Note:> Make sure your database has no circular references in it.
2014 These will cause an infinite loop when exporting.
2015
2016 =head1 FILTERS
2017
2018 DBM::Deep has a number of hooks where you can specify your own Perl function
2019 to perform filtering on incoming or outgoing data.  This is a perfect
2020 way to extend the engine, and implement things like real-time compression or
2021 encryption.  Filtering applies to the base DB level, and all child hashes /
2022 arrays.  Filter hooks can be specified when your DBM::Deep object is first
2023 constructed, or by calling the C<set_filter()> method at any time.  There are
2024 four available filter hooks, described below:
2025
2026 =over
2027
2028 =item * filter_store_key
2029
2030 This filter is called whenever a hash key is stored.  It
2031 is passed the incoming key, and expected to return a transformed key.
2032
2033 =item * filter_store_value
2034
2035 This filter is called whenever a hash key or array element is stored.  It
2036 is passed the incoming value, and expected to return a transformed value.
2037
2038 =item * filter_fetch_key
2039
2040 This filter is called whenever a hash key is fetched (i.e. via
2041 C<first_key()> or C<next_key()>).  It is passed the transformed key,
2042 and expected to return the plain key.
2043
2044 =item * filter_fetch_value
2045
2046 This filter is called whenever a hash key or array element is fetched.
2047 It is passed the transformed value, and expected to return the plain value.
2048
2049 =back
2050
2051 Here are the two ways to set up a filter hook:
2052
2053         my $db = DBM::Deep->new(
2054                 file => "foo.db",
2055                 filter_store_value => \&my_filter_store,
2056                 filter_fetch_value => \&my_filter_fetch
2057         );
2058
2059         # or...
2060
2061         $db->set_filter( "filter_store_value", \&my_filter_store );
2062         $db->set_filter( "filter_fetch_value", \&my_filter_fetch );
2063
2064 Your filter function will be called only when dealing with SCALAR keys or
2065 values.  When nested hashes and arrays are being stored/fetched, filtering
2066 is bypassed.  Filters are called as static functions, passed a single SCALAR
2067 argument, and expected to return a single SCALAR value.  If you want to
2068 remove a filter, set the function reference to C<undef>:
2069
2070         $db->set_filter( "filter_store_value", undef );
2071
2072 =head2 REAL-TIME ENCRYPTION EXAMPLE
2073
2074 Here is a working example that uses the I<Crypt::Blowfish> module to
2075 do real-time encryption / decryption of keys & values with DBM::Deep Filters.
2076 Please visit L<http://search.cpan.org/search?module=Crypt::Blowfish> for more
2077 on I<Crypt::Blowfish>.  You'll also need the I<Crypt::CBC> module.
2078
2079         use DBM::Deep;
2080         use Crypt::Blowfish;
2081         use Crypt::CBC;
2082
2083         my $cipher = Crypt::CBC->new({
2084                 'key'             => 'my secret key',
2085                 'cipher'          => 'Blowfish',
2086                 'iv'              => '$KJh#(}q',
2087                 'regenerate_key'  => 0,
2088                 'padding'         => 'space',
2089                 'prepend_iv'      => 0
2090         });
2091
2092         my $db = DBM::Deep->new(
2093                 file => "foo-encrypt.db",
2094                 filter_store_key => \&my_encrypt,
2095                 filter_store_value => \&my_encrypt,
2096                 filter_fetch_key => \&my_decrypt,
2097                 filter_fetch_value => \&my_decrypt,
2098         );
2099
2100         $db->{key1} = "value1";
2101         $db->{key2} = "value2";
2102         print "key1: " . $db->{key1} . "\n";
2103         print "key2: " . $db->{key2} . "\n";
2104
2105         undef $db;
2106         exit;
2107
2108         sub my_encrypt {
2109                 return $cipher->encrypt( $_[0] );
2110         }
2111         sub my_decrypt {
2112                 return $cipher->decrypt( $_[0] );
2113         }
2114
2115 =head2 REAL-TIME COMPRESSION EXAMPLE
2116
2117 Here is a working example that uses the I<Compress::Zlib> module to do real-time
2118 compression / decompression of keys & values with DBM::Deep Filters.
2119 Please visit L<http://search.cpan.org/search?module=Compress::Zlib> for
2120 more on I<Compress::Zlib>.
2121
2122         use DBM::Deep;
2123         use Compress::Zlib;
2124
2125         my $db = DBM::Deep->new(
2126                 file => "foo-compress.db",
2127                 filter_store_key => \&my_compress,
2128                 filter_store_value => \&my_compress,
2129                 filter_fetch_key => \&my_decompress,
2130                 filter_fetch_value => \&my_decompress,
2131         );
2132
2133         $db->{key1} = "value1";
2134         $db->{key2} = "value2";
2135         print "key1: " . $db->{key1} . "\n";
2136         print "key2: " . $db->{key2} . "\n";
2137
2138         undef $db;
2139         exit;
2140
2141         sub my_compress {
2142                 return Compress::Zlib::memGzip( $_[0] ) ;
2143         }
2144         sub my_decompress {
2145                 return Compress::Zlib::memGunzip( $_[0] ) ;
2146         }
2147
2148 B<Note:> Filtering of keys only applies to hashes.  Array "keys" are
2149 actually numerical index numbers, and are not filtered.
2150
2151 =head1 ERROR HANDLING
2152
2153 Most DBM::Deep methods return a true value for success, and call die() on
2154 failure.  You can wrap calls in an eval block to catch the die.
2155
2156         my $db = DBM::Deep->new( "foo.db" ); # create hash
2157         eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call
2158
2159         print $@;           # prints error message
2160
2161 =head1 LARGEFILE SUPPORT
2162
2163 If you have a 64-bit system, and your Perl is compiled with both LARGEFILE
2164 and 64-bit support, you I<may> be able to create databases larger than 2 GB.
2165 DBM::Deep by default uses 32-bit file offset tags, but these can be changed
2166 by calling the static C<set_pack()> method before you do anything else.
2167
2168         DBM::Deep::set_pack(8, 'Q');
2169
2170 This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words
2171 instead of 32-bit longs.  After setting these values your DB files have a
2172 theoretical maximum size of 16 XB (exabytes).
2173
2174 B<Note:> Changing these values will B<NOT> work for existing database files.
2175 Only change this for new files, and make sure it stays set consistently
2176 throughout the file's life.  If you do set these values, you can no longer
2177 access 32-bit DB files.  You can, however, call C<set_pack(4, 'N')> to change
2178 back to 32-bit mode.
2179
2180 B<Note:> I have not personally tested files > 2 GB -- all my systems have
2181 only a 32-bit Perl.  However, I have received user reports that this does
2182 indeed work!
2183
2184 =head1 LOW-LEVEL ACCESS
2185
2186 If you require low-level access to the underlying filehandle that DBM::Deep uses,
2187 you can call the C<_fh()> method, which returns the handle:
2188
2189         my $fh = $db->_fh();
2190
2191 This method can be called on the root level of the database, or any child
2192 hashes or arrays.  All levels share a I<root> structure, which contains things
2193 like the filehandle, a reference counter, and all the options specified
2194 when you created the object.  You can get access to this root structure by
2195 calling the C<_root()> method.
2196
2197         my $root = $db->_root();
2198
2199 This is useful for changing options after the object has already been created,
2200 such as enabling/disabling locking, or debug modes.  You can also
2201 store your own temporary user data in this structure (be wary of name
2202 collision), which is then accessible from any child hash or array.
2203
2204 =head1 CUSTOM DIGEST ALGORITHM
2205
2206 DBM::Deep by default uses the I<Message Digest 5> (MD5) algorithm for hashing
2207 keys.  However you can override this, and use another algorithm (such as SHA-256)
2208 or even write your own.  But please note that DBM::Deep currently expects zero
2209 collisions, so your algorithm has to be I<perfect>, so to speak.
2210 Collision detection may be introduced in a later version.
2211
2212
2213
2214 You can specify a custom digest algorithm by calling the static C<set_digest()>
2215 function, passing a reference to a subroutine, and the length of the algorithm's
2216 hashes (in bytes).  This is a global static function, which affects ALL DBM::Deep
2217 objects.  Here is a working example that uses a 256-bit hash from the
2218 I<Digest::SHA256> module.  Please see
2219 L<http://search.cpan.org/search?module=Digest::SHA256> for more.
2220
2221         use DBM::Deep;
2222         use Digest::SHA256;
2223
2224         my $context = Digest::SHA256::new(256);
2225
2226         DBM::Deep::set_digest( \&my_digest, 32 );
2227
2228         my $db = DBM::Deep->new( "foo-sha.db" );
2229
2230         $db->{key1} = "value1";
2231         $db->{key2} = "value2";
2232         print "key1: " . $db->{key1} . "\n";
2233         print "key2: " . $db->{key2} . "\n";
2234
2235         undef $db;
2236         exit;
2237
2238         sub my_digest {
2239                 return substr( $context->hash($_[0]), 0, 32 );
2240         }
2241
2242 B<Note:> Your returned digest strings must be B<EXACTLY> the number
2243 of bytes you specify in the C<set_digest()> function (in this case 32).
2244
2245 =head1 CIRCULAR REFERENCES
2246
2247 DBM::Deep has B<experimental> support for circular references.  Meaning you
2248 can have a nested hash key or array element that points to a parent object.
2249 This relationship is stored in the DB file, and is preserved between sessions.
2250 Here is an example:
2251
2252         my $db = DBM::Deep->new( "foo.db" );
2253
2254         $db->{foo} = "bar";
2255         $db->{circle} = $db; # ref to self
2256
2257         print $db->{foo} . "\n"; # prints "bar"
2258         print $db->{circle}->{foo} . "\n"; # prints "bar" again
2259
2260 One catch is, passing the object to a function that recursively walks the
2261 object tree (such as I<Data::Dumper> or even the built-in C<optimize()> or
2262 C<export()> methods) will result in an infinite loop.  The other catch is,
2263 if you fetch the I<key> of a circular reference (i.e. using the C<first_key()>
2264 or C<next_key()> methods), you will get the I<target object's key>, not the
2265 ref's key.  This gets even more interesting with the above example, where
2266 the I<circle> key points to the base DB object, which technically doesn't
2267 have a key.  So I made DBM::Deep return "[base]" as the key name in that
2268 special case.
2269
2270 =head1 CAVEATS / ISSUES / BUGS
2271
2272 This section describes all the known issues with DBM::Deep.  If you have found
2273 something that is not listed here, please send e-mail to L<jhuckaby@cpan.org>.
2274
2275 =head2 UNUSED SPACE RECOVERY
2276
2277 One major caveat with DBM::Deep is that space occupied by existing keys and
2278 values is not recovered when they are deleted.  Meaning if you keep deleting
2279 and adding new keys, your file will continuously grow.  I am working on this,
2280 but in the meantime you can call the built-in C<optimize()> method from time to
2281 time (perhaps in a crontab or something) to recover all your unused space.
2282
2283         $db->optimize(); # returns true on success
2284
2285 This rebuilds the ENTIRE database into a new file, then moves it on top of
2286 the original.  The new file will have no unused space, thus it will take up as
2287 little disk space as possible.  Please note that this operation can take
2288 a long time for large files, and you need enough disk space to temporarily hold
2289 2 copies of your DB file.  The temporary file is created in the same directory
2290 as the original, named with a ".tmp" extension, and is deleted when the
2291 operation completes.  Oh, and if locking is enabled, the DB is automatically
2292 locked for the entire duration of the copy.
2293
2294 B<WARNING:> Only call optimize() on the top-level node of the database, and
2295 make sure there are no child references lying around.  DBM::Deep keeps a reference
2296 counter, and if it is greater than 1, optimize() will abort and return undef.
2297
2298 =head2 AUTOVIVIFICATION
2299
2300 Unfortunately, autovivification doesn't work with tied hashes.  This appears to
2301 be a bug in Perl's tie() system, as I<Jakob Schmidt> encountered the very same
2302 issue with his I<DWH_File> module (see L<http://search.cpan.org/search?module=DWH_File>),
2303 and it is also mentioned in the BUGS section for the I<MLDBM> module (see
2304 L<http://search.cpan.org/search?module=MLDBM>).  Basically, on a new db file,
2305 this does not work:
2306
2307         $db->{foo}->{bar} = "hello";
2308
2309 Since "foo" doesn't exist, you cannot add "bar" to it.  You end up with "foo"
2310 being an empty hash.  Try this instead, which works fine:
2311
2312         $db->{foo} = { bar => "hello" };
2313
2314 As of Perl 5.8.7, this bug still exists.  I have walked very carefully through
2315 the execution path, and Perl indeed passes an empty hash to the STORE() method.
2316 Probably a bug in Perl.
2317
2318 =head2 FILE CORRUPTION
2319
2320 The current level of error handling in DBM::Deep is minimal.  Files I<are> checked
2321 for a 32-bit signature when opened, but other corruption in files can cause
2322 segmentation faults.  DBM::Deep may try to seek() past the end of a file, or get
2323 stuck in an infinite loop depending on the level of corruption.  File write
2324 operations are not checked for failure (for speed), so if you happen to run
2325 out of disk space, DBM::Deep will probably fail in a bad way.  These things will
2326 be addressed in a later version of DBM::Deep.
2327
2328 =head2 DB OVER NFS
2329
2330 Beware of using DB files over NFS.  DBM::Deep uses flock(), which works well on local
2331 filesystems, but will NOT protect you from file corruption over NFS.  I've heard
2332 about setting up your NFS server with a locking daemon, then using lockf() to
2333 lock your files, but your mileage may vary there as well.  From what I
2334 understand, there is no real way to do it.  However, if you need access to the
2335 underlying filehandle in DBM::Deep for using some other kind of locking scheme like
2336 lockf(), see the L<LOW-LEVEL ACCESS> section above.
2337
2338 =head2 COPYING OBJECTS
2339
2340 Beware of copying tied objects in Perl.  Very strange things can happen.
2341 Instead, use DBM::Deep's C<clone()> method which safely copies the object and
2342 returns a new, blessed, tied hash or array to the same level in the DB.
2343
2344         my $copy = $db->clone();
2345
2346 B<Note>: Since clone() here is cloning the object, not the database location, any
2347 modifications to either $db or $copy will be visible in both.
2348
2349 =head2 LARGE ARRAYS
2350
2351 Beware of using C<shift()>, C<unshift()> or C<splice()> with large arrays.
2352 These functions cause every element in the array to move, which can be murder
2353 on DBM::Deep, as every element has to be fetched from disk, then stored again in
2354 a different location.  This will be addressed in the forthcoming version 1.00.
2355
2356 =head2 WRITEONLY FILES
2357
2358 If you pass in a filehandle to new(), you may have opened it in either a readonly or
2359 writeonly mode. STORE will verify that the filehandle is writable. However, there
2360 doesn't seem to be a good way to determine if a filehandle is readable. And, if the
2361 filehandle isn't readable, it's not clear what will happen. So, don't do that.
2362
2363 =head1 PERFORMANCE
2364
2365 This section discusses DBM::Deep's speed and memory usage.
2366
2367 =head2 SPEED
2368
2369 Obviously, DBM::Deep isn't going to be as fast as some C-based DBMs, such as
2370 the almighty I<BerkeleyDB>.  But it makes up for it in features like true
2371 multi-level hash/array support, and cross-platform FTPable files.  Even so,
2372 DBM::Deep is still pretty fast, and the speed stays fairly consistent, even
2373 with huge databases.  Here is some test data:
2374
2375         Adding 1,000,000 keys to new DB file...
2376
2377         At 100 keys, avg. speed is 2,703 keys/sec
2378         At 200 keys, avg. speed is 2,642 keys/sec
2379         At 300 keys, avg. speed is 2,598 keys/sec
2380         At 400 keys, avg. speed is 2,578 keys/sec
2381         At 500 keys, avg. speed is 2,722 keys/sec
2382         At 600 keys, avg. speed is 2,628 keys/sec
2383         At 700 keys, avg. speed is 2,700 keys/sec
2384         At 800 keys, avg. speed is 2,607 keys/sec
2385         At 900 keys, avg. speed is 2,190 keys/sec
2386         At 1,000 keys, avg. speed is 2,570 keys/sec
2387         At 2,000 keys, avg. speed is 2,417 keys/sec
2388         At 3,000 keys, avg. speed is 1,982 keys/sec
2389         At 4,000 keys, avg. speed is 1,568 keys/sec
2390         At 5,000 keys, avg. speed is 1,533 keys/sec
2391         At 6,000 keys, avg. speed is 1,787 keys/sec
2392         At 7,000 keys, avg. speed is 1,977 keys/sec
2393         At 8,000 keys, avg. speed is 2,028 keys/sec
2394         At 9,000 keys, avg. speed is 2,077 keys/sec
2395         At 10,000 keys, avg. speed is 2,031 keys/sec
2396         At 20,000 keys, avg. speed is 1,970 keys/sec
2397         At 30,000 keys, avg. speed is 2,050 keys/sec
2398         At 40,000 keys, avg. speed is 2,073 keys/sec
2399         At 50,000 keys, avg. speed is 1,973 keys/sec
2400         At 60,000 keys, avg. speed is 1,914 keys/sec
2401         At 70,000 keys, avg. speed is 2,091 keys/sec
2402         At 80,000 keys, avg. speed is 2,103 keys/sec
2403         At 90,000 keys, avg. speed is 1,886 keys/sec
2404         At 100,000 keys, avg. speed is 1,970 keys/sec
2405         At 200,000 keys, avg. speed is 2,053 keys/sec
2406         At 300,000 keys, avg. speed is 1,697 keys/sec
2407         At 400,000 keys, avg. speed is 1,838 keys/sec
2408         At 500,000 keys, avg. speed is 1,941 keys/sec
2409         At 600,000 keys, avg. speed is 1,930 keys/sec
2410         At 700,000 keys, avg. speed is 1,735 keys/sec
2411         At 800,000 keys, avg. speed is 1,795 keys/sec
2412         At 900,000 keys, avg. speed is 1,221 keys/sec
2413         At 1,000,000 keys, avg. speed is 1,077 keys/sec
2414
2415 This test was performed on a PowerMac G4 1GHz running Mac OS X 10.3.2 & Perl
2416 5.8.1, with an 80GB Ultra ATA/100 HD spinning at 7200RPM.  The hash keys and
2417 values were between 6 - 12 chars in length.  The DB file ended up at 210MB.
2418 Run time was 12 min 3 sec.
2419
2420 =head2 MEMORY USAGE
2421
2422 One of the great things about DBM::Deep is that it uses very little memory.
2423 Even with huge databases (1,000,000+ keys) you will not see much increased
2424 memory on your process.  DBM::Deep relies solely on the filesystem for storing
2425 and fetching data.  Here is output from I</usr/bin/top> before even opening a
2426 database handle:
2427
2428           PID USER     PRI  NI  SIZE  RSS SHARE STAT %CPU %MEM   TIME COMMAND
2429         22831 root      11   0  2716 2716  1296 R     0.0  0.2   0:07 perl
2430
2431 Basically the process is taking 2,716K of memory.  And here is the same
2432 process after storing and fetching 1,000,000 keys:
2433
2434           PID USER     PRI  NI  SIZE  RSS SHARE STAT %CPU %MEM   TIME COMMAND
2435         22831 root      14   0  2772 2772  1328 R     0.0  0.2  13:32 perl
2436
2437 Notice the memory usage increased by only 56K.  Test was performed on a 700MHz
2438 x86 box running Linux RedHat 7.2 & Perl 5.6.1.
2439
2440 =head1 DB FILE FORMAT
2441
2442 In case you were interested in the underlying DB file format, it is documented
2443 here in this section.  You don't need to know this to use the module, it's just
2444 included for reference.
2445
2446 =head2 SIGNATURE
2447
2448 DBM::Deep files always start with a 32-bit signature to identify the file type.
2449 This is at offset 0.  The signature is "DPDB" in network byte order.  This is
2450 checked for when the file is opened and an error will be thrown if it's not found.
2451
2452 =head2 TAG
2453
2454 The DBM::Deep file is in a I<tagged format>, meaning each section of the file
2455 has a standard header containing the type of data, the length of data, and then
2456 the data itself.  The type is a single character (1 byte), the length is a
2457 32-bit unsigned long in network byte order, and the data is, well, the data.
2458 Here is how it unfolds:
2459
2460 =head2 MASTER INDEX
2461
2462 Immediately after the 32-bit file signature is the I<Master Index> record.
2463 This is a standard tag header followed by 1024 bytes (in 32-bit mode) or 2048
2464 bytes (in 64-bit mode) of data.  The type is I<H> for hash or I<A> for array,
2465 depending on how the DBM::Deep object was constructed.
2466
2467 The index works by looking at a I<MD5 Hash> of the hash key (or array index
2468 number).  The first 8-bit char of the MD5 signature is the offset into the
2469 index, multiplied by 4 in 32-bit mode, or 8 in 64-bit mode.  The value of the
2470 index element is a file offset of the next tag for the key/element in question,
2471 which is usually a I<Bucket List> tag (see below).
2472
2473 The next tag I<could> be another index, depending on how many keys/elements
2474 exist.  See L<RE-INDEXING> below for details.
2475
2476 =head2 BUCKET LIST
2477
2478 A I<Bucket List> is a collection of 16 MD5 hashes for keys/elements, plus
2479 file offsets to where the actual data is stored.  It starts with a standard
2480 tag header, with type I<B>, and a data size of 320 bytes in 32-bit mode, or
2481 384 bytes in 64-bit mode.  Each MD5 hash is stored in full (16 bytes), plus
2482 the 32-bit or 64-bit file offset for the I<Bucket> containing the actual data.
2483 When the list fills up, a I<Re-Index> operation is performed (See
2484 L<RE-INDEXING> below).
2485
2486 =head2 BUCKET
2487
2488 A I<Bucket> is a tag containing a key/value pair (in hash mode), or a
2489 index/value pair (in array mode).  It starts with a standard tag header with
2490 type I<D> for scalar data (string, binary, etc.), or it could be a nested
2491 hash (type I<H>) or array (type I<A>).  The value comes just after the tag
2492 header.  The size reported in the tag header is only for the value, but then,
2493 just after the value is another size (32-bit unsigned long) and then the plain
2494 key itself.  Since the value is likely to be fetched more often than the plain
2495 key, I figured it would be I<slightly> faster to store the value first.
2496
2497 If the type is I<H> (hash) or I<A> (array), the value is another I<Master Index>
2498 record for the nested structure, where the process begins all over again.
2499
2500 =head2 RE-INDEXING
2501
2502 After a I<Bucket List> grows to 16 records, its allocated space in the file is
2503 exhausted.  Then, when another key/element comes in, the list is converted to a
2504 new index record.  However, this index will look at the next char in the MD5
2505 hash, and arrange new Bucket List pointers accordingly.  This process is called
2506 I<Re-Indexing>.  Basically, a new index tag is created at the file EOF, and all
2507 17 (16 + new one) keys/elements are removed from the old Bucket List and
2508 inserted into the new index.  Several new Bucket Lists are created in the
2509 process, as a new MD5 char from the key is being examined (it is unlikely that
2510 the keys will all share the same next char of their MD5s).
2511
2512 Because of the way the I<MD5> algorithm works, it is impossible to tell exactly
2513 when the Bucket Lists will turn into indexes, but the first round tends to
2514 happen right around 4,000 keys.  You will see a I<slight> decrease in
2515 performance here, but it picks back up pretty quick (see L<SPEED> above).  Then
2516 it takes B<a lot> more keys to exhaust the next level of Bucket Lists.  It's
2517 right around 900,000 keys.  This process can continue nearly indefinitely --
2518 right up until the point the I<MD5> signatures start colliding with each other,
2519 and this is B<EXTREMELY> rare -- like winning the lottery 5 times in a row AND
2520 getting struck by lightning while you are walking to cash in your tickets.
2521 Theoretically, since I<MD5> hashes are 128-bit values, you I<could> have up to
2522 340,282,366,921,000,000,000,000,000,000,000,000,000 keys/elements (I believe
2523 this is 340 undecillion, but don't quote me).
2524
2525 =head2 STORING
2526
2527 When a new key/element is stored, the key (or index number) is first run through
2528 I<Digest::MD5> to get a 128-bit signature (example, in hex:
2529 b05783b0773d894396d475ced9d2f4f6).  Then, the I<Master Index> record is checked
2530 for the first char of the signature (in this case I<b0>).  If it does not exist,
2531 a new I<Bucket List> is created for our key (and the next 15 future keys that
2532 happen to also have I<b0> as their first MD5 char).  The entire MD5 is written
2533 to the I<Bucket List> along with the offset of the new I<Bucket> record (EOF at
2534 this point, unless we are replacing an existing I<Bucket>), where the actual
2535 data will be stored.
2536
2537 =head2 FETCHING
2538
2539 Fetching an existing key/element involves getting a I<Digest::MD5> of the key
2540 (or index number), then walking along the indexes.  If there are enough
2541 keys/elements in this DB level, there might be nested indexes, each linked to
2542 a particular char of the MD5.  Finally, a I<Bucket List> is pointed to, which
2543 contains up to 16 full MD5 hashes.  Each is checked for equality to the key in
2544 question.  If a match is found, the I<Bucket> tag is loaded, where the value and
2545 plain key are stored.
2546
2547 Fetching the plain key occurs when calling the I<first_key()> and I<next_key()>
2548 methods.  In this process the indexes are walked systematically, and each key
2549 is fetched in increasing MD5 order (which is why it appears random).  Once the
2550 I<Bucket> is found, the value is skipped and the plain key returned instead.
2551 B<Note:> Do not count on keys being fetched as if the MD5 hashes were
2552 alphabetically sorted.  This only happens on an index-level -- as soon as the
2553 I<Bucket Lists> are hit, the keys will come out in the order they went in --
2554 so it's pretty much undefined how the keys will come out -- just like Perl's
2555 built-in hashes.
2556
2557 =head1 CODE COVERAGE
2558
2559 We use B<Devel::Cover> to test the code coverage of our tests.  Below is the
2560 B<Devel::Cover> report on this module's test suite.
2561
2562   ---------------------------- ------ ------ ------ ------ ------ ------ ------
2563   File                           stmt   bran   cond    sub    pod   time  total
2564   ---------------------------- ------ ------ ------ ------ ------ ------ ------
2565   blib/lib/DBM/Deep.pm           95.2   83.8   70.0   98.2  100.0   58.0   91.0
2566   blib/lib/DBM/Deep/Array.pm    100.0   91.1  100.0  100.0    n/a   26.7   98.0
2567   blib/lib/DBM/Deep/Hash.pm      95.3   80.0  100.0  100.0    n/a   15.3   92.4
2568   Total                          96.2   84.8   74.4   98.8  100.0  100.0   92.4
2569   ---------------------------- ------ ------ ------ ------ ------ ------ ------
2570
2571 =head1 MORE INFORMATION
2572
2573 Check out the DBM::Deep Google Group at L<http://groups.google.com/group/DBM-Deep>
2574 or send email to L<mailto:DBM-Deep@googlegroups.com>.
2575
2576 =head1 AUTHORS
2577
2578 Joseph Huckaby, L<mailto:jhuckaby@cpan.org>
2579
2580 Rob Kinyon, L<mailto:rkinyon@cpan.org>
2581
2582 Special thanks to Adam Sah and Rich Gaushell!  You know why :-)
2583
2584 =head1 SEE ALSO
2585
2586 perltie(1), Tie::Hash(3), Digest::MD5(3), Fcntl(3), flock(2), lockf(3), nfs(5),
2587 Digest::SHA256(3), Crypt::Blowfish(3), Compress::Zlib(3)
2588
2589 =head1 LICENSE
2590
2591 Copyright (c) 2002-2006 Joseph Huckaby.  All Rights Reserved.
2592 This is free software, you may use it and distribute it under the
2593 same terms as Perl itself.
2594
2595 =cut