lib/DBM/Deep/Cookbook.pod

   1 =head1 NAME
   2
   3 DBM::Deep::Cookbook - Cookbook for DBM::Deep
   4
   5 =head1 DESCRIPTION
   6
   7 This is the Cookbook for L<DBM::Deep>. It contains useful tips and tricks,
   8 plus some examples of how to do common tasks.
   9
  10 =head1 RECIPES
  11
  12 =head2 UTF8 data
  13
  14 When you're using UTF8 data, you may run into the "Wide character in print"
  15 warning. To fix that in 5.8+, do the following:
  16
  17   my $db = DBM::Deep->new( ... );
  18   binmode $db->_fh, ":utf8";
  19
  20 In 5.6, you will have to do the following:
  21
  22   my $db = DBM::Deep->new( ... );
  23   $db->set_filter( 'store_value' => sub { pack "U0C*", unpack "C*", $_[0] } );
  24   $db->set_filter( 'fetch_value' => sub { pack "C*", unpack "U0C*", $_[0] } );
  25
  26 In a future version, you will be able to specify C<utf8 =E<gt> 1> and
  27 L<DBM::Deep> will do these things for you.
  28
  29 =head2 Real-time Encryption Example
  30
  31 B<NOTE>: This is just an example of how to write a filter. This most
  32 definitely should B<NOT> be taken as a proper way to write a filter that does
  33 encryption.
  34
  35 Here is a working example that uses the I<Crypt::Blowfish> module to
  36 do real-time encryption / decryption of keys & values with DBM::Deep Filters.
  37 Please visit L<http://search.cpan.org/search?module=Crypt::Blowfish> for more
  38 on I<Crypt::Blowfish>. You'll also need the I<Crypt::CBC> module.
  39
  40   use DBM::Deep;
  41   use Crypt::Blowfish;
  42   use Crypt::CBC;
  43
  44   my $cipher = Crypt::CBC->new({
  45       'key'             => 'my secret key',
  46       'cipher'          => 'Blowfish',
  47       'iv'              => '$KJh#(}q',
  48       'regenerate_key'  => 0,
  49       'padding'         => 'space',
  50       'prepend_iv'      => 0
  51   });
  52
  53   my $db = DBM::Deep->new(
  54       file => "foo-encrypt.db",
  55       filter_store_key => \&my_encrypt,
  56       filter_store_value => \&my_encrypt,
  57       filter_fetch_key => \&my_decrypt,
  58       filter_fetch_value => \&my_decrypt,
  59   );
  60
  61   $db->{key1} = "value1";
  62   $db->{key2} = "value2";
  63   print "key1: " . $db->{key1} . "\n";
  64   print "key2: " . $db->{key2} . "\n";
  65
  66   undef $db;
  67   exit;
  68
  69   sub my_encrypt {
  70       return $cipher->encrypt( $_[0] );
  71   }
  72   sub my_decrypt {
  73       return $cipher->decrypt( $_[0] );
  74   }
  75
  76 =head2 Real-time Compression Example
  77
  78 Here is a working example that uses the I<Compress::Zlib> module to do real-time
  79 compression / decompression of keys & values with DBM::Deep Filters.
  80 Please visit L<http://search.cpan.org/search?module=Compress::Zlib> for
  81 more on I<Compress::Zlib>.
  82
  83   use DBM::Deep;
  84   use Compress::Zlib;
  85
  86   my $db = DBM::Deep->new(
  87       file => "foo-compress.db",
  88       filter_store_key => \&my_compress,
  89       filter_store_value => \&my_compress,
  90       filter_fetch_key => \&my_decompress,
  91       filter_fetch_value => \&my_decompress,
  92   );
  93
  94   $db->{key1} = "value1";
  95   $db->{key2} = "value2";
  96   print "key1: " . $db->{key1} . "\n";
  97   print "key2: " . $db->{key2} . "\n";
  98
  99   undef $db;
 100   exit;
 101
 102   sub my_compress {
 103       return Compress::Zlib::memGzip( $_[0] ) ;
 104   }
 105   sub my_decompress {
 106       return Compress::Zlib::memGunzip( $_[0] ) ;
 107   }
 108
 109 B<Note:> Filtering of keys only applies to hashes. Array "keys" are
 110 actually numerical index numbers, and are not filtered.
 111
 112 =head2 Custom Digest Algorithm
 113
 114 DBM::Deep by default uses the I<Message Digest 5> (MD5) algorithm for hashing
 115 keys. However, you can override this and use another algorithm (such as SHA-256)
 116 or even write your own. But please note that DBM::Deep currently expects zero
 117 collisions, so your algorithm has to be I<perfect>, so to speak. Collision
 118 detection may be introduced in a later version.
 119
 120 You can specify a custom digest algorithm by passing it into the parameter
 121 list for new(), passing a reference to a subroutine as the 'digest' parameter,
 122 and the length of the algorithm's hashes (in bytes) as the 'hash_size'
 123 parameter. Here is a working example that uses a 256-bit hash from the
 124 I<Digest::SHA256> module. Please see
 125 L<http://search.cpan.org/search?module=Digest::SHA256> for more information.
 126
 127   use DBM::Deep;
 128   use Digest::SHA256;
 129
 130   my $context = Digest::SHA256::new(256);
 131
 132   my $db = DBM::Deep->new(
 133       file => "foo-sha.db",
 134       digest => \&my_digest,
 135       hash_size => 32,
 136   );
 137
 138   $db->{key1} = "value1";
 139   $db->{key2} = "value2";
 140   print "key1: " . $db->{key1} . "\n";
 141   print "key2: " . $db->{key2} . "\n";
 142
 143   undef $db;
 144   exit;
 145
 146   sub my_digest {
 147       return substr( $context->hash($_[0]), 0, 32 );
 148   }
 149
 150 B<Note:> Your returned digest strings must be B<EXACTLY> the number
 151 of bytes you specify in the hash_size parameter (in this case 32). Undefined
 152 behavior will occur otherwise.
 153
 154 B<Note:> If you do choose to use a custom digest algorithm, you must set it
 155 every time you access this file. Otherwise, the default (MD5) will be used.
 156
 157 =head1 PERFORMANCE
 158
 159 Because DBM::Deep is a concurrent datastore, every change is flushed to disk
 160 immediately and every read goes to disk. This means that DBM::Deep functions
 161 at the speed of disk (generally 10-20ms) vs. the speed of RAM (generally
 162 50-70ns), or at least 150-200x slower than the comparable in-memory
 163 datastructure in Perl.
 164
 165 There are several techniques you can use to speed up how DBM::Deep functions.
 166
 167 =over 4
 168
 169 =item * Put it on a ramdisk
 170
 171 The easiest and quickest way to make DBM::Deep run faster is to create
 172 a ramdisk and locate the DBM::Deep file there. Doing this as an option may
 173 become a feature of DBM::Deep, assuming there is a good ramdisk wrapper on CPAN.
 174
 175 =item * Work at the tightest level possible
 176
 177 It is much faster to assign the level of your db that you are working with to
 178 an intermediate variable than to re-look it up every time. Thus:
 179
 180   # BAD
 181   while ( my ($k, $v) = each %{$db->{foo}{bar}{baz}} ) {
 182     ...
 183   }
 184
 185   # GOOD
 186   my $x = $db->{foo}{bar}{baz};
 187   while ( my ($k, $v) = each %$x ) {
 188     ...
 189   }
 190
 191 =item * Make your file as tight as possible
 192
 193 If you know that you are not going to use more than 65K in your database,
 194 consider using the C<pack_size =E<gt> 'small'> option. This will instruct
 195 DBM::Deep to use 16-bit addresses, meaning that the seek times will be less.
 196
 197 =back
 198
 199 =cut