r6200@rob-kinyons-computer-2 (orig r9980): rkinyon | 2007-09-22 21:02:54 -0400
[dbsrgits/DBM-Deep.git] / lib / DBM / Deep / Cookbook.pod
CommitLineData
d8db2929 1=head1 NAME
2
3DBM::Deep::Cookbook
4
5=head1 DESCRIPTION
6
7This is the Cookbook for L<DBM::Deep>. It contains useful tips and tricks,
8plus some examples of how to do common tasks.
9
10=head1 RECIPES
11
12=head2 UTF8 data
13
14When you're using UTF8 data, you may run into the "Wide character in print"
15warning. To fix that in 5.8+, do the following:
16
17 my $db = DBM::Deep->new( ... );
18 binmode $db->_fh, ":utf8";
19
20In 5.6, you will have to do the following:
21
22 my $db = DBM::Deep->new( ... );
23 $db->set_filter( 'store_value' => sub { pack "U0C*", unpack "C*", $_[0] } );
24 $db->set_filter( 'retrieve_value' => sub { pack "C*", unpack "U0C*", $_[0] } );
25
26In a future version, you will be able to specify C<utf8 =E<gt> 1> and
27L<DBM::Deep> will do these things for you.
28
1cff45d7 29=head2 Real-time Encryption Example
30
31B<NOTE>: This is just an example of how to write a filter. This most
32definitely should B<NOT> be taken as a proper way to write a filter that does
33encryption.
34
35Here is a working example that uses the I<Crypt::Blowfish> module to
36do real-time encryption / decryption of keys & values with DBM::Deep Filters.
37Please visit L<http://search.cpan.org/search?module=Crypt::Blowfish> for more
38on I<Crypt::Blowfish>. You'll also need the I<Crypt::CBC> module.
39
40 use DBM::Deep;
41 use Crypt::Blowfish;
42 use Crypt::CBC;
43
44 my $cipher = Crypt::CBC->new({
45 'key' => 'my secret key',
46 'cipher' => 'Blowfish',
47 'iv' => '$KJh#(}q',
48 'regenerate_key' => 0,
49 'padding' => 'space',
50 'prepend_iv' => 0
51 });
52
53 my $db = DBM::Deep->new(
54 file => "foo-encrypt.db",
55 filter_store_key => \&my_encrypt,
56 filter_store_value => \&my_encrypt,
57 filter_fetch_key => \&my_decrypt,
58 filter_fetch_value => \&my_decrypt,
59 );
60
61 $db->{key1} = "value1";
62 $db->{key2} = "value2";
63 print "key1: " . $db->{key1} . "\n";
64 print "key2: " . $db->{key2} . "\n";
65
66 undef $db;
67 exit;
68
69 sub my_encrypt {
70 return $cipher->encrypt( $_[0] );
71 }
72 sub my_decrypt {
73 return $cipher->decrypt( $_[0] );
74 }
75
76=head2 Real-time Compression Example
77
78Here is a working example that uses the I<Compress::Zlib> module to do real-time
79compression / decompression of keys & values with DBM::Deep Filters.
80Please visit L<http://search.cpan.org/search?module=Compress::Zlib> for
81more on I<Compress::Zlib>.
82
83 use DBM::Deep;
84 use Compress::Zlib;
85
86 my $db = DBM::Deep->new(
87 file => "foo-compress.db",
88 filter_store_key => \&my_compress,
89 filter_store_value => \&my_compress,
90 filter_fetch_key => \&my_decompress,
91 filter_fetch_value => \&my_decompress,
92 );
93
94 $db->{key1} = "value1";
95 $db->{key2} = "value2";
96 print "key1: " . $db->{key1} . "\n";
97 print "key2: " . $db->{key2} . "\n";
98
99 undef $db;
100 exit;
101
102 sub my_compress {
103 return Compress::Zlib::memGzip( $_[0] ) ;
104 }
105 sub my_decompress {
106 return Compress::Zlib::memGunzip( $_[0] ) ;
107 }
108
109B<Note:> Filtering of keys only applies to hashes. Array "keys" are
110actually numerical indices, and are not filtered.
111
112=head1 Custom Digest Algorithm
113
114DBM::Deep by default uses the I<Message Digest 5> (MD5) algorithm for hashing
115keys. However, you can override this and use another algorithm (such as SHA-256)
116or even write your own. But please note that DBM::Deep currently expects zero
117collisions, so your algorithm has to be I<perfect>, so to speak. Collision
118detection may be introduced in a later version.
119
120You can specify a custom digest algorithm by passing it into the parameter
121list for new(), passing a reference to a subroutine as the 'digest' parameter,
122and the length of the algorithm's hashes (in bytes) as the 'hash_size'
123parameter. Here is a working example that uses a 256-bit hash from the
124I<Digest::SHA256> module. Please see
125L<http://search.cpan.org/search?module=Digest::SHA256> for more information.
126
127 use DBM::Deep;
128 use Digest::SHA256;
129
130 my $context = Digest::SHA256::new(256);
131
132 my $db = DBM::Deep->new(
133 filename => "foo-sha.db",
134 digest => \&my_digest,
135 hash_size => 32,
136 );
137
138 $db->{key1} = "value1";
139 $db->{key2} = "value2";
140 print "key1: " . $db->{key1} . "\n";
141 print "key2: " . $db->{key2} . "\n";
142
143 undef $db;
144 exit;
145
146 sub my_digest {
147 return substr( $context->hash($_[0]), 0, 32 );
148 }
149
150B<Note:> Your returned digest strings must be B<EXACTLY> the number
151of bytes you specify in the hash_size parameter (in this case 32). Undefined
152behavior will occur otherwise.
153
154B<Note:> If you do choose to use a custom digest algorithm, you must set it
155every time you access this file. Otherwise, the default (MD5) will be used.
156
157=head1 PERFORMANCE
158
159Because DBM::Deep is a concurrent datastore, every change is flushed to disk
160immediately and every read goes to disk. This means that DBM::Deep functions
161at the speed of disk (generally 10-20ms) vs. the speed of RAM (generally
16250-70ns), or at least 150-200x slower than the comparable in-memory
163datastructure in Perl.
164
165There are several techniques you can use to speed up how DBM::Deep functions.
166
167=over 4
168
169=item * Put it on a ramdisk
170
171The easiest and quickest mechanism for making DBM::Deep run faster is to create
172a ramdisk and locate the DBM::Deep file there. Doing this as an option may
173become a feature of DBM::Deep, assuming there is a good ramdisk wrapper on CPAN.
174
175=item * Work at the tightest level possible
176
177It is much faster to assign the level of your db that you are working with to
178an intermediate variable than to re-look it up every time. Thus
179
180 # BAD
181 while ( my ($k, $v) = each %{$db->{foo}{bar}{baz}} ) {
182 ...
183 }
184
185 # GOOD
186 my $x = $db->{foo}{bar}{baz};
187 while ( my ($k, $v) = each %$x ) {
188 ...
189 }
190
191=item * Make your file as tight as possible
192
193If you know that you are not going to use more than 65K in your database,
194consider using the C<pack_size =E<gt> 'small'> option. This will instruct
195DBM::Deep to use 16-bit addresses, meaning that the seek times will be less.
196
197=back
198
199=cut