Commit | Line | Data |
ffed8b01 |
1 | package DBM::Deep; |
2 | |
3 | ## |
4 | # DBM::Deep |
5 | # |
6 | # Description: |
7 | # Multi-level database module for storing hash trees, arrays and simple |
8 | # key/value pairs into FTP-able, cross-platform binary database files. |
9 | # |
10 | # Type `perldoc DBM::Deep` for complete documentation. |
11 | # |
12 | # Usage Examples: |
13 | # my %db; |
14 | # tie %db, 'DBM::Deep', 'my_database.db'; # standard tie() method |
15 | # |
16 | # my $db = new DBM::Deep( 'my_database.db' ); # preferred OO method |
17 | # |
18 | # $db->{my_scalar} = 'hello world'; |
19 | # $db->{my_hash} = { larry => 'genius', hashes => 'fast' }; |
20 | # $db->{my_array} = [ 1, 2, 3, time() ]; |
21 | # $db->{my_complex} = [ 'hello', { perl => 'rules' }, 42, 99 ]; |
22 | # push @{$db->{my_array}}, 'another value'; |
23 | # my @key_list = keys %{$db->{my_hash}}; |
24 | # print "This module " . $db->{my_complex}->[1]->{perl} . "!\n"; |
25 | # |
26 | # Copyright: |
acd4faf2 |
27 | # (c) 2002-2006 Joseph Huckaby. All Rights Reserved. |
ffed8b01 |
28 | # This program is free software; you can redistribute it and/or |
29 | # modify it under the same terms as Perl itself. |
30 | ## |
31 | |
32 | use strict; |
8b957036 |
33 | |
596e9574 |
34 | use Fcntl qw( :DEFAULT :flock :seek ); |
ffed8b01 |
35 | use Digest::MD5 (); |
36 | use Scalar::Util (); |
ffed8b01 |
37 | |
95967a5e |
38 | use DBM::Deep::Engine; |
39 | |
596e9574 |
40 | use vars qw( $VERSION ); |
3a7a0dce |
41 | $VERSION = q(0.99_01); |
ffed8b01 |
42 | |
43 | ## |
44 | # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file. |
45 | # (Perl must be compiled with largefile support for files > 2 GB) |
46 | # |
47 | # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file. |
48 | # (Perl must be compiled with largefile and 64-bit long support) |
49 | ## |
50 | #my $LONG_SIZE = 4; |
51 | #my $LONG_PACK = 'N'; |
52 | |
53 | ## |
54 | # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value. |
55 | # Upgrading this is possible (see above) but probably not necessary. If you need |
56 | # more than 4 GB for a single key or value, this module is really not for you :-) |
57 | ## |
58 | #my $DATA_LENGTH_SIZE = 4; |
59 | #my $DATA_LENGTH_PACK = 'N'; |
6fe26b29 |
60 | our ($LONG_SIZE, $LONG_PACK, $DATA_LENGTH_SIZE, $DATA_LENGTH_PACK); |
ffed8b01 |
61 | |
62 | ## |
63 | # Maximum number of buckets per list before another level of indexing is done. |
64 | # Increase this value for slightly greater speed, but larger database files. |
65 | # DO NOT decrease this value below 16, due to risk of recursive reindex overrun. |
66 | ## |
20f7b20c |
67 | our $MAX_BUCKETS = 16; |
ffed8b01 |
68 | |
69 | ## |
70 | # Better not adjust anything below here, unless you're me :-) |
71 | ## |
72 | |
73 | ## |
74 | # Setup digest function for keys |
75 | ## |
6fe26b29 |
76 | our ($DIGEST_FUNC, $HASH_SIZE); |
ffed8b01 |
77 | #my $DIGEST_FUNC = \&Digest::MD5::md5; |
78 | |
79 | ## |
80 | # Precalculate index and bucket sizes based on values above. |
81 | ## |
82 | #my $HASH_SIZE = 16; |
a20d9a3f |
83 | our ($INDEX_SIZE, $BUCKET_SIZE, $BUCKET_LIST_SIZE); |
ffed8b01 |
84 | |
85 | set_digest(); |
86 | #set_pack(); |
4d35d856 |
87 | #_precalc_sizes(); |
ffed8b01 |
88 | |
89 | ## |
90 | # Setup file and tag signatures. These should never change. |
91 | ## |
81d3d316 |
92 | sub SIG_FILE () { 'DPDB' } |
93 | sub SIG_HASH () { 'H' } |
94 | sub SIG_ARRAY () { 'A' } |
95 | sub SIG_SCALAR () { 'S' } |
96 | sub SIG_NULL () { 'N' } |
97 | sub SIG_DATA () { 'D' } |
98 | sub SIG_INDEX () { 'I' } |
99 | sub SIG_BLIST () { 'B' } |
100 | sub SIG_SIZE () { 1 } |
ffed8b01 |
101 | |
102 | ## |
103 | # Setup constants for users to pass to new() |
104 | ## |
4d35d856 |
105 | sub TYPE_HASH () { SIG_HASH } |
106 | sub TYPE_ARRAY () { SIG_ARRAY } |
107 | sub TYPE_SCALAR () { SIG_SCALAR } |
ffed8b01 |
108 | |
0ca7ea98 |
109 | sub _get_args { |
110 | my $proto = shift; |
111 | |
112 | my $args; |
113 | if (scalar(@_) > 1) { |
114 | if ( @_ % 2 ) { |
115 | $proto->_throw_error( "Odd number of parameters to " . (caller(1))[2] ); |
116 | } |
117 | $args = {@_}; |
118 | } |
4d35d856 |
119 | elsif ( ref $_[0] ) { |
120 | unless ( eval { local $SIG{'__DIE__'}; %{$_[0]} || 1 } ) { |
0ca7ea98 |
121 | $proto->_throw_error( "Not a hashref in args to " . (caller(1))[2] ); |
122 | } |
123 | $args = $_[0]; |
124 | } |
125 | else { |
126 | $args = { file => shift }; |
127 | } |
128 | |
129 | return $args; |
130 | } |
131 | |
ffed8b01 |
132 | sub new { |
133 | ## |
134 | # Class constructor method for Perl OO interface. |
135 | # Calls tie() and returns blessed reference to tied hash or array, |
136 | # providing a hybrid OO/tie interface. |
137 | ## |
138 | my $class = shift; |
0ca7ea98 |
139 | my $args = $class->_get_args( @_ ); |
ffed8b01 |
140 | |
141 | ## |
142 | # Check if we want a tied hash or array. |
143 | ## |
144 | my $self; |
145 | if (defined($args->{type}) && $args->{type} eq TYPE_ARRAY) { |
6fe26b29 |
146 | $class = 'DBM::Deep::Array'; |
147 | require DBM::Deep::Array; |
cc4bef86 |
148 | tie @$self, $class, %$args; |
ffed8b01 |
149 | } |
150 | else { |
6fe26b29 |
151 | $class = 'DBM::Deep::Hash'; |
152 | require DBM::Deep::Hash; |
cc4bef86 |
153 | tie %$self, $class, %$args; |
ffed8b01 |
154 | } |
155 | |
cc4bef86 |
156 | return bless $self, $class; |
ffed8b01 |
157 | } |
158 | |
0795f290 |
159 | sub _init { |
160 | ## |
161 | # Setup $self and bless into this class. |
162 | ## |
163 | my $class = shift; |
164 | my $args = shift; |
165 | |
166 | # These are the defaults to be optionally overridden below |
167 | my $self = bless { |
95967a5e |
168 | type => TYPE_HASH, |
0795f290 |
169 | base_offset => length(SIG_FILE), |
95967a5e |
170 | engine => 'DBM::Deep::Engine', |
0795f290 |
171 | }, $class; |
172 | |
173 | foreach my $param ( keys %$self ) { |
174 | next unless exists $args->{$param}; |
175 | $self->{$param} = delete $args->{$param} |
ffed8b01 |
176 | } |
0795f290 |
177 | |
37c5bcf0 |
178 | # locking implicitly enables autoflush |
179 | if ($args->{locking}) { $args->{autoflush} = 1; } |
180 | |
0795f290 |
181 | $self->{root} = exists $args->{root} |
182 | ? $args->{root} |
183 | : DBM::Deep::_::Root->new( $args ); |
184 | |
a20d9a3f |
185 | if (!defined($self->_fh)) { $self->{engine}->open( $self ); } |
0795f290 |
186 | |
187 | return $self; |
ffed8b01 |
188 | } |
189 | |
ffed8b01 |
190 | sub TIEHASH { |
6fe26b29 |
191 | shift; |
192 | require DBM::Deep::Hash; |
193 | return DBM::Deep::Hash->TIEHASH( @_ ); |
ffed8b01 |
194 | } |
195 | |
196 | sub TIEARRAY { |
6fe26b29 |
197 | shift; |
198 | require DBM::Deep::Array; |
199 | return DBM::Deep::Array->TIEARRAY( @_ ); |
ffed8b01 |
200 | } |
201 | |
cc4bef86 |
202 | #XXX Unneeded now ... |
203 | #sub DESTROY { |
204 | #} |
ffed8b01 |
205 | |
261d1296 |
206 | sub _find_bucket_list { |
ffed8b01 |
207 | ## |
208 | # Locate offset for bucket list, given digested key |
209 | ## |
210 | my $self = shift; |
211 | my $md5 = shift; |
212 | |
213 | ## |
214 | # Locate offset for bucket list using digest index system |
215 | ## |
216 | my $ch = 0; |
d4b1166e |
217 | my $tag = $self->{engine}->load_tag($self, $self->_base_offset); |
ffed8b01 |
218 | if (!$tag) { return; } |
219 | |
220 | while ($tag->{signature} ne SIG_BLIST) { |
d4b1166e |
221 | $tag = $self->{engine}->index_lookup($self, $tag, ord(substr($md5, $ch, 1))); |
ffed8b01 |
222 | if (!$tag) { return; } |
223 | $ch++; |
224 | } |
225 | |
226 | return $tag; |
227 | } |
228 | |
261d1296 |
229 | sub _traverse_index { |
ffed8b01 |
230 | ## |
231 | # Scan index and recursively step into deeper levels, looking for next key. |
232 | ## |
233 | my ($self, $offset, $ch, $force_return_next) = @_; |
234 | $force_return_next = undef unless $force_return_next; |
235 | |
d4b1166e |
236 | my $tag = $self->{engine}->load_tag($self, $offset ); |
f2fb5dff |
237 | |
4d35d856 |
238 | my $fh = $self->_fh; |
ffed8b01 |
239 | |
240 | if ($tag->{signature} ne SIG_BLIST) { |
241 | my $content = $tag->{content}; |
242 | my $start; |
243 | if ($self->{return_next}) { $start = 0; } |
244 | else { $start = ord(substr($self->{prev_md5}, $ch, 1)); } |
245 | |
246 | for (my $index = $start; $index < 256; $index++) { |
247 | my $subloc = unpack($LONG_PACK, substr($content, $index * $LONG_SIZE, $LONG_SIZE) ); |
248 | if ($subloc) { |
261d1296 |
249 | my $result = $self->_traverse_index( $subloc, $ch + 1, $force_return_next ); |
ffed8b01 |
250 | if (defined($result)) { return $result; } |
251 | } |
252 | } # index loop |
253 | |
254 | $self->{return_next} = 1; |
255 | } # tag is an index |
256 | |
257 | elsif ($tag->{signature} eq SIG_BLIST) { |
258 | my $keys = $tag->{content}; |
259 | if ($force_return_next) { $self->{return_next} = 1; } |
260 | |
261 | ## |
262 | # Iterate through buckets, looking for a key match |
263 | ## |
264 | for (my $i=0; $i<$MAX_BUCKETS; $i++) { |
265 | my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); |
266 | my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); |
267 | |
268 | if (!$subloc) { |
269 | ## |
270 | # End of bucket list -- return to outer loop |
271 | ## |
272 | $self->{return_next} = 1; |
273 | last; |
274 | } |
275 | elsif ($key eq $self->{prev_md5}) { |
276 | ## |
277 | # Located previous key -- return next one found |
278 | ## |
279 | $self->{return_next} = 1; |
280 | next; |
281 | } |
282 | elsif ($self->{return_next}) { |
283 | ## |
284 | # Seek to bucket location and skip over signature |
285 | ## |
714618f0 |
286 | seek($fh, $subloc + SIG_SIZE + $self->_root->{file_offset}, SEEK_SET); |
ffed8b01 |
287 | |
288 | ## |
289 | # Skip over value to get to plain key |
290 | ## |
291 | my $size; |
f2fb5dff |
292 | read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); |
0af414a6 |
293 | if ($size) { seek($fh, $size, SEEK_CUR); } |
ffed8b01 |
294 | |
295 | ## |
296 | # Read in plain key and return as scalar |
297 | ## |
298 | my $plain_key; |
f2fb5dff |
299 | read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); |
300 | if ($size) { read( $fh, $plain_key, $size); } |
ffed8b01 |
301 | |
302 | return $plain_key; |
303 | } |
304 | } # bucket loop |
305 | |
306 | $self->{return_next} = 1; |
307 | } # tag is a bucket list |
308 | |
309 | return; |
310 | } |
311 | |
261d1296 |
312 | sub _get_next_key { |
ffed8b01 |
313 | ## |
314 | # Locate next key, given digested previous one |
315 | ## |
2ac02042 |
316 | my $self = $_[0]->_get_self; |
ffed8b01 |
317 | |
318 | $self->{prev_md5} = $_[1] ? $_[1] : undef; |
319 | $self->{return_next} = 0; |
320 | |
321 | ## |
322 | # If the previous key was not specifed, start at the top and |
323 | # return the first one found. |
324 | ## |
325 | if (!$self->{prev_md5}) { |
326 | $self->{prev_md5} = chr(0) x $HASH_SIZE; |
327 | $self->{return_next} = 1; |
328 | } |
329 | |
4d35d856 |
330 | return $self->_traverse_index( $self->_base_offset, 0 ); |
ffed8b01 |
331 | } |
332 | |
333 | sub lock { |
334 | ## |
335 | # If db locking is set, flock() the db file. If called multiple |
336 | # times before unlock(), then the same number of unlocks() must |
337 | # be called before the lock is released. |
338 | ## |
2ac02042 |
339 | my $self = $_[0]->_get_self; |
b8b48a59 |
340 | my $type = $_[1]; |
ffed8b01 |
341 | $type = LOCK_EX unless defined $type; |
342 | |
4d35d856 |
343 | if (!defined($self->_fh)) { return; } |
7f441181 |
344 | |
4d35d856 |
345 | if ($self->_root->{locking}) { |
346 | if (!$self->_root->{locked}) { |
347 | flock($self->_fh, $type); |
a59a8dca |
348 | |
37c5bcf0 |
349 | # refresh end counter in case file has changed size |
4d35d856 |
350 | my @stats = stat($self->_root->{file}); |
351 | $self->_root->{end} = $stats[7]; |
37c5bcf0 |
352 | |
a59a8dca |
353 | # double-check file inode, in case another process |
354 | # has optimize()d our file while we were waiting. |
4d35d856 |
355 | if ($stats[1] != $self->_root->{inode}) { |
cd59cad8 |
356 | $self->{engine}->open( $self ); # re-open |
4d35d856 |
357 | flock($self->_fh, $type); # re-lock |
358 | $self->_root->{end} = (stat($self->_fh))[7]; # re-end |
a59a8dca |
359 | } |
360 | } |
4d35d856 |
361 | $self->_root->{locked}++; |
b4522594 |
362 | |
363 | return 1; |
ffed8b01 |
364 | } |
b4522594 |
365 | |
366 | return; |
ffed8b01 |
367 | } |
368 | |
369 | sub unlock { |
370 | ## |
371 | # If db locking is set, unlock the db file. See note in lock() |
372 | # regarding calling lock() multiple times. |
373 | ## |
2ac02042 |
374 | my $self = $_[0]->_get_self; |
7f441181 |
375 | |
4d35d856 |
376 | if (!defined($self->_fh)) { return; } |
ffed8b01 |
377 | |
4d35d856 |
378 | if ($self->_root->{locking} && $self->_root->{locked} > 0) { |
379 | $self->_root->{locked}--; |
380 | if (!$self->_root->{locked}) { flock($self->_fh, LOCK_UN); } |
b4522594 |
381 | |
382 | return 1; |
ffed8b01 |
383 | } |
b4522594 |
384 | |
385 | return; |
ffed8b01 |
386 | } |
387 | |
906c8e01 |
388 | sub _copy_value { |
389 | my $self = shift->_get_self; |
390 | my ($spot, $value) = @_; |
391 | |
392 | if ( !ref $value ) { |
393 | ${$spot} = $value; |
394 | } |
395 | elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep' ) } ) { |
396 | my $type = $value->_type; |
397 | ${$spot} = $type eq TYPE_HASH ? {} : []; |
398 | $value->_copy_node( ${$spot} ); |
399 | } |
400 | else { |
401 | my $r = Scalar::Util::reftype( $value ); |
402 | my $c = Scalar::Util::blessed( $value ); |
403 | if ( $r eq 'ARRAY' ) { |
404 | ${$spot} = [ @{$value} ]; |
405 | } |
406 | else { |
407 | ${$spot} = { %{$value} }; |
408 | } |
95bbd935 |
409 | ${$spot} = bless ${$spot}, $c |
906c8e01 |
410 | if defined $c; |
411 | } |
412 | |
413 | return 1; |
414 | } |
415 | |
261d1296 |
416 | sub _copy_node { |
ffed8b01 |
417 | ## |
418 | # Copy single level of keys or elements to new DB handle. |
419 | # Recurse for nested structures |
420 | ## |
906c8e01 |
421 | my $self = shift->_get_self; |
422 | my ($db_temp) = @_; |
b8b48a59 |
423 | |
4d35d856 |
424 | if ($self->_type eq TYPE_HASH) { |
ffed8b01 |
425 | my $key = $self->first_key(); |
426 | while ($key) { |
427 | my $value = $self->get($key); |
906c8e01 |
428 | $self->_copy_value( \$db_temp->{$key}, $value ); |
ffed8b01 |
429 | $key = $self->next_key($key); |
430 | } |
431 | } |
432 | else { |
433 | my $length = $self->length(); |
434 | for (my $index = 0; $index < $length; $index++) { |
435 | my $value = $self->get($index); |
906c8e01 |
436 | $self->_copy_value( \$db_temp->[$index], $value ); |
ffed8b01 |
437 | } |
438 | } |
906c8e01 |
439 | |
440 | return 1; |
ffed8b01 |
441 | } |
442 | |
443 | sub export { |
444 | ## |
445 | # Recursively export into standard Perl hashes and arrays. |
446 | ## |
2ac02042 |
447 | my $self = $_[0]->_get_self; |
ffed8b01 |
448 | |
449 | my $temp; |
4d35d856 |
450 | if ($self->_type eq TYPE_HASH) { $temp = {}; } |
451 | elsif ($self->_type eq TYPE_ARRAY) { $temp = []; } |
ffed8b01 |
452 | |
453 | $self->lock(); |
261d1296 |
454 | $self->_copy_node( $temp ); |
ffed8b01 |
455 | $self->unlock(); |
456 | |
457 | return $temp; |
458 | } |
459 | |
460 | sub import { |
461 | ## |
462 | # Recursively import Perl hash/array structure |
463 | ## |
464 | #XXX This use of ref() seems to be ok |
465 | if (!ref($_[0])) { return; } # Perl calls import() on use -- ignore |
466 | |
2ac02042 |
467 | my $self = $_[0]->_get_self; |
ffed8b01 |
468 | my $struct = $_[1]; |
469 | |
470 | #XXX This use of ref() seems to be ok |
471 | if (!ref($struct)) { |
472 | ## |
473 | # struct is not a reference, so just import based on our type |
474 | ## |
475 | shift @_; |
476 | |
4d35d856 |
477 | if ($self->_type eq TYPE_HASH) { $struct = {@_}; } |
478 | elsif ($self->_type eq TYPE_ARRAY) { $struct = [@_]; } |
ffed8b01 |
479 | } |
480 | |
481 | my $r = Scalar::Util::reftype($struct) || ''; |
4d35d856 |
482 | if ($r eq "HASH" && $self->_type eq TYPE_HASH) { |
ffed8b01 |
483 | foreach my $key (keys %$struct) { $self->put($key, $struct->{$key}); } |
484 | } |
4d35d856 |
485 | elsif ($r eq "ARRAY" && $self->_type eq TYPE_ARRAY) { |
ffed8b01 |
486 | $self->push( @$struct ); |
487 | } |
488 | else { |
261d1296 |
489 | return $self->_throw_error("Cannot import: type mismatch"); |
ffed8b01 |
490 | } |
491 | |
492 | return 1; |
493 | } |
494 | |
495 | sub optimize { |
496 | ## |
497 | # Rebuild entire database into new file, then move |
498 | # it back on top of original. |
499 | ## |
2ac02042 |
500 | my $self = $_[0]->_get_self; |
cc4bef86 |
501 | |
502 | #XXX Need to create a new test for this |
4d35d856 |
503 | # if ($self->_root->{links} > 1) { |
cc4bef86 |
504 | # return $self->_throw_error("Cannot optimize: reference count is greater than 1"); |
505 | # } |
ffed8b01 |
506 | |
507 | my $db_temp = DBM::Deep->new( |
4d35d856 |
508 | file => $self->_root->{file} . '.tmp', |
509 | type => $self->_type |
ffed8b01 |
510 | ); |
511 | if (!$db_temp) { |
261d1296 |
512 | return $self->_throw_error("Cannot optimize: failed to open temp file: $!"); |
ffed8b01 |
513 | } |
514 | |
515 | $self->lock(); |
261d1296 |
516 | $self->_copy_node( $db_temp ); |
ffed8b01 |
517 | undef $db_temp; |
518 | |
519 | ## |
520 | # Attempt to copy user, group and permissions over to new file |
521 | ## |
4d35d856 |
522 | my @stats = stat($self->_fh); |
ffed8b01 |
523 | my $perms = $stats[2] & 07777; |
524 | my $uid = $stats[4]; |
525 | my $gid = $stats[5]; |
4d35d856 |
526 | chown( $uid, $gid, $self->_root->{file} . '.tmp' ); |
527 | chmod( $perms, $self->_root->{file} . '.tmp' ); |
ffed8b01 |
528 | |
529 | # q.v. perlport for more information on this variable |
90f93b43 |
530 | if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) { |
ffed8b01 |
531 | ## |
532 | # Potential race condition when optmizing on Win32 with locking. |
533 | # The Windows filesystem requires that the filehandle be closed |
534 | # before it is overwritten with rename(). This could be redone |
535 | # with a soft copy. |
536 | ## |
537 | $self->unlock(); |
cd59cad8 |
538 | $self->{engine}->close( $self ); |
ffed8b01 |
539 | } |
540 | |
4d35d856 |
541 | if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) { |
542 | unlink $self->_root->{file} . '.tmp'; |
ffed8b01 |
543 | $self->unlock(); |
261d1296 |
544 | return $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!"); |
ffed8b01 |
545 | } |
546 | |
547 | $self->unlock(); |
cd59cad8 |
548 | $self->{engine}->close( $self ); |
549 | $self->{engine}->open( $self ); |
ffed8b01 |
550 | |
551 | return 1; |
552 | } |
553 | |
554 | sub clone { |
555 | ## |
556 | # Make copy of object and return |
557 | ## |
2ac02042 |
558 | my $self = $_[0]->_get_self; |
ffed8b01 |
559 | |
560 | return DBM::Deep->new( |
4d35d856 |
561 | type => $self->_type, |
562 | base_offset => $self->_base_offset, |
563 | root => $self->_root |
ffed8b01 |
564 | ); |
565 | } |
566 | |
567 | { |
568 | my %is_legal_filter = map { |
569 | $_ => ~~1, |
570 | } qw( |
571 | store_key store_value |
572 | fetch_key fetch_value |
573 | ); |
574 | |
575 | sub set_filter { |
576 | ## |
577 | # Setup filter function for storing or fetching the key or value |
578 | ## |
2ac02042 |
579 | my $self = $_[0]->_get_self; |
ffed8b01 |
580 | my $type = lc $_[1]; |
581 | my $func = $_[2] ? $_[2] : undef; |
582 | |
583 | if ( $is_legal_filter{$type} ) { |
4d35d856 |
584 | $self->_root->{"filter_$type"} = $func; |
ffed8b01 |
585 | return 1; |
586 | } |
587 | |
588 | return; |
589 | } |
590 | } |
591 | |
592 | ## |
593 | # Accessor methods |
594 | ## |
595 | |
4d35d856 |
596 | sub _root { |
ffed8b01 |
597 | ## |
598 | # Get access to the root structure |
599 | ## |
2ac02042 |
600 | my $self = $_[0]->_get_self; |
ffed8b01 |
601 | return $self->{root}; |
602 | } |
603 | |
4d35d856 |
604 | sub _fh { |
ffed8b01 |
605 | ## |
90f93b43 |
606 | # Get access to the raw fh |
ffed8b01 |
607 | ## |
b8b48a59 |
608 | #XXX It will be useful, though, when we split out HASH and ARRAY |
2ac02042 |
609 | my $self = $_[0]->_get_self; |
4d35d856 |
610 | return $self->_root->{fh}; |
ffed8b01 |
611 | } |
612 | |
4d35d856 |
613 | sub _type { |
ffed8b01 |
614 | ## |
615 | # Get type of current node (TYPE_HASH or TYPE_ARRAY) |
616 | ## |
2ac02042 |
617 | my $self = $_[0]->_get_self; |
ffed8b01 |
618 | return $self->{type}; |
619 | } |
620 | |
4d35d856 |
621 | sub _base_offset { |
ffed8b01 |
622 | ## |
623 | # Get base_offset of current node (TYPE_HASH or TYPE_ARRAY) |
624 | ## |
2ac02042 |
625 | my $self = $_[0]->_get_self; |
ffed8b01 |
626 | return $self->{base_offset}; |
627 | } |
628 | |
ffed8b01 |
629 | ## |
630 | # Utility methods |
631 | ## |
632 | |
261d1296 |
633 | sub _throw_error { |
95967a5e |
634 | die "DBM::Deep: $_[1]\n"; |
ffed8b01 |
635 | } |
636 | |
4d35d856 |
637 | sub _precalc_sizes { |
ffed8b01 |
638 | ## |
639 | # Precalculate index, bucket and bucket list sizes |
640 | ## |
641 | |
642 | #XXX I don't like this ... |
643 | set_pack() unless defined $LONG_SIZE; |
644 | |
645 | $INDEX_SIZE = 256 * $LONG_SIZE; |
646 | $BUCKET_SIZE = $HASH_SIZE + $LONG_SIZE; |
647 | $BUCKET_LIST_SIZE = $MAX_BUCKETS * $BUCKET_SIZE; |
648 | } |
649 | |
650 | sub set_pack { |
651 | ## |
652 | # Set pack/unpack modes (see file header for more) |
653 | ## |
654 | my ($long_s, $long_p, $data_s, $data_p) = @_; |
655 | |
656 | $LONG_SIZE = $long_s ? $long_s : 4; |
657 | $LONG_PACK = $long_p ? $long_p : 'N'; |
658 | |
659 | $DATA_LENGTH_SIZE = $data_s ? $data_s : 4; |
660 | $DATA_LENGTH_PACK = $data_p ? $data_p : 'N'; |
661 | |
4d35d856 |
662 | _precalc_sizes(); |
ffed8b01 |
663 | } |
664 | |
665 | sub set_digest { |
666 | ## |
667 | # Set key digest function (default is MD5) |
668 | ## |
669 | my ($digest_func, $hash_size) = @_; |
670 | |
671 | $DIGEST_FUNC = $digest_func ? $digest_func : \&Digest::MD5::md5; |
672 | $HASH_SIZE = $hash_size ? $hash_size : 16; |
673 | |
4d35d856 |
674 | _precalc_sizes(); |
ffed8b01 |
675 | } |
676 | |
acd4faf2 |
677 | sub _is_writable { |
678 | my $fh = shift; |
679 | (O_WRONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0); |
680 | } |
681 | |
9be51a89 |
682 | #sub _is_readable { |
683 | # my $fh = shift; |
684 | # (O_RDONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0); |
685 | #} |
acd4faf2 |
686 | |
ffed8b01 |
687 | ## |
688 | # tie() methods (hashes and arrays) |
689 | ## |
690 | |
691 | sub STORE { |
692 | ## |
693 | # Store single hash key/value or array element in database. |
694 | ## |
2ac02042 |
695 | my $self = $_[0]->_get_self; |
81d3d316 |
696 | my $key = $_[1]; |
697 | |
9ab67b8c |
698 | # User may be storing a hash, in which case we do not want it run |
699 | # through the filtering system |
4d35d856 |
700 | my $value = ($self->_root->{filter_store_value} && !ref($_[2])) |
701 | ? $self->_root->{filter_store_value}->($_[2]) |
81d3d316 |
702 | : $_[2]; |
ffed8b01 |
703 | |
ffed8b01 |
704 | my $md5 = $DIGEST_FUNC->($key); |
705 | |
acd4faf2 |
706 | unless ( _is_writable( $self->_fh ) ) { |
707 | $self->_throw_error( 'Cannot write to a readonly filehandle' ); |
708 | } |
ffed8b01 |
709 | |
710 | ## |
711 | # Request exclusive lock for writing |
712 | ## |
713 | $self->lock( LOCK_EX ); |
a59a8dca |
714 | |
4d35d856 |
715 | my $fh = $self->_fh; |
ffed8b01 |
716 | |
717 | ## |
718 | # Locate offset for bucket list using digest index system |
719 | ## |
d4b1166e |
720 | my $tag = $self->{engine}->load_tag($self, $self->_base_offset); |
ffed8b01 |
721 | if (!$tag) { |
d4b1166e |
722 | $tag = $self->{engine}->create_tag($self, $self->_base_offset, SIG_INDEX, chr(0) x $INDEX_SIZE); |
ffed8b01 |
723 | } |
724 | |
725 | my $ch = 0; |
726 | while ($tag->{signature} ne SIG_BLIST) { |
727 | my $num = ord(substr($md5, $ch, 1)); |
b504ea40 |
728 | |
729 | my $ref_loc = $tag->{offset} + ($num * $LONG_SIZE); |
d4b1166e |
730 | my $new_tag = $self->{engine}->index_lookup($self, $tag, $num); |
b504ea40 |
731 | |
ffed8b01 |
732 | if (!$new_tag) { |
714618f0 |
733 | seek($fh, $ref_loc + $self->_root->{file_offset}, SEEK_SET); |
b504ea40 |
734 | print( $fh pack($LONG_PACK, $self->_root->{end}) ); |
ffed8b01 |
735 | |
d4b1166e |
736 | $tag = $self->{engine}->create_tag($self, $self->_root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE); |
b504ea40 |
737 | |
ffed8b01 |
738 | $tag->{ref_loc} = $ref_loc; |
739 | $tag->{ch} = $ch; |
b504ea40 |
740 | |
ffed8b01 |
741 | last; |
742 | } |
743 | else { |
ffed8b01 |
744 | $tag = $new_tag; |
b504ea40 |
745 | |
ffed8b01 |
746 | $tag->{ref_loc} = $ref_loc; |
747 | $tag->{ch} = $ch; |
748 | } |
749 | $ch++; |
750 | } |
751 | |
752 | ## |
753 | # Add key/value to bucket list |
754 | ## |
20f7b20c |
755 | my $result = $self->{engine}->add_bucket( $self, $tag, $md5, $key, $value ); |
ffed8b01 |
756 | |
ffed8b01 |
757 | $self->unlock(); |
758 | |
759 | return $result; |
760 | } |
761 | |
762 | sub FETCH { |
763 | ## |
764 | # Fetch single value or element given plain key or array index |
765 | ## |
cb79ec85 |
766 | my $self = shift->_get_self; |
767 | my $key = shift; |
ffed8b01 |
768 | |
cb79ec85 |
769 | my $md5 = $DIGEST_FUNC->($key); |
770 | |
ffed8b01 |
771 | ## |
772 | # Request shared lock for reading |
773 | ## |
774 | $self->lock( LOCK_SH ); |
775 | |
261d1296 |
776 | my $tag = $self->_find_bucket_list( $md5 ); |
ffed8b01 |
777 | if (!$tag) { |
778 | $self->unlock(); |
779 | return; |
780 | } |
781 | |
782 | ## |
783 | # Get value from bucket list |
784 | ## |
9020ee8c |
785 | my $result = $self->{engine}->get_bucket_value( $self, $tag, $md5 ); |
ffed8b01 |
786 | |
787 | $self->unlock(); |
788 | |
789 | #XXX What is ref() checking here? |
aeeb5497 |
790 | #YYY Filters only apply on scalar values, so the ref check is making |
791 | #YYY sure the fetched bucket is a scalar, not a child hash or array. |
4d35d856 |
792 | return ($result && !ref($result) && $self->_root->{filter_fetch_value}) |
793 | ? $self->_root->{filter_fetch_value}->($result) |
cb79ec85 |
794 | : $result; |
ffed8b01 |
795 | } |
796 | |
797 | sub DELETE { |
798 | ## |
799 | # Delete single key/value pair or element given plain key or array index |
800 | ## |
2ac02042 |
801 | my $self = $_[0]->_get_self; |
feaf1e6f |
802 | my $key = $_[1]; |
ffed8b01 |
803 | |
ffed8b01 |
804 | my $md5 = $DIGEST_FUNC->($key); |
805 | |
806 | ## |
ffed8b01 |
807 | # Request exclusive lock for writing |
808 | ## |
809 | $self->lock( LOCK_EX ); |
810 | |
261d1296 |
811 | my $tag = $self->_find_bucket_list( $md5 ); |
ffed8b01 |
812 | if (!$tag) { |
813 | $self->unlock(); |
814 | return; |
815 | } |
816 | |
817 | ## |
818 | # Delete bucket |
819 | ## |
9020ee8c |
820 | my $value = $self->{engine}->get_bucket_value($self, $tag, $md5 ); |
4d35d856 |
821 | if ($value && !ref($value) && $self->_root->{filter_fetch_value}) { |
822 | $value = $self->_root->{filter_fetch_value}->($value); |
3b6a5056 |
823 | } |
824 | |
ab0e4957 |
825 | my $result = $self->{engine}->delete_bucket( $self, $tag, $md5 ); |
ffed8b01 |
826 | |
827 | ## |
828 | # If this object is an array and the key deleted was on the end of the stack, |
829 | # decrement the length variable. |
830 | ## |
ffed8b01 |
831 | |
832 | $self->unlock(); |
833 | |
81d3d316 |
834 | return $value; |
ffed8b01 |
835 | } |
836 | |
837 | sub EXISTS { |
838 | ## |
839 | # Check if a single key or element exists given plain key or array index |
840 | ## |
2ac02042 |
841 | my $self = $_[0]->_get_self; |
baa27ab6 |
842 | my $key = $_[1]; |
ffed8b01 |
843 | |
ffed8b01 |
844 | my $md5 = $DIGEST_FUNC->($key); |
845 | |
846 | ## |
ffed8b01 |
847 | # Request shared lock for reading |
848 | ## |
849 | $self->lock( LOCK_SH ); |
850 | |
261d1296 |
851 | my $tag = $self->_find_bucket_list( $md5 ); |
ffed8b01 |
852 | |
853 | ## |
854 | # For some reason, the built-in exists() function returns '' for false |
855 | ## |
856 | if (!$tag) { |
857 | $self->unlock(); |
858 | return ''; |
859 | } |
860 | |
861 | ## |
862 | # Check if bucket exists and return 1 or '' |
863 | ## |
912d50b1 |
864 | my $result = $self->{engine}->bucket_exists( $self, $tag, $md5 ) || ''; |
ffed8b01 |
865 | |
866 | $self->unlock(); |
867 | |
868 | return $result; |
869 | } |
870 | |
871 | sub CLEAR { |
872 | ## |
873 | # Clear all keys from hash, or all elements from array. |
874 | ## |
2ac02042 |
875 | my $self = $_[0]->_get_self; |
ffed8b01 |
876 | |
877 | ## |
ffed8b01 |
878 | # Request exclusive lock for writing |
879 | ## |
880 | $self->lock( LOCK_EX ); |
881 | |
4d35d856 |
882 | my $fh = $self->_fh; |
629df3a3 |
883 | |
714618f0 |
884 | seek($fh, $self->_base_offset + $self->_root->{file_offset}, SEEK_SET); |
629df3a3 |
885 | if (eof $fh) { |
ffed8b01 |
886 | $self->unlock(); |
887 | return; |
888 | } |
889 | |
d4b1166e |
890 | $self->{engine}->create_tag($self, $self->_base_offset, $self->_type, chr(0) x $INDEX_SIZE); |
ffed8b01 |
891 | |
892 | $self->unlock(); |
893 | |
894 | return 1; |
895 | } |
896 | |
ffed8b01 |
897 | ## |
898 | # Public method aliases |
899 | ## |
7f441181 |
900 | sub put { (shift)->STORE( @_ ) } |
901 | sub store { (shift)->STORE( @_ ) } |
902 | sub get { (shift)->FETCH( @_ ) } |
903 | sub fetch { (shift)->FETCH( @_ ) } |
baa27ab6 |
904 | sub delete { (shift)->DELETE( @_ ) } |
905 | sub exists { (shift)->EXISTS( @_ ) } |
906 | sub clear { (shift)->CLEAR( @_ ) } |
ffed8b01 |
907 | |
cc4bef86 |
908 | package DBM::Deep::_::Root; |
909 | |
910 | sub new { |
911 | my $class = shift; |
912 | my ($args) = @_; |
913 | |
914 | my $self = bless { |
915 | file => undef, |
916 | fh => undef, |
714618f0 |
917 | file_offset => 0, |
cc4bef86 |
918 | end => 0, |
919 | autoflush => undef, |
920 | locking => undef, |
cc4bef86 |
921 | debug => undef, |
cc4bef86 |
922 | filter_store_key => undef, |
923 | filter_store_value => undef, |
924 | filter_fetch_key => undef, |
925 | filter_fetch_value => undef, |
926 | autobless => undef, |
927 | locked => 0, |
928 | %$args, |
929 | }, $class; |
930 | |
714618f0 |
931 | if ( $self->{fh} && !$self->{file_offset} ) { |
932 | $self->{file_offset} = tell( $self->{fh} ); |
933 | } |
934 | |
cc4bef86 |
935 | return $self; |
936 | } |
937 | |
938 | sub DESTROY { |
939 | my $self = shift; |
940 | return unless $self; |
941 | |
942 | close $self->{fh} if $self->{fh}; |
943 | |
944 | return; |
945 | } |
946 | |
ffed8b01 |
947 | 1; |
948 | |
949 | __END__ |
950 | |
951 | =head1 NAME |
952 | |
953 | DBM::Deep - A pure perl multi-level hash/array DBM |
954 | |
955 | =head1 SYNOPSIS |
956 | |
957 | use DBM::Deep; |
958 | my $db = DBM::Deep->new( "foo.db" ); |
959 | |
960 | $db->{key} = 'value'; # tie() style |
961 | print $db->{key}; |
962 | |
cbaa107d |
963 | $db->put('key' => 'value'); # OO style |
ffed8b01 |
964 | print $db->get('key'); |
965 | |
966 | # true multi-level support |
967 | $db->{my_complex} = [ |
968 | 'hello', { perl => 'rules' }, |
90f93b43 |
969 | 42, 99, |
970 | ]; |
ffed8b01 |
971 | |
972 | =head1 DESCRIPTION |
973 | |
974 | A unique flat-file database module, written in pure perl. True |
975 | multi-level hash/array support (unlike MLDBM, which is faked), hybrid |
976 | OO / tie() interface, cross-platform FTPable files, and quite fast. Can |
977 | handle millions of keys and unlimited hash levels without significant |
978 | slow-down. Written from the ground-up in pure perl -- this is NOT a |
979 | wrapper around a C-based DBM. Out-of-the-box compatibility with Unix, |
980 | Mac OS X and Windows. |
981 | |
982 | =head1 INSTALLATION |
983 | |
90f93b43 |
984 | Hopefully you are using Perl's excellent CPAN module, which will download |
ffed8b01 |
985 | and install the module for you. If not, get the tarball, and run these |
986 | commands: |
987 | |
988 | tar zxf DBM-Deep-* |
989 | cd DBM-Deep-* |
990 | perl Makefile.PL |
991 | make |
992 | make test |
993 | make install |
994 | |
995 | =head1 SETUP |
996 | |
997 | Construction can be done OO-style (which is the recommended way), or using |
998 | Perl's tie() function. Both are examined here. |
999 | |
1000 | =head2 OO CONSTRUCTION |
1001 | |
1002 | The recommended way to construct a DBM::Deep object is to use the new() |
1003 | method, which gets you a blessed, tied hash or array reference. |
1004 | |
1005 | my $db = DBM::Deep->new( "foo.db" ); |
1006 | |
1007 | This opens a new database handle, mapped to the file "foo.db". If this |
1008 | file does not exist, it will automatically be created. DB files are |
1009 | opened in "r+" (read/write) mode, and the type of object returned is a |
1010 | hash, unless otherwise specified (see L<OPTIONS> below). |
1011 | |
ffed8b01 |
1012 | You can pass a number of options to the constructor to specify things like |
1013 | locking, autoflush, etc. This is done by passing an inline hash: |
1014 | |
1015 | my $db = DBM::Deep->new( |
1016 | file => "foo.db", |
1017 | locking => 1, |
1018 | autoflush => 1 |
1019 | ); |
1020 | |
1021 | Notice that the filename is now specified I<inside> the hash with |
1022 | the "file" parameter, as opposed to being the sole argument to the |
1023 | constructor. This is required if any options are specified. |
1024 | See L<OPTIONS> below for the complete list. |
1025 | |
1026 | |
1027 | |
1028 | You can also start with an array instead of a hash. For this, you must |
1029 | specify the C<type> parameter: |
1030 | |
1031 | my $db = DBM::Deep->new( |
1032 | file => "foo.db", |
1033 | type => DBM::Deep->TYPE_ARRAY |
1034 | ); |
1035 | |
1036 | B<Note:> Specifing the C<type> parameter only takes effect when beginning |
1037 | a new DB file. If you create a DBM::Deep object with an existing file, the |
90f93b43 |
1038 | C<type> will be loaded from the file header, and an error will be thrown if |
1039 | the wrong type is passed in. |
ffed8b01 |
1040 | |
1041 | =head2 TIE CONSTRUCTION |
1042 | |
90f93b43 |
1043 | Alternately, you can create a DBM::Deep handle by using Perl's built-in |
1044 | tie() function. The object returned from tie() can be used to call methods, |
1045 | such as lock() and unlock(), but cannot be used to assign to the DBM::Deep |
1046 | file (as expected with most tie'd objects). |
ffed8b01 |
1047 | |
1048 | my %hash; |
90f93b43 |
1049 | my $db = tie %hash, "DBM::Deep", "foo.db"; |
ffed8b01 |
1050 | |
1051 | my @array; |
90f93b43 |
1052 | my $db = tie @array, "DBM::Deep", "bar.db"; |
ffed8b01 |
1053 | |
1054 | As with the OO constructor, you can replace the DB filename parameter with |
1055 | a hash containing one or more options (see L<OPTIONS> just below for the |
1056 | complete list). |
1057 | |
1058 | tie %hash, "DBM::Deep", { |
1059 | file => "foo.db", |
1060 | locking => 1, |
1061 | autoflush => 1 |
1062 | }; |
1063 | |
1064 | =head2 OPTIONS |
1065 | |
1066 | There are a number of options that can be passed in when constructing your |
1067 | DBM::Deep objects. These apply to both the OO- and tie- based approaches. |
1068 | |
1069 | =over |
1070 | |
1071 | =item * file |
1072 | |
1073 | Filename of the DB file to link the handle to. You can pass a full absolute |
1074 | filesystem path, partial path, or a plain filename if the file is in the |
714618f0 |
1075 | current working directory. This is a required parameter (though q.v. fh). |
1076 | |
1077 | =item * fh |
1078 | |
1079 | If you want, you can pass in the fh instead of the file. This is most useful for doing |
1080 | something like: |
1081 | |
1082 | my $db = DBM::Deep->new( { fh => \*DATA } ); |
1083 | |
1084 | You are responsible for making sure that the fh has been opened appropriately for your |
1085 | needs. If you open it read-only and attempt to write, an exception will be thrown. If you |
1086 | open it write-only or append-only, an exception will be thrown immediately as DBM::Deep |
1087 | needs to read from the fh. |
1088 | |
1089 | =item * file_offset |
1090 | |
1091 | This is the offset within the file that the DBM::Deep db starts. Most of the time, you will |
1092 | not need to set this. However, it's there if you want it. |
1093 | |
1094 | If you pass in fh and do not set this, it will be set appropriately. |
ffed8b01 |
1095 | |
ffed8b01 |
1096 | =item * type |
1097 | |
1098 | This parameter specifies what type of object to create, a hash or array. Use |
1099 | one of these two constants: C<DBM::Deep-E<gt>TYPE_HASH> or C<DBM::Deep-E<gt>TYPE_ARRAY>. |
1100 | This only takes effect when beginning a new file. This is an optional |
1101 | parameter, and defaults to C<DBM::Deep-E<gt>TYPE_HASH>. |
1102 | |
1103 | =item * locking |
1104 | |
1105 | Specifies whether locking is to be enabled. DBM::Deep uses Perl's Fnctl flock() |
1106 | function to lock the database in exclusive mode for writes, and shared mode for |
1107 | reads. Pass any true value to enable. This affects the base DB handle I<and |
1108 | any child hashes or arrays> that use the same DB file. This is an optional |
1109 | parameter, and defaults to 0 (disabled). See L<LOCKING> below for more. |
1110 | |
1111 | =item * autoflush |
1112 | |
90f93b43 |
1113 | Specifies whether autoflush is to be enabled on the underlying filehandle. |
ffed8b01 |
1114 | This obviously slows down write operations, but is required if you may have |
37c5bcf0 |
1115 | multiple processes accessing the same DB file (also consider enable I<locking>). |
1116 | Pass any true value to enable. This is an optional parameter, and defaults to 0 |
ffed8b01 |
1117 | (disabled). |
1118 | |
1119 | =item * autobless |
1120 | |
1121 | If I<autobless> mode is enabled, DBM::Deep will preserve blessed hashes, and |
1122 | restore them when fetched. This is an B<experimental> feature, and does have |
1123 | side-effects. Basically, when hashes are re-blessed into their original |
1124 | classes, they are no longer blessed into the DBM::Deep class! So you won't be |
1125 | able to call any DBM::Deep methods on them. You have been warned. |
1126 | This is an optional parameter, and defaults to 0 (disabled). |
1127 | |
1128 | =item * filter_* |
1129 | |
1130 | See L<FILTERS> below. |
1131 | |
1132 | =item * debug |
1133 | |
1134 | Setting I<debug> mode will make all errors non-fatal, dump them out to |
1135 | STDERR, and continue on. This is for debugging purposes only, and probably |
1136 | not what you want. This is an optional parameter, and defaults to 0 (disabled). |
1137 | |
429e4192 |
1138 | B<NOTE>: This parameter is considered deprecated and should not be used anymore. |
ffed8b01 |
1139 | |
1140 | =back |
1141 | |
1142 | =head1 TIE INTERFACE |
1143 | |
1144 | With DBM::Deep you can access your databases using Perl's standard hash/array |
90f93b43 |
1145 | syntax. Because all DBM::Deep objects are I<tied> to hashes or arrays, you can |
1146 | treat them as such. DBM::Deep will intercept all reads/writes and direct them |
1147 | to the right place -- the DB file. This has nothing to do with the |
1148 | L<TIE CONSTRUCTION> section above. This simply tells you how to use DBM::Deep |
1149 | using regular hashes and arrays, rather than calling functions like C<get()> |
1150 | and C<put()> (although those work too). It is entirely up to you how to want |
1151 | to access your databases. |
ffed8b01 |
1152 | |
1153 | =head2 HASHES |
1154 | |
1155 | You can treat any DBM::Deep object like a normal Perl hash reference. Add keys, |
1156 | or even nested hashes (or arrays) using standard Perl syntax: |
1157 | |
1158 | my $db = DBM::Deep->new( "foo.db" ); |
1159 | |
1160 | $db->{mykey} = "myvalue"; |
1161 | $db->{myhash} = {}; |
1162 | $db->{myhash}->{subkey} = "subvalue"; |
1163 | |
1164 | print $db->{myhash}->{subkey} . "\n"; |
1165 | |
1166 | You can even step through hash keys using the normal Perl C<keys()> function: |
1167 | |
1168 | foreach my $key (keys %$db) { |
1169 | print "$key: " . $db->{$key} . "\n"; |
1170 | } |
1171 | |
1172 | Remember that Perl's C<keys()> function extracts I<every> key from the hash and |
1173 | pushes them onto an array, all before the loop even begins. If you have an |
1174 | extra large hash, this may exhaust Perl's memory. Instead, consider using |
1175 | Perl's C<each()> function, which pulls keys/values one at a time, using very |
1176 | little memory: |
1177 | |
1178 | while (my ($key, $value) = each %$db) { |
1179 | print "$key: $value\n"; |
1180 | } |
1181 | |
1182 | Please note that when using C<each()>, you should always pass a direct |
1183 | hash reference, not a lookup. Meaning, you should B<never> do this: |
1184 | |
1185 | # NEVER DO THIS |
1186 | while (my ($key, $value) = each %{$db->{foo}}) { # BAD |
1187 | |
1188 | This causes an infinite loop, because for each iteration, Perl is calling |
1189 | FETCH() on the $db handle, resulting in a "new" hash for foo every time, so |
1190 | it effectively keeps returning the first key over and over again. Instead, |
1191 | assign a temporary variable to C<$db->{foo}>, then pass that to each(). |
1192 | |
1193 | =head2 ARRAYS |
1194 | |
1195 | As with hashes, you can treat any DBM::Deep object like a normal Perl array |
1196 | reference. This includes inserting, removing and manipulating elements, |
1197 | and the C<push()>, C<pop()>, C<shift()>, C<unshift()> and C<splice()> functions. |
1198 | The object must have first been created using type C<DBM::Deep-E<gt>TYPE_ARRAY>, |
1199 | or simply be a nested array reference inside a hash. Example: |
1200 | |
1201 | my $db = DBM::Deep->new( |
1202 | file => "foo-array.db", |
1203 | type => DBM::Deep->TYPE_ARRAY |
1204 | ); |
1205 | |
1206 | $db->[0] = "foo"; |
1207 | push @$db, "bar", "baz"; |
1208 | unshift @$db, "bah"; |
1209 | |
1210 | my $last_elem = pop @$db; # baz |
1211 | my $first_elem = shift @$db; # bah |
1212 | my $second_elem = $db->[1]; # bar |
1213 | |
1214 | my $num_elements = scalar @$db; |
1215 | |
1216 | =head1 OO INTERFACE |
1217 | |
1218 | In addition to the I<tie()> interface, you can also use a standard OO interface |
1219 | to manipulate all aspects of DBM::Deep databases. Each type of object (hash or |
1220 | array) has its own methods, but both types share the following common methods: |
1221 | C<put()>, C<get()>, C<exists()>, C<delete()> and C<clear()>. |
1222 | |
1223 | =over |
1224 | |
4d35d856 |
1225 | =item * new() / clone() |
1226 | |
1227 | These are the constructor and copy-functions. |
1228 | |
90f93b43 |
1229 | =item * put() / store() |
ffed8b01 |
1230 | |
1231 | Stores a new hash key/value pair, or sets an array element value. Takes two |
1232 | arguments, the hash key or array index, and the new value. The value can be |
1233 | a scalar, hash ref or array ref. Returns true on success, false on failure. |
1234 | |
1235 | $db->put("foo", "bar"); # for hashes |
1236 | $db->put(1, "bar"); # for arrays |
1237 | |
90f93b43 |
1238 | =item * get() / fetch() |
ffed8b01 |
1239 | |
1240 | Fetches the value of a hash key or array element. Takes one argument: the hash |
1241 | key or array index. Returns a scalar, hash ref or array ref, depending on the |
1242 | data type stored. |
1243 | |
1244 | my $value = $db->get("foo"); # for hashes |
1245 | my $value = $db->get(1); # for arrays |
1246 | |
1247 | =item * exists() |
1248 | |
1249 | Checks if a hash key or array index exists. Takes one argument: the hash key |
1250 | or array index. Returns true if it exists, false if not. |
1251 | |
1252 | if ($db->exists("foo")) { print "yay!\n"; } # for hashes |
1253 | if ($db->exists(1)) { print "yay!\n"; } # for arrays |
1254 | |
1255 | =item * delete() |
1256 | |
1257 | Deletes one hash key/value pair or array element. Takes one argument: the hash |
1258 | key or array index. Returns true on success, false if not found. For arrays, |
1259 | the remaining elements located after the deleted element are NOT moved over. |
1260 | The deleted element is essentially just undefined, which is exactly how Perl's |
1261 | internal arrays work. Please note that the space occupied by the deleted |
1262 | key/value or element is B<not> reused again -- see L<UNUSED SPACE RECOVERY> |
1263 | below for details and workarounds. |
1264 | |
1265 | $db->delete("foo"); # for hashes |
1266 | $db->delete(1); # for arrays |
1267 | |
1268 | =item * clear() |
1269 | |
1270 | Deletes B<all> hash keys or array elements. Takes no arguments. No return |
1271 | value. Please note that the space occupied by the deleted keys/values or |
1272 | elements is B<not> reused again -- see L<UNUSED SPACE RECOVERY> below for |
1273 | details and workarounds. |
1274 | |
1275 | $db->clear(); # hashes or arrays |
1276 | |
4d35d856 |
1277 | =item * lock() / unlock() |
1278 | |
1279 | q.v. Locking. |
1280 | |
1281 | =item * optimize() |
1282 | |
1283 | Recover lost disk space. |
1284 | |
1285 | =item * import() / export() |
1286 | |
1287 | Data going in and out. |
1288 | |
1289 | =item * set_digest() / set_pack() / set_filter() |
1290 | |
1291 | q.v. adjusting the interal parameters. |
1292 | |
ffed8b01 |
1293 | =back |
1294 | |
1295 | =head2 HASHES |
1296 | |
1297 | For hashes, DBM::Deep supports all the common methods described above, and the |
1298 | following additional methods: C<first_key()> and C<next_key()>. |
1299 | |
1300 | =over |
1301 | |
1302 | =item * first_key() |
1303 | |
1304 | Returns the "first" key in the hash. As with built-in Perl hashes, keys are |
1305 | fetched in an undefined order (which appears random). Takes no arguments, |
1306 | returns the key as a scalar value. |
1307 | |
1308 | my $key = $db->first_key(); |
1309 | |
1310 | =item * next_key() |
1311 | |
1312 | Returns the "next" key in the hash, given the previous one as the sole argument. |
1313 | Returns undef if there are no more keys to be fetched. |
1314 | |
1315 | $key = $db->next_key($key); |
1316 | |
1317 | =back |
1318 | |
1319 | Here are some examples of using hashes: |
1320 | |
1321 | my $db = DBM::Deep->new( "foo.db" ); |
1322 | |
1323 | $db->put("foo", "bar"); |
1324 | print "foo: " . $db->get("foo") . "\n"; |
1325 | |
1326 | $db->put("baz", {}); # new child hash ref |
1327 | $db->get("baz")->put("buz", "biz"); |
1328 | print "buz: " . $db->get("baz")->get("buz") . "\n"; |
1329 | |
1330 | my $key = $db->first_key(); |
1331 | while ($key) { |
1332 | print "$key: " . $db->get($key) . "\n"; |
1333 | $key = $db->next_key($key); |
1334 | } |
1335 | |
1336 | if ($db->exists("foo")) { $db->delete("foo"); } |
1337 | |
1338 | =head2 ARRAYS |
1339 | |
1340 | For arrays, DBM::Deep supports all the common methods described above, and the |
1341 | following additional methods: C<length()>, C<push()>, C<pop()>, C<shift()>, |
1342 | C<unshift()> and C<splice()>. |
1343 | |
1344 | =over |
1345 | |
1346 | =item * length() |
1347 | |
1348 | Returns the number of elements in the array. Takes no arguments. |
1349 | |
1350 | my $len = $db->length(); |
1351 | |
1352 | =item * push() |
1353 | |
1354 | Adds one or more elements onto the end of the array. Accepts scalars, hash |
1355 | refs or array refs. No return value. |
1356 | |
1357 | $db->push("foo", "bar", {}); |
1358 | |
1359 | =item * pop() |
1360 | |
1361 | Fetches the last element in the array, and deletes it. Takes no arguments. |
1362 | Returns undef if array is empty. Returns the element value. |
1363 | |
1364 | my $elem = $db->pop(); |
1365 | |
1366 | =item * shift() |
1367 | |
1368 | Fetches the first element in the array, deletes it, then shifts all the |
1369 | remaining elements over to take up the space. Returns the element value. This |
1370 | method is not recommended with large arrays -- see L<LARGE ARRAYS> below for |
1371 | details. |
1372 | |
1373 | my $elem = $db->shift(); |
1374 | |
1375 | =item * unshift() |
1376 | |
1377 | Inserts one or more elements onto the beginning of the array, shifting all |
1378 | existing elements over to make room. Accepts scalars, hash refs or array refs. |
1379 | No return value. This method is not recommended with large arrays -- see |
1380 | <LARGE ARRAYS> below for details. |
1381 | |
1382 | $db->unshift("foo", "bar", {}); |
1383 | |
1384 | =item * splice() |
1385 | |
1386 | Performs exactly like Perl's built-in function of the same name. See L<perldoc |
1387 | -f splice> for usage -- it is too complicated to document here. This method is |
1388 | not recommended with large arrays -- see L<LARGE ARRAYS> below for details. |
1389 | |
1390 | =back |
1391 | |
1392 | Here are some examples of using arrays: |
1393 | |
1394 | my $db = DBM::Deep->new( |
1395 | file => "foo.db", |
1396 | type => DBM::Deep->TYPE_ARRAY |
1397 | ); |
1398 | |
1399 | $db->push("bar", "baz"); |
1400 | $db->unshift("foo"); |
1401 | $db->put(3, "buz"); |
1402 | |
1403 | my $len = $db->length(); |
1404 | print "length: $len\n"; # 4 |
1405 | |
1406 | for (my $k=0; $k<$len; $k++) { |
1407 | print "$k: " . $db->get($k) . "\n"; |
1408 | } |
1409 | |
1410 | $db->splice(1, 2, "biz", "baf"); |
1411 | |
1412 | while (my $elem = shift @$db) { |
1413 | print "shifted: $elem\n"; |
1414 | } |
1415 | |
1416 | =head1 LOCKING |
1417 | |
1418 | Enable automatic file locking by passing a true value to the C<locking> |
1419 | parameter when constructing your DBM::Deep object (see L<SETUP> above). |
1420 | |
1421 | my $db = DBM::Deep->new( |
1422 | file => "foo.db", |
1423 | locking => 1 |
1424 | ); |
1425 | |
90f93b43 |
1426 | This causes DBM::Deep to C<flock()> the underlying filehandle with exclusive |
ffed8b01 |
1427 | mode for writes, and shared mode for reads. This is required if you have |
1428 | multiple processes accessing the same database file, to avoid file corruption. |
1429 | Please note that C<flock()> does NOT work for files over NFS. See L<DB OVER |
1430 | NFS> below for more. |
1431 | |
1432 | =head2 EXPLICIT LOCKING |
1433 | |
1434 | You can explicitly lock a database, so it remains locked for multiple |
1435 | transactions. This is done by calling the C<lock()> method, and passing an |
90f93b43 |
1436 | optional lock mode argument (defaults to exclusive mode). This is particularly |
ffed8b01 |
1437 | useful for things like counters, where the current value needs to be fetched, |
1438 | then incremented, then stored again. |
1439 | |
1440 | $db->lock(); |
1441 | my $counter = $db->get("counter"); |
1442 | $counter++; |
1443 | $db->put("counter", $counter); |
1444 | $db->unlock(); |
1445 | |
1446 | # or... |
1447 | |
1448 | $db->lock(); |
1449 | $db->{counter}++; |
1450 | $db->unlock(); |
1451 | |
1452 | You can pass C<lock()> an optional argument, which specifies which mode to use |
1453 | (exclusive or shared). Use one of these two constants: C<DBM::Deep-E<gt>LOCK_EX> |
1454 | or C<DBM::Deep-E<gt>LOCK_SH>. These are passed directly to C<flock()>, and are the |
1455 | same as the constants defined in Perl's C<Fcntl> module. |
1456 | |
1457 | $db->lock( DBM::Deep->LOCK_SH ); |
1458 | # something here |
1459 | $db->unlock(); |
1460 | |
ffed8b01 |
1461 | =head1 IMPORTING/EXPORTING |
1462 | |
1463 | You can import existing complex structures by calling the C<import()> method, |
1464 | and export an entire database into an in-memory structure using the C<export()> |
1465 | method. Both are examined here. |
1466 | |
1467 | =head2 IMPORTING |
1468 | |
1469 | Say you have an existing hash with nested hashes/arrays inside it. Instead of |
1470 | walking the structure and adding keys/elements to the database as you go, |
1471 | simply pass a reference to the C<import()> method. This recursively adds |
1472 | everything to an existing DBM::Deep object for you. Here is an example: |
1473 | |
1474 | my $struct = { |
1475 | key1 => "value1", |
1476 | key2 => "value2", |
1477 | array1 => [ "elem0", "elem1", "elem2" ], |
1478 | hash1 => { |
1479 | subkey1 => "subvalue1", |
1480 | subkey2 => "subvalue2" |
1481 | } |
1482 | }; |
1483 | |
1484 | my $db = DBM::Deep->new( "foo.db" ); |
1485 | $db->import( $struct ); |
1486 | |
1487 | print $db->{key1} . "\n"; # prints "value1" |
1488 | |
1489 | This recursively imports the entire C<$struct> object into C<$db>, including |
1490 | all nested hashes and arrays. If the DBM::Deep object contains exsiting data, |
1491 | keys are merged with the existing ones, replacing if they already exist. |
1492 | The C<import()> method can be called on any database level (not just the base |
1493 | level), and works with both hash and array DB types. |
1494 | |
ffed8b01 |
1495 | B<Note:> Make sure your existing structure has no circular references in it. |
1496 | These will cause an infinite loop when importing. |
1497 | |
1498 | =head2 EXPORTING |
1499 | |
1500 | Calling the C<export()> method on an existing DBM::Deep object will return |
1501 | a reference to a new in-memory copy of the database. The export is done |
1502 | recursively, so all nested hashes/arrays are all exported to standard Perl |
1503 | objects. Here is an example: |
1504 | |
1505 | my $db = DBM::Deep->new( "foo.db" ); |
1506 | |
1507 | $db->{key1} = "value1"; |
1508 | $db->{key2} = "value2"; |
1509 | $db->{hash1} = {}; |
1510 | $db->{hash1}->{subkey1} = "subvalue1"; |
1511 | $db->{hash1}->{subkey2} = "subvalue2"; |
1512 | |
1513 | my $struct = $db->export(); |
1514 | |
1515 | print $struct->{key1} . "\n"; # prints "value1" |
1516 | |
1517 | This makes a complete copy of the database in memory, and returns a reference |
1518 | to it. The C<export()> method can be called on any database level (not just |
1519 | the base level), and works with both hash and array DB types. Be careful of |
1520 | large databases -- you can store a lot more data in a DBM::Deep object than an |
1521 | in-memory Perl structure. |
1522 | |
ffed8b01 |
1523 | B<Note:> Make sure your database has no circular references in it. |
1524 | These will cause an infinite loop when exporting. |
1525 | |
1526 | =head1 FILTERS |
1527 | |
1528 | DBM::Deep has a number of hooks where you can specify your own Perl function |
1529 | to perform filtering on incoming or outgoing data. This is a perfect |
1530 | way to extend the engine, and implement things like real-time compression or |
1531 | encryption. Filtering applies to the base DB level, and all child hashes / |
1532 | arrays. Filter hooks can be specified when your DBM::Deep object is first |
1533 | constructed, or by calling the C<set_filter()> method at any time. There are |
1534 | four available filter hooks, described below: |
1535 | |
1536 | =over |
1537 | |
1538 | =item * filter_store_key |
1539 | |
1540 | This filter is called whenever a hash key is stored. It |
1541 | is passed the incoming key, and expected to return a transformed key. |
1542 | |
1543 | =item * filter_store_value |
1544 | |
1545 | This filter is called whenever a hash key or array element is stored. It |
1546 | is passed the incoming value, and expected to return a transformed value. |
1547 | |
1548 | =item * filter_fetch_key |
1549 | |
1550 | This filter is called whenever a hash key is fetched (i.e. via |
1551 | C<first_key()> or C<next_key()>). It is passed the transformed key, |
1552 | and expected to return the plain key. |
1553 | |
1554 | =item * filter_fetch_value |
1555 | |
1556 | This filter is called whenever a hash key or array element is fetched. |
1557 | It is passed the transformed value, and expected to return the plain value. |
1558 | |
1559 | =back |
1560 | |
1561 | Here are the two ways to setup a filter hook: |
1562 | |
1563 | my $db = DBM::Deep->new( |
1564 | file => "foo.db", |
1565 | filter_store_value => \&my_filter_store, |
1566 | filter_fetch_value => \&my_filter_fetch |
1567 | ); |
1568 | |
1569 | # or... |
1570 | |
1571 | $db->set_filter( "filter_store_value", \&my_filter_store ); |
1572 | $db->set_filter( "filter_fetch_value", \&my_filter_fetch ); |
1573 | |
1574 | Your filter function will be called only when dealing with SCALAR keys or |
1575 | values. When nested hashes and arrays are being stored/fetched, filtering |
1576 | is bypassed. Filters are called as static functions, passed a single SCALAR |
1577 | argument, and expected to return a single SCALAR value. If you want to |
1578 | remove a filter, set the function reference to C<undef>: |
1579 | |
1580 | $db->set_filter( "filter_store_value", undef ); |
1581 | |
1582 | =head2 REAL-TIME ENCRYPTION EXAMPLE |
1583 | |
1584 | Here is a working example that uses the I<Crypt::Blowfish> module to |
1585 | do real-time encryption / decryption of keys & values with DBM::Deep Filters. |
1586 | Please visit L<http://search.cpan.org/search?module=Crypt::Blowfish> for more |
1587 | on I<Crypt::Blowfish>. You'll also need the I<Crypt::CBC> module. |
1588 | |
1589 | use DBM::Deep; |
1590 | use Crypt::Blowfish; |
1591 | use Crypt::CBC; |
1592 | |
1593 | my $cipher = Crypt::CBC->new({ |
1594 | 'key' => 'my secret key', |
1595 | 'cipher' => 'Blowfish', |
1596 | 'iv' => '$KJh#(}q', |
1597 | 'regenerate_key' => 0, |
1598 | 'padding' => 'space', |
1599 | 'prepend_iv' => 0 |
1600 | }); |
1601 | |
1602 | my $db = DBM::Deep->new( |
1603 | file => "foo-encrypt.db", |
1604 | filter_store_key => \&my_encrypt, |
1605 | filter_store_value => \&my_encrypt, |
1606 | filter_fetch_key => \&my_decrypt, |
1607 | filter_fetch_value => \&my_decrypt, |
1608 | ); |
1609 | |
1610 | $db->{key1} = "value1"; |
1611 | $db->{key2} = "value2"; |
1612 | print "key1: " . $db->{key1} . "\n"; |
1613 | print "key2: " . $db->{key2} . "\n"; |
1614 | |
1615 | undef $db; |
1616 | exit; |
1617 | |
1618 | sub my_encrypt { |
1619 | return $cipher->encrypt( $_[0] ); |
1620 | } |
1621 | sub my_decrypt { |
1622 | return $cipher->decrypt( $_[0] ); |
1623 | } |
1624 | |
1625 | =head2 REAL-TIME COMPRESSION EXAMPLE |
1626 | |
1627 | Here is a working example that uses the I<Compress::Zlib> module to do real-time |
1628 | compression / decompression of keys & values with DBM::Deep Filters. |
1629 | Please visit L<http://search.cpan.org/search?module=Compress::Zlib> for |
1630 | more on I<Compress::Zlib>. |
1631 | |
1632 | use DBM::Deep; |
1633 | use Compress::Zlib; |
1634 | |
1635 | my $db = DBM::Deep->new( |
1636 | file => "foo-compress.db", |
1637 | filter_store_key => \&my_compress, |
1638 | filter_store_value => \&my_compress, |
1639 | filter_fetch_key => \&my_decompress, |
1640 | filter_fetch_value => \&my_decompress, |
1641 | ); |
1642 | |
1643 | $db->{key1} = "value1"; |
1644 | $db->{key2} = "value2"; |
1645 | print "key1: " . $db->{key1} . "\n"; |
1646 | print "key2: " . $db->{key2} . "\n"; |
1647 | |
1648 | undef $db; |
1649 | exit; |
1650 | |
1651 | sub my_compress { |
1652 | return Compress::Zlib::memGzip( $_[0] ) ; |
1653 | } |
1654 | sub my_decompress { |
1655 | return Compress::Zlib::memGunzip( $_[0] ) ; |
1656 | } |
1657 | |
1658 | B<Note:> Filtering of keys only applies to hashes. Array "keys" are |
1659 | actually numerical index numbers, and are not filtered. |
1660 | |
1661 | =head1 ERROR HANDLING |
1662 | |
1663 | Most DBM::Deep methods return a true value for success, and call die() on |
95967a5e |
1664 | failure. You can wrap calls in an eval block to catch the die. |
ffed8b01 |
1665 | |
1666 | my $db = DBM::Deep->new( "foo.db" ); # create hash |
1667 | eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call |
1668 | |
90f93b43 |
1669 | print $@; # prints error message |
429e4192 |
1670 | |
ffed8b01 |
1671 | =head1 LARGEFILE SUPPORT |
1672 | |
1673 | If you have a 64-bit system, and your Perl is compiled with both LARGEFILE |
1674 | and 64-bit support, you I<may> be able to create databases larger than 2 GB. |
1675 | DBM::Deep by default uses 32-bit file offset tags, but these can be changed |
1676 | by calling the static C<set_pack()> method before you do anything else. |
1677 | |
1678 | DBM::Deep::set_pack(8, 'Q'); |
1679 | |
1680 | This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words |
1681 | instead of 32-bit longs. After setting these values your DB files have a |
1682 | theoretical maximum size of 16 XB (exabytes). |
1683 | |
ffed8b01 |
1684 | B<Note:> Changing these values will B<NOT> work for existing database files. |
1685 | Only change this for new files, and make sure it stays set consistently |
1686 | throughout the file's life. If you do set these values, you can no longer |
1687 | access 32-bit DB files. You can, however, call C<set_pack(4, 'N')> to change |
1688 | back to 32-bit mode. |
1689 | |
ffed8b01 |
1690 | B<Note:> I have not personally tested files > 2 GB -- all my systems have |
1691 | only a 32-bit Perl. However, I have received user reports that this does |
1692 | indeed work! |
1693 | |
1694 | =head1 LOW-LEVEL ACCESS |
1695 | |
90f93b43 |
1696 | If you require low-level access to the underlying filehandle that DBM::Deep uses, |
4d35d856 |
1697 | you can call the C<_fh()> method, which returns the handle: |
ffed8b01 |
1698 | |
4d35d856 |
1699 | my $fh = $db->_fh(); |
ffed8b01 |
1700 | |
1701 | This method can be called on the root level of the datbase, or any child |
1702 | hashes or arrays. All levels share a I<root> structure, which contains things |
90f93b43 |
1703 | like the filehandle, a reference counter, and all the options specified |
ffed8b01 |
1704 | when you created the object. You can get access to this root structure by |
1705 | calling the C<root()> method. |
1706 | |
4d35d856 |
1707 | my $root = $db->_root(); |
ffed8b01 |
1708 | |
1709 | This is useful for changing options after the object has already been created, |
37c5bcf0 |
1710 | such as enabling/disabling locking, or debug modes. You can also |
ffed8b01 |
1711 | store your own temporary user data in this structure (be wary of name |
1712 | collision), which is then accessible from any child hash or array. |
1713 | |
1714 | =head1 CUSTOM DIGEST ALGORITHM |
1715 | |
1716 | DBM::Deep by default uses the I<Message Digest 5> (MD5) algorithm for hashing |
1717 | keys. However you can override this, and use another algorithm (such as SHA-256) |
14a3acb6 |
1718 | or even write your own. But please note that DBM::Deep currently expects zero |
ffed8b01 |
1719 | collisions, so your algorithm has to be I<perfect>, so to speak. |
1720 | Collision detection may be introduced in a later version. |
1721 | |
1722 | |
1723 | |
1724 | You can specify a custom digest algorithm by calling the static C<set_digest()> |
1725 | function, passing a reference to a subroutine, and the length of the algorithm's |
14a3acb6 |
1726 | hashes (in bytes). This is a global static function, which affects ALL DBM::Deep |
ffed8b01 |
1727 | objects. Here is a working example that uses a 256-bit hash from the |
1728 | I<Digest::SHA256> module. Please see |
1729 | L<http://search.cpan.org/search?module=Digest::SHA256> for more. |
1730 | |
1731 | use DBM::Deep; |
1732 | use Digest::SHA256; |
1733 | |
1734 | my $context = Digest::SHA256::new(256); |
1735 | |
1736 | DBM::Deep::set_digest( \&my_digest, 32 ); |
1737 | |
1738 | my $db = DBM::Deep->new( "foo-sha.db" ); |
1739 | |
1740 | $db->{key1} = "value1"; |
1741 | $db->{key2} = "value2"; |
1742 | print "key1: " . $db->{key1} . "\n"; |
1743 | print "key2: " . $db->{key2} . "\n"; |
1744 | |
1745 | undef $db; |
1746 | exit; |
1747 | |
1748 | sub my_digest { |
1749 | return substr( $context->hash($_[0]), 0, 32 ); |
1750 | } |
1751 | |
1752 | B<Note:> Your returned digest strings must be B<EXACTLY> the number |
1753 | of bytes you specify in the C<set_digest()> function (in this case 32). |
1754 | |
1755 | =head1 CIRCULAR REFERENCES |
1756 | |
1757 | DBM::Deep has B<experimental> support for circular references. Meaning you |
1758 | can have a nested hash key or array element that points to a parent object. |
1759 | This relationship is stored in the DB file, and is preserved between sessions. |
1760 | Here is an example: |
1761 | |
1762 | my $db = DBM::Deep->new( "foo.db" ); |
1763 | |
1764 | $db->{foo} = "bar"; |
1765 | $db->{circle} = $db; # ref to self |
1766 | |
1767 | print $db->{foo} . "\n"; # prints "foo" |
1768 | print $db->{circle}->{foo} . "\n"; # prints "foo" again |
1769 | |
1770 | One catch is, passing the object to a function that recursively walks the |
1771 | object tree (such as I<Data::Dumper> or even the built-in C<optimize()> or |
1772 | C<export()> methods) will result in an infinite loop. The other catch is, |
1773 | if you fetch the I<key> of a circular reference (i.e. using the C<first_key()> |
1774 | or C<next_key()> methods), you will get the I<target object's key>, not the |
1775 | ref's key. This gets even more interesting with the above example, where |
1776 | the I<circle> key points to the base DB object, which technically doesn't |
1777 | have a key. So I made DBM::Deep return "[base]" as the key name in that |
1778 | special case. |
1779 | |
1780 | =head1 CAVEATS / ISSUES / BUGS |
1781 | |
1782 | This section describes all the known issues with DBM::Deep. It you have found |
1783 | something that is not listed here, please send e-mail to L<jhuckaby@cpan.org>. |
1784 | |
1785 | =head2 UNUSED SPACE RECOVERY |
1786 | |
14a3acb6 |
1787 | One major caveat with DBM::Deep is that space occupied by existing keys and |
ffed8b01 |
1788 | values is not recovered when they are deleted. Meaning if you keep deleting |
1789 | and adding new keys, your file will continuously grow. I am working on this, |
1790 | but in the meantime you can call the built-in C<optimize()> method from time to |
1791 | time (perhaps in a crontab or something) to recover all your unused space. |
1792 | |
1793 | $db->optimize(); # returns true on success |
1794 | |
1795 | This rebuilds the ENTIRE database into a new file, then moves it on top of |
1796 | the original. The new file will have no unused space, thus it will take up as |
1797 | little disk space as possible. Please note that this operation can take |
1798 | a long time for large files, and you need enough disk space to temporarily hold |
1799 | 2 copies of your DB file. The temporary file is created in the same directory |
1800 | as the original, named with a ".tmp" extension, and is deleted when the |
1801 | operation completes. Oh, and if locking is enabled, the DB is automatically |
1802 | locked for the entire duration of the copy. |
1803 | |
ffed8b01 |
1804 | B<WARNING:> Only call optimize() on the top-level node of the database, and |
14a3acb6 |
1805 | make sure there are no child references lying around. DBM::Deep keeps a reference |
ffed8b01 |
1806 | counter, and if it is greater than 1, optimize() will abort and return undef. |
1807 | |
1808 | =head2 AUTOVIVIFICATION |
1809 | |
1810 | Unfortunately, autovivification doesn't work with tied hashes. This appears to |
1811 | be a bug in Perl's tie() system, as I<Jakob Schmidt> encountered the very same |
1812 | issue with his I<DWH_FIle> module (see L<http://search.cpan.org/search?module=DWH_File>), |
1813 | and it is also mentioned in the BUGS section for the I<MLDBM> module <see |
1814 | L<http://search.cpan.org/search?module=MLDBM>). Basically, on a new db file, |
1815 | this does not work: |
1816 | |
1817 | $db->{foo}->{bar} = "hello"; |
1818 | |
1819 | Since "foo" doesn't exist, you cannot add "bar" to it. You end up with "foo" |
1820 | being an empty hash. Try this instead, which works fine: |
1821 | |
1822 | $db->{foo} = { bar => "hello" }; |
1823 | |
1824 | As of Perl 5.8.7, this bug still exists. I have walked very carefully through |
1825 | the execution path, and Perl indeed passes an empty hash to the STORE() method. |
1826 | Probably a bug in Perl. |
1827 | |
1828 | =head2 FILE CORRUPTION |
1829 | |
14a3acb6 |
1830 | The current level of error handling in DBM::Deep is minimal. Files I<are> checked |
1831 | for a 32-bit signature when opened, but other corruption in files can cause |
1832 | segmentation faults. DBM::Deep may try to seek() past the end of a file, or get |
ffed8b01 |
1833 | stuck in an infinite loop depending on the level of corruption. File write |
1834 | operations are not checked for failure (for speed), so if you happen to run |
14a3acb6 |
1835 | out of disk space, DBM::Deep will probably fail in a bad way. These things will |
ffed8b01 |
1836 | be addressed in a later version of DBM::Deep. |
1837 | |
1838 | =head2 DB OVER NFS |
1839 | |
14a3acb6 |
1840 | Beware of using DB files over NFS. DBM::Deep uses flock(), which works well on local |
ffed8b01 |
1841 | filesystems, but will NOT protect you from file corruption over NFS. I've heard |
1842 | about setting up your NFS server with a locking daemon, then using lockf() to |
90f93b43 |
1843 | lock your files, but your mileage may vary there as well. From what I |
ffed8b01 |
1844 | understand, there is no real way to do it. However, if you need access to the |
90f93b43 |
1845 | underlying filehandle in DBM::Deep for using some other kind of locking scheme like |
ffed8b01 |
1846 | lockf(), see the L<LOW-LEVEL ACCESS> section above. |
1847 | |
1848 | =head2 COPYING OBJECTS |
1849 | |
1850 | Beware of copying tied objects in Perl. Very strange things can happen. |
14a3acb6 |
1851 | Instead, use DBM::Deep's C<clone()> method which safely copies the object and |
ffed8b01 |
1852 | returns a new, blessed, tied hash or array to the same level in the DB. |
1853 | |
1854 | my $copy = $db->clone(); |
1855 | |
90f93b43 |
1856 | B<Note>: Since clone() here is cloning the object, not the database location, any |
1857 | modifications to either $db or $copy will be visible in both. |
1858 | |
ffed8b01 |
1859 | =head2 LARGE ARRAYS |
1860 | |
1861 | Beware of using C<shift()>, C<unshift()> or C<splice()> with large arrays. |
1862 | These functions cause every element in the array to move, which can be murder |
1863 | on DBM::Deep, as every element has to be fetched from disk, then stored again in |
90f93b43 |
1864 | a different location. This will be addressed in the forthcoming version 1.00. |
ffed8b01 |
1865 | |
9be51a89 |
1866 | =head2 WRITEONLY FILES |
1867 | |
1868 | If you pass in a filehandle to new(), you may have opened it in either a readonly or |
1869 | writeonly mode. STORE will verify that the filehandle is writable. However, there |
1870 | doesn't seem to be a good way to determine if a filehandle is readable. And, if the |
1871 | filehandle isn't readable, it's not clear what will happen. So, don't do that. |
1872 | |
ffed8b01 |
1873 | =head1 PERFORMANCE |
1874 | |
1875 | This section discusses DBM::Deep's speed and memory usage. |
1876 | |
1877 | =head2 SPEED |
1878 | |
1879 | Obviously, DBM::Deep isn't going to be as fast as some C-based DBMs, such as |
1880 | the almighty I<BerkeleyDB>. But it makes up for it in features like true |
1881 | multi-level hash/array support, and cross-platform FTPable files. Even so, |
1882 | DBM::Deep is still pretty fast, and the speed stays fairly consistent, even |
1883 | with huge databases. Here is some test data: |
1884 | |
1885 | Adding 1,000,000 keys to new DB file... |
1886 | |
1887 | At 100 keys, avg. speed is 2,703 keys/sec |
1888 | At 200 keys, avg. speed is 2,642 keys/sec |
1889 | At 300 keys, avg. speed is 2,598 keys/sec |
1890 | At 400 keys, avg. speed is 2,578 keys/sec |
1891 | At 500 keys, avg. speed is 2,722 keys/sec |
1892 | At 600 keys, avg. speed is 2,628 keys/sec |
1893 | At 700 keys, avg. speed is 2,700 keys/sec |
1894 | At 800 keys, avg. speed is 2,607 keys/sec |
1895 | At 900 keys, avg. speed is 2,190 keys/sec |
1896 | At 1,000 keys, avg. speed is 2,570 keys/sec |
1897 | At 2,000 keys, avg. speed is 2,417 keys/sec |
1898 | At 3,000 keys, avg. speed is 1,982 keys/sec |
1899 | At 4,000 keys, avg. speed is 1,568 keys/sec |
1900 | At 5,000 keys, avg. speed is 1,533 keys/sec |
1901 | At 6,000 keys, avg. speed is 1,787 keys/sec |
1902 | At 7,000 keys, avg. speed is 1,977 keys/sec |
1903 | At 8,000 keys, avg. speed is 2,028 keys/sec |
1904 | At 9,000 keys, avg. speed is 2,077 keys/sec |
1905 | At 10,000 keys, avg. speed is 2,031 keys/sec |
1906 | At 20,000 keys, avg. speed is 1,970 keys/sec |
1907 | At 30,000 keys, avg. speed is 2,050 keys/sec |
1908 | At 40,000 keys, avg. speed is 2,073 keys/sec |
1909 | At 50,000 keys, avg. speed is 1,973 keys/sec |
1910 | At 60,000 keys, avg. speed is 1,914 keys/sec |
1911 | At 70,000 keys, avg. speed is 2,091 keys/sec |
1912 | At 80,000 keys, avg. speed is 2,103 keys/sec |
1913 | At 90,000 keys, avg. speed is 1,886 keys/sec |
1914 | At 100,000 keys, avg. speed is 1,970 keys/sec |
1915 | At 200,000 keys, avg. speed is 2,053 keys/sec |
1916 | At 300,000 keys, avg. speed is 1,697 keys/sec |
1917 | At 400,000 keys, avg. speed is 1,838 keys/sec |
1918 | At 500,000 keys, avg. speed is 1,941 keys/sec |
1919 | At 600,000 keys, avg. speed is 1,930 keys/sec |
1920 | At 700,000 keys, avg. speed is 1,735 keys/sec |
1921 | At 800,000 keys, avg. speed is 1,795 keys/sec |
1922 | At 900,000 keys, avg. speed is 1,221 keys/sec |
1923 | At 1,000,000 keys, avg. speed is 1,077 keys/sec |
1924 | |
1925 | This test was performed on a PowerMac G4 1gHz running Mac OS X 10.3.2 & Perl |
1926 | 5.8.1, with an 80GB Ultra ATA/100 HD spinning at 7200RPM. The hash keys and |
1927 | values were between 6 - 12 chars in length. The DB file ended up at 210MB. |
1928 | Run time was 12 min 3 sec. |
1929 | |
1930 | =head2 MEMORY USAGE |
1931 | |
1932 | One of the great things about DBM::Deep is that it uses very little memory. |
1933 | Even with huge databases (1,000,000+ keys) you will not see much increased |
14a3acb6 |
1934 | memory on your process. DBM::Deep relies solely on the filesystem for storing |
ffed8b01 |
1935 | and fetching data. Here is output from I</usr/bin/top> before even opening a |
1936 | database handle: |
1937 | |
1938 | PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND |
1939 | 22831 root 11 0 2716 2716 1296 R 0.0 0.2 0:07 perl |
1940 | |
1941 | Basically the process is taking 2,716K of memory. And here is the same |
1942 | process after storing and fetching 1,000,000 keys: |
1943 | |
1944 | PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND |
1945 | 22831 root 14 0 2772 2772 1328 R 0.0 0.2 13:32 perl |
1946 | |
1947 | Notice the memory usage increased by only 56K. Test was performed on a 700mHz |
1948 | x86 box running Linux RedHat 7.2 & Perl 5.6.1. |
1949 | |
1950 | =head1 DB FILE FORMAT |
1951 | |
1952 | In case you were interested in the underlying DB file format, it is documented |
1953 | here in this section. You don't need to know this to use the module, it's just |
1954 | included for reference. |
1955 | |
1956 | =head2 SIGNATURE |
1957 | |
1958 | DBM::Deep files always start with a 32-bit signature to identify the file type. |
1959 | This is at offset 0. The signature is "DPDB" in network byte order. This is |
90f93b43 |
1960 | checked for when the file is opened and an error will be thrown if it's not found. |
ffed8b01 |
1961 | |
1962 | =head2 TAG |
1963 | |
1964 | The DBM::Deep file is in a I<tagged format>, meaning each section of the file |
1965 | has a standard header containing the type of data, the length of data, and then |
1966 | the data itself. The type is a single character (1 byte), the length is a |
1967 | 32-bit unsigned long in network byte order, and the data is, well, the data. |
1968 | Here is how it unfolds: |
1969 | |
1970 | =head2 MASTER INDEX |
1971 | |
1972 | Immediately after the 32-bit file signature is the I<Master Index> record. |
1973 | This is a standard tag header followed by 1024 bytes (in 32-bit mode) or 2048 |
1974 | bytes (in 64-bit mode) of data. The type is I<H> for hash or I<A> for array, |
1975 | depending on how the DBM::Deep object was constructed. |
1976 | |
ffed8b01 |
1977 | The index works by looking at a I<MD5 Hash> of the hash key (or array index |
1978 | number). The first 8-bit char of the MD5 signature is the offset into the |
1979 | index, multipled by 4 in 32-bit mode, or 8 in 64-bit mode. The value of the |
1980 | index element is a file offset of the next tag for the key/element in question, |
1981 | which is usually a I<Bucket List> tag (see below). |
1982 | |
ffed8b01 |
1983 | The next tag I<could> be another index, depending on how many keys/elements |
1984 | exist. See L<RE-INDEXING> below for details. |
1985 | |
1986 | =head2 BUCKET LIST |
1987 | |
1988 | A I<Bucket List> is a collection of 16 MD5 hashes for keys/elements, plus |
1989 | file offsets to where the actual data is stored. It starts with a standard |
1990 | tag header, with type I<B>, and a data size of 320 bytes in 32-bit mode, or |
1991 | 384 bytes in 64-bit mode. Each MD5 hash is stored in full (16 bytes), plus |
1992 | the 32-bit or 64-bit file offset for the I<Bucket> containing the actual data. |
1993 | When the list fills up, a I<Re-Index> operation is performed (See |
1994 | L<RE-INDEXING> below). |
1995 | |
1996 | =head2 BUCKET |
1997 | |
1998 | A I<Bucket> is a tag containing a key/value pair (in hash mode), or a |
1999 | index/value pair (in array mode). It starts with a standard tag header with |
2000 | type I<D> for scalar data (string, binary, etc.), or it could be a nested |
2001 | hash (type I<H>) or array (type I<A>). The value comes just after the tag |
2002 | header. The size reported in the tag header is only for the value, but then, |
2003 | just after the value is another size (32-bit unsigned long) and then the plain |
2004 | key itself. Since the value is likely to be fetched more often than the plain |
2005 | key, I figured it would be I<slightly> faster to store the value first. |
2006 | |
ffed8b01 |
2007 | If the type is I<H> (hash) or I<A> (array), the value is another I<Master Index> |
2008 | record for the nested structure, where the process begins all over again. |
2009 | |
2010 | =head2 RE-INDEXING |
2011 | |
2012 | After a I<Bucket List> grows to 16 records, its allocated space in the file is |
2013 | exhausted. Then, when another key/element comes in, the list is converted to a |
2014 | new index record. However, this index will look at the next char in the MD5 |
2015 | hash, and arrange new Bucket List pointers accordingly. This process is called |
2016 | I<Re-Indexing>. Basically, a new index tag is created at the file EOF, and all |
2017 | 17 (16 + new one) keys/elements are removed from the old Bucket List and |
2018 | inserted into the new index. Several new Bucket Lists are created in the |
2019 | process, as a new MD5 char from the key is being examined (it is unlikely that |
2020 | the keys will all share the same next char of their MD5s). |
2021 | |
ffed8b01 |
2022 | Because of the way the I<MD5> algorithm works, it is impossible to tell exactly |
2023 | when the Bucket Lists will turn into indexes, but the first round tends to |
2024 | happen right around 4,000 keys. You will see a I<slight> decrease in |
2025 | performance here, but it picks back up pretty quick (see L<SPEED> above). Then |
2026 | it takes B<a lot> more keys to exhaust the next level of Bucket Lists. It's |
2027 | right around 900,000 keys. This process can continue nearly indefinitely -- |
2028 | right up until the point the I<MD5> signatures start colliding with each other, |
2029 | and this is B<EXTREMELY> rare -- like winning the lottery 5 times in a row AND |
2030 | getting struck by lightning while you are walking to cash in your tickets. |
2031 | Theoretically, since I<MD5> hashes are 128-bit values, you I<could> have up to |
2032 | 340,282,366,921,000,000,000,000,000,000,000,000,000 keys/elements (I believe |
2033 | this is 340 unodecillion, but don't quote me). |
2034 | |
2035 | =head2 STORING |
2036 | |
90f93b43 |
2037 | When a new key/element is stored, the key (or index number) is first run through |
ffed8b01 |
2038 | I<Digest::MD5> to get a 128-bit signature (example, in hex: |
2039 | b05783b0773d894396d475ced9d2f4f6). Then, the I<Master Index> record is checked |
37c5bcf0 |
2040 | for the first char of the signature (in this case I<b0>). If it does not exist, |
ffed8b01 |
2041 | a new I<Bucket List> is created for our key (and the next 15 future keys that |
2042 | happen to also have I<b> as their first MD5 char). The entire MD5 is written |
2043 | to the I<Bucket List> along with the offset of the new I<Bucket> record (EOF at |
2044 | this point, unless we are replacing an existing I<Bucket>), where the actual |
2045 | data will be stored. |
2046 | |
2047 | =head2 FETCHING |
2048 | |
2049 | Fetching an existing key/element involves getting a I<Digest::MD5> of the key |
2050 | (or index number), then walking along the indexes. If there are enough |
2051 | keys/elements in this DB level, there might be nested indexes, each linked to |
2052 | a particular char of the MD5. Finally, a I<Bucket List> is pointed to, which |
2053 | contains up to 16 full MD5 hashes. Each is checked for equality to the key in |
2054 | question. If we found a match, the I<Bucket> tag is loaded, where the value and |
2055 | plain key are stored. |
2056 | |
ffed8b01 |
2057 | Fetching the plain key occurs when calling the I<first_key()> and I<next_key()> |
2058 | methods. In this process the indexes are walked systematically, and each key |
2059 | fetched in increasing MD5 order (which is why it appears random). Once the |
b5467b48 |
2060 | I<Bucket> is found, the value is skipped and the plain key returned instead. |
ffed8b01 |
2061 | B<Note:> Do not count on keys being fetched as if the MD5 hashes were |
2062 | alphabetically sorted. This only happens on an index-level -- as soon as the |
2063 | I<Bucket Lists> are hit, the keys will come out in the order they went in -- |
2064 | so it's pretty much undefined how the keys will come out -- just like Perl's |
2065 | built-in hashes. |
2066 | |
261d1296 |
2067 | =head1 CODE COVERAGE |
2068 | |
37c5bcf0 |
2069 | We use B<Devel::Cover> to test the code coverage of our tests, below is the |
90f93b43 |
2070 | B<Devel::Cover> report on this module's test suite. |
7910cf68 |
2071 | |
37c5bcf0 |
2072 | ---------------------------- ------ ------ ------ ------ ------ ------ ------ |
2073 | File stmt bran cond sub pod time total |
2074 | ---------------------------- ------ ------ ------ ------ ------ ------ ------ |
9be51a89 |
2075 | blib/lib/DBM/Deep.pm 95.2 83.8 70.0 98.2 100.0 58.0 91.0 |
2076 | blib/lib/DBM/Deep/Array.pm 100.0 91.1 100.0 100.0 n/a 26.7 98.0 |
2077 | blib/lib/DBM/Deep/Hash.pm 95.3 80.0 100.0 100.0 n/a 15.3 92.4 |
2078 | Total 96.2 84.8 74.4 98.8 100.0 100.0 92.4 |
37c5bcf0 |
2079 | ---------------------------- ------ ------ ------ ------ ------ ------ ------ |
2080 | |
2081 | =head1 MORE INFORMATION |
2082 | |
2083 | Check out the DBM::Deep Google Group at L<http://groups.google.com/group/DBM-Deep> |
2084 | or send email to L<DBM-Deep@googlegroups.com>. |
261d1296 |
2085 | |
aeeb5497 |
2086 | =head1 AUTHORS |
ffed8b01 |
2087 | |
2088 | Joseph Huckaby, L<jhuckaby@cpan.org> |
37c5bcf0 |
2089 | |
aeeb5497 |
2090 | Rob Kinyon, L<rkinyon@cpan.org> |
ffed8b01 |
2091 | |
2092 | Special thanks to Adam Sah and Rich Gaushell! You know why :-) |
2093 | |
2094 | =head1 SEE ALSO |
2095 | |
2096 | perltie(1), Tie::Hash(3), Digest::MD5(3), Fcntl(3), flock(2), lockf(3), nfs(5), |
2097 | Digest::SHA256(3), Crypt::Blowfish(3), Compress::Zlib(3) |
2098 | |
2099 | =head1 LICENSE |
2100 | |
aeeb5497 |
2101 | Copyright (c) 2002-2006 Joseph Huckaby. All Rights Reserved. |
ffed8b01 |
2102 | This is free software, you may use it and distribute it under the |
2103 | same terms as Perl itself. |
2104 | |
2105 | =cut |