From: rkinyon Date: Thu, 18 Jan 2007 15:16:54 +0000 (+0000) Subject: r14851@rob-kinyons-computer: rob | 2007-01-17 22:44:38 -0500 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=de82ff48544d0ace2b68ef0b3343ea94a7a0631f;p=dbsrgits%2FDBM-Deep.git r14851@rob-kinyons-computer: rob | 2007-01-17 22:44:38 -0500 Added some further transaction tests and broke out DBM-Deep.pod from DBM-Deep.pm --- diff --git a/lib/DBM/Deep.pm b/lib/DBM/Deep.pm index a203c6e..cd839f4 100644 --- a/lib/DBM/Deep.pm +++ b/lib/DBM/Deep.pm @@ -578,1064 +578,3 @@ sub clear { (shift)->CLEAR( @_ ) } 1; __END__ - -=head1 NAME - -DBM::Deep - A pure perl multi-level hash/array DBM that supports transactions - -=head1 SYNOPSIS - - use DBM::Deep; - my $db = DBM::Deep->new( "foo.db" ); - - $db->{key} = 'value'; - print $db->{key}; - - $db->put('key' => 'value'); - print $db->get('key'); - - # true multi-level support - $db->{my_complex} = [ - 'hello', { perl => 'rules' }, - 42, 99, - ]; - - $db->begin_work; - - # Do stuff here - - $db->rollback; - $db->commit; - - tie my %db, 'DBM::Deep', 'foo.db'; - $db{key} = 'value'; - print $db{key}; - - tied(%db)->put('key' => 'value'); - print tied(%db)->get('key'); - -=head1 DESCRIPTION - -A unique flat-file database module, written in pure perl. True multi-level -hash/array support (unlike MLDBM, which is faked), hybrid OO / tie() -interface, cross-platform FTPable files, ACID transactions, and is quite fast. -Can handle millions of keys and unlimited levels without significant -slow-down. Written from the ground-up in pure perl -- this is NOT a wrapper -around a C-based DBM. Out-of-the-box compatibility with Unix, Mac OS X and -Windows. - -=head1 VERSION DIFFERENCES - -B<NOTE>: 0.99_03 has significant file format differences from prior versions. -There will be a backwards-compatibility layer in 1.00, but that is slated for -a later 0.99_x release. This version is B<NOT> backwards compatible with any -other release of DBM::Deep.
- -B<NOTE>: 0.99_01 and above have significant file format differences from 0.983 and -before. There will be a backwards-compatibility layer in 1.00, but that is -slated for a later 0.99_x release. This version is B<NOT> backwards compatible -with 0.983 and before. - -=head1 SETUP - -Construction can be done OO-style (which is the recommended way), or using -Perl's tie() function. Both are examined here. - -=head2 OO CONSTRUCTION - -The recommended way to construct a DBM::Deep object is to use the new() -method, which gets you a blessed I<and> tied hash (or array) reference. - - my $db = DBM::Deep->new( "foo.db" ); - -This opens a new database handle, mapped to the file "foo.db". If this -file does not exist, it will automatically be created. DB files are -opened in "r+" (read/write) mode, and the type of object returned is a -hash, unless otherwise specified (see L<OPTIONS> below). - -You can pass a number of options to the constructor to specify things like -locking, autoflush, etc. This is done by passing an inline hash (or hashref): - - my $db = DBM::Deep->new( - file => "foo.db", - locking => 1, - autoflush => 1 - ); - -Notice that the filename is now specified I<inside> the hash with -the "file" parameter, as opposed to being the sole argument to the -constructor. This is required if any options are specified. -See L<OPTIONS> below for the complete list. - -You can also start with an array instead of a hash. For this, you must -specify the C<type> parameter: - - my $db = DBM::Deep->new( - file => "foo.db", - type => DBM::Deep->TYPE_ARRAY - ); - -B<Note:> Specifying the C<type> parameter only takes effect when beginning -a new DB file. If you create a DBM::Deep object with an existing file, the -C<type> will be loaded from the file header, and an error will be thrown if -the wrong type is passed in. - -=head2 TIE CONSTRUCTION - -Alternately, you can create a DBM::Deep handle by using Perl's built-in -tie() function. The object returned from tie() can be used to call methods, -such as lock() and unlock().
(That object can be retrieved from the tied -variable at any time using tied() - please see L<perltie> for more info.) - - my %hash; - my $db = tie %hash, "DBM::Deep", "foo.db"; - - my @array; - my $db = tie @array, "DBM::Deep", "bar.db"; - -As with the OO constructor, you can replace the DB filename parameter with -a hash containing one or more options (see L<OPTIONS> just below for the -complete list). - - tie %hash, "DBM::Deep", { - file => "foo.db", - locking => 1, - autoflush => 1 - }; - -=head2 OPTIONS - -There are a number of options that can be passed in when constructing your -DBM::Deep objects. These apply to both the OO- and tie- based approaches. - -=over - -=item * file - -Filename of the DB file to link the handle to. You can pass a full absolute -filesystem path, partial path, or a plain filename if the file is in the -current working directory. This is a required parameter (though q.v. fh). - -=item * fh - -If you want, you can pass in the fh instead of the file. This is most useful for doing -something like: - - my $db = DBM::Deep->new( { fh => \*DATA } ); - -You are responsible for making sure that the fh has been opened appropriately for your -needs. If you open it read-only and attempt to write, an exception will be thrown. If you -open it write-only or append-only, an exception will be thrown immediately as DBM::Deep -needs to read from the fh. - -=item * file_offset - -This is the offset within the file that the DBM::Deep db starts. Most of the time, you will -not need to set this. However, it's there if you want it. - -If you pass in fh and do not set this, it will be set appropriately. - -=item * type - -This parameter specifies what type of object to create, a hash or array. Use -one of these two constants: - -=over 4 - -=item * C<DBM::Deep-E<gt>TYPE_HASH> - -=item * C<DBM::Deep-E<gt>TYPE_ARRAY>. - -=back - -This only takes effect when beginning a new file. This is an optional -parameter, and defaults to C<DBM::Deep-E<gt>TYPE_HASH>. - -=item * locking - -Specifies whether locking is to be enabled.
DBM::Deep uses Perl's flock() -function to lock the database in exclusive mode for writes, and shared mode -for reads. Pass any true value to enable. This affects the base DB handle -I that use the same DB file. This is an -optional parameter, and defaults to 0 (disabled). See L below for -more. - -=item * autoflush - -Specifies whether autoflush is to be enabled on the underlying filehandle. -This obviously slows down write operations, but is required if you may have -multiple processes accessing the same DB file (also consider enable I). -Pass any true value to enable. This is an optional parameter, and defaults to 0 -(disabled). - -=item * filter_* - -See L below. - -=back - -=head1 TIE INTERFACE - -With DBM::Deep you can access your databases using Perl's standard hash/array -syntax. Because all DBM::Deep objects are I to hashes or arrays, you can -treat them as such. DBM::Deep will intercept all reads/writes and direct them -to the right place -- the DB file. This has nothing to do with the -L section above. This simply tells you how to use DBM::Deep -using regular hashes and arrays, rather than calling functions like C -and C (although those work too). It is entirely up to you how to want -to access your databases. - -=head2 HASHES - -You can treat any DBM::Deep object like a normal Perl hash reference. Add keys, -or even nested hashes (or arrays) using standard Perl syntax: - - my $db = DBM::Deep->new( "foo.db" ); - - $db->{mykey} = "myvalue"; - $db->{myhash} = {}; - $db->{myhash}->{subkey} = "subvalue"; - - print $db->{myhash}->{subkey} . "\n"; - -You can even step through hash keys using the normal Perl C function: - - foreach my $key (keys %$db) { - print "$key: " . $db->{$key} . "\n"; - } - -Remember that Perl's C function extracts I key from the hash and -pushes them onto an array, all before the loop even begins. If you have an -extremely large hash, this may exhaust Perl's memory. 
Instead, consider using -Perl's C function, which pulls keys/values one at a time, using very -little memory: - - while (my ($key, $value) = each %$db) { - print "$key: $value\n"; - } - -Please note that when using C, you should always pass a direct -hash reference, not a lookup. Meaning, you should B do this: - - # NEVER DO THIS - while (my ($key, $value) = each %{$db->{foo}}) { # BAD - -This causes an infinite loop, because for each iteration, Perl is calling -FETCH() on the $db handle, resulting in a "new" hash for foo every time, so -it effectively keeps returning the first key over and over again. Instead, -assign a temporary variable to C<$db->{foo}>, then pass that to each(). - -=head2 ARRAYS - -As with hashes, you can treat any DBM::Deep object like a normal Perl array -reference. This includes inserting, removing and manipulating elements, -and the C, C, C, C and C functions. -The object must have first been created using type CTYPE_ARRAY>, -or simply be a nested array reference inside a hash. Example: - - my $db = DBM::Deep->new( - file => "foo-array.db", - type => DBM::Deep->TYPE_ARRAY - ); - - $db->[0] = "foo"; - push @$db, "bar", "baz"; - unshift @$db, "bah"; - - my $last_elem = pop @$db; # baz - my $first_elem = shift @$db; # bah - my $second_elem = $db->[1]; # bar - - my $num_elements = scalar @$db; - -=head1 OO INTERFACE - -In addition to the I interface, you can also use a standard OO interface -to manipulate all aspects of DBM::Deep databases. Each type of object (hash or -array) has its own methods, but both types share the following common methods: -C, C, C, C and C. C and -C are aliases to C and C, respectively. - -=over - -=item * new() / clone() - -These are the constructor and copy-functions. - -=item * put() / store() - -Stores a new hash key/value pair, or sets an array element value. Takes two -arguments, the hash key or array index, and the new value. The value can be -a scalar, hash ref or array ref. 
Returns true on success, false on failure. - - $db->put("foo", "bar"); # for hashes - $db->put(1, "bar"); # for arrays - -=item * get() / fetch() - -Fetches the value of a hash key or array element. Takes one argument: the hash -key or array index. Returns a scalar, hash ref or array ref, depending on the -data type stored. - - my $value = $db->get("foo"); # for hashes - my $value = $db->get(1); # for arrays - -=item * exists() - -Checks if a hash key or array index exists. Takes one argument: the hash key -or array index. Returns true if it exists, false if not. - - if ($db->exists("foo")) { print "yay!\n"; } # for hashes - if ($db->exists(1)) { print "yay!\n"; } # for arrays - -=item * delete() - -Deletes one hash key/value pair or array element. Takes one argument: the hash -key or array index. Returns true on success, false if not found. For arrays, -the remaining elements located after the deleted element are NOT moved over. -The deleted element is essentially just undefined, which is exactly how Perl's -internal arrays work. Please note that the space occupied by the deleted -key/value or element is B reused again -- see L -below for details and workarounds. - - $db->delete("foo"); # for hashes - $db->delete(1); # for arrays - -=item * clear() - -Deletes B hash keys or array elements. Takes no arguments. No return -value. Please note that the space occupied by the deleted keys/values or -elements is B reused again -- see L below for -details and workarounds. - - $db->clear(); # hashes or arrays - -=item * lock() / unlock() - -q.v. Locking. - -=item * optimize() - -Recover lost disk space. This is important to do, especially if you use -transactions. - -=item * import() / export() - -Data going in and out. - -=back - -=head2 HASHES - -For hashes, DBM::Deep supports all the common methods described above, and the -following additional methods: C and C. - -=over - -=item * first_key() - -Returns the "first" key in the hash. 
As with built-in Perl hashes, keys are -fetched in an undefined order (which appears random). Takes no arguments, -returns the key as a scalar value. - - my $key = $db->first_key(); - -=item * next_key() - -Returns the "next" key in the hash, given the previous one as the sole argument. -Returns undef if there are no more keys to be fetched. - - $key = $db->next_key($key); - -=back - -Here are some examples of using hashes: - - my $db = DBM::Deep->new( "foo.db" ); - - $db->put("foo", "bar"); - print "foo: " . $db->get("foo") . "\n"; - - $db->put("baz", {}); # new child hash ref - $db->get("baz")->put("buz", "biz"); - print "buz: " . $db->get("baz")->get("buz") . "\n"; - - my $key = $db->first_key(); - while ($key) { - print "$key: " . $db->get($key) . "\n"; - $key = $db->next_key($key); - } - - if ($db->exists("foo")) { $db->delete("foo"); } - -=head2 ARRAYS - -For arrays, DBM::Deep supports all the common methods described above, and the -following additional methods: C, C, C, C, -C and C. - -=over - -=item * length() - -Returns the number of elements in the array. Takes no arguments. - - my $len = $db->length(); - -=item * push() - -Adds one or more elements onto the end of the array. Accepts scalars, hash -refs or array refs. No return value. - - $db->push("foo", "bar", {}); - -=item * pop() - -Fetches the last element in the array, and deletes it. Takes no arguments. -Returns undef if array is empty. Returns the element value. - - my $elem = $db->pop(); - -=item * shift() - -Fetches the first element in the array, deletes it, then shifts all the -remaining elements over to take up the space. Returns the element value. This -method is not recommended with large arrays -- see L below for -details. - - my $elem = $db->shift(); - -=item * unshift() - -Inserts one or more elements onto the beginning of the array, shifting all -existing elements over to make room. Accepts scalars, hash refs or array refs. -No return value. 
This method is not recommended with large arrays -- see - below for details. - - $db->unshift("foo", "bar", {}); - -=item * splice() - -Performs exactly like Perl's built-in function of the same name. See L for usage -- it is too complicated to document here. This method is -not recommended with large arrays -- see L below for details. - -=back - -Here are some examples of using arrays: - - my $db = DBM::Deep->new( - file => "foo.db", - type => DBM::Deep->TYPE_ARRAY - ); - - $db->push("bar", "baz"); - $db->unshift("foo"); - $db->put(3, "buz"); - - my $len = $db->length(); - print "length: $len\n"; # 4 - - for (my $k=0; $k<$len; $k++) { - print "$k: " . $db->get($k) . "\n"; - } - - $db->splice(1, 2, "biz", "baf"); - - while (my $elem = shift @$db) { - print "shifted: $elem\n"; - } - -=head1 LOCKING - -Enable automatic file locking by passing a true value to the C -parameter when constructing your DBM::Deep object (see L above). - - my $db = DBM::Deep->new( - file => "foo.db", - locking => 1 - ); - -This causes DBM::Deep to C the underlying filehandle with exclusive -mode for writes, and shared mode for reads. This is required if you have -multiple processes accessing the same database file, to avoid file corruption. -Please note that C does NOT work for files over NFS. See L below for more. - -=head2 EXPLICIT LOCKING - -You can explicitly lock a database, so it remains locked for multiple -actions. This is done by calling the C method, and passing an -optional lock mode argument (defaults to exclusive mode). This is particularly -useful for things like counters, where the current value needs to be fetched, -then incremented, then stored again. - - $db->lock(); - my $counter = $db->get("counter"); - $counter++; - $db->put("counter", $counter); - $db->unlock(); - - # or... - - $db->lock(); - $db->{counter}++; - $db->unlock(); - -You can pass C an optional argument, which specifies which mode to use -(exclusive or shared). 
Use one of these two constants: -C<DBM::Deep-E<gt>LOCK_EX> or C<DBM::Deep-E<gt>LOCK_SH>. These are passed -directly to C<flock()>, and are the same as the constants defined in Perl's -L<Fcntl> module. - - $db->lock( $db->LOCK_SH ); - # something here - $db->unlock(); - -=head1 IMPORTING/EXPORTING - -You can import existing complex structures by calling the C<import()> method, -and export an entire database into an in-memory structure using the C<export()> -method. Both are examined here. - -=head2 IMPORTING - -Say you have an existing hash with nested hashes/arrays inside it. Instead of -walking the structure and adding keys/elements to the database as you go, -simply pass a reference to the C<import()> method. This recursively adds -everything to an existing DBM::Deep object for you. Here is an example: - - my $struct = { - key1 => "value1", - key2 => "value2", - array1 => [ "elem0", "elem1", "elem2" ], - hash1 => { - subkey1 => "subvalue1", - subkey2 => "subvalue2" - } - }; - - my $db = DBM::Deep->new( "foo.db" ); - $db->import( $struct ); - - print $db->{key1} . "\n"; # prints "value1" - -This recursively imports the entire C<$struct> object into C<$db>, including -all nested hashes and arrays. If the DBM::Deep object contains existing data, -keys are merged with the existing ones, replacing if they already exist. -The C<import()> method can be called on any database level (not just the base -level), and works with both hash and array DB types. - -B<Note:> Make sure your existing structure has no circular references in it. -These will cause an infinite loop when importing. There are plans to fix this -in a later release. - -=head2 EXPORTING - -Calling the C<export()> method on an existing DBM::Deep object will return -a reference to a new in-memory copy of the database. The export is done -recursively, so all nested hashes/arrays are all exported to standard Perl -objects.
Here is an example: - - my $db = DBM::Deep->new( "foo.db" ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - $db->{hash1} = {}; - $db->{hash1}->{subkey1} = "subvalue1"; - $db->{hash1}->{subkey2} = "subvalue2"; - - my $struct = $db->export(); - - print $struct->{key1} . "\n"; # prints "value1" - -This makes a complete copy of the database in memory, and returns a reference -to it. The C method can be called on any database level (not just -the base level), and works with both hash and array DB types. Be careful of -large databases -- you can store a lot more data in a DBM::Deep object than an -in-memory Perl structure. - -B Make sure your database has no circular references in it. -These will cause an infinite loop when exporting. There are plans to fix this -in a later release. - -=head1 FILTERS - -DBM::Deep has a number of hooks where you can specify your own Perl function -to perform filtering on incoming or outgoing data. This is a perfect -way to extend the engine, and implement things like real-time compression or -encryption. Filtering applies to the base DB level, and all child hashes / -arrays. Filter hooks can be specified when your DBM::Deep object is first -constructed, or by calling the C method at any time. There are -four available filter hooks, described below: - -=over - -=item * filter_store_key - -This filter is called whenever a hash key is stored. It -is passed the incoming key, and expected to return a transformed key. - -=item * filter_store_value - -This filter is called whenever a hash key or array element is stored. It -is passed the incoming value, and expected to return a transformed value. - -=item * filter_fetch_key - -This filter is called whenever a hash key is fetched (i.e. via -C or C). It is passed the transformed key, -and expected to return the plain key. - -=item * filter_fetch_value - -This filter is called whenever a hash key or array element is fetched. 
-It is passed the transformed value, and expected to return the plain value. - -=back - -Here are the two ways to setup a filter hook: - - my $db = DBM::Deep->new( - file => "foo.db", - filter_store_value => \&my_filter_store, - filter_fetch_value => \&my_filter_fetch - ); - - # or... - - $db->set_filter( "filter_store_value", \&my_filter_store ); - $db->set_filter( "filter_fetch_value", \&my_filter_fetch ); - -Your filter function will be called only when dealing with SCALAR keys or -values. When nested hashes and arrays are being stored/fetched, filtering -is bypassed. Filters are called as static functions, passed a single SCALAR -argument, and expected to return a single SCALAR value. If you want to -remove a filter, set the function reference to C: - - $db->set_filter( "filter_store_value", undef ); - -=head2 REAL-TIME ENCRYPTION EXAMPLE - -Here is a working example that uses the I module to -do real-time encryption / decryption of keys & values with DBM::Deep Filters. -Please visit L for more -on I. You'll also need the I module. - - use DBM::Deep; - use Crypt::Blowfish; - use Crypt::CBC; - - my $cipher = Crypt::CBC->new({ - 'key' => 'my secret key', - 'cipher' => 'Blowfish', - 'iv' => '$KJh#(}q', - 'regenerate_key' => 0, - 'padding' => 'space', - 'prepend_iv' => 0 - }); - - my $db = DBM::Deep->new( - file => "foo-encrypt.db", - filter_store_key => \&my_encrypt, - filter_store_value => \&my_encrypt, - filter_fetch_key => \&my_decrypt, - filter_fetch_value => \&my_decrypt, - ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - print "key1: " . $db->{key1} . "\n"; - print "key2: " . $db->{key2} . "\n"; - - undef $db; - exit; - - sub my_encrypt { - return $cipher->encrypt( $_[0] ); - } - sub my_decrypt { - return $cipher->decrypt( $_[0] ); - } - -=head2 REAL-TIME COMPRESSION EXAMPLE - -Here is a working example that uses the I module to do real-time -compression / decompression of keys & values with DBM::Deep Filters. -Please visit L for -more on I. 
- - use DBM::Deep; - use Compress::Zlib; - - my $db = DBM::Deep->new( - file => "foo-compress.db", - filter_store_key => \&my_compress, - filter_store_value => \&my_compress, - filter_fetch_key => \&my_decompress, - filter_fetch_value => \&my_decompress, - ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - print "key1: " . $db->{key1} . "\n"; - print "key2: " . $db->{key2} . "\n"; - - undef $db; - exit; - - sub my_compress { - return Compress::Zlib::memGzip( $_[0] ) ; - } - sub my_decompress { - return Compress::Zlib::memGunzip( $_[0] ) ; - } - -B Filtering of keys only applies to hashes. Array "keys" are -actually numerical index numbers, and are not filtered. - -=head1 ERROR HANDLING - -Most DBM::Deep methods return a true value for success, and call die() on -failure. You can wrap calls in an eval block to catch the die. - - my $db = DBM::Deep->new( "foo.db" ); # create hash - eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call - - print $@; # prints error message - -=head1 LARGEFILE SUPPORT - -If you have a 64-bit system, and your Perl is compiled with both LARGEFILE -and 64-bit support, you I be able to create databases larger than 2 GB. -DBM::Deep by default uses 32-bit file offset tags, but these can be changed -by specifying the 'pack_size' parameter when constructing the file. - - DBM::Deep->new( - filename => $filename, - pack_size => 'large', - ); - -This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words -instead of 32-bit longs. After setting these values your DB files have a -theoretical maximum size of 16 XB (exabytes). - -You can also use C 'small'> in order to use 16-bit file -offsets. - -B Changing these values will B work for existing database files. -Only change this for new files. Once the value has been set, it is stored in -the file's header and cannot be changed for the life of the file. These -parameters are per-file, meaning you can access 32-bit and 64-bit files, as -you choose. 
- -B<Note:> We have not personally tested files larger than 2 GB -- all my -systems have only a 32-bit Perl. However, I have received user reports that -this does indeed work! - -=head1 LOW-LEVEL ACCESS - -If you require low-level access to the underlying filehandle that DBM::Deep uses, -you can call the C<_fh()> method, which returns the handle: - - my $fh = $db->_fh(); - -This method can be called on the root level of the database, or any child -hashes or arrays. All levels share a I<root> structure, which contains things -like the filehandle, a reference counter, and all the options specified -when you created the object. You can get access to this file object by -calling the C<_storage()> method. - - my $file_obj = $db->_storage(); - -This is useful for changing options after the object has already been created, -such as enabling/disabling locking. You can also store your own temporary user -data in this structure (be wary of name collision), which is then accessible from -any child hash or array. - -=head1 CUSTOM DIGEST ALGORITHM - -DBM::Deep by default uses the I<Message Digest 5> (MD5) algorithm for hashing -keys. However you can override this, and use another algorithm (such as SHA-256) -or even write your own. But please note that DBM::Deep currently expects zero -collisions, so your algorithm has to be I<perfect>, so to speak. Collision -detection may be introduced in a later version. - -You can specify a custom digest algorithm by passing it into the parameter -list for new(), passing a reference to a subroutine as the 'digest' parameter, -and the length of the algorithm's hashes (in bytes) as the 'hash_size' -parameter. Here is a working example that uses a 256-bit hash from the -I<Digest::SHA256> module. Please see -L<http://search.cpan.org/search?module=Digest::SHA256> for more information. - - use DBM::Deep; - use Digest::SHA256; - - my $context = Digest::SHA256::new(256); - - my $db = DBM::Deep->new( - filename => "foo-sha.db", - digest => \&my_digest, - hash_size => 32, - ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - print "key1: " . 
$db->{key1} . "\n"; - print "key2: " . $db->{key2} . "\n"; - - undef $db; - exit; - - sub my_digest { - return substr( $context->hash($_[0]), 0, 32 ); - } - -B Your returned digest strings must be B the number -of bytes you specify in the hash_size parameter (in this case 32). - -B If you do choose to use a custom digest algorithm, you must set it -every time you access this file. Otherwise, the default (MD5) will be used. - -=head1 CIRCULAR REFERENCES - -B: DBM::Deep 0.99_03 has turned off circular references pending -evaluation of some edge cases. I hope to be able to re-enable circular -references in a future version prior to 1.00. - -DBM::Deep has B support for circular references. Meaning you -can have a nested hash key or array element that points to a parent object. -This relationship is stored in the DB file, and is preserved between sessions. -Here is an example: - - my $db = DBM::Deep->new( "foo.db" ); - - $db->{foo} = "bar"; - $db->{circle} = $db; # ref to self - - print $db->{foo} . "\n"; # prints "bar" - print $db->{circle}->{foo} . "\n"; # prints "bar" again - -B: Passing the object to a function that recursively walks the -object tree (such as I or even the built-in C or -C methods) will result in an infinite loop. This will be fixed in -a future release. - -=head1 TRANSACTIONS - -New in 0.99_01 is ACID transactions. Every DBM::Deep object is completely -transaction-ready - it is not an option you have to turn on. Three new methods -have been added to support them. They are: - -=over 4 - -=item * begin_work() - -This starts a transaction. - -=item * commit() - -This applies the changes done within the transaction to the mainline and ends -the transaction. - -=item * rollback() - -This discards the changes done within the transaction to the mainline and ends -the transaction. - -=back - -Transactions in DBM::Deep are done using the MVCC method, the same method used -by the InnoDB MySQL table type. 
- -=head1 PERFORMANCE - -Because DBM::Deep is a concurrent datastore, every change is flushed to disk -immediately and every read goes to disk. This means that DBM::Deep functions -at the speed of disk (generally 10-20ms) vs. the speed of RAM (generally -50-70ns), or at least 150-200x slower than the comparable in-memory -datastructure in Perl. - -There are several techniques you can use to speed up how DBM::Deep functions. - -=over 4 - -=item * Put it on a ramdisk - -The easiest and quickest mechanism for making DBM::Deep run faster is to create -a ramdisk and locate the DBM::Deep file there. Doing this as an option may -become a feature of DBM::Deep, assuming there is a good ramdisk wrapper on CPAN. - -=item * Work at the tightest level possible - -It is much faster to assign the level of your db that you are working with to -an intermediate variable than to re-look it up every time. Thus - - # BAD - while ( my ($k, $v) = each %{$db->{foo}{bar}{baz}} ) { - ... - } - - # GOOD - my $x = $db->{foo}{bar}{baz}; - while ( my ($k, $v) = each %$x ) { - ... - } - -=item * Make your file as tight as possible - -If you know that you are not going to use more than 65K in your database, -consider using the C<pack_size =E<gt> 'small'> option. This will instruct -DBM::Deep to use 16bit addresses, meaning that the seek times will be less. -The same goes with the number of transactions. num_Txns defaults to 16. If you -can set that to 1 or 2, that will reduce the file-size considerably, thus -reducing seek times. - -=back - -=head1 CAVEATS / ISSUES / BUGS - -This section describes all the known issues with DBM::Deep. If you have found -something that is not listed here, please send e-mail to L. - -=head2 REFERENCES - -(The reasons given assume a high level of Perl understanding, specifically of -references. You can safely skip this section.) - -Currently, the only references supported are HASH and ARRAY.
The other reference -types (SCALAR, CODE, GLOB, and REF) cannot be supported for various reasons. - -=over 4 - -=item * GLOB - -These are things like filehandles and other sockets. They can't be supported -because it's completely unclear how DBM::Deep should serialize them. - -=item * SCALAR / REF - -The discussion here refers to the following type of example: - - my $x = 25; - $db->{key1} = \$x; - - $x = 50; - - # In some other process ... - - my $val = ${ $db->{key1} }; - - is( $val, 50, "What actually gets stored in the DB file?" ); - -The problem is one of synchronization. When the variable being referred to -changes value, the reference isn't notified. This means that the new value won't -be stored in the datafile for other processes to read. There is no TIEREF. - -It is theoretically possible to store references to values already within a -DBM::Deep object because everything already is synchronized, but the change to -the internals would be quite large. Specifically, DBM::Deep would have to tie -every single value that is stored. This would bloat the RAM footprint of -DBM::Deep at least twofold (if not more) and be a significant performance drain, -all to support a feature that has never been requested. - -=item * CODE - -L provides a mechanism for serializing coderefs, -including saving off all closure state. However, just as for SCALAR and REF, -that closure state may change without notifying the DBM::Deep object storing -the reference. - -=back - -=head2 FILE CORRUPTION - -The current level of error handling in DBM::Deep is minimal. Files I checked -for a 32-bit signature when opened, but other corruption in files can cause -segmentation faults. DBM::Deep may try to seek() past the end of a file, or get -stuck in an infinite loop depending on the level of corruption. File write -operations are not checked for failure (for speed), so if you happen to run -out of disk space, DBM::Deep will probably fail in a bad way. 
These things will -be addressed in a later version of DBM::Deep. - -=head2 DB OVER NFS - -Beware of using DBM::Deep files over NFS. DBM::Deep uses flock(), which works -well on local filesystems, but will NOT protect you from file corruption over -NFS. I've heard about setting up your NFS server with a locking daemon, then -using lockf() to lock your files, but your mileage may vary there as well. -From what I understand, there is no real way to do it. However, if you need -access to the underlying filehandle in DBM::Deep for using some other kind of -locking scheme like lockf(), see the L section above. - -=head2 COPYING OBJECTS - -Beware of copying tied objects in Perl. Very strange things can happen. -Instead, use DBM::Deep's C method which safely copies the object and -returns a new, blessed, tied hash or array to the same level in the DB. - - my $copy = $db->clone(); - -B: Since clone() here is cloning the object, not the database location, any -modifications to either $db or $copy will be visible to both. - -=head2 LARGE ARRAYS - -Beware of using C, C or C with large arrays. -These functions cause every element in the array to move, which can be murder -on DBM::Deep, as every element has to be fetched from disk, then stored again in -a different location. This will be addressed in a future version. - -=head2 WRITEONLY FILES - -If you pass in a filehandle to new(), you may have opened it in either a readonly or -writeonly mode. STORE will verify that the filehandle is writable. However, there -doesn't seem to be a good way to determine if a filehandle is readable. And, if the -filehandle isn't readable, it's not clear what will happen. So, don't do that. - -=head1 CODE COVERAGE - -B is used to test the code coverage of the tests. Below is the -B report on this distribution's test suite. 
- - ---------------------------- ------ ------ ------ ------ ------ ------ ------ - File stmt bran cond sub pod time total - ---------------------------- ------ ------ ------ ------ ------ ------ ------ - blib/lib/DBM/Deep.pm 96.7 87.9 90.5 100.0 89.5 4.5 95.1 - blib/lib/DBM/Deep/Array.pm 100.0 91.4 100.0 100.0 100.0 4.9 98.3 - blib/lib/DBM/Deep/Engine.pm 95.6 85.1 78.0 99.1 0.0 57.4 89.4 - blib/lib/DBM/Deep/File.pm 94.3 86.1 55.6 100.0 0.0 30.7 85.7 - blib/lib/DBM/Deep/Hash.pm 100.0 100.0 100.0 100.0 100.0 2.4 100.0 - Total 96.5 86.9 81.0 99.5 32.1 100.0 91.8 - ---------------------------- ------ ------ ------ ------ ------ ------ ------ - -=head1 MORE INFORMATION - -Check out the DBM::Deep Google Group at L -or send email to L. You can also visit #dbm-deep on -irc.perl.org - -The source code repository is at L - -=head1 MAINTAINER(S) - -Rob Kinyon, L - -Originally written by Joseph Huckaby, L - -Special thanks to Adam Sah and Rich Gaushell! You know why :-) - -Additional thanks go out to Stonehenge who have sponsored the 1.00 release. - -=head1 SEE ALSO - -perltie(1), Tie::Hash(3), Digest::MD5(3), Fcntl(3), flock(2), lockf(3), nfs(5), -Digest::SHA256(3), Crypt::Blowfish(3), Compress::Zlib(3) - -=head1 LICENSE - -Copyright (c) 2007 Rob Kinyon. All Rights Reserved. -This is free software, you may use it and distribute it under the -same terms as Perl itself. 
- -=cut diff --git a/lib/DBM/Deep.pod b/lib/DBM/Deep.pod new file mode 100644 index 0000000..adc2fc7 --- /dev/null +++ b/lib/DBM/Deep.pod @@ -0,0 +1,1059 @@ +=head1 NAME + +DBM::Deep - A pure perl multi-level hash/array DBM that supports transactions + +=head1 SYNOPSIS + + use DBM::Deep; + my $db = DBM::Deep->new( "foo.db" ); + + $db->{key} = 'value'; + print $db->{key}; + + $db->put('key' => 'value'); + print $db->get('key'); + + # true multi-level support + $db->{my_complex} = [ + 'hello', { perl => 'rules' }, + 42, 99, + ]; + + $db->begin_work; + + # Do stuff here + + $db->rollback; + $db->commit; + + tie my %db, 'DBM::Deep', 'foo.db'; + $db{key} = 'value'; + print $db{key}; + + tied(%db)->put('key' => 'value'); + print tied(%db)->get('key'); + +=head1 DESCRIPTION + +A unique flat-file database module, written in pure perl. True multi-level +hash/array support (unlike MLDBM, which is faked), hybrid OO / tie() +interface, cross-platform FTPable files, ACID transactions, and is quite fast. +Can handle millions of keys and unlimited levels without significant +slow-down. Written from the ground-up in pure perl -- this is NOT a wrapper +around a C-based DBM. Out-of-the-box compatibility with Unix, Mac OS X and +Windows. + +=head1 VERSION DIFFERENCES + +B: 0.99_03 has significant file format differences from prior versions. +There will be a backwards-compatibility layer in 1.00, but that is slated for +a later 0.99_x release. This version is B backwards compatible with any +other release of DBM::Deep. + +B: 0.99_01 and above have significant file format differences from 0.983 and +before. There will be a backwards-compatibility layer in 1.00, but that is +slated for a later 0.99_x release. This version is B backwards compatible +with 0.983 and before. + +=head1 SETUP + +Construction can be done OO-style (which is the recommended way), or using +Perl's tie() function. Both are examined here.
+ +=head2 OO CONSTRUCTION + +The recommended way to construct a DBM::Deep object is to use the new() +method, which gets you a blessed I tied hash (or array) reference. + + my $db = DBM::Deep->new( "foo.db" ); + +This opens a new database handle, mapped to the file "foo.db". If this +file does not exist, it will automatically be created. DB files are +opened in "r+" (read/write) mode, and the type of object returned is a +hash, unless otherwise specified (see L below). + +You can pass a number of options to the constructor to specify things like +locking, autoflush, etc. This is done by passing an inline hash (or hashref): + + my $db = DBM::Deep->new( + file => "foo.db", + locking => 1, + autoflush => 1 + ); + +Notice that the filename is now specified I the hash with +the "file" parameter, as opposed to being the sole argument to the +constructor. This is required if any options are specified. +See L below for the complete list. + +You can also start with an array instead of a hash. For this, you must +specify the C parameter: + + my $db = DBM::Deep->new( + file => "foo.db", + type => DBM::Deep->TYPE_ARRAY + ); + +B Specifying the C parameter only takes effect when beginning +a new DB file. If you create a DBM::Deep object with an existing file, the +C will be loaded from the file header, and an error will be thrown if +the wrong type is passed in. + +=head2 TIE CONSTRUCTION + +Alternately, you can create a DBM::Deep handle by using Perl's built-in +tie() function. The object returned from tie() can be used to call methods, +such as lock() and unlock(). (That object can be retrieved from the tied +variable at any time using tied() - please see L for more info.) + + my %hash; + my $db = tie %hash, "DBM::Deep", "foo.db"; + + my @array; + my $db = tie @array, "DBM::Deep", "bar.db"; + +As with the OO constructor, you can replace the DB filename parameter with +a hash containing one or more options (see L just below for the +complete list).
+ + tie %hash, "DBM::Deep", { + file => "foo.db", + locking => 1, + autoflush => 1 + }; + +=head2 OPTIONS + +There are a number of options that can be passed in when constructing your +DBM::Deep objects. These apply to both the OO- and tie- based approaches. + +=over + +=item * file + +Filename of the DB file to link the handle to. You can pass a full absolute +filesystem path, partial path, or a plain filename if the file is in the +current working directory. This is a required parameter (though q.v. fh). + +=item * fh + +If you want, you can pass in the fh instead of the file. This is most useful for doing +something like: + + my $db = DBM::Deep->new( { fh => \*DATA } ); + +You are responsible for making sure that the fh has been opened appropriately for your +needs. If you open it read-only and attempt to write, an exception will be thrown. If you +open it write-only or append-only, an exception will be thrown immediately as DBM::Deep +needs to read from the fh. + +=item * file_offset + +This is the offset within the file that the DBM::Deep db starts. Most of the time, you will +not need to set this. However, it's there if you want it. + +If you pass in fh and do not set this, it will be set appropriately. + +=item * type + +This parameter specifies what type of object to create, a hash or array. Use +one of these two constants: + +=over 4 + +=item * CTYPE_HASH> + +=item * CTYPE_ARRAY>. + +=back + +This only takes effect when beginning a new file. This is an optional +parameter, and defaults to CTYPE_HASH>. + +=item * locking + +Specifies whether locking is to be enabled. DBM::Deep uses Perl's flock() +function to lock the database in exclusive mode for writes, and shared mode +for reads. Pass any true value to enable. This affects the base DB handle +I that use the same DB file. This is an +optional parameter, and defaults to 0 (disabled). See L below for +more. + +=item * autoflush + +Specifies whether autoflush is to be enabled on the underlying filehandle. 
+This obviously slows down write operations, but is required if you may have +multiple processes accessing the same DB file (also consider enabling I). +Pass any true value to enable. This is an optional parameter, and defaults to 0 +(disabled). + +=item * filter_* + +See L below. + +=back + +=head1 TIE INTERFACE + +With DBM::Deep you can access your databases using Perl's standard hash/array +syntax. Because all DBM::Deep objects are I to hashes or arrays, you can +treat them as such. DBM::Deep will intercept all reads/writes and direct them +to the right place -- the DB file. This has nothing to do with the +L section above. This simply tells you how to use DBM::Deep +using regular hashes and arrays, rather than calling functions like C +and C (although those work too). It is entirely up to you how you want +to access your databases. + +=head2 HASHES + +You can treat any DBM::Deep object like a normal Perl hash reference. Add keys, +or even nested hashes (or arrays) using standard Perl syntax: + + my $db = DBM::Deep->new( "foo.db" ); + + $db->{mykey} = "myvalue"; + $db->{myhash} = {}; + $db->{myhash}->{subkey} = "subvalue"; + + print $db->{myhash}->{subkey} . "\n"; + +You can even step through hash keys using the normal Perl C function: + + foreach my $key (keys %$db) { + print "$key: " . $db->{$key} . "\n"; + } + +Remember that Perl's C function extracts I key from the hash and +pushes them onto an array, all before the loop even begins. If you have an +extremely large hash, this may exhaust Perl's memory. Instead, consider using +Perl's C function, which pulls keys/values one at a time, using very +little memory: + + while (my ($key, $value) = each %$db) { + print "$key: $value\n"; + } + +Please note that when using C, you should always pass a direct +hash reference, not a lookup.
Meaning, you should B do this: + + # NEVER DO THIS + while (my ($key, $value) = each %{$db->{foo}}) { # BAD + +This causes an infinite loop, because for each iteration, Perl is calling +FETCH() on the $db handle, resulting in a "new" hash for foo every time, so +it effectively keeps returning the first key over and over again. Instead, +assign a temporary variable to C<$db->{foo}>, then pass that to each(). + +=head2 ARRAYS + +As with hashes, you can treat any DBM::Deep object like a normal Perl array +reference. This includes inserting, removing and manipulating elements, +and the C, C, C, C and C functions. +The object must have first been created using type CTYPE_ARRAY>, +or simply be a nested array reference inside a hash. Example: + + my $db = DBM::Deep->new( + file => "foo-array.db", + type => DBM::Deep->TYPE_ARRAY + ); + + $db->[0] = "foo"; + push @$db, "bar", "baz"; + unshift @$db, "bah"; + + my $last_elem = pop @$db; # baz + my $first_elem = shift @$db; # bah + my $second_elem = $db->[1]; # bar + + my $num_elements = scalar @$db; + +=head1 OO INTERFACE + +In addition to the I interface, you can also use a standard OO interface +to manipulate all aspects of DBM::Deep databases. Each type of object (hash or +array) has its own methods, but both types share the following common methods: +C, C, C, C and C. C and +C are aliases to C and C, respectively. + +=over + +=item * new() / clone() + +These are the constructor and copy-functions. + +=item * put() / store() + +Stores a new hash key/value pair, or sets an array element value. Takes two +arguments, the hash key or array index, and the new value. The value can be +a scalar, hash ref or array ref. Returns true on success, false on failure. + + $db->put("foo", "bar"); # for hashes + $db->put(1, "bar"); # for arrays + +=item * get() / fetch() + +Fetches the value of a hash key or array element. Takes one argument: the hash +key or array index. 
Returns a scalar, hash ref or array ref, depending on the +data type stored. + + my $value = $db->get("foo"); # for hashes + my $value = $db->get(1); # for arrays + +=item * exists() + +Checks if a hash key or array index exists. Takes one argument: the hash key +or array index. Returns true if it exists, false if not. + + if ($db->exists("foo")) { print "yay!\n"; } # for hashes + if ($db->exists(1)) { print "yay!\n"; } # for arrays + +=item * delete() + +Deletes one hash key/value pair or array element. Takes one argument: the hash +key or array index. Returns true on success, false if not found. For arrays, +the remaining elements located after the deleted element are NOT moved over. +The deleted element is essentially just undefined, which is exactly how Perl's +internal arrays work. Please note that the space occupied by the deleted +key/value or element is B reused again -- see L +below for details and workarounds. + + $db->delete("foo"); # for hashes + $db->delete(1); # for arrays + +=item * clear() + +Deletes B hash keys or array elements. Takes no arguments. No return +value. Please note that the space occupied by the deleted keys/values or +elements is B reused again -- see L below for +details and workarounds. + + $db->clear(); # hashes or arrays + +=item * lock() / unlock() + +q.v. Locking. + +=item * optimize() + +Recover lost disk space. This is important to do, especially if you use +transactions. + +=item * import() / export() + +Data going in and out. + +=back + +=head2 HASHES + +For hashes, DBM::Deep supports all the common methods described above, and the +following additional methods: C and C. + +=over + +=item * first_key() + +Returns the "first" key in the hash. As with built-in Perl hashes, keys are +fetched in an undefined order (which appears random). Takes no arguments, +returns the key as a scalar value. + + my $key = $db->first_key(); + +=item * next_key() + +Returns the "next" key in the hash, given the previous one as the sole argument. 
+Returns undef if there are no more keys to be fetched. + + $key = $db->next_key($key); + +=back + +Here are some examples of using hashes: + + my $db = DBM::Deep->new( "foo.db" ); + + $db->put("foo", "bar"); + print "foo: " . $db->get("foo") . "\n"; + + $db->put("baz", {}); # new child hash ref + $db->get("baz")->put("buz", "biz"); + print "buz: " . $db->get("baz")->get("buz") . "\n"; + + my $key = $db->first_key(); + while ($key) { + print "$key: " . $db->get($key) . "\n"; + $key = $db->next_key($key); + } + + if ($db->exists("foo")) { $db->delete("foo"); } + +=head2 ARRAYS + +For arrays, DBM::Deep supports all the common methods described above, and the +following additional methods: C, C, C, C, +C and C. + +=over + +=item * length() + +Returns the number of elements in the array. Takes no arguments. + + my $len = $db->length(); + +=item * push() + +Adds one or more elements onto the end of the array. Accepts scalars, hash +refs or array refs. No return value. + + $db->push("foo", "bar", {}); + +=item * pop() + +Fetches the last element in the array, and deletes it. Takes no arguments. +Returns undef if array is empty. Returns the element value. + + my $elem = $db->pop(); + +=item * shift() + +Fetches the first element in the array, deletes it, then shifts all the +remaining elements over to take up the space. Returns the element value. This +method is not recommended with large arrays -- see L below for +details. + + my $elem = $db->shift(); + +=item * unshift() + +Inserts one or more elements onto the beginning of the array, shifting all +existing elements over to make room. Accepts scalars, hash refs or array refs. +No return value. This method is not recommended with large arrays -- see + below for details. + + $db->unshift("foo", "bar", {}); + +=item * splice() + +Performs exactly like Perl's built-in function of the same name. See L for usage -- it is too complicated to document here. 
This method is +not recommended with large arrays -- see L below for details. + +=back + +Here are some examples of using arrays: + + my $db = DBM::Deep->new( + file => "foo.db", + type => DBM::Deep->TYPE_ARRAY + ); + + $db->push("bar", "baz"); + $db->unshift("foo"); + $db->put(3, "buz"); + + my $len = $db->length(); + print "length: $len\n"; # 4 + + for (my $k=0; $k<$len; $k++) { + print "$k: " . $db->get($k) . "\n"; + } + + $db->splice(1, 2, "biz", "baf"); + + while (my $elem = shift @$db) { + print "shifted: $elem\n"; + } + +=head1 LOCKING + +Enable automatic file locking by passing a true value to the C +parameter when constructing your DBM::Deep object (see L above). + + my $db = DBM::Deep->new( + file => "foo.db", + locking => 1 + ); + +This causes DBM::Deep to C the underlying filehandle with exclusive +mode for writes, and shared mode for reads. This is required if you have +multiple processes accessing the same database file, to avoid file corruption. +Please note that C does NOT work for files over NFS. See L below for more. + +=head2 EXPLICIT LOCKING + +You can explicitly lock a database, so it remains locked for multiple +actions. This is done by calling the C method, and passing an +optional lock mode argument (defaults to exclusive mode). This is particularly +useful for things like counters, where the current value needs to be fetched, +then incremented, then stored again. + + $db->lock(); + my $counter = $db->get("counter"); + $counter++; + $db->put("counter", $counter); + $db->unlock(); + + # or... + + $db->lock(); + $db->{counter}++; + $db->unlock(); + +You can pass C an optional argument, which specifies which mode to use +(exclusive or shared). Use one of these two constants: +CLOCK_EX> or CLOCK_SH>. These are passed +directly to C, and are the same as the constants defined in Perl's +L module. 
+ + $db->lock( $db->LOCK_SH ); + # something here + $db->unlock(); + +=head1 IMPORTING/EXPORTING + +You can import existing complex structures by calling the C method, +and export an entire database into an in-memory structure using the C +method. Both are examined here. + +=head2 IMPORTING + +Say you have an existing hash with nested hashes/arrays inside it. Instead of +walking the structure and adding keys/elements to the database as you go, +simply pass a reference to the C method. This recursively adds +everything to an existing DBM::Deep object for you. Here is an example: + + my $struct = { + key1 => "value1", + key2 => "value2", + array1 => [ "elem0", "elem1", "elem2" ], + hash1 => { + subkey1 => "subvalue1", + subkey2 => "subvalue2" + } + }; + + my $db = DBM::Deep->new( "foo.db" ); + $db->import( $struct ); + + print $db->{key1} . "\n"; # prints "value1" + +This recursively imports the entire C<$struct> object into C<$db>, including +all nested hashes and arrays. If the DBM::Deep object contains existing data, +keys are merged with the existing ones, replacing if they already exist. +The C method can be called on any database level (not just the base +level), and works with both hash and array DB types. + +B Make sure your existing structure has no circular references in it. +These will cause an infinite loop when importing. There are plans to fix this +in a later release. + +=head2 EXPORTING + +Calling the C method on an existing DBM::Deep object will return +a reference to a new in-memory copy of the database. The export is done +recursively, so all nested hashes/arrays are all exported to standard Perl +objects. Here is an example: + + my $db = DBM::Deep->new( "foo.db" ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + $db->{hash1} = {}; + $db->{hash1}->{subkey1} = "subvalue1"; + $db->{hash1}->{subkey2} = "subvalue2"; + + my $struct = $db->export(); + + print $struct->{key1} .
"\n"; # prints "value1" + +This makes a complete copy of the database in memory, and returns a reference +to it. The C method can be called on any database level (not just +the base level), and works with both hash and array DB types. Be careful of +large databases -- you can store a lot more data in a DBM::Deep object than an +in-memory Perl structure. + +B Make sure your database has no circular references in it. +These will cause an infinite loop when exporting. There are plans to fix this +in a later release. + +=head1 FILTERS + +DBM::Deep has a number of hooks where you can specify your own Perl function +to perform filtering on incoming or outgoing data. This is a perfect +way to extend the engine, and implement things like real-time compression or +encryption. Filtering applies to the base DB level, and all child hashes / +arrays. Filter hooks can be specified when your DBM::Deep object is first +constructed, or by calling the C method at any time. There are +four available filter hooks, described below: + +=over + +=item * filter_store_key + +This filter is called whenever a hash key is stored. It +is passed the incoming key, and expected to return a transformed key. + +=item * filter_store_value + +This filter is called whenever a hash key or array element is stored. It +is passed the incoming value, and expected to return a transformed value. + +=item * filter_fetch_key + +This filter is called whenever a hash key is fetched (i.e. via +C or C). It is passed the transformed key, +and expected to return the plain key. + +=item * filter_fetch_value + +This filter is called whenever a hash key or array element is fetched. +It is passed the transformed value, and expected to return the plain value. + +=back + +Here are the two ways to setup a filter hook: + + my $db = DBM::Deep->new( + file => "foo.db", + filter_store_value => \&my_filter_store, + filter_fetch_value => \&my_filter_fetch + ); + + # or... 
+ + $db->set_filter( "filter_store_value", \&my_filter_store ); + $db->set_filter( "filter_fetch_value", \&my_filter_fetch ); + +Your filter function will be called only when dealing with SCALAR keys or +values. When nested hashes and arrays are being stored/fetched, filtering +is bypassed. Filters are called as static functions, passed a single SCALAR +argument, and expected to return a single SCALAR value. If you want to +remove a filter, set the function reference to C: + + $db->set_filter( "filter_store_value", undef ); + +=head2 REAL-TIME ENCRYPTION EXAMPLE + +Here is a working example that uses the I module to +do real-time encryption / decryption of keys & values with DBM::Deep Filters. +Please visit L for more +on I. You'll also need the I module. + + use DBM::Deep; + use Crypt::Blowfish; + use Crypt::CBC; + + my $cipher = Crypt::CBC->new({ + 'key' => 'my secret key', + 'cipher' => 'Blowfish', + 'iv' => '$KJh#(}q', + 'regenerate_key' => 0, + 'padding' => 'space', + 'prepend_iv' => 0 + }); + + my $db = DBM::Deep->new( + file => "foo-encrypt.db", + filter_store_key => \&my_encrypt, + filter_store_value => \&my_encrypt, + filter_fetch_key => \&my_decrypt, + filter_fetch_value => \&my_decrypt, + ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; + + undef $db; + exit; + + sub my_encrypt { + return $cipher->encrypt( $_[0] ); + } + sub my_decrypt { + return $cipher->decrypt( $_[0] ); + } + +=head2 REAL-TIME COMPRESSION EXAMPLE + +Here is a working example that uses the I module to do real-time +compression / decompression of keys & values with DBM::Deep Filters. +Please visit L for +more on I. 
+ + use DBM::Deep; + use Compress::Zlib; + + my $db = DBM::Deep->new( + file => "foo-compress.db", + filter_store_key => \&my_compress, + filter_store_value => \&my_compress, + filter_fetch_key => \&my_decompress, + filter_fetch_value => \&my_decompress, + ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; + + undef $db; + exit; + + sub my_compress { + return Compress::Zlib::memGzip( $_[0] ) ; + } + sub my_decompress { + return Compress::Zlib::memGunzip( $_[0] ) ; + } + +B Filtering of keys only applies to hashes. Array "keys" are +actually numerical index numbers, and are not filtered. + +=head1 ERROR HANDLING + +Most DBM::Deep methods return a true value for success, and call die() on +failure. You can wrap calls in an eval block to catch the die. + + my $db = DBM::Deep->new( "foo.db" ); # create hash + eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call + + print $@; # prints error message + +=head1 LARGEFILE SUPPORT + +If you have a 64-bit system, and your Perl is compiled with both LARGEFILE +and 64-bit support, you I be able to create databases larger than 2 GB. +DBM::Deep by default uses 32-bit file offset tags, but these can be changed +by specifying the 'pack_size' parameter when constructing the file. + + DBM::Deep->new( + filename => $filename, + pack_size => 'large', + ); + +This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words +instead of 32-bit longs. After setting these values your DB files have a +theoretical maximum size of 16 XB (exabytes). + +You can also use C 'small'> in order to use 16-bit file +offsets. + +B Changing these values will B work for existing database files. +Only change this for new files. Once the value has been set, it is stored in +the file's header and cannot be changed for the life of the file. These +parameters are per-file, meaning you can access 32-bit and 64-bit files, as +you choose. 
+ +B We have not personally tested files larger than 2 GB -- all my +systems have only a 32-bit Perl. However, I have received user reports that +this does indeed work! + +=head1 LOW-LEVEL ACCESS + +If you require low-level access to the underlying filehandle that DBM::Deep uses, +you can call the C<_fh()> method, which returns the handle: + + my $fh = $db->_fh(); + +This method can be called on the root level of the database, or any child +hashes or arrays. All levels share a I structure, which contains things +like the filehandle, a reference counter, and all the options specified +when you created the object. You can get access to this file object by +calling the C<_storage()> method. + + my $file_obj = $db->_storage(); + +This is useful for changing options after the object has already been created, +such as enabling/disabling locking. You can also store your own temporary user +data in this structure (be wary of name collision), which is then accessible from +any child hash or array. + +=head1 CUSTOM DIGEST ALGORITHM + +DBM::Deep by default uses the I (MD5) algorithm for hashing +keys. However you can override this, and use another algorithm (such as SHA-256) +or even write your own. But please note that DBM::Deep currently expects zero +collisions, so your algorithm has to be I, so to speak. Collision +detection may be introduced in a later version. + +You can specify a custom digest algorithm by passing it into the parameter +list for new(), passing a reference to a subroutine as the 'digest' parameter, +and the length of the algorithm's hashes (in bytes) as the 'hash_size' +parameter. Here is a working example that uses a 256-bit hash from the +I module. Please see +L for more information. + + use DBM::Deep; + use Digest::SHA256; + + my $context = Digest::SHA256::new(256); + + my $db = DBM::Deep->new( + filename => "foo-sha.db", + digest => \&my_digest, + hash_size => 32, + ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " .
$db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; + + undef $db; + exit; + + sub my_digest { + return substr( $context->hash($_[0]), 0, 32 ); + } + +B Your returned digest strings must be B the number +of bytes you specify in the hash_size parameter (in this case 32). + +B If you do choose to use a custom digest algorithm, you must set it +every time you access this file. Otherwise, the default (MD5) will be used. + +=head1 CIRCULAR REFERENCES + +B: DBM::Deep 0.99_03 has turned off circular references pending +evaluation of some edge cases. I hope to be able to re-enable circular +references in a future version prior to 1.00. + +DBM::Deep has B support for circular references. Meaning you +can have a nested hash key or array element that points to a parent object. +This relationship is stored in the DB file, and is preserved between sessions. +Here is an example: + + my $db = DBM::Deep->new( "foo.db" ); + + $db->{foo} = "bar"; + $db->{circle} = $db; # ref to self + + print $db->{foo} . "\n"; # prints "bar" + print $db->{circle}->{foo} . "\n"; # prints "bar" again + +B: Passing the object to a function that recursively walks the +object tree (such as I or even the built-in C or +C methods) will result in an infinite loop. This will be fixed in +a future release. + +=head1 TRANSACTIONS + +New in 0.99_01 is ACID transactions. Every DBM::Deep object is completely +transaction-ready - it is not an option you have to turn on. Three new methods +have been added to support them. They are: + +=over 4 + +=item * begin_work() + +This starts a transaction. + +=item * commit() + +This applies the changes done within the transaction to the mainline and ends +the transaction. + +=item * rollback() + +This discards the changes done within the transaction to the mainline and ends +the transaction. + +=back + +Transactions in DBM::Deep are done using the MVCC method, the same method used +by the InnoDB MySQL table type. 
+ +=head1 PERFORMANCE + +Because DBM::Deep is a concurrent datastore, every change is flushed to disk +immediately and every read goes to disk. This means that DBM::Deep functions +at the speed of disk (generally 10-20ms) vs. the speed of RAM (generally +50-70ns), or at least 150-200x slower than the comparable in-memory +datastructure in Perl. + +There are several techniques you can use to speed up how DBM::Deep functions. + +=over 4 + +=item * Put it on a ramdisk + +The easiest and quickest mechanism for making DBM::Deep run faster is to create +a ramdisk and locate the DBM::Deep file there. Doing this as an option may +become a feature of DBM::Deep, assuming there is a good ramdisk wrapper on CPAN. + +=item * Work at the tightest level possible + +It is much faster to assign the level of your db that you are working with to +an intermediate variable than to re-look it up every time. Thus + + # BAD + while ( my ($k, $v) = each %{$db->{foo}{bar}{baz}} ) { + ... + } + + # GOOD + my $x = $db->{foo}{bar}{baz}; + while ( my ($k, $v) = each %$x ) { + ... + } + +=item * Make your file as tight as possible + +If you know that you are not going to use more than 65K in your database, +consider using the C 'small'> option. This will instruct +DBM::Deep to use 16bit addresses, meaning that the seek times will be less. +The same goes with the number of transactions. num_Txns defaults to 16. If you +can set that to 1 or 2, that will reduce the file-size considerably, thus +reducing seek times. + +=back + +=head1 CAVEATS / ISSUES / BUGS + +This section describes all the known issues with DBM::Deep. If you have found +something that is not listed here, please send e-mail to L. + +=head2 REFERENCES + +(The reasons given assume a high level of Perl understanding, specifically of +references. You can safely skip this section.) + +Currently, the only references supported are HASH and ARRAY.
The other reference +types (SCALAR, CODE, GLOB, and REF) cannot be supported for various reasons. + +=over 4 + +=item * GLOB + +These are things like filehandles and other sockets. They can't be supported +because it's completely unclear how DBM::Deep should serialize them. + +=item * SCALAR / REF + +The discussion here refers to the following type of example: + + my $x = 25; + $db->{key1} = \$x; + + $x = 50; + + # In some other process ... + + my $val = ${ $db->{key1} }; + + is( $val, 50, "What actually gets stored in the DB file?" ); + +The problem is one of synchronization. When the variable being referred to +changes value, the reference isn't notified. This means that the new value won't +be stored in the datafile for other processes to read. There is no TIEREF. + +It is theoretically possible to store references to values already within a +DBM::Deep object because everything already is synchronized, but the change to +the internals would be quite large. Specifically, DBM::Deep would have to tie +every single value that is stored. This would bloat the RAM footprint of +DBM::Deep at least twofold (if not more) and be a significant performance drain, +all to support a feature that has never been requested. + +=item * CODE + +L provides a mechanism for serializing coderefs, +including saving off all closure state. However, just as for SCALAR and REF, +that closure state may change without notifying the DBM::Deep object storing +the reference. + +=back + +=head2 FILE CORRUPTION + +The current level of error handling in DBM::Deep is minimal. Files I checked +for a 32-bit signature when opened, but other corruption in files can cause +segmentation faults. DBM::Deep may try to seek() past the end of a file, or get +stuck in an infinite loop depending on the level of corruption. File write +operations are not checked for failure (for speed), so if you happen to run +out of disk space, DBM::Deep will probably fail in a bad way. 
These things will +be addressed in a later version of DBM::Deep. + +=head2 DB OVER NFS + +Beware of using DBM::Deep files over NFS. DBM::Deep uses flock(), which works +well on local filesystems, but will NOT protect you from file corruption over +NFS. I've heard about setting up your NFS server with a locking daemon, then +using lockf() to lock your files, but your mileage may vary there as well. +From what I understand, there is no real way to do it. However, if you need +access to the underlying filehandle in DBM::Deep for using some other kind of +locking scheme like lockf(), see the L</LOW-LEVEL ACCESS> section above. + +=head2 COPYING OBJECTS + +Beware of copying tied objects in Perl. Very strange things can happen. +Instead, use DBM::Deep's C<clone()> method which safely copies the object and +returns a new, blessed, tied hash or array to the same level in the DB. + + my $copy = $db->clone(); + +B<Note>: Since clone() here is cloning the object, not the database location, any +modifications to either $db or $copy will be visible to both. + +=head2 LARGE ARRAYS + +Beware of using C<shift()>, C<unshift()> or C<splice()> with large arrays. +These functions cause every element in the array to move, which can be murder +on DBM::Deep, as every element has to be fetched from disk, then stored again in +a different location. This will be addressed in a future version. + +=head2 WRITEONLY FILES + +If you pass in a filehandle to new(), you may have opened it in either a readonly or +writeonly mode. STORE will verify that the filehandle is writable. However, there +doesn't seem to be a good way to determine if a filehandle is readable. And, if the +filehandle isn't readable, it's not clear what will happen. So, don't do that. + +=head1 CODE COVERAGE + +B<Devel::Cover> is used to test the code coverage of the tests. Below is the +B<Devel::Cover> report on this distribution's test suite. 
+ + ---------------------------- ------ ------ ------ ------ ------ ------ ------ + File stmt bran cond sub pod time total + ---------------------------- ------ ------ ------ ------ ------ ------ ------ + blib/lib/DBM/Deep.pm 96.8 87.9 90.5 100.0 89.5 4.6 95.2 + blib/lib/DBM/Deep/Array.pm 100.0 94.3 100.0 100.0 100.0 5.0 98.7 + blib/lib/DBM/Deep/Engine.pm 95.6 85.1 79.7 99.1 0.0 58.2 89.5 + blib/lib/DBM/Deep/File.pm 99.0 88.9 77.8 100.0 0.0 29.9 90.3 + blib/lib/DBM/Deep/Hash.pm 100.0 100.0 100.0 100.0 100.0 2.4 100.0 + Total 97.0 87.8 84.0 99.5 32.1 100.0 92.4 + ---------------------------- ------ ------ ------ ------ ------ ------ ------ +=head1 MORE INFORMATION + +Check out the DBM::Deep Google Group at L<http://groups.google.com/group/DBM-Deep> +or send email to L<DBM-Deep@googlegroups.com>. You can also visit #dbm-deep on +irc.perl.org. + +The source code repository is at L<http://svn.perl.org/modules/DBM-Deep> + +=head1 MAINTAINER(S) + +Rob Kinyon, L<rkinyon@cpan.org> + +Originally written by Joseph Huckaby, L<jhuckaby@cpan.org> + +Special thanks to Adam Sah and Rich Gaushell! You know why :-) + +Additional thanks go out to Stonehenge who have sponsored the 1.00 release. + +=head1 SEE ALSO + +perltie(1), Tie::Hash(3), Digest::MD5(3), Fcntl(3), flock(2), lockf(3), nfs(5), +Digest::SHA256(3), Crypt::Blowfish(3), Compress::Zlib(3) + +=head1 LICENSE + +Copyright (c) 2007 Rob Kinyon. All Rights Reserved. +This is free software, you may use it and distribute it under the +same terms as Perl itself. 
+ +=cut diff --git a/lib/DBM/Deep/Engine.pm b/lib/DBM/Deep/Engine.pm index 0381bf1..8526b92 100644 --- a/lib/DBM/Deep/Engine.pm +++ b/lib/DBM/Deep/Engine.pm @@ -341,7 +341,7 @@ sub begin_work { my ($obj) = @_; if ( $self->trans_id ) { - DBM::Deep->_throw_error( "Cannot begin_work within a transaction" ); + DBM::Deep->_throw_error( "Cannot begin_work within an active transaction" ); } my @slots = $self->read_txn_slots; @@ -365,7 +365,7 @@ sub rollback { my ($obj) = @_; if ( !$self->trans_id ) { - DBM::Deep->_throw_error( "Cannot rollback without a transaction" ); + DBM::Deep->_throw_error( "Cannot rollback without an active transaction" ); } # Each entry is the file location for a bucket that has a modification for @@ -402,7 +402,7 @@ sub commit { my ($obj) = @_; if ( !$self->trans_id ) { - DBM::Deep->_throw_error( "Cannot commit without a transaction" ); + DBM::Deep->_throw_error( "Cannot commit without an active transaction" ); } foreach my $entry (@{ $self->get_entries } ) { diff --git a/t/33_transactions.t b/t/33_transactions.t index 672d226..cdf18ad 100644 --- a/t/33_transactions.t +++ b/t/33_transactions.t @@ -1,6 +1,7 @@ use strict; -use Test::More tests => 91; +use Test::More tests => 99; use Test::Deep; +use Test::Exception; use t::common qw( new_fh ); use_ok( 'DBM::Deep' ); @@ -24,16 +25,40 @@ $db1->{x} = 'y'; is( $db1->{x}, 'y', "Before transaction, DB1's X is Y" ); is( $db2->{x}, 'y', "Before transaction, DB2's X is Y" ); -eval { $db1->rollback }; -ok( $@, "Attempting to rollback without a transaction throws an error" ); +cmp_bag( [ keys %$db1 ], [qw( x )], "DB1 keys correct" ); +cmp_bag( [ keys %$db2 ], [qw( x )], "DB2 keys correct" ); -eval { $db1->commit }; -ok( $@, "Attempting to commit without a transaction throws an error" ); +throws_ok { + $db1->rollback; +} qr/Cannot rollback without an active transaction/, "Attempting to rollback without a transaction throws an error"; + +throws_ok { + $db1->commit; +} qr/Cannot commit without an active 
transaction/, "Attempting to commit without a transaction throws an error"; + +$db1->begin_work; + +throws_ok { + $db1->begin_work; +} qr/Cannot begin_work within an active transaction/, "Attempting to begin_work within a transaction throws an error"; + +lives_ok { + $db1->rollback; +} "Rolling back an empty transaction is ok."; + +cmp_bag( [ keys %$db1 ], [qw( x )], "DB1 keys correct" ); +cmp_bag( [ keys %$db2 ], [qw( x )], "DB2 keys correct" ); $db1->begin_work; -eval { $db1->begin_work }; -ok( $@, "Attempting to begin_work within a transaction throws an error" ); +lives_ok { + $db1->commit; +} "Committing an empty transaction is ok."; + +cmp_bag( [ keys %$db1 ], [qw( x )], "DB1 keys correct" ); +cmp_bag( [ keys %$db2 ], [qw( x )], "DB2 keys correct" ); + +$db1->begin_work; cmp_bag( [ keys %$db1 ], [qw( x )], "DB1 keys correct" ); cmp_bag( [ keys %$db2 ], [qw( x )], "DB2 keys correct" ); diff --git a/t/TODO b/t/TODO index 2645928..e5da204 100644 --- a/t/TODO +++ b/t/TODO @@ -58,8 +58,6 @@ How should this be triggered?! =item * Max out the number of transactions -=item * What happens when commit/rollback are called immediately after begin_work? - =item * Delete something in the head that has its own value in a transaction =item * Run an import within a transaction