Very rudimentary rollback added - needs LOTS AND LOTS of work

[dbsrgits/DBM-Deep.git] / lib / DBM / Deep.pm
diff --git a/lib/DBM/Deep.pm b/lib/DBM/Deep.pm

index 524ddcd..9921062 100644 (file)
--- a/lib/DBM/Deep.pm
+++ b/lib/DBM/Deep.pm
@@ -29,13 +29,17 @@ package DBM::Deep;
 #    modify it under the same terms as Perl itself.
 ##
 
+use 5.6.0;
+
 use strict;
+use warnings;
 
 use Fcntl qw( :DEFAULT :flock :seek );
 use Digest::MD5 ();
 use Scalar::Util ();
 
 use DBM::Deep::Engine;
+use DBM::Deep::File;
 
 use vars qw( $VERSION );
 $VERSION = q(0.99_01);
@@ -96,32 +100,32 @@ sub new {
     return bless $self, $class;
 }
 
+# This initializer is called from the various TIE* methods. new() calls tie(),
+# which allows for a single point of entry.
 sub _init {
-    ##
-    # Setup $self and bless into this class.
-    ##
     my $class = shift;
     my ($args) = @_;
 
+    $args->{fileobj} = DBM::Deep::File->new( $args )
+        unless exists $args->{fileobj};
+
+    # locking implicitly enables autoflush
+    if ($args->{locking}) { $args->{autoflush} = 1; }
+
     # These are the defaults to be optionally overridden below
     my $self = bless {
         type        => TYPE_HASH,
-        engine      => DBM::Deep::Engine->new,
+        engine      => DBM::Deep::Engine->new( $args ),
         base_offset => undef,
+        fileobj     => undef,
     }, $class;
 
+    # Grab the parameters we want to use
     foreach my $param ( keys %$self ) {
         next unless exists $args->{$param};
-        $self->{$param} = delete $args->{$param}
+        $self->{$param} = $args->{$param};
     }
 
-    # locking implicitly enables autoflush
-    if ($args->{locking}) { $args->{autoflush} = 1; }
-
-    $self->{root} = exists $args->{root}
-        ? $args->{root}
-        : DBM::Deep::_::Root->new( $args );
-
     $self->{engine}->setup_fh( $self );
 
     return $self;
@@ -155,26 +159,27 @@ sub lock {
 
     if (!defined($self->_fh)) { return; }
 
-    if ($self->_root->{locking}) {
-        if (!$self->_root->{locked}) {
+    if ($self->_fileobj->{locking}) {
+        if (!$self->_fileobj->{locked}) {
             flock($self->_fh, $type);
 
             # refresh end counter in case file has changed size
             my @stats = stat($self->_fh);
-            $self->_root->{end} = $stats[7];
+            $self->_fileobj->{end} = $stats[7];
 
             # double-check file inode, in case another process
             # has optimize()d our file while we were waiting.
-            if ($stats[1] != $self->_root->{inode}) {
-                $self->{engine}->close_fh( $self );
+            if ($stats[1] != $self->_fileobj->{inode}) {
+                $self->_fileobj->close;
+                $self->_fileobj->open;
                 $self->{engine}->setup_fh( $self );
                 flock($self->_fh, $type); # re-lock
 
                 # This may not be necessary after re-opening
-                $self->_root->{end} = (stat($self->_fh))[7]; # re-end
+                $self->_fileobj->{end} = (stat($self->_fh))[7]; # re-end
             }
         }
-        $self->_root->{locked}++;
+        $self->_fileobj->{locked}++;
 
         return 1;
     }
@@ -191,9 +196,9 @@ sub unlock {
 
     if (!defined($self->_fh)) { return; }
 
-    if ($self->_root->{locking} && $self->_root->{locked} > 0) {
-        $self->_root->{locked}--;
-        if (!$self->_root->{locked}) { flock($self->_fh, LOCK_UN); }
+    if ($self->_fileobj->{locking} && $self->_fileobj->{locked} > 0) {
+        $self->_fileobj->{locked}--;
+        if (!$self->_fileobj->{locked}) { flock($self->_fh, LOCK_UN); }
 
         return 1;
     }
@@ -209,8 +214,7 @@ sub _copy_value {
         ${$spot} = $value;
     }
     elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep' ) } ) {
-        my $type = $value->_type;
-        ${$spot} = $type eq TYPE_HASH ? {} : [];
+        ${$spot} = $value->_repr;
         $value->_copy_node( ${$spot} );
     }
     else {
@@ -230,30 +234,11 @@ sub _copy_value {
 }
 
 sub _copy_node {
-    ##
-    # Copy single level of keys or elements to new DB handle.
-    # Recurse for nested structures
-    ##
-    my $self = shift->_get_self;
-    my ($db_temp) = @_;
-
-    if ($self->_type eq TYPE_HASH) {
-        my $key = $self->first_key();
-        while ($key) {
-            my $value = $self->get($key);
-            $self->_copy_value( \$db_temp->{$key}, $value );
-            $key = $self->next_key($key);
-        }
-    }
-    else {
-        my $length = $self->length();
-        for (my $index = 0; $index < $length; $index++) {
-            my $value = $self->get($index);
-            $self->_copy_value( \$db_temp->[$index], $value );
-        }
-    }
+    die "Must be implemented in a child class\n";
+}
 
-    return 1;
+sub _repr {
+    die "Must be implemented in a child class\n";
 }
 
 sub export {
@@ -262,9 +247,7 @@ sub export {
     ##
     my $self = shift->_get_self;
 
-    my $temp;
-    if ($self->_type eq TYPE_HASH) { $temp = {}; }
-    elsif ($self->_type eq TYPE_ARRAY) { $temp = []; }
+    my $temp = $self->_repr;
 
     $self->lock();
     $self->_copy_node( $temp );
@@ -284,22 +267,10 @@ sub import {
 
     # struct is not a reference, so just import based on our type
     if (!ref($struct)) {
-        if ($self->_type eq TYPE_HASH) { $struct = {@_}; }
-        elsif ($self->_type eq TYPE_ARRAY) { $struct = [@_]; }
+        $struct = $self->_repr( @_ );
     }
 
-    my $r = Scalar::Util::reftype($struct) || '';
-    if ($r eq "HASH" && $self->_type eq TYPE_HASH) {
-        foreach my $key (keys %$struct) { $self->put($key, $struct->{$key}); }
-    }
-    elsif ($r eq "ARRAY" && $self->_type eq TYPE_ARRAY) {
-        $self->push( @$struct );
-    }
-    else {
-        $self->_throw_error("Cannot import: type mismatch");
-    }
-
-    return 1;
+    return $self->_import( $struct );
 }
 
 sub optimize {
@@ -310,12 +281,12 @@ sub optimize {
     my $self = shift->_get_self;
 
 #XXX Need to create a new test for this
-#    if ($self->_root->{links} > 1) {
+#    if ($self->_fileobj->{links} > 1) {
 #        $self->_throw_error("Cannot optimize: reference count is greater than 1");
 #    }
 
     my $db_temp = DBM::Deep->new(
-        file => $self->_root->{file} . '.tmp',
+        file => $self->_fileobj->{file} . '.tmp',
         type => $self->_type
     );
 
@@ -330,8 +301,8 @@ sub optimize {
     my $perms = $stats[2] & 07777;
     my $uid = $stats[4];
     my $gid = $stats[5];
-    chown( $uid, $gid, $self->_root->{file} . '.tmp' );
-    chmod( $perms, $self->_root->{file} . '.tmp' );
+    chown( $uid, $gid, $self->_fileobj->{file} . '.tmp' );
+    chmod( $perms, $self->_fileobj->{file} . '.tmp' );
 
     # q.v. perlport for more information on this variable
     if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) {
@@ -342,17 +313,18 @@ sub optimize {
         # with a soft copy.
         ##
         $self->unlock();
-        $self->{engine}->close_fh( $self );
+        $self->_fileobj->close;
     }
 
-    if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) {
-        unlink $self->_root->{file} . '.tmp';
+    if (!rename $self->_fileobj->{file} . '.tmp', $self->_fileobj->{file}) {
+        unlink $self->_fileobj->{file} . '.tmp';
         $self->unlock();
         $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!");
     }
 
     $self->unlock();
-    $self->{engine}->close_fh( $self );
+    $self->_fileobj->close;
+    $self->_fileobj->open;
     $self->{engine}->setup_fh( $self );
 
     return 1;
@@ -367,7 +339,7 @@ sub clone {
     return DBM::Deep->new(
         type => $self->_type,
         base_offset => $self->_base_offset,
-        root => $self->_root
+        fileobj => $self->_fileobj,
     );
 }
 
@@ -388,7 +360,7 @@ sub clone {
         my $func = shift;
 
         if ( $is_legal_filter{$type} ) {
-            $self->_root->{"filter_$type"} = $func;
+            $self->_fileobj->{"filter_$type"} = $func;
             return 1;
         }
 
@@ -396,16 +368,32 @@ sub clone {
     }
 }
 
+sub begin_work {
+    my $self = shift->_get_self;
+    $self->_fileobj->begin_transaction;
+    return 1;
+}
+
+sub rollback {
+    my $self = shift->_get_self;
+    $self->_fileobj->end_transaction;
+    return 1;
+}
+
+#sub commit {
+#    my $self = shift->_get_self;
+#}
+
 ##
 # Accessor methods
 ##
 
-sub _root {
+sub _fileobj {
     ##
     # Get access to the root structure
     ##
     my $self = $_[0]->_get_self;
-    return $self->{root};
+    return $self->{fileobj};
 }
 
 sub _type {
@@ -429,7 +417,7 @@ sub _fh {
     # Get access to the raw fh
     ##
     my $self = $_[0]->_get_self;
-    return $self->_root->{fh};
+    return $self->_fileobj->{fh};
 }
 
 ##
@@ -457,7 +445,7 @@ sub STORE {
     my $self = shift->_get_self;
     my ($key, $value) = @_;
 
-    unless ( _is_writable( $self->_fh ) ) {
+    if ( $^O ne 'MSWin32' && !_is_writable( $self->_fh ) ) {
         $self->_throw_error( 'Cannot write to a readonly filehandle' );
     }
 
@@ -468,18 +456,18 @@ sub STORE {
 
     my $md5 = $self->{engine}{digest}->($key);
 
-    my $tag = $self->{engine}->find_bucket_list( $self, $md5, { create => 1 } );
+    my $tag = $self->{engine}->find_bucket_list( $self->_base_offset, $md5, { create => 1 } );
 
     # User may be storing a hash, in which case we do not want it run
     # through the filtering system
-    if ( !ref($value) && $self->_root->{filter_store_value} ) {
-        $value = $self->_root->{filter_store_value}->( $value );
+    if ( !ref($value) && $self->_fileobj->{filter_store_value} ) {
+        $value = $self->_fileobj->{filter_store_value}->( $value );
     }
 
     ##
     # Add key/value to bucket list
     ##
-    my $result = $self->{engine}->add_bucket( $self, $tag, $md5, $key, $value );
+    my $result = $self->{engine}->add_bucket( $tag, $md5, $key, $value );
 
     $self->unlock();
 
@@ -500,7 +488,7 @@ sub FETCH {
     ##
     $self->lock( LOCK_SH );
 
-    my $tag = $self->{engine}->find_bucket_list( $self, $md5 );
+    my $tag = $self->{engine}->find_bucket_list( $self->_base_offset, $md5 );
     if (!$tag) {
         $self->unlock();
         return;
@@ -509,14 +497,14 @@ sub FETCH {
     ##
     # Get value from bucket list
     ##
-    my $result = $self->{engine}->get_bucket_value( $self, $tag, $md5 );
+    my $result = $self->{engine}->get_bucket_value( $tag, $md5 );
 
     $self->unlock();
 
     # Filters only apply to scalar values, so the ref check is making
     # sure the fetched bucket is a scalar, not a child hash or array.
-    return ($result && !ref($result) && $self->_root->{filter_fetch_value})
-        ? $self->_root->{filter_fetch_value}->($result)
+    return ($result && !ref($result) && $self->_fileobj->{filter_fetch_value})
+        ? $self->_fileobj->{filter_fetch_value}->($result)
         : $result;
 }
 
@@ -527,7 +515,7 @@ sub DELETE {
     my $self = $_[0]->_get_self;
     my $key = $_[1];
 
-    unless ( _is_writable( $self->_fh ) ) {
+    if ( $^O ne 'MSWin32' && !_is_writable( $self->_fh ) ) {
         $self->_throw_error( 'Cannot write to a readonly filehandle' );
     }
 
@@ -538,7 +526,7 @@ sub DELETE {
 
     my $md5 = $self->{engine}{digest}->($key);
 
-    my $tag = $self->{engine}->find_bucket_list( $self, $md5 );
+    my $tag = $self->{engine}->find_bucket_list( $self->_base_offset, $md5 );
     if (!$tag) {
         $self->unlock();
         return;
@@ -547,13 +535,13 @@ sub DELETE {
     ##
     # Delete bucket
     ##
-    my $value = $self->{engine}->get_bucket_value($self,  $tag, $md5 );
+    my $value = $self->{engine}->get_bucket_value( $tag, $md5 );
 
-    if (defined $value && !ref($value) && $self->_root->{filter_fetch_value}) {
-        $value = $self->_root->{filter_fetch_value}->($value);
+    if (defined $value && !ref($value) && $self->_fileobj->{filter_fetch_value}) {
+        $value = $self->_fileobj->{filter_fetch_value}->($value);
     }
 
-    my $result = $self->{engine}->delete_bucket( $self, $tag, $md5 );
+    my $result = $self->{engine}->delete_bucket( $tag, $md5 );
 
     ##
     # If this object is an array and the key deleted was on the end of the stack,
@@ -579,7 +567,7 @@ sub EXISTS {
     ##
     $self->lock( LOCK_SH );
 
-    my $tag = $self->{engine}->find_bucket_list( $self, $md5 );
+    my $tag = $self->{engine}->find_bucket_list( $self->_base_offset, $md5 );
     if (!$tag) {
         $self->unlock();
 
@@ -592,7 +580,7 @@ sub EXISTS {
     ##
     # Check if bucket exists and return 1 or ''
     ##
-    my $result = $self->{engine}->bucket_exists( $self, $tag, $md5 ) || '';
+    my $result = $self->{engine}->bucket_exists( $tag, $md5 ) || '';
 
     $self->unlock();
 
@@ -605,7 +593,7 @@ sub CLEAR {
     ##
     my $self = $_[0]->_get_self;
 
-    unless ( _is_writable( $self->_fh ) ) {
+    if ( $^O ne 'MSWin32' && !_is_writable( $self->_fh ) ) {
         $self->_throw_error( 'Cannot write to a readonly filehandle' );
     }
 
@@ -616,14 +604,15 @@ sub CLEAR {
 
     my $fh = $self->_fh;
 
-    seek($fh, $self->_base_offset + $self->_root->{file_offset}, SEEK_SET);
+    seek($fh, $self->_base_offset + $self->_fileobj->{file_offset}, SEEK_SET);
     if (eof $fh) {
         $self->unlock();
         return;
     }
 
-    $self->{engine}->create_tag(
-        $self, $self->_base_offset, $self->_type,
+#XXX This needs updating to use _release_space
+    $self->{engine}->write_tag(
+        $self->_base_offset, $self->_type,
         chr(0)x$self->{engine}{index_size},
     );
 
@@ -643,44 +632,6 @@ sub delete { (shift)->DELETE( @_ ) }
 sub exists { (shift)->EXISTS( @_ ) }
 sub clear { (shift)->CLEAR( @_ ) }
 
-package DBM::Deep::_::Root;
-
-sub new {
-    my $class = shift;
-    my ($args) = @_;
-
-    my $self = bless {
-        autobless          => undef,
-        autoflush          => undef,
-        end                => 0,
-        fh                 => undef,
-        file               => undef,
-        file_offset        => 0,
-        locking            => undef,
-        locked             => 0,
-        filter_store_key   => undef,
-        filter_store_value => undef,
-        filter_fetch_key   => undef,
-        filter_fetch_value => undef,
-        %$args,
-    }, $class;
-
-    if ( $self->{fh} && !$self->{file_offset} ) {
-        $self->{file_offset} = tell( $self->{fh} );
-    }
-
-    return $self;
-}
-
-sub DESTROY {
-    my $self = shift;
-    return unless $self;
-
-    close $self->{fh} if $self->{fh};
-
-    return;
-}
-
 1;
 __END__
 
@@ -1019,10 +970,6 @@ Recover lost disk space.
 
 Data going in and out.
 
-=item * set_digest() / set_pack() / set_filter()
-
-q.v. adjusting the interal parameters.
-
 =back
 
 =head2 HASHES
@@ -1406,23 +1353,29 @@ failure.  You can wrap calls in an eval block to catch the die.
 If you have a 64-bit system, and your Perl is compiled with both LARGEFILE
 and 64-bit support, you I<may> be able to create databases larger than 2 GB.
 DBM::Deep by default uses 32-bit file offset tags, but these can be changed
-by calling the static C<set_pack()> method before you do anything else.
+by specifying the 'pack_size' parameter when constructing the file.
 
-    DBM::Deep::set_pack(8, 'Q');
+    DBM::Deep->new(
+        filename => $filename,
+        pack_size => 'large',
+    );
 
 This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words
 instead of 32-bit longs.  After setting these values your DB files have a
 theoretical maximum size of 16 XB (exabytes).
 
+You can also use C<pack_size =E<gt> 'small'> in order to use 16-bit file
+offsets.
+
 B<Note:> Changing these values will B<NOT> work for existing database files.
-Only change this for new files, and make sure it stays set consistently
-throughout the file's life.  If you do set these values, you can no longer
-access 32-bit DB files.  You can, however, call C<set_pack(4, 'N')> to change
-back to 32-bit mode.
+Only change this for new files. Once the value has been set, it is stored in
+the file's header and cannot be changed for the life of the file. These
+parameters are per-file, meaning you can access 32-bit and 64-bit files, as
+you chose.
 
-B<Note:> I have not personally tested files > 2 GB -- all my systems have
-only a 32-bit Perl.  However, I have received user reports that this does
-indeed work!
+B<Note:> We have not personally tested files larger than 2 GB -- all my
+systems have only a 32-bit Perl.  However, I have received user reports that
+this does indeed work!
 
 =head1 LOW-LEVEL ACCESS
 
@@ -1434,10 +1387,10 @@ you can call the C<_fh()> method, which returns the handle:
 This method can be called on the root level of the datbase, or any child
 hashes or arrays.  All levels share a I<root> structure, which contains things
 like the filehandle, a reference counter, and all the options specified
-when you created the object.  You can get access to this root structure by
-calling the C<root()> method.
+when you created the object.  You can get access to this file object by
+calling the C<_fileobj()> method.
 
-    my $root = $db->_root();
+    my $file_obj = $db->_fileobj();
 
 This is useful for changing options after the object has already been created,
 such as enabling/disabling locking.  You can also store your own temporary user
@@ -1449,26 +1402,26 @@ any child hash or array.
 DBM::Deep by default uses the I<Message Digest 5> (MD5) algorithm for hashing
 keys.  However you can override this, and use another algorithm (such as SHA-256)
 or even write your own.  But please note that DBM::Deep currently expects zero
-collisions, so your algorithm has to be I<perfect>, so to speak.
-Collision detection may be introduced in a later version.
+collisions, so your algorithm has to be I<perfect>, so to speak. Collision
+detection may be introduced in a later version.
 
-
-
-You can specify a custom digest algorithm by calling the static C<set_digest()>
-function, passing a reference to a subroutine, and the length of the algorithm's
-hashes (in bytes).  This is a global static function, which affects ALL DBM::Deep
-objects.  Here is a working example that uses a 256-bit hash from the
+You can specify a custom digest algorithm by passing it into the parameter
+list for new(), passing a reference to a subroutine as the 'digest' parameter,
+and the length of the algorithm's hashes (in bytes) as the 'hash_size'
+parameter. Here is a working example that uses a 256-bit hash from the
 I<Digest::SHA256> module.  Please see
-L<http://search.cpan.org/search?module=Digest::SHA256> for more.
+L<http://search.cpan.org/search?module=Digest::SHA256> for more information.
 
     use DBM::Deep;
     use Digest::SHA256;
 
     my $context = Digest::SHA256::new(256);
 
-    DBM::Deep::set_digest( \&my_digest, 32 );
-
-    my $db = DBM::Deep->new( "foo-sha.db" );
+    my $db = DBM::Deep->new(
+        filename => "foo-sha.db",
+        digest => \&my_digest,
+        hash_size => 32,
+    );
 
     $db->{key1} = "value1";
     $db->{key2} = "value2";
@@ -1483,7 +1436,10 @@ L<http://search.cpan.org/search?module=Digest::SHA256> for more.
     }
 
 B<Note:> Your returned digest strings must be B<EXACTLY> the number
-of bytes you specify in the C<set_digest()> function (in this case 32).
+of bytes you specify in the hash_size parameter (in this case 32).
+
+B<Note:> If you do choose to use a custom digest algorithm, you must set it
+every time you access this file. Otherwise, the default (MD5) will be used.
 
 =head1 CIRCULAR REFERENCES
 
@@ -1533,26 +1489,6 @@ B<WARNING:> Only call optimize() on the top-level node of the database, and
 make sure there are no child references lying around.  DBM::Deep keeps a reference
 counter, and if it is greater than 1, optimize() will abort and return undef.
 
-=head2 AUTOVIVIFICATION
-
-Unfortunately, autovivification doesn't work with tied hashes.  This appears to
-be a bug in Perl's tie() system, as I<Jakob Schmidt> encountered the very same
-issue with his I<DWH_FIle> module (see L<http://search.cpan.org/search?module=DWH_File>),
-and it is also mentioned in the BUGS section for the I<MLDBM> module <see
-L<http://search.cpan.org/search?module=MLDBM>).  Basically, on a new db file,
-this does not work:
-
-    $db->{foo}->{bar} = "hello";
-
-Since "foo" doesn't exist, you cannot add "bar" to it.  You end up with "foo"
-being an empty hash.  Try this instead, which works fine:
-
-    $db->{foo} = { bar => "hello" };
-
-As of Perl 5.8.7, this bug still exists.  I have walked very carefully through
-the execution path, and Perl indeed passes an empty hash to the STORE() method.
-Probably a bug in Perl.
-
 =head2 REFERENCES
 
 (The reasons given assume a high level of Perl understanding, specifically of