1 package DBM::Deep::Engine;
6 use warnings FATAL => 'all';
8 # Never import symbols into our namespace. We are a class, not a library.
15 # * Every method in here assumes that the storage has been appropriately
16 # safeguarded. This can be anything from flock() to some sort of manual
17 # mutex. But, it's the caller's responsability to make sure that this has
20 # Setup file and tag signatures. These should never change.
21 sub SIG_FILE () { 'DPDB' }
22 sub SIG_HEADER () { 'h' }
23 sub SIG_HASH () { 'H' }
24 sub SIG_ARRAY () { 'A' }
25 sub SIG_NULL () { 'N' }
26 sub SIG_DATA () { 'D' }
27 sub SIG_INDEX () { 'I' }
28 sub SIG_BLIST () { 'B' }
29 sub SIG_FREE () { 'F' }
34 # Please refer to the pack() documentation for further information
36 1 => 'C', # Unsigned char value (no order needed as it's just one byte)
37 2 => 'n', # Unsigned short in "network" (big-endian) order
38 4 => 'N', # Unsigned long in "network" (big-endian) order
39 8 => 'Q', # Usigned quad (no order specified, presumably machine-dependent)
48 This is an internal-use-only object for L<DBM::Deep/>. It mediates the low-level
49 mapping between the L<DBM::Deep/> objects and the storage medium.
51 The purpose of this documentation is to provide low-level documentation for
52 developers. It is B<not> intended to be used by the general public. This
53 documentation and what it documents can and will change without notice.
57 The engine exposes an API to the DBM::Deep objects (DBM::Deep, DBM::Deep::Array,
58 and DBM::Deep::Hash) for their use to access the actual stored values. This API
69 =item * make_reference
87 =item * lock_exclusive
95 They are explained in their own sections below. These methods, in turn, may
96 provide some bounds-checking, but primarily act to instantiate objects in the
97 Engine::Sector::* hierarchy and dispatch to them.
101 Transactions in DBM::Deep are implemented using a variant of MVCC. This attempts
102 to keep the amount of actual work done against the file low while stil providing
103 Atomicity, Consistency, and Isolation. Durability, unfortunately, cannot be done
108 If another process uses a transaction slot and writes stuff to it, then terminates,
109 the data that process wrote it still within the file. In order to address this,
110 there is also a transaction staleness counter associated within every write.
111 Each time a transaction is started, that process increments that transaction's
112 staleness counter. If, when it reads a value, the staleness counters aren't
113 identical, DBM::Deep will consider the value on disk to be stale and discard it.
117 The fourth leg of ACID is Durability, the guarantee that when a commit returns,
118 the data will be there the next time you read from it. This should be regardless
119 of any crashes or powerdowns in between the commit and subsequent read. DBM::Deep
120 does provide that guarantee; once the commit returns, all of the data has been
121 transferred from the transaction shadow to the HEAD. The issue arises with partial
122 commits - a commit that is interrupted in some fashion. In keeping with DBM::Deep's
123 "tradition" of very light error-checking and non-existent error-handling, there is
124 no way to recover from a partial commit. (This is probably a failure in Consistency
125 as well as Durability.)
127 Other DBMSes use transaction logs (a separate file, generally) to achieve Durability.
128 As DBM::Deep is a single-file, we would have to do something similar to what SQLite
129 and BDB do in terms of committing using synchonized writes. To do this, we would have
130 to use a much higher RAM footprint and some serious programming that make my head
131 hurts just to think about it.
133 =head1 EXTERNAL METHODS
137 ################################################################################
141 This takes a set of args. These args are described in the documentation for
150 $args->{storage} = DBM::Deep::File->new( $args )
151 unless exists $args->{storage};
157 hash_size => 16, # In bytes
158 hash_chars => 256, # Number of chars the algorithm uses per byte
160 num_txns => 1, # The HEAD
161 trans_id => 0, # Default to the HEAD
163 data_sector_size => 64, # Size in bytes of each data sector
165 entries => {}, # This is the list of entries for transactions
169 # Never allow byte_size to be set directly.
170 delete $args->{byte_size};
171 if ( defined $args->{pack_size} ) {
172 if ( lc $args->{pack_size} eq 'small' ) {
173 $args->{byte_size} = 2;
175 elsif ( lc $args->{pack_size} eq 'medium' ) {
176 $args->{byte_size} = 4;
178 elsif ( lc $args->{pack_size} eq 'large' ) {
179 $args->{byte_size} = 8;
182 DBM::Deep->_throw_error( "Unknown pack_size value: '$args->{pack_size}'" );
186 # Grab the parameters we want to use
187 foreach my $param ( keys %$self ) {
188 next unless exists $args->{$param};
189 $self->{$param} = $args->{$param};
193 max_buckets => { floor => 16, ceil => 256 },
194 num_txns => { floor => 1, ceil => 255 },
195 data_sector_size => { floor => 32, ceil => 256 },
198 while ( my ($attr, $c) = each %validations ) {
199 if ( !defined $self->{$attr}
200 || !length $self->{$attr}
201 || $self->{$attr} =~ /\D/
202 || $self->{$attr} < $c->{floor}
204 $self->{$attr} = '(undef)' if !defined $self->{$attr};
205 warn "Floor of $attr is $c->{floor}. Setting it to $c->{floor} from '$self->{$attr}'\n";
206 $self->{$attr} = $c->{floor};
208 elsif ( $self->{$attr} > $c->{ceil} ) {
209 warn "Ceiling of $attr is $c->{ceil}. Setting it to $c->{ceil} from '$self->{$attr}'\n";
210 $self->{$attr} = $c->{ceil};
214 if ( !$self->{digest} ) {
216 $self->{digest} = \&Digest::MD5::md5;
222 ################################################################################
224 =head2 read_value( $obj, $key )
226 This takes an object that provides _base_offset() and a string. It returns the
227 value stored in the corresponding Sector::Value's data section.
233 my ($obj, $key) = @_;
235 # This will be a Reference sector
236 my $sector = $self->_load_sector( $obj->_base_offset )
239 if ( $sector->staleness != $obj->_staleness ) {
243 my $key_md5 = $self->_apply_digest( $key );
245 my $value_sector = $sector->get_data_for({
250 unless ( $value_sector ) {
251 $value_sector = DBM::Deep::Engine::Sector::Null->new({
256 $sector->write_data({
259 value => $value_sector,
263 return $value_sector->data;
266 =head2 get_classname( $obj )
268 This takes an object that provides _base_offset() and returns the classname (if any)
271 It delegates to Sector::Reference::get_classname() for the heavy lifting.
273 It performs a staleness check.
281 # This will be a Reference sector
282 my $sector = $self->_load_sector( $obj->_base_offset )
283 or DBM::Deep->_throw_error( "How did get_classname fail (no sector for '$obj')?!" );
285 if ( $sector->staleness != $obj->_staleness ) {
289 return $sector->get_classname;
292 =head2 make_reference( $obj, $old_key, $new_key )
294 This takes an object that provides _base_offset() and two strings. The
295 strings correspond to the old key and new key, respectively. This operation
296 is equivalent to (given C<< $db->{foo} = []; >>) C<< $db->{bar} = $db->{foo}; >>.
298 This returns nothing.
304 my ($obj, $old_key, $new_key) = @_;
306 # This will be a Reference sector
307 my $sector = $self->_load_sector( $obj->_base_offset )
308 or DBM::Deep->_throw_error( "How did make_reference fail (no sector for '$obj')?!" );
310 if ( $sector->staleness != $obj->_staleness ) {
314 my $old_md5 = $self->_apply_digest( $old_key );
316 my $value_sector = $sector->get_data_for({
321 unless ( $value_sector ) {
322 $value_sector = DBM::Deep::Engine::Sector::Null->new({
327 $sector->write_data({
330 value => $value_sector,
334 if ( $value_sector->isa( 'DBM::Deep::Engine::Sector::Reference' ) ) {
335 $sector->write_data({
337 key_md5 => $self->_apply_digest( $new_key ),
338 value => $value_sector,
340 $value_sector->increment_refcount;
343 $sector->write_data({
345 key_md5 => $self->_apply_digest( $new_key ),
346 value => $value_sector->clone,
353 =head2 key_exists( $obj, $key )
355 This takes an object that provides _base_offset() and a string for
356 the key to be checked. This returns 1 for true and "" for false.
362 my ($obj, $key) = @_;
364 # This will be a Reference sector
365 my $sector = $self->_load_sector( $obj->_base_offset )
368 if ( $sector->staleness != $obj->_staleness ) {
372 my $data = $sector->get_data_for({
373 key_md5 => $self->_apply_digest( $key ),
377 # exists() returns 1 or '' for true/false.
378 return $data ? 1 : '';
381 =head2 delete_key( $obj, $key )
383 This takes an object that provides _base_offset() and a string for
384 the key to be deleted. This returns the result of the Sector::Reference
391 my ($obj, $key) = @_;
393 my $sector = $self->_load_sector( $obj->_base_offset )
396 if ( $sector->staleness != $obj->_staleness ) {
400 return $sector->delete_key({
401 key_md5 => $self->_apply_digest( $key ),
406 =head2 write_value( $obj, $key, $value )
408 This takes an object that provides _base_offset(), a string for the
409 key, and a value. This value can be anything storable within L<DBM::Deep/>.
411 This returns 1 upon success.
417 my ($obj, $key, $value) = @_;
419 my $r = Scalar::Util::reftype( $value ) || '';
422 last if $r eq 'HASH';
423 last if $r eq 'ARRAY';
425 DBM::Deep->_throw_error(
426 "Storage of references of type '$r' is not supported."
430 # This will be a Reference sector
431 my $sector = $self->_load_sector( $obj->_base_offset )
432 or DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep." );
434 if ( $sector->staleness != $obj->_staleness ) {
435 DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep." );
439 if ( !defined $value ) {
440 $class = 'DBM::Deep::Engine::Sector::Null';
442 elsif ( $r eq 'ARRAY' || $r eq 'HASH' ) {
444 if ( $r eq 'ARRAY' ) {
445 $tmpvar = tied @$value;
446 } elsif ( $r eq 'HASH' ) {
447 $tmpvar = tied %$value;
451 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $tmpvar->isa( 'DBM::Deep' ); };
453 unless ( $is_dbm_deep ) {
454 DBM::Deep->_throw_error( "Cannot store something that is tied." );
457 unless ( $tmpvar->_engine->storage == $self->storage ) {
458 DBM::Deep->_throw_error( "Cannot store values across DBM::Deep files. Please use export() instead." );
461 # First, verify if we're storing the same thing to this spot. If we are, then
462 # this should be a no-op. -EJS, 2008-05-19
463 my $loc = $sector->get_data_location_for({
464 key_md5 => $self->_apply_digest( $key ),
468 if ( defined($loc) && $loc == $tmpvar->_base_offset ) {
472 #XXX Can this use $loc?
473 my $value_sector = $self->_load_sector( $tmpvar->_base_offset );
474 $sector->write_data({
476 key_md5 => $self->_apply_digest( $key ),
477 value => $value_sector,
479 $value_sector->increment_refcount;
484 $class = 'DBM::Deep::Engine::Sector::Reference';
485 $type = substr( $r, 0, 1 );
488 if ( tied($value) ) {
489 DBM::Deep->_throw_error( "Cannot store something that is tied." );
491 $class = 'DBM::Deep::Engine::Sector::Scalar';
494 # Create this after loading the reference sector in case something bad happens.
495 # This way, we won't allocate value sector(s) needlessly.
496 my $value_sector = $class->new({
502 $sector->write_data({
504 key_md5 => $self->_apply_digest( $key ),
505 value => $value_sector,
508 # This code is to make sure we write all the values in the $value to the disk
509 # and to make sure all changes to $value after the assignment are reflected
510 # on disk. This may be counter-intuitive at first, but it is correct dwimmery.
511 # NOTE - simply tying $value won't perform a STORE on each value. Hence, the
512 # copy to a temp value.
513 if ( $r eq 'ARRAY' ) {
515 tie @$value, 'DBM::Deep', {
516 base_offset => $value_sector->offset,
517 staleness => $value_sector->staleness,
518 storage => $self->storage,
522 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
524 elsif ( $r eq 'HASH' ) {
526 tie %$value, 'DBM::Deep', {
527 base_offset => $value_sector->offset,
528 staleness => $value_sector->staleness,
529 storage => $self->storage,
534 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
540 =head2 get_next_key( $obj, $prev_key )
542 This takes an object that provides _base_offset() and an optional string
543 representing the prior key returned via a prior invocation of this method.
545 This method delegates to C<< DBM::Deep::Iterator->get_next_key() >>.
549 # XXX Add staleness here
552 my ($obj, $prev_key) = @_;
554 # XXX Need to add logic about resetting the iterator if any key in the reference has changed
555 unless ( $prev_key ) {
556 $obj->{iterator} = DBM::Deep::Iterator->new({
557 base_offset => $obj->_base_offset,
562 return $obj->{iterator}->get_next_key( $obj );
565 ################################################################################
567 =head2 setup_fh( $obj )
569 This takes an object that provides _base_offset(). It will do everything needed
570 in order to properly initialize all values for necessary functioning. If this is
571 called upon an already initialized object, this will also reset the inode.
581 # We're opening the file.
582 unless ( $obj->_base_offset ) {
583 my $bytes_read = $self->_read_file_header;
585 # Creating a new file
586 unless ( $bytes_read ) {
587 $self->_write_file_header;
589 # 1) Create Array/Hash entry
590 my $initial_reference = DBM::Deep::Engine::Sector::Reference->new({
594 $obj->{base_offset} = $initial_reference->offset;
595 $obj->{staleness} = $initial_reference->staleness;
597 $self->storage->flush;
599 # Reading from an existing file
601 $obj->{base_offset} = $bytes_read;
602 my $initial_reference = DBM::Deep::Engine::Sector::Reference->new({
604 offset => $obj->_base_offset,
606 unless ( $initial_reference ) {
607 DBM::Deep->_throw_error("Corrupted file, no master index record");
610 unless ($obj->_type eq $initial_reference->type) {
611 DBM::Deep->_throw_error("File type mismatch");
614 $obj->{staleness} = $initial_reference->staleness;
618 $self->storage->set_inode;
623 =head2 begin_work( $obj )
625 This takes an object that provides _base_offset(). It will set up all necessary
626 bookkeeping in order to run all work within a transaction.
628 If $obj is already within a transaction, an error wiill be thrown. If there are
629 no more available transactions, an error will be thrown.
639 if ( $self->trans_id ) {
640 DBM::Deep->_throw_error( "Cannot begin_work within an active transaction" );
643 my @slots = $self->read_txn_slots;
645 for my $i ( 0 .. $#slots ) {
649 $self->set_trans_id( $i + 1 );
654 DBM::Deep->_throw_error( "Cannot allocate transaction ID" );
656 $self->write_txn_slots( @slots );
658 if ( !$self->trans_id ) {
659 DBM::Deep->_throw_error( "Cannot begin_work - no available transactions" );
665 =head2 rollback( $obj )
667 This takes an object that provides _base_offset(). It will revert all
668 actions taken within the running transaction.
670 If $obj is not within a transaction, an error will be thrown.
680 if ( !$self->trans_id ) {
681 DBM::Deep->_throw_error( "Cannot rollback without an active transaction" );
684 # Each entry is the file location for a bucket that has a modification for
685 # this transaction. The entries need to be expunged.
686 foreach my $entry (@{ $self->get_entries } ) {
687 # Remove the entry here
688 my $read_loc = $entry
692 + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE );
694 my $data_loc = $self->storage->read_at( $read_loc, $self->byte_size );
695 $data_loc = unpack( $StP{$self->byte_size}, $data_loc );
696 $self->storage->print_at( $read_loc, pack( $StP{$self->byte_size}, 0 ) );
698 if ( $data_loc > 1 ) {
699 $self->_load_sector( $data_loc )->free;
703 $self->clear_entries;
705 my @slots = $self->read_txn_slots;
706 $slots[$self->trans_id-1] = 0;
707 $self->write_txn_slots( @slots );
708 $self->inc_txn_staleness_counter( $self->trans_id );
709 $self->set_trans_id( 0 );
714 =head2 commit( $obj )
716 This takes an object that provides _base_offset(). It will apply all
717 actions taken within the transaction to the HEAD.
719 If $obj is not within a transaction, an error will be thrown.
729 if ( !$self->trans_id ) {
730 DBM::Deep->_throw_error( "Cannot commit without an active transaction" );
733 foreach my $entry (@{ $self->get_entries } ) {
734 # Overwrite the entry in head with the entry in trans_id
739 my $head_loc = $self->storage->read_at( $base, $self->byte_size );
740 $head_loc = unpack( $StP{$self->byte_size}, $head_loc );
742 my $spot = $base + $self->byte_size + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE );
743 my $trans_loc = $self->storage->read_at(
744 $spot, $self->byte_size,
747 $self->storage->print_at( $base, $trans_loc );
748 $self->storage->print_at(
750 pack( $StP{$self->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ),
753 if ( $head_loc > 1 ) {
754 $self->_load_sector( $head_loc )->free;
758 $self->clear_entries;
760 my @slots = $self->read_txn_slots;
761 $slots[$self->trans_id-1] = 0;
762 $self->write_txn_slots( @slots );
763 $self->inc_txn_staleness_counter( $self->trans_id );
764 $self->set_trans_id( 0 );
769 =head2 lock_exclusive()
771 This takes an object that provides _base_offset(). It will guarantee that
772 the storage has taken precautions to be safe for a write.
774 This returns nothing.
781 return $self->storage->lock_exclusive( $obj );
786 This takes an object that provides _base_offset(). It will guarantee that
787 the storage has taken precautions to be safe for a read.
789 This returns nothing.
796 return $self->storage->lock_shared( $obj );
801 This takes an object that provides _base_offset(). It will guarantee that
802 the storage has released all locks taken.
804 This returns nothing.
812 my $rv = $self->storage->unlock( $obj );
819 ################################################################################
821 =head1 INTERNAL METHODS
823 The following methods are internal-use-only to DBM::Deep::Engine.
827 =head2 read_txn_slots()
829 This takes no arguments.
831 This will return an array with a 1 or 0 in each slot. Each spot represents one
832 available transaction. If the slot is 1, that transaction is taken. If it is 0,
833 the transaction is available.
839 my $bl = $self->txn_bitfield_len;
840 my $num_bits = $bl * 8;
841 return split '', unpack( 'b'.$num_bits,
842 $self->storage->read_at(
843 $self->trans_loc, $bl,
848 =head2 write_txn_slots( @slots )
850 This takes an array of 1's and 0's. This array represents the transaction slots
851 returned by L</read_txn_slots()>. In other words, the following is true:
853 @x = read_txn_slots( write_txn_slots( @x ) );
855 (With the obviously missing object referents added back in.)
859 sub write_txn_slots {
861 my $num_bits = $self->txn_bitfield_len * 8;
862 $self->storage->print_at( $self->trans_loc,
863 pack( 'b'.$num_bits, join('', @_) ),
867 =head2 get_running_txn_ids()
869 This takes no arguments.
871 This will return an array of taken transaction IDs. This wraps L</read_txn_slots()>.
875 sub get_running_txn_ids {
877 my @transactions = $self->read_txn_slots;
878 my @trans_ids = map { $_+1 } grep { $transactions[$_] } 0 .. $#transactions;
881 =head2 get_txn_staleness_counter( $trans_id )
883 This will return the staleness counter for the given transaction ID. Please see
884 L</TRANSACTION STALENESS> for more information.
888 sub get_txn_staleness_counter {
892 # Hardcode staleness of 0 for the HEAD
893 return 0 unless $trans_id;
895 return unpack( $StP{$STALE_SIZE},
896 $self->storage->read_at(
897 $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
903 =head2 inc_txn_staleness_counter( $trans_id )
905 This will increment the staleness counter for the given transaction ID. Please see
906 L</TRANSACTION STALENESS> for more information.
910 sub inc_txn_staleness_counter {
914 # Hardcode staleness of 0 for the HEAD
915 return 0 unless $trans_id;
917 $self->storage->print_at(
918 $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
919 pack( $StP{$STALE_SIZE}, $self->get_txn_staleness_counter( $trans_id ) + 1 ),
925 This takes no arguments.
927 This returns a list of all the sectors that have been modified by this transaction.
933 return [ keys %{ $self->{entries}{$self->trans_id} ||= {} } ];
936 =head2 add_entry( $trans_id, $location )
938 This takes a transaction ID and a file location and marks the sector at that location
939 as having been modified by the transaction identified by $trans_id.
941 This returns nothing.
943 B<NOTE>: Unlike all the other _entries() methods, there are several cases where
944 C<< $trans_id != $self->trans_id >> for this method.
950 my ($trans_id, $loc) = @_;
952 $self->{entries}{$trans_id} ||= {};
953 $self->{entries}{$trans_id}{$loc} = undef;
956 =head2 reindex_entry( $old_loc, $new_loc )
958 This takes two locations (old and new, respectively). If a location that has been
959 modified by this transaction is subsequently reindexed due to a bucketlist
960 overflowing, then the entries hash needs to be made aware of this change.
962 This returns nothing.
968 my ($old_loc, $new_loc) = @_;
971 while ( my ($trans_id, $locs) = each %{ $self->{entries} } ) {
972 if ( exists $locs->{$old_loc} ) {
973 delete $locs->{$old_loc};
974 $locs->{$new_loc} = undef;
980 =head2 clear_entries()
982 This takes no arguments. It will clear the entries list for the running transaction.
984 This returns nothing.
990 delete $self->{entries}{$self->trans_id};
993 ################################################################################
995 =head2 _write_file_header()
997 This writes the file header for a new file. This will write the various settings
998 that set how the file is interpreted.
1000 =head2 _read_file_header()
1002 This reads the file header from an existing file. This will read the various
1003 settings that set how the file is interpreted.
1008 my $header_fixed = length( SIG_FILE ) + 1 + 4 + 4;
1009 my $this_file_version = 3;
1011 sub _write_file_header {
1014 my $nt = $self->num_txns;
1015 my $bl = $self->txn_bitfield_len;
1017 my $header_var = 1 + 1 + 1 + 1 + $bl + $STALE_SIZE * ($nt - 1) + 3 * $self->byte_size;
1019 my $loc = $self->storage->request_space( $header_fixed + $header_var );
1021 $self->storage->print_at( $loc,
1024 pack('N', $this_file_version), # At this point, we're at 9 bytes
1025 pack('N', $header_var), # header size
1026 # --- Above is $header_fixed. Below is $header_var
1027 pack('C', $self->byte_size),
1029 # These shenanigans are to allow a 256 within a C
1030 pack('C', $self->max_buckets - 1),
1031 pack('C', $self->data_sector_size - 1),
1034 pack('C' . $bl, 0 ), # Transaction activeness bitfield
1035 pack($StP{$STALE_SIZE}.($nt-1), 0 x ($nt-1) ), # Transaction staleness counters
1036 pack($StP{$self->byte_size}, 0), # Start of free chain (blist size)
1037 pack($StP{$self->byte_size}, 0), # Start of free chain (data size)
1038 pack($StP{$self->byte_size}, 0), # Start of free chain (index size)
1041 #XXX Set these less fragilely
1042 $self->set_trans_loc( $header_fixed + 4 );
1043 $self->set_chains_loc( $header_fixed + 4 + $bl + $STALE_SIZE * ($nt-1) );
1048 sub _read_file_header {
1051 my $buffer = $self->storage->read_at( 0, $header_fixed );
1052 return unless length($buffer);
1054 my ($file_signature, $sig_header, $file_version, $size) = unpack(
1058 unless ( $file_signature eq SIG_FILE ) {
1059 $self->storage->close;
1060 DBM::Deep->_throw_error( "Signature not found -- file is not a Deep DB" );
1063 unless ( $sig_header eq SIG_HEADER ) {
1064 $self->storage->close;
1065 DBM::Deep->_throw_error( "Pre-1.00 file version found" );
1068 unless ( $file_version == $this_file_version ) {
1069 $self->storage->close;
1070 DBM::Deep->_throw_error(
1071 "Wrong file version found - " . $file_version .
1072 " - expected " . $this_file_version
1076 my $buffer2 = $self->storage->read_at( undef, $size );
1077 my @values = unpack( 'C C C C', $buffer2 );
1079 if ( @values != 4 || grep { !defined } @values ) {
1080 $self->storage->close;
1081 DBM::Deep->_throw_error("Corrupted file - bad header");
1084 #XXX Add warnings if values weren't set right
1085 @{$self}{qw(byte_size max_buckets data_sector_size num_txns)} = @values;
1087 # These shenangians are to allow a 256 within a C
1088 $self->{max_buckets} += 1;
1089 $self->{data_sector_size} += 1;
1091 my $bl = $self->txn_bitfield_len;
1093 my $header_var = scalar(@values) + $bl + $STALE_SIZE * ($self->num_txns - 1) + 3 * $self->byte_size;
1094 unless ( $size == $header_var ) {
1095 $self->storage->close;
1096 DBM::Deep->_throw_error( "Unexpected size found ($size <-> $header_var)." );
1099 $self->set_trans_loc( $header_fixed + scalar(@values) );
1100 $self->set_chains_loc( $header_fixed + scalar(@values) + $bl + $STALE_SIZE * ($self->num_txns - 1) );
1102 return length($buffer) + length($buffer2);
1106 =head2 _load_sector( $offset )
1108 This will instantiate and return the sector object that represents the data found
1117 # Add a catch for offset of 0 or 1
1118 return if !$offset || $offset <= 1;
1120 my $type = $self->storage->read_at( $offset, 1 );
1121 return if $type eq chr(0);
1123 if ( $type eq $self->SIG_ARRAY || $type eq $self->SIG_HASH ) {
1124 return DBM::Deep::Engine::Sector::Reference->new({
1130 # XXX Don't we need key_md5 here?
1131 elsif ( $type eq $self->SIG_BLIST ) {
1132 return DBM::Deep::Engine::Sector::BucketList->new({
1138 elsif ( $type eq $self->SIG_INDEX ) {
1139 return DBM::Deep::Engine::Sector::Index->new({
1145 elsif ( $type eq $self->SIG_NULL ) {
1146 return DBM::Deep::Engine::Sector::Null->new({
1152 elsif ( $type eq $self->SIG_DATA ) {
1153 return DBM::Deep::Engine::Sector::Scalar->new({
1159 # This was deleted from under us, so just return and let the caller figure it out.
1160 elsif ( $type eq $self->SIG_FREE ) {
1164 DBM::Deep->_throw_error( "'$offset': Don't know what to do with type '$type'" );
1167 =head2 _apply_digest( @stuff )
1169 This will apply the digest methd (default to Digest::MD5::md5) to the arguments
1170 passed in and return the result.
1176 return $self->{digest}->(@_);
1179 =head2 _add_free_blist_sector( $offset, $size )
1181 =head2 _add_free_data_sector( $offset, $size )
1183 =head2 _add_free_index_sector( $offset, $size )
1185 These methods are all wrappers around _add_free_sector(), providing the proper
1186 chain offset ($multiple) for the sector type.
1190 sub _add_free_blist_sector { shift->_add_free_sector( 0, @_ ) }
1191 sub _add_free_data_sector { shift->_add_free_sector( 1, @_ ) }
1192 sub _add_free_index_sector { shift->_add_free_sector( 2, @_ ) }
1194 =head2 _add_free_sector( $multiple, $offset, $size )
1196 _add_free_sector() takes the offset into the chains location, the offset of the
1197 sector, and the size of that sector. It will mark the sector as a free sector
1198 and put it into the list of sectors that are free of this type for use later.
1200 This returns nothing.
1202 B<NOTE>: $size is unused?
1206 sub _add_free_sector {
1208 my ($multiple, $offset, $size) = @_;
1210 my $chains_offset = $multiple * $self->byte_size;
1212 my $storage = $self->storage;
1214 # Increment staleness.
1215 # XXX Can this increment+modulo be done by "&= 0x1" ?
1216 my $staleness = unpack( $StP{$STALE_SIZE}, $storage->read_at( $offset + SIG_SIZE, $STALE_SIZE ) );
1217 $staleness = ($staleness + 1 ) % ( 2 ** ( 8 * $STALE_SIZE ) );
1218 $storage->print_at( $offset + SIG_SIZE, pack( $StP{$STALE_SIZE}, $staleness ) );
1220 my $old_head = $storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size );
1222 $storage->print_at( $self->chains_loc + $chains_offset,
1223 pack( $StP{$self->byte_size}, $offset ),
1226 # Record the old head in the new sector after the signature and staleness counter
1227 $storage->print_at( $offset + SIG_SIZE + $STALE_SIZE, $old_head );
1230 =head2 _request_blist_sector( $size )
1232 =head2 _request_data_sector( $size )
1234 =head2 _request_index_sector( $size )
1236 These methods are all wrappers around _request_sector(), providing the proper
1237 chain offset ($multiple) for the sector type.
1241 sub _request_blist_sector { shift->_request_sector( 0, @_ ) }
1242 sub _request_data_sector { shift->_request_sector( 1, @_ ) }
1243 sub _request_index_sector { shift->_request_sector( 2, @_ ) }
1245 =head2 _request_sector( $multiple $size )
1247 This takes the offset into the chains location and the size of that sector.
1249 This returns the object with the sector. If there is an available free sector of
1250 that type, then it will be reused. If there isn't one, then a new one will be
1255 sub _request_sector {
1257 my ($multiple, $size) = @_;
1259 my $chains_offset = $multiple * $self->byte_size;
1261 my $old_head = $self->storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size );
1262 my $loc = unpack( $StP{$self->byte_size}, $old_head );
1264 # We don't have any free sectors of the right size, so allocate a new one.
1266 my $offset = $self->storage->request_space( $size );
1268 # Zero out the new sector. This also guarantees correct increases
1270 $self->storage->print_at( $offset, chr(0) x $size );
1275 # Read the new head after the signature and the staleness counter
1276 my $new_head = $self->storage->read_at( $loc + SIG_SIZE + $STALE_SIZE, $self->byte_size );
1277 $self->storage->print_at( $self->chains_loc + $chains_offset, $new_head );
1278 $self->storage->print_at(
1279 $loc + SIG_SIZE + $STALE_SIZE,
1280 pack( $StP{$self->byte_size}, 0 ),
1286 ################################################################################
1290 This takes no arguments. It will do everything necessary to flush all things to
1291 disk. This is usually called during unlock() and setup_fh().
1293 This returns nothing.
1300 # Why do we need to have the storage flush? Shouldn't autoflush take care of things?
1302 $self->storage->flush;
1305 ################################################################################
1307 sub storage { $_[0]{storage} }
1308 sub byte_size { $_[0]{byte_size} }
1309 sub hash_size { $_[0]{hash_size} }
1310 sub hash_chars { $_[0]{hash_chars} }
1311 sub num_txns { $_[0]{num_txns} }
1312 sub max_buckets { $_[0]{max_buckets} }
1313 sub blank_md5 { chr(0) x $_[0]->hash_size }
1314 sub data_sector_size { $_[0]{data_sector_size} }
1316 # This is a calculated value
1317 sub txn_bitfield_len {
1319 unless ( exists $self->{txn_bitfield_len} ) {
1320 my $temp = ($self->num_txns) / 8;
1321 if ( $temp > int( $temp ) ) {
1322 $temp = int( $temp ) + 1;
1324 $self->{txn_bitfield_len} = $temp;
1326 return $self->{txn_bitfield_len};
1329 sub trans_id { $_[0]{trans_id} }
1330 sub set_trans_id { $_[0]{trans_id} = $_[1] }
1332 sub trans_loc { $_[0]{trans_loc} }
1333 sub set_trans_loc { $_[0]{trans_loc} = $_[1] }
1335 sub chains_loc { $_[0]{chains_loc} }
1336 sub set_chains_loc { $_[0]{chains_loc} = $_[1] }
1338 sub cache { $_[0]{cache} ||= {} }
1339 sub clear_cache { %{$_[0]->cache} = () }
1345 my $spot = $self->_read_file_header();
1354 'D' => $self->data_sector_size,
1355 'B' => DBM::Deep::Engine::Sector::BucketList->new({engine=>$self,offset=>1})->size,
1356 'I' => DBM::Deep::Engine::Sector::Index->new({engine=>$self,offset=>1})->size,
1362 $return .= "NumTxns: " . $self->num_txns . $/;
1364 # Read the free sector chains
1366 foreach my $multiple ( 0 .. 2 ) {
1367 $return .= "Chains($types{$multiple}):";
1368 my $old_loc = $self->chains_loc + $multiple * $self->byte_size;
1371 $StP{$self->byte_size},
1372 $self->storage->read_at( $old_loc, $self->byte_size ),
1375 # We're now out of free sectors of this kind.
1380 $sectors{ $types{$multiple} }{ $loc } = undef;
1381 $old_loc = $loc + SIG_SIZE + $STALE_SIZE;
1388 while ( $spot < $self->storage->{end} ) {
1389 # Read each sector in order.
1390 my $sector = $self->_load_sector( $spot );
1392 # Find it in the free-sectors that were found already
1393 foreach my $type ( keys %sectors ) {
1394 if ( exists $sectors{$type}{$spot} ) {
1395 my $size = $sizes{$type};
1396 $return .= sprintf "%08d: %s %04d\n", $spot, 'F' . $type, $size;
1402 die "********\n$return\nDidn't find free sector for $spot in chains\n********\n";
1405 $return .= sprintf "%08d: %s %04d", $spot, $sector->type, $sector->size;
1406 if ( $sector->type eq 'D' ) {
1407 $return .= ' ' . $sector->data;
1409 elsif ( $sector->type eq 'A' || $sector->type eq 'H' ) {
1410 $return .= ' REF: ' . $sector->get_refcount;
1412 elsif ( $sector->type eq 'B' ) {
1413 foreach my $bucket ( $sector->chopped_up ) {
1415 $return .= sprintf "%08d", unpack($StP{$self->byte_size},
1416 substr( $bucket->[-1], $self->hash_size, $self->byte_size),
1418 my $l = unpack( $StP{$self->byte_size},
1419 substr( $bucket->[-1],
1420 $self->hash_size + $self->byte_size,
1424 $return .= sprintf " %08d", $l;
1425 foreach my $txn ( 0 .. $self->num_txns - 2 ) {
1426 my $l = unpack( $StP{$self->byte_size},
1427 substr( $bucket->[-1],
1428 $self->hash_size + 2 * $self->byte_size + $txn * ($self->byte_size + $STALE_SIZE),
1432 $return .= sprintf " %08d", $l;
1438 $spot += $sector->size;
1445 ################################################################################
1447 package DBM::Deep::Iterator;
1455 engine => $args->{engine},
1456 base_offset => $args->{base_offset},
1459 Scalar::Util::weaken( $self->{engine} );
1464 sub reset { $_[0]{breadcrumbs} = [] }
1466 sub get_sector_iterator {
1470 my $sector = $self->{engine}->_load_sector( $loc )
1473 if ( $sector->isa( 'DBM::Deep::Engine::Sector::Index' ) ) {
1474 return DBM::Deep::Iterator::Index->new({
1479 elsif ( $sector->isa( 'DBM::Deep::Engine::Sector::BucketList' ) ) {
1480 return DBM::Deep::Iterator::BucketList->new({
1486 DBM::Deep->_throw_error( "get_sector_iterator(): Why did $loc make a $sector?" );
1493 my $crumbs = $self->{breadcrumbs};
1494 my $e = $self->{engine};
1496 unless ( @$crumbs ) {
1497 # This will be a Reference sector
1498 my $sector = $e->_load_sector( $self->{base_offset} )
1499 # If no sector is found, thist must have been deleted from under us.
1502 if ( $sector->staleness != $obj->_staleness ) {
1506 my $loc = $sector->get_blist_loc
1509 push @$crumbs, $self->get_sector_iterator( $loc );
1514 unless ( @$crumbs ) {
1519 my $iterator = $crumbs->[-1];
1521 # This level is done.
1522 if ( $iterator->at_end ) {
1527 if ( $iterator->isa( 'DBM::Deep::Iterator::Index' ) ) {
1528 # If we don't have any more, it will be caught at the
1530 if ( my $next = $iterator->get_next_iterator ) {
1531 push @$crumbs, $next;
1536 unless ( $iterator->isa( 'DBM::Deep::Iterator::BucketList' ) ) {
1537 DBM::Deep->_throw_error(
1538 "Should have a bucketlist iterator here - instead have $iterator"
1542 # At this point, we have a BucketList iterator
1543 my $key = $iterator->get_next_key;
1544 if ( defined $key ) {
1547 #XXX else { $iterator->set_to_end() } ?
1549 # We hit the end of the bucketlist iterator, so redo
1553 DBM::Deep->_throw_error( "get_next_key(): How did we get here?" );
1556 package DBM::Deep::Iterator::Index;
1559 my $self = bless $_[1] => $_[0];
1560 $self->{curr_index} = 0;
1566 return $self->{curr_index} >= $self->{iterator}{engine}->hash_chars;
1569 sub get_next_iterator {
1574 return if $self->at_end;
1575 $loc = $self->{sector}->get_entry( $self->{curr_index}++ );
1578 return $self->{iterator}->get_sector_iterator( $loc );
1581 package DBM::Deep::Iterator::BucketList;
1584 my $self = bless $_[1] => $_[0];
1585 $self->{curr_index} = 0;
1591 return $self->{curr_index} >= $self->{iterator}{engine}->max_buckets;
1597 return if $self->at_end;
1599 my $idx = $self->{curr_index}++;
1601 my $data_loc = $self->{sector}->get_data_location_for({
1606 #XXX Do we want to add corruption checks here?
1607 return $self->{sector}->get_key_for( $idx )->data;
1610 package DBM::Deep::Engine::Sector;
1613 my $self = bless $_[1], $_[0];
1614 Scalar::Util::weaken( $self->{engine} );
1620 #sub clone { DBM::Deep->_throw_error( "Must be implemented in the child class" ); }
1622 sub engine { $_[0]{engine} }
1623 sub offset { $_[0]{offset} }
1624 sub type { $_[0]{type} }
1628 return $self->engine->SIG_SIZE + $STALE_SIZE;
1634 my $e = $self->engine;
1636 $e->storage->print_at( $self->offset, $e->SIG_FREE );
1637 # Skip staleness counter
1638 $e->storage->print_at( $self->offset + $self->base_size,
1639 chr(0) x ($self->size - $self->base_size),
1642 my $free_meth = $self->free_meth;
1643 $e->$free_meth( $self->offset, $self->size );
1648 package DBM::Deep::Engine::Sector::Data;
1650 our @ISA = qw( DBM::Deep::Engine::Sector );
1653 sub size { $_[0]{engine}->data_sector_size }
1654 sub free_meth { return '_add_free_data_sector' }
1658 return ref($self)->new({
1659 engine => $self->engine,
1660 type => $self->type,
1661 data => $self->data,
1665 package DBM::Deep::Engine::Sector::Scalar;
1667 our @ISA = qw( DBM::Deep::Engine::Sector::Data );
1672 my $chain_loc = $self->chain_loc;
1674 $self->SUPER::free();
1677 $self->engine->_load_sector( $chain_loc )->free;
1683 sub type { $_[0]{engine}->SIG_DATA }
1687 my $engine = $self->engine;
1689 unless ( $self->offset ) {
1690 my $data_section = $self->size - $self->base_size - $engine->byte_size - 1;
1692 $self->{offset} = $engine->_request_data_sector( $self->size );
1694 my $data = delete $self->{data};
1695 my $dlen = length $data;
1697 my $curr_offset = $self->offset;
1698 while ( $continue ) {
1700 my $next_offset = 0;
1702 my ($leftover, $this_len, $chunk);
1703 if ( $dlen > $data_section ) {
1705 $this_len = $data_section;
1706 $chunk = substr( $data, 0, $this_len );
1708 $dlen -= $data_section;
1709 $next_offset = $engine->_request_data_sector( $self->size );
1710 $data = substr( $data, $this_len );
1713 $leftover = $data_section - $dlen;
1720 $engine->storage->print_at( $curr_offset, $self->type ); # Sector type
1722 $engine->storage->print_at( $curr_offset + $self->base_size,
1723 pack( $StP{$engine->byte_size}, $next_offset ), # Chain loc
1724 pack( $StP{1}, $this_len ), # Data length
1725 $chunk, # Data to be stored in this sector
1726 chr(0) x $leftover, # Zero-fill the rest
1729 $curr_offset = $next_offset;
1739 my $buffer = $self->engine->storage->read_at(
1740 $self->offset + $self->base_size + $self->engine->byte_size, 1
1743 return unpack( $StP{1}, $buffer );
1749 $StP{$self->engine->byte_size},
1750 $self->engine->storage->read_at(
1751 $self->offset + $self->base_size,
1752 $self->engine->byte_size,
1764 my $chain_loc = $self->chain_loc;
1766 $data .= $self->engine->storage->read_at(
1767 $self->offset + $self->base_size + $self->engine->byte_size + 1, $self->data_length,
1770 last unless $chain_loc;
1772 $self = $self->engine->_load_sector( $chain_loc );
1778 package DBM::Deep::Engine::Sector::Null;
1780 our @ISA = qw( DBM::Deep::Engine::Sector::Data );
1782 sub type { $_[0]{engine}->SIG_NULL }
1783 sub data_length { 0 }
1789 my $engine = $self->engine;
1791 unless ( $self->offset ) {
1792 my $leftover = $self->size - $self->base_size - 1 * $engine->byte_size - 1;
1794 $self->{offset} = $engine->_request_data_sector( $self->size );
1795 $engine->storage->print_at( $self->offset, $self->type ); # Sector type
1796 # Skip staleness counter
1797 $engine->storage->print_at( $self->offset + $self->base_size,
1798 pack( $StP{$engine->byte_size}, 0 ), # Chain loc
1799 pack( $StP{1}, $self->data_length ), # Data length
1800 chr(0) x $leftover, # Zero-fill the rest
1807 package DBM::Deep::Engine::Sector::Reference;
1809 our @ISA = qw( DBM::Deep::Engine::Sector::Data );
1814 my $e = $self->engine;
1816 unless ( $self->offset ) {
1817 my $classname = Scalar::Util::blessed( delete $self->{data} );
1818 my $leftover = $self->size - $self->base_size - 3 * $e->byte_size;
1820 my $class_offset = 0;
1821 if ( defined $classname ) {
1822 my $class_sector = DBM::Deep::Engine::Sector::Scalar->new({
1826 $class_offset = $class_sector->offset;
1829 $self->{offset} = $e->_request_data_sector( $self->size );
1830 $e->storage->print_at( $self->offset, $self->type ); # Sector type
1831 # Skip staleness counter
1832 $e->storage->print_at( $self->offset + $self->base_size,
1833 pack( $StP{$e->byte_size}, 0 ), # Index/BList loc
1834 pack( $StP{$e->byte_size}, $class_offset ), # Classname loc
1835 pack( $StP{$e->byte_size}, 1 ), # Initial refcount
1836 chr(0) x $leftover, # Zero-fill the rest
1840 $self->{type} = $e->storage->read_at( $self->offset, 1 );
1843 $self->{staleness} = unpack(
1845 $e->storage->read_at( $self->offset + $e->SIG_SIZE, $STALE_SIZE ),
1851 sub staleness { $_[0]{staleness} }
1853 sub get_data_location_for {
1857 # Assume that the head is not allowed unless otherwise specified.
1858 $args->{allow_head} = 0 unless exists $args->{allow_head};
1860 # Assume we don't create a new blist location unless otherwise specified.
1861 $args->{create} = 0 unless exists $args->{create};
1863 my $blist = $self->get_bucket_list({
1864 key_md5 => $args->{key_md5},
1865 key => $args->{key},
1866 create => $args->{create},
1868 return unless $blist && $blist->{found};
1870 # At this point, $blist knows where the md5 is. What it -doesn't- know yet
1871 # is whether or not this transaction has this key. That's part of the next
1873 my $location = $blist->get_data_location_for({
1874 allow_head => $args->{allow_head},
1884 my $location = $self->get_data_location_for( $args )
1887 return $self->engine->_load_sector( $location );
1894 my $blist = $self->get_bucket_list({
1895 key_md5 => $args->{key_md5},
1896 key => $args->{key},
1898 }) or DBM::Deep->_throw_error( "How did write_data fail (no blist)?!" );
1900 # Handle any transactional bookkeeping.
1901 if ( $self->engine->trans_id ) {
1902 if ( ! $blist->has_md5 ) {
1903 $blist->mark_deleted({
1909 my @trans_ids = $self->engine->get_running_txn_ids;
1910 if ( $blist->has_md5 ) {
1912 my $old_value = $blist->get_data_for;
1913 foreach my $other_trans_id ( @trans_ids ) {
1914 next if $blist->get_data_location_for({
1915 trans_id => $other_trans_id,
1919 trans_id => $other_trans_id,
1920 key => $args->{key},
1921 key_md5 => $args->{key_md5},
1922 value => $old_value->clone,
1929 foreach my $other_trans_id ( @trans_ids ) {
1930 #XXX This doesn't seem to possible to ever happen . . .
1931 next if $blist->get_data_location_for({ trans_id => $other_trans_id, allow_head => 0 });
1932 $blist->mark_deleted({
1933 trans_id => $other_trans_id,
1940 #XXX Is this safe to do transactionally?
1941 # Free the place we're about to write to.
1942 if ( $blist->get_data_location_for({ allow_head => 0 }) ) {
1943 $blist->get_data_for({ allow_head => 0 })->free;
1947 key => $args->{key},
1948 key_md5 => $args->{key_md5},
1949 value => $args->{value},
1957 # XXX What should happen if this fails?
1958 my $blist = $self->get_bucket_list({
1959 key_md5 => $args->{key_md5},
1960 }) or DBM::Deep->_throw_error( "How did delete_key fail (no blist)?!" );
1962 # Save the location so that we can free the data
1963 my $location = $blist->get_data_location_for({
1966 my $old_value = $location && $self->engine->_load_sector( $location );
1968 my @trans_ids = $self->engine->get_running_txn_ids;
1970 # If we're the HEAD and there are running txns, then we need to clone this value to the other
1971 # transactions to preserve Isolation.
1972 if ( $self->engine->trans_id == 0 ) {
1974 foreach my $other_trans_id ( @trans_ids ) {
1975 next if $blist->get_data_location_for({ trans_id => $other_trans_id, allow_head => 0 });
1977 trans_id => $other_trans_id,
1978 key => $args->{key},
1979 key_md5 => $args->{key_md5},
1980 value => $old_value->clone,
1988 $blist->mark_deleted( $args );
1991 $data = $old_value->data({ export => 1 });
1996 $data = $blist->delete_md5( $args );
2005 my $e = $self->engine;
2006 my $blist_loc = $e->storage->read_at( $self->offset + $self->base_size, $e->byte_size );
2007 return unpack( $StP{$e->byte_size}, $blist_loc );
2010 sub get_bucket_list {
2015 # XXX Add in check here for recycling?
2017 my $engine = $self->engine;
2019 my $blist_loc = $self->get_blist_loc;
2021 # There's no index or blist yet
2022 unless ( $blist_loc ) {
2023 return unless $args->{create};
2025 my $blist = DBM::Deep::Engine::Sector::BucketList->new({
2027 key_md5 => $args->{key_md5},
2030 $engine->storage->print_at( $self->offset + $self->base_size,
2031 pack( $StP{$engine->byte_size}, $blist->offset ),
2037 my $sector = $engine->_load_sector( $blist_loc )
2038 or DBM::Deep->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" );
2040 my $last_sector = undef;
2041 while ( $sector->isa( 'DBM::Deep::Engine::Sector::Index' ) ) {
2042 $blist_loc = $sector->get_entry( ord( substr( $args->{key_md5}, $i++, 1 ) ) );
2043 $last_sector = $sector;
2045 $sector = $engine->_load_sector( $blist_loc )
2046 or DBM::Deep->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" );
2054 # This means we went through the Index sector(s) and found an empty slot
2055 unless ( $sector ) {
2056 return unless $args->{create};
2058 DBM::Deep->_throw_error( "No last_sector when attempting to build a new entry" )
2059 unless $last_sector;
2061 my $blist = DBM::Deep::Engine::Sector::BucketList->new({
2063 key_md5 => $args->{key_md5},
2066 $last_sector->set_entry( ord( substr( $args->{key_md5}, $i - 1, 1 ) ) => $blist->offset );
2071 $sector->find_md5( $args->{key_md5} );
2073 # See whether or not we need to reindex the bucketlist
2074 # Yes, the double-braces are there for a reason. if() doesn't create a redo-able block,
2075 # so we have to create a bare block within the if() for redo-purposes. Patch and idea
2076 # submitted by sprout@cpan.org. -RobK, 2008-01-09
2077 if ( !$sector->has_md5 && $args->{create} && $sector->{idx} == -1 ) {{
2080 my $new_index = DBM::Deep::Engine::Sector::Index->new({
2085 #XXX q.v. the comments for this function.
2086 foreach my $entry ( $sector->chopped_up ) {
2087 my ($spot, $md5) = @{$entry};
2088 my $idx = ord( substr( $md5, $i, 1 ) );
2090 # XXX This is inefficient
2091 my $blist = $blist_cache{$idx}
2092 ||= DBM::Deep::Engine::Sector::BucketList->new({
2096 $new_index->set_entry( $idx => $blist->offset );
2098 my $new_spot = $blist->write_at_next_open( $md5 );
2099 $engine->reindex_entry( $spot => $new_spot );
2102 # Handle the new item separately.
2104 my $idx = ord( substr( $args->{key_md5}, $i, 1 ) );
2106 # If all the previous blist's items have been thrown into one
2107 # blist and the new item belongs in there too, we need
2109 if ( keys %blist_cache == 1 and each %blist_cache == $idx ) {
2112 my $blist = $blist_cache{$idx}
2113 ||= DBM::Deep::Engine::Sector::BucketList->new({
2117 $new_index->set_entry( $idx => $blist->offset );
2120 $blist->find_md5( $args->{key_md5} );
2122 key => $args->{key},
2123 key_md5 => $args->{key_md5},
2124 value => DBM::Deep::Engine::Sector::Null->new({
2130 # my $blist = $blist_cache{$idx}
2131 # ||= DBM::Deep::Engine::Sector::BucketList->new({
2132 # engine => $engine,
2135 # $new_index->set_entry( $idx => $blist->offset );
2137 # #XXX THIS IS HACKY!
2138 # $blist->find_md5( $args->{key_md5} );
2139 # $blist->write_md5({
2140 # key => $args->{key},
2141 # key_md5 => $args->{key_md5},
2142 # value => DBM::Deep::Engine::Sector::Null->new({
2143 # engine => $engine,
2149 if ( $last_sector ) {
2150 $last_sector->set_entry(
2151 ord( substr( $args->{key_md5}, $i - 1, 1 ) ),
2155 $engine->storage->print_at( $self->offset + $self->base_size,
2156 pack( $StP{$engine->byte_size}, $new_index->offset ),
2164 (undef, $sector) = %blist_cache;
2165 $last_sector = $new_index;
2169 $sector = $blist_cache{ ord( substr( $args->{key_md5}, $i, 1 ) ) };
2170 $sector->find_md5( $args->{key_md5} );
2176 sub get_class_offset {
2179 my $e = $self->engine;
2181 $StP{$e->byte_size},
2182 $e->storage->read_at(
2183 $self->offset + $self->base_size + 1 * $e->byte_size, $e->byte_size,
2191 my $class_offset = $self->get_class_offset;
2193 return unless $class_offset;
2195 return $self->engine->_load_sector( $class_offset )->data;
2204 unless ( $obj = $self->engine->cache->{ $self->offset } ) {
2205 $obj = DBM::Deep->new({
2206 type => $self->type,
2207 base_offset => $self->offset,
2208 staleness => $self->staleness,
2209 storage => $self->engine->storage,
2210 engine => $self->engine,
2213 if ( $self->engine->storage->{autobless} ) {
2214 my $classname = $self->get_classname;
2215 if ( defined $classname ) {
2216 bless $obj, $classname;
2220 $self->engine->cache->{$self->offset} = $obj;
2223 # We're not exporting, so just return.
2224 unless ( $args->{export} ) {
2228 # We shouldn't export if this is still referred to.
2229 if ( $self->get_refcount > 1 ) {
2233 return $obj->export;
2239 # We're not ready to be removed yet.
2240 if ( $self->decrement_refcount > 0 ) {
2244 # Rebless the object into DBM::Deep::Null.
2245 eval { %{ $self->engine->cache->{ $self->offset } } = (); };
2246 eval { @{ $self->engine->cache->{ $self->offset } } = (); };
2247 bless $self->engine->cache->{ $self->offset }, 'DBM::Deep::Null';
2248 delete $self->engine->cache->{ $self->offset };
2250 my $blist_loc = $self->get_blist_loc;
2251 $self->engine->_load_sector( $blist_loc )->free if $blist_loc;
2253 my $class_loc = $self->get_class_offset;
2254 $self->engine->_load_sector( $class_loc )->free if $class_loc;
2256 $self->SUPER::free();
2259 sub increment_refcount {
2262 my $refcount = $self->get_refcount;
2266 $self->write_refcount( $refcount );
2271 sub decrement_refcount {
2274 my $refcount = $self->get_refcount;
2278 $self->write_refcount( $refcount );
2286 my $e = $self->engine;
2288 $StP{$e->byte_size},
2289 $e->storage->read_at(
2290 $self->offset + $self->base_size + 2 * $e->byte_size, $e->byte_size,
2295 sub write_refcount {
2299 my $e = $self->engine;
2300 $e->storage->print_at(
2301 $self->offset + $self->base_size + 2 * $e->byte_size,
2302 pack( $StP{$e->byte_size}, $num ),
2306 package DBM::Deep::Engine::Sector::BucketList;
2308 our @ISA = qw( DBM::Deep::Engine::Sector );
2313 my $engine = $self->engine;
2315 unless ( $self->offset ) {
2316 my $leftover = $self->size - $self->base_size;
2318 $self->{offset} = $engine->_request_blist_sector( $self->size );
2319 $engine->storage->print_at( $self->offset, $engine->SIG_BLIST ); # Sector type
2320 # Skip staleness counter
2321 $engine->storage->print_at( $self->offset + $self->base_size,
2322 chr(0) x $leftover, # Zero-fill the data
2326 if ( $self->{key_md5} ) {
2335 $self->engine->storage->print_at( $self->offset + $self->base_size,
2336 chr(0) x ($self->size - $self->base_size), # Zero-fill the data
2342 unless ( $self->{size} ) {
2343 my $e = $self->engine;
2344 # Base + numbuckets * bucketsize
2345 $self->{size} = $self->base_size + $e->max_buckets * $self->bucket_size;
2347 return $self->{size};
2350 sub free_meth { return '_add_free_blist_sector' }
2355 my $e = $self->engine;
2356 foreach my $bucket ( $self->chopped_up ) {
2357 my $rest = $bucket->[-1];
2359 # Delete the keysector
2360 my $l = unpack( $StP{$e->byte_size}, substr( $rest, $e->hash_size, $e->byte_size ) );
2361 my $s = $e->_load_sector( $l ); $s->free if $s;
2363 # Delete the HEAD sector
2364 $l = unpack( $StP{$e->byte_size},
2366 $e->hash_size + $e->byte_size,
2370 $s = $e->_load_sector( $l ); $s->free if $s;
2372 foreach my $txn ( 0 .. $e->num_txns - 2 ) {
2373 my $l = unpack( $StP{$e->byte_size},
2375 $e->hash_size + 2 * $e->byte_size + $txn * ($e->byte_size + $STALE_SIZE),
2379 my $s = $e->_load_sector( $l ); $s->free if $s;
2383 $self->SUPER::free();
2388 unless ( $self->{bucket_size} ) {
2389 my $e = $self->engine;
2390 # Key + head (location) + transactions (location + staleness-counter)
2391 my $location_size = $e->byte_size + $e->byte_size + ($e->num_txns - 1) * ($e->byte_size + $STALE_SIZE);
2392 $self->{bucket_size} = $e->hash_size + $location_size;
2394 return $self->{bucket_size};
2397 # XXX This is such a poor hack. I need to rethink this code.
2401 my $e = $self->engine;
2404 foreach my $idx ( 0 .. $e->max_buckets - 1 ) {
2405 my $spot = $self->offset + $self->base_size + $idx * $self->bucket_size;
2406 my $md5 = $e->storage->read_at( $spot, $e->hash_size );
2408 #XXX If we're chopping, why would we ever have the blank_md5?
2409 last if $md5 eq $e->blank_md5;
2411 my $rest = $e->storage->read_at( undef, $self->bucket_size - $e->hash_size );
2412 push @buckets, [ $spot, $md5 . $rest ];
2418 sub write_at_next_open {
2422 #XXX This is such a hack!
2423 $self->{_next_open} = 0 unless exists $self->{_next_open};
2425 my $spot = $self->offset + $self->base_size + $self->{_next_open}++ * $self->bucket_size;
2426 $self->engine->storage->print_at( $spot, $entry );
2433 unless ( exists $self->{found} ) {
2436 return $self->{found};
2442 $self->{found} = undef;
2446 $self->{key_md5} = shift;
2449 # If we don't have an MD5, then what are we supposed to do?
2450 unless ( exists $self->{key_md5} ) {
2451 DBM::Deep->_throw_error( "Cannot find_md5 without a key_md5 set" );
2454 my $e = $self->engine;
2455 foreach my $idx ( 0 .. $e->max_buckets - 1 ) {
2456 my $potential = $e->storage->read_at(
2457 $self->offset + $self->base_size + $idx * $self->bucket_size, $e->hash_size,
2460 if ( $potential eq $e->blank_md5 ) {
2461 $self->{idx} = $idx;
2465 if ( $potential eq $self->{key_md5} ) {
2467 $self->{idx} = $idx;
2479 DBM::Deep->_throw_error( "write_md5: no key" ) unless exists $args->{key};
2480 DBM::Deep->_throw_error( "write_md5: no key_md5" ) unless exists $args->{key_md5};
2481 DBM::Deep->_throw_error( "write_md5: no value" ) unless exists $args->{value};
2483 my $engine = $self->engine;
2485 $args->{trans_id} = $engine->trans_id unless exists $args->{trans_id};
2487 my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size;
2488 $engine->add_entry( $args->{trans_id}, $spot );
2490 unless ($self->{found}) {
2491 my $key_sector = DBM::Deep::Engine::Sector::Scalar->new({
2493 data => $args->{key},
2496 $engine->storage->print_at( $spot,
2498 pack( $StP{$engine->byte_size}, $key_sector->offset ),
2503 + $engine->hash_size
2504 + $engine->byte_size;
2506 if ( $args->{trans_id} ) {
2507 $loc += $engine->byte_size + ($args->{trans_id} - 1) * ( $engine->byte_size + $STALE_SIZE );
2509 $engine->storage->print_at( $loc,
2510 pack( $StP{$engine->byte_size}, $args->{value}->offset ),
2511 pack( $StP{$STALE_SIZE}, $engine->get_txn_staleness_counter( $args->{trans_id} ) ),
2515 $engine->storage->print_at( $loc,
2516 pack( $StP{$engine->byte_size}, $args->{value}->offset ),
2526 my $engine = $self->engine;
2528 $args->{trans_id} = $engine->trans_id unless exists $args->{trans_id};
2530 my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size;
2531 $engine->add_entry( $args->{trans_id}, $spot );
2534 + $engine->hash_size
2535 + $engine->byte_size;
2537 if ( $args->{trans_id} ) {
2538 $loc += $engine->byte_size + ($args->{trans_id} - 1) * ( $engine->byte_size + $STALE_SIZE );
2540 $engine->storage->print_at( $loc,
2541 pack( $StP{$engine->byte_size}, 1 ), # 1 is the marker for deleted
2542 pack( $StP{$STALE_SIZE}, $engine->get_txn_staleness_counter( $args->{trans_id} ) ),
2546 $engine->storage->print_at( $loc,
2547 pack( $StP{$engine->byte_size}, 1 ), # 1 is the marker for deleted
2557 my $engine = $self->engine;
2558 return undef unless $self->{found};
2560 # Save the location so that we can free the data
2561 my $location = $self->get_data_location_for({
2564 my $key_sector = $self->get_key_for;
2566 my $spot = $self->offset + $self->base_size + $self->{idx} * $self->bucket_size;
2567 $engine->storage->print_at( $spot,
2568 $engine->storage->read_at(
2569 $spot + $self->bucket_size,
2570 $self->bucket_size * ( $engine->max_buckets - $self->{idx} - 1 ),
2572 chr(0) x $self->bucket_size,
2577 my $data_sector = $self->engine->_load_sector( $location );
2578 my $data = $data_sector->data({ export => 1 });
2584 sub get_data_location_for {
2589 $args->{allow_head} = 0 unless exists $args->{allow_head};
2590 $args->{trans_id} = $self->engine->trans_id unless exists $args->{trans_id};
2591 $args->{idx} = $self->{idx} unless exists $args->{idx};
2593 my $e = $self->engine;
2595 my $spot = $self->offset + $self->base_size
2596 + $args->{idx} * $self->bucket_size
2600 if ( $args->{trans_id} ) {
2601 $spot += $e->byte_size + ($args->{trans_id} - 1) * ( $e->byte_size + $STALE_SIZE );
2604 my $buffer = $e->storage->read_at(
2606 $e->byte_size + $STALE_SIZE,
2608 my ($loc, $staleness) = unpack( $StP{$e->byte_size} . ' ' . $StP{$STALE_SIZE}, $buffer );
2610 # XXX Merge the two if-clauses below
2611 if ( $args->{trans_id} ) {
2612 # We have found an entry that is old, so get rid of it
2613 if ( $staleness != (my $s = $e->get_txn_staleness_counter( $args->{trans_id} ) ) ) {
2614 $e->storage->print_at(
2616 pack( $StP{$e->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ),
2622 # If we're in a transaction and we never wrote to this location, try the
2624 if ( $args->{trans_id} && !$loc && $args->{allow_head} ) {
2625 return $self->get_data_location_for({
2628 idx => $args->{idx},
2632 return $loc <= 1 ? 0 : $loc;
2640 return unless $self->{found};
2641 my $location = $self->get_data_location_for({
2642 allow_head => $args->{allow_head},
2644 return $self->engine->_load_sector( $location );
2650 $idx = $self->{idx} unless defined $idx;
2652 if ( $idx >= $self->engine->max_buckets ) {
2653 DBM::Deep->_throw_error( "get_key_for(): Attempting to retrieve $idx" );
2656 my $location = $self->engine->storage->read_at(
2657 $self->offset + $self->base_size + $idx * $self->bucket_size + $self->engine->hash_size,
2658 $self->engine->byte_size,
2660 $location = unpack( $StP{$self->engine->byte_size}, $location );
2661 DBM::Deep->_throw_error( "get_key_for: No location?" ) unless $location;
2663 return $self->engine->_load_sector( $location );
2666 package DBM::Deep::Engine::Sector::Index;
2668 our @ISA = qw( DBM::Deep::Engine::Sector );
2673 my $engine = $self->engine;
2675 unless ( $self->offset ) {
2676 my $leftover = $self->size - $self->base_size;
2678 $self->{offset} = $engine->_request_index_sector( $self->size );
2679 $engine->storage->print_at( $self->offset, $engine->SIG_INDEX ); # Sector type
2680 # Skip staleness counter
2681 $engine->storage->print_at( $self->offset + $self->base_size,
2682 chr(0) x $leftover, # Zero-fill the rest
2692 unless ( $self->{size} ) {
2693 my $e = $self->engine;
2694 $self->{size} = $self->base_size + $e->byte_size * $e->hash_chars;
2696 return $self->{size};
2699 sub free_meth { return '_add_free_index_sector' }
2703 my $e = $self->engine;
2705 for my $i ( 0 .. $e->hash_chars - 1 ) {
2706 my $l = $self->get_entry( $i ) or next;
2707 $e->_load_sector( $l )->free;
2710 $self->SUPER::free();
2716 return $self->offset + $self->base_size + $idx * $self->engine->byte_size;
2723 my $e = $self->engine;
2725 DBM::Deep->_throw_error( "get_entry: Out of range ($idx)" )
2726 if $idx < 0 || $idx >= $e->hash_chars;
2729 $StP{$e->byte_size},
2730 $e->storage->read_at( $self->_loc_for( $idx ), $e->byte_size ),
2736 my ($idx, $loc) = @_;
2738 my $e = $self->engine;
2740 DBM::Deep->_throw_error( "set_entry: Out of range ($idx)" )
2741 if $idx < 0 || $idx >= $e->hash_chars;
2743 $self->engine->storage->print_at(
2744 $self->_loc_for( $idx ),
2745 pack( $StP{$e->byte_size}, $loc ),
2749 # This was copied from MARCEL's Class::Null. However, I couldn't use it because
2750 # I need an undef value, not an implementation of the Null Class pattern.
2751 package DBM::Deep::Null;
2754 'bool' => sub { undef },
2755 '""' => sub { undef },
2756 '0+' => sub { undef },
2758 nomethod => 'AUTOLOAD';
2760 sub AUTOLOAD { return; }