1 package DBM::Deep::Engine;
6 use warnings FATAL => 'all';
8 # Never import symbols into our namespace. We are a class, not a library.
15 # * Every method in here assumes that the storage has been appropriately
16 # safeguarded. This can be anything from flock() to some sort of manual
17 # mutex. But, it's the caller's responsibility to make sure that this has
20 # Setup file and tag signatures. These should never change.
# File and tag signatures. They are written into the storage and read back to
# decide how to interpret a sector, so they should never change.
use constant {
    SIG_FILE   => 'DPDB',
    SIG_HEADER => 'h',
    SIG_HASH   => 'H',
    SIG_ARRAY  => 'A',
    SIG_NULL   => 'N',
    SIG_DATA   => 'D',
    SIG_INDEX  => 'I',
    SIG_BLIST  => 'B',
    SIG_FREE   => 'F',
};
34 # Please refer to the pack() documentation for further information
# pack()/unpack() templates keyed by field width in bytes (the opener of the
# enclosing hash is not visible in this listing).
36 1 => 'C', # Unsigned char value (no order needed as it's just one byte)
37 2 => 'n', # Unsigned short in "network" (big-endian) order
38 4 => 'N', # Unsigned long in "network" (big-endian) order
39 8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
# Method-style lookup of the pack() template for a byte width. The invocant
# (first argument) is ignored; the second argument is the width used as the
# key into %StP.
sub StP {
    my (undef, $width) = @_;
    return $StP{$width};
}
43 # Import these after the SIG_* definitions because those definitions are used
44 # in the headers of these classes. -RobK, 2008-06-20
45 use DBM::Deep::Engine::Sector::BucketList;
46 use DBM::Deep::Engine::Sector::FileHeader;
47 use DBM::Deep::Engine::Sector::Index;
48 use DBM::Deep::Engine::Sector::Null;
49 use DBM::Deep::Engine::Sector::Reference;
50 use DBM::Deep::Engine::Sector::Scalar;
51 use DBM::Deep::Iterator;
53 ################################################################################
# Constructor fragment: the sub header, the opener of the defaults hash and
# several closing lines are not visible in this listing.
#
# Create a DBM::Deep::File storage object from the arguments unless the caller
# already supplied one.
59 $args->{storage} = DBM::Deep::File->new( $args )
60 unless exists $args->{storage};
# Default attribute values (only part of the list is visible here).
66 hash_size => 16, # In bytes
67 hash_chars => 256, # Number of chars the algorithm uses per byte
69 num_txns => 1, # The HEAD
70 trans_id => 0, # Default to the HEAD
72 data_sector_size => 64, # Size in bytes of each data sector
74 entries => {}, # This is the list of entries for transactions
78 # Never allow byte_size to be set directly.
79 delete $args->{byte_size};
# byte_size is derived from pack_size instead, matched case-insensitively:
# small => 2, medium => 4, large => 8 bytes.
80 if ( defined $args->{pack_size} ) {
81 if ( lc $args->{pack_size} eq 'small' ) {
82 $args->{byte_size} = 2;
84 elsif ( lc $args->{pack_size} eq 'medium' ) {
85 $args->{byte_size} = 4;
87 elsif ( lc $args->{pack_size} eq 'large' ) {
88 $args->{byte_size} = 8;
# NOTE(review): presumably this sits in an else branch whose opener is not
# visible here - confirm.
91 DBM::Deep->_throw_error( "Unknown pack_size value: '$args->{pack_size}'" );
95 # Grab the parameters we want to use
# Only keys already present in $self are copied, so unknown arguments are
# ignored.
96 foreach my $param ( keys %$self ) {
97 next unless exists $args->{$param};
98 $self->{$param} = $args->{$param};
# Allowed numeric range for each validated attribute.
102 max_buckets => { floor => 16, ceil => 256 },
103 num_txns => { floor => 1, ceil => 255 },
104 data_sector_size => { floor => 32, ceil => 256 },
# Clamp each validated attribute into its range, warning when a value is
# changed. Undefined, empty, non-digit or too-small values become the floor;
# too-large values become the ceiling.
107 while ( my ($attr, $c) = each %validations ) {
108 if ( !defined $self->{$attr}
109 || !length $self->{$attr}
110 || $self->{$attr} =~ /\D/
111 || $self->{$attr} < $c->{floor}
# Substitute a printable placeholder so the warning below can interpolate it.
113 $self->{$attr} = '(undef)' if !defined $self->{$attr};
114 warn "Floor of $attr is $c->{floor}. Setting it to $c->{floor} from '$self->{$attr}'\n";
115 $self->{$attr} = $c->{floor};
117 elsif ( $self->{$attr} > $c->{ceil} ) {
118 warn "Ceiling of $attr is $c->{ceil}. Setting it to $c->{ceil} from '$self->{$attr}'\n";
119 $self->{$attr} = $c->{ceil};
# Fall back to Digest::MD5::md5 when no digest routine was supplied.
# NOTE(review): presumably Digest::MD5 is loaded on a line not shown - confirm.
123 if ( !$self->{digest} ) {
125 $self->{digest} = \&Digest::MD5::md5;
131 ################################################################################
# Fragment of a keyed read (the sub header and some lines are not visible in
# this listing). Arguments: the object whose base offset identifies the
# reference sector, and the key to look up.
135 my ($obj, $key) = @_;
137 # This will be a Reference sector
138 my $sector = $self->_load_sector( $obj->_base_offset )
# Compare the sector's staleness counter with the one carried by the object.
141 if ( $sector->staleness != $obj->_staleness ) {
# Keys are addressed by their digest, not by the raw key.
145 my $key_md5 = $self->_apply_digest( $key );
147 my $value_sector = $sector->get_data_for({
# Nothing found for this key: create a Null sector and write it into the
# reference sector, so there is always a value sector to read from.
152 unless ( $value_sector ) {
153 $value_sector = DBM::Deep::Engine::Sector::Null->new({
158 $sector->write_data({
161 value => $value_sector,
# Returns the value sector's data, not the sector object itself.
165 return $value_sector->data;
# Fragment of the class-name lookup (the sub header and argument unpacking are
# not visible in this listing).
172 # This will be a Reference sector
# Throws if no sector can be loaded at the object's base offset.
173 my $sector = $self->_load_sector( $obj->_base_offset )
174 or DBM::Deep->_throw_error( "How did get_classname fail (no sector for '$obj')?!" );
# Compare the sector's staleness counter with the one carried by the object.
176 if ( $sector->staleness != $obj->_staleness ) {
# Returns whatever the reference sector reports as its class name.
180 return $sector->get_classname;
# Fragment that makes $new_key refer to the value stored under $old_key within
# the same reference sector (the sub header and some lines are not visible in
# this listing).
185 my ($obj, $old_key, $new_key) = @_;
187 # This will be a Reference sector
# Throws if no sector can be loaded at the object's base offset.
188 my $sector = $self->_load_sector( $obj->_base_offset )
189 or DBM::Deep->_throw_error( "How did make_reference fail (no sector for '$obj')?!" );
# Compare the sector's staleness counter with the one carried by the object.
191 if ( $sector->staleness != $obj->_staleness ) {
195 my $old_md5 = $self->_apply_digest( $old_key );
197 my $value_sector = $sector->get_data_for({
# Nothing stored under the old key yet: create a Null sector and write it
# into the reference sector first.
202 unless ( $value_sector ) {
203 $value_sector = DBM::Deep::Engine::Sector::Null->new({
208 $sector->write_data({
211 value => $value_sector,
# A Reference sector is shared: the same sector is written under the new key
# and its refcount is incremented.
215 if ( $value_sector->isa( 'DBM::Deep::Engine::Sector::Reference' ) ) {
216 $sector->write_data({
218 key_md5 => $self->_apply_digest( $new_key ),
219 value => $value_sector,
221 $value_sector->increment_refcount;
# Any other sector type is written under the new key as a clone.
# NOTE(review): presumably this is the else branch; its opener is not visible.
224 $sector->write_data({
226 key_md5 => $self->_apply_digest( $new_key ),
227 value => $value_sector->clone,
# Fragment of the key-existence test (the sub header and some lines are not
# visible in this listing).
234 my ($obj, $key) = @_;
236 # This will be a Reference sector
237 my $sector = $self->_load_sector( $obj->_base_offset )
# Compare the sector's staleness counter with the one carried by the object.
240 if ( $sector->staleness != $obj->_staleness ) {
# Look the key up by its digest.
244 my $data = $sector->get_data_for({
245 key_md5 => $self->_apply_digest( $key ),
249 # exists() returns 1 or '' for true/false.
250 return $data ? 1 : '';
# Fragment of key deletion (the sub header and some lines are not visible in
# this listing).
255 my ($obj, $key) = @_;
257 my $sector = $self->_load_sector( $obj->_base_offset )
# Compare the sector's staleness counter with the one carried by the object.
260 if ( $sector->staleness != $obj->_staleness ) {
# The reference sector does the removal; its return value is passed back.
264 return $sector->delete_key({
265 key_md5 => $self->_apply_digest( $key ),
# Fragment that stores $value under $key in the object's reference sector
# (the sub header and a number of lines are not visible in this listing).
272 my ($obj, $key, $value) = @_;
# reftype() looks through blessing; non-references yield ''.
274 my $r = Scalar::Util::reftype( $value ) || '';
# Only HASH and ARRAY references are accepted; other reference types throw.
# NOTE(review): presumably plain scalars leave this check on a line not shown.
277 last if $r eq 'HASH';
278 last if $r eq 'ARRAY';
280 DBM::Deep->_throw_error(
281 "Storage of references of type '$r' is not supported."
285 # This will be a Reference sector
286 my $sector = $self->_load_sector( $obj->_base_offset )
287 or DBM::Deep->_throw_error( "1: Cannot write to a deleted spot in DBM::Deep." );
# A staleness mismatch between sector and object is also an error here.
289 if ( $sector->staleness != $obj->_staleness ) {
290 DBM::Deep->_throw_error( "2: Cannot write to a deleted spot in DBM::Deep." );
# Choose the sector class for the value: undef is stored as a Null sector.
294 if ( !defined $value ) {
295 $class = 'DBM::Deep::Engine::Sector::Null';
297 elsif ( $r eq 'ARRAY' || $r eq 'HASH' ) {
# Fetch the object the container is tied to, if any.
299 if ( $r eq 'ARRAY' ) {
300 $tmpvar = tied @$value;
301 } elsif ( $r eq 'HASH' ) {
302 $tmpvar = tied %$value;
# The isa() call is wrapped in eval with a localized __DIE__ handler so a
# failure simply leaves $is_dbm_deep false.
306 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $tmpvar->isa( 'DBM::Deep' ); };
# NOTE(review): presumably this only runs when $tmpvar is set (the guard is
# not visible here) - confirm.
308 unless ( $is_dbm_deep ) {
309 DBM::Deep->_throw_error( "Cannot store something that is tied." );
# The storage objects are compared with ==, so both sides must be the same
# storage object.
312 unless ( $tmpvar->_engine->storage == $self->storage ) {
313 DBM::Deep->_throw_error( "Cannot store values across DBM::Deep files. Please use export() instead." );
316 # First, verify if we're storing the same thing to this spot. If we are, then
317 # this should be a no-op. -EJS, 2008-05-19
318 my $loc = $sector->get_data_location_for({
319 key_md5 => $self->_apply_digest( $key ),
323 if ( defined($loc) && $loc == $tmpvar->_base_offset ) {
327 #XXX Can this use $loc?
# Otherwise store the existing sector under this key and bump its refcount.
328 my $value_sector = $self->_load_sector( $tmpvar->_base_offset );
329 $sector->write_data({
331 key_md5 => $self->_apply_digest( $key ),
332 value => $value_sector,
334 $value_sector->increment_refcount;
# $type is the first letter of the reftype: 'A' or 'H'.
339 $class = 'DBM::Deep::Engine::Sector::Reference';
340 $type = substr( $r, 0, 1 );
# Tied scalars are rejected; anything else is stored as a Scalar sector.
343 if ( tied($value) ) {
344 DBM::Deep->_throw_error( "Cannot store something that is tied." );
346 $class = 'DBM::Deep::Engine::Sector::Scalar';
349 # Create this after loading the reference sector in case something bad happens.
350 # This way, we won't allocate value sector(s) needlessly.
351 my $value_sector = $class->new({
357 $sector->write_data({
359 key_md5 => $self->_apply_digest( $key ),
360 value => $value_sector,
363 # This code is to make sure we write all the values in the $value to the disk
364 # and to make sure all changes to $value after the assignment are reflected
365 # on disk. This may be counter-intuitive at first, but it is correct dwimmery.
366 # NOTE - simply tying $value won't perform a STORE on each value. Hence, the
367 # copy to a temp value.
368 if ( $r eq 'ARRAY' ) {
370 tie @$value, 'DBM::Deep', {
371 base_offset => $value_sector->offset,
372 staleness => $value_sector->staleness,
373 storage => $self->storage,
# An existing blessing on the caller's reference is kept.
377 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
379 elsif ( $r eq 'HASH' ) {
381 tie %$value, 'DBM::Deep', {
382 base_offset => $value_sector->offset,
383 staleness => $value_sector->staleness,
384 storage => $self->storage,
389 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
395 # XXX Add staleness here
# Fragment of key iteration (the sub header and some lines are not visible in
# this listing). $prev_key is the key returned by the previous call, if any.
398 my ($obj, $prev_key) = @_;
400 # XXX Need to add logic about resetting the iterator if any key in the reference has changed
# A false $prev_key starts a fresh iterator, which is stored on the object.
# NOTE(review): this is a truthiness test, so a previous key of '0' or ''
# also restarts iteration - confirm whether a defined() check was intended.
401 unless ( $prev_key ) {
402 $obj->{iterator} = DBM::Deep::Iterator->new({
403 base_offset => $obj->_base_offset,
408 return $obj->{iterator}->get_next_key( $obj );
411 ################################################################################
# Fragment that initialises an object against the storage (the sub header and
# some lines are not visible in this listing).
#
# Nothing to do when the object already has a base offset.
417 return 1 if $obj->_base_offset;
419 my $header = DBM::Deep::Engine::Sector::FileHeader->new({
423 # Creating a new file
424 if ( $header->is_new ) {
425 # 1) Create Array/Hash entry
426 my $sector = DBM::Deep::Engine::Sector::Reference->new({
# The new reference sector becomes the object's base.
430 $obj->{base_offset} = $sector->offset;
431 $obj->{staleness} = $sector->staleness;
435 # Reading from an existing file
# The base reference sector is taken to start right after the file header.
437 $obj->{base_offset} = $header->size;
438 my $sector = DBM::Deep::Engine::Sector::Reference->new({
440 offset => $obj->_base_offset,
# NOTE(review): presumably thrown when the sector could not be built; the
# guarding condition is not visible here.
443 DBM::Deep->_throw_error("Corrupted file, no master index record");
# The type recorded in the sector must match the type the object expects.
446 unless ($obj->_type eq $sector->type) {
447 DBM::Deep->_throw_error("File type mismatch");
450 $obj->{staleness} = $sector->staleness;
453 $self->storage->set_inode;
# Fragment that starts a transaction (the sub header and some lines are not
# visible in this listing).
#
# A non-zero trans_id means a transaction is already active.
462 if ( $self->trans_id ) {
463 DBM::Deep->_throw_error( "Cannot begin_work within an active transaction" );
# Scan the transaction slots; the chosen slot index + 1 becomes the trans_id.
# NOTE(review): the slot-selection condition is on lines not shown.
466 my @slots = $self->read_txn_slots;
468 for my $i ( 0 .. $#slots ) {
472 $self->set_trans_id( $i + 1 );
477 DBM::Deep->_throw_error( "Cannot allocate transaction ID" );
479 $self->write_txn_slots( @slots );
# Still on trans_id 0 after the scan: no slot was available.
481 if ( !$self->trans_id ) {
482 DBM::Deep->_throw_error( "Cannot begin_work - no available transactions" );
# Fragment that discards the current transaction (the sub header and some
# lines are not visible in this listing).
492 if ( !$self->trans_id ) {
493 DBM::Deep->_throw_error( "Cannot rollback without an active transaction" );
496 # Each entry is the file location for a bucket that has a modification for
497 # this transaction. The entries need to be expunged.
498 foreach my $entry (@{ $self->get_entries } ) {
499 # Remove the entry here
# Locate this transaction's slot inside the bucket; each slot is byte_size
# plus $STALE_SIZE bytes wide (the leading terms are on lines not shown).
500 my $read_loc = $entry
504 + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE );
# Read the data location stored in the slot, then zero the slot.
506 my $data_loc = $self->storage->read_at( $read_loc, $self->byte_size );
507 $data_loc = unpack( $StP{$self->byte_size}, $data_loc );
508 $self->storage->print_at( $read_loc, pack( $StP{$self->byte_size}, 0 ) );
# Locations 0 and 1 are not freed; anything larger is loaded and freed.
510 if ( $data_loc > 1 ) {
511 $self->_load_sector( $data_loc )->free;
515 $self->clear_entries;
# Release the transaction slot, bump its staleness counter and switch back
# to trans_id 0.
517 my @slots = $self->read_txn_slots;
518 $slots[$self->trans_id-1] = 0;
519 $self->write_txn_slots( @slots );
520 $self->inc_txn_staleness_counter( $self->trans_id );
521 $self->set_trans_id( 0 );
# Fragment that commits the current transaction (the sub header and some
# lines, including the computation of $base, are not visible in this listing).
530 if ( !$self->trans_id ) {
531 DBM::Deep->_throw_error( "Cannot commit without an active transaction" );
534 foreach my $entry (@{ $self->get_entries } ) {
535 # Overwrite the entry in head with the entry in trans_id
# Remember the location currently stored at $base so it can be freed below.
540 my $head_loc = $self->storage->read_at( $base, $self->byte_size );
541 $head_loc = unpack( $StP{$self->byte_size}, $head_loc );
# This transaction's slot follows the value at $base; each slot is byte_size
# plus $STALE_SIZE bytes wide.
543 my $spot = $base + $self->byte_size + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE );
544 my $trans_loc = $self->storage->read_at(
545 $spot, $self->byte_size,
# Write the transaction's (still packed) location at $base, then zero out a
# location field and a staleness field (the target offset is on a line not
# shown).
548 $self->storage->print_at( $base, $trans_loc );
549 $self->storage->print_at(
551 pack( $StP{$self->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ),
# Locations 0 and 1 are not freed; anything larger is loaded and freed.
554 if ( $head_loc > 1 ) {
555 $self->_load_sector( $head_loc )->free;
559 $self->clear_entries;
# Release the transaction slot, bump its staleness counter and switch back
# to trans_id 0.
561 my @slots = $self->read_txn_slots;
562 $slots[$self->trans_id-1] = 0;
563 $self->write_txn_slots( @slots );
564 $self->inc_txn_staleness_counter( $self->trans_id );
565 $self->set_trans_id( 0 );
# Fragment that reads the transaction bitfield (the sub header and closing
# lines are not visible in this listing). Returns a list of '0'/'1'
# characters, one per bit, unpacked with the 'b' template from the
# txn_bitfield_len bytes stored at trans_loc.
572 my $bl = $self->txn_bitfield_len;
573 my $num_bits = $bl * 8;
574 return split '', unpack( 'b'.$num_bits,
575 $self->storage->read_at(
576 $self->trans_loc, $bl,
# Writes the transaction bitfield: the arguments are joined into one bit
# string and packed with the 'b' template into the bytes at trans_loc
# (some lines of this sub are not visible in this listing).
581 sub write_txn_slots {
583 my $num_bits = $self->txn_bitfield_len * 8;
584 $self->storage->print_at( $self->trans_loc,
585 pack( 'b'.$num_bits, join('', @_) ),
# Collects the ids of transaction slots that are currently set; a slot at
# index i corresponds to transaction id i + 1 (the rest of this sub is not
# visible in this listing).
589 sub get_running_txn_ids {
591 my @transactions = $self->read_txn_slots;
592 my @trans_ids = map { $_+1} grep { $transactions[$_] } 0 .. $#transactions;
# Reads the staleness counter of a transaction (argument unpacking and closing
# lines are not visible in this listing). The counters are stored after the
# transaction bitfield, $STALE_SIZE bytes each, indexed by trans_id - 1.
595 sub get_txn_staleness_counter {
599 # Hardcode staleness of 0 for the HEAD
600 return 0 unless $trans_id;
602 return unpack( $StP{$STALE_SIZE},
603 $self->storage->read_at(
604 $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
# Increments the stored staleness counter of a transaction by one (argument
# unpacking and closing lines are not visible in this listing). It writes to
# the same location get_txn_staleness_counter reads from.
610 sub inc_txn_staleness_counter {
614 # Hardcode staleness of 0 for the HEAD
615 return 0 unless $trans_id;
# NOTE(review): the new value is packed without an explicit wrap-around here;
# confirm the behaviour when the counter reaches the field's maximum.
617 $self->storage->print_at(
618 $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
619 pack( $StP{$STALE_SIZE}, $self->get_txn_staleness_counter( $trans_id ) + 1 ),
# Returns a new array reference holding the locations recorded for the current
# transaction; the per-transaction hash is created on demand if missing.
625 return [ keys %{ $self->{entries}{$self->trans_id} ||= {} } ];
# Records that location $loc was modified under transaction $trans_id. The
# locations are hash keys, so repeats collapse; the values are unused (undef).
630 my ($trans_id, $loc) = @_;
632 $self->{entries}{$trans_id} ||= {};
633 $self->{entries}{$trans_id}{$loc} = undef;
636 # If the buckets are being relocated because of a reindexing, the entries
637 # mechanism needs to be made aware of it.
640 my ($old_loc, $new_loc) = @_;
# Every transaction that recorded the old location gets the new one instead.
643 while ( my ($trans_id, $locs) = each %{ $self->{entries} } ) {
644 if ( exists $locs->{$old_loc} ) {
645 delete $locs->{$old_loc};
646 $locs->{$new_loc} = undef;
# Forget all recorded entries of the current transaction only.
654 delete $self->{entries}{$self->trans_id};
657 ################################################################################
# Fragment that returns the sector object for a file offset, building and
# caching it on first use (the sub header, argument unpacking and constructor
# arguments are not visible in this listing).
663 # Add a catch for offset of 0 or 1
664 return if !$offset || $offset <= 1;
# Only touch the storage when the offset is not in the sector cache yet.
666 unless ( exists $self->sector_cache->{ $offset } ) {
# The signature read at the offset decides which sector class to build.
667 my $type = $self->storage->read_at( $offset, $self->SIG_SIZE );
669 # XXX Don't we want to do something more proactive here? -RobK, 2008-06-19
670 return if $type eq chr(0);
# Array and hash signatures both map to a Reference sector.
672 if ( $type eq $self->SIG_ARRAY || $type eq $self->SIG_HASH ) {
673 $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::Reference->new({
679 # XXX Don't we need key_md5 here?
680 elsif ( $type eq $self->SIG_BLIST ) {
681 $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::BucketList->new({
687 elsif ( $type eq $self->SIG_INDEX ) {
688 $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::Index->new({
694 elsif ( $type eq $self->SIG_NULL ) {
695 $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::Null->new({
701 elsif ( $type eq $self->SIG_DATA ) {
702 $self->sector_cache->{$offset} = DBM::Deep::Engine::Sector::Scalar->new({
708 # This was deleted from under us, so just return and let the caller figure it out.
709 elsif ( $type eq $self->SIG_FREE ) {
# Any other signature is treated as an error.
713 DBM::Deep->_throw_error( "'$offset': Don't know what to do with type '$type'" );
717 return $self->sector_cache->{$offset};
# Calls the configured digest code reference with the remaining arguments and
# returns its result.
722 return $self->{digest}->(@_);
# Convenience wrappers around _add_free_sector. Each one prepends the index
# of the free chain to use: 0 = bucket list, 1 = data, 2 = index.
sub _add_free_blist_sector {
    my $self = shift;
    return $self->_add_free_sector( 0, @_ );
}

sub _add_free_data_sector {
    my $self = shift;
    return $self->_add_free_sector( 1, @_ );
}

sub _add_free_index_sector {
    my $self = shift;
    return $self->_add_free_sector( 2, @_ );
}
# Pushes the sector at $offset onto the front of a free-sector chain (some
# lines of this sub are not visible in this listing).
#   $multiple - index of the chain, used to locate its head pointer
#   $offset   - file offset of the sector being freed
#   $size     - not used in the lines visible here
729 sub _add_free_sector {
731 my ($multiple, $offset, $size) = @_;
# Each chain head is a byte_size-wide pointer, stored back to back from
# chains_loc.
733 my $chains_offset = $multiple * $self->byte_size;
735 my $storage = $self->storage;
737 # Increment staleness.
738 # XXX Can this increment+modulo be done by "&= 0x1" ?
# The counter sits right after the sector signature and wraps around at
# 2 ** (8 * $STALE_SIZE).
739 my $staleness = unpack( $StP{$STALE_SIZE}, $storage->read_at( $offset + SIG_SIZE, $STALE_SIZE ) );
740 $staleness = ($staleness + 1 ) % ( 2 ** ( 8 * $STALE_SIZE ) );
741 $storage->print_at( $offset + SIG_SIZE, pack( $StP{$STALE_SIZE}, $staleness ) );
# Keep the old head in packed form; it is written back verbatim below.
743 my $old_head = $storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size );
# The freed sector becomes the new head of the chain.
745 $storage->print_at( $self->chains_loc + $chains_offset,
746 pack( $StP{$self->byte_size}, $offset ),
749 # Record the old head in the new sector after the signature and staleness counter
750 $storage->print_at( $offset + SIG_SIZE + $STALE_SIZE, $old_head );
# Convenience wrappers around _request_sector. Each one prepends the index
# of the free chain to draw from: 0 = bucket list, 1 = data, 2 = index.
sub _request_blist_sector {
    my $self = shift;
    return $self->_request_sector( 0, @_ );
}

sub _request_data_sector {
    my $self = shift;
    return $self->_request_sector( 1, @_ );
}

sub _request_index_sector {
    my $self = shift;
    return $self->_request_sector( 2, @_ );
}
# Hands out a sector of $size bytes, preferring the head of the free chain
# selected by $multiple (some lines of this sub, including its return
# statements, are not visible in this listing).
757 sub _request_sector {
759 my ($multiple, $size) = @_;
# Each chain head is a byte_size-wide pointer, stored back to back from
# chains_loc.
761 my $chains_offset = $multiple * $self->byte_size;
763 my $old_head = $self->storage->read_at( $self->chains_loc + $chains_offset, $self->byte_size );
764 my $loc = unpack( $StP{$self->byte_size}, $old_head );
766 # We don't have any free sectors of the right size, so allocate a new one.
# NOTE(review): presumably guarded by a test on $loc that is not visible here.
768 my $offset = $self->storage->request_space( $size );
770 # Zero out the new sector. This also guarantees correct increases
772 $self->storage->print_at( $offset, chr(0) x $size );
777 # Read the new head after the signature and the staleness counter
# Pop the chain: the reused sector's next pointer becomes the chain head, and
# the pointer inside the reused sector is zeroed.
778 my $new_head = $self->storage->read_at( $loc + SIG_SIZE + $STALE_SIZE, $self->byte_size );
779 $self->storage->print_at( $self->chains_loc + $chains_offset, $new_head );
780 $self->storage->print_at(
781 $loc + SIG_SIZE + $STALE_SIZE,
782 pack( $StP{$self->byte_size}, 0 ),
788 ################################################################################
# Accessor fragment: returns the sector cache hash, creating it on first use
# (the sub header is not visible in this listing).
792 return $self->{sector_cache} ||= {};
# Replaces the sector cache with a fresh empty hash.
795 sub clear_sector_cache {
797 $self->{sector_cache} = {};
# Accessor fragment: returns the dirty-sector hash, creating it on first use
# (the sub header is not visible in this listing).
802 return $self->{dirty_sectors} ||= {};
# Replaces the dirty-sector hash with a fresh empty hash.
805 sub clear_dirty_sectors {
807 $self->{dirty_sectors} = {};
# Registers a sector as dirty, keyed by its offset; a later registration for
# the same offset replaces the earlier one (argument unpacking is not visible
# in this listing). The duplicate check below is disabled.
810 sub add_dirty_sector {
814 # if ( exists $self->dirty_sectors->{ $sector->offset } ) {
815 # DBM::Deep->_throw_error( "We have a duplicate sector!! " . $sector->offset );
818 $self->dirty_sectors->{ $sector->offset } = $sector;
# Fragment that flushes every dirty sector in ascending offset order, then
# resets the dirty list and the sector cache (the sub header is not visible
# in this listing).
824 my $sectors = $self->dirty_sectors;
# Offsets are sorted numerically, not as strings.
825 for my $offset (sort { $a <=> $b } keys %{ $sectors }) {
826 $sectors->{$offset}->flush;
829 $self->clear_dirty_sectors;
831 $self->clear_sector_cache;
834 ################################################################################
# Locking fragments; each delegates to the storage object (the sub headers
# and argument unpacking are not visible in this listing).
# Exclusive lock: returns the storage's result.
839 return $self->storage->lock_exclusive( $obj );
# Shared lock: returns the storage's result.
845 return $self->storage->lock_shared( $obj );
# Unlock: the result is kept in $rv; what happens to it afterwards is on
# lines not shown.
852 my $rv = $self->storage->unlock( $obj );
859 ################################################################################
# Read-only accessors for the engine's attributes.
sub storage {
    my ($self) = @_;
    return $self->{storage};
}

sub byte_size {
    my ($self) = @_;
    return $self->{byte_size};
}

sub hash_size {
    my ($self) = @_;
    return $self->{hash_size};
}

sub hash_chars {
    my ($self) = @_;
    return $self->{hash_chars};
}

sub num_txns {
    my ($self) = @_;
    return $self->{num_txns};
}

sub max_buckets {
    my ($self) = @_;
    return $self->{max_buckets};
}

# A string of hash_size NUL bytes.
sub blank_md5 {
    my ($self) = @_;
    return chr(0) x $self->hash_size;
}

sub data_sector_size {
    my ($self) = @_;
    return $self->{data_sector_size};
}
870 # This is a calculated value
# Number of bytes needed to hold one bit per transaction, i.e. num_txns / 8
# rounded up. The result is computed once and cached on the object
# (some lines of this sub are not visible in this listing).
871 sub txn_bitfield_len {
873 unless ( exists $self->{txn_bitfield_len} ) {
874 my $temp = ($self->num_txns) / 8;
# Round up when the division left a fractional part.
875 if ( $temp > int( $temp ) ) {
876 $temp = int( $temp ) + 1;
878 $self->{txn_bitfield_len} = $temp;
880 return $self->{txn_bitfield_len};
# Current transaction id; the setter returns the value it stored.
sub trans_id {
    my ($self) = @_;
    return $self->{trans_id};
}

sub set_trans_id {
    my ($self, $id) = @_;
    return $self->{trans_id} = $id;
}

# Stored offset of the transaction area; the setter returns the value it
# stored.
sub trans_loc {
    my ($self) = @_;
    return $self->{trans_loc};
}

sub set_trans_loc {
    my ($self, $loc) = @_;
    return $self->{trans_loc} = $loc;
}

# Stored offset of the free-chain head pointers; the setter returns the
# value it stored.
sub chains_loc {
    my ($self) = @_;
    return $self->{chains_loc};
}

sub set_chains_loc {
    my ($self, $loc) = @_;
    return $self->{chains_loc} = $loc;
}

# General-purpose cache hash, created on first access.
sub cache {
    my ($self) = @_;
    $self->{cache} ||= {};
    return $self->{cache};
}

# Empties the cache in place, so existing references to the hash see it
# emptied as well.
sub clear_cache {
    my ($self) = @_;
    return %{ $self->cache } = ();
}
# Fragment of a debugging dump that builds a textual description of the file
# in $return (the sub header, several declarations and closing lines are not
# visible in this listing).
900 my $header_sector = DBM::Deep::Engine::Sector::FileHeader->new({
# Sector sizes by type letter. The BucketList and Index sizes are obtained by
# constructing a sector object at offset 1 and asking it for its size.
911 'D' => $self->data_sector_size,
912 'B' => DBM::Deep::Engine::Sector::BucketList->new({engine=>$self,offset=>1})->size,
913 'I' => DBM::Deep::Engine::Sector::Index->new({engine=>$self,offset=>1})->size,
# NOTE(review): this reads the storage object's {fh} slot directly rather
# than going through a method.
919 $return .= "Size: " . (-s $self->storage->{fh}) . $/;
922 $return .= "NumTxns: " . $self->num_txns . $/;
924 # Read the free sector chains
# One chain per index 0..2; each chain head is a byte_size-wide pointer
# stored back to back from chains_loc.
926 foreach my $multiple ( 0 .. 2 ) {
927 $return .= "Chains($types{$multiple}):";
928 my $old_loc = $self->chains_loc + $multiple * $self->byte_size;
931 $StP{$self->byte_size},
932 $self->storage->read_at( $old_loc, $self->byte_size ),
935 # We're now out of free sectors of this kind.
# Remember every free sector seen, then follow the next pointer stored after
# the sector's signature and staleness counter.
940 $sectors{ $types{$multiple} }{ $loc } = undef;
941 $old_loc = $loc + SIG_SIZE + $STALE_SIZE;
# Walk the file sector by sector, starting right after the header.
# NOTE(review): this reads the storage object's {end} slot directly.
947 my $spot = $header_sector->size;
949 while ( $spot < $self->storage->{end} ) {
950 # Read each sector in order.
951 my $sector = $self->_load_sector( $spot );
953 # Find it in the free-sectors that were found already
954 foreach my $type ( keys %sectors ) {
955 if ( exists $sectors{$type}{$spot} ) {
956 my $size = $sizes{$type};
957 $return .= sprintf "%08d: %s %04d\n", $spot, 'F' . $type, $size;
# NOTE(review): presumably reached when no chain contained this offset; the
# guarding lines are not visible here.
963 die "********\n$return\nDidn't find free sector for $spot in chains\n********\n";
# Describe a live sector: offset, type letter and size, then type-specific
# detail.
966 $return .= sprintf "%08d: %s %04d", $spot, $sector->type, $sector->size;
967 if ( $sector->type eq 'D' ) {
968 $return .= ' ' . $sector->data;
970 elsif ( $sector->type eq 'A' || $sector->type eq 'H' ) {
971 $return .= ' REF: ' . $sector->get_refcount;
973 elsif ( $sector->type eq 'B' ) {
974 foreach my $bucket ( $sector->chopped_up ) {
# The last element of each bucket is treated as a packed record: hash_size
# bytes first, then byte_size-wide fields, which are decoded and printed here.
976 $return .= sprintf "%08d", unpack($StP{$self->byte_size},
977 substr( $bucket->[-1], $self->hash_size, $self->byte_size),
979 my $l = unpack( $StP{$self->byte_size},
980 substr( $bucket->[-1],
981 $self->hash_size + $self->byte_size,
985 $return .= sprintf " %08d", $l;
# One further field per non-HEAD transaction, spaced byte_size + $STALE_SIZE
# bytes apart.
986 foreach my $txn ( 0 .. $self->num_txns - 2 ) {
987 my $l = unpack( $StP{$self->byte_size},
988 substr( $bucket->[-1],
989 $self->hash_size + 2 * $self->byte_size + $txn * ($self->byte_size + $STALE_SIZE),
993 $return .= sprintf " %08d", $l;
# Advance to the next sector.
999 $spot += $sector->size;