1 package DBM::Deep::Engine;
6 use warnings FATAL => 'all';
8 # Never import symbols into our namespace. We are a class, not a library.
15 # * Every method in here assumes that the storage has been appropriately
16 # safeguarded. This can be anything from flock() to some sort of manual
17 # mutex. But, it's the caller's responsibility to make sure that this has
20 # Setup file and tag signatures. These should never change.
# Signature constants. SIG_FILE is the file signature; the single-character
# values are tags (the sector loader reads the first byte of a sector and maps
# it to a sector class). The empty prototype lets each one be used as a
# bareword constant, while still being callable as a class or instance method
# (e.g. DBM::Deep::Engine->SIG_HASH). These values should never change.
sub SIG_FILE   () { 'DPDB' }    # File signature
sub SIG_HEADER () { 'h'    }    # Header tag
sub SIG_HASH   () { 'H'    }    # Hash (loaded as a Reference sector)
sub SIG_ARRAY  () { 'A'    }    # Array (loaded as a Reference sector)
sub SIG_NULL   () { 'N'    }    # Null value (presumably the Null sector class)
sub SIG_DATA   () { 'D'    }    # Scalar data (loaded as a Scalar sector)
sub SIG_INDEX  () { 'I'    }    # Index sector
sub SIG_BLIST  () { 'B'    }    # Bucket list sector
sub SIG_FREE   () { 'F'    }    # Sector that has been freed
34 # Please refer to the pack() documentation for further information
36 1 => 'C', # Unsigned char value (no order needed as it's just one byte)
37 2 => 'n', # Unsigned short in "network" (big-endian) order
38 4 => 'N', # Unsigned long in "network" (big-endian) order
39 8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
# Look up the pack() template for a given byte width in the file-level %StP
# table. Called as a method: the invocant is ignored and only the first real
# argument (the width in bytes) is used. Returns undef for a width that has
# no entry in the table.
sub StP {
    my (undef, $byte_width) = @_;
    return $StP{$byte_width};
}
43 # Import these after the SIG_* definitions because those definitions are used
44 # in the headers of these classes. -RobK, 2008-06-20
45 use DBM::Deep::Engine::Sector::BucketList;
46 use DBM::Deep::Engine::Sector::FileHeader;
47 use DBM::Deep::Engine::Sector::Index;
48 use DBM::Deep::Engine::Sector::Null;
49 use DBM::Deep::Engine::Sector::Reference;
50 use DBM::Deep::Engine::Sector::Scalar;
51 use DBM::Deep::Iterator;
53 ################################################################################
59 $args->{storage} = DBM::Deep::File->new( $args )
60 unless exists $args->{storage};
66 hash_size => 16, # In bytes
67 hash_chars => 256, # Number of chars the algorithm uses per byte
69 num_txns => 1, # The HEAD
70 trans_id => 0, # Default to the HEAD
72 data_sector_size => 64, # Size in bytes of each data sector
74 entries => {}, # This is the list of entries for transactions
78 # Never allow byte_size to be set directly.
79 delete $args->{byte_size};
80 if ( defined $args->{pack_size} ) {
81 if ( lc $args->{pack_size} eq 'small' ) {
82 $args->{byte_size} = 2;
84 elsif ( lc $args->{pack_size} eq 'medium' ) {
85 $args->{byte_size} = 4;
87 elsif ( lc $args->{pack_size} eq 'large' ) {
88 $args->{byte_size} = 8;
91 DBM::Deep->_throw_error( "Unknown pack_size value: '$args->{pack_size}'" );
95 # Grab the parameters we want to use
96 foreach my $param ( keys %$self ) {
97 next unless exists $args->{$param};
98 $self->{$param} = $args->{$param};
102 max_buckets => { floor => 16, ceil => 256 },
103 num_txns => { floor => 1, ceil => 255 },
104 data_sector_size => { floor => 32, ceil => 256 },
107 while ( my ($attr, $c) = each %validations ) {
108 if ( !defined $self->{$attr}
109 || !length $self->{$attr}
110 || $self->{$attr} =~ /\D/
111 || $self->{$attr} < $c->{floor}
113 $self->{$attr} = '(undef)' if !defined $self->{$attr};
114 warn "Floor of $attr is $c->{floor}. Setting it to $c->{floor} from '$self->{$attr}'\n";
115 $self->{$attr} = $c->{floor};
117 elsif ( $self->{$attr} > $c->{ceil} ) {
118 warn "Ceiling of $attr is $c->{ceil}. Setting it to $c->{ceil} from '$self->{$attr}'\n";
119 $self->{$attr} = $c->{ceil};
123 if ( !$self->{digest} ) {
125 $self->{digest} = \&Digest::MD5::md5;
131 ################################################################################
135 my ($obj, $key) = @_;
137 # This will be a Reference sector
138 my $sector = $self->_load_sector( $obj->_base_offset )
141 if ( $sector->staleness != $obj->_staleness ) {
145 my $key_md5 = $self->_apply_digest( $key );
147 my $value_sector = $sector->get_data_for({
152 unless ( $value_sector ) {
153 $value_sector = DBM::Deep::Engine::Sector::Null->new({
158 $sector->write_data({
161 value => $value_sector,
165 return $value_sector->data;
172 # This will be a Reference sector
173 my $sector = $self->_load_sector( $obj->_base_offset )
174 or DBM::Deep->_throw_error( "How did get_classname fail (no sector for '$obj')?!" );
176 if ( $sector->staleness != $obj->_staleness ) {
180 return $sector->get_classname;
185 my ($obj, $old_key, $new_key) = @_;
187 # This will be a Reference sector
188 my $sector = $self->_load_sector( $obj->_base_offset )
189 or DBM::Deep->_throw_error( "How did make_reference fail (no sector for '$obj')?!" );
191 if ( $sector->staleness != $obj->_staleness ) {
195 my $old_md5 = $self->_apply_digest( $old_key );
197 my $value_sector = $sector->get_data_for({
202 unless ( $value_sector ) {
203 $value_sector = DBM::Deep::Engine::Sector::Null->new({
208 $sector->write_data({
211 value => $value_sector,
215 if ( $value_sector->isa( 'DBM::Deep::Engine::Sector::Reference' ) ) {
216 $sector->write_data({
218 key_md5 => $self->_apply_digest( $new_key ),
219 value => $value_sector,
221 $value_sector->increment_refcount;
224 $sector->write_data({
226 key_md5 => $self->_apply_digest( $new_key ),
227 value => $value_sector->clone,
234 my ($obj, $key) = @_;
236 # This will be a Reference sector
237 my $sector = $self->_load_sector( $obj->_base_offset )
240 if ( $sector->staleness != $obj->_staleness ) {
244 my $data = $sector->get_data_for({
245 key_md5 => $self->_apply_digest( $key ),
249 # exists() returns 1 or '' for true/false.
250 return $data ? 1 : '';
255 my ($obj, $key) = @_;
257 my $sector = $self->_load_sector( $obj->_base_offset )
260 if ( $sector->staleness != $obj->_staleness ) {
264 return $sector->delete_key({
265 key_md5 => $self->_apply_digest( $key ),
272 my ($obj, $key, $value) = @_;
274 my $r = Scalar::Util::reftype( $value ) || '';
277 last if $r eq 'HASH';
278 last if $r eq 'ARRAY';
280 DBM::Deep->_throw_error(
281 "Storage of references of type '$r' is not supported."
285 # This will be a Reference sector
286 my $sector = $self->_load_sector( $obj->_base_offset )
287 or DBM::Deep->_throw_error( "1: Cannot write to a deleted spot in DBM::Deep." );
289 if ( $sector->staleness != $obj->_staleness ) {
290 DBM::Deep->_throw_error( "2: Cannot write to a deleted spot in DBM::Deep." );
294 if ( !defined $value ) {
295 $class = 'DBM::Deep::Engine::Sector::Null';
297 elsif ( $r eq 'ARRAY' || $r eq 'HASH' ) {
299 if ( $r eq 'ARRAY' ) {
300 $tmpvar = tied @$value;
301 } elsif ( $r eq 'HASH' ) {
302 $tmpvar = tied %$value;
306 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $tmpvar->isa( 'DBM::Deep' ); };
308 unless ( $is_dbm_deep ) {
309 DBM::Deep->_throw_error( "Cannot store something that is tied." );
312 unless ( $tmpvar->_engine->storage == $self->storage ) {
313 DBM::Deep->_throw_error( "Cannot store values across DBM::Deep files. Please use export() instead." );
316 # First, verify if we're storing the same thing to this spot. If we are, then
317 # this should be a no-op. -EJS, 2008-05-19
318 my $loc = $sector->get_data_location_for({
319 key_md5 => $self->_apply_digest( $key ),
323 if ( defined($loc) && $loc == $tmpvar->_base_offset ) {
327 #XXX Can this use $loc?
328 my $value_sector = $self->_load_sector( $tmpvar->_base_offset );
329 $sector->write_data({
331 key_md5 => $self->_apply_digest( $key ),
332 value => $value_sector,
334 $value_sector->increment_refcount;
339 $class = 'DBM::Deep::Engine::Sector::Reference';
340 $type = substr( $r, 0, 1 );
343 if ( tied($value) ) {
344 DBM::Deep->_throw_error( "Cannot store something that is tied." );
346 $class = 'DBM::Deep::Engine::Sector::Scalar';
349 # Create this after loading the reference sector in case something bad happens.
350 # This way, we won't allocate value sector(s) needlessly.
351 my $value_sector = $class->new({
357 $sector->write_data({
359 key_md5 => $self->_apply_digest( $key ),
360 value => $value_sector,
363 # This code is to make sure we write all the values in the $value to the disk
364 # and to make sure all changes to $value after the assignment are reflected
365 # on disk. This may be counter-intuitive at first, but it is correct dwimmery.
366 # NOTE - simply tying $value won't perform a STORE on each value. Hence, the
367 # copy to a temp value.
368 if ( $r eq 'ARRAY' ) {
370 tie @$value, 'DBM::Deep', {
371 base_offset => $value_sector->offset,
372 staleness => $value_sector->staleness,
373 storage => $self->storage,
377 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
379 elsif ( $r eq 'HASH' ) {
381 tie %$value, 'DBM::Deep', {
382 base_offset => $value_sector->offset,
383 staleness => $value_sector->staleness,
384 storage => $self->storage,
389 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
395 # XXX Add staleness here
398 my ($obj, $prev_key) = @_;
400 # XXX Need to add logic about resetting the iterator if any key in the reference has changed
401 unless ( $prev_key ) {
402 $obj->{iterator} = DBM::Deep::Iterator->new({
403 base_offset => $obj->_base_offset,
408 return $obj->{iterator}->get_next_key( $obj );
411 ################################################################################
417 return 1 if $obj->_base_offset;
419 my $header = $self->_load_header;
421 # Creating a new file
422 if ( $header->is_new ) {
423 # 1) Create Array/Hash entry
424 my $sector = DBM::Deep::Engine::Sector::Reference->new({
428 $obj->{base_offset} = $sector->offset;
429 $obj->{staleness} = $sector->staleness;
433 # Reading from an existing file
435 $obj->{base_offset} = $header->size;
436 my $sector = DBM::Deep::Engine::Sector::Reference->new({
438 offset => $obj->_base_offset,
441 DBM::Deep->_throw_error("Corrupted file, no master index record");
444 unless ($obj->_type eq $sector->type) {
445 DBM::Deep->_throw_error("File type mismatch");
448 $obj->{staleness} = $sector->staleness;
451 $self->storage->set_inode;
460 if ( $self->trans_id ) {
461 DBM::Deep->_throw_error( "Cannot begin_work within an active transaction" );
464 my @slots = $self->read_txn_slots;
466 for my $i ( 0 .. $#slots ) {
470 $self->set_trans_id( $i + 1 );
475 DBM::Deep->_throw_error( "Cannot allocate transaction ID" );
477 $self->write_txn_slots( @slots );
479 if ( !$self->trans_id ) {
480 DBM::Deep->_throw_error( "Cannot begin_work - no available transactions" );
490 if ( !$self->trans_id ) {
491 DBM::Deep->_throw_error( "Cannot rollback without an active transaction" );
494 # Each entry is the file location for a bucket that has a modification for
495 # this transaction. The entries need to be expunged.
496 foreach my $entry (@{ $self->get_entries } ) {
497 # Remove the entry here
498 my $read_loc = $entry
502 + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE );
504 my $data_loc = $self->storage->read_at( $read_loc, $self->byte_size );
505 $data_loc = unpack( $StP{$self->byte_size}, $data_loc );
506 $self->storage->print_at( $read_loc, pack( $StP{$self->byte_size}, 0 ) );
508 if ( $data_loc > 1 ) {
509 $self->_load_sector( $data_loc )->free;
513 $self->clear_entries;
515 my @slots = $self->read_txn_slots;
516 $slots[$self->trans_id-1] = 0;
517 $self->write_txn_slots( @slots );
518 $self->inc_txn_staleness_counter( $self->trans_id );
519 $self->set_trans_id( 0 );
528 if ( !$self->trans_id ) {
529 DBM::Deep->_throw_error( "Cannot commit without an active transaction" );
532 foreach my $entry (@{ $self->get_entries } ) {
533 # Overwrite the entry in head with the entry in trans_id
538 my $head_loc = $self->storage->read_at( $base, $self->byte_size );
539 $head_loc = unpack( $StP{$self->byte_size}, $head_loc );
541 my $spot = $base + $self->byte_size + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE );
542 my $trans_loc = $self->storage->read_at(
543 $spot, $self->byte_size,
546 $self->storage->print_at( $base, $trans_loc );
547 $self->storage->print_at(
549 pack( $StP{$self->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ),
552 if ( $head_loc > 1 ) {
553 $self->_load_sector( $head_loc )->free;
557 $self->clear_entries;
559 my @slots = $self->read_txn_slots;
560 $slots[$self->trans_id-1] = 0;
561 $self->write_txn_slots( @slots );
562 $self->inc_txn_staleness_counter( $self->trans_id );
563 $self->set_trans_id( 0 );
570 return $self->_load_header->read_txn_slots;
573 sub write_txn_slots {
575 my $num_bits = $self->txn_bitfield_len * 8;
576 $self->storage->print_at( $self->trans_loc,
577 pack( 'b'.$num_bits, join('', @_) ),
581 sub get_running_txn_ids {
583 my @transactions = $self->read_txn_slots;
584 my @trans_ids = map { $_+1} grep { $transactions[$_] } 0 .. $#transactions;
587 sub get_txn_staleness_counter {
591 # Hardcode staleness of 0 for the HEAD
592 return 0 unless $trans_id;
594 return unpack( $StP{$STALE_SIZE},
595 $self->storage->read_at(
596 $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
602 sub inc_txn_staleness_counter {
606 # Hardcode staleness of 0 for the HEAD
607 return 0 unless $trans_id;
609 $self->storage->print_at(
610 $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
611 pack( $StP{$STALE_SIZE}, $self->get_txn_staleness_counter( $trans_id ) + 1 ),
617 return [ keys %{ $self->{entries}{$self->trans_id} ||= {} } ];
622 my ($trans_id, $loc) = @_;
624 $self->{entries}{$trans_id} ||= {};
625 $self->{entries}{$trans_id}{$loc} = undef;
628 # If the buckets are being relocated because of a reindexing, the entries
629 # mechanism needs to be made aware of it.
632 my ($old_loc, $new_loc) = @_;
635 while ( my ($trans_id, $locs) = each %{ $self->{entries} } ) {
636 if ( exists $locs->{$old_loc} ) {
637 delete $locs->{$old_loc};
638 $locs->{$new_loc} = undef;
646 delete $self->{entries}{$self->trans_id};
649 ################################################################################
653 return $self->{digest}->(@_);
# Hand a sector back to the file header's free-sector bookkeeping. All
# arguments are forwarded untouched to the header's add_free_sector().
sub _add_free_sector {
    my $self = shift;
    return $self->_load_header->add_free_sector( @_ );
}

# Convenience wrappers that prepend the chain selector before delegating to
# _add_free_sector(): 0 for bucket lists, 1 for data, 2 for indexes.
sub _add_free_blist_sector {
    my $self = shift;
    return $self->_add_free_sector( 0, @_ );
}

sub _add_free_data_sector {
    my $self = shift;
    return $self->_add_free_sector( 1, @_ );
}

sub _add_free_index_sector {
    my $self = shift;
    return $self->_add_free_sector( 2, @_ );
}
# Ask the file header for a sector. All arguments are forwarded untouched to
# the header's request_sector(), and its return value is passed back.
sub _request_sector {
    my $self = shift;
    return $self->_load_header->request_sector( @_ );
}

# Convenience wrappers that prepend the chain selector before delegating to
# _request_sector(): 0 for bucket lists, 1 for data, 2 for indexes.
sub _request_blist_sector {
    my $self = shift;
    return $self->_request_sector( 0, @_ );
}

sub _request_data_sector {
    my $self = shift;
    return $self->_request_sector( 1, @_ );
}

sub _request_index_sector {
    my $self = shift;
    return $self->_request_sector( 2, @_ );
}
666 ################################################################################
670 SIG_ARRAY => 'Reference',
671 SIG_HASH => 'Reference',
672 SIG_BLIST => 'BucketList',
673 SIG_INDEX => 'Index',
675 SIG_DATA => 'Scalar',
679 while ( my ($k,$v) = each %t ) {
680 $class_for{ DBM::Deep::Engine->$k } = "DBM::Deep::Engine::Sector::$v";
687 my $data = $self->get_data( $offset )
688 or return;#die "Cannot read from '$offset'\n";
689 my $type = substr( $$data, 0, 1 );
690 my $class = $class_for{ $type };
697 *_load_sector = \&load_sector;
702 #XXX Does this mean we make too many objects? -RobK, 2008-06-23
703 return DBM::Deep::Engine::Sector::FileHeader->new({
708 *_load_header = \&load_header;
712 my ($offset, $size) = @_;
713 return unless defined $offset;
715 unless ( exists $self->sector_cache->{$offset} ) {
716 # Don't worry about the header sector. It will manage itself.
717 return unless $offset;
719 if ( !defined $size ) {
720 my $type = $self->storage->read_at( $offset, 1 )
721 or die "($offset): Cannot read from '$offset' to find the type\n";
723 if ( $type eq $self->SIG_FREE ) {
727 my $class = $class_for{$type}
728 or die "($offset): Cannot find class for '$type'\n";
729 $size = $class->size( $self )
730 or die "($offset): '$class' doesn't return a size\n";
731 $self->sector_cache->{$offset} = $type . $self->storage->read_at( undef, $size - 1 );
734 $self->sector_cache->{$offset} = $self->storage->read_at( $offset, $size )
739 return \$self->sector_cache->{$offset};
745 return $self->{sector_cache} ||= {};
748 sub clear_sector_cache {
750 $self->{sector_cache} = {};
755 return $self->{dirty_sectors} ||= {};
758 sub clear_dirty_sectors {
760 $self->{dirty_sectors} = {};
763 sub add_dirty_sector {
767 $self->dirty_sectors->{ $offset } = undef;
773 my $sectors = $self->dirty_sectors;
774 for my $offset (sort { $a <=> $b } keys %{ $sectors }) {
775 $self->storage->print_at( $offset, $self->sector_cache->{$offset} );
778 $self->clear_dirty_sectors;
780 $self->clear_sector_cache;
783 ################################################################################
788 return $self->storage->lock_exclusive( $obj );
794 return $self->storage->lock_shared( $obj );
801 my $rv = $self->storage->unlock( $obj );
808 ################################################################################
# Read-only accessors for values held directly in the engine's hash.

# The storage object the engine reads from and writes to.
sub storage {
    my ($self) = @_;
    return $self->{storage};
}

# Width in bytes used when packing file offsets.
sub byte_size {
    my ($self) = @_;
    return $self->{byte_size};
}

# Size in bytes of a key digest.
sub hash_size {
    my ($self) = @_;
    return $self->{hash_size};
}

# Number of chars the algorithm uses per byte.
sub hash_chars {
    my ($self) = @_;
    return $self->{hash_chars};
}

# Number of transaction slots, counting the HEAD.
sub num_txns {
    my ($self) = @_;
    return $self->{num_txns};
}

sub max_buckets {
    my ($self) = @_;
    return $self->{max_buckets};
}

# A string of hash_size NUL bytes, i.e. an "empty" digest of the right length.
sub blank_md5 {
    my ($self) = @_;
    return chr(0) x $self->hash_size;
}

# Size in bytes of each data sector.
sub data_sector_size {
    my ($self) = @_;
    return $self->{data_sector_size};
}
819 # This is a calculated value
820 sub txn_bitfield_len {
822 unless ( exists $self->{txn_bitfield_len} ) {
823 my $temp = ($self->num_txns) / 8;
824 if ( $temp > int( $temp ) ) {
825 $temp = int( $temp ) + 1;
827 $self->{txn_bitfield_len} = $temp;
829 return $self->{txn_bitfield_len};
# Getter/setter pairs for values stored directly on the engine hash. Each
# setter stores its argument and returns the value just assigned.

# ID of the currently active transaction; 0 means the HEAD.
sub trans_id {
    my ($self) = @_;
    return $self->{trans_id};
}

sub set_trans_id {
    my ($self, $new_id) = @_;
    return $self->{trans_id} = $new_id;
}

# File offset used as the base for the transaction slot bitfield.
sub trans_loc {
    my ($self) = @_;
    return $self->{trans_loc};
}

sub set_trans_loc {
    my ($self, $new_loc) = @_;
    return $self->{trans_loc} = $new_loc;
}

# File offset used as the base for the free-sector chains.
sub chains_loc {
    my ($self) = @_;
    return $self->{chains_loc};
}

sub set_chains_loc {
    my ($self, $new_loc) = @_;
    return $self->{chains_loc} = $new_loc;
}
# Return the engine's cache hashref, creating an empty one on first use.
sub cache {
    my ($self) = @_;
    return $self->{cache} ||= {};
}

# Empty the cache in place: the same hashref stays attached to the engine, so
# anyone already holding it sees it emptied as well.
sub clear_cache {
    my ($self) = @_;
    return ( %{ $self->cache } = () );
}
849 my $header_sector = DBM::Deep::Engine::Sector::FileHeader->new({
860 'D' => $self->data_sector_size,
861 'B' => DBM::Deep::Engine::Sector::BucketList->new({engine=>$self,offset=>1})->size,
862 'I' => DBM::Deep::Engine::Sector::Index->new({engine=>$self,offset=>1})->size,
868 $return .= "Size: " . (-s $self->storage->{fh}) . $/;
871 $return .= "NumTxns: " . $self->num_txns . $/;
873 # Read the free sector chains
875 foreach my $multiple ( 0 .. 2 ) {
876 $return .= "Chains($types{$multiple}):";
877 my $old_loc = $self->chains_loc + $multiple * $self->byte_size;
880 $StP{$self->byte_size},
881 $self->storage->read_at( $old_loc, $self->byte_size ),
884 # We're now out of free sectors of this kind.
889 $sectors{ $types{$multiple} }{ $loc } = undef;
890 $old_loc = $loc + SIG_SIZE + $STALE_SIZE;
896 my $spot = $header_sector->size;
898 while ( $spot < $self->storage->{end} ) {
899 # Read each sector in order.
900 my $sector = $self->_load_sector( $spot );
902 # Find it in the free-sectors that were found already
903 foreach my $type ( keys %sectors ) {
904 if ( exists $sectors{$type}{$spot} ) {
905 my $size = $sizes{$type};
906 $return .= sprintf "%08d: %s %04d\n", $spot, 'F' . $type, $size;
912 die "********\n$return\nDidn't find free sector for $spot in chains\n********\n";
915 $return .= sprintf "%08d: %s %04d", $spot, $sector->type, $sector->size;
916 if ( $sector->type eq 'D' ) {
917 $return .= ' ' . $sector->data;
919 elsif ( $sector->type eq 'A' || $sector->type eq 'H' ) {
920 $return .= ' REF: ' . $sector->get_refcount;
922 elsif ( $sector->type eq 'B' ) {
923 foreach my $bucket ( $sector->chopped_up ) {
925 $return .= sprintf "%08d", unpack($StP{$self->byte_size},
926 substr( $bucket->[-1], $self->hash_size, $self->byte_size),
928 my $l = unpack( $StP{$self->byte_size},
929 substr( $bucket->[-1],
930 $self->hash_size + $self->byte_size,
934 $return .= sprintf " %08d", $l;
935 foreach my $txn ( 0 .. $self->num_txns - 2 ) {
936 my $l = unpack( $StP{$self->byte_size},
937 substr( $bucket->[-1],
938 $self->hash_size + 2 * $self->byte_size + $txn * ($self->byte_size + $STALE_SIZE),
942 $return .= sprintf " %08d", $l;
948 $spot += $sector->size;