1 package DBM::Deep::Engine;
6 use warnings FATAL => 'all';
8 # Never import symbols into our namespace. We are a class, not a library.
15 # * Every method in here assumes that the storage has been appropriately
16 # safeguarded. This can be anything from flock() to some sort of manual
17 # mutex. But, it's the caller's responsibility to make sure that this has
20 # Setup file and tag signatures. These should never change.
21 sub SIG_FILE () { 'DPDB' }
22 sub SIG_HEADER () { 'h' }
23 sub SIG_HASH () { 'H' }
24 sub SIG_ARRAY () { 'A' }
25 sub SIG_NULL () { 'N' }
26 sub SIG_DATA () { 'D' }
27 sub SIG_INDEX () { 'I' }
28 sub SIG_BLIST () { 'B' }
29 sub SIG_FREE () { 'F' }
34 # Please refer to the pack() documentation for further information
36 1 => 'C', # Unsigned char value (no order needed as it's just one byte)
37 2 => 'n', # Unsigned short in "network" (big-endian) order
38 4 => 'N', # Unsigned long in "network" (big-endian) order
39 8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
41 sub StP { $StP{$_[1]} }
43 # Import these after the SIG_* definitions because those definitions are used
44 # in the headers of these classes. -RobK, 2008-06-20
45 use DBM::Deep::Engine::Sector::BucketList;
46 use DBM::Deep::Engine::Sector::FileHeader;
47 use DBM::Deep::Engine::Sector::Index;
48 use DBM::Deep::Engine::Sector::Null;
49 use DBM::Deep::Engine::Sector::Reference;
50 use DBM::Deep::Engine::Sector::Scalar;
51 use DBM::Deep::Iterator;
53 ################################################################################
59 $args->{storage} = DBM::Deep::File->new( $args )
60 unless exists $args->{storage};
66 hash_size => 16, # In bytes
67 hash_chars => 256, # Number of chars the algorithm uses per byte
69 num_txns => 1, # The HEAD
70 trans_id => 0, # Default to the HEAD
72 data_sector_size => 64, # Size in bytes of each data sector
74 entries => {}, # This is the list of entries for transactions
78 # Never allow byte_size to be set directly.
79 delete $args->{byte_size};
80 if ( defined $args->{pack_size} ) {
81 if ( lc $args->{pack_size} eq 'small' ) {
82 $args->{byte_size} = 2;
84 elsif ( lc $args->{pack_size} eq 'medium' ) {
85 $args->{byte_size} = 4;
87 elsif ( lc $args->{pack_size} eq 'large' ) {
88 $args->{byte_size} = 8;
91 DBM::Deep->_throw_error( "Unknown pack_size value: '$args->{pack_size}'" );
95 # Grab the parameters we want to use
96 foreach my $param ( keys %$self ) {
97 next unless exists $args->{$param};
98 $self->{$param} = $args->{$param};
102 max_buckets => { floor => 16, ceil => 256 },
103 num_txns => { floor => 1, ceil => 255 },
104 data_sector_size => { floor => 32, ceil => 256 },
107 while ( my ($attr, $c) = each %validations ) {
108 if ( !defined $self->{$attr}
109 || !length $self->{$attr}
110 || $self->{$attr} =~ /\D/
111 || $self->{$attr} < $c->{floor}
113 $self->{$attr} = '(undef)' if !defined $self->{$attr};
114 warn "Floor of $attr is $c->{floor}. Setting it to $c->{floor} from '$self->{$attr}'\n";
115 $self->{$attr} = $c->{floor};
117 elsif ( $self->{$attr} > $c->{ceil} ) {
118 warn "Ceiling of $attr is $c->{ceil}. Setting it to $c->{ceil} from '$self->{$attr}'\n";
119 $self->{$attr} = $c->{ceil};
123 if ( !$self->{digest} ) {
125 $self->{digest} = \&Digest::MD5::md5;
131 ################################################################################
135 my ($obj, $key) = @_;
137 # This will be a Reference sector
138 my $sector = $self->_load_sector( $obj->_base_offset )
141 if ( $sector->staleness != $obj->_staleness ) {
145 my $key_md5 = $self->_apply_digest( $key );
147 my $value_sector = $sector->get_data_for({
152 unless ( $value_sector ) {
153 $value_sector = DBM::Deep::Engine::Sector::Null->new({
158 $sector->write_data({
161 value => $value_sector,
165 return $value_sector->data;
172 # This will be a Reference sector
173 my $sector = $self->_load_sector( $obj->_base_offset )
174 or DBM::Deep->_throw_error( "How did get_classname fail (no sector for '$obj')?!" );
176 if ( $sector->staleness != $obj->_staleness ) {
180 return $sector->get_classname;
185 my ($obj, $old_key, $new_key) = @_;
187 # This will be a Reference sector
188 my $sector = $self->_load_sector( $obj->_base_offset )
189 or DBM::Deep->_throw_error( "How did make_reference fail (no sector for '$obj')?!" );
191 if ( $sector->staleness != $obj->_staleness ) {
195 my $old_md5 = $self->_apply_digest( $old_key );
197 my $value_sector = $sector->get_data_for({
202 unless ( $value_sector ) {
203 $value_sector = DBM::Deep::Engine::Sector::Null->new({
208 $sector->write_data({
211 value => $value_sector,
215 if ( $value_sector->isa( 'DBM::Deep::Engine::Sector::Reference' ) ) {
216 $sector->write_data({
218 key_md5 => $self->_apply_digest( $new_key ),
219 value => $value_sector,
221 $value_sector->increment_refcount;
224 $sector->write_data({
226 key_md5 => $self->_apply_digest( $new_key ),
227 value => $value_sector->clone,
234 my ($obj, $key) = @_;
236 # This will be a Reference sector
237 my $sector = $self->_load_sector( $obj->_base_offset )
240 if ( $sector->staleness != $obj->_staleness ) {
244 my $data = $sector->get_data_for({
245 key_md5 => $self->_apply_digest( $key ),
249 # exists() returns 1 or '' for true/false.
250 return $data ? 1 : '';
255 my ($obj, $key) = @_;
257 my $sector = $self->_load_sector( $obj->_base_offset )
260 if ( $sector->staleness != $obj->_staleness ) {
264 return $sector->delete_key({
265 key_md5 => $self->_apply_digest( $key ),
272 my ($obj, $key, $value) = @_;
274 my $r = Scalar::Util::reftype( $value ) || '';
277 last if $r eq 'HASH';
278 last if $r eq 'ARRAY';
280 DBM::Deep->_throw_error(
281 "Storage of references of type '$r' is not supported."
285 # This will be a Reference sector
286 my $sector = $self->_load_sector( $obj->_base_offset )
287 or DBM::Deep->_throw_error( "1: Cannot write to a deleted spot in DBM::Deep." );
289 if ( $sector->staleness != $obj->_staleness ) {
290 DBM::Deep->_throw_error( "2: Cannot write to a deleted spot in DBM::Deep." );
294 if ( !defined $value ) {
295 $class = 'DBM::Deep::Engine::Sector::Null';
297 elsif ( $r eq 'ARRAY' || $r eq 'HASH' ) {
299 if ( $r eq 'ARRAY' ) {
300 $tmpvar = tied @$value;
301 } elsif ( $r eq 'HASH' ) {
302 $tmpvar = tied %$value;
306 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $tmpvar->isa( 'DBM::Deep' ); };
308 unless ( $is_dbm_deep ) {
309 DBM::Deep->_throw_error( "Cannot store something that is tied." );
312 unless ( $tmpvar->_engine->storage == $self->storage ) {
313 DBM::Deep->_throw_error( "Cannot store values across DBM::Deep files. Please use export() instead." );
316 # First, verify if we're storing the same thing to this spot. If we are, then
317 # this should be a no-op. -EJS, 2008-05-19
318 my $loc = $sector->get_data_location_for({
319 key_md5 => $self->_apply_digest( $key ),
323 if ( defined($loc) && $loc == $tmpvar->_base_offset ) {
327 #XXX Can this use $loc?
328 my $value_sector = $self->_load_sector( $tmpvar->_base_offset );
329 $sector->write_data({
331 key_md5 => $self->_apply_digest( $key ),
332 value => $value_sector,
334 $value_sector->increment_refcount;
339 $class = 'DBM::Deep::Engine::Sector::Reference';
340 $type = substr( $r, 0, 1 );
343 if ( tied($value) ) {
344 DBM::Deep->_throw_error( "Cannot store something that is tied." );
346 $class = 'DBM::Deep::Engine::Sector::Scalar';
349 # Create this after loading the reference sector in case something bad happens.
350 # This way, we won't allocate value sector(s) needlessly.
351 my $value_sector = $class->new({
357 $sector->write_data({
359 key_md5 => $self->_apply_digest( $key ),
360 value => $value_sector,
363 # This code is to make sure we write all the values in the $value to the disk
364 # and to make sure all changes to $value after the assignment are reflected
365 # on disk. This may be counter-intuitive at first, but it is correct dwimmery.
366 # NOTE - simply tying $value won't perform a STORE on each value. Hence, the
367 # copy to a temp value.
368 if ( $r eq 'ARRAY' ) {
370 tie @$value, 'DBM::Deep', {
371 base_offset => $value_sector->offset,
372 staleness => $value_sector->staleness,
373 storage => $self->storage,
377 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
379 elsif ( $r eq 'HASH' ) {
381 tie %$value, 'DBM::Deep', {
382 base_offset => $value_sector->offset,
383 staleness => $value_sector->staleness,
384 storage => $self->storage,
389 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
395 # XXX Add staleness here
398 my ($obj, $prev_key) = @_;
400 # XXX Need to add logic about resetting the iterator if any key in the reference has changed
401 unless ( $prev_key ) {
402 $obj->{iterator} = DBM::Deep::Iterator->new({
403 base_offset => $obj->_base_offset,
408 return $obj->{iterator}->get_next_key( $obj );
411 ################################################################################
417 return 1 if $obj->_base_offset;
419 my $header = $self->_load_header;
421 # Creating a new file
422 if ( $header->is_new ) {
423 # 1) Create Array/Hash entry
424 my $sector = DBM::Deep::Engine::Sector::Reference->new({
428 $obj->{base_offset} = $sector->offset;
429 $obj->{staleness} = $sector->staleness;
433 # Reading from an existing file
435 $obj->{base_offset} = $header->size;
436 my $sector = DBM::Deep::Engine::Sector::Reference->new({
438 offset => $obj->_base_offset,
441 DBM::Deep->_throw_error("Corrupted file, no master index record");
444 unless ($obj->_type eq $sector->type) {
445 DBM::Deep->_throw_error("File type mismatch");
448 $obj->{staleness} = $sector->staleness;
451 $self->storage->set_inode;
460 if ( $self->trans_id ) {
461 DBM::Deep->_throw_error( "Cannot begin_work within an active transaction" );
464 my @slots = $self->read_txn_slots;
466 for my $i ( 0 .. $#slots ) {
470 $self->set_trans_id( $i + 1 );
475 DBM::Deep->_throw_error( "Cannot allocate transaction ID" );
477 $self->write_txn_slots( @slots );
479 if ( !$self->trans_id ) {
480 DBM::Deep->_throw_error( "Cannot begin_work - no available transactions" );
490 if ( !$self->trans_id ) {
491 DBM::Deep->_throw_error( "Cannot rollback without an active transaction" );
494 # Each entry is the file location for a bucket that has a modification for
495 # this transaction. The entries need to be expunged.
496 foreach my $entry (@{ $self->get_entries } ) {
497 # Remove the entry here
498 my $read_loc = $entry
502 + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE );
504 my $data_loc = $self->storage->read_at( $read_loc, $self->byte_size );
505 $data_loc = unpack( $StP{$self->byte_size}, $data_loc );
506 $self->storage->print_at( $read_loc, pack( $StP{$self->byte_size}, 0 ) );
508 if ( $data_loc > 1 ) {
509 $self->_load_sector( $data_loc )->free;
513 $self->clear_entries;
515 my @slots = $self->read_txn_slots;
516 $slots[$self->trans_id-1] = 0;
517 $self->write_txn_slots( @slots );
518 $self->inc_txn_staleness_counter( $self->trans_id );
519 $self->set_trans_id( 0 );
528 if ( !$self->trans_id ) {
529 DBM::Deep->_throw_error( "Cannot commit without an active transaction" );
532 foreach my $entry (@{ $self->get_entries } ) {
533 # Overwrite the entry in head with the entry in trans_id
538 my $head_loc = $self->storage->read_at( $base, $self->byte_size );
539 $head_loc = unpack( $StP{$self->byte_size}, $head_loc );
541 my $spot = $base + $self->byte_size + ($self->trans_id - 1) * ( $self->byte_size + $STALE_SIZE );
542 my $trans_loc = $self->storage->read_at(
543 $spot, $self->byte_size,
546 $self->storage->print_at( $base, $trans_loc );
547 $self->storage->print_at(
549 pack( $StP{$self->byte_size} . ' ' . $StP{$STALE_SIZE}, (0) x 2 ),
552 if ( $head_loc > 1 ) {
553 $self->_load_sector( $head_loc )->free;
557 $self->clear_entries;
559 my @slots = $self->read_txn_slots;
560 $slots[$self->trans_id-1] = 0;
561 $self->write_txn_slots( @slots );
562 $self->inc_txn_staleness_counter( $self->trans_id );
563 $self->set_trans_id( 0 );
570 my $bl = $self->txn_bitfield_len;
571 my $num_bits = $bl * 8;
572 return split '', unpack( 'b'.$num_bits,
573 $self->storage->read_at(
574 $self->trans_loc, $bl,
579 sub write_txn_slots {
581 my $num_bits = $self->txn_bitfield_len * 8;
582 $self->storage->print_at( $self->trans_loc,
583 pack( 'b'.$num_bits, join('', @_) ),
587 sub get_running_txn_ids {
589 my @transactions = $self->read_txn_slots;
590 my @trans_ids = map { $_+1} grep { $transactions[$_] } 0 .. $#transactions;
593 sub get_txn_staleness_counter {
597 # Hardcode staleness of 0 for the HEAD
598 return 0 unless $trans_id;
600 return unpack( $StP{$STALE_SIZE},
601 $self->storage->read_at(
602 $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
608 sub inc_txn_staleness_counter {
612 # Hardcode staleness of 0 for the HEAD
613 return 0 unless $trans_id;
615 $self->storage->print_at(
616 $self->trans_loc + $self->txn_bitfield_len + $STALE_SIZE * ($trans_id - 1),
617 pack( $StP{$STALE_SIZE}, $self->get_txn_staleness_counter( $trans_id ) + 1 ),
623 return [ keys %{ $self->{entries}{$self->trans_id} ||= {} } ];
628 my ($trans_id, $loc) = @_;
630 $self->{entries}{$trans_id} ||= {};
631 $self->{entries}{$trans_id}{$loc} = undef;
634 # If the buckets are being relocated because of a reindexing, the entries
635 # mechanism needs to be made aware of it.
638 my ($old_loc, $new_loc) = @_;
641 while ( my ($trans_id, $locs) = each %{ $self->{entries} } ) {
642 if ( exists $locs->{$old_loc} ) {
643 delete $locs->{$old_loc};
644 $locs->{$new_loc} = undef;
652 delete $self->{entries}{$self->trans_id};
655 ################################################################################
659 return $self->{digest}->(@_);
662 sub _add_free_blist_sector { shift->_add_free_sector( 0, @_ ) }
663 sub _add_free_data_sector { shift->_add_free_sector( 1, @_ ) }
664 sub _add_free_index_sector { shift->_add_free_sector( 2, @_ ) }
665 sub _add_free_sector { shift->_load_header->add_free_sector( @_ ) }
667 sub _request_blist_sector { shift->_request_sector( 0, @_ ) }
668 sub _request_data_sector { shift->_request_sector( 1, @_ ) }
669 sub _request_index_sector { shift->_request_sector( 2, @_ ) }
670 sub _request_sector { shift->_load_header->request_sector( @_ ) }
672 ################################################################################
676 SIG_ARRAY => 'Reference',
677 SIG_HASH => 'Reference',
678 SIG_BLIST => 'BucketList',
679 SIG_INDEX => 'Index',
681 SIG_DATA => 'Scalar',
685 while ( my ($k,$v) = each %t ) {
686 $class_for{ DBM::Deep::Engine->$k } = "DBM::Deep::Engine::Sector::$v";
693 my $data = $self->get_data( $offset )
694 or return;#die "Cannot read from '$offset'\n";
695 my $type = substr( $$data, 0, 1 );
696 my $class = $class_for{ $type };
703 *_load_sector = \&load_sector;
708 #XXX Does this mean we make too many objects? -RobK, 2008-06-23
709 return DBM::Deep::Engine::Sector::FileHeader->new({
714 *_load_header = \&load_header;
718 my ($offset, $size) = @_;
719 return unless defined $offset;
721 unless ( exists $self->sector_cache->{$offset} ) {
722 # Don't worry about the header sector. It will manage itself.
723 return unless $offset;
725 if ( !defined $size ) {
726 my $type = $self->storage->read_at( $offset, 1 )
727 or die "($offset): Cannot read from '$offset' to find the type\n";
729 if ( $type eq $self->SIG_FREE ) {
733 my $class = $class_for{$type}
734 or die "($offset): Cannot find class for '$type'\n";
735 $size = $class->size( $self )
736 or die "($offset): '$class' doesn't return a size\n";
737 $self->sector_cache->{$offset} = $type . $self->storage->read_at( undef, $size - 1 );
740 $self->sector_cache->{$offset} = $self->storage->read_at( $offset, $size )
745 return \$self->sector_cache->{$offset};
751 return $self->{sector_cache} ||= {};
754 sub clear_sector_cache {
756 $self->{sector_cache} = {};
761 return $self->{dirty_sectors} ||= {};
764 sub clear_dirty_sectors {
766 $self->{dirty_sectors} = {};
769 sub add_dirty_sector {
773 $self->dirty_sectors->{ $offset } = undef;
779 my $sectors = $self->dirty_sectors;
780 for my $offset (sort { $a <=> $b } keys %{ $sectors }) {
781 $self->storage->print_at( $offset, $self->sector_cache->{$offset} );
784 $self->clear_dirty_sectors;
786 $self->clear_sector_cache;
789 ################################################################################
794 return $self->storage->lock_exclusive( $obj );
800 return $self->storage->lock_shared( $obj );
807 my $rv = $self->storage->unlock( $obj );
814 ################################################################################
816 sub storage { $_[0]{storage} }
817 sub byte_size { $_[0]{byte_size} }
818 sub hash_size { $_[0]{hash_size} }
819 sub hash_chars { $_[0]{hash_chars} }
820 sub num_txns { $_[0]{num_txns} }
821 sub max_buckets { $_[0]{max_buckets} }
822 sub blank_md5 { chr(0) x $_[0]->hash_size }
823 sub data_sector_size { $_[0]{data_sector_size} }
825 # This is a calculated value
826 sub txn_bitfield_len {
828 unless ( exists $self->{txn_bitfield_len} ) {
829 my $temp = ($self->num_txns) / 8;
830 if ( $temp > int( $temp ) ) {
831 $temp = int( $temp ) + 1;
833 $self->{txn_bitfield_len} = $temp;
835 return $self->{txn_bitfield_len};
838 sub trans_id { $_[0]{trans_id} }
839 sub set_trans_id { $_[0]{trans_id} = $_[1] }
841 sub trans_loc { $_[0]{trans_loc} }
842 sub set_trans_loc { $_[0]{trans_loc} = $_[1] }
844 sub chains_loc { $_[0]{chains_loc} }
845 sub set_chains_loc { $_[0]{chains_loc} = $_[1] }
847 sub cache { $_[0]{cache} ||= {} }
848 sub clear_cache { %{$_[0]->cache} = () }
855 my $header_sector = DBM::Deep::Engine::Sector::FileHeader->new({
866 'D' => $self->data_sector_size,
867 'B' => DBM::Deep::Engine::Sector::BucketList->new({engine=>$self,offset=>1})->size,
868 'I' => DBM::Deep::Engine::Sector::Index->new({engine=>$self,offset=>1})->size,
874 $return .= "Size: " . (-s $self->storage->{fh}) . $/;
877 $return .= "NumTxns: " . $self->num_txns . $/;
879 # Read the free sector chains
881 foreach my $multiple ( 0 .. 2 ) {
882 $return .= "Chains($types{$multiple}):";
883 my $old_loc = $self->chains_loc + $multiple * $self->byte_size;
886 $StP{$self->byte_size},
887 $self->storage->read_at( $old_loc, $self->byte_size ),
890 # We're now out of free sectors of this kind.
895 $sectors{ $types{$multiple} }{ $loc } = undef;
896 $old_loc = $loc + SIG_SIZE + $STALE_SIZE;
902 my $spot = $header_sector->size;
904 while ( $spot < $self->storage->{end} ) {
905 # Read each sector in order.
906 my $sector = $self->_load_sector( $spot );
908 # Find it in the free-sectors that were found already
909 foreach my $type ( keys %sectors ) {
910 if ( exists $sectors{$type}{$spot} ) {
911 my $size = $sizes{$type};
912 $return .= sprintf "%08d: %s %04d\n", $spot, 'F' . $type, $size;
918 die "********\n$return\nDidn't find free sector for $spot in chains\n********\n";
921 $return .= sprintf "%08d: %s %04d", $spot, $sector->type, $sector->size;
922 if ( $sector->type eq 'D' ) {
923 $return .= ' ' . $sector->data;
925 elsif ( $sector->type eq 'A' || $sector->type eq 'H' ) {
926 $return .= ' REF: ' . $sector->get_refcount;
928 elsif ( $sector->type eq 'B' ) {
929 foreach my $bucket ( $sector->chopped_up ) {
931 $return .= sprintf "%08d", unpack($StP{$self->byte_size},
932 substr( $bucket->[-1], $self->hash_size, $self->byte_size),
934 my $l = unpack( $StP{$self->byte_size},
935 substr( $bucket->[-1],
936 $self->hash_size + $self->byte_size,
940 $return .= sprintf " %08d", $l;
941 foreach my $txn ( 0 .. $self->num_txns - 2 ) {
942 my $l = unpack( $StP{$self->byte_size},
943 substr( $bucket->[-1],
944 $self->hash_size + 2 * $self->byte_size + $txn * ($self->byte_size + $STALE_SIZE),
948 $return .= sprintf " %08d", $l;
954 $spot += $sector->size;