1 package DBM::Deep::Engine;
6 use warnings FATAL => 'all';
8 # Never import symbols into our namespace. We are a class, not a library.
15 # * Every method in here assumes that the storage has been appropriately
16 # safeguarded. This can be anything from flock() to some sort of manual
17 # mutex. But, it's the caller's responsibility to make sure that this has
# File and sector-tag signatures. These values are written into the storage
# and must never change.
#
# They are declared as constant subs so that they can still be invoked as
# class or instance methods (e.g. DBM::Deep::Engine->SIG_HASH, $self->SIG_FREE).
use constant {
    SIG_FILE   => 'DPDB',    # Signature identifying the file itself
    SIG_HEADER => 'h',       # Header tag
    SIG_HASH   => 'H',       # Reference sector holding a hash
    SIG_ARRAY  => 'A',       # Reference sector holding an array
    SIG_NULL   => 'N',       # Null tag
    SIG_DATA   => 'D',       # Scalar (data) sector
    SIG_INDEX  => 'I',       # Index sector
    SIG_BLIST  => 'B',       # BucketList sector
    SIG_FREE   => 'F',       # Type byte marking a freed sector
};
34 # Please refer to the pack() documentation for further information
36 1 => 'C', # Unsigned char value (no order needed as it's just one byte)
37 2 => 'n', # Unsigned short in "network" (big-endian) order
38 4 => 'N', # Unsigned long in "network" (big-endian) order
39 8 => 'Q', # Unsigned quad (no order specified, presumably machine-dependent)
# Look up the pack() template registered in %StP for a given byte width.
# Called as a method: the invocant is ignored and only the width is used.
# Returns undef when no template exists for that width.
sub StP {
    my ( undef, $width ) = @_;
    return $StP{$width};
}
43 # Import these after the SIG_* definitions because those definitions are used
44 # in the headers of these classes. -RobK, 2008-06-20
45 use DBM::Deep::Engine::Sector::BucketList;
46 use DBM::Deep::Engine::Sector::FileHeader;
47 use DBM::Deep::Engine::Sector::Index;
48 use DBM::Deep::Engine::Sector::Null;
49 use DBM::Deep::Engine::Sector::Reference;
50 use DBM::Deep::Engine::Sector::Scalar;
51 use DBM::Deep::Iterator;
53 ################################################################################
59 $args->{storage} = DBM::Deep::File->new( $args )
60 unless exists $args->{storage};
66 hash_size => 16, # In bytes
67 hash_chars => 256, # Number of chars the algorithm uses per byte
69 num_txns => 1, # The HEAD
70 trans_id => 0, # Default to the HEAD
72 data_sector_size => 64, # Size in bytes of each data sector
74 entries => {}, # This is the list of entries for transactions
78 # Never allow byte_size to be set directly.
79 delete $args->{byte_size};
80 if ( defined $args->{pack_size} ) {
81 if ( lc $args->{pack_size} eq 'small' ) {
82 $args->{byte_size} = 2;
84 elsif ( lc $args->{pack_size} eq 'medium' ) {
85 $args->{byte_size} = 4;
87 elsif ( lc $args->{pack_size} eq 'large' ) {
88 $args->{byte_size} = 8;
91 DBM::Deep->_throw_error( "Unknown pack_size value: '$args->{pack_size}'" );
95 # Grab the parameters we want to use
96 foreach my $param ( keys %$self ) {
97 next unless exists $args->{$param};
98 $self->{$param} = $args->{$param};
102 max_buckets => { floor => 16, ceil => 256 },
103 num_txns => { floor => 1, ceil => 255 },
104 data_sector_size => { floor => 32, ceil => 256 },
107 while ( my ($attr, $c) = each %validations ) {
108 if ( !defined $self->{$attr}
109 || !length $self->{$attr}
110 || $self->{$attr} =~ /\D/
111 || $self->{$attr} < $c->{floor}
113 $self->{$attr} = '(undef)' if !defined $self->{$attr};
114 warn "Floor of $attr is $c->{floor}. Setting it to $c->{floor} from '$self->{$attr}'\n";
115 $self->{$attr} = $c->{floor};
117 elsif ( $self->{$attr} > $c->{ceil} ) {
118 warn "Ceiling of $attr is $c->{ceil}. Setting it to $c->{ceil} from '$self->{$attr}'\n";
119 $self->{$attr} = $c->{ceil};
123 if ( !$self->{digest} ) {
125 $self->{digest} = \&Digest::MD5::md5;
131 ################################################################################
135 my ($obj, $key) = @_;
137 # This will be a Reference sector
138 my $sector = $self->_load_sector( $obj->_base_offset )
141 if ( $sector->staleness != $obj->_staleness ) {
145 my $key_md5 = $self->_apply_digest( $key );
147 my $value_sector = $sector->get_data_for({
152 unless ( $value_sector ) {
153 $value_sector = DBM::Deep::Engine::Sector::Null->new({
158 $sector->write_data({
161 value => $value_sector,
165 return $value_sector->data;
172 # This will be a Reference sector
173 my $sector = $self->_load_sector( $obj->_base_offset )
174 or DBM::Deep->_throw_error( "How did get_classname fail (no sector for '$obj')?!" );
176 if ( $sector->staleness != $obj->_staleness ) {
180 return $sector->get_classname;
185 my ($obj, $old_key, $new_key) = @_;
187 # This will be a Reference sector
188 my $sector = $self->_load_sector( $obj->_base_offset )
189 or DBM::Deep->_throw_error( "How did make_reference fail (no sector for '$obj')?!" );
191 if ( $sector->staleness != $obj->_staleness ) {
195 my $old_md5 = $self->_apply_digest( $old_key );
197 my $value_sector = $sector->get_data_for({
202 unless ( $value_sector ) {
203 $value_sector = DBM::Deep::Engine::Sector::Null->new({
208 $sector->write_data({
211 value => $value_sector,
215 if ( $value_sector->isa( 'DBM::Deep::Engine::Sector::Reference' ) ) {
216 $sector->write_data({
218 key_md5 => $self->_apply_digest( $new_key ),
219 value => $value_sector,
221 $value_sector->increment_refcount;
224 $sector->write_data({
226 key_md5 => $self->_apply_digest( $new_key ),
227 value => $value_sector->clone,
234 my ($obj, $key) = @_;
236 # This will be a Reference sector
237 my $sector = $self->_load_sector( $obj->_base_offset )
240 if ( $sector->staleness != $obj->_staleness ) {
244 my $data = $sector->get_data_for({
245 key_md5 => $self->_apply_digest( $key ),
249 # exists() returns 1 or '' for true/false.
250 return $data ? 1 : '';
255 my ($obj, $key) = @_;
257 my $sector = $self->_load_sector( $obj->_base_offset )
260 if ( $sector->staleness != $obj->_staleness ) {
264 return $sector->delete_key({
265 key_md5 => $self->_apply_digest( $key ),
272 my ($obj, $key, $value) = @_;
274 my $r = Scalar::Util::reftype( $value ) || '';
277 last if $r eq 'HASH';
278 last if $r eq 'ARRAY';
280 DBM::Deep->_throw_error(
281 "Storage of references of type '$r' is not supported."
285 # This will be a Reference sector
286 my $sector = $self->_load_sector( $obj->_base_offset )
287 or DBM::Deep->_throw_error( "1: Cannot write to a deleted spot in DBM::Deep." );
289 if ( $sector->staleness != $obj->_staleness ) {
290 DBM::Deep->_throw_error( "2: Cannot write to a deleted spot in DBM::Deep." );
294 if ( !defined $value ) {
295 $class = 'DBM::Deep::Engine::Sector::Null';
297 elsif ( $r eq 'ARRAY' || $r eq 'HASH' ) {
299 if ( $r eq 'ARRAY' ) {
300 $tmpvar = tied @$value;
301 } elsif ( $r eq 'HASH' ) {
302 $tmpvar = tied %$value;
306 my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $tmpvar->isa( 'DBM::Deep' ); };
308 unless ( $is_dbm_deep ) {
309 DBM::Deep->_throw_error( "Cannot store something that is tied." );
312 unless ( $tmpvar->_engine->storage == $self->storage ) {
313 DBM::Deep->_throw_error( "Cannot store values across DBM::Deep files. Please use export() instead." );
316 # First, verify if we're storing the same thing to this spot. If we are, then
317 # this should be a no-op. -EJS, 2008-05-19
318 my $loc = $sector->get_data_location_for({
319 key_md5 => $self->_apply_digest( $key ),
323 if ( defined($loc) && $loc == $tmpvar->_base_offset ) {
327 #XXX Can this use $loc?
328 my $value_sector = $self->_load_sector( $tmpvar->_base_offset );
329 $sector->write_data({
331 key_md5 => $self->_apply_digest( $key ),
332 value => $value_sector,
334 $value_sector->increment_refcount;
339 $class = 'DBM::Deep::Engine::Sector::Reference';
340 $type = substr( $r, 0, 1 );
343 if ( tied($value) ) {
344 DBM::Deep->_throw_error( "Cannot store something that is tied." );
346 $class = 'DBM::Deep::Engine::Sector::Scalar';
349 # Create this after loading the reference sector in case something bad happens.
350 # This way, we won't allocate value sector(s) needlessly.
351 my $value_sector = $class->new({
357 $sector->write_data({
359 key_md5 => $self->_apply_digest( $key ),
360 value => $value_sector,
363 # This code is to make sure we write all the values in the $value to the disk
364 # and to make sure all changes to $value after the assignment are reflected
365 # on disk. This may be counter-intuitive at first, but it is correct dwimmery.
366 # NOTE - simply tying $value won't perform a STORE on each value. Hence, the
367 # copy to a temp value.
368 if ( $r eq 'ARRAY' ) {
370 tie @$value, 'DBM::Deep', {
371 base_offset => $value_sector->offset,
372 staleness => $value_sector->staleness,
373 storage => $self->storage,
377 bless $value, 'DBM::Deep::Array' unless Scalar::Util::blessed( $value );
379 elsif ( $r eq 'HASH' ) {
381 tie %$value, 'DBM::Deep', {
382 base_offset => $value_sector->offset,
383 staleness => $value_sector->staleness,
384 storage => $self->storage,
389 bless $value, 'DBM::Deep::Hash' unless Scalar::Util::blessed( $value );
395 # XXX Add staleness here
398 my ($obj, $prev_key) = @_;
400 # XXX Need to add logic about resetting the iterator if any key in the reference has changed
401 unless ( $prev_key ) {
402 $obj->{iterator} = DBM::Deep::Iterator->new({
403 base_offset => $obj->_base_offset,
408 return $obj->{iterator}->get_next_key( $obj );
411 ################################################################################
417 return 1 if $obj->_base_offset;
419 my $header = $self->_load_header;
421 # Creating a new file
422 if ( $header->is_new ) {
423 # 1) Create Array/Hash entry
424 my $sector = DBM::Deep::Engine::Sector::Reference->new({
428 $obj->{base_offset} = $sector->offset;
429 $obj->{staleness} = $sector->staleness;
433 # Reading from an existing file
435 $obj->{base_offset} = $header->size;
436 my $sector = DBM::Deep::Engine::Sector::Reference->new({
438 offset => $obj->_base_offset,
441 DBM::Deep->_throw_error("Corrupted file, no master index record");
444 unless ($obj->_type eq $sector->type) {
445 DBM::Deep->_throw_error("File type mismatch");
448 $obj->{staleness} = $sector->staleness;
451 $self->storage->set_inode;
460 if ( $self->trans_id ) {
461 DBM::Deep->_throw_error( "Cannot begin_work within an active transaction" );
464 my @slots = $self->read_txn_slots;
466 for my $i ( 0 .. $#slots ) {
470 $self->set_trans_id( $i + 1 );
475 DBM::Deep->_throw_error( "Cannot allocate transaction ID" );
477 $self->write_txn_slots( @slots );
479 if ( !$self->trans_id ) {
480 DBM::Deep->_throw_error( "Cannot begin_work - no available transactions" );
490 if ( !$self->trans_id ) {
491 DBM::Deep->_throw_error( "Cannot rollback without an active transaction" );
494 foreach my $entry ( @{ $self->get_entries } ) {
495 my ($sector, $idx) = split ':', $entry;
496 $self->_load_sector( $sector )->rollback( $idx );
499 $self->clear_entries;
501 my @slots = $self->read_txn_slots;
502 $slots[$self->trans_id-1] = 0;
503 $self->write_txn_slots( @slots );
504 $self->inc_txn_staleness_counter( $self->trans_id );
505 $self->set_trans_id( 0 );
514 if ( !$self->trans_id ) {
515 DBM::Deep->_throw_error( "Cannot commit without an active transaction" );
518 foreach my $entry ( @{ $self->get_entries } ) {
519 my ($sector, $idx) = split ':', $entry;
520 $self->_load_sector( $sector )->commit( $idx );
523 $self->clear_entries;
525 my @slots = $self->read_txn_slots;
526 $slots[$self->trans_id-1] = 0;
527 $self->write_txn_slots( @slots );
528 $self->inc_txn_staleness_counter( $self->trans_id );
529 $self->set_trans_id( 0 );
536 return $self->_load_header->read_txn_slots(@_);
539 sub write_txn_slots {
541 return $self->_load_header->write_txn_slots(@_);
544 sub get_running_txn_ids {
546 my @transactions = $self->read_txn_slots;
547 my @trans_ids = map { $_+1} grep { $transactions[$_] } 0 .. $#transactions;
550 sub get_txn_staleness_counter {
552 return $self->_load_header->get_txn_staleness_counter(@_);
555 sub inc_txn_staleness_counter {
557 return $self->_load_header->inc_txn_staleness_counter(@_);
562 return [ keys %{ $self->{entries}{$self->trans_id} ||= {} } ];
567 my ($trans_id, $loc, $idx) = @_;
569 $self->{entries}{$trans_id} ||= {};
570 $self->{entries}{$trans_id}{"$loc:$idx"} = undef;
573 # If the buckets are being relocated because of a reindexing, the entries
574 # mechanism needs to be made aware of it.
577 my ($old_loc, $old_idx, $new_loc, $new_idx) = @_;
580 while ( my ($trans_id, $locs) = each %{ $self->{entries} } ) {
581 if ( exists $locs->{"$old_loc:$old_idx"} ) {
582 delete $locs->{"$old_loc:$old_idx"};
583 $locs->{"$new_loc:$new_idx"} = undef;
591 delete $self->{entries}{$self->trans_id};
594 ################################################################################
598 return $self->{digest}->(@_);
# Hand a sector back to the file header's free list.
#
# _add_free_sector forwards its arguments unchanged to the header object's
# add_free_sector(). The three typed wrappers prepend the chain selector:
#   0 => bucket-list sectors, 1 => data sectors, 2 => index sectors.
sub _add_free_sector {
    my $self = shift;
    return $self->_load_header->add_free_sector( @_ );
}

sub _add_free_blist_sector {
    my $self = shift;
    return $self->_add_free_sector( 0, @_ );
}

sub _add_free_data_sector {
    my $self = shift;
    return $self->_add_free_sector( 1, @_ );
}

sub _add_free_index_sector {
    my $self = shift;
    return $self->_add_free_sector( 2, @_ );
}
# Ask the file header for a sector.
#
# _request_sector forwards its arguments unchanged to the header object's
# request_sector(). The three typed wrappers prepend the chain selector:
#   0 => bucket-list sectors, 1 => data sectors, 2 => index sectors.
sub _request_sector {
    my $self = shift;
    return $self->_load_header->request_sector( @_ );
}

sub _request_blist_sector {
    my $self = shift;
    return $self->_request_sector( 0, @_ );
}

sub _request_data_sector {
    my $self = shift;
    return $self->_request_sector( 1, @_ );
}

sub _request_index_sector {
    my $self = shift;
    return $self->_request_sector( 2, @_ );
}
611 ################################################################################
615 SIG_ARRAY => 'Reference',
616 SIG_HASH => 'Reference',
617 SIG_BLIST => 'BucketList',
618 SIG_INDEX => 'Index',
620 SIG_DATA => 'Scalar',
624 while ( my ($k,$v) = each %t ) {
625 $class_for{ DBM::Deep::Engine->$k } = "DBM::Deep::Engine::Sector::$v";
632 my $data = $self->get_data( $offset )
633 or return;#die "Cannot read from '$offset'\n";
634 my $type = substr( $$data, 0, 1 );
635 my $class = $class_for{ $type };
642 *_load_sector = \&load_sector;
647 #XXX Does this mean we make too many objects? -RobK, 2008-06-23
648 return DBM::Deep::Engine::Sector::FileHeader->new({
653 *_load_header = \&load_header;
657 my ($offset, $size) = @_;
658 return unless defined $offset;
660 unless ( exists $self->sector_cache->{$offset} ) {
661 # Don't worry about the header sector. It will manage itself.
662 return unless $offset;
664 if ( !defined $size ) {
665 my $type = $self->storage->read_at( $offset, 1 )
666 or die "($offset): Cannot read from '$offset' to find the type\n";
668 if ( $type eq $self->SIG_FREE ) {
672 my $class = $class_for{$type}
673 or die "($offset): Cannot find class for '$type'\n";
674 $size = $class->size( $self )
675 or die "($offset): '$class' doesn't return a size\n";
676 $self->sector_cache->{$offset} = $type . $self->storage->read_at( undef, $size - 1 );
679 $self->sector_cache->{$offset} = $self->storage->read_at( $offset, $size )
684 return \$self->sector_cache->{$offset};
690 return $self->{sector_cache} ||= {};
693 sub clear_sector_cache {
695 $self->{sector_cache} = {};
700 return $self->{dirty_sectors} ||= {};
703 sub clear_dirty_sectors {
705 $self->{dirty_sectors} = {};
708 sub add_dirty_sector {
712 $self->dirty_sectors->{ $offset } = undef;
718 my $sectors = $self->dirty_sectors;
719 for my $offset (sort { $a <=> $b } keys %{ $sectors }) {
720 $self->storage->print_at( $offset, $self->sector_cache->{$offset} );
723 # Why do we need to have the storage flush? Shouldn't autoflush take care of things?
725 $self->storage->flush;
727 $self->clear_dirty_sectors;
729 $self->clear_sector_cache;
732 ################################################################################
737 return $self->storage->lock_exclusive( $obj );
743 return $self->storage->lock_shared( $obj );
750 my $rv = $self->storage->unlock( $obj );
757 ################################################################################
# Read-only accessors for the engine's configuration slots. Each one simply
# returns the value stored under the same-named key of the engine hash.

# The storage object this engine reads from and writes to.
sub storage {
    my ($self) = @_;
    return $self->{storage};
}

# Value of the 'byte_size' slot.
sub byte_size {
    my ($self) = @_;
    return $self->{byte_size};
}

# Value of the 'hash_size' slot.
sub hash_size {
    my ($self) = @_;
    return $self->{hash_size};
}

# Value of the 'hash_chars' slot.
sub hash_chars {
    my ($self) = @_;
    return $self->{hash_chars};
}

# Value of the 'num_txns' slot.
sub num_txns {
    my ($self) = @_;
    return $self->{num_txns};
}

# Value of the 'max_buckets' slot.
sub max_buckets {
    my ($self) = @_;
    return $self->{max_buckets};
}

# A string of NUL bytes exactly hash_size characters long.
sub blank_md5 {
    my ($self) = @_;
    return "\0" x $self->hash_size;
}

# Value of the 'data_sector_size' slot.
sub data_sector_size {
    my ($self) = @_;
    return $self->{data_sector_size};
}
768 # This is a calculated value
769 sub txn_bitfield_len {
771 unless ( exists $self->{txn_bitfield_len} ) {
772 my $temp = ($self->num_txns) / 8;
773 if ( $temp > int( $temp ) ) {
774 $temp = int( $temp ) + 1;
776 $self->{txn_bitfield_len} = $temp;
778 return $self->{txn_bitfield_len};
# Getter/setter pairs for three engine slots. Every setter stores its single
# argument and returns the value it just stored.

# Current transaction id.
sub trans_id {
    my ($self) = @_;
    return $self->{trans_id};
}

sub set_trans_id {
    my ( $self, $id ) = @_;
    return $self->{trans_id} = $id;
}

# Value of the 'trans_loc' slot.
sub trans_loc {
    my ($self) = @_;
    return $self->{trans_loc};
}

sub set_trans_loc {
    my ( $self, $loc ) = @_;
    return $self->{trans_loc} = $loc;
}

# Value of the 'chains_loc' slot.
sub chains_loc {
    my ($self) = @_;
    return $self->{chains_loc};
}

sub set_chains_loc {
    my ( $self, $loc ) = @_;
    return $self->{chains_loc} = $loc;
}
# Return the engine's cache hashref, creating an empty one on first use.
sub cache {
    my $self = shift;
    $self->{cache} = {} unless $self->{cache};
    return $self->{cache};
}

# Empty the cache in place: the same hashref stays attached to the engine,
# so references handed out earlier see the cleared contents.
sub clear_cache {
    my $self  = shift;
    my $cache = $self->cache;
    return %$cache = ();
}
798 my $header_sector = DBM::Deep::Engine::Sector::FileHeader->new({
809 'D' => $self->data_sector_size,
810 'B' => DBM::Deep::Engine::Sector::BucketList->new({engine=>$self,offset=>1})->size,
811 'I' => DBM::Deep::Engine::Sector::Index->new({engine=>$self,offset=>1})->size,
817 $return .= "Size: " . (-s $self->storage->{fh}) . $/;
820 $return .= "NumTxns: " . $self->num_txns . $/;
822 # Read the free sector chains
824 foreach my $multiple ( 0 .. 2 ) {
825 $return .= "Chains($types{$multiple}):";
826 my $old_loc = $self->chains_loc + $multiple * $self->byte_size;
829 $StP{$self->byte_size},
830 $self->storage->read_at( $old_loc, $self->byte_size ),
833 # We're now out of free sectors of this kind.
838 $sectors{ $types{$multiple} }{ $loc } = undef;
839 $old_loc = $loc + SIG_SIZE + $STALE_SIZE;
845 my $spot = $header_sector->size;
847 while ( $spot < $self->storage->{end} ) {
848 # Read each sector in order.
849 my $sector = $self->_load_sector( $spot );
851 # Find it in the free-sectors that were found already
852 foreach my $type ( keys %sectors ) {
853 if ( exists $sectors{$type}{$spot} ) {
854 my $size = $sizes{$type};
855 $return .= sprintf "%08d: %s %04d\n", $spot, 'F' . $type, $size;
861 die "********\n$return\nDidn't find free sector for $spot in chains\n********\n";
864 $return .= sprintf "%08d: %s %04d", $spot, $sector->type, $sector->size;
865 if ( $sector->type eq 'D' ) {
866 $return .= ' ' . $sector->data;
868 elsif ( $sector->type eq 'A' || $sector->type eq 'H' ) {
869 $return .= ' REF: ' . $sector->get_refcount;
871 elsif ( $sector->type eq 'B' ) {
872 foreach my $bucket ( $sector->chopped_up ) {
874 $return .= sprintf "%08d", unpack($StP{$self->byte_size},
875 substr( $bucket->[-1], $self->hash_size, $self->byte_size),
877 my $l = unpack( $StP{$self->byte_size},
878 substr( $bucket->[-1],
879 $self->hash_size + $self->byte_size,
883 $return .= sprintf " %08d", $l;
884 foreach my $txn ( 0 .. $self->num_txns - 2 ) {
885 my $l = unpack( $StP{$self->byte_size},
886 substr( $bucket->[-1],
887 $self->hash_size + 2 * $self->byte_size + $txn * ($self->byte_size + $STALE_SIZE),
891 $return .= sprintf " %08d", $l;
897 $spot += $sector->size;