Commit | Line | Data |
a20d9a3f |
1 | package DBM::Deep::Engine; |
2 | |
460b1067 |
3 | use 5.6.0; |
4 | |
a20d9a3f |
5 | use strict; |
460b1067 |
6 | use warnings; |
a20d9a3f |
7 | |
633df1fd |
8 | use Fcntl qw( :DEFAULT :flock ); |
359a01ac |
9 | use Scalar::Util (); |
a20d9a3f |
10 | |
21838116 |
11 | # File-wide notes: |
20b7f047 |
12 | # * To add to bucket_size, make sure you modify the following: |
13 | # - calculate_sizes() |
14 | # - _get_key_subloc() |
15 | # - add_bucket() - where the buckets are printed |
21838116 |
16 | |
8db25060 |
17 | ## |
18 | # Setup file and tag signatures. These should never change. |
19 | ## |
# Signature constants. Each one is a single byte written in front of a
# record to say what kind of record follows, except SIG_FILE (the marker at
# the very start of the file) and SIG_SIZE (the width of a signature).
use constant SIG_FILE     => 'DPDB';
use constant SIG_HEADER   => 'h';
use constant SIG_INTERNAL => 'i';
use constant SIG_HASH     => 'H';
use constant SIG_ARRAY    => 'A';
use constant SIG_NULL     => 'N';
use constant SIG_DATA     => 'D';
use constant SIG_INDEX    => 'I';
use constant SIG_BLIST    => 'B';
use constant SIG_FREE     => 'F';
use constant SIG_SIZE     => 1;
31 | |
612969fb |
sub new {
    ##
    # Constructor. Takes an optional hashref of settings and returns a new
    # engine object.
    #
    # Recognised keys are the ones in the defaults table below, plus
    # pack_size ('small', 'medium' or 'large', matched case-insensitively).
    # pack_size selects long_size and long_pack as a pair and takes
    # precedence over explicit values for those two keys.
    #
    # Dies on an unknown pack_size. Warns and raises max_buckets to 16 when
    # a smaller value is requested.
    ##
    my $class = shift;
    my ($args) = @_;

    # Work on a shallow copy: the pack_size translation below stores
    # long_size/long_pack among the settings, and that must not leak back
    # into the hash the caller handed us.
    my %settings = %{ $args || {} };

    my $self = bless {
        long_size => 4,
        long_pack => 'N',
        data_size => 4,
        data_pack => 'N',

        digest    => \&Digest::MD5::md5,
        hash_size => 16,

        ##
        # Maximum number of buckets per list before another level of indexing is
        # done. Increase this value for slightly greater speed, but larger database
        # files. DO NOT decrease this value below 16, due to risk of recursive
        # reindex overrun.
        ##
        max_buckets => 16,

        fileobj => undef,
        obj     => undef,
    }, $class;

    if ( defined $settings{pack_size} ) {
        # pack_size name => [ long_size, long_pack ]
        my %long_for = (
            small  => [ 2, 'n' ],
            medium => [ 4, 'N' ],
            large  => [ 8, 'Q' ],
        );

        my $chosen = $long_for{ lc $settings{pack_size} }
            or die "Unknown pack_size value: '$settings{pack_size}'\n";

        @settings{qw( long_size long_pack )} = @{$chosen};
    }

    # Grab the parameters we want to use; unknown keys are ignored.
    foreach my $param ( keys %$self ) {
        next unless exists $settings{$param};
        $self->{$param} = $settings{$param};
    }

    # The back-reference is weakened so that holding it here does not, by
    # itself, keep the referenced object alive.
    Scalar::Util::weaken( $self->{obj} ) if $self->{obj};

    if ( $self->{max_buckets} < 16 ) {
        warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
        $self->{max_buckets} = 16;
    }

    return $self;
}
89 | |
460b1067 |
# Accessor for the 'fileobj' slot of the engine.
sub _fileobj {
    my ($self) = @_;
    return $self->{fileobj};
}
460b1067 |
91 | |
260a80b4 |
sub calculate_sizes {
    ##
    # Derive index_size, bucket_size and bucket_list_size from long_size,
    # hash_size and max_buckets, and store them on the object.
    # Returns nothing.
    ##
    my ($self) = @_;

    my $long_size = $self->{long_size};

    # An index has one long per possible byte value (2**8 of them) in the
    # current hashing algorithm.
    #XXX Does this need to be updated with different hashing algorithms?
    $self->{index_size} = 256 * $long_size;

    # One bucket is the digest followed by three longs.
    my $bucket_size = $self->{hash_size} + 3 * $long_size;
    $self->{bucket_size}      = $bucket_size;
    $self->{bucket_list_size} = $self->{max_buckets} * $bucket_size;

    return;
}
104 | |
fde3db1a |
sub write_file_header {
    ##
    # Ask the file object for room for the file header, write the header
    # there, and record 13 as the transaction offset. Returns nothing.
    ##
    my ($self) = @_;

    my $fileobj = $self->_fileobj;

    # 21 is everything after the file signature:
    #   1 (header signature) + 4 + 4 + 4 (three 'N' fields)
    #   + 2 + 1 + 2 + 1 + 2 (the n/A/n/A/n settings).
    my $header_loc = $fileobj->request_space( length( SIG_FILE ) + 21 );

    my @header_fields = (
        SIG_FILE,
        SIG_HEADER,
        pack( 'N', 1 ),     # header version
        pack( 'N', 12 ),    # header size
        pack( 'N', 0 ),     # currently running transaction IDs
        pack( 'n', $self->{long_size} ),
        pack( 'A', $self->{long_pack} ),
        pack( 'n', $self->{data_size} ),
        pack( 'A', $self->{data_pack} ),
        pack( 'n', $self->{max_buckets} ),
    );

    $fileobj->print_at( $header_loc, @header_fields );

    # 13 = file signature (4) + header signature (1) + version (4) + size (4),
    # i.e. the position of the running-transactions field written above.
    $fileobj->set_transaction_offset( 13 );

    return;
}
127 | |
fde3db1a |
sub read_file_header {
    ##
    # Read and validate the file header, then load long_size, long_pack,
    # data_size, data_pack and max_buckets from it into the object.
    #
    # Returns nothing when the first read yields no bytes, otherwise the
    # total number of header bytes read. A wrong file signature, a wrong
    # header signature or an incomplete settings block closes the file
    # object and is reported through _throw_error.
    ##
    my ($self) = @_;

    my $fileobj = $self->_fileobj;

    # File signature plus 9 more bytes: header signature (1), header
    # version (4) and the size of the settings block that follows (4).
    my $preamble = $fileobj->read_at( 0, length(SIG_FILE) + 9 );
    return unless length($preamble);

    my ($file_signature, $sig_header, $header_version, $size)
        = unpack( 'A4 A N N', $preamble );

    if ( $file_signature ne SIG_FILE ) {
        $fileobj->close;
        $self->_throw_error( "Signature not found -- file is not a Deep DB" );
    }

    if ( $sig_header ne SIG_HEADER ) {
        $fileobj->close;
        $self->_throw_error( "Old file version found." );
    }

    # The settings block is read from wherever the previous read left off.
    my $settings = $fileobj->read_at( undef, $size );
    my ($running_transactions, @values) = unpack( 'N n A n A n', $settings );

    $fileobj->set_transaction_offset( 13 );

    my $incomplete = @values < 5 || grep { !defined } @values;
    if ( $incomplete ) {
        $fileobj->close;
        $self->_throw_error("Corrupted file - bad header");
    }

    #XXX Add warnings if values weren't set right
    @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;

    return length($preamble) + length($settings);
}
163 | |
460b1067 |
sub setup_fh {
    ##
    # Prepare the engine for use with $obj while holding an exclusive lock
    # on the filehandle.
    #
    # When $obj already has a base_offset only the sizes are calculated.
    # Otherwise the file header is read: an empty file gets a header and a
    # master index written, an existing file has its master index tag
    # checked against the type of $obj. Returns 1.
    ##
    my ($self, $obj) = @_;

    my $fileobj = $self->_fileobj;

    # Need to remove use of $fh here
    my $fh = $fileobj->{fh};
    flock $fh, LOCK_EX;

    #XXX The duplication of calculate_sizes needs to go away
    if ( $obj->{base_offset} ) {
        $self->calculate_sizes;
    }
    else {
        my $bytes_read = $self->read_file_header;

        $self->calculate_sizes;

        if ( $bytes_read ) {
            # Existing file: the master index sits right after the header.
            $obj->{base_offset} = $bytes_read;

            ##
            # Get our type from master index header
            ##
            my $tag = $self->load_tag( $obj->_base_offset );
            if ( !$tag ) {
                flock $fh, LOCK_UN;
                $self->_throw_error("Corrupted file, no master index record");
            }

            if ( $obj->_type ne $tag->{signature} ) {
                flock $fh, LOCK_UN;
                $self->_throw_error("File type mismatch");
            }
        }
        else {
            ##
            # File is empty -- write header and master index
            ##
            $fileobj->audit( "# Database created on" );

            $self->write_file_header;

            $obj->{base_offset} = $fileobj->request_space(
                $self->tag_size( $self->{index_size} ),
            );

            $self->write_tag(
                $obj->_base_offset, $obj->_type,
                chr(0) x $self->{index_size},
            );

            # Flush the filehandle: briefly switch autoflush on for $fh,
            # then restore its previous setting and the selected handle.
            my $previously_selected = select $fh;
            my $previous_autoflush  = $|;
            $| = 1;
            $| = $previous_autoflush;
            select $previously_selected;
        }
    }

    #XXX We have to make sure we don't mess up when autoflush isn't turned on
    $fileobj->set_inode;

    flock $fh, LOCK_UN;

    return 1;
}
229 | |
16d1ad9b |
sub tag_size {
    ##
    # Given the size of a tag's content, return the size of the whole tag:
    # signature byte(s) + size field + content.
    ##
    my ($self, $size) = @_;

    my $header_size = SIG_SIZE + $self->{data_size};

    return $header_size + $size;
}
235 | |
9e4f83a0 |
sub write_tag {
    ##
    # Given offset, signature and content, write a tag (signature, packed
    # content length, content) through the file object.
    #
    # Returns a hashref describing the tag, whose 'offset' is where the
    # content starts (just past signature and size field). Returns nothing
    # when $offset is undefined.
    ##
    my ($self, $offset, $sig, $content) = @_;

    my $size = length( $content );

    $self->_fileobj->print_at(
        $offset,
        $sig, pack( $self->{data_pack}, $size ), $content,
    );

    return unless defined $offset;

    my %tag = (
        signature => $sig,
        size      => $size,
        offset    => $offset + SIG_SIZE + $self->{data_size},
        content   => $content,
    );

    return \%tag;
}
258 | |
sub load_tag {
    ##
    # Given offset, load single tag and return a hashref with its
    # signature, content size, content offset (just past signature and
    # size field) and content.
    #
    # Returns nothing when the tag header cannot be read in full, so that
    # callers testing the result for truth can detect a truncated or
    # corrupted file instead of receiving a tag built from undefined fields.
    ##
    my $self = shift;
    my ($offset) = @_;

    my $fileobj = $self->_fileobj;

    my $header_size = SIG_SIZE + $self->{data_size};
    my $header      = $fileobj->read_at( $offset, $header_size );

    # A short read means there is no complete tag header at this offset.
    return unless defined $header && length($header) >= $header_size;

    my ($sig, $size) = unpack( "A $self->{data_pack}", $header );

    # The content follows the header directly, so it is read from wherever
    # the header read left off.
    my $buffer = $fileobj->read_at( undef, $size );

    return {
        signature => $sig,
        size      => $size,
        offset    => $offset + SIG_SIZE + $self->{data_size},
        content   => $buffer
    };
}
281 | |
56ec4340 |
sub _get_dbm_object {
    ##
    # Plain function (not a method). Given a value, return the DBM::Deep
    # object behind it: the value itself when it isa DBM::Deep, or the
    # object a hash/array reference is tied to when that object isa
    # DBM::Deep. Returns nothing otherwise.
    ##
    my $item = shift;

    # True when $candidate answers isa('DBM::Deep'). Anything on which the
    # method call dies (undef, unblessed reference, ...) counts as "no";
    # __DIE__ handlers are kept out of the way while probing.
    my $isa_dbm_deep = sub {
        my ($candidate) = @_;
        return eval {
            local $SIG{__DIE__};
            $candidate->isa( 'DBM::Deep' );
        };
    };

    return $item if $isa_dbm_deep->( $item ) && $item;

    # Not an object of ours -- see whether it is a container tied to one.
    my $kind = Scalar::Util::reftype( $item ) || '';

    my $tied_to = eval {
        local $SIG{__DIE__};
          $kind eq 'HASH'  ? tied( %$item )
        : $kind eq 'ARRAY' ? tied( @$item )
        :                    undef;
    };

    return $tied_to if $isa_dbm_deep->( $tied_to ) && $tied_to;

    return;
}
320 | |
29b01632 |
sub _length_needed {
    ##
    # Given a value and its plain key, return the number of bytes needed to
    # store them: signature, value size field, key size field and key,
    # plus whatever the kind of value requires (see below).
    ##
    my ($self, $value, $key) = @_;

    my $is_dbm_deep = eval {
        local $SIG{'__DIE__'};
        $value->isa( 'DBM::Deep' );
    };

    my $len = SIG_SIZE
            + 2 * $self->{data_size}    # size fields for value and for key
            + length( $key );           # the key itself

    # A DBM::Deep object living in this same file is stored as an internal
    # reference, which takes one long.
    if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
        return $len + $self->{long_size};
    }

    my $autobless = $self->_fileobj->{autobless};

    # This is for the bit saying whether or not this thing is blessed.
    $len += 1 if $autobless;

    my $kind = Scalar::Util::reftype( $value ) || '';

    # Anything that is not a hash or array is stored as its own bytes
    # (nothing at all for undef).
    if ( $kind ne 'HASH' && $kind ne 'ARRAY' ) {
        $len += length( $value ) if defined $value;
        return $len;
    }

    # Hashes and arrays are stored as an index.
    $len += $self->{index_size};

    # if autobless is enabled, must also take into consideration
    # the class name as it is stored after the key.
    if ( $autobless ) {
        my $class = Scalar::Util::blessed( $value );
        if ( defined $class && !$is_dbm_deep ) {
            $len += $self->{data_size} + length( $class );
        }
    }

    return $len;
}
366 | |
20f7b20c |
sub add_bucket {
    ##
    # Adds one key/value pair to bucket list, given the bucket list tag, MD5
    # digest of key, plain (undigested) key and value. $deleted (default 0)
    # is written into the bucket entry; $orig_key is handed on to
    # write_value().
    #
    # Returns 1 when an existing entry for the digest was updated and 2
    # otherwise.
    ##
    my ($self, $tag, $md5, $plain_key, $value, $deleted, $orig_key) = @_;
    $deleted ||= 0;

    local ( $/, $\ );

    # This verifies that only supported values will be stored: anything
    # that is a reference must be a hash or an array underneath.
    my $kind = Scalar::Util::reftype( $value );
    if ( defined $kind && $kind ne 'HASH' && $kind ne 'ARRAY' ) {
        $self->_throw_error(
            "Storage of variables of type '$kind' is not supported."
        );
    }

    my $location = 0;
    my $result   = 2;

    my $fileobj = $self->_fileobj;

    my $actual_length = $self->_length_needed( $value, $plain_key );

    #ACID - This is a mutation. Must only find the exact transaction
    my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5, 1 );

    my @transactions;
    if ( $fileobj->transaction_id == 0 ) {
        @transactions = $fileobj->current_transactions;
    }

    # The fields that follow the digest in a bucket entry. Evaluated only
    # after $location has been assigned.
    # NOTE(review): the last field is always 4 bytes ('n n'), while
    # calculate_sizes() reserves long_size bytes for the third field -- the
    # two agree only when long_size is 4. Confirm against _get_key_subloc().
    my $entry_fields = sub {
        return (
            pack( $self->{long_pack}, $location ),
            pack( $self->{long_pack}, $actual_length ),
            pack( 'n n', $fileobj->transaction_id, $deleted ),
        );
    };

    # $self->_release_space( $size, $subloc );
    #XXX This needs updating to use _release_space
    if ( $subloc ) {
        # Updating a known md5: reuse the old space when the new value
        # fits, otherwise get new space and repoint the entry at it.
        $result = 1;

        if ( $actual_length <= $size ) {
            $location = $subloc;
        }
        else {
            $location = $fileobj->request_space( $actual_length );

            $fileobj->print_at(
                $tag->{offset} + $offset + $self->{hash_size},
                $entry_fields->(),
            );
        }
    }
    elsif ( defined $offset ) {
        # Adding a new md5 at the free slot that was found.
        $location = $fileobj->request_space( $actual_length );

        $fileobj->print_at(
            $tag->{offset} + $offset,
            $md5, $entry_fields->(),
        );

        # Start of the whole tag (signature included), for reloading it.
        my $tag_start = $tag->{offset} - SIG_SIZE - $self->{data_size};

        # For every transaction collected above, add a matching entry
        # flagged as deleted under that transaction's id.
        for my $running_id ( @transactions ) {
            my $reloaded = $self->load_tag( $tag_start );
            $fileobj->{transaction_id} = $running_id;
            $self->add_bucket( $reloaded, $md5, '', '', 1, $orig_key );
            $fileobj->{transaction_id} = 0;
        }
        $tag = $self->load_tag( $tag_start );
    }
    else {
        # If bucket didn't fit into list, split into a new index level
        # split_index() will do the _fileobj->request_space() call
        $location = $self->split_index( $md5, $tag );
    }

    $self->write_value( $location, $plain_key, $value, $orig_key );

    return $result;
}
454 | |
sub write_value {
    ##
    # Write a value and its plain key at $location, then (for plain hashes
    # and arrays) tie the passed-in reference to the datafile.
    # $orig_key is passed to the tie as parent_key. Returns 1.
    ##
    my ($self, $location, $key, $value, $orig_key) = @_;

    my $fileobj = $self->_fileobj;

    my $dbm_deep_obj = _get_dbm_object( $value );
    if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $fileobj ) {
        $self->_throw_error( "Cannot cross-reference. Use export() instead" );
    }

    ##
    # Pick signature and content based on content type, then write the tag
    # (which records the content length) in one place.
    ##
    my $kind = Scalar::Util::reftype( $value ) || '';

    my ($sig, $content);
    if ( $dbm_deep_obj ) {
        # Something already stored in this file: write a reference to it.
        $sig     = SIG_INTERNAL;
        $content = pack( $self->{long_pack}, $dbm_deep_obj->_base_offset );
    }
    elsif ( $kind eq 'HASH' ) {
        if ( tied %{$value} ) {
            $self->_throw_error( "Cannot store something that is tied" );
        }
        $sig     = SIG_HASH;
        $content = chr(0) x $self->{index_size};
    }
    elsif ( $kind eq 'ARRAY' ) {
        if ( tied @{$value} ) {
            $self->_throw_error( "Cannot store something that is tied" );
        }
        $sig     = SIG_ARRAY;
        $content = chr(0) x $self->{index_size};
    }
    elsif ( !defined $value ) {
        $sig     = SIG_NULL;
        $content = '';
    }
    else {
        $sig     = SIG_DATA;
        $content = $value;
    }
    $self->write_tag( $location, $sig, $content );

    ##
    # Plain key is stored AFTER value, as keys are typically fetched less often.
    ##
    $fileobj->print_at( undef, pack( $self->{data_pack}, length($key) ) . $key );

    # Internal references don't care about autobless
    return 1 if $dbm_deep_obj;

    ##
    # If value is blessed, preserve class name
    ##
    if ( $fileobj->{autobless} ) {
        my $class = Scalar::Util::blessed( $value );
        if ( defined $class ) {
            $fileobj->print_at(
                undef, chr(1), pack( $self->{data_pack}, length($class) ) . $class,
            );
        }
        else {
            $fileobj->print_at( undef, chr(0) );
        }
    }

    ##
    # Tie the passed in reference so that changes to it are reflected in the
    # datafile. The use of $location as the base_offset will act as the
    # linkage between parent and child.
    #
    # The contents are copied out before tying and assigned back afterwards;
    # this is a hack around the fact that just tying doesn't store the
    # values. This may not be the wrong thing to do.
    ##
    my %tie_args = (
        base_offset => $location,
        fileobj     => $fileobj,
        parent      => $self->{obj},
        parent_key  => $orig_key,
    );

    if ( $kind eq 'HASH' ) {
        my %contents = %$value;
        tie %$value, 'DBM::Deep', { %tie_args };
        %$value = %contents;
    }
    elsif ( $kind eq 'ARRAY' ) {
        my @contents = @$value;
        tie @$value, 'DBM::Deep', { %tie_args };
        @$value = @contents;
    }

    return 1;
}
544 | |
75be6413 |
sub split_index {
    ##
    # Replace a full bucket list ($tag) by a new index level: a new index
    # is written and linked in at $tag->{ref_loc}, and the old entries plus
    # one new entry for $md5 are distributed over new bucket lists by the
    # digest byte following $tag->{ch}. The old bucket list is released.
    #
    # Returns the location reserved for the new entry's value.
    ##
    my ($self, $md5, $tag) = @_;

    my $fileobj   = $self->_fileobj;
    my $long_pack = $self->{long_pack};

    # Write the new index and point the parent slot at it.
    my $index_loc = $fileobj->request_space(
        $self->tag_size( $self->{index_size} ),
    );

    $fileobj->print_at( $tag->{ref_loc}, pack( $long_pack, $index_loc ) );

    my $index_tag = $self->write_tag(
        $index_loc, SIG_INDEX,
        chr(0) x $self->{index_size},
    );

    # NOTE(review): this space is what gets returned and is later used by
    # add_bucket() for the value, yet it is sized as a bucket list rather
    # than by the value's length -- confirm this is intended.
    my $newtag_loc = $fileobj->request_space(
        $self->tag_size( $self->{bucket_list_size} ),
    );

    # The old entries followed by one more entry for the new key.
    my $keys = join '',
        $tag->{content},
        $md5,
        pack( $long_pack, $newtag_loc ),
        pack( $long_pack, 0 ),    # size
        pack( $long_pack, 0 );    # transaction ID

    # Content offset of the bucket list created so far for each byte value.
    my @blist_offset_for = ();

    # The range deliberately includes max_buckets itself - we have
    # max_buckets+1 keys to iterate through, unlike every other loop that
    # uses max_buckets as a stop.
    for my $slot ( 0 .. $self->{max_buckets} ) {
        my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $slot );

        die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
        die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;

        my $num = ord( substr( $key, $tag->{ch} + 1, 1 ) );

        # NOTE(review): in both branches only the key and its location are
        # copied into the new bucket list; the remaining entry fields are
        # left as zeros -- confirm this is intended.
        if ( my $blist_offset = $blist_offset_for[$num] ) {
            # A bucket list for this byte exists already: append there.
            my $subkeys = $fileobj->read_at( $blist_offset, $self->{bucket_list_size} );

            # This is looking for the first empty spot
            my (undef, $free_offset) = $self->_find_in_buckets(
                { content => $subkeys }, '',
            );

            $fileobj->print_at(
                $blist_offset + $free_offset,
                $key, pack( $long_pack, $old_subloc ),
            );
        }
        else {
            # First key for this byte: create its bucket list, link it
            # into the new index and store the key as the first entry.
            my $blist_loc = $fileobj->request_space(
                $self->tag_size( $self->{bucket_list_size} ),
            );

            $fileobj->print_at(
                $index_tag->{offset} + ( $num * $self->{long_size} ),
                pack( $long_pack, $blist_loc ),
            );

            my $blist_tag = $self->write_tag(
                $blist_loc, SIG_BLIST,
                chr(0) x $self->{bucket_list_size},
            );

            $fileobj->print_at(
                $blist_tag->{offset}, $key . pack( $long_pack, $old_subloc ),
            );

            $blist_offset_for[$num] = $blist_tag->{offset};
        }
    }

    # Hand back the old bucket list, counted from the start of its tag.
    $self->_release_space(
        $self->tag_size( $self->{bucket_list_size} ),
        $tag->{offset} - SIG_SIZE - $self->{data_size},
    );

    return $newtag_loc;
}
625 | |
8db25060 |
sub read_from_loc {
    ##
    # Read the value stored at $subloc and return it:
    #   - hash/array record: a new DBM::Deep object (re-blessed into the
    #     stored class name when autobless is on and a name was stored),
    #   - internal reference: whatever the referenced location holds,
    #   - data record: the stored string ('' when its size is 0),
    #   - anything else: nothing.
    # $orig_key is passed to new objects as parent_key.
    ##
    my ($self, $subloc, $orig_key) = @_;

    my $fileobj = $self->_fileobj;

    # Reads one packed size field (data_size bytes) from wherever the
    # previous read left off.
    my $read_size = sub {
        my $packed = $fileobj->read_at( undef, $self->{data_size} );
        return scalar unpack( $self->{data_pack}, $packed );
    };

    my $signature = $fileobj->read_at( $subloc, SIG_SIZE );

    ##
    # If value is a hash or array, return new DBM::Deep object with correct offset
    ##
    if ( $signature eq SIG_HASH || $signature eq SIG_ARRAY ) {
        my $new_obj = DBM::Deep->new({
            type        => $signature,
            base_offset => $subloc,
            fileobj     => $self->_fileobj,
            parent      => $self->{obj},
            parent_key  => $orig_key,
        });

        return $new_obj unless $new_obj->_fileobj->{autobless};

        ##
        # Skip over value and plain key to see if object needs
        # to be re-blessed
        ##
        $fileobj->increment_pointer( $self->{data_size} + $self->{index_size} );

        my $key_size = $read_size->();
        $fileobj->increment_pointer( $key_size ) if $key_size;

        my $bless_bit = $fileobj->read_at( undef, 1 );
        return $new_obj unless ord( $bless_bit );

        ##
        # Yes, object needs to be re-blessed
        ##
        my $class_size = $read_size->();
        return $new_obj unless $class_size;

        my $class_name = $fileobj->read_at( undef, $class_size );
        if ( defined $class_name ) {
            $new_obj = bless( $new_obj, $class_name );
        }

        return $new_obj;
    }

    # An internal reference holds the location of the real record.
    if ( $signature eq SIG_INTERNAL ) {
        my $size = $read_size->();
        return unless $size;

        my $new_loc = unpack( $self->{long_pack}, $fileobj->read_at( undef, $size ) );
        return $self->read_from_loc( $new_loc, $orig_key );
    }

    ##
    # Otherwise return actual value
    ##
    if ( $signature eq SIG_DATA ) {
        my $size = $read_size->();
        return $size ? $fileobj->read_at( undef, $size ) : '';
    }

    ##
    # Key exists, but content is null
    ##
    return;
}
703 | |
9020ee8c |
sub get_bucket_value {
    ##
    # Fetch single value given tag and MD5 digested key. Returns nothing
    # when the key is not found or its entry is flagged as deleted.
    ##
    my ($self, $tag, $md5, $orig_key) = @_;

    #ACID - This is a read. Can find exact or HEAD
    my ($subloc, undef, undef, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );

    if ( !$subloc ) {
        #XXX Need to use real key
        # $self->add_bucket( $tag, $md5, $orig_key, undef, undef, $orig_key );
        return;
    }

    return if $is_deleted;

    return $self->read_from_loc( $subloc, $orig_key );
}
ab0e4957 |
725 | |
sub delete_bucket {
    ##
    # Delete single key/value pair given tag and MD5 digested key.
    # Returns nothing when the key is not found, 1 otherwise.
    ##
    my ($self, $tag, $md5, $orig_key) = @_;

    #ACID - Although this is a mutation, we must find any transaction.
    # This is because we need to mark something as deleted that is in the HEAD.
    my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );

    return unless $subloc;

    my $fileobj = $self->_fileobj;

    # With a non-zero transaction id nothing is removed: an entry flagged
    # as deleted is added instead.
    if ( $fileobj->transaction_id != 0 ) {
        $self->add_bucket( $tag, $md5, '', '', 1, $orig_key );
        return 1;
    }

    # Transaction id 0: before removing the entry, store its current value
    # under the id of every transaction reported by the file object.
    my @transactions = $fileobj->current_transactions;

    my $value = $self->read_from_loc( $subloc, $orig_key );

    for my $running_id ( @transactions ) {
        $fileobj->{transaction_id} = $running_id;
        #XXX Need to use real key
        $self->add_bucket( $tag, $md5, $orig_key, $value, undef, $orig_key );
        $fileobj->{transaction_id} = 0;
    }

    # Reload the bucket list from the start of its tag.
    $tag = $self->load_tag( $tag->{offset} - SIG_SIZE - $self->{data_size} );

    # Close the gap: everything after the removed entry moves up by one
    # bucket, followed by one bucket's worth of zero bytes.
    #XXX This needs _release_space() for the value and anything below
    my $following_entries = substr( $tag->{content}, $offset + $self->{bucket_size} );
    $fileobj->print_at(
        $tag->{offset} + $offset,
        $following_entries,
        chr(0) x $self->{bucket_size},
    );

    return 1;
}
770 | |
912d50b1 |
sub bucket_exists {
    ##
    # Check existence of single key given tag and MD5 digested key.
    # Returns 1 when an entry is found that is not flagged as deleted, and
    # a false value otherwise.
    ##
    my ($self, $tag, $md5) = @_;

    #ACID - This is a read. Can find exact or HEAD
    my ($subloc, undef, undef, $is_deleted) = $self->_find_in_buckets( $tag, $md5 );

    return $subloc && !$is_deleted && 1;
}
782 | |
6736c116 |
sub find_bucket_list {
    ##
    # Locate the bucket list for a digested key, starting from the tag at
    # $offset. With { create => 1 } in $args a missing bucket list is
    # created; without it, nothing is returned in that case.
    #
    # The returned tag additionally carries 'ref_loc' (where its location
    # is stored in the parent index) and 'ch' (the digest byte position
    # used to select it).
    ##
    my ($self, $offset, $md5, $args) = @_;
    $args ||= {};

    local ( $/, $\ );

    ##
    # Locate offset for bucket list using digest index system
    ##
    my $tag = $self->load_tag( $offset )
        or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );

    # Walk down one index level per digest byte until a bucket list is hit.
    my $ch = 0;
    while ( $tag->{signature} ne SIG_BLIST ) {
        my $num = ord substr( $md5, $ch, 1 );

        # Where the current index stores the location for this byte value.
        my $ref_loc = $tag->{offset} + ( $num * $self->{long_size} );
        $tag = $self->index_lookup( $tag, $num );

        if ( !$tag ) {
            return unless $args->{create};

            # Nothing there yet: write an empty bucket list and link it in.
            my $fileobj = $self->_fileobj;

            my $loc = $fileobj->request_space(
                $self->tag_size( $self->{bucket_list_size} ),
            );

            $fileobj->print_at( $ref_loc, pack( $self->{long_pack}, $loc ) );

            $tag = $self->write_tag(
                $loc, SIG_BLIST,
                chr(0) x $self->{bucket_list_size},
            );

            $tag->{ref_loc} = $ref_loc;
            $tag->{ch}      = $ch;

            last;
        }

        $tag->{ch}      = $ch;
        $tag->{ref_loc} = $ref_loc;
        $ch++;
    }

    return $tag;
}
832 | |
d0b74c17 |
sub index_lookup {
    ##
    # Given index tag and an entry number, look up that single entry in the
    # index and return the tag loaded from the location stored there.
    # Returns nothing when the entry is empty (zero).
    ##
    my ($self, $tag, $index) = @_;

    my $entry_size = $self->{long_size};
    my $packed     = substr( $tag->{content}, $index * $entry_size, $entry_size );

    my $location = unpack( $self->{long_pack}, $packed );

    return unless $location;

    return $self->load_tag( $location );
}
853 | |
6736c116 |
sub traverse_index {
    ##
    # Scan index and recursively step into deeper levels, looking for next key.
    #
    # $obj carries the iteration state: 'prev_md5' (digest of the key
    # returned last) and 'return_next' (set once the next key encountered
    # is the one to return). $ch is the digest byte position for this
    # level. Returns the plain key found, or nothing.
    ##
    my ($self, $obj, $offset, $ch, $force_return_next) = @_;

    my $tag = $self->load_tag( $offset );

    if ( $tag->{signature} eq SIG_BLIST ) {
        # tag is a bucket list
        my $keys = $tag->{content};
        $obj->{return_next} = 1 if $force_return_next;

        ##
        # Iterate through buckets, looking for a key match
        ##
        for my $slot ( 0 .. $self->{max_buckets} - 1 ) {
            my ($key, $subloc) = $self->_get_key_subloc( $keys, $slot );

            # End of bucket list -- return to outer loop
            if ( !$subloc ) {
                $obj->{return_next} = 1;
                last;
            }

            # Located previous key -- return next one found
            if ( $key eq $obj->{prev_md5} ) {
                $obj->{return_next} = 1;
                next;
            }

            next unless $obj->{return_next};

            # Seek to bucket location and skip over signature
            my $fileobj = $self->_fileobj;

            # The signature itself is not used; the following reads
            # presumably continue from where this one stops.
            my $signature = $fileobj->read_at( $subloc, SIG_SIZE );

            # Skip over value to get to plain key
            my $value_size = unpack(
                $self->{data_pack},
                $fileobj->read_at( undef, $self->{data_size} ),
            );
            $fileobj->increment_pointer( $value_size ) if $value_size;

            # Read in plain key and return as scalar
            my $key_size = unpack(
                $self->{data_pack},
                $fileobj->read_at( undef, $self->{data_size} ),
            );

            my $plain_key;
            if ( $key_size ) {
                $plain_key = $fileobj->read_at( undef, $key_size );
            }

            return $plain_key;
        }

        $obj->{return_next} = 1;

        return;
    }

    # tag is an index: resume at the previous key's byte for this level,
    # or start from the first entry once the next key is wanted.
    my $content = $tag->{content};
    my $start   = $obj->{return_next} ? 0 : ord( substr( $obj->{prev_md5}, $ch, 1 ) );

    for my $idx ( $start .. 2**8 - 1 ) {
        my $subloc = unpack(
            $self->{long_pack},
            substr( $content, $idx * $self->{long_size}, $self->{long_size} ),
        );

        next unless $subloc;

        my $result = $self->traverse_index(
            $obj, $subloc, $ch + 1, $force_return_next,
        );

        return $result if defined $result;
    }

    $obj->{return_next} = 1;

    return;
}
935 | |
sub get_next_key {
    ##
    # Locate the next key, given the digest of the previous one.
    #
    #   $obj      - object being iterated; its {prev_md5} and {return_next}
    #               entries are overwritten, and its _base_offset() gives
    #               the location the traversal starts from
    #   $prev_md5 - digest of the previous key (optional)
    #
    # Returns whatever traverse_index() returns.
    ##
    my ($self, $obj, $prev_md5) = @_;

    if ( $prev_md5 ) {
        $obj->{prev_md5}    = $prev_md5;
        $obj->{return_next} = 0;
    }
    else {
        ##
        # If the previous key was not specified, start at the top and
        # return the first one found.
        ##
        $obj->{prev_md5}    = chr(0) x $self->{hash_size};
        $obj->{return_next} = 1;
    }

    return $self->traverse_index( $obj, $obj->_base_offset, 0 );
}
957 | |
75be6413 |
958 | # Utilities |
959 | |
9cec1360 |
sub _get_key_subloc {
    ##
    # Decode bucket number $idx out of the packed bucket-list string $keys.
    #
    # Returns a five-element list:
    #   ($key, $subloc, $size, $transaction_id, $is_deleted)
    ##
    my ($self, $keys, $idx) = @_;

    my $width  = $self->{bucket_size};
    my $bucket = substr( $keys, $idx * $width, $width );

    # This is 'a', not 'A'. Please read the pack() documentation for the
    # difference between the two and why it's important.
    my $template = sprintf 'a%s %s2 n2', $self->{hash_size}, $self->{long_pack};

    my @field = unpack( $template, $bucket );

    # Always hand back exactly five values, whatever unpack() produced.
    return @field[ 0 .. 4 ];
}
977 | |
d608b06e |
sub _find_in_buckets {
    ##
    # Search the bucket list in $tag->{content} for the bucket holding the
    # digest $md5 that belongs to the current transaction.
    #
    #   $tag   - bucket-list tag; only {content} is read
    #   $md5   - digest of the key being looked for
    #   $exact - when true, never fall back to the HEAD (transaction 0)
    #            bucket; only a bucket of the current transaction matches
    #
    # Returns ($subloc, $offset, $size, $is_deleted), where $offset is the
    # byte offset of the bucket inside the bucket list:
    #   * for the matching bucket of the current transaction, or
    #   * for the HEAD bucket of $md5, when the current transaction is not
    #     0, $exact is false, and the end of the list is reached without a
    #     match for the current transaction, or
    #   * for the first empty bucket ($subloc is 0) otherwise.
    # Returns nothing when every bucket is in use and none matched.
    #
    # NOTE(review): the HEAD fallback is only applied when an empty bucket
    # is reached; a completely full list with only a HEAD match returns
    # nothing. Left as-is -- confirm whether callers rely on it.
    ##
    my $self = shift;
    my ($tag, $md5, $exact) = @_;
    $exact ||= 0;

    my $trans_id = $self->_fileobj->transaction_id;

    # The HEAD bucket for $md5, if one has been seen so far.
    my %head;

    BUCKET:
    for my $i ( 0 .. $self->{max_buckets} - 1 ) {
        my ($key, $subloc, $size, $transaction_id, $is_deleted)
            = $self->_get_key_subloc( $tag->{content}, $i );

        my @rv = ($subloc, $i * $self->{bucket_size}, $size, $is_deleted);

        # An empty bucket marks the end of the list.
        unless ( $subloc ) {
            if ( !$exact && %head && $trans_id ) {
                # Report the HEAD bucket's own deleted flag, not the flag
                # of the empty bucket that ended the scan.
                @rv = @head{qw( subloc offset size is_deleted )};
            }
            return @rv;
        }

        next BUCKET if $key ne $md5;

        # Save off the HEAD in case we need it.
        if ( $transaction_id == 0 ) {
            %head = (
                subloc     => $subloc,
                offset     => $i * $self->{bucket_size},
                size       => $size,
                is_deleted => $is_deleted,
            );
        }

        next BUCKET if $transaction_id != $trans_id;

        return @rv;
    }

    return;
}
1014 | |
994ccd8e |
sub _release_space {
    ##
    # Overwrite the start of the region at $loc with a free-space record:
    # the SIG_FREE signature, then $size, then a next-location field, the
    # latter two packed with long_pack.
    #
    #   $size - size recorded in the free-space record
    #   $loc  - file location the record is written to
    #
    # Returns nothing.
    ##
    my ($self, $size, $loc) = @_;

    # The next-location field is always written as zero here.
    my $next_loc = 0;

    my @packed = map { pack( $self->{long_pack}, $_ ) } $size, $next_loc;

    $self->_fileobj->print_at( $loc, SIG_FREE, @packed );

    return;
}
1029 | |
e96daec8 |
sub _throw_error {
    ##
    # Die with $message prefixed by "DBM::Deep: ". The trailing newline
    # keeps Perl from appending the file and line number.
    ##
    my ($self, $message) = @_;

    die "DBM::Deep: $message\n";
}
1033 | |
a20d9a3f |
1034 | 1; |
1035 | __END__ |