Commit | Line | Data |
a20d9a3f |
1 | package DBM::Deep::Engine; |
2 | |
3 | use strict; |
4 | |
5 | use Fcntl qw( :DEFAULT :flock :seek ); |
6 | |
1bf65be7 |
7 | ## |
8 | # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file. |
9 | # (Perl must be compiled with largefile support for files > 2 GB) |
10 | # |
11 | # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file. |
12 | # (Perl must be compiled with largefile and 64-bit long support) |
13 | ## |
14 | ## |
15 | # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value. |
16 | # Upgrading this is possible (see above) but probably not necessary. If you need |
17 | # more than 4 GB for a single key or value, this module is really not for you :-) |
18 | ## |
19 | our ($LONG_SIZE, $LONG_PACK, $DATA_LENGTH_SIZE, $DATA_LENGTH_PACK); |
20 | ## |
21 | # Maximum number of buckets per list before another level of indexing is done. |
22 | # Increase this value for slightly greater speed, but larger database files. |
23 | # DO NOT decrease this value below 16, due to risk of recursive reindex overrun. |
24 | ## |
25 | our $MAX_BUCKETS = 16; |
26 | our ($DIGEST_FUNC, $HASH_SIZE); |
27 | our ($INDEX_SIZE, $BUCKET_SIZE, $BUCKET_LIST_SIZE); |
28 | set_digest(); |
29 | |
30 | sub _precalc_sizes { |
31 | ## |
32 | # Precalculate index, bucket and bucket list sizes |
33 | ## |
34 | |
35 | #XXX I don't like this ... |
36 | set_pack() unless defined $LONG_SIZE; |
37 | |
38 | $INDEX_SIZE = 256 * $LONG_SIZE; |
39 | $BUCKET_SIZE = $HASH_SIZE + $LONG_SIZE; |
40 | $BUCKET_LIST_SIZE = $MAX_BUCKETS * $BUCKET_SIZE; |
41 | } |
42 | |
43 | sub set_pack { |
44 | ## |
45 | # Set pack/unpack modes (see file header for more) |
46 | ## |
47 | my ($long_s, $long_p, $data_s, $data_p) = @_; |
48 | |
49 | $LONG_SIZE = $long_s ? $long_s : 4; |
50 | $LONG_PACK = $long_p ? $long_p : 'N'; |
51 | |
52 | $DATA_LENGTH_SIZE = $data_s ? $data_s : 4; |
53 | $DATA_LENGTH_PACK = $data_p ? $data_p : 'N'; |
54 | |
55 | _precalc_sizes(); |
56 | } |
57 | |
58 | sub set_digest { |
59 | ## |
60 | # Set key digest function (default is MD5) |
61 | ## |
62 | my ($digest_func, $hash_size) = @_; |
63 | |
64 | $DIGEST_FUNC = $digest_func ? $digest_func : \&Digest::MD5::md5; |
65 | $HASH_SIZE = $hash_size ? $hash_size : 16; |
66 | |
67 | _precalc_sizes(); |
68 | } |
69 | |
70b55428 |
70 | sub setup_fh { |
71 | my $self = shift; |
72 | my ($obj) = @_; |
73 | |
74 | $self->open( $obj ) if !defined $obj->_fh; |
75 | |
76 | #XXX Is {end} busted? Can't we just seek( $fh, 0, SEEK_END ) ? |
77 | unless ( $obj->_root->{inode} ) { |
78 | my @stats = stat($obj->_fh); |
79 | $obj->_root->{inode} = $stats[1]; |
80 | $obj->_root->{end} = $stats[7]; |
81 | } |
82 | |
83 | return 1; |
84 | } |
85 | |
a20d9a3f |
86 | sub open { |
20f7b20c |
87 | ## |
88 | # Open a fh to the database, create if nonexistent. |
89 | # Make sure file signature matches DBM::Deep spec. |
90 | ## |
a20d9a3f |
91 | my $self = shift; |
70b55428 |
92 | my ($obj) = @_; |
a20d9a3f |
93 | |
20f7b20c |
94 | if (defined($obj->_fh)) { $self->close( $obj ); } |
95 | |
a20d9a3f |
96 | eval { |
97 | local $SIG{'__DIE__'}; |
98 | # Theoretically, adding O_BINARY should remove the need for the binmode |
99 | # Of course, testing it is going to be ... interesting. |
100 | my $flags = O_RDWR | O_CREAT | O_BINARY; |
101 | |
102 | my $fh; |
cd59cad8 |
103 | sysopen( $fh, $obj->_root->{file}, $flags ) |
a20d9a3f |
104 | or $fh = undef; |
cd59cad8 |
105 | $obj->_root->{fh} = $fh; |
106 | }; if ($@ ) { $obj->_throw_error( "Received error: $@\n" ); } |
20f7b20c |
107 | if (! defined($obj->_fh)) { |
108 | return $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!"); |
109 | } |
a20d9a3f |
110 | |
cd59cad8 |
111 | my $fh = $obj->_fh; |
a20d9a3f |
112 | |
113 | #XXX Can we remove this by using the right sysopen() flags? |
114 | # Maybe ... q.v. above |
115 | binmode $fh; # for win32 |
116 | |
cd59cad8 |
117 | if ($obj->_root->{autoflush}) { |
a20d9a3f |
118 | my $old = select $fh; |
119 | $|=1; |
120 | select $old; |
121 | } |
20f7b20c |
122 | |
cd59cad8 |
123 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
a20d9a3f |
124 | |
125 | my $signature; |
126 | my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE)); |
20f7b20c |
127 | |
a20d9a3f |
128 | ## |
129 | # File is empty -- write signature and master index |
130 | ## |
131 | if (!$bytes_read) { |
cd59cad8 |
132 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
a20d9a3f |
133 | print( $fh DBM::Deep->SIG_FILE); |
1bf65be7 |
134 | $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $INDEX_SIZE); |
a20d9a3f |
135 | |
136 | my $plain_key = "[base]"; |
1bf65be7 |
137 | print( $fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key ); |
a20d9a3f |
138 | |
139 | # Flush the filehandle |
140 | my $old_fh = select $fh; |
141 | my $old_af = $|; $| = 1; $| = $old_af; |
142 | select $old_fh; |
143 | |
a20d9a3f |
144 | return 1; |
145 | } |
20f7b20c |
146 | |
a20d9a3f |
147 | ## |
148 | # Check signature was valid |
149 | ## |
150 | unless ($signature eq DBM::Deep->SIG_FILE) { |
cd59cad8 |
151 | $self->close( $obj ); |
152 | return $obj->_throw_error("Signature not found -- file is not a Deep DB"); |
a20d9a3f |
153 | } |
154 | |
a20d9a3f |
155 | ## |
156 | # Get our type from master index signature |
157 | ## |
d4b1166e |
158 | my $tag = $self->load_tag($obj, $obj->_base_offset); |
a20d9a3f |
159 | |
160 | #XXX We probably also want to store the hash algorithm name and not assume anything |
161 | #XXX The cool thing would be to allow a different hashing algorithm at every level |
162 | |
163 | if (!$tag) { |
20f7b20c |
164 | return $obj->_throw_error("Corrupted file, no master index record"); |
a20d9a3f |
165 | } |
cd59cad8 |
166 | if ($obj->{type} ne $tag->{signature}) { |
20f7b20c |
167 | return $obj->_throw_error("File type mismatch"); |
a20d9a3f |
168 | } |
20f7b20c |
169 | |
a20d9a3f |
170 | return 1; |
171 | } |
172 | |
cd59cad8 |
173 | sub close { |
174 | my $self = shift; |
175 | my $obj = shift; |
176 | |
177 | if ( my $fh = $obj->_root->{fh} ) { |
178 | close $fh; |
179 | } |
180 | $obj->_root->{fh} = undef; |
181 | |
182 | return 1; |
183 | } |
184 | |
d4b1166e |
185 | sub create_tag { |
20f7b20c |
186 | ## |
187 | # Given offset, signature and content, create tag and write to disk |
188 | ## |
d4b1166e |
189 | my $self = shift; |
20f7b20c |
190 | my ($obj, $offset, $sig, $content) = @_; |
191 | my $size = length($content); |
192 | |
d4b1166e |
193 | my $fh = $obj->_fh; |
194 | |
20f7b20c |
195 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
1bf65be7 |
196 | print( $fh $sig . pack($DATA_LENGTH_PACK, $size) . $content ); |
20f7b20c |
197 | |
198 | if ($offset == $obj->_root->{end}) { |
1bf65be7 |
199 | $obj->_root->{end} += DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE + $size; |
20f7b20c |
200 | } |
201 | |
202 | return { |
203 | signature => $sig, |
204 | size => $size, |
1bf65be7 |
205 | offset => $offset + DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE, |
20f7b20c |
206 | content => $content |
207 | }; |
d4b1166e |
208 | } |
209 | |
210 | sub load_tag { |
20f7b20c |
211 | ## |
212 | # Given offset, load single tag and return signature, size and data |
213 | ## |
d4b1166e |
214 | my $self = shift; |
20f7b20c |
215 | my ($obj, $offset) = @_; |
216 | |
d4b1166e |
217 | my $fh = $obj->_fh; |
218 | |
20f7b20c |
219 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
220 | if (eof $fh) { return undef; } |
221 | |
d4b1166e |
222 | my $b; |
1bf65be7 |
223 | read( $fh, $b, DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE ); |
224 | my ($sig, $size) = unpack( "A $DATA_LENGTH_PACK", $b ); |
20f7b20c |
225 | |
226 | my $buffer; |
227 | read( $fh, $buffer, $size); |
228 | |
229 | return { |
230 | signature => $sig, |
231 | size => $size, |
1bf65be7 |
232 | offset => $offset + DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE, |
20f7b20c |
233 | content => $buffer |
234 | }; |
d4b1166e |
235 | } |
236 | |
237 | sub index_lookup { |
20f7b20c |
238 | ## |
239 | # Given index tag, lookup single entry in index and return . |
240 | ## |
241 | my $self = shift; |
242 | my ($obj, $tag, $index) = @_; |
243 | |
1bf65be7 |
244 | my $location = unpack($LONG_PACK, substr($tag->{content}, $index * $LONG_SIZE, $LONG_SIZE) ); |
20f7b20c |
245 | if (!$location) { return; } |
246 | |
247 | return $self->load_tag( $obj, $location ); |
248 | } |
249 | |
250 | sub add_bucket { |
251 | ## |
252 | # Adds one key/value pair to bucket list, given offset, MD5 digest of key, |
253 | # plain (undigested) key and value. |
254 | ## |
d4b1166e |
255 | my $self = shift; |
20f7b20c |
256 | my ($obj, $tag, $md5, $plain_key, $value) = @_; |
257 | my $keys = $tag->{content}; |
258 | my $location = 0; |
259 | my $result = 2; |
260 | |
261 | my $root = $obj->_root; |
262 | |
263 | my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) }; |
264 | my $internal_ref = $is_dbm_deep && ($value->_root eq $root); |
265 | |
266 | my $fh = $obj->_fh; |
267 | |
268 | ## |
269 | # Iterate through buckets, seeing if this is a new entry or a replace. |
270 | ## |
1bf65be7 |
271 | for (my $i=0; $i<$MAX_BUCKETS; $i++) { |
272 | my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); |
20f7b20c |
273 | if (!$subloc) { |
274 | ## |
275 | # Found empty bucket (end of list). Populate and exit loop. |
276 | ## |
277 | $result = 2; |
278 | |
279 | $location = $internal_ref |
280 | ? $value->_base_offset |
281 | : $root->{end}; |
282 | |
1bf65be7 |
283 | seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET); |
284 | print( $fh $md5 . pack($LONG_PACK, $location) ); |
20f7b20c |
285 | last; |
286 | } |
287 | |
1bf65be7 |
288 | my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); |
20f7b20c |
289 | if ($md5 eq $key) { |
290 | ## |
291 | # Found existing bucket with same key. Replace with new value. |
292 | ## |
293 | $result = 1; |
294 | |
295 | if ($internal_ref) { |
296 | $location = $value->_base_offset; |
1bf65be7 |
297 | seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET); |
298 | print( $fh $md5 . pack($LONG_PACK, $location) ); |
20f7b20c |
299 | return $result; |
300 | } |
301 | |
302 | seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET); |
303 | my $size; |
1bf65be7 |
304 | read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); |
20f7b20c |
305 | |
306 | ## |
307 | # If value is a hash, array, or raw value with equal or less size, we can |
308 | # reuse the same content area of the database. Otherwise, we have to create |
309 | # a new content area at the EOF. |
310 | ## |
311 | my $actual_length; |
312 | my $r = Scalar::Util::reftype( $value ) || ''; |
313 | if ( $r eq 'HASH' || $r eq 'ARRAY' ) { |
1bf65be7 |
314 | $actual_length = $INDEX_SIZE; |
20f7b20c |
315 | |
316 | # if autobless is enabled, must also take into consideration |
317 | # the class name, as it is stored along with key/value. |
318 | if ( $root->{autobless} ) { |
319 | my $value_class = Scalar::Util::blessed($value); |
320 | if ( defined $value_class && !$value->isa('DBM::Deep') ) { |
321 | $actual_length += length($value_class); |
322 | } |
323 | } |
324 | } |
325 | else { $actual_length = length($value); } |
326 | |
327 | if ($actual_length <= $size) { |
328 | $location = $subloc; |
329 | } |
330 | else { |
331 | $location = $root->{end}; |
1bf65be7 |
332 | seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $HASH_SIZE + $root->{file_offset}, SEEK_SET); |
333 | print( $fh pack($LONG_PACK, $location) ); |
20f7b20c |
334 | } |
335 | |
336 | last; |
337 | } |
338 | } |
339 | |
340 | ## |
341 | # If this is an internal reference, return now. |
342 | # No need to write value or plain key |
343 | ## |
344 | if ($internal_ref) { |
345 | return $result; |
346 | } |
347 | |
348 | ## |
349 | # If bucket didn't fit into list, split into a new index level |
350 | ## |
351 | if (!$location) { |
352 | seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); |
1bf65be7 |
353 | print( $fh pack($LONG_PACK, $root->{end}) ); |
20f7b20c |
354 | |
1bf65be7 |
355 | my $index_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_INDEX, chr(0) x $INDEX_SIZE); |
20f7b20c |
356 | my @offsets = (); |
357 | |
1bf65be7 |
358 | $keys .= $md5 . pack($LONG_PACK, 0); |
20f7b20c |
359 | |
1bf65be7 |
360 | for (my $i=0; $i<=$MAX_BUCKETS; $i++) { |
361 | my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); |
20f7b20c |
362 | if ($key) { |
1bf65be7 |
363 | my $old_subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + |
364 | $HASH_SIZE, $LONG_SIZE)); |
20f7b20c |
365 | my $num = ord(substr($key, $tag->{ch} + 1, 1)); |
366 | |
367 | if ($offsets[$num]) { |
1bf65be7 |
368 | my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $DATA_LENGTH_SIZE; |
20f7b20c |
369 | seek($fh, $offset + $root->{file_offset}, SEEK_SET); |
370 | my $subkeys; |
1bf65be7 |
371 | read( $fh, $subkeys, $BUCKET_LIST_SIZE); |
20f7b20c |
372 | |
1bf65be7 |
373 | for (my $k=0; $k<$MAX_BUCKETS; $k++) { |
374 | my $subloc = unpack($LONG_PACK, substr($subkeys, ($k * $BUCKET_SIZE) + |
375 | $HASH_SIZE, $LONG_SIZE)); |
20f7b20c |
376 | if (!$subloc) { |
1bf65be7 |
377 | seek($fh, $offset + ($k * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET); |
378 | print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) ); |
20f7b20c |
379 | last; |
380 | } |
381 | } # k loop |
382 | } |
383 | else { |
384 | $offsets[$num] = $root->{end}; |
1bf65be7 |
385 | seek($fh, $index_tag->{offset} + ($num * $LONG_SIZE) + $root->{file_offset}, SEEK_SET); |
386 | print( $fh pack($LONG_PACK, $root->{end}) ); |
20f7b20c |
387 | |
1bf65be7 |
388 | my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE); |
20f7b20c |
389 | |
390 | seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); |
1bf65be7 |
391 | print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) ); |
20f7b20c |
392 | } |
393 | } # key is real |
394 | } # i loop |
395 | |
396 | $location ||= $root->{end}; |
397 | } # re-index bucket list |
398 | |
399 | ## |
400 | # Seek to content area and store signature, value and plaintext key |
401 | ## |
402 | if ($location) { |
403 | my $content_length; |
404 | seek($fh, $location + $root->{file_offset}, SEEK_SET); |
405 | |
406 | ## |
407 | # Write signature based on content type, set content length and write actual value. |
408 | ## |
409 | my $r = Scalar::Util::reftype($value) || ''; |
410 | if ($r eq 'HASH') { |
411 | print( $fh DBM::Deep->TYPE_HASH ); |
1bf65be7 |
412 | print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE ); |
413 | $content_length = $INDEX_SIZE; |
20f7b20c |
414 | } |
415 | elsif ($r eq 'ARRAY') { |
416 | print( $fh DBM::Deep->TYPE_ARRAY ); |
1bf65be7 |
417 | print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE ); |
418 | $content_length = $INDEX_SIZE; |
20f7b20c |
419 | } |
420 | elsif (!defined($value)) { |
421 | print( $fh DBM::Deep->SIG_NULL ); |
1bf65be7 |
422 | print( $fh pack($DATA_LENGTH_PACK, 0) ); |
20f7b20c |
423 | $content_length = 0; |
424 | } |
425 | else { |
426 | print( $fh DBM::Deep->SIG_DATA ); |
1bf65be7 |
427 | print( $fh pack($DATA_LENGTH_PACK, length($value)) . $value ); |
20f7b20c |
428 | $content_length = length($value); |
429 | } |
430 | |
431 | ## |
432 | # Plain key is stored AFTER value, as keys are typically fetched less often. |
433 | ## |
1bf65be7 |
434 | print( $fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key ); |
20f7b20c |
435 | |
436 | ## |
437 | # If value is blessed, preserve class name |
438 | ## |
439 | if ( $root->{autobless} ) { |
440 | my $value_class = Scalar::Util::blessed($value); |
441 | if ( defined $value_class && $value_class ne 'DBM::Deep' ) { |
442 | ## |
443 | # Blessed ref -- will restore later |
444 | ## |
445 | print( $fh chr(1) ); |
1bf65be7 |
446 | print( $fh pack($DATA_LENGTH_PACK, length($value_class)) . $value_class ); |
20f7b20c |
447 | $content_length += 1; |
1bf65be7 |
448 | $content_length += $DATA_LENGTH_SIZE + length($value_class); |
20f7b20c |
449 | } |
450 | else { |
451 | print( $fh chr(0) ); |
452 | $content_length += 1; |
453 | } |
454 | } |
455 | |
456 | ## |
457 | # If this is a new content area, advance EOF counter |
458 | ## |
459 | if ($location == $root->{end}) { |
460 | $root->{end} += DBM::Deep->SIG_SIZE; |
1bf65be7 |
461 | $root->{end} += $DATA_LENGTH_SIZE + $content_length; |
462 | $root->{end} += $DATA_LENGTH_SIZE + length($plain_key); |
20f7b20c |
463 | } |
464 | |
465 | ## |
466 | # If content is a hash or array, create new child DBM::Deep object and |
467 | # pass each key or element to it. |
468 | ## |
469 | if ($r eq 'HASH') { |
470 | my $branch = DBM::Deep->new( |
471 | type => DBM::Deep->TYPE_HASH, |
472 | base_offset => $location, |
473 | root => $root, |
474 | ); |
475 | foreach my $key (keys %{$value}) { |
476 | $branch->STORE( $key, $value->{$key} ); |
477 | } |
478 | } |
479 | elsif ($r eq 'ARRAY') { |
480 | my $branch = DBM::Deep->new( |
481 | type => DBM::Deep->TYPE_ARRAY, |
482 | base_offset => $location, |
483 | root => $root, |
484 | ); |
485 | my $index = 0; |
486 | foreach my $element (@{$value}) { |
487 | $branch->STORE( $index, $element ); |
488 | $index++; |
489 | } |
490 | } |
491 | |
492 | return $result; |
493 | } |
d4b1166e |
494 | |
20f7b20c |
495 | return $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file"); |
d4b1166e |
496 | } |
497 | |
9020ee8c |
498 | sub get_bucket_value { |
499 | ## |
500 | # Fetch single value given tag and MD5 digested key. |
501 | ## |
502 | my $self = shift; |
503 | my ($obj, $tag, $md5) = @_; |
504 | my $keys = $tag->{content}; |
505 | |
506 | my $fh = $obj->_fh; |
507 | |
508 | ## |
509 | # Iterate through buckets, looking for a key match |
510 | ## |
511 | BUCKET: |
1bf65be7 |
512 | for (my $i=0; $i<$MAX_BUCKETS; $i++) { |
513 | my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); |
514 | my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); |
9020ee8c |
515 | |
516 | if (!$subloc) { |
517 | ## |
518 | # Hit end of list, no match |
519 | ## |
520 | return; |
521 | } |
522 | |
523 | if ( $md5 ne $key ) { |
524 | next BUCKET; |
525 | } |
526 | |
527 | ## |
528 | # Found match -- seek to offset and read signature |
529 | ## |
530 | my $signature; |
531 | seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); |
532 | read( $fh, $signature, DBM::Deep->SIG_SIZE); |
533 | |
534 | ## |
535 | # If value is a hash or array, return new DBM::Deep object with correct offset |
536 | ## |
537 | if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) { |
538 | my $obj = DBM::Deep->new( |
539 | type => $signature, |
540 | base_offset => $subloc, |
541 | root => $obj->_root, |
542 | ); |
543 | |
544 | if ($obj->_root->{autobless}) { |
545 | ## |
546 | # Skip over value and plain key to see if object needs |
547 | # to be re-blessed |
548 | ## |
1bf65be7 |
549 | seek($fh, $DATA_LENGTH_SIZE + $INDEX_SIZE, SEEK_CUR); |
9020ee8c |
550 | |
551 | my $size; |
1bf65be7 |
552 | read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); |
9020ee8c |
553 | if ($size) { seek($fh, $size, SEEK_CUR); } |
554 | |
555 | my $bless_bit; |
556 | read( $fh, $bless_bit, 1); |
557 | if (ord($bless_bit)) { |
558 | ## |
559 | # Yes, object needs to be re-blessed |
560 | ## |
561 | my $class_name; |
1bf65be7 |
562 | read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); |
9020ee8c |
563 | if ($size) { read( $fh, $class_name, $size); } |
564 | if ($class_name) { $obj = bless( $obj, $class_name ); } |
565 | } |
566 | } |
567 | |
568 | return $obj; |
569 | } |
570 | |
571 | ## |
572 | # Otherwise return actual value |
573 | ## |
574 | elsif ($signature eq DBM::Deep->SIG_DATA) { |
575 | my $size; |
576 | my $value = ''; |
1bf65be7 |
577 | read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); |
9020ee8c |
578 | if ($size) { read( $fh, $value, $size); } |
579 | return $value; |
580 | } |
581 | |
582 | ## |
583 | # Key exists, but content is null |
584 | ## |
585 | else { return; } |
586 | } # i loop |
587 | |
588 | return; |
589 | } |
ab0e4957 |
590 | |
591 | sub delete_bucket { |
592 | ## |
593 | # Delete single key/value pair given tag and MD5 digested key. |
594 | ## |
595 | my $self = shift; |
596 | my ($obj, $tag, $md5) = @_; |
597 | my $keys = $tag->{content}; |
598 | |
599 | my $fh = $obj->_fh; |
600 | |
601 | ## |
602 | # Iterate through buckets, looking for a key match |
603 | ## |
604 | BUCKET: |
1bf65be7 |
605 | for (my $i=0; $i<$MAX_BUCKETS; $i++) { |
606 | my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); |
607 | my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); |
ab0e4957 |
608 | |
609 | if (!$subloc) { |
610 | ## |
611 | # Hit end of list, no match |
612 | ## |
613 | return; |
614 | } |
615 | |
616 | if ( $md5 ne $key ) { |
617 | next BUCKET; |
618 | } |
619 | |
620 | ## |
621 | # Matched key -- delete bucket and return |
622 | ## |
1bf65be7 |
623 | seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $obj->_root->{file_offset}, SEEK_SET); |
624 | print( $fh substr($keys, ($i+1) * $BUCKET_SIZE ) ); |
625 | print( $fh chr(0) x $BUCKET_SIZE ); |
ab0e4957 |
626 | |
627 | return 1; |
628 | } # i loop |
629 | |
630 | return; |
631 | } |
632 | |
912d50b1 |
633 | sub bucket_exists { |
634 | ## |
635 | # Check existence of single key given tag and MD5 digested key. |
636 | ## |
637 | my $self = shift; |
638 | my ($obj, $tag, $md5) = @_; |
639 | my $keys = $tag->{content}; |
640 | |
641 | ## |
642 | # Iterate through buckets, looking for a key match |
643 | ## |
644 | BUCKET: |
1bf65be7 |
645 | for (my $i=0; $i<$MAX_BUCKETS; $i++) { |
646 | my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); |
647 | my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); |
912d50b1 |
648 | |
649 | if (!$subloc) { |
650 | ## |
651 | # Hit end of list, no match |
652 | ## |
653 | return; |
654 | } |
655 | |
656 | if ( $md5 ne $key ) { |
657 | next BUCKET; |
658 | } |
659 | |
660 | ## |
661 | # Matched key -- return true |
662 | ## |
663 | return 1; |
664 | } # i loop |
665 | |
666 | return; |
667 | } |
668 | |
6736c116 |
669 | sub find_bucket_list { |
670 | ## |
671 | # Locate offset for bucket list, given digested key |
672 | ## |
673 | my $self = shift; |
674 | my ($obj, $md5) = @_; |
675 | |
676 | ## |
677 | # Locate offset for bucket list using digest index system |
678 | ## |
679 | my $ch = 0; |
680 | my $tag = $self->load_tag($obj, $obj->_base_offset); |
681 | if (!$tag) { return; } |
682 | |
683 | while ($tag->{signature} ne DBM::Deep->SIG_BLIST) { |
684 | $tag = $self->index_lookup($obj, $tag, ord(substr($md5, $ch, 1))); |
685 | if (!$tag) { return; } |
686 | $ch++; |
687 | } |
688 | |
689 | return $tag; |
690 | } |
691 | |
692 | sub traverse_index { |
693 | ## |
694 | # Scan index and recursively step into deeper levels, looking for next key. |
695 | ## |
696 | my $self = shift; |
697 | my ($obj, $offset, $ch, $force_return_next) = @_; |
698 | $force_return_next = undef unless $force_return_next; |
699 | |
700 | my $tag = $self->load_tag($obj, $offset ); |
701 | |
702 | my $fh = $obj->_fh; |
703 | |
704 | if ($tag->{signature} ne DBM::Deep->SIG_BLIST) { |
705 | my $content = $tag->{content}; |
706 | my $start; |
707 | if ($obj->{return_next}) { $start = 0; } |
708 | else { $start = ord(substr($obj->{prev_md5}, $ch, 1)); } |
709 | |
710 | for (my $index = $start; $index < 256; $index++) { |
1bf65be7 |
711 | my $subloc = unpack($LONG_PACK, substr($content, $index * $LONG_SIZE, $LONG_SIZE) ); |
6736c116 |
712 | if ($subloc) { |
713 | my $result = $self->traverse_index( $obj, $subloc, $ch + 1, $force_return_next ); |
714 | if (defined($result)) { return $result; } |
715 | } |
716 | } # index loop |
717 | |
718 | $obj->{return_next} = 1; |
719 | } # tag is an index |
720 | |
721 | elsif ($tag->{signature} eq DBM::Deep->SIG_BLIST) { |
722 | my $keys = $tag->{content}; |
723 | if ($force_return_next) { $obj->{return_next} = 1; } |
724 | |
725 | ## |
726 | # Iterate through buckets, looking for a key match |
727 | ## |
1bf65be7 |
728 | for (my $i=0; $i<$MAX_BUCKETS; $i++) { |
729 | my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); |
730 | my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); |
6736c116 |
731 | |
732 | if (!$subloc) { |
733 | ## |
734 | # End of bucket list -- return to outer loop |
735 | ## |
736 | $obj->{return_next} = 1; |
737 | last; |
738 | } |
739 | elsif ($key eq $obj->{prev_md5}) { |
740 | ## |
741 | # Located previous key -- return next one found |
742 | ## |
743 | $obj->{return_next} = 1; |
744 | next; |
745 | } |
746 | elsif ($obj->{return_next}) { |
747 | ## |
748 | # Seek to bucket location and skip over signature |
749 | ## |
750 | seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET); |
751 | |
752 | ## |
753 | # Skip over value to get to plain key |
754 | ## |
755 | my $size; |
1bf65be7 |
756 | read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); |
6736c116 |
757 | if ($size) { seek($fh, $size, SEEK_CUR); } |
758 | |
759 | ## |
760 | # Read in plain key and return as scalar |
761 | ## |
762 | my $plain_key; |
1bf65be7 |
763 | read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); |
6736c116 |
764 | if ($size) { read( $fh, $plain_key, $size); } |
765 | |
766 | return $plain_key; |
767 | } |
768 | } # bucket loop |
769 | |
770 | $obj->{return_next} = 1; |
771 | } # tag is a bucket list |
772 | |
773 | return; |
774 | } |
775 | |
776 | sub get_next_key { |
777 | ## |
778 | # Locate next key, given digested previous one |
779 | ## |
780 | my $self = shift; |
781 | my ($obj) = @_; |
782 | |
783 | $obj->{prev_md5} = $_[1] ? $_[1] : undef; |
784 | $obj->{return_next} = 0; |
785 | |
786 | ## |
787 | # If the previous key was not specifed, start at the top and |
788 | # return the first one found. |
789 | ## |
790 | if (!$obj->{prev_md5}) { |
1bf65be7 |
791 | $obj->{prev_md5} = chr(0) x $HASH_SIZE; |
6736c116 |
792 | $obj->{return_next} = 1; |
793 | } |
794 | |
795 | return $self->traverse_index( $obj, $obj->_base_offset, 0 ); |
796 | } |
797 | |
a20d9a3f |
798 | 1; |
799 | __END__ |