Commit | Line | Data |
a20d9a3f |
1 | package DBM::Deep::Engine; |
2 | |
3 | use strict; |
4 | |
5 | use Fcntl qw( :DEFAULT :flock :seek ); |
6 | |
612969fb |
7 | sub precalc_sizes { |
beac1dff |
8 | ## |
9 | # Precalculate index, bucket and bucket list sizes |
10 | ## |
251dfd0e |
11 | my $self = shift; |
1bf65be7 |
12 | |
251dfd0e |
13 | $self->{index_size} = (2**8) * $self->{long_size}; |
14 | $self->{bucket_size} = $self->{hash_size} + $self->{long_size}; |
15 | $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size}; |
1bf65be7 |
16 | |
251dfd0e |
17 | return 1; |
1bf65be7 |
18 | } |
19 | |
20 | sub set_pack { |
beac1dff |
21 | ## |
22 | # Set pack/unpack modes (see file header for more) |
23 | ## |
251dfd0e |
24 | my $self = shift; |
1bf65be7 |
25 | my ($long_s, $long_p, $data_s, $data_p) = @_; |
26 | |
81d16922 |
27 | ## |
28 | # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file. |
beac1dff |
29 | # (Perl must be compiled with largefile support for files > 2 GB) |
81d16922 |
30 | # |
31 | # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file. |
beac1dff |
32 | # (Perl must be compiled with largefile and 64-bit long support) |
81d16922 |
33 | ## |
251dfd0e |
34 | $self->{long_size} = $long_s ? $long_s : 4; |
35 | $self->{long_pack} = $long_p ? $long_p : 'N'; |
1bf65be7 |
36 | |
81d16922 |
37 | ## |
38 | # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value. |
39 | # Upgrading this is possible (see above) but probably not necessary. If you need |
40 | # more than 4 GB for a single key or value, this module is really not for you :-) |
41 | ## |
251dfd0e |
42 | $self->{data_size} = $data_s ? $data_s : 4; |
43 | $self->{data_pack} = $data_p ? $data_p : 'N'; |
1bf65be7 |
44 | |
beac1dff |
45 | return $self->precalc_sizes(); |
1bf65be7 |
46 | } |
47 | |
48 | sub set_digest { |
beac1dff |
49 | ## |
50 | # Set key digest function (default is MD5) |
51 | ## |
251dfd0e |
52 | my $self = shift; |
1bf65be7 |
53 | my ($digest_func, $hash_size) = @_; |
54 | |
612969fb |
55 | $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5; |
251dfd0e |
56 | $self->{hash_size} = $hash_size ? $hash_size : 16; |
612969fb |
57 | |
beac1dff |
58 | return $self->precalc_sizes(); |
612969fb |
59 | } |
60 | |
61 | sub new { |
62 | my $class = shift; |
63 | my ($args) = @_; |
64 | |
65 | my $self = bless { |
66 | long_size => 4, |
67 | long_pack => 'N', |
68 | data_size => 4, |
69 | data_pack => 'N', |
251dfd0e |
70 | |
612969fb |
71 | digest => \&Digest::MD5::md5, |
72 | hash_size => 16, |
251dfd0e |
73 | |
81d16922 |
74 | ## |
75 | # Maximum number of buckets per list before another level of indexing is done. |
76 | # Increase this value for slightly greater speed, but larger database files. |
77 | # DO NOT decrease this value below 16, due to risk of recursive reindex overrun. |
78 | ## |
612969fb |
79 | max_buckets => 16, |
80 | }, $class; |
81 | |
251dfd0e |
82 | $self->precalc_sizes; |
612969fb |
83 | |
84 | return $self; |
1bf65be7 |
85 | } |
86 | |
70b55428 |
87 | sub setup_fh { |
88 | my $self = shift; |
89 | my ($obj) = @_; |
90 | |
91 | $self->open( $obj ) if !defined $obj->_fh; |
92 | |
93 | #XXX Is {end} busted? Can't we just seek( $fh, 0, SEEK_END ) ? |
94 | unless ( $obj->_root->{inode} ) { |
95 | my @stats = stat($obj->_fh); |
96 | $obj->_root->{inode} = $stats[1]; |
97 | $obj->_root->{end} = $stats[7]; |
98 | } |
99 | |
100 | return 1; |
101 | } |
102 | |
a20d9a3f |
103 | sub open { |
20f7b20c |
104 | ## |
105 | # Open a fh to the database, create if nonexistent. |
106 | # Make sure file signature matches DBM::Deep spec. |
107 | ## |
a20d9a3f |
108 | my $self = shift; |
70b55428 |
109 | my ($obj) = @_; |
a20d9a3f |
110 | |
3d1b8be9 |
111 | if (defined($obj->_fh)) { $self->close_fh( $obj ); } |
20f7b20c |
112 | |
a20d9a3f |
113 | eval { |
114 | local $SIG{'__DIE__'}; |
115 | # Theoretically, adding O_BINARY should remove the need for the binmode |
116 | # Of course, testing it is going to be ... interesting. |
117 | my $flags = O_RDWR | O_CREAT | O_BINARY; |
118 | |
119 | my $fh; |
cd59cad8 |
120 | sysopen( $fh, $obj->_root->{file}, $flags ) |
a20d9a3f |
121 | or $fh = undef; |
cd59cad8 |
122 | $obj->_root->{fh} = $fh; |
123 | }; if ($@ ) { $obj->_throw_error( "Received error: $@\n" ); } |
20f7b20c |
124 | if (! defined($obj->_fh)) { |
125 | return $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!"); |
126 | } |
a20d9a3f |
127 | |
cd59cad8 |
128 | my $fh = $obj->_fh; |
a20d9a3f |
129 | |
130 | #XXX Can we remove this by using the right sysopen() flags? |
131 | # Maybe ... q.v. above |
132 | binmode $fh; # for win32 |
133 | |
cd59cad8 |
134 | if ($obj->_root->{autoflush}) { |
a20d9a3f |
135 | my $old = select $fh; |
136 | $|=1; |
137 | select $old; |
138 | } |
20f7b20c |
139 | |
cd59cad8 |
140 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
a20d9a3f |
141 | |
142 | my $signature; |
143 | my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE)); |
20f7b20c |
144 | |
a20d9a3f |
145 | ## |
146 | # File is empty -- write signature and master index |
147 | ## |
148 | if (!$bytes_read) { |
cd59cad8 |
149 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
a20d9a3f |
150 | print( $fh DBM::Deep->SIG_FILE); |
ec1bce6b |
151 | $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $self->{index_size}); |
a20d9a3f |
152 | |
153 | my $plain_key = "[base]"; |
251dfd0e |
154 | print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key ); |
a20d9a3f |
155 | |
156 | # Flush the filehandle |
157 | my $old_fh = select $fh; |
158 | my $old_af = $|; $| = 1; $| = $old_af; |
159 | select $old_fh; |
160 | |
a20d9a3f |
161 | return 1; |
162 | } |
20f7b20c |
163 | |
a20d9a3f |
164 | ## |
165 | # Check signature was valid |
166 | ## |
167 | unless ($signature eq DBM::Deep->SIG_FILE) { |
3d1b8be9 |
168 | $self->close_fh( $obj ); |
cd59cad8 |
169 | return $obj->_throw_error("Signature not found -- file is not a Deep DB"); |
a20d9a3f |
170 | } |
171 | |
a20d9a3f |
172 | ## |
173 | # Get our type from master index signature |
174 | ## |
d4b1166e |
175 | my $tag = $self->load_tag($obj, $obj->_base_offset); |
a20d9a3f |
176 | |
177 | #XXX We probably also want to store the hash algorithm name and not assume anything |
178 | #XXX The cool thing would be to allow a different hashing algorithm at every level |
179 | |
180 | if (!$tag) { |
20f7b20c |
181 | return $obj->_throw_error("Corrupted file, no master index record"); |
a20d9a3f |
182 | } |
cd59cad8 |
183 | if ($obj->{type} ne $tag->{signature}) { |
20f7b20c |
184 | return $obj->_throw_error("File type mismatch"); |
a20d9a3f |
185 | } |
20f7b20c |
186 | |
a20d9a3f |
187 | return 1; |
188 | } |
189 | |
3d1b8be9 |
190 | sub close_fh { |
cd59cad8 |
191 | my $self = shift; |
192 | my $obj = shift; |
193 | |
194 | if ( my $fh = $obj->_root->{fh} ) { |
195 | close $fh; |
196 | } |
197 | $obj->_root->{fh} = undef; |
198 | |
199 | return 1; |
200 | } |
201 | |
d4b1166e |
202 | sub create_tag { |
20f7b20c |
203 | ## |
204 | # Given offset, signature and content, create tag and write to disk |
205 | ## |
d4b1166e |
206 | my $self = shift; |
20f7b20c |
207 | my ($obj, $offset, $sig, $content) = @_; |
208 | my $size = length($content); |
209 | |
d4b1166e |
210 | my $fh = $obj->_fh; |
211 | |
20f7b20c |
212 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
251dfd0e |
213 | print( $fh $sig . pack($self->{data_pack}, $size) . $content ); |
20f7b20c |
214 | |
215 | if ($offset == $obj->_root->{end}) { |
251dfd0e |
216 | $obj->_root->{end} += DBM::Deep->SIG_SIZE + $self->{data_size} + $size; |
20f7b20c |
217 | } |
218 | |
219 | return { |
220 | signature => $sig, |
221 | size => $size, |
251dfd0e |
222 | offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size}, |
20f7b20c |
223 | content => $content |
224 | }; |
d4b1166e |
225 | } |
226 | |
227 | sub load_tag { |
20f7b20c |
228 | ## |
229 | # Given offset, load single tag and return signature, size and data |
230 | ## |
d4b1166e |
231 | my $self = shift; |
20f7b20c |
232 | my ($obj, $offset) = @_; |
233 | |
d4b1166e |
234 | my $fh = $obj->_fh; |
235 | |
20f7b20c |
236 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
237 | if (eof $fh) { return undef; } |
238 | |
d4b1166e |
239 | my $b; |
251dfd0e |
240 | read( $fh, $b, DBM::Deep->SIG_SIZE + $self->{data_size} ); |
241 | my ($sig, $size) = unpack( "A $self->{data_pack}", $b ); |
20f7b20c |
242 | |
243 | my $buffer; |
244 | read( $fh, $buffer, $size); |
245 | |
246 | return { |
247 | signature => $sig, |
248 | size => $size, |
251dfd0e |
249 | offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size}, |
20f7b20c |
250 | content => $buffer |
251 | }; |
d4b1166e |
252 | } |
253 | |
254 | sub index_lookup { |
20f7b20c |
255 | ## |
256 | # Given index tag, lookup single entry in index and return . |
257 | ## |
258 | my $self = shift; |
259 | my ($obj, $tag, $index) = @_; |
260 | |
251dfd0e |
261 | my $location = unpack($self->{long_pack}, substr($tag->{content}, $index * $self->{long_size}, $self->{long_size}) ); |
20f7b20c |
262 | if (!$location) { return; } |
263 | |
264 | return $self->load_tag( $obj, $location ); |
265 | } |
266 | |
267 | sub add_bucket { |
268 | ## |
269 | # Adds one key/value pair to bucket list, given offset, MD5 digest of key, |
270 | # plain (undigested) key and value. |
271 | ## |
d4b1166e |
272 | my $self = shift; |
20f7b20c |
273 | my ($obj, $tag, $md5, $plain_key, $value) = @_; |
274 | my $keys = $tag->{content}; |
275 | my $location = 0; |
276 | my $result = 2; |
277 | |
278 | my $root = $obj->_root; |
279 | |
280 | my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) }; |
281 | my $internal_ref = $is_dbm_deep && ($value->_root eq $root); |
282 | |
283 | my $fh = $obj->_fh; |
284 | |
285 | ## |
286 | # Iterate through buckets, seeing if this is a new entry or a replace. |
287 | ## |
251dfd0e |
288 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
289 | my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); |
20f7b20c |
290 | if (!$subloc) { |
291 | ## |
292 | # Found empty bucket (end of list). Populate and exit loop. |
293 | ## |
294 | $result = 2; |
295 | |
296 | $location = $internal_ref |
297 | ? $value->_base_offset |
298 | : $root->{end}; |
299 | |
251dfd0e |
300 | seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); |
301 | print( $fh $md5 . pack($self->{long_pack}, $location) ); |
20f7b20c |
302 | last; |
303 | } |
304 | |
251dfd0e |
305 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
20f7b20c |
306 | if ($md5 eq $key) { |
307 | ## |
308 | # Found existing bucket with same key. Replace with new value. |
309 | ## |
310 | $result = 1; |
311 | |
312 | if ($internal_ref) { |
313 | $location = $value->_base_offset; |
251dfd0e |
314 | seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); |
315 | print( $fh $md5 . pack($self->{long_pack}, $location) ); |
20f7b20c |
316 | return $result; |
317 | } |
318 | |
319 | seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET); |
320 | my $size; |
251dfd0e |
321 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
20f7b20c |
322 | |
323 | ## |
324 | # If value is a hash, array, or raw value with equal or less size, we can |
325 | # reuse the same content area of the database. Otherwise, we have to create |
326 | # a new content area at the EOF. |
327 | ## |
328 | my $actual_length; |
329 | my $r = Scalar::Util::reftype( $value ) || ''; |
330 | if ( $r eq 'HASH' || $r eq 'ARRAY' ) { |
ec1bce6b |
331 | $actual_length = $self->{index_size}; |
20f7b20c |
332 | |
333 | # if autobless is enabled, must also take into consideration |
334 | # the class name, as it is stored along with key/value. |
335 | if ( $root->{autobless} ) { |
336 | my $value_class = Scalar::Util::blessed($value); |
337 | if ( defined $value_class && !$value->isa('DBM::Deep') ) { |
338 | $actual_length += length($value_class); |
339 | } |
340 | } |
341 | } |
342 | else { $actual_length = length($value); } |
343 | |
344 | if ($actual_length <= $size) { |
345 | $location = $subloc; |
346 | } |
347 | else { |
348 | $location = $root->{end}; |
251dfd0e |
349 | seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $self->{hash_size} + $root->{file_offset}, SEEK_SET); |
350 | print( $fh pack($self->{long_pack}, $location) ); |
20f7b20c |
351 | } |
352 | |
353 | last; |
354 | } |
355 | } |
356 | |
357 | ## |
358 | # If this is an internal reference, return now. |
359 | # No need to write value or plain key |
360 | ## |
361 | if ($internal_ref) { |
362 | return $result; |
363 | } |
364 | |
365 | ## |
366 | # If bucket didn't fit into list, split into a new index level |
367 | ## |
368 | if (!$location) { |
369 | seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); |
251dfd0e |
370 | print( $fh pack($self->{long_pack}, $root->{end}) ); |
20f7b20c |
371 | |
ec1bce6b |
372 | my $index_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_INDEX, chr(0) x $self->{index_size}); |
20f7b20c |
373 | my @offsets = (); |
374 | |
251dfd0e |
375 | $keys .= $md5 . pack($self->{long_pack}, 0); |
20f7b20c |
376 | |
251dfd0e |
377 | for (my $i=0; $i<=$self->{max_buckets}; $i++) { |
378 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
20f7b20c |
379 | if ($key) { |
251dfd0e |
380 | my $old_subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + |
381 | $self->{hash_size}, $self->{long_size})); |
20f7b20c |
382 | my $num = ord(substr($key, $tag->{ch} + 1, 1)); |
383 | |
384 | if ($offsets[$num]) { |
251dfd0e |
385 | my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $self->{data_size}; |
20f7b20c |
386 | seek($fh, $offset + $root->{file_offset}, SEEK_SET); |
387 | my $subkeys; |
251dfd0e |
388 | read( $fh, $subkeys, $self->{bucket_list_size}); |
20f7b20c |
389 | |
251dfd0e |
390 | for (my $k=0; $k<$self->{max_buckets}; $k++) { |
391 | my $subloc = unpack($self->{long_pack}, substr($subkeys, ($k * $self->{bucket_size}) + |
392 | $self->{hash_size}, $self->{long_size})); |
20f7b20c |
393 | if (!$subloc) { |
251dfd0e |
394 | seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); |
395 | print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); |
20f7b20c |
396 | last; |
397 | } |
398 | } # k loop |
399 | } |
400 | else { |
401 | $offsets[$num] = $root->{end}; |
251dfd0e |
402 | seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); |
403 | print( $fh pack($self->{long_pack}, $root->{end}) ); |
20f7b20c |
404 | |
251dfd0e |
405 | my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $self->{bucket_list_size}); |
20f7b20c |
406 | |
407 | seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); |
251dfd0e |
408 | print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); |
20f7b20c |
409 | } |
410 | } # key is real |
411 | } # i loop |
412 | |
413 | $location ||= $root->{end}; |
414 | } # re-index bucket list |
415 | |
416 | ## |
417 | # Seek to content area and store signature, value and plaintext key |
418 | ## |
419 | if ($location) { |
420 | my $content_length; |
421 | seek($fh, $location + $root->{file_offset}, SEEK_SET); |
422 | |
423 | ## |
424 | # Write signature based on content type, set content length and write actual value. |
425 | ## |
426 | my $r = Scalar::Util::reftype($value) || ''; |
427 | if ($r eq 'HASH') { |
428 | print( $fh DBM::Deep->TYPE_HASH ); |
251dfd0e |
429 | print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); |
ec1bce6b |
430 | $content_length = $self->{index_size}; |
20f7b20c |
431 | } |
432 | elsif ($r eq 'ARRAY') { |
433 | print( $fh DBM::Deep->TYPE_ARRAY ); |
251dfd0e |
434 | print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); |
ec1bce6b |
435 | $content_length = $self->{index_size}; |
20f7b20c |
436 | } |
437 | elsif (!defined($value)) { |
438 | print( $fh DBM::Deep->SIG_NULL ); |
251dfd0e |
439 | print( $fh pack($self->{data_pack}, 0) ); |
20f7b20c |
440 | $content_length = 0; |
441 | } |
442 | else { |
443 | print( $fh DBM::Deep->SIG_DATA ); |
251dfd0e |
444 | print( $fh pack($self->{data_pack}, length($value)) . $value ); |
20f7b20c |
445 | $content_length = length($value); |
446 | } |
447 | |
448 | ## |
449 | # Plain key is stored AFTER value, as keys are typically fetched less often. |
450 | ## |
251dfd0e |
451 | print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key ); |
20f7b20c |
452 | |
453 | ## |
454 | # If value is blessed, preserve class name |
455 | ## |
456 | if ( $root->{autobless} ) { |
457 | my $value_class = Scalar::Util::blessed($value); |
458 | if ( defined $value_class && $value_class ne 'DBM::Deep' ) { |
459 | ## |
460 | # Blessed ref -- will restore later |
461 | ## |
462 | print( $fh chr(1) ); |
251dfd0e |
463 | print( $fh pack($self->{data_pack}, length($value_class)) . $value_class ); |
20f7b20c |
464 | $content_length += 1; |
251dfd0e |
465 | $content_length += $self->{data_size} + length($value_class); |
20f7b20c |
466 | } |
467 | else { |
468 | print( $fh chr(0) ); |
469 | $content_length += 1; |
470 | } |
471 | } |
472 | |
473 | ## |
474 | # If this is a new content area, advance EOF counter |
475 | ## |
476 | if ($location == $root->{end}) { |
477 | $root->{end} += DBM::Deep->SIG_SIZE; |
251dfd0e |
478 | $root->{end} += $self->{data_size} + $content_length; |
479 | $root->{end} += $self->{data_size} + length($plain_key); |
20f7b20c |
480 | } |
481 | |
482 | ## |
483 | # If content is a hash or array, create new child DBM::Deep object and |
484 | # pass each key or element to it. |
485 | ## |
486 | if ($r eq 'HASH') { |
487 | my $branch = DBM::Deep->new( |
488 | type => DBM::Deep->TYPE_HASH, |
489 | base_offset => $location, |
490 | root => $root, |
491 | ); |
492 | foreach my $key (keys %{$value}) { |
493 | $branch->STORE( $key, $value->{$key} ); |
494 | } |
495 | } |
496 | elsif ($r eq 'ARRAY') { |
497 | my $branch = DBM::Deep->new( |
498 | type => DBM::Deep->TYPE_ARRAY, |
499 | base_offset => $location, |
500 | root => $root, |
501 | ); |
502 | my $index = 0; |
503 | foreach my $element (@{$value}) { |
504 | $branch->STORE( $index, $element ); |
505 | $index++; |
506 | } |
507 | } |
508 | |
509 | return $result; |
510 | } |
d4b1166e |
511 | |
20f7b20c |
512 | return $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file"); |
d4b1166e |
513 | } |
514 | |
9020ee8c |
515 | sub get_bucket_value { |
beac1dff |
516 | ## |
517 | # Fetch single value given tag and MD5 digested key. |
518 | ## |
519 | my $self = shift; |
520 | my ($obj, $tag, $md5) = @_; |
521 | my $keys = $tag->{content}; |
9020ee8c |
522 | |
523 | my $fh = $obj->_fh; |
524 | |
beac1dff |
525 | ## |
526 | # Iterate through buckets, looking for a key match |
527 | ## |
9020ee8c |
528 | BUCKET: |
beac1dff |
529 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
530 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
531 | my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); |
9020ee8c |
532 | |
beac1dff |
533 | if (!$subloc) { |
534 | ## |
535 | # Hit end of list, no match |
536 | ## |
537 | return; |
538 | } |
9020ee8c |
539 | |
540 | if ( $md5 ne $key ) { |
541 | next BUCKET; |
542 | } |
543 | |
544 | ## |
545 | # Found match -- seek to offset and read signature |
546 | ## |
547 | my $signature; |
548 | seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); |
549 | read( $fh, $signature, DBM::Deep->SIG_SIZE); |
550 | |
551 | ## |
552 | # If value is a hash or array, return new DBM::Deep object with correct offset |
553 | ## |
554 | if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) { |
555 | my $obj = DBM::Deep->new( |
556 | type => $signature, |
557 | base_offset => $subloc, |
558 | root => $obj->_root, |
559 | ); |
560 | |
561 | if ($obj->_root->{autobless}) { |
562 | ## |
563 | # Skip over value and plain key to see if object needs |
564 | # to be re-blessed |
565 | ## |
251dfd0e |
566 | seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR); |
9020ee8c |
567 | |
568 | my $size; |
251dfd0e |
569 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
9020ee8c |
570 | if ($size) { seek($fh, $size, SEEK_CUR); } |
571 | |
572 | my $bless_bit; |
573 | read( $fh, $bless_bit, 1); |
574 | if (ord($bless_bit)) { |
575 | ## |
576 | # Yes, object needs to be re-blessed |
577 | ## |
578 | my $class_name; |
251dfd0e |
579 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
9020ee8c |
580 | if ($size) { read( $fh, $class_name, $size); } |
581 | if ($class_name) { $obj = bless( $obj, $class_name ); } |
582 | } |
583 | } |
584 | |
585 | return $obj; |
586 | } |
587 | |
588 | ## |
589 | # Otherwise return actual value |
590 | ## |
591 | elsif ($signature eq DBM::Deep->SIG_DATA) { |
592 | my $size; |
593 | my $value = ''; |
251dfd0e |
594 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
9020ee8c |
595 | if ($size) { read( $fh, $value, $size); } |
596 | return $value; |
597 | } |
598 | |
599 | ## |
600 | # Key exists, but content is null |
601 | ## |
602 | else { return; } |
beac1dff |
603 | } # i loop |
9020ee8c |
604 | |
beac1dff |
605 | return; |
9020ee8c |
606 | } |
ab0e4957 |
607 | |
608 | sub delete_bucket { |
beac1dff |
609 | ## |
610 | # Delete single key/value pair given tag and MD5 digested key. |
611 | ## |
612 | my $self = shift; |
613 | my ($obj, $tag, $md5) = @_; |
614 | my $keys = $tag->{content}; |
ab0e4957 |
615 | |
616 | my $fh = $obj->_fh; |
beac1dff |
617 | |
618 | ## |
619 | # Iterate through buckets, looking for a key match |
620 | ## |
ab0e4957 |
621 | BUCKET: |
beac1dff |
622 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
623 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
624 | my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); |
ab0e4957 |
625 | |
beac1dff |
626 | if (!$subloc) { |
627 | ## |
628 | # Hit end of list, no match |
629 | ## |
630 | return; |
631 | } |
ab0e4957 |
632 | |
633 | if ( $md5 ne $key ) { |
634 | next BUCKET; |
635 | } |
636 | |
637 | ## |
638 | # Matched key -- delete bucket and return |
639 | ## |
251dfd0e |
640 | seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $obj->_root->{file_offset}, SEEK_SET); |
641 | print( $fh substr($keys, ($i+1) * $self->{bucket_size} ) ); |
642 | print( $fh chr(0) x $self->{bucket_size} ); |
ab0e4957 |
643 | |
644 | return 1; |
beac1dff |
645 | } # i loop |
ab0e4957 |
646 | |
beac1dff |
647 | return; |
ab0e4957 |
648 | } |
649 | |
912d50b1 |
650 | sub bucket_exists { |
beac1dff |
651 | ## |
652 | # Check existence of single key given tag and MD5 digested key. |
653 | ## |
654 | my $self = shift; |
655 | my ($obj, $tag, $md5) = @_; |
656 | my $keys = $tag->{content}; |
657 | |
658 | ## |
659 | # Iterate through buckets, looking for a key match |
660 | ## |
912d50b1 |
661 | BUCKET: |
beac1dff |
662 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
663 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
664 | my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); |
912d50b1 |
665 | |
beac1dff |
666 | if (!$subloc) { |
667 | ## |
668 | # Hit end of list, no match |
669 | ## |
670 | return; |
671 | } |
912d50b1 |
672 | |
673 | if ( $md5 ne $key ) { |
674 | next BUCKET; |
675 | } |
676 | |
677 | ## |
678 | # Matched key -- return true |
679 | ## |
680 | return 1; |
beac1dff |
681 | } # i loop |
912d50b1 |
682 | |
beac1dff |
683 | return; |
912d50b1 |
684 | } |
685 | |
6736c116 |
686 | sub find_bucket_list { |
beac1dff |
687 | ## |
688 | # Locate offset for bucket list, given digested key |
689 | ## |
690 | my $self = shift; |
691 | my ($obj, $md5) = @_; |
692 | |
693 | ## |
694 | # Locate offset for bucket list using digest index system |
695 | ## |
696 | my $ch = 0; |
697 | my $tag = $self->load_tag($obj, $obj->_base_offset); |
698 | if (!$tag) { return; } |
699 | |
700 | while ($tag->{signature} ne DBM::Deep->SIG_BLIST) { |
701 | $tag = $self->index_lookup($obj, $tag, ord(substr($md5, $ch, 1))); |
702 | if (!$tag) { return; } |
703 | $ch++; |
704 | } |
705 | |
706 | return $tag; |
6736c116 |
707 | } |
708 | |
709 | sub traverse_index { |
beac1dff |
710 | ## |
711 | # Scan index and recursively step into deeper levels, looking for next key. |
712 | ## |
6736c116 |
713 | my $self = shift; |
714 | my ($obj, $offset, $ch, $force_return_next) = @_; |
715 | $force_return_next = undef unless $force_return_next; |
beac1dff |
716 | |
717 | my $tag = $self->load_tag($obj, $offset ); |
6736c116 |
718 | |
719 | my $fh = $obj->_fh; |
beac1dff |
720 | |
721 | if ($tag->{signature} ne DBM::Deep->SIG_BLIST) { |
722 | my $content = $tag->{content}; |
723 | my $start; |
724 | if ($obj->{return_next}) { $start = 0; } |
725 | else { $start = ord(substr($obj->{prev_md5}, $ch, 1)); } |
726 | |
727 | for (my $index = $start; $index < 256; $index++) { |
728 | my $subloc = unpack($self->{long_pack}, substr($content, $index * $self->{long_size}, $self->{long_size}) ); |
729 | if ($subloc) { |
730 | my $result = $self->traverse_index( $obj, $subloc, $ch + 1, $force_return_next ); |
731 | if (defined($result)) { return $result; } |
732 | } |
733 | } # index loop |
734 | |
735 | $obj->{return_next} = 1; |
736 | } # tag is an index |
737 | |
738 | elsif ($tag->{signature} eq DBM::Deep->SIG_BLIST) { |
739 | my $keys = $tag->{content}; |
740 | if ($force_return_next) { $obj->{return_next} = 1; } |
741 | |
742 | ## |
743 | # Iterate through buckets, looking for a key match |
744 | ## |
745 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
746 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
747 | my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); |
748 | |
749 | if (!$subloc) { |
750 | ## |
751 | # End of bucket list -- return to outer loop |
752 | ## |
753 | $obj->{return_next} = 1; |
754 | last; |
755 | } |
756 | elsif ($key eq $obj->{prev_md5}) { |
757 | ## |
758 | # Located previous key -- return next one found |
759 | ## |
760 | $obj->{return_next} = 1; |
761 | next; |
762 | } |
763 | elsif ($obj->{return_next}) { |
764 | ## |
765 | # Seek to bucket location and skip over signature |
766 | ## |
767 | seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET); |
768 | |
769 | ## |
770 | # Skip over value to get to plain key |
771 | ## |
772 | my $size; |
773 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
774 | if ($size) { seek($fh, $size, SEEK_CUR); } |
775 | |
776 | ## |
777 | # Read in plain key and return as scalar |
778 | ## |
779 | my $plain_key; |
780 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
781 | if ($size) { read( $fh, $plain_key, $size); } |
782 | |
783 | return $plain_key; |
784 | } |
785 | } # bucket loop |
786 | |
787 | $obj->{return_next} = 1; |
788 | } # tag is a bucket list |
789 | |
790 | return; |
6736c116 |
791 | } |
792 | |
793 | sub get_next_key { |
beac1dff |
794 | ## |
795 | # Locate next key, given digested previous one |
796 | ## |
6736c116 |
797 | my $self = shift; |
798 | my ($obj) = @_; |
beac1dff |
799 | |
800 | $obj->{prev_md5} = $_[1] ? $_[1] : undef; |
801 | $obj->{return_next} = 0; |
802 | |
803 | ## |
804 | # If the previous key was not specifed, start at the top and |
805 | # return the first one found. |
806 | ## |
807 | if (!$obj->{prev_md5}) { |
808 | $obj->{prev_md5} = chr(0) x $self->{hash_size}; |
809 | $obj->{return_next} = 1; |
810 | } |
811 | |
812 | return $self->traverse_index( $obj, $obj->_base_offset, 0 ); |
6736c116 |
813 | } |
814 | |
a20d9a3f |
815 | 1; |
816 | __END__ |