Commit | Line | Data |
a20d9a3f |
1 | package DBM::Deep::Engine; |
e5fc7e69 |
2 | use XXX; |
a20d9a3f |
3 | |
4 | use strict; |
5 | |
6 | use Fcntl qw( :DEFAULT :flock :seek ); |
7 | |
612969fb |
8 | sub precalc_sizes { |
beac1dff |
9 | ## |
10 | # Precalculate index, bucket and bucket list sizes |
11 | ## |
251dfd0e |
12 | my $self = shift; |
1bf65be7 |
13 | |
251dfd0e |
14 | $self->{index_size} = (2**8) * $self->{long_size}; |
15 | $self->{bucket_size} = $self->{hash_size} + $self->{long_size}; |
16 | $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size}; |
1bf65be7 |
17 | |
251dfd0e |
18 | return 1; |
1bf65be7 |
19 | } |
20 | |
21 | sub set_pack { |
beac1dff |
22 | ## |
23 | # Set pack/unpack modes (see file header for more) |
24 | ## |
251dfd0e |
25 | my $self = shift; |
1bf65be7 |
26 | my ($long_s, $long_p, $data_s, $data_p) = @_; |
27 | |
81d16922 |
28 | ## |
29 | # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file. |
beac1dff |
30 | # (Perl must be compiled with largefile support for files > 2 GB) |
81d16922 |
31 | # |
32 | # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file. |
beac1dff |
33 | # (Perl must be compiled with largefile and 64-bit long support) |
81d16922 |
34 | ## |
251dfd0e |
35 | $self->{long_size} = $long_s ? $long_s : 4; |
36 | $self->{long_pack} = $long_p ? $long_p : 'N'; |
1bf65be7 |
37 | |
81d16922 |
38 | ## |
39 | # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value. |
40 | # Upgrading this is possible (see above) but probably not necessary. If you need |
41 | # more than 4 GB for a single key or value, this module is really not for you :-) |
42 | ## |
251dfd0e |
43 | $self->{data_size} = $data_s ? $data_s : 4; |
44 | $self->{data_pack} = $data_p ? $data_p : 'N'; |
1bf65be7 |
45 | |
beac1dff |
46 | return $self->precalc_sizes(); |
1bf65be7 |
47 | } |
48 | |
49 | sub set_digest { |
beac1dff |
50 | ## |
51 | # Set key digest function (default is MD5) |
52 | ## |
251dfd0e |
53 | my $self = shift; |
1bf65be7 |
54 | my ($digest_func, $hash_size) = @_; |
55 | |
d0b74c17 |
56 | $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5; |
251dfd0e |
57 | $self->{hash_size} = $hash_size ? $hash_size : 16; |
612969fb |
58 | |
beac1dff |
59 | return $self->precalc_sizes(); |
612969fb |
60 | } |
61 | |
62 | sub new { |
63 | my $class = shift; |
64 | my ($args) = @_; |
65 | |
66 | my $self = bless { |
67 | long_size => 4, |
68 | long_pack => 'N', |
69 | data_size => 4, |
70 | data_pack => 'N', |
251dfd0e |
71 | |
612969fb |
72 | digest => \&Digest::MD5::md5, |
73 | hash_size => 16, |
251dfd0e |
74 | |
81d16922 |
75 | ## |
76 | # Maximum number of buckets per list before another level of indexing is done. |
77 | # Increase this value for slightly greater speed, but larger database files. |
78 | # DO NOT decrease this value below 16, due to risk of recursive reindex overrun. |
79 | ## |
612969fb |
80 | max_buckets => 16, |
81 | }, $class; |
82 | |
251dfd0e |
83 | $self->precalc_sizes; |
612969fb |
84 | |
85 | return $self; |
1bf65be7 |
86 | } |
87 | |
70b55428 |
88 | sub setup_fh { |
89 | my $self = shift; |
90 | my ($obj) = @_; |
91 | |
92 | $self->open( $obj ) if !defined $obj->_fh; |
93 | |
673464d9 |
94 | #XXX We have to make sure we don't mess up when autoflush isn't turned on |
70b55428 |
95 | unless ( $obj->_root->{inode} ) { |
96 | my @stats = stat($obj->_fh); |
97 | $obj->_root->{inode} = $stats[1]; |
98 | $obj->_root->{end} = $stats[7]; |
99 | } |
100 | |
101 | return 1; |
102 | } |
103 | |
a20d9a3f |
104 | sub open { |
20f7b20c |
105 | ## |
106 | # Open a fh to the database, create if nonexistent. |
107 | # Make sure file signature matches DBM::Deep spec. |
108 | ## |
a20d9a3f |
109 | my $self = shift; |
70b55428 |
110 | my ($obj) = @_; |
a20d9a3f |
111 | |
3d1b8be9 |
112 | if (defined($obj->_fh)) { $self->close_fh( $obj ); } |
20f7b20c |
113 | |
673464d9 |
114 | # Theoretically, adding O_BINARY should remove the need for the binmode |
115 | # Of course, testing it is going to be ... interesting. |
116 | my $flags = O_RDWR | O_CREAT | O_BINARY; |
a20d9a3f |
117 | |
673464d9 |
118 | my $fh; |
119 | sysopen( $fh, $obj->_root->{file}, $flags ) |
120 | or $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!"); |
121 | $obj->_root->{fh} = $fh; |
a20d9a3f |
122 | |
123 | #XXX Can we remove this by using the right sysopen() flags? |
124 | # Maybe ... q.v. above |
125 | binmode $fh; # for win32 |
126 | |
cd59cad8 |
127 | if ($obj->_root->{autoflush}) { |
a20d9a3f |
128 | my $old = select $fh; |
129 | $|=1; |
130 | select $old; |
131 | } |
20f7b20c |
132 | |
cd59cad8 |
133 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
a20d9a3f |
134 | |
135 | my $signature; |
136 | my $bytes_read = read( $fh, $signature, length(DBM::Deep->SIG_FILE)); |
20f7b20c |
137 | |
a20d9a3f |
138 | ## |
139 | # File is empty -- write signature and master index |
140 | ## |
141 | if (!$bytes_read) { |
cd59cad8 |
142 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
a20d9a3f |
143 | print( $fh DBM::Deep->SIG_FILE); |
d0b74c17 |
144 | |
ec1bce6b |
145 | $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $self->{index_size}); |
a20d9a3f |
146 | |
e5fc7e69 |
147 | # Why is this being printed here? I'm not seeing where anything actually points to |
148 | # this spot. |
149 | #XXX $obj->_root->{end} isn't updated from these 10 bytes that are being written |
a20d9a3f |
150 | my $plain_key = "[base]"; |
251dfd0e |
151 | print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key ); |
a20d9a3f |
152 | |
153 | # Flush the filehandle |
154 | my $old_fh = select $fh; |
155 | my $old_af = $|; $| = 1; $| = $old_af; |
156 | select $old_fh; |
157 | |
a20d9a3f |
158 | return 1; |
159 | } |
20f7b20c |
160 | |
a20d9a3f |
161 | ## |
162 | # Check signature was valid |
163 | ## |
164 | unless ($signature eq DBM::Deep->SIG_FILE) { |
3d1b8be9 |
165 | $self->close_fh( $obj ); |
e5fc7e69 |
166 | $obj->_throw_error("Signature not found -- file is not a Deep DB"); |
a20d9a3f |
167 | } |
168 | |
a20d9a3f |
169 | ## |
170 | # Get our type from master index signature |
171 | ## |
e5fc7e69 |
172 | my $tag = $self->load_tag($obj, $obj->_base_offset) |
173 | or $obj->_throw_error("Corrupted file, no master index record"); |
d0b74c17 |
174 | |
e5fc7e69 |
175 | unless ($obj->{type} eq $tag->{signature}) { |
176 | $obj->_throw_error("File type mismatch"); |
a20d9a3f |
177 | } |
20f7b20c |
178 | |
d0b74c17 |
179 | #XXX We probably also want to store the hash algorithm name and not assume anything |
180 | #XXX The cool thing would be to allow a different hashing algorithm at every level |
181 | |
a20d9a3f |
182 | return 1; |
183 | } |
184 | |
3d1b8be9 |
185 | sub close_fh { |
cd59cad8 |
186 | my $self = shift; |
a21f2d90 |
187 | my ($obj) = @_; |
cd59cad8 |
188 | |
189 | if ( my $fh = $obj->_root->{fh} ) { |
190 | close $fh; |
191 | } |
192 | $obj->_root->{fh} = undef; |
193 | |
194 | return 1; |
195 | } |
196 | |
d4b1166e |
197 | sub create_tag { |
20f7b20c |
198 | ## |
199 | # Given offset, signature and content, create tag and write to disk |
200 | ## |
d4b1166e |
201 | my $self = shift; |
20f7b20c |
202 | my ($obj, $offset, $sig, $content) = @_; |
203 | my $size = length($content); |
204 | |
d4b1166e |
205 | my $fh = $obj->_fh; |
206 | |
20f7b20c |
207 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
251dfd0e |
208 | print( $fh $sig . pack($self->{data_pack}, $size) . $content ); |
20f7b20c |
209 | |
210 | if ($offset == $obj->_root->{end}) { |
251dfd0e |
211 | $obj->_root->{end} += DBM::Deep->SIG_SIZE + $self->{data_size} + $size; |
20f7b20c |
212 | } |
213 | |
214 | return { |
215 | signature => $sig, |
216 | size => $size, |
251dfd0e |
217 | offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size}, |
20f7b20c |
218 | content => $content |
219 | }; |
d4b1166e |
220 | } |
221 | |
222 | sub load_tag { |
20f7b20c |
223 | ## |
224 | # Given offset, load single tag and return signature, size and data |
225 | ## |
d4b1166e |
226 | my $self = shift; |
20f7b20c |
227 | my ($obj, $offset) = @_; |
228 | |
d4b1166e |
229 | my $fh = $obj->_fh; |
230 | |
20f7b20c |
231 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
e5fc7e69 |
232 | |
233 | #XXX I'm not sure this check will work given autoflush ... |
234 | return if eof $fh; |
20f7b20c |
235 | |
d4b1166e |
236 | my $b; |
251dfd0e |
237 | read( $fh, $b, DBM::Deep->SIG_SIZE + $self->{data_size} ); |
238 | my ($sig, $size) = unpack( "A $self->{data_pack}", $b ); |
20f7b20c |
239 | |
240 | my $buffer; |
241 | read( $fh, $buffer, $size); |
242 | |
243 | return { |
244 | signature => $sig, |
245 | size => $size, |
251dfd0e |
246 | offset => $offset + DBM::Deep->SIG_SIZE + $self->{data_size}, |
20f7b20c |
247 | content => $buffer |
248 | }; |
d4b1166e |
249 | } |
250 | |
20f7b20c |
251 | sub add_bucket { |
252 | ## |
253 | # Adds one key/value pair to bucket list, given offset, MD5 digest of key, |
254 | # plain (undigested) key and value. |
255 | ## |
d4b1166e |
256 | my $self = shift; |
20f7b20c |
257 | my ($obj, $tag, $md5, $plain_key, $value) = @_; |
258 | my $keys = $tag->{content}; |
259 | my $location = 0; |
260 | my $result = 2; |
261 | |
262 | my $root = $obj->_root; |
263 | |
264 | my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) }; |
265 | my $internal_ref = $is_dbm_deep && ($value->_root eq $root); |
266 | |
267 | my $fh = $obj->_fh; |
268 | |
269 | ## |
270 | # Iterate through buckets, seeing if this is a new entry or a replace. |
271 | ## |
d0b74c17 |
272 | for (my $i = 0; $i < $self->{max_buckets}; $i++) { |
251dfd0e |
273 | my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); |
20f7b20c |
274 | if (!$subloc) { |
275 | ## |
276 | # Found empty bucket (end of list). Populate and exit loop. |
277 | ## |
278 | $result = 2; |
279 | |
280 | $location = $internal_ref |
281 | ? $value->_base_offset |
282 | : $root->{end}; |
e5fc7e69 |
283 | print "NEW: $location\n"; |
20f7b20c |
284 | |
251dfd0e |
285 | seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); |
286 | print( $fh $md5 . pack($self->{long_pack}, $location) ); |
20f7b20c |
287 | last; |
288 | } |
289 | |
251dfd0e |
290 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
20f7b20c |
291 | if ($md5 eq $key) { |
292 | ## |
293 | # Found existing bucket with same key. Replace with new value. |
294 | ## |
295 | $result = 1; |
296 | |
297 | if ($internal_ref) { |
298 | $location = $value->_base_offset; |
251dfd0e |
299 | seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); |
300 | print( $fh $md5 . pack($self->{long_pack}, $location) ); |
20f7b20c |
301 | return $result; |
302 | } |
303 | |
304 | seek($fh, $subloc + DBM::Deep->SIG_SIZE + $root->{file_offset}, SEEK_SET); |
305 | my $size; |
251dfd0e |
306 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
20f7b20c |
307 | |
308 | ## |
309 | # If value is a hash, array, or raw value with equal or less size, we can |
310 | # reuse the same content area of the database. Otherwise, we have to create |
311 | # a new content area at the EOF. |
312 | ## |
313 | my $actual_length; |
314 | my $r = Scalar::Util::reftype( $value ) || ''; |
315 | if ( $r eq 'HASH' || $r eq 'ARRAY' ) { |
ec1bce6b |
316 | $actual_length = $self->{index_size}; |
20f7b20c |
317 | |
318 | # if autobless is enabled, must also take into consideration |
319 | # the class name, as it is stored along with key/value. |
320 | if ( $root->{autobless} ) { |
321 | my $value_class = Scalar::Util::blessed($value); |
322 | if ( defined $value_class && !$value->isa('DBM::Deep') ) { |
323 | $actual_length += length($value_class); |
324 | } |
325 | } |
326 | } |
327 | else { $actual_length = length($value); } |
328 | |
329 | if ($actual_length <= $size) { |
330 | $location = $subloc; |
331 | } |
332 | else { |
333 | $location = $root->{end}; |
251dfd0e |
334 | seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $self->{hash_size} + $root->{file_offset}, SEEK_SET); |
335 | print( $fh pack($self->{long_pack}, $location) ); |
20f7b20c |
336 | } |
337 | |
338 | last; |
339 | } |
340 | } |
341 | |
342 | ## |
343 | # If this is an internal reference, return now. |
344 | # No need to write value or plain key |
345 | ## |
346 | if ($internal_ref) { |
347 | return $result; |
348 | } |
349 | |
350 | ## |
351 | # If bucket didn't fit into list, split into a new index level |
352 | ## |
353 | if (!$location) { |
354 | seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); |
251dfd0e |
355 | print( $fh pack($self->{long_pack}, $root->{end}) ); |
20f7b20c |
356 | |
ec1bce6b |
357 | my $index_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_INDEX, chr(0) x $self->{index_size}); |
20f7b20c |
358 | my @offsets = (); |
359 | |
251dfd0e |
360 | $keys .= $md5 . pack($self->{long_pack}, 0); |
20f7b20c |
361 | |
251dfd0e |
362 | for (my $i=0; $i<=$self->{max_buckets}; $i++) { |
363 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
20f7b20c |
364 | if ($key) { |
251dfd0e |
365 | my $old_subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + |
366 | $self->{hash_size}, $self->{long_size})); |
20f7b20c |
367 | my $num = ord(substr($key, $tag->{ch} + 1, 1)); |
368 | |
369 | if ($offsets[$num]) { |
251dfd0e |
370 | my $offset = $offsets[$num] + DBM::Deep->SIG_SIZE + $self->{data_size}; |
20f7b20c |
371 | seek($fh, $offset + $root->{file_offset}, SEEK_SET); |
372 | my $subkeys; |
251dfd0e |
373 | read( $fh, $subkeys, $self->{bucket_list_size}); |
20f7b20c |
374 | |
251dfd0e |
375 | for (my $k=0; $k<$self->{max_buckets}; $k++) { |
376 | my $subloc = unpack($self->{long_pack}, substr($subkeys, ($k * $self->{bucket_size}) + |
377 | $self->{hash_size}, $self->{long_size})); |
20f7b20c |
378 | if (!$subloc) { |
251dfd0e |
379 | seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); |
380 | print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); |
20f7b20c |
381 | last; |
382 | } |
383 | } # k loop |
384 | } |
385 | else { |
386 | $offsets[$num] = $root->{end}; |
251dfd0e |
387 | seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); |
388 | print( $fh pack($self->{long_pack}, $root->{end}) ); |
20f7b20c |
389 | |
251dfd0e |
390 | my $blist_tag = $self->create_tag($obj, $root->{end}, DBM::Deep->SIG_BLIST, chr(0) x $self->{bucket_list_size}); |
20f7b20c |
391 | |
392 | seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); |
251dfd0e |
393 | print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); |
20f7b20c |
394 | } |
395 | } # key is real |
396 | } # i loop |
397 | |
398 | $location ||= $root->{end}; |
399 | } # re-index bucket list |
400 | |
401 | ## |
402 | # Seek to content area and store signature, value and plaintext key |
403 | ## |
404 | if ($location) { |
405 | my $content_length; |
406 | seek($fh, $location + $root->{file_offset}, SEEK_SET); |
407 | |
408 | ## |
409 | # Write signature based on content type, set content length and write actual value. |
410 | ## |
411 | my $r = Scalar::Util::reftype($value) || ''; |
412 | if ($r eq 'HASH') { |
413 | print( $fh DBM::Deep->TYPE_HASH ); |
251dfd0e |
414 | print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); |
ec1bce6b |
415 | $content_length = $self->{index_size}; |
20f7b20c |
416 | } |
417 | elsif ($r eq 'ARRAY') { |
418 | print( $fh DBM::Deep->TYPE_ARRAY ); |
251dfd0e |
419 | print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); |
ec1bce6b |
420 | $content_length = $self->{index_size}; |
20f7b20c |
421 | } |
422 | elsif (!defined($value)) { |
423 | print( $fh DBM::Deep->SIG_NULL ); |
251dfd0e |
424 | print( $fh pack($self->{data_pack}, 0) ); |
20f7b20c |
425 | $content_length = 0; |
426 | } |
427 | else { |
428 | print( $fh DBM::Deep->SIG_DATA ); |
251dfd0e |
429 | print( $fh pack($self->{data_pack}, length($value)) . $value ); |
20f7b20c |
430 | $content_length = length($value); |
431 | } |
432 | |
433 | ## |
434 | # Plain key is stored AFTER value, as keys are typically fetched less often. |
435 | ## |
251dfd0e |
436 | print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key ); |
20f7b20c |
437 | |
438 | ## |
439 | # If value is blessed, preserve class name |
440 | ## |
441 | if ( $root->{autobless} ) { |
442 | my $value_class = Scalar::Util::blessed($value); |
443 | if ( defined $value_class && $value_class ne 'DBM::Deep' ) { |
444 | ## |
445 | # Blessed ref -- will restore later |
446 | ## |
447 | print( $fh chr(1) ); |
251dfd0e |
448 | print( $fh pack($self->{data_pack}, length($value_class)) . $value_class ); |
20f7b20c |
449 | $content_length += 1; |
251dfd0e |
450 | $content_length += $self->{data_size} + length($value_class); |
20f7b20c |
451 | } |
452 | else { |
453 | print( $fh chr(0) ); |
454 | $content_length += 1; |
455 | } |
456 | } |
457 | |
458 | ## |
459 | # If this is a new content area, advance EOF counter |
460 | ## |
461 | if ($location == $root->{end}) { |
462 | $root->{end} += DBM::Deep->SIG_SIZE; |
251dfd0e |
463 | $root->{end} += $self->{data_size} + $content_length; |
464 | $root->{end} += $self->{data_size} + length($plain_key); |
20f7b20c |
465 | } |
466 | |
467 | ## |
468 | # If content is a hash or array, create new child DBM::Deep object and |
469 | # pass each key or element to it. |
470 | ## |
471 | if ($r eq 'HASH') { |
472 | my $branch = DBM::Deep->new( |
473 | type => DBM::Deep->TYPE_HASH, |
474 | base_offset => $location, |
475 | root => $root, |
476 | ); |
477 | foreach my $key (keys %{$value}) { |
478 | $branch->STORE( $key, $value->{$key} ); |
479 | } |
480 | } |
481 | elsif ($r eq 'ARRAY') { |
482 | my $branch = DBM::Deep->new( |
483 | type => DBM::Deep->TYPE_ARRAY, |
484 | base_offset => $location, |
485 | root => $root, |
486 | ); |
487 | my $index = 0; |
488 | foreach my $element (@{$value}) { |
489 | $branch->STORE( $index, $element ); |
490 | $index++; |
491 | } |
492 | } |
493 | |
494 | return $result; |
495 | } |
d4b1166e |
496 | |
e5fc7e69 |
497 | $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file"); |
d4b1166e |
498 | } |
499 | |
9020ee8c |
500 | sub get_bucket_value { |
beac1dff |
501 | ## |
502 | # Fetch single value given tag and MD5 digested key. |
503 | ## |
504 | my $self = shift; |
505 | my ($obj, $tag, $md5) = @_; |
506 | my $keys = $tag->{content}; |
9020ee8c |
507 | |
508 | my $fh = $obj->_fh; |
509 | |
beac1dff |
510 | ## |
511 | # Iterate through buckets, looking for a key match |
512 | ## |
9020ee8c |
513 | BUCKET: |
beac1dff |
514 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
515 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
516 | my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); |
9020ee8c |
517 | |
beac1dff |
518 | if (!$subloc) { |
519 | ## |
520 | # Hit end of list, no match |
521 | ## |
522 | return; |
523 | } |
9020ee8c |
524 | |
525 | if ( $md5 ne $key ) { |
526 | next BUCKET; |
527 | } |
528 | |
529 | ## |
530 | # Found match -- seek to offset and read signature |
531 | ## |
532 | my $signature; |
533 | seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); |
534 | read( $fh, $signature, DBM::Deep->SIG_SIZE); |
d0b74c17 |
535 | |
9020ee8c |
536 | ## |
537 | # If value is a hash or array, return new DBM::Deep object with correct offset |
538 | ## |
539 | if (($signature eq DBM::Deep->TYPE_HASH) || ($signature eq DBM::Deep->TYPE_ARRAY)) { |
540 | my $obj = DBM::Deep->new( |
541 | type => $signature, |
542 | base_offset => $subloc, |
543 | root => $obj->_root, |
544 | ); |
d0b74c17 |
545 | |
9020ee8c |
546 | if ($obj->_root->{autobless}) { |
547 | ## |
548 | # Skip over value and plain key to see if object needs |
549 | # to be re-blessed |
550 | ## |
251dfd0e |
551 | seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR); |
d0b74c17 |
552 | |
9020ee8c |
553 | my $size; |
251dfd0e |
554 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
9020ee8c |
555 | if ($size) { seek($fh, $size, SEEK_CUR); } |
d0b74c17 |
556 | |
9020ee8c |
557 | my $bless_bit; |
558 | read( $fh, $bless_bit, 1); |
559 | if (ord($bless_bit)) { |
560 | ## |
561 | # Yes, object needs to be re-blessed |
562 | ## |
563 | my $class_name; |
251dfd0e |
564 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
9020ee8c |
565 | if ($size) { read( $fh, $class_name, $size); } |
566 | if ($class_name) { $obj = bless( $obj, $class_name ); } |
567 | } |
568 | } |
d0b74c17 |
569 | |
9020ee8c |
570 | return $obj; |
571 | } |
d0b74c17 |
572 | |
9020ee8c |
573 | ## |
574 | # Otherwise return actual value |
575 | ## |
576 | elsif ($signature eq DBM::Deep->SIG_DATA) { |
577 | my $size; |
578 | my $value = ''; |
251dfd0e |
579 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
9020ee8c |
580 | if ($size) { read( $fh, $value, $size); } |
581 | return $value; |
582 | } |
d0b74c17 |
583 | |
9020ee8c |
584 | ## |
585 | # Key exists, but content is null |
586 | ## |
587 | else { return; } |
beac1dff |
588 | } # i loop |
9020ee8c |
589 | |
beac1dff |
590 | return; |
9020ee8c |
591 | } |
ab0e4957 |
592 | |
593 | sub delete_bucket { |
beac1dff |
594 | ## |
595 | # Delete single key/value pair given tag and MD5 digested key. |
596 | ## |
597 | my $self = shift; |
598 | my ($obj, $tag, $md5) = @_; |
599 | my $keys = $tag->{content}; |
ab0e4957 |
600 | |
601 | my $fh = $obj->_fh; |
d0b74c17 |
602 | |
beac1dff |
603 | ## |
604 | # Iterate through buckets, looking for a key match |
605 | ## |
ab0e4957 |
606 | BUCKET: |
beac1dff |
607 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
608 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
609 | my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); |
ab0e4957 |
610 | |
beac1dff |
611 | if (!$subloc) { |
612 | ## |
613 | # Hit end of list, no match |
614 | ## |
615 | return; |
616 | } |
ab0e4957 |
617 | |
618 | if ( $md5 ne $key ) { |
619 | next BUCKET; |
620 | } |
621 | |
622 | ## |
623 | # Matched key -- delete bucket and return |
624 | ## |
251dfd0e |
625 | seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $obj->_root->{file_offset}, SEEK_SET); |
626 | print( $fh substr($keys, ($i+1) * $self->{bucket_size} ) ); |
627 | print( $fh chr(0) x $self->{bucket_size} ); |
d0b74c17 |
628 | |
ab0e4957 |
629 | return 1; |
beac1dff |
630 | } # i loop |
ab0e4957 |
631 | |
beac1dff |
632 | return; |
ab0e4957 |
633 | } |
634 | |
912d50b1 |
635 | sub bucket_exists { |
beac1dff |
636 | ## |
637 | # Check existence of single key given tag and MD5 digested key. |
638 | ## |
639 | my $self = shift; |
640 | my ($obj, $tag, $md5) = @_; |
641 | my $keys = $tag->{content}; |
d0b74c17 |
642 | |
beac1dff |
643 | ## |
644 | # Iterate through buckets, looking for a key match |
645 | ## |
912d50b1 |
646 | BUCKET: |
beac1dff |
647 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
648 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
649 | my $subloc = unpack($self->{long_pack}, substr($keys, ($i * $self->{bucket_size}) + $self->{hash_size}, $self->{long_size})); |
912d50b1 |
650 | |
beac1dff |
651 | if (!$subloc) { |
652 | ## |
653 | # Hit end of list, no match |
654 | ## |
655 | return; |
656 | } |
912d50b1 |
657 | |
658 | if ( $md5 ne $key ) { |
659 | next BUCKET; |
660 | } |
661 | |
662 | ## |
663 | # Matched key -- return true |
664 | ## |
665 | return 1; |
beac1dff |
666 | } # i loop |
912d50b1 |
667 | |
beac1dff |
668 | return; |
912d50b1 |
669 | } |
670 | |
6736c116 |
671 | sub find_bucket_list { |
beac1dff |
672 | ## |
673 | # Locate offset for bucket list, given digested key |
674 | ## |
675 | my $self = shift; |
d0b74c17 |
676 | my ($obj, $md5, $args) = @_; |
677 | $args = {} unless $args; |
678 | |
beac1dff |
679 | ## |
680 | # Locate offset for bucket list using digest index system |
681 | ## |
e5fc7e69 |
682 | my $tag = $self->load_tag($obj, $obj->_base_offset) |
683 | or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" ); |
684 | #print $obj->_base_offset, " : $tag->{signature} : $tag->{offset} : $tag->{size}\n"; |
d0b74c17 |
685 | |
e5fc7e69 |
686 | my $ch = 0; |
beac1dff |
687 | while ($tag->{signature} ne DBM::Deep->SIG_BLIST) { |
d0b74c17 |
688 | my $num = ord substr($md5, $ch, 1); |
689 | |
690 | my $ref_loc = $tag->{offset} + ($num * $self->{long_size}); |
691 | $tag = $self->index_lookup( $obj, $tag, $num ); |
692 | |
693 | if (!$tag) { |
694 | if ( $args->{create} ) { |
695 | my $fh = $obj->_fh; |
696 | seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET); |
697 | print( $fh pack($self->{long_pack}, $obj->_root->{end}) ); |
698 | |
699 | $tag = $self->create_tag( |
700 | $obj, $obj->_root->{end}, |
701 | DBM::Deep->SIG_BLIST, |
702 | chr(0) x $self->{bucket_list_size}, |
703 | ); |
704 | |
705 | $tag->{ref_loc} = $ref_loc; |
706 | $tag->{ch} = $ch; |
707 | |
708 | last; |
709 | } |
710 | else { |
711 | return; |
712 | } |
713 | } |
714 | |
715 | $tag->{ch} = $ch; |
716 | $tag->{ref_loc} = $ref_loc; |
717 | |
beac1dff |
718 | $ch++; |
719 | } |
d0b74c17 |
720 | |
beac1dff |
721 | return $tag; |
6736c116 |
722 | } |
723 | |
d0b74c17 |
724 | sub index_lookup { |
725 | ## |
726 | # Given index tag, lookup single entry in index and return . |
727 | ## |
728 | my $self = shift; |
729 | my ($obj, $tag, $index) = @_; |
730 | |
731 | my $location = unpack( |
732 | $self->{long_pack}, |
733 | substr( |
734 | $tag->{content}, |
735 | $index * $self->{long_size}, |
736 | $self->{long_size}, |
737 | ), |
738 | ); |
739 | |
740 | if (!$location) { return; } |
741 | |
742 | return $self->load_tag( $obj, $location ); |
743 | } |
744 | |
6736c116 |
745 | sub traverse_index { |
beac1dff |
746 | ## |
747 | # Scan index and recursively step into deeper levels, looking for next key. |
748 | ## |
6736c116 |
749 | my $self = shift; |
750 | my ($obj, $offset, $ch, $force_return_next) = @_; |
d0b74c17 |
751 | |
beac1dff |
752 | my $tag = $self->load_tag($obj, $offset ); |
6736c116 |
753 | |
754 | my $fh = $obj->_fh; |
d0b74c17 |
755 | |
beac1dff |
756 | if ($tag->{signature} ne DBM::Deep->SIG_BLIST) { |
757 | my $content = $tag->{content}; |
e5fc7e69 |
758 | my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1)); |
d0b74c17 |
759 | |
beac1dff |
760 | for (my $index = $start; $index < 256; $index++) { |
e5fc7e69 |
761 | my $subloc = unpack( |
762 | $self->{long_pack}, |
763 | substr($content, $index * $self->{long_size}, $self->{long_size}), |
764 | ); |
765 | |
beac1dff |
766 | if ($subloc) { |
e5fc7e69 |
767 | my $result = $self->traverse_index( |
768 | $obj, $subloc, $ch + 1, $force_return_next, |
769 | ); |
770 | |
beac1dff |
771 | if (defined($result)) { return $result; } |
772 | } |
773 | } # index loop |
d0b74c17 |
774 | |
beac1dff |
775 | $obj->{return_next} = 1; |
776 | } # tag is an index |
d0b74c17 |
777 | |
e5fc7e69 |
778 | else { |
beac1dff |
779 | my $keys = $tag->{content}; |
780 | if ($force_return_next) { $obj->{return_next} = 1; } |
d0b74c17 |
781 | |
beac1dff |
782 | ## |
783 | # Iterate through buckets, looking for a key match |
784 | ## |
785 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
786 | my $key = substr($keys, $i * $self->{bucket_size}, $self->{hash_size}); |
e5fc7e69 |
787 | my $subloc = unpack( |
788 | $self->{long_pack}, |
789 | substr( |
790 | $keys, |
791 | ($i * $self->{bucket_size}) + $self->{hash_size}, |
792 | $self->{long_size}, |
793 | ), |
794 | ); |
d0b74c17 |
795 | |
beac1dff |
796 | if (!$subloc) { |
797 | ## |
798 | # End of bucket list -- return to outer loop |
799 | ## |
800 | $obj->{return_next} = 1; |
801 | last; |
802 | } |
803 | elsif ($key eq $obj->{prev_md5}) { |
804 | ## |
805 | # Located previous key -- return next one found |
806 | ## |
807 | $obj->{return_next} = 1; |
808 | next; |
809 | } |
810 | elsif ($obj->{return_next}) { |
811 | ## |
812 | # Seek to bucket location and skip over signature |
813 | ## |
814 | seek($fh, $subloc + DBM::Deep->SIG_SIZE + $obj->_root->{file_offset}, SEEK_SET); |
d0b74c17 |
815 | |
beac1dff |
816 | ## |
817 | # Skip over value to get to plain key |
818 | ## |
819 | my $size; |
e5fc7e69 |
820 | read( $fh, $size, $self->{data_size}); |
821 | $size = unpack($self->{data_pack}, $size); |
beac1dff |
822 | if ($size) { seek($fh, $size, SEEK_CUR); } |
d0b74c17 |
823 | |
beac1dff |
824 | ## |
825 | # Read in plain key and return as scalar |
826 | ## |
827 | my $plain_key; |
e5fc7e69 |
828 | read( $fh, $size, $self->{data_size}); |
829 | $size = unpack($self->{data_pack}, $size); |
beac1dff |
830 | if ($size) { read( $fh, $plain_key, $size); } |
d0b74c17 |
831 | |
beac1dff |
832 | return $plain_key; |
833 | } |
834 | } # bucket loop |
d0b74c17 |
835 | |
beac1dff |
836 | $obj->{return_next} = 1; |
837 | } # tag is a bucket list |
d0b74c17 |
838 | |
beac1dff |
839 | return; |
6736c116 |
840 | } |
841 | |
842 | sub get_next_key { |
beac1dff |
843 | ## |
844 | # Locate next key, given digested previous one |
845 | ## |
6736c116 |
846 | my $self = shift; |
847 | my ($obj) = @_; |
d0b74c17 |
848 | |
beac1dff |
849 | $obj->{prev_md5} = $_[1] ? $_[1] : undef; |
850 | $obj->{return_next} = 0; |
d0b74c17 |
851 | |
beac1dff |
852 | ## |
853 | # If the previous key was not specifed, start at the top and |
854 | # return the first one found. |
855 | ## |
856 | if (!$obj->{prev_md5}) { |
857 | $obj->{prev_md5} = chr(0) x $self->{hash_size}; |
858 | $obj->{return_next} = 1; |
859 | } |
d0b74c17 |
860 | |
beac1dff |
861 | return $self->traverse_index( $obj, $obj->_base_offset, 0 ); |
6736c116 |
862 | } |
863 | |
a20d9a3f |
864 | 1; |
865 | __END__ |