Commit | Line | Data |
a20d9a3f |
1 | package DBM::Deep::Engine; |
2 | |
3 | use strict; |
4 | |
5 | use Fcntl qw( :DEFAULT :flock :seek ); |
6 | |
8db25060 |
7 | ## |
8 | # Setup file and tag signatures. These should never change. |
9 | ## |
10 | sub SIG_FILE () { 'DPDB' } |
11 | sub SIG_INTERNAL () { 'i' } |
12 | sub SIG_HASH () { 'H' } |
13 | sub SIG_ARRAY () { 'A' } |
14 | sub SIG_SCALAR () { 'S' } |
15 | sub SIG_NULL () { 'N' } |
16 | sub SIG_DATA () { 'D' } |
17 | sub SIG_INDEX () { 'I' } |
18 | sub SIG_BLIST () { 'B' } |
19 | sub SIG_SIZE () { 1 } |
20 | |
612969fb |
21 | sub precalc_sizes { |
beac1dff |
22 | ## |
23 | # Precalculate index, bucket and bucket list sizes |
24 | ## |
251dfd0e |
25 | my $self = shift; |
1bf65be7 |
26 | |
251dfd0e |
27 | $self->{index_size} = (2**8) * $self->{long_size}; |
28 | $self->{bucket_size} = $self->{hash_size} + $self->{long_size}; |
29 | $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size}; |
1bf65be7 |
30 | |
251dfd0e |
31 | return 1; |
1bf65be7 |
32 | } |
33 | |
34 | sub set_pack { |
beac1dff |
35 | ## |
36 | # Set pack/unpack modes (see file header for more) |
37 | ## |
251dfd0e |
38 | my $self = shift; |
1bf65be7 |
39 | my ($long_s, $long_p, $data_s, $data_p) = @_; |
40 | |
81d16922 |
41 | ## |
75be6413 |
42 | # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 |
43 | # GB per file. |
beac1dff |
44 | # (Perl must be compiled with largefile support for files > 2 GB) |
81d16922 |
45 | # |
46 | # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file. |
beac1dff |
47 | # (Perl must be compiled with largefile and 64-bit long support) |
81d16922 |
48 | ## |
251dfd0e |
49 | $self->{long_size} = $long_s ? $long_s : 4; |
50 | $self->{long_pack} = $long_p ? $long_p : 'N'; |
1bf65be7 |
51 | |
81d16922 |
52 | ## |
75be6413 |
53 | # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each |
54 | # key/value. Upgrading this is possible (see above) but probably not necessary. |
55 | # If you need more than 4 GB for a single key or value, this module is really |
56 | # not for you :-) |
81d16922 |
57 | ## |
251dfd0e |
58 | $self->{data_size} = $data_s ? $data_s : 4; |
59 | $self->{data_pack} = $data_p ? $data_p : 'N'; |
1bf65be7 |
60 | |
beac1dff |
61 | return $self->precalc_sizes(); |
1bf65be7 |
62 | } |
63 | |
64 | sub set_digest { |
beac1dff |
65 | ## |
66 | # Set key digest function (default is MD5) |
67 | ## |
251dfd0e |
68 | my $self = shift; |
1bf65be7 |
69 | my ($digest_func, $hash_size) = @_; |
70 | |
d0b74c17 |
71 | $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5; |
251dfd0e |
72 | $self->{hash_size} = $hash_size ? $hash_size : 16; |
612969fb |
73 | |
beac1dff |
74 | return $self->precalc_sizes(); |
612969fb |
75 | } |
76 | |
77 | sub new { |
78 | my $class = shift; |
79 | my ($args) = @_; |
80 | |
81 | my $self = bless { |
82 | long_size => 4, |
83 | long_pack => 'N', |
84 | data_size => 4, |
85 | data_pack => 'N', |
251dfd0e |
86 | |
612969fb |
87 | digest => \&Digest::MD5::md5, |
88 | hash_size => 16, |
251dfd0e |
89 | |
81d16922 |
90 | ## |
91 | # Maximum number of buckets per list before another level of indexing is done. |
92 | # Increase this value for slightly greater speed, but larger database files. |
93 | # DO NOT decrease this value below 16, due to risk of recursive reindex overrun. |
94 | ## |
612969fb |
95 | max_buckets => 16, |
96 | }, $class; |
97 | |
251dfd0e |
98 | $self->precalc_sizes; |
612969fb |
99 | |
100 | return $self; |
1bf65be7 |
101 | } |
102 | |
70b55428 |
103 | sub setup_fh { |
104 | my $self = shift; |
105 | my ($obj) = @_; |
106 | |
107 | $self->open( $obj ) if !defined $obj->_fh; |
108 | |
673464d9 |
109 | #XXX We have to make sure we don't mess up when autoflush isn't turned on |
70b55428 |
110 | unless ( $obj->_root->{inode} ) { |
111 | my @stats = stat($obj->_fh); |
112 | $obj->_root->{inode} = $stats[1]; |
113 | $obj->_root->{end} = $stats[7]; |
114 | } |
115 | |
116 | return 1; |
117 | } |
118 | |
a20d9a3f |
119 | sub open { |
20f7b20c |
120 | ## |
121 | # Open a fh to the database, create if nonexistent. |
122 | # Make sure file signature matches DBM::Deep spec. |
123 | ## |
a20d9a3f |
124 | my $self = shift; |
70b55428 |
125 | my ($obj) = @_; |
a20d9a3f |
126 | |
3d1b8be9 |
127 | if (defined($obj->_fh)) { $self->close_fh( $obj ); } |
20f7b20c |
128 | |
673464d9 |
129 | # Theoretically, adding O_BINARY should remove the need for the binmode |
130 | # Of course, testing it is going to be ... interesting. |
131 | my $flags = O_RDWR | O_CREAT | O_BINARY; |
a20d9a3f |
132 | |
673464d9 |
133 | my $fh; |
134 | sysopen( $fh, $obj->_root->{file}, $flags ) |
135 | or $obj->_throw_error("Cannot sysopen file: " . $obj->_root->{file} . ": $!"); |
136 | $obj->_root->{fh} = $fh; |
a20d9a3f |
137 | |
138 | #XXX Can we remove this by using the right sysopen() flags? |
139 | # Maybe ... q.v. above |
140 | binmode $fh; # for win32 |
141 | |
cd59cad8 |
142 | if ($obj->_root->{autoflush}) { |
a20d9a3f |
143 | my $old = select $fh; |
144 | $|=1; |
145 | select $old; |
146 | } |
20f7b20c |
147 | |
cd59cad8 |
148 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
a20d9a3f |
149 | |
150 | my $signature; |
8db25060 |
151 | my $bytes_read = read( $fh, $signature, length(SIG_FILE)); |
20f7b20c |
152 | |
a20d9a3f |
153 | ## |
154 | # File is empty -- write signature and master index |
155 | ## |
156 | if (!$bytes_read) { |
cd59cad8 |
157 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
8db25060 |
158 | print( $fh SIG_FILE); |
d0b74c17 |
159 | |
ec1bce6b |
160 | $self->create_tag($obj, $obj->_base_offset, $obj->_type, chr(0) x $self->{index_size}); |
a20d9a3f |
161 | |
a20d9a3f |
162 | # Flush the filehandle |
163 | my $old_fh = select $fh; |
164 | my $old_af = $|; $| = 1; $| = $old_af; |
165 | select $old_fh; |
166 | |
a20d9a3f |
167 | return 1; |
168 | } |
20f7b20c |
169 | |
a20d9a3f |
170 | ## |
171 | # Check signature was valid |
172 | ## |
8db25060 |
173 | unless ($signature eq SIG_FILE) { |
3d1b8be9 |
174 | $self->close_fh( $obj ); |
e5fc7e69 |
175 | $obj->_throw_error("Signature not found -- file is not a Deep DB"); |
a20d9a3f |
176 | } |
177 | |
a20d9a3f |
178 | ## |
179 | # Get our type from master index signature |
180 | ## |
e5fc7e69 |
181 | my $tag = $self->load_tag($obj, $obj->_base_offset) |
182 | or $obj->_throw_error("Corrupted file, no master index record"); |
d0b74c17 |
183 | |
e5fc7e69 |
184 | unless ($obj->{type} eq $tag->{signature}) { |
185 | $obj->_throw_error("File type mismatch"); |
a20d9a3f |
186 | } |
20f7b20c |
187 | |
d0b74c17 |
188 | #XXX We probably also want to store the hash algorithm name and not assume anything |
189 | #XXX The cool thing would be to allow a different hashing algorithm at every level |
190 | |
a20d9a3f |
191 | return 1; |
192 | } |
193 | |
3d1b8be9 |
194 | sub close_fh { |
cd59cad8 |
195 | my $self = shift; |
a21f2d90 |
196 | my ($obj) = @_; |
cd59cad8 |
197 | |
198 | if ( my $fh = $obj->_root->{fh} ) { |
199 | close $fh; |
200 | } |
201 | $obj->_root->{fh} = undef; |
202 | |
203 | return 1; |
204 | } |
205 | |
d4b1166e |
206 | sub create_tag { |
20f7b20c |
207 | ## |
208 | # Given offset, signature and content, create tag and write to disk |
209 | ## |
d4b1166e |
210 | my $self = shift; |
20f7b20c |
211 | my ($obj, $offset, $sig, $content) = @_; |
212 | my $size = length($content); |
213 | |
d4b1166e |
214 | my $fh = $obj->_fh; |
215 | |
20f7b20c |
216 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
251dfd0e |
217 | print( $fh $sig . pack($self->{data_pack}, $size) . $content ); |
20f7b20c |
218 | |
219 | if ($offset == $obj->_root->{end}) { |
8db25060 |
220 | $obj->_root->{end} += SIG_SIZE + $self->{data_size} + $size; |
20f7b20c |
221 | } |
222 | |
223 | return { |
224 | signature => $sig, |
225 | size => $size, |
8db25060 |
226 | offset => $offset + SIG_SIZE + $self->{data_size}, |
20f7b20c |
227 | content => $content |
228 | }; |
d4b1166e |
229 | } |
230 | |
231 | sub load_tag { |
20f7b20c |
232 | ## |
233 | # Given offset, load single tag and return signature, size and data |
234 | ## |
d4b1166e |
235 | my $self = shift; |
20f7b20c |
236 | my ($obj, $offset) = @_; |
237 | |
d4b1166e |
238 | my $fh = $obj->_fh; |
239 | |
20f7b20c |
240 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
e5fc7e69 |
241 | |
75be6413 |
242 | #XXX I'm not sure this check will work if autoflush isn't enabled ... |
e5fc7e69 |
243 | return if eof $fh; |
20f7b20c |
244 | |
d4b1166e |
245 | my $b; |
8db25060 |
246 | read( $fh, $b, SIG_SIZE + $self->{data_size} ); |
251dfd0e |
247 | my ($sig, $size) = unpack( "A $self->{data_pack}", $b ); |
20f7b20c |
248 | |
249 | my $buffer; |
250 | read( $fh, $buffer, $size); |
251 | |
252 | return { |
253 | signature => $sig, |
254 | size => $size, |
8db25060 |
255 | offset => $offset + SIG_SIZE + $self->{data_size}, |
20f7b20c |
256 | content => $buffer |
257 | }; |
d4b1166e |
258 | } |
259 | |
20f7b20c |
260 | sub add_bucket { |
261 | ## |
262 | # Adds one key/value pair to bucket list, given offset, MD5 digest of key, |
263 | # plain (undigested) key and value. |
264 | ## |
d4b1166e |
265 | my $self = shift; |
20f7b20c |
266 | my ($obj, $tag, $md5, $plain_key, $value) = @_; |
75be6413 |
267 | |
20f7b20c |
268 | my $keys = $tag->{content}; |
269 | my $location = 0; |
270 | my $result = 2; |
271 | |
272 | my $root = $obj->_root; |
273 | |
274 | my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) }; |
275 | my $internal_ref = $is_dbm_deep && ($value->_root eq $root); |
276 | |
277 | my $fh = $obj->_fh; |
278 | |
279 | ## |
280 | # Iterate through buckets, seeing if this is a new entry or a replace. |
281 | ## |
75be6413 |
282 | BUCKET: |
d0b74c17 |
283 | for (my $i = 0; $i < $self->{max_buckets}; $i++) { |
9cec1360 |
284 | my ($key, $subloc) = $self->_get_key_subloc( $keys, $i ); |
75be6413 |
285 | |
20f7b20c |
286 | if (!$subloc) { |
287 | ## |
288 | # Found empty bucket (end of list). Populate and exit loop. |
289 | ## |
290 | $result = 2; |
291 | |
8db25060 |
292 | $location = $root->{end}; |
20f7b20c |
293 | |
75be6413 |
294 | seek( |
295 | $fh, |
296 | $tag->{offset} + ($i * $self->{bucket_size}) + $root->{file_offset}, |
297 | SEEK_SET, |
298 | ); |
299 | |
251dfd0e |
300 | print( $fh $md5 . pack($self->{long_pack}, $location) ); |
20f7b20c |
301 | last; |
302 | } |
303 | |
75be6413 |
304 | if ( $md5 ne $key ) { |
305 | next BUCKET; |
306 | } |
20f7b20c |
307 | |
75be6413 |
308 | ## |
309 | # Found existing bucket with same key. Replace with new value. |
310 | ## |
311 | $result = 1; |
20f7b20c |
312 | |
8db25060 |
313 | seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET); |
75be6413 |
314 | my $size; |
315 | read( $fh, $size, $self->{data_size}); |
316 | $size = unpack($self->{data_pack}, $size); |
20f7b20c |
317 | |
75be6413 |
318 | ## |
319 | # If value is a hash, array, or raw value with equal or less size, we can |
320 | # reuse the same content area of the database. Otherwise, we have to create |
321 | # a new content area at the EOF. |
322 | ## |
323 | my $actual_length; |
8db25060 |
324 | if ( $internal_ref ) { |
325 | $actual_length = $self->{long_size}; |
326 | } |
327 | else { |
328 | my $r = Scalar::Util::reftype( $value ) || ''; |
329 | if ( $r eq 'HASH' || $r eq 'ARRAY' ) { |
330 | $actual_length = $self->{index_size}; |
331 | |
332 | # if autobless is enabled, must also take into consideration |
333 | # the class name, as it is stored along with key/value. |
334 | if ( $root->{autobless} ) { |
335 | my $value_class = Scalar::Util::blessed($value); |
336 | if ( defined $value_class && !$value->isa('DBM::Deep') ) { |
337 | $actual_length += length($value_class); |
338 | } |
75be6413 |
339 | } |
20f7b20c |
340 | } |
8db25060 |
341 | else { $actual_length = length($value); } |
75be6413 |
342 | } |
20f7b20c |
343 | |
75be6413 |
344 | if ($actual_length <= $size) { |
345 | $location = $subloc; |
20f7b20c |
346 | } |
75be6413 |
347 | else { |
348 | $location = $root->{end}; |
8db25060 |
349 | seek( |
350 | $fh, |
351 | $tag->{offset} + ($i * $self->{bucket_size}) + $self->{hash_size} + $root->{file_offset}, |
352 | SEEK_SET, |
353 | ); |
75be6413 |
354 | print( $fh pack($self->{long_pack}, $location) ); |
355 | } |
356 | |
357 | last; |
20f7b20c |
358 | } |
359 | |
360 | ## |
20f7b20c |
361 | # If bucket didn't fit into list, split into a new index level |
362 | ## |
363 | if (!$location) { |
75be6413 |
364 | $self->split_index( $obj, $md5, $tag ); |
20f7b20c |
365 | |
75be6413 |
366 | $location = $root->{end}; |
367 | } |
20f7b20c |
368 | |
369 | ## |
370 | # Seek to content area and store signature, value and plaintext key |
371 | ## |
372 | if ($location) { |
20f7b20c |
373 | seek($fh, $location + $root->{file_offset}, SEEK_SET); |
374 | |
375 | ## |
8db25060 |
376 | # Write signature based on content type, set content length and write |
377 | # actual value. |
20f7b20c |
378 | ## |
379 | my $r = Scalar::Util::reftype($value) || ''; |
8db25060 |
380 | my $content_length; |
381 | if ( $internal_ref ) { |
382 | print( $fh SIG_INTERNAL ); |
383 | print( $fh pack($self->{data_pack}, $self->{long_size}) ); |
384 | print( $fh pack($self->{long_pack}, $value->_base_offset) ); |
385 | $content_length = $self->{long_size}; |
20f7b20c |
386 | } |
387 | else { |
8db25060 |
388 | if ($r eq 'HASH') { |
389 | print( $fh SIG_HASH ); |
390 | print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); |
391 | $content_length = $self->{index_size}; |
392 | } |
393 | elsif ($r eq 'ARRAY') { |
394 | print( $fh SIG_ARRAY ); |
395 | print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); |
396 | $content_length = $self->{index_size}; |
397 | } |
398 | elsif (!defined($value)) { |
399 | print( $fh SIG_NULL ); |
400 | print( $fh pack($self->{data_pack}, 0) ); |
401 | $content_length = 0; |
402 | } |
403 | else { |
404 | print( $fh SIG_DATA ); |
405 | print( $fh pack($self->{data_pack}, length($value)) . $value ); |
406 | $content_length = length($value); |
407 | } |
20f7b20c |
408 | } |
409 | |
410 | ## |
411 | # Plain key is stored AFTER value, as keys are typically fetched less often. |
412 | ## |
251dfd0e |
413 | print( $fh pack($self->{data_pack}, length($plain_key)) . $plain_key ); |
20f7b20c |
414 | |
415 | ## |
416 | # If value is blessed, preserve class name |
417 | ## |
418 | if ( $root->{autobless} ) { |
419 | my $value_class = Scalar::Util::blessed($value); |
8db25060 |
420 | if ( defined $value_class && !$value->isa( 'DBM::Deep' ) ) { |
20f7b20c |
421 | ## |
422 | # Blessed ref -- will restore later |
423 | ## |
424 | print( $fh chr(1) ); |
251dfd0e |
425 | print( $fh pack($self->{data_pack}, length($value_class)) . $value_class ); |
20f7b20c |
426 | $content_length += 1; |
251dfd0e |
427 | $content_length += $self->{data_size} + length($value_class); |
20f7b20c |
428 | } |
429 | else { |
430 | print( $fh chr(0) ); |
431 | $content_length += 1; |
432 | } |
433 | } |
434 | |
435 | ## |
436 | # If this is a new content area, advance EOF counter |
437 | ## |
438 | if ($location == $root->{end}) { |
8db25060 |
439 | $root->{end} += SIG_SIZE; |
251dfd0e |
440 | $root->{end} += $self->{data_size} + $content_length; |
441 | $root->{end} += $self->{data_size} + length($plain_key); |
20f7b20c |
442 | } |
443 | |
444 | ## |
445 | # If content is a hash or array, create new child DBM::Deep object and |
446 | # pass each key or element to it. |
447 | ## |
8db25060 |
448 | if ( ! $internal_ref ) { |
449 | if ($r eq 'HASH') { |
450 | my $branch = DBM::Deep->new( |
451 | type => DBM::Deep->TYPE_HASH, |
452 | base_offset => $location, |
453 | root => $root, |
454 | ); |
455 | foreach my $key (keys %{$value}) { |
456 | $branch->STORE( $key, $value->{$key} ); |
457 | } |
20f7b20c |
458 | } |
8db25060 |
459 | elsif ($r eq 'ARRAY') { |
460 | my $branch = DBM::Deep->new( |
461 | type => DBM::Deep->TYPE_ARRAY, |
462 | base_offset => $location, |
463 | root => $root, |
464 | ); |
465 | my $index = 0; |
466 | foreach my $element (@{$value}) { |
467 | $branch->STORE( $index, $element ); |
468 | $index++; |
469 | } |
20f7b20c |
470 | } |
471 | } |
472 | |
473 | return $result; |
474 | } |
d4b1166e |
475 | |
e5fc7e69 |
476 | $obj->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file"); |
d4b1166e |
477 | } |
478 | |
75be6413 |
479 | sub split_index { |
480 | my $self = shift; |
481 | my ($obj, $md5, $tag) = @_; |
482 | |
483 | my $fh = $obj->_fh; |
484 | my $root = $obj->_root; |
485 | my $keys = $tag->{content}; |
486 | |
487 | seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); |
488 | print( $fh pack($self->{long_pack}, $root->{end}) ); |
489 | |
490 | my $index_tag = $self->create_tag( |
491 | $obj, |
492 | $root->{end}, |
8db25060 |
493 | SIG_INDEX, |
75be6413 |
494 | chr(0) x $self->{index_size}, |
495 | ); |
496 | |
497 | my @offsets = (); |
498 | |
499 | $keys .= $md5 . pack($self->{long_pack}, 0); |
500 | |
501 | BUCKET: |
502 | for (my $i = 0; $i <= $self->{max_buckets}; $i++) { |
9cec1360 |
503 | my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i ); |
75be6413 |
504 | |
505 | next BUCKET unless $key; |
506 | |
75be6413 |
507 | my $num = ord(substr($key, $tag->{ch} + 1, 1)); |
508 | |
509 | if ($offsets[$num]) { |
8db25060 |
510 | my $offset = $offsets[$num] + SIG_SIZE + $self->{data_size}; |
75be6413 |
511 | seek($fh, $offset + $root->{file_offset}, SEEK_SET); |
512 | my $subkeys; |
513 | read( $fh, $subkeys, $self->{bucket_list_size}); |
514 | |
515 | for (my $k=0; $k<$self->{max_buckets}; $k++) { |
9cec1360 |
516 | my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k ); |
75be6413 |
517 | |
518 | if (!$subloc) { |
519 | seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); |
520 | print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); |
521 | last; |
522 | } |
523 | } # k loop |
524 | } |
525 | else { |
526 | $offsets[$num] = $root->{end}; |
527 | seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); |
528 | print( $fh pack($self->{long_pack}, $root->{end}) ); |
529 | |
8db25060 |
530 | my $blist_tag = $self->create_tag($obj, $root->{end}, SIG_BLIST, chr(0) x $self->{bucket_list_size}); |
75be6413 |
531 | |
532 | seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); |
533 | print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); |
534 | } |
535 | } # i loop |
536 | |
537 | return; |
538 | } |
539 | |
8db25060 |
540 | sub read_from_loc { |
541 | my $self = shift; |
542 | my ($obj, $subloc) = @_; |
543 | |
544 | my $fh = $obj->_fh; |
545 | |
546 | ## |
547 | # Found match -- seek to offset and read signature |
548 | ## |
549 | my $signature; |
550 | seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); |
551 | read( $fh, $signature, SIG_SIZE); |
552 | |
553 | ## |
554 | # If value is a hash or array, return new DBM::Deep object with correct offset |
555 | ## |
556 | if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) { |
557 | my $obj = DBM::Deep->new( |
558 | type => $signature, |
559 | base_offset => $subloc, |
560 | root => $obj->_root, |
561 | ); |
562 | |
563 | if ($obj->_root->{autobless}) { |
564 | ## |
565 | # Skip over value and plain key to see if object needs |
566 | # to be re-blessed |
567 | ## |
568 | seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR); |
569 | |
570 | my $size; |
571 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
572 | if ($size) { seek($fh, $size, SEEK_CUR); } |
573 | |
574 | my $bless_bit; |
575 | read( $fh, $bless_bit, 1); |
576 | if (ord($bless_bit)) { |
577 | ## |
578 | # Yes, object needs to be re-blessed |
579 | ## |
580 | my $class_name; |
581 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
582 | if ($size) { read( $fh, $class_name, $size); } |
583 | if ($class_name) { $obj = bless( $obj, $class_name ); } |
584 | } |
585 | } |
586 | |
587 | return $obj; |
588 | } |
589 | elsif ( $signature eq SIG_INTERNAL ) { |
590 | my $size; |
591 | read( $fh, $size, $self->{data_size}); |
592 | $size = unpack($self->{data_pack}, $size); |
593 | |
594 | if ( $size ) { |
595 | my $new_loc; |
596 | read( $fh, $new_loc, $size ); |
597 | $new_loc = unpack( $self->{long_pack}, $new_loc ); |
598 | |
599 | return $self->read_from_loc( $obj, $new_loc ); |
600 | } |
601 | else { |
602 | return; |
603 | } |
604 | } |
605 | ## |
606 | # Otherwise return actual value |
607 | ## |
608 | elsif ($signature eq SIG_DATA) { |
609 | my $size; |
610 | read( $fh, $size, $self->{data_size}); |
611 | $size = unpack($self->{data_pack}, $size); |
612 | |
613 | my $value = ''; |
614 | if ($size) { read( $fh, $value, $size); } |
615 | return $value; |
616 | } |
617 | |
618 | ## |
619 | # Key exists, but content is null |
620 | ## |
621 | return; |
622 | } |
623 | |
9020ee8c |
624 | sub get_bucket_value { |
beac1dff |
625 | ## |
626 | # Fetch single value given tag and MD5 digested key. |
627 | ## |
628 | my $self = shift; |
629 | my ($obj, $tag, $md5) = @_; |
630 | my $keys = $tag->{content}; |
9020ee8c |
631 | |
632 | my $fh = $obj->_fh; |
633 | |
beac1dff |
634 | ## |
635 | # Iterate through buckets, looking for a key match |
636 | ## |
9020ee8c |
637 | BUCKET: |
75be6413 |
638 | for (my $i = 0; $i < $self->{max_buckets}; $i++) { |
9cec1360 |
639 | my ($key, $subloc) = $self->_get_key_subloc( $keys, $i ); |
9020ee8c |
640 | |
beac1dff |
641 | if (!$subloc) { |
642 | ## |
643 | # Hit end of list, no match |
644 | ## |
645 | return; |
646 | } |
9020ee8c |
647 | |
648 | if ( $md5 ne $key ) { |
649 | next BUCKET; |
650 | } |
651 | |
8db25060 |
652 | return $self->read_from_loc( $obj, $subloc ); |
beac1dff |
653 | } # i loop |
9020ee8c |
654 | |
beac1dff |
655 | return; |
9020ee8c |
656 | } |
ab0e4957 |
657 | |
658 | sub delete_bucket { |
beac1dff |
659 | ## |
660 | # Delete single key/value pair given tag and MD5 digested key. |
661 | ## |
662 | my $self = shift; |
663 | my ($obj, $tag, $md5) = @_; |
664 | my $keys = $tag->{content}; |
ab0e4957 |
665 | |
666 | my $fh = $obj->_fh; |
d0b74c17 |
667 | |
beac1dff |
668 | ## |
669 | # Iterate through buckets, looking for a key match |
670 | ## |
ab0e4957 |
671 | BUCKET: |
beac1dff |
672 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
9cec1360 |
673 | my ($key, $subloc) = $self->_get_key_subloc( $keys, $i ); |
ab0e4957 |
674 | |
beac1dff |
675 | if (!$subloc) { |
676 | ## |
677 | # Hit end of list, no match |
678 | ## |
679 | return; |
680 | } |
ab0e4957 |
681 | |
682 | if ( $md5 ne $key ) { |
683 | next BUCKET; |
684 | } |
685 | |
686 | ## |
687 | # Matched key -- delete bucket and return |
688 | ## |
251dfd0e |
689 | seek($fh, $tag->{offset} + ($i * $self->{bucket_size}) + $obj->_root->{file_offset}, SEEK_SET); |
690 | print( $fh substr($keys, ($i+1) * $self->{bucket_size} ) ); |
691 | print( $fh chr(0) x $self->{bucket_size} ); |
d0b74c17 |
692 | |
ab0e4957 |
693 | return 1; |
beac1dff |
694 | } # i loop |
ab0e4957 |
695 | |
beac1dff |
696 | return; |
ab0e4957 |
697 | } |
698 | |
912d50b1 |
699 | sub bucket_exists { |
beac1dff |
700 | ## |
701 | # Check existence of single key given tag and MD5 digested key. |
702 | ## |
703 | my $self = shift; |
704 | my ($obj, $tag, $md5) = @_; |
705 | my $keys = $tag->{content}; |
d0b74c17 |
706 | |
beac1dff |
707 | ## |
708 | # Iterate through buckets, looking for a key match |
709 | ## |
912d50b1 |
710 | BUCKET: |
beac1dff |
711 | for (my $i=0; $i<$self->{max_buckets}; $i++) { |
9cec1360 |
712 | my ($key, $subloc) = $self->_get_key_subloc( $keys, $i ); |
912d50b1 |
713 | |
beac1dff |
714 | if (!$subloc) { |
715 | ## |
716 | # Hit end of list, no match |
717 | ## |
718 | return; |
719 | } |
912d50b1 |
720 | |
721 | if ( $md5 ne $key ) { |
722 | next BUCKET; |
723 | } |
724 | |
725 | ## |
726 | # Matched key -- return true |
727 | ## |
728 | return 1; |
beac1dff |
729 | } # i loop |
912d50b1 |
730 | |
beac1dff |
731 | return; |
912d50b1 |
732 | } |
733 | |
6736c116 |
734 | sub find_bucket_list { |
beac1dff |
735 | ## |
736 | # Locate offset for bucket list, given digested key |
737 | ## |
738 | my $self = shift; |
d0b74c17 |
739 | my ($obj, $md5, $args) = @_; |
740 | $args = {} unless $args; |
741 | |
beac1dff |
742 | ## |
743 | # Locate offset for bucket list using digest index system |
744 | ## |
e5fc7e69 |
745 | my $tag = $self->load_tag($obj, $obj->_base_offset) |
746 | or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" ); |
d0b74c17 |
747 | |
e5fc7e69 |
748 | my $ch = 0; |
8db25060 |
749 | while ($tag->{signature} ne SIG_BLIST) { |
d0b74c17 |
750 | my $num = ord substr($md5, $ch, 1); |
751 | |
752 | my $ref_loc = $tag->{offset} + ($num * $self->{long_size}); |
753 | $tag = $self->index_lookup( $obj, $tag, $num ); |
754 | |
755 | if (!$tag) { |
756 | if ( $args->{create} ) { |
757 | my $fh = $obj->_fh; |
758 | seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET); |
759 | print( $fh pack($self->{long_pack}, $obj->_root->{end}) ); |
760 | |
761 | $tag = $self->create_tag( |
762 | $obj, $obj->_root->{end}, |
8db25060 |
763 | SIG_BLIST, |
d0b74c17 |
764 | chr(0) x $self->{bucket_list_size}, |
765 | ); |
766 | |
767 | $tag->{ref_loc} = $ref_loc; |
768 | $tag->{ch} = $ch; |
769 | |
770 | last; |
771 | } |
772 | else { |
773 | return; |
774 | } |
775 | } |
776 | |
777 | $tag->{ch} = $ch; |
778 | $tag->{ref_loc} = $ref_loc; |
779 | |
beac1dff |
780 | $ch++; |
781 | } |
d0b74c17 |
782 | |
beac1dff |
783 | return $tag; |
6736c116 |
784 | } |
785 | |
d0b74c17 |
786 | sub index_lookup { |
787 | ## |
788 | # Given index tag, lookup single entry in index and return . |
789 | ## |
790 | my $self = shift; |
791 | my ($obj, $tag, $index) = @_; |
792 | |
793 | my $location = unpack( |
794 | $self->{long_pack}, |
795 | substr( |
796 | $tag->{content}, |
797 | $index * $self->{long_size}, |
798 | $self->{long_size}, |
799 | ), |
800 | ); |
801 | |
802 | if (!$location) { return; } |
803 | |
804 | return $self->load_tag( $obj, $location ); |
805 | } |
806 | |
6736c116 |
807 | sub traverse_index { |
beac1dff |
808 | ## |
809 | # Scan index and recursively step into deeper levels, looking for next key. |
810 | ## |
6736c116 |
811 | my $self = shift; |
812 | my ($obj, $offset, $ch, $force_return_next) = @_; |
d0b74c17 |
813 | |
beac1dff |
814 | my $tag = $self->load_tag($obj, $offset ); |
6736c116 |
815 | |
816 | my $fh = $obj->_fh; |
d0b74c17 |
817 | |
8db25060 |
818 | if ($tag->{signature} ne SIG_BLIST) { |
beac1dff |
819 | my $content = $tag->{content}; |
e5fc7e69 |
820 | my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1)); |
d0b74c17 |
821 | |
beac1dff |
822 | for (my $index = $start; $index < 256; $index++) { |
e5fc7e69 |
823 | my $subloc = unpack( |
824 | $self->{long_pack}, |
825 | substr($content, $index * $self->{long_size}, $self->{long_size}), |
826 | ); |
827 | |
beac1dff |
828 | if ($subloc) { |
e5fc7e69 |
829 | my $result = $self->traverse_index( |
830 | $obj, $subloc, $ch + 1, $force_return_next, |
831 | ); |
832 | |
beac1dff |
833 | if (defined($result)) { return $result; } |
834 | } |
835 | } # index loop |
d0b74c17 |
836 | |
beac1dff |
837 | $obj->{return_next} = 1; |
838 | } # tag is an index |
d0b74c17 |
839 | |
e5fc7e69 |
840 | else { |
beac1dff |
841 | my $keys = $tag->{content}; |
842 | if ($force_return_next) { $obj->{return_next} = 1; } |
d0b74c17 |
843 | |
beac1dff |
844 | ## |
845 | # Iterate through buckets, looking for a key match |
846 | ## |
8db25060 |
847 | for (my $i = 0; $i < $self->{max_buckets}; $i++) { |
9cec1360 |
848 | my ($key, $subloc) = $self->_get_key_subloc( $keys, $i ); |
d0b74c17 |
849 | |
8db25060 |
850 | # End of bucket list -- return to outer loop |
beac1dff |
851 | if (!$subloc) { |
beac1dff |
852 | $obj->{return_next} = 1; |
853 | last; |
854 | } |
8db25060 |
855 | # Located previous key -- return next one found |
beac1dff |
856 | elsif ($key eq $obj->{prev_md5}) { |
beac1dff |
857 | $obj->{return_next} = 1; |
858 | next; |
859 | } |
8db25060 |
860 | # Seek to bucket location and skip over signature |
beac1dff |
861 | elsif ($obj->{return_next}) { |
8db25060 |
862 | seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); |
d0b74c17 |
863 | |
beac1dff |
864 | # Skip over value to get to plain key |
8db25060 |
865 | my $sig; |
866 | read( $fh, $sig, SIG_SIZE ); |
867 | |
beac1dff |
868 | my $size; |
e5fc7e69 |
869 | read( $fh, $size, $self->{data_size}); |
870 | $size = unpack($self->{data_pack}, $size); |
beac1dff |
871 | if ($size) { seek($fh, $size, SEEK_CUR); } |
d0b74c17 |
872 | |
beac1dff |
873 | # Read in plain key and return as scalar |
beac1dff |
874 | my $plain_key; |
e5fc7e69 |
875 | read( $fh, $size, $self->{data_size}); |
876 | $size = unpack($self->{data_pack}, $size); |
beac1dff |
877 | if ($size) { read( $fh, $plain_key, $size); } |
d0b74c17 |
878 | |
beac1dff |
879 | return $plain_key; |
880 | } |
8db25060 |
881 | } |
d0b74c17 |
882 | |
beac1dff |
883 | $obj->{return_next} = 1; |
884 | } # tag is a bucket list |
d0b74c17 |
885 | |
beac1dff |
886 | return; |
6736c116 |
887 | } |
888 | |
889 | sub get_next_key { |
beac1dff |
890 | ## |
891 | # Locate next key, given digested previous one |
892 | ## |
6736c116 |
893 | my $self = shift; |
894 | my ($obj) = @_; |
d0b74c17 |
895 | |
beac1dff |
896 | $obj->{prev_md5} = $_[1] ? $_[1] : undef; |
897 | $obj->{return_next} = 0; |
d0b74c17 |
898 | |
beac1dff |
899 | ## |
900 | # If the previous key was not specifed, start at the top and |
901 | # return the first one found. |
902 | ## |
903 | if (!$obj->{prev_md5}) { |
904 | $obj->{prev_md5} = chr(0) x $self->{hash_size}; |
905 | $obj->{return_next} = 1; |
906 | } |
d0b74c17 |
907 | |
beac1dff |
908 | return $self->traverse_index( $obj, $obj->_base_offset, 0 ); |
6736c116 |
909 | } |
910 | |
75be6413 |
911 | # Utilities |
912 | |
9cec1360 |
913 | sub _get_key_subloc { |
75be6413 |
914 | my $self = shift; |
915 | my ($keys, $idx) = @_; |
916 | |
9cec1360 |
917 | my ($key, $subloc) = unpack( |
918 | "a$self->{hash_size} $self->{long_pack}", |
75be6413 |
919 | substr( |
920 | $keys, |
9cec1360 |
921 | ($idx * $self->{bucket_size}), |
922 | $self->{bucket_size}, |
75be6413 |
923 | ), |
924 | ); |
925 | |
9cec1360 |
926 | return ($key, $subloc); |
75be6413 |
927 | } |
928 | |
a20d9a3f |
929 | 1; |
930 | __END__ |