Commit | Line | Data |
a20d9a3f |
1 | package DBM::Deep::Engine; |
2 | |
3 | use strict; |
4 | |
5 | use Fcntl qw( :DEFAULT :flock :seek ); |
6 | |
8db25060 |
7 | ## |
8 | # Setup file and tag signatures. These should never change. |
9 | ## |
10 | sub SIG_FILE () { 'DPDB' } |
11 | sub SIG_INTERNAL () { 'i' } |
12 | sub SIG_HASH () { 'H' } |
13 | sub SIG_ARRAY () { 'A' } |
8db25060 |
14 | sub SIG_NULL () { 'N' } |
15 | sub SIG_DATA () { 'D' } |
16 | sub SIG_INDEX () { 'I' } |
17 | sub SIG_BLIST () { 'B' } |
18 | sub SIG_SIZE () { 1 } |
19 | |
612969fb |
20 | sub precalc_sizes { |
beac1dff |
21 | ## |
22 | # Precalculate index, bucket and bucket list sizes |
23 | ## |
251dfd0e |
24 | my $self = shift; |
1bf65be7 |
25 | |
251dfd0e |
26 | $self->{index_size} = (2**8) * $self->{long_size}; |
27 | $self->{bucket_size} = $self->{hash_size} + $self->{long_size}; |
28 | $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size}; |
1bf65be7 |
29 | |
251dfd0e |
30 | return 1; |
1bf65be7 |
31 | } |
32 | |
33 | sub set_pack { |
beac1dff |
34 | ## |
35 | # Set pack/unpack modes (see file header for more) |
36 | ## |
251dfd0e |
37 | my $self = shift; |
1bf65be7 |
38 | my ($long_s, $long_p, $data_s, $data_p) = @_; |
39 | |
81d16922 |
40 | ## |
75be6413 |
41 | # Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 |
42 | # GB per file. |
beac1dff |
43 | # (Perl must be compiled with largefile support for files > 2 GB) |
81d16922 |
44 | # |
45 | # Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file. |
beac1dff |
46 | # (Perl must be compiled with largefile and 64-bit long support) |
81d16922 |
47 | ## |
251dfd0e |
48 | $self->{long_size} = $long_s ? $long_s : 4; |
49 | $self->{long_pack} = $long_p ? $long_p : 'N'; |
1bf65be7 |
50 | |
81d16922 |
51 | ## |
75be6413 |
52 | # Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each |
d5d7c51d |
53 | # key/value. Upgrading this is possible (see above) but probably not |
54 | # necessary. If you need more than 4 GB for a single key or value, this |
55 | # module is really not for you :-) |
81d16922 |
56 | ## |
251dfd0e |
57 | $self->{data_size} = $data_s ? $data_s : 4; |
58 | $self->{data_pack} = $data_p ? $data_p : 'N'; |
1bf65be7 |
59 | |
beac1dff |
60 | return $self->precalc_sizes(); |
1bf65be7 |
61 | } |
62 | |
63 | sub set_digest { |
beac1dff |
64 | ## |
65 | # Set key digest function (default is MD5) |
66 | ## |
251dfd0e |
67 | my $self = shift; |
1bf65be7 |
68 | my ($digest_func, $hash_size) = @_; |
69 | |
d0b74c17 |
70 | $self->{digest} = $digest_func ? $digest_func : \&Digest::MD5::md5; |
251dfd0e |
71 | $self->{hash_size} = $hash_size ? $hash_size : 16; |
612969fb |
72 | |
beac1dff |
73 | return $self->precalc_sizes(); |
612969fb |
74 | } |
75 | |
76 | sub new { |
77 | my $class = shift; |
78 | my ($args) = @_; |
79 | |
80 | my $self = bless { |
81 | long_size => 4, |
82 | long_pack => 'N', |
83 | data_size => 4, |
84 | data_pack => 'N', |
251dfd0e |
85 | |
612969fb |
86 | digest => \&Digest::MD5::md5, |
87 | hash_size => 16, |
251dfd0e |
88 | |
81d16922 |
89 | ## |
d5d7c51d |
90 | # Maximum number of buckets per list before another level of indexing is |
91 | # done. |
92 | # Increase this value for slightly greater speed, but larger database |
93 | # files. DO NOT decrease this value below 16, due to risk of recursive |
94 | # reindex overrun. |
81d16922 |
95 | ## |
612969fb |
96 | max_buckets => 16, |
97 | }, $class; |
98 | |
251dfd0e |
99 | $self->precalc_sizes; |
612969fb |
100 | |
101 | return $self; |
1bf65be7 |
102 | } |
103 | |
70b55428 |
104 | sub setup_fh { |
105 | my $self = shift; |
106 | my ($obj) = @_; |
107 | |
108 | $self->open( $obj ) if !defined $obj->_fh; |
109 | |
6fde4ed2 |
110 | my $fh = $obj->_fh; |
111 | flock $fh, LOCK_EX; |
118ba343 |
112 | |
6fde4ed2 |
113 | unless ( $obj->{base_offset} ) { |
118ba343 |
114 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
115 | my $signature; |
116 | my $bytes_read = read( $fh, $signature, length(SIG_FILE)); |
117 | |
118 | ## |
119 | # File is empty -- write signature and master index |
120 | ## |
121 | if (!$bytes_read) { |
c9ec091a |
122 | my $loc = $self->_request_space( $obj, length( SIG_FILE ) ); |
123 | seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET); |
118ba343 |
124 | print( $fh SIG_FILE); |
125 | |
c9ec091a |
126 | $obj->{base_offset} = $self->_request_space( |
16d1ad9b |
127 | $obj, $self->tag_size( $self->{index_size} ), |
c9ec091a |
128 | ); |
118ba343 |
129 | |
130 | $self->create_tag( |
c9ec091a |
131 | $obj, $obj->_base_offset, $obj->_type, |
132 | chr(0) x $self->{index_size}, |
118ba343 |
133 | ); |
134 | |
135 | # Flush the filehandle |
136 | my $old_fh = select $fh; |
137 | my $old_af = $|; $| = 1; $| = $old_af; |
138 | select $old_fh; |
139 | } |
140 | else { |
141 | $obj->{base_offset} = $bytes_read; |
142 | |
143 | ## |
144 | # Check signature was valid |
145 | ## |
146 | unless ($signature eq SIG_FILE) { |
147 | $self->close_fh( $obj ); |
148 | $obj->_throw_error("Signature not found -- file is not a Deep DB"); |
149 | } |
150 | |
151 | ## |
152 | # Get our type from master index signature |
153 | ## |
154 | my $tag = $self->load_tag($obj, $obj->_base_offset) |
155 | or $obj->_throw_error("Corrupted file, no master index record"); |
156 | |
157 | unless ($obj->{type} eq $tag->{signature}) { |
158 | $obj->_throw_error("File type mismatch"); |
159 | } |
160 | } |
118ba343 |
161 | } |
e06824f8 |
162 | |
673464d9 |
163 | #XXX We have to make sure we don't mess up when autoflush isn't turned on |
70b55428 |
164 | unless ( $obj->_root->{inode} ) { |
165 | my @stats = stat($obj->_fh); |
166 | $obj->_root->{inode} = $stats[1]; |
167 | $obj->_root->{end} = $stats[7]; |
168 | } |
169 | |
6fde4ed2 |
170 | flock $fh, LOCK_UN; |
171 | |
70b55428 |
172 | return 1; |
173 | } |
174 | |
a20d9a3f |
175 | sub open { |
20f7b20c |
176 | ## |
177 | # Open a fh to the database, create if nonexistent. |
178 | # Make sure file signature matches DBM::Deep spec. |
179 | ## |
a20d9a3f |
180 | my $self = shift; |
70b55428 |
181 | my ($obj) = @_; |
a20d9a3f |
182 | |
673464d9 |
183 | # Theoretically, adding O_BINARY should remove the need for the binmode |
184 | # Of course, testing it is going to be ... interesting. |
185 | my $flags = O_RDWR | O_CREAT | O_BINARY; |
a20d9a3f |
186 | |
673464d9 |
187 | my $fh; |
d5d7c51d |
188 | my $filename = $obj->_root->{file}; |
189 | sysopen( $fh, $filename, $flags ) |
190 | or $obj->_throw_error("Cannot sysopen file '$filename': $!"); |
673464d9 |
191 | $obj->_root->{fh} = $fh; |
a20d9a3f |
192 | |
193 | #XXX Can we remove this by using the right sysopen() flags? |
194 | # Maybe ... q.v. above |
195 | binmode $fh; # for win32 |
196 | |
cd59cad8 |
197 | if ($obj->_root->{autoflush}) { |
a20d9a3f |
198 | my $old = select $fh; |
199 | $|=1; |
200 | select $old; |
201 | } |
20f7b20c |
202 | |
a20d9a3f |
203 | return 1; |
204 | } |
205 | |
3d1b8be9 |
206 | sub close_fh { |
cd59cad8 |
207 | my $self = shift; |
a21f2d90 |
208 | my ($obj) = @_; |
cd59cad8 |
209 | |
210 | if ( my $fh = $obj->_root->{fh} ) { |
211 | close $fh; |
212 | } |
213 | $obj->_root->{fh} = undef; |
214 | |
215 | return 1; |
216 | } |
217 | |
16d1ad9b |
218 | sub tag_size { |
219 | my $self = shift; |
220 | my ($size) = @_; |
221 | return SIG_SIZE + $self->{data_size} + $size; |
222 | } |
223 | |
d4b1166e |
224 | sub create_tag { |
20f7b20c |
225 | ## |
226 | # Given offset, signature and content, create tag and write to disk |
227 | ## |
d4b1166e |
228 | my $self = shift; |
20f7b20c |
229 | my ($obj, $offset, $sig, $content) = @_; |
230 | my $size = length($content); |
231 | |
d4b1166e |
232 | my $fh = $obj->_fh; |
233 | |
20f7b20c |
234 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
251dfd0e |
235 | print( $fh $sig . pack($self->{data_pack}, $size) . $content ); |
20f7b20c |
236 | |
237 | if ($offset == $obj->_root->{end}) { |
16d1ad9b |
238 | $obj->_root->{end} += $self->tag_size( $size ); |
20f7b20c |
239 | } |
240 | |
241 | return { |
242 | signature => $sig, |
243 | size => $size, |
8db25060 |
244 | offset => $offset + SIG_SIZE + $self->{data_size}, |
20f7b20c |
245 | content => $content |
246 | }; |
d4b1166e |
247 | } |
248 | |
249 | sub load_tag { |
20f7b20c |
250 | ## |
251 | # Given offset, load single tag and return signature, size and data |
252 | ## |
d4b1166e |
253 | my $self = shift; |
20f7b20c |
254 | my ($obj, $offset) = @_; |
255 | |
e06824f8 |
256 | # print join(':',map{$_||''}caller(1)), $/; |
257 | |
d4b1166e |
258 | my $fh = $obj->_fh; |
259 | |
20f7b20c |
260 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
e5fc7e69 |
261 | |
75be6413 |
262 | #XXX I'm not sure this check will work if autoflush isn't enabled ... |
e5fc7e69 |
263 | return if eof $fh; |
20f7b20c |
264 | |
d4b1166e |
265 | my $b; |
8db25060 |
266 | read( $fh, $b, SIG_SIZE + $self->{data_size} ); |
251dfd0e |
267 | my ($sig, $size) = unpack( "A $self->{data_pack}", $b ); |
20f7b20c |
268 | |
269 | my $buffer; |
270 | read( $fh, $buffer, $size); |
271 | |
272 | return { |
273 | signature => $sig, |
274 | size => $size, |
8db25060 |
275 | offset => $offset + SIG_SIZE + $self->{data_size}, |
20f7b20c |
276 | content => $buffer |
277 | }; |
d4b1166e |
278 | } |
279 | |
29b01632 |
280 | sub _length_needed { |
281 | my $self = shift; |
282 | my ($obj, $value) = @_; |
283 | |
284 | my $is_dbm_deep = eval { |
285 | local $SIG{'__DIE__'}; |
286 | $value->isa( 'DBM::Deep' ); |
287 | }; |
288 | |
289 | my $internal_ref = $is_dbm_deep && ($value->_root eq $obj->_root); |
290 | |
291 | if ( $internal_ref ) { |
292 | return $self->{long_size}; |
293 | } |
294 | |
295 | my $r = Scalar::Util::reftype( $value ) || ''; |
296 | unless ( $r eq 'HASH' || $r eq 'ARRAY' ) { |
297 | return length( $value ); |
298 | } |
299 | |
300 | my $actual_length = $self->{index_size}; |
301 | |
302 | # if autobless is enabled, must also take into consideration |
303 | # the class name, as it is stored along with key/value. |
304 | if ( $obj->_root->{autobless} ) { |
305 | my $value_class = Scalar::Util::blessed($value); |
306 | if ( defined $value_class && !$value->isa('DBM::Deep') ) { |
307 | $actual_length += length($value_class); |
308 | } |
309 | } |
310 | |
311 | return $actual_length; |
312 | } |
313 | |
20f7b20c |
314 | sub add_bucket { |
315 | ## |
316 | # Adds one key/value pair to bucket list, given offset, MD5 digest of key, |
317 | # plain (undigested) key and value. |
318 | ## |
d4b1166e |
319 | my $self = shift; |
20f7b20c |
320 | my ($obj, $tag, $md5, $plain_key, $value) = @_; |
75be6413 |
321 | |
eea0d863 |
322 | # This verifies that only supported values will be stored. |
323 | { |
324 | my $r = Scalar::Util::reftype( $value ); |
325 | last if !defined $r; |
326 | |
327 | last if $r eq 'HASH'; |
328 | last if $r eq 'ARRAY'; |
329 | |
330 | $obj->_throw_error( |
331 | "Storage of variables of type '$r' is not supported." |
332 | ); |
333 | } |
334 | |
20f7b20c |
335 | my $location = 0; |
336 | my $result = 2; |
337 | |
338 | my $root = $obj->_root; |
339 | |
20f7b20c |
340 | my $fh = $obj->_fh; |
341 | |
386bab6c |
342 | my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); |
75be6413 |
343 | |
386bab6c |
344 | # Updating a known md5 |
345 | if ( $subloc ) { |
346 | $result = 1; |
20f7b20c |
347 | |
d5d7c51d |
348 | seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET); |
349 | my $size; |
350 | read( $fh, $size, $self->{data_size}); |
351 | $size = unpack($self->{data_pack}, $size); |
352 | |
29b01632 |
353 | my $actual_length = $self->_length_needed( $obj, $value ); |
354 | |
386bab6c |
355 | if ($actual_length <= $size) { |
356 | $location = $subloc; |
20f7b20c |
357 | } |
75be6413 |
358 | else { |
359 | $location = $root->{end}; |
386bab6c |
360 | seek( |
361 | $fh, |
362 | $tag->{offset} + $offset + $self->{hash_size} + $root->{file_offset}, |
363 | SEEK_SET, |
364 | ); |
365 | print( $fh pack($self->{long_pack}, $location) ); |
75be6413 |
366 | } |
75be6413 |
367 | } |
386bab6c |
368 | # Adding a new md5 |
369 | elsif ( defined $offset ) { |
386bab6c |
370 | $location = $root->{end}; |
371 | |
372 | seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET ); |
373 | print( $fh $md5 . pack($self->{long_pack}, $location) ); |
374 | } |
375 | # If bucket didn't fit into list, split into a new index level |
376 | else { |
377 | $self->split_index( $obj, $md5, $tag ); |
378 | |
379 | $location = $root->{end}; |
380 | } |
20f7b20c |
381 | |
d5d7c51d |
382 | $self->write_value( $obj, $location, $plain_key, $value ); |
383 | |
384 | return $result; |
385 | } |
386 | |
387 | sub write_value { |
388 | my $self = shift; |
389 | my ($obj, $location, $key, $value) = @_; |
390 | |
391 | my $fh = $obj->_fh; |
392 | my $root = $obj->_root; |
393 | |
394 | my $is_dbm_deep = eval { |
395 | local $SIG{'__DIE__'}; |
396 | $value->isa( 'DBM::Deep' ); |
397 | }; |
398 | |
399 | my $internal_ref = $is_dbm_deep && ($value->_root eq $root); |
400 | |
401 | seek($fh, $location + $root->{file_offset}, SEEK_SET); |
402 | |
20f7b20c |
403 | ## |
d5d7c51d |
404 | # Write signature based on content type, set content length and write |
405 | # actual value. |
20f7b20c |
406 | ## |
d5d7c51d |
407 | my $r = Scalar::Util::reftype($value) || ''; |
408 | my $content_length; |
409 | if ( $internal_ref ) { |
410 | print( $fh SIG_INTERNAL ); |
411 | print( $fh pack($self->{data_pack}, $self->{long_size}) ); |
412 | print( $fh pack($self->{long_pack}, $value->_base_offset) ); |
413 | $content_length = $self->{long_size}; |
414 | } |
415 | else { |
416 | if ($r eq 'HASH') { |
417 | print( $fh SIG_HASH ); |
418 | print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); |
419 | $content_length = $self->{index_size}; |
420 | } |
421 | elsif ($r eq 'ARRAY') { |
422 | print( $fh SIG_ARRAY ); |
423 | print( $fh pack($self->{data_pack}, $self->{index_size}) . chr(0) x $self->{index_size} ); |
424 | $content_length = $self->{index_size}; |
425 | } |
426 | elsif (!defined($value)) { |
427 | print( $fh SIG_NULL ); |
428 | print( $fh pack($self->{data_pack}, 0) ); |
429 | $content_length = 0; |
20f7b20c |
430 | } |
431 | else { |
d5d7c51d |
432 | print( $fh SIG_DATA ); |
433 | print( $fh pack($self->{data_pack}, length($value)) . $value ); |
434 | $content_length = length($value); |
20f7b20c |
435 | } |
d5d7c51d |
436 | } |
20f7b20c |
437 | |
d5d7c51d |
438 | ## |
439 | # Plain key is stored AFTER value, as keys are typically fetched less often. |
440 | ## |
441 | print( $fh pack($self->{data_pack}, length($key)) . $key ); |
20f7b20c |
442 | |
d5d7c51d |
443 | ## |
444 | # If value is blessed, preserve class name |
445 | ## |
446 | if ( $root->{autobless} ) { |
447 | my $value_class = Scalar::Util::blessed($value); |
448 | if ( defined $value_class && !$value->isa( 'DBM::Deep' ) ) { |
449 | ## |
450 | # Blessed ref -- will restore later |
451 | ## |
452 | print( $fh chr(1) ); |
453 | print( $fh pack($self->{data_pack}, length($value_class)) . $value_class ); |
454 | $content_length += 1; |
455 | $content_length += $self->{data_size} + length($value_class); |
20f7b20c |
456 | } |
d5d7c51d |
457 | else { |
458 | print( $fh chr(0) ); |
459 | $content_length += 1; |
20f7b20c |
460 | } |
d5d7c51d |
461 | } |
20f7b20c |
462 | |
d5d7c51d |
463 | ## |
464 | # If this is a new content area, advance EOF counter |
465 | ## |
466 | if ($location == $root->{end}) { |
467 | $root->{end} += SIG_SIZE; |
468 | $root->{end} += $self->{data_size} + $content_length; |
469 | $root->{end} += $self->{data_size} + length($key); |
470 | } |
471 | |
472 | ## |
473 | # If content is a hash or array, create new child DBM::Deep object and |
474 | # pass each key or element to it. |
475 | ## |
476 | if ( ! $internal_ref ) { |
477 | if ($r eq 'HASH') { |
478 | my $branch = DBM::Deep->new( |
479 | type => DBM::Deep->TYPE_HASH, |
480 | base_offset => $location, |
481 | root => $root, |
482 | ); |
483 | foreach my $key (keys %{$value}) { |
484 | $branch->STORE( $key, $value->{$key} ); |
20f7b20c |
485 | } |
d5d7c51d |
486 | } |
487 | elsif ($r eq 'ARRAY') { |
488 | my $branch = DBM::Deep->new( |
489 | type => DBM::Deep->TYPE_ARRAY, |
490 | base_offset => $location, |
491 | root => $root, |
492 | ); |
493 | my $index = 0; |
494 | foreach my $element (@{$value}) { |
495 | $branch->STORE( $index, $element ); |
496 | $index++; |
20f7b20c |
497 | } |
498 | } |
20f7b20c |
499 | } |
d4b1166e |
500 | |
d5d7c51d |
501 | return 1; |
d4b1166e |
502 | } |
503 | |
75be6413 |
504 | sub split_index { |
505 | my $self = shift; |
506 | my ($obj, $md5, $tag) = @_; |
507 | |
508 | my $fh = $obj->_fh; |
509 | my $root = $obj->_root; |
510 | my $keys = $tag->{content}; |
511 | |
512 | seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); |
16d1ad9b |
513 | |
514 | my $loc = $self->_request_space( |
515 | $obj, $self->tag_size( $self->{index_size} ), |
516 | ); |
517 | |
518 | print( $fh pack($self->{long_pack}, $loc) ); |
75be6413 |
519 | |
520 | my $index_tag = $self->create_tag( |
16d1ad9b |
521 | $obj, $loc, SIG_INDEX, |
75be6413 |
522 | chr(0) x $self->{index_size}, |
523 | ); |
524 | |
525 | my @offsets = (); |
526 | |
527 | $keys .= $md5 . pack($self->{long_pack}, 0); |
528 | |
529 | BUCKET: |
530 | for (my $i = 0; $i <= $self->{max_buckets}; $i++) { |
9cec1360 |
531 | my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i ); |
75be6413 |
532 | |
533 | next BUCKET unless $key; |
534 | |
75be6413 |
535 | my $num = ord(substr($key, $tag->{ch} + 1, 1)); |
536 | |
537 | if ($offsets[$num]) { |
8db25060 |
538 | my $offset = $offsets[$num] + SIG_SIZE + $self->{data_size}; |
75be6413 |
539 | seek($fh, $offset + $root->{file_offset}, SEEK_SET); |
540 | my $subkeys; |
541 | read( $fh, $subkeys, $self->{bucket_list_size}); |
542 | |
543 | for (my $k=0; $k<$self->{max_buckets}; $k++) { |
9cec1360 |
544 | my ($temp, $subloc) = $self->_get_key_subloc( $subkeys, $k ); |
75be6413 |
545 | |
546 | if (!$subloc) { |
547 | seek($fh, $offset + ($k * $self->{bucket_size}) + $root->{file_offset}, SEEK_SET); |
548 | print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); |
549 | last; |
550 | } |
551 | } # k loop |
552 | } |
553 | else { |
554 | $offsets[$num] = $root->{end}; |
555 | seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); |
556 | print( $fh pack($self->{long_pack}, $root->{end}) ); |
557 | |
8db25060 |
558 | my $blist_tag = $self->create_tag($obj, $root->{end}, SIG_BLIST, chr(0) x $self->{bucket_list_size}); |
75be6413 |
559 | |
560 | seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); |
561 | print( $fh $key . pack($self->{long_pack}, $old_subloc || $root->{end}) ); |
562 | } |
563 | } # i loop |
564 | |
565 | return; |
566 | } |
567 | |
8db25060 |
568 | sub read_from_loc { |
569 | my $self = shift; |
570 | my ($obj, $subloc) = @_; |
571 | |
572 | my $fh = $obj->_fh; |
573 | |
574 | ## |
575 | # Found match -- seek to offset and read signature |
576 | ## |
577 | my $signature; |
578 | seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); |
579 | read( $fh, $signature, SIG_SIZE); |
580 | |
581 | ## |
582 | # If value is a hash or array, return new DBM::Deep object with correct offset |
583 | ## |
584 | if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) { |
585 | my $obj = DBM::Deep->new( |
586 | type => $signature, |
587 | base_offset => $subloc, |
588 | root => $obj->_root, |
589 | ); |
590 | |
591 | if ($obj->_root->{autobless}) { |
592 | ## |
593 | # Skip over value and plain key to see if object needs |
594 | # to be re-blessed |
595 | ## |
596 | seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR); |
597 | |
598 | my $size; |
599 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
600 | if ($size) { seek($fh, $size, SEEK_CUR); } |
601 | |
602 | my $bless_bit; |
603 | read( $fh, $bless_bit, 1); |
604 | if (ord($bless_bit)) { |
605 | ## |
606 | # Yes, object needs to be re-blessed |
607 | ## |
608 | my $class_name; |
609 | read( $fh, $size, $self->{data_size}); $size = unpack($self->{data_pack}, $size); |
610 | if ($size) { read( $fh, $class_name, $size); } |
611 | if ($class_name) { $obj = bless( $obj, $class_name ); } |
612 | } |
613 | } |
614 | |
615 | return $obj; |
616 | } |
617 | elsif ( $signature eq SIG_INTERNAL ) { |
618 | my $size; |
619 | read( $fh, $size, $self->{data_size}); |
620 | $size = unpack($self->{data_pack}, $size); |
621 | |
622 | if ( $size ) { |
623 | my $new_loc; |
624 | read( $fh, $new_loc, $size ); |
625 | $new_loc = unpack( $self->{long_pack}, $new_loc ); |
626 | |
627 | return $self->read_from_loc( $obj, $new_loc ); |
628 | } |
629 | else { |
630 | return; |
631 | } |
632 | } |
633 | ## |
634 | # Otherwise return actual value |
635 | ## |
636 | elsif ($signature eq SIG_DATA) { |
637 | my $size; |
638 | read( $fh, $size, $self->{data_size}); |
639 | $size = unpack($self->{data_pack}, $size); |
640 | |
641 | my $value = ''; |
642 | if ($size) { read( $fh, $value, $size); } |
643 | return $value; |
644 | } |
645 | |
646 | ## |
647 | # Key exists, but content is null |
648 | ## |
649 | return; |
650 | } |
651 | |
9020ee8c |
652 | sub get_bucket_value { |
beac1dff |
653 | ## |
654 | # Fetch single value given tag and MD5 digested key. |
655 | ## |
656 | my $self = shift; |
657 | my ($obj, $tag, $md5) = @_; |
9020ee8c |
658 | |
386bab6c |
659 | my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); |
660 | if ( $subloc ) { |
8db25060 |
661 | return $self->read_from_loc( $obj, $subloc ); |
386bab6c |
662 | } |
beac1dff |
663 | return; |
9020ee8c |
664 | } |
ab0e4957 |
665 | |
666 | sub delete_bucket { |
beac1dff |
667 | ## |
668 | # Delete single key/value pair given tag and MD5 digested key. |
669 | ## |
670 | my $self = shift; |
671 | my ($obj, $tag, $md5) = @_; |
ab0e4957 |
672 | |
386bab6c |
673 | my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); |
674 | if ( $subloc ) { |
675 | my $fh = $obj->_fh; |
676 | seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET); |
677 | print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) ); |
251dfd0e |
678 | print( $fh chr(0) x $self->{bucket_size} ); |
d0b74c17 |
679 | |
ab0e4957 |
680 | return 1; |
386bab6c |
681 | } |
beac1dff |
682 | return; |
ab0e4957 |
683 | } |
684 | |
912d50b1 |
685 | sub bucket_exists { |
beac1dff |
686 | ## |
687 | # Check existence of single key given tag and MD5 digested key. |
688 | ## |
689 | my $self = shift; |
690 | my ($obj, $tag, $md5) = @_; |
912d50b1 |
691 | |
386bab6c |
692 | my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 ); |
d5d7c51d |
693 | return $subloc && 1; |
912d50b1 |
694 | } |
695 | |
6736c116 |
696 | sub find_bucket_list { |
beac1dff |
697 | ## |
698 | # Locate offset for bucket list, given digested key |
699 | ## |
700 | my $self = shift; |
d0b74c17 |
701 | my ($obj, $md5, $args) = @_; |
702 | $args = {} unless $args; |
703 | |
beac1dff |
704 | ## |
705 | # Locate offset for bucket list using digest index system |
706 | ## |
e5fc7e69 |
707 | my $tag = $self->load_tag($obj, $obj->_base_offset) |
d5d7c51d |
708 | or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" ); |
d0b74c17 |
709 | |
e5fc7e69 |
710 | my $ch = 0; |
8db25060 |
711 | while ($tag->{signature} ne SIG_BLIST) { |
d0b74c17 |
712 | my $num = ord substr($md5, $ch, 1); |
713 | |
714 | my $ref_loc = $tag->{offset} + ($num * $self->{long_size}); |
715 | $tag = $self->index_lookup( $obj, $tag, $num ); |
716 | |
717 | if (!$tag) { |
29b01632 |
718 | return if !$args->{create}; |
d0b74c17 |
719 | |
d5d7c51d |
720 | my $fh = $obj->_fh; |
721 | seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET); |
16d1ad9b |
722 | |
723 | my $loc = $self->_request_space( |
724 | $obj, $self->tag_size( $self->{bucket_list_size} ), |
725 | ); |
726 | |
727 | print( $fh pack($self->{long_pack}, $loc) ); |
d0b74c17 |
728 | |
d5d7c51d |
729 | $tag = $self->create_tag( |
16d1ad9b |
730 | $obj, $loc, SIG_BLIST, |
d5d7c51d |
731 | chr(0) x $self->{bucket_list_size}, |
732 | ); |
733 | |
734 | $tag->{ref_loc} = $ref_loc; |
735 | $tag->{ch} = $ch; |
736 | |
737 | last; |
d0b74c17 |
738 | } |
739 | |
16d1ad9b |
740 | $tag->{ch} = $ch++; |
d0b74c17 |
741 | $tag->{ref_loc} = $ref_loc; |
beac1dff |
742 | } |
d0b74c17 |
743 | |
beac1dff |
744 | return $tag; |
6736c116 |
745 | } |
746 | |
d0b74c17 |
747 | sub index_lookup { |
748 | ## |
749 | # Given index tag, lookup single entry in index and return . |
750 | ## |
751 | my $self = shift; |
752 | my ($obj, $tag, $index) = @_; |
753 | |
754 | my $location = unpack( |
755 | $self->{long_pack}, |
756 | substr( |
757 | $tag->{content}, |
758 | $index * $self->{long_size}, |
759 | $self->{long_size}, |
760 | ), |
761 | ); |
762 | |
763 | if (!$location) { return; } |
764 | |
765 | return $self->load_tag( $obj, $location ); |
766 | } |
767 | |
6736c116 |
768 | sub traverse_index { |
beac1dff |
769 | ## |
770 | # Scan index and recursively step into deeper levels, looking for next key. |
771 | ## |
6736c116 |
772 | my $self = shift; |
773 | my ($obj, $offset, $ch, $force_return_next) = @_; |
d0b74c17 |
774 | |
beac1dff |
775 | my $tag = $self->load_tag($obj, $offset ); |
6736c116 |
776 | |
777 | my $fh = $obj->_fh; |
d0b74c17 |
778 | |
8db25060 |
779 | if ($tag->{signature} ne SIG_BLIST) { |
beac1dff |
780 | my $content = $tag->{content}; |
e5fc7e69 |
781 | my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1)); |
d0b74c17 |
782 | |
d5d7c51d |
783 | for (my $idx = $start; $idx < (2**8); $idx++) { |
e5fc7e69 |
784 | my $subloc = unpack( |
785 | $self->{long_pack}, |
e06824f8 |
786 | substr( |
787 | $content, |
788 | $idx * $self->{long_size}, |
789 | $self->{long_size}, |
790 | ), |
e5fc7e69 |
791 | ); |
792 | |
beac1dff |
793 | if ($subloc) { |
e5fc7e69 |
794 | my $result = $self->traverse_index( |
795 | $obj, $subloc, $ch + 1, $force_return_next, |
796 | ); |
797 | |
beac1dff |
798 | if (defined($result)) { return $result; } |
799 | } |
800 | } # index loop |
d0b74c17 |
801 | |
beac1dff |
802 | $obj->{return_next} = 1; |
803 | } # tag is an index |
d0b74c17 |
804 | |
e5fc7e69 |
805 | else { |
beac1dff |
806 | my $keys = $tag->{content}; |
807 | if ($force_return_next) { $obj->{return_next} = 1; } |
d0b74c17 |
808 | |
beac1dff |
809 | ## |
810 | # Iterate through buckets, looking for a key match |
811 | ## |
8db25060 |
812 | for (my $i = 0; $i < $self->{max_buckets}; $i++) { |
9cec1360 |
813 | my ($key, $subloc) = $self->_get_key_subloc( $keys, $i ); |
d0b74c17 |
814 | |
8db25060 |
815 | # End of bucket list -- return to outer loop |
beac1dff |
816 | if (!$subloc) { |
beac1dff |
817 | $obj->{return_next} = 1; |
818 | last; |
819 | } |
8db25060 |
820 | # Located previous key -- return next one found |
beac1dff |
821 | elsif ($key eq $obj->{prev_md5}) { |
beac1dff |
822 | $obj->{return_next} = 1; |
823 | next; |
824 | } |
8db25060 |
825 | # Seek to bucket location and skip over signature |
beac1dff |
826 | elsif ($obj->{return_next}) { |
8db25060 |
827 | seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); |
d0b74c17 |
828 | |
beac1dff |
829 | # Skip over value to get to plain key |
8db25060 |
830 | my $sig; |
831 | read( $fh, $sig, SIG_SIZE ); |
832 | |
beac1dff |
833 | my $size; |
e5fc7e69 |
834 | read( $fh, $size, $self->{data_size}); |
835 | $size = unpack($self->{data_pack}, $size); |
beac1dff |
836 | if ($size) { seek($fh, $size, SEEK_CUR); } |
d0b74c17 |
837 | |
beac1dff |
838 | # Read in plain key and return as scalar |
beac1dff |
839 | my $plain_key; |
e5fc7e69 |
840 | read( $fh, $size, $self->{data_size}); |
841 | $size = unpack($self->{data_pack}, $size); |
beac1dff |
842 | if ($size) { read( $fh, $plain_key, $size); } |
d0b74c17 |
843 | |
beac1dff |
844 | return $plain_key; |
845 | } |
8db25060 |
846 | } |
d0b74c17 |
847 | |
beac1dff |
848 | $obj->{return_next} = 1; |
849 | } # tag is a bucket list |
d0b74c17 |
850 | |
beac1dff |
851 | return; |
6736c116 |
852 | } |
853 | |
854 | sub get_next_key { |
beac1dff |
855 | ## |
856 | # Locate next key, given digested previous one |
857 | ## |
6736c116 |
858 | my $self = shift; |
859 | my ($obj) = @_; |
d0b74c17 |
860 | |
beac1dff |
861 | $obj->{prev_md5} = $_[1] ? $_[1] : undef; |
862 | $obj->{return_next} = 0; |
d0b74c17 |
863 | |
beac1dff |
864 | ## |
865 | # If the previous key was not specifed, start at the top and |
866 | # return the first one found. |
867 | ## |
868 | if (!$obj->{prev_md5}) { |
869 | $obj->{prev_md5} = chr(0) x $self->{hash_size}; |
870 | $obj->{return_next} = 1; |
871 | } |
d0b74c17 |
872 | |
beac1dff |
873 | return $self->traverse_index( $obj, $obj->_base_offset, 0 ); |
6736c116 |
874 | } |
875 | |
75be6413 |
876 | # Utilities |
877 | |
9cec1360 |
878 | sub _get_key_subloc { |
75be6413 |
879 | my $self = shift; |
880 | my ($keys, $idx) = @_; |
881 | |
9cec1360 |
882 | my ($key, $subloc) = unpack( |
883 | "a$self->{hash_size} $self->{long_pack}", |
75be6413 |
884 | substr( |
885 | $keys, |
9cec1360 |
886 | ($idx * $self->{bucket_size}), |
887 | $self->{bucket_size}, |
75be6413 |
888 | ), |
889 | ); |
890 | |
9cec1360 |
891 | return ($key, $subloc); |
75be6413 |
892 | } |
893 | |
d608b06e |
894 | sub _find_in_buckets { |
895 | my $self = shift; |
896 | my ($tag, $md5) = @_; |
897 | |
898 | BUCKET: |
899 | for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) { |
900 | my ($key, $subloc) = $self->_get_key_subloc( $tag->{content}, $i ); |
901 | |
902 | return ($subloc, $i * $self->{bucket_size}) unless $subloc; |
903 | |
904 | next BUCKET if $key ne $md5; |
905 | |
906 | return ($subloc, $i * $self->{bucket_size}); |
907 | } |
908 | |
909 | return; |
910 | } |
911 | |
994ccd8e |
912 | sub _request_space { |
913 | my $self = shift; |
914 | my ($obj, $size) = @_; |
915 | |
916 | my $loc = $obj->_root->{end}; |
c9ec091a |
917 | $obj->_root->{end} += $size; |
994ccd8e |
918 | |
919 | return $loc; |
920 | } |
921 | |
922 | sub _release_space { |
923 | my $self = shift; |
924 | my ($obj, $size, $loc) = @_; |
925 | |
926 | return; |
927 | } |
928 | |
a20d9a3f |
929 | 1; |
930 | __END__ |
d5d7c51d |
931 | |
932 | # This will be added in later, after more refactoring is done. This is an early |
933 | # attempt at refactoring on the physical level instead of the virtual level. |
934 | sub _read_at { |
935 | my $self = shift; |
936 | my ($obj, $spot, $amount, $unpack) = @_; |
937 | |
938 | my $fh = $obj->_fh; |
939 | seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET ); |
940 | |
941 | my $buffer; |
942 | my $bytes_read = read( $fh, $buffer, $amount ); |
943 | |
944 | if ( $unpack ) { |
945 | $buffer = unpack( $unpack, $buffer ); |
946 | } |
947 | |
948 | if ( wantarray ) { |
949 | return ($buffer, $bytes_read); |
950 | } |
951 | else { |
952 | return $buffer; |
953 | } |
954 | } |