Commit | Line | Data |
a20d9a3f |
1 | package DBM::Deep::Engine; |
2 | |
use strict;

use Digest::MD5 ();
use Fcntl qw( :DEFAULT :flock :seek );
use Scalar::Util ();
6 | |
8db25060 |
##
# File and tag signatures. SIG_FILE is written at the start of a database
# file; each one-character signature is the first byte of a tag and says what
# the tag holds. These are part of the file format and should never change.
##
use constant {
    SIG_FILE     => 'DPDB', # marks a file as a Deep DB
    SIG_INTERNAL => 'i',    # value is a reference to another location in the file
    SIG_HASH     => 'H',    # value is a hash
    SIG_ARRAY    => 'A',    # value is an array
    SIG_NULL     => 'N',    # value is undef
    SIG_DATA     => 'D',    # value is a plain scalar
    SIG_INDEX    => 'I',    # tag is an index level
    SIG_BLIST    => 'B',    # tag is a bucket list
    SIG_FREE     => 'F',    # space handed back through _release_space()
    SIG_SIZE     => 1,      # length, in bytes, of a tag signature
};
20 | |
612969fb |
sub precalc_sizes {
    ##
    # Derive the index, bucket and bucket list sizes (in bytes) from the
    # current long_size, hash_size and max_buckets settings.
    # Always returns 1.
    ##
    my ($self) = @_;

    my $long_size   = $self->{long_size};
    my $bucket_size = $self->{hash_size} + $long_size * 2;

    # An index has 2**8 slots, each one long wide.
    $self->{index_size} = (2**8) * $long_size;

    # A bucket is a digest followed by two longs.
    $self->{bucket_size} = $bucket_size;

    $self->{bucket_list_size} = $self->{max_buckets} * $bucket_size;

    return 1;
}
33 | |
sub set_pack {
    ##
    # Choose the sizes and pack() templates used for file offsets and for
    # data length prefixes (see file header for more), then recalculate the
    # derived sizes. Any argument that is false falls back to its default.
    ##
    my ($self, $long_s, $long_p, $data_s, $data_p) = @_;

    ##
    # Offsets: 4 and 'N' (the default) give 32-bit offset tags, with a
    # theoretical limit of 4 GB per file.
    #    (Perl must be compiled with largefile support for files > 2 GB)
    #
    # 8 and 'Q' give 64-bit offsets, with a theoretical limit of 16 XB per
    # file.
    #    (Perl must be compiled with largefile and 64-bit long support)
    ##
    $self->{long_size} = $long_s || 4;
    $self->{long_pack} = $long_p || 'N';

    ##
    # Data lengths: 4 and 'N' give 32-bit length prefixes, a limit of 4 GB
    # for each key/value. Upgrading this is possible (see above) but probably
    # not necessary. If you need more than 4 GB for a single key or value,
    # this module is really not for you :-)
    ##
    $self->{data_size} = $data_s || 4;
    $self->{data_pack} = $data_p || 'N';

    return $self->precalc_sizes();
}
63 | |
sub set_digest {
    ##
    # Choose the function used to digest keys and the size, in bytes, of the
    # digests it produces, then recalculate the derived sizes. The defaults
    # are MD5 and 16.
    ##
    my ($self, $digest_func, $hash_size) = @_;

    $self->{digest}    = $digest_func || \&Digest::MD5::md5;
    $self->{hash_size} = $hash_size   || 16;

    return $self->precalc_sizes();
}
76 | |
sub new {
    ##
    # Build an engine with the default pack formats, digest function and
    # bucket count, and with the derived sizes already calculated.
    ##
    my ($class, $args) = @_;    # $args is accepted but not consulted here

    my %settings = (
        long_size => 4,
        long_pack => 'N',
        data_size => 4,
        data_pack => 'N',

        digest    => \&Digest::MD5::md5,
        hash_size => 16,

        ##
        # Maximum number of buckets per list before another level of indexing
        # is done.
        # Increase this value for slightly greater speed, but larger database
        # files. DO NOT decrease this value below 16, due to risk of recursive
        # reindex overrun.
        ##
        max_buckets => 16,
    );

    my $self = bless { %settings }, $class;
    $self->precalc_sizes;

    return $self;
}
104 | |
70b55428 |
sub setup_fh {
    ##
    # Prepare $obj for use. Opens its file if that has not been done yet and,
    # when $obj has no base offset, either initialises an empty file
    # (signature plus master index) or validates an existing one (signature
    # and master index type). The first time through it also records the
    # file's inode and size in the root.
    #
    # The file is locked exclusively while this runs. Returns 1; problems are
    # reported through $obj->_throw_error (presumably by dying -- the lock is
    # released or the file closed before each such call).
    ##
    my $self = shift;
    my ($obj) = @_;

    $self->open( $obj ) if !defined $obj->_fh;

    my $fh = $obj->_fh;
    flock $fh, LOCK_EX;

    unless ( $obj->{base_offset} ) {
        seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET);
        my $signature;
        my $bytes_read = read( $fh, $signature, length(SIG_FILE));

        ##
        # read() gives undef for a failure and 0 at end of file. Only the
        # latter means the file is empty; initialising after a failed read
        # would overwrite whatever the file already holds.
        ##
        if (!defined $bytes_read) {
            my $read_error = "$!";
            flock $fh, LOCK_UN;
            $obj->_throw_error("Cannot read file signature: $read_error");
        }

        ##
        # File is empty -- write signature and master index
        ##
        if (!$bytes_read) {
            my $loc = $self->_request_space( $obj, length( SIG_FILE ) );
            seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET);
            print( $fh SIG_FILE);

            $obj->{base_offset} = $self->_request_space(
                $obj, $self->tag_size( $self->{index_size} ),
            );

            $self->write_tag(
                $obj, $obj->_base_offset, $obj->_type,
                chr(0)x$self->{index_size},
            );

            # Flush the filehandle: switching autoflush on forces the
            # buffered output out, after which the old setting is restored.
            my $old_fh = select $fh;
            my $old_af = $|; $| = 1; $| = $old_af;
            select $old_fh;
        }
        else {
            # The master index follows directly after the signature.
            $obj->{base_offset} = $bytes_read;

            ##
            # Check signature was valid
            ##
            unless ($signature eq SIG_FILE) {
                # Closing the filehandle also gives up the lock.
                $self->close_fh( $obj );
                $obj->_throw_error("Signature not found -- file is not a Deep DB");
            }

            ##
            # Get our type from master index signature
            ##
            my $tag = $self->load_tag($obj, $obj->_base_offset);
            unless ($tag) {
                flock $fh, LOCK_UN;
                $obj->_throw_error("Corrupted file, no master index record");
            }

            unless ($obj->{type} eq $tag->{signature}) {
                flock $fh, LOCK_UN;
                $obj->_throw_error("File type mismatch");
            }
        }
    }

    #XXX We have to make sure we don't mess up when autoflush isn't turned on
    unless ( $obj->_root->{inode} ) {
        my @stats = stat($obj->_fh);
        $obj->_root->{inode} = $stats[1];    # inode number
        $obj->_root->{end} = $stats[7];      # size in bytes; next free location
    }

    flock $fh, LOCK_UN;

    return 1;
}
175 | |
a20d9a3f |
sub open {
    ##
    # Open the database file named in the root of $obj for reading and
    # writing, creating it if it does not exist, and keep the filehandle in
    # the root. Failure to open is reported through $obj->_throw_error.
    # Returns 1.
    ##
    my ($self, $obj) = @_;

    my $root     = $obj->_root;
    my $filename = $root->{file};

    # Theoretically, adding O_BINARY should remove the need for the binmode.
    # Of course, testing it is going to be ... interesting.
    my $mode = O_RDWR | O_CREAT | O_BINARY;

    my $fh;
    unless ( sysopen( $fh, $filename, $mode ) ) {
        $obj->_throw_error("Cannot sysopen file '$filename': $!");
    }
    $root->{fh} = $fh;

    #XXX Can we remove this by using the right sysopen() flags?
    # Maybe ... q.v. above
    binmode $fh; # for win32

    # Turn on autoflush for this handle only, leaving the selected handle
    # as it was.
    if ( $root->{autoflush} ) {
        my $selected = select $fh;
        $| = 1;
        select $selected;
    }

    return 1;
}
206 | |
3d1b8be9 |
sub close_fh {
    ##
    # Close the filehandle kept in the root of $obj, if there is one, and
    # forget it. Always returns 1.
    ##
    my ($self, $obj) = @_;

    my $root = $obj->_root;
    my $fh   = $root->{fh};

    close $fh if $fh;
    $root->{fh} = undef;

    return 1;
}
218 | |
16d1ad9b |
sub tag_size {
    ##
    # Number of bytes a tag takes on disk when its content is $size bytes
    # long: the signature, the length prefix and the content itself.
    ##
    my ($self, $size) = @_;

    my $header_size = SIG_SIZE + $self->{data_size};

    return $header_size + $size;
}
224 | |
9e4f83a0 |
sub write_tag {
    ##
    # Write a tag -- signature, packed content length, content -- to disk.
    #
    # With a defined $offset the tag is written there and a description of
    # it (signature, size, content, and the offset at which the content
    # starts) is returned. With an undefined $offset the tag is written
    # wherever the filehandle currently points and nothing is returned.
    ##
    my ($self, $obj, $offset, $sig, $content) = @_;

    my $size       = length( $content );
    my $fh         = $obj->_fh;
    my $positioned = defined $offset;

    seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET) if $positioned;

    print( $fh $sig . pack($self->{data_pack}, $size) . $content );

    return unless $positioned;

    my %tag = (
        signature => $sig,
        size      => $size,
        offset    => $offset + SIG_SIZE + $self->{data_size},
        content   => $content,
    );

    return \%tag;
}
250 | |
sub load_tag {
    ##
    # Given offset, load single tag and return signature, size and data.
    #
    # Returns a hash reference with the keys signature, size, content and
    # offset (where the content starts, i.e. just past the tag header).
    # Returns nothing when $offset is at or past the end of the file, or when
    # the file ends before a complete tag header could be read.
    ##
    my $self = shift;
    my ($obj, $offset) = @_;

#    print join(':',map{$_||''}caller(1)), $/;

    my $fh = $obj->_fh;

    seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET);

    #XXX I'm not sure this check will work if autoflush isn't enabled ...
    return if eof $fh;

    # The header is the signature followed by the packed content length.
    my $header_size = SIG_SIZE + $self->{data_size};

    my $header;
    my $bytes_read = read( $fh, $header, $header_size );

    # A failed or short read leaves no usable length to unpack; treat it the
    # same as having no tag at all rather than returning a half-filled one.
    return unless defined $bytes_read && $bytes_read == $header_size;

    my ($sig, $size) = unpack( "A $self->{data_pack}", $header );

    my $buffer;
    read( $fh, $buffer, $size);

    return {
        signature => $sig,
        size      => $size,
        offset    => $offset + SIG_SIZE + $self->{data_size},
        content   => $buffer
    };
}
281 | |
29b01632 |
sub _length_needed {
    ##
    # Work out how many bytes are needed to store $value under the plain key
    # $key: the tag header, the content, the key with its length prefix and,
    # when autobless is on, the bless flag and class name.
    ##
    my $self = shift;
    my ($obj, $value, $key) = @_;

    # Only a blessed reference can be a DBM::Deep object. Without the
    # blessed() test, isa() on a plain string treats the string as a class
    # name, so a value such as the string 'DBM::Deep' would be mistaken for
    # an object and have _root called on it below.
    my $is_dbm_deep = defined( Scalar::Util::blessed( $value ) ) && eval {
        local $SIG{'__DIE__'};
        $value->isa( 'DBM::Deep' );
    };

    # Tag signature and length prefix, then the key's length prefix and the
    # key itself.
    my $len = SIG_SIZE + $self->{data_size}
            + $self->{data_size} + length( $key );

    # A DBM::Deep object that lives in this same file is stored as a single
    # offset.
    if ( $is_dbm_deep && $value->_root eq $obj->_root ) {
        return $len + $self->{long_size};
    }

    my $r = Scalar::Util::reftype( $value ) || '';
    if ( $obj->_root->{autobless} ) {
        # This is for the bit saying whether or not this thing is blessed.
        $len += 1;
    }

    # Plain scalars take their own length; undef takes nothing.
    unless ( $r eq 'HASH' || $r eq 'ARRAY' ) {
        if ( defined $value ) {
            $len += length( $value );
        }
        return $len;
    }

    # Hashes and arrays are stored as an index.
    $len += $self->{index_size};

    # if autobless is enabled, must also take into consideration
    # the class name as it is stored after the key.
    if ( $obj->_root->{autobless} ) {
        my $value_class = Scalar::Util::blessed($value);
        if ( defined $value_class && !$is_dbm_deep ) {
            $len += $self->{data_size} + length($value_class);
        }
    }

    return $len;
}
324 | |
20f7b20c |
sub add_bucket {
    ##
    # Store one key/value pair in the bucket list described by $tag, given
    # the digest of the key, the plain (undigested) key and the value.
    #
    # Returns 1 when the digest was already in the list and 2 when it had to
    # be added. References to anything other than a hash or an array are
    # rejected through $obj->_throw_error.
    ##
    my ($self, $obj, $tag, $md5, $plain_key, $value) = @_;

    # This verifies that only supported values will be stored.
    my $r = Scalar::Util::reftype( $value );
    if ( defined $r && $r ne 'HASH' && $r ne 'ARRAY' ) {
        $obj->_throw_error(
            "Storage of variables of type '$r' is not supported."
        );
    }

    my $root = $obj->_root;
    my $fh   = $obj->_fh;

    my $needed = $self->_length_needed( $obj, $value, $plain_key );

    my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );

    my $location;
    my $result = 2;

#    $self->_release_space( $obj, $size, $subloc );
    #XXX This needs updating to use _release_space
    if ( $subloc ) {
        # Updating a known md5
        $result = 1;

        if ( $needed <= $size ) {
            # The new value fits where the old one is.
            $location = $subloc;
        }
        else {
            # Take fresh space and rewrite the location and size that follow
            # the digest in the bucket.
            $location = $self->_request_space( $obj, $needed );

            my $fields_at = $tag->{offset} + $offset + $self->{hash_size};
            seek( $fh, $fields_at + $root->{file_offset}, SEEK_SET );
            print( $fh pack($self->{long_pack}, $location ) );
            print( $fh pack($self->{long_pack}, $needed ) );
        }
    }
    elsif ( defined $offset ) {
        # Adding a new md5 in the empty spot at $offset
        $location = $self->_request_space( $obj, $needed );

        my $bucket_at = $tag->{offset} + $offset;
        seek( $fh, $bucket_at + $root->{file_offset}, SEEK_SET );
        print( $fh $md5 . pack($self->{long_pack}, $location ) );
        print( $fh pack($self->{long_pack}, $needed ) );
    }
    else {
        # If bucket didn't fit into list, split into a new index level
        # split_index() will do the _request_space() call
        $location = $self->split_index( $obj, $md5, $tag );
    }

    $self->write_value( $obj, $location, $plain_key, $value );

    return $result;
}
395 | |
sub write_value {
    ##
    # Write $value, followed by its plain $key, at $location.
    #
    # What is written: a tag (signature, length, content), then the key's
    # length and the key, then -- when autobless is on and the value is not
    # an internal reference -- a bless flag byte and, for blessed values, the
    # length and name of the class.
    #
    # Hashes and arrays are written as an empty index and then tied to
    # DBM::Deep at $location, after which their contents are assigned back
    # through the tie. Tied hashes and arrays that are not DBM::Deep objects
    # are rejected through $obj->_throw_error. Returns 1.
    ##
    my $self = shift;
    my ($obj, $location, $key, $value) = @_;

    my $fh = $obj->_fh;
    my $root = $obj->_root;

    # Only a blessed reference can be a DBM::Deep object. Without the
    # blessed() test, isa() on a plain string treats the string as a class
    # name, so a value such as the string 'DBM::Deep' would be mistaken for
    # an object and have _root called on it below.
    my $is_dbm_deep = defined( Scalar::Util::blessed( $value ) ) && eval {
        local $SIG{'__DIE__'};
        $value->isa( 'DBM::Deep' );
    };

    # A DBM::Deep object that lives in this same file.
    my $is_internal_ref = $is_dbm_deep && ($value->_root eq $root);

    seek($fh, $location + $root->{file_offset}, SEEK_SET);

    ##
    # Write signature based on content type, set content length and write
    # actual value. The undefined offset makes write_tag() write at the
    # position just sought to.
    ##
    my $r = Scalar::Util::reftype($value) || '';
    if ( $is_internal_ref ) {
        $self->write_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $value->_base_offset) );
    }
    elsif ($r eq 'HASH') {
        if ( !$is_dbm_deep && tied %{$value} ) {
            $obj->_throw_error( "Cannot store something that is tied" );
        }
        $self->write_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} );
    }
    elsif ($r eq 'ARRAY') {
        if ( !$is_dbm_deep && tied @{$value} ) {
            $obj->_throw_error( "Cannot store something that is tied" );
        }
        $self->write_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} );
    }
    elsif (!defined($value)) {
        $self->write_tag( $obj, undef, SIG_NULL, '' );
    }
    else {
        $self->write_tag( $obj, undef, SIG_DATA, $value );
    }

    ##
    # Plain key is stored AFTER value, as keys are typically fetched less often.
    ##
    print( $fh pack($self->{data_pack}, length($key)) . $key );

    # Internal references don't care about autobless
    return 1 if $is_internal_ref;

    ##
    # If value is blessed, preserve class name
    ##
    if ( $root->{autobless} ) {
        my $value_class = Scalar::Util::blessed($value);
        if ( defined $value_class && !$is_dbm_deep ) {
            print( $fh chr(1) );
            print( $fh pack($self->{data_pack}, length($value_class)) . $value_class );
        }
        else {
            print( $fh chr(0) );
        }
    }

    ##
    # If content is a hash or array, create new child DBM::Deep object and
    # pass each key or element to it. The contents are copied out first
    # because tying hides them.
    ##
    if ($r eq 'HASH') {
        my %x = %$value;
        tie %$value, 'DBM::Deep', {
            base_offset => $location,
            root => $root,
        };
        %$value = %x;
    }
    elsif ($r eq 'ARRAY') {
        my @x = @$value;
        tie @$value, 'DBM::Deep', {
            base_offset => $location,
            root => $root,
        };
        @$value = @x;
    }

    return 1;
}
486 | |
75be6413 |
sub split_index {
    ##
    # Replace the bucket list described by $tag with a new index level and
    # spread its entries, together with a new entry for $md5, over fresh
    # bucket lists.
    #
    # $tag must carry ref_loc (where the pointer to the old bucket list is
    # stored) and ch (which, plus one, picks the digest byte that selects a
    # slot in the new index); find_bucket_list() sets both.
    #
    # Returns the location reserved for the value that belongs to $md5.
    ##
    my $self = shift;
    my ($obj, $md5, $tag) = @_;

    my $fh = $obj->_fh;
    my $root = $obj->_root;

    # Reserve room for the new index tag ...
    my $loc = $self->_request_space(
        $obj, $self->tag_size( $self->{index_size} ),
    );

    # ... point the slot that referred to the old bucket list at it ...
    seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
    print( $fh pack($self->{long_pack}, $loc) );

    # ... and write it out with every slot empty.
    my $index_tag = $self->write_tag(
        $obj, $loc, SIG_INDEX,
        chr(0)x$self->{index_size},
    );

    # Space for the value of the new key.
    # NOTE(review): the amount reserved is the size of a bucket list tag, not
    # the length of the value -- confirm it is enough for what the caller
    # writes at this location.
    my $newtag_loc = $self->_request_space(
        $obj, $self->tag_size( $self->{bucket_list_size} ),
    );

    # The old list's entries with an entry for the new digest appended. The
    # appended entry's size field is 0.
    my $keys = $tag->{content}
             . $md5 . pack($self->{long_pack}, $newtag_loc)
                    . pack($self->{long_pack}, 0);

    # Content offset of the bucket list created for each index slot so far.
    my @newloc = ();
    BUCKET:
    # "<=" because there is one more entry than a list holds: the
    # max_buckets old ones and the appended one.
    for (my $i = 0; $i <= $self->{max_buckets}; $i++) {
        my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i );

        die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
        die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;

        # Slot in the new index, taken from the next byte of the digest.
        my $num = ord(substr($key, $tag->{ch} + 1, 1));

        # A bucket list for this slot was created on an earlier pass: read it
        # back and put the entry in it.
        if ($newloc[$num]) {
            seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
            my $subkeys;
            read( $fh, $subkeys, $self->{bucket_list_size});

            # This is looking for the first empty spot
            # NOTE(review): if the list has no empty spot nothing is
            # returned, $offset is undefined and the entry below lands at the
            # start of the list. (This $size also masks the one declared at
            # the top of the loop.)
            my ($subloc, $offset, $size) = $self->_find_in_buckets(
                { content => $subkeys }, '',
            );

            # Only the digest and location are written; the size field keeps
            # the zero it was created with.
            seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
            print( $fh $key . pack($self->{long_pack}, $old_subloc) );

            next;
        }

        # First entry for this slot: position on the slot in the new index,
        # reserve a bucket list and store its location in the slot.
        seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);

        my $loc = $self->_request_space(
            $obj, $self->tag_size( $self->{bucket_list_size} ),
        );

        print( $fh pack($self->{long_pack}, $loc) );

        # Write the empty bucket list, then put the entry in its first spot.
        # As above, the size field is left at zero.
        my $blist_tag = $self->write_tag(
            $obj, $loc, SIG_BLIST,
            chr(0)x$self->{bucket_list_size},
        );

        seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
        print( $fh $key . pack($self->{long_pack}, $old_subloc) );

        $newloc[$num] = $blist_tag->{offset};
    }

    # Hand back the old bucket list. $tag->{offset} is where its content
    # starts, so step back over the header to reach the start of the tag.
    $self->_release_space(
        $obj, $self->tag_size( $self->{bucket_list_size} ),
        $tag->{offset} - SIG_SIZE - $self->{data_size},
    );

    return $newtag_loc;
}
566 | |
8db25060 |
sub read_from_loc {
    ##
    # Read the value stored at $subloc.
    #
    # Returns a DBM::Deep object for a hash or array (re-blessed into the
    # stored class when autobless is on and a class name was saved), the
    # target of an internal reference, the string for plain data, and
    # nothing for a null value or an unrecognised signature.
    ##
    my ($self, $obj, $subloc) = @_;

    my $fh = $obj->_fh;

    # Reads one packed length from the current position and unpacks it.
    my $read_length = sub {
        my $packed;
        read( $fh, $packed, $self->{data_size});
        my $length = unpack($self->{data_pack}, $packed);
        return $length;
    };

    ##
    # Seek to offset and read signature
    ##
    my $signature;
    seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);
    read( $fh, $signature, SIG_SIZE);

    ##
    # Plain data: return actual value
    ##
    if ($signature eq SIG_DATA) {
        my $length = $read_length->();

        my $value = '';
        read( $fh, $value, $length) if $length;

        return $value;
    }

    ##
    # Internal reference: the content is the location of the real value
    ##
    if ($signature eq SIG_INTERNAL) {
        my $length = $read_length->();
        return unless $length;

        my $packed_loc;
        read( $fh, $packed_loc, $length );
        my $target = unpack( $self->{long_pack}, $packed_loc );

        return $self->read_from_loc( $obj, $target );
    }

    ##
    # If value is a hash or array, return new DBM::Deep object with correct
    # offset
    ##
    if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) {
        my $child = DBM::Deep->new({
            type        => $signature,
            base_offset => $subloc,
            root        => $obj->_root,
        });

        return $child unless $child->_root->{autobless};

        ##
        # Skip over value and plain key to see if object needs
        # to be re-blessed
        ##
        seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);

        my $key_length = $read_length->();
        seek($fh, $key_length, SEEK_CUR) if $key_length;

        my $bless_bit;
        read( $fh, $bless_bit, 1);
        return $child unless ord($bless_bit);

        ##
        # Yes, object needs to be re-blessed
        ##
        my $class_name;
        my $class_length = $read_length->();
        read( $fh, $class_name, $class_length) if $class_length;
        $child = bless( $child, $class_name ) if $class_name;

        return $child;
    }

    ##
    # Key exists, but content is null
    ##
    return;
}
652 | |
9020ee8c |
sub get_bucket_value {
    ##
    # Fetch the value for an MD5 digested key from the bucket list described
    # by $tag. Returns nothing when the key is not in the list.
    ##
    my ($self, $obj, $tag, $md5) = @_;

    my ($subloc) = $self->_find_in_buckets( $tag, $md5 );
    return unless $subloc;

    return $self->read_from_loc( $obj, $subloc );
}
ab0e4957 |
666 | |
sub delete_bucket {
    ##
    # Remove the entry for an MD5 digested key from the bucket list described
    # by $tag. Returns 1 when an entry was removed and nothing when the key
    # was not in the list.
    ##
    my ($self, $obj, $tag, $md5) = @_;

    my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );

    #XXX This needs _release_space()
    return unless $subloc;

    my $fh = $obj->_fh;
    seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET);

    # Move every entry after the deleted one up by one bucket ...
    my $following = substr( $tag->{content}, $offset + $self->{bucket_size} );
    print( $fh $following );

    # ... and blank the spot this leaves at the end.
    print( $fh chr(0) x $self->{bucket_size} );

    return 1;
}
686 | |
912d50b1 |
sub bucket_exists {
    ##
    # Say whether an MD5 digested key is in the bucket list described by
    # $tag: 1 when it is, a false value when it is not.
    ##
    my ($self, $obj, $tag, $md5) = @_;

    my ($subloc) = $self->_find_in_buckets( $tag, $md5 );

    return $subloc ? 1 : $subloc;
}
697 | |
6736c116 |
sub find_bucket_list {
    ##
    # Locate the bucket list for a digested key by walking the index levels
    # from the base offset of $obj, one digest byte per level.
    #
    # Returns the bucket list's tag, with ref_loc (where the pointer to it is
    # stored) and ch (the digest position that led to it) filled in. When no
    # list exists yet, one is created if $args->{create} is true; otherwise
    # nothing is returned.
    ##
    my ($self, $obj, $md5, $args) = @_;
    $args ||= {};

    my $tag = $self->load_tag($obj, $obj->_base_offset)
        or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" );

    my $ch = 0;
    until ($tag->{signature} eq SIG_BLIST) {
        my $num     = ord substr($md5, $ch, 1);
        my $ref_loc = $tag->{offset} + ($num * $self->{long_size});

        my $child = $self->index_lookup( $obj, $tag, $num );

        # Something is stored in this slot: step down into it.
        if ($child) {
            $child->{ch}      = $ch;
            $child->{ref_loc} = $ref_loc;

            $tag = $child;
            $ch++;
            next;
        }

        # The slot is empty.
        return if !$args->{create};

        my $loc = $self->_request_space(
            $obj, $self->tag_size( $self->{bucket_list_size} ),
        );

        # Store the new list's location in the slot, then write the list.
        my $fh = $obj->_fh;
        seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET);
        print( $fh pack($self->{long_pack}, $loc) );

        $tag = $self->write_tag(
            $obj, $loc, SIG_BLIST,
            chr(0)x$self->{bucket_list_size},
        );

        $tag->{ref_loc} = $ref_loc;
        $tag->{ch}      = $ch;

        last;
    }

    return $tag;
}
747 | |
d0b74c17 |
sub index_lookup {
    ##
    # Given an index tag, look up a single slot in it and return the tag
    # stored at the location found there. Returns nothing when the slot is
    # empty.
    ##
    my ($self, $obj, $tag, $index) = @_;

    my $long_size = $self->{long_size};
    my $slot      = substr( $tag->{content}, $index * $long_size, $long_size );
    my $location  = unpack( $self->{long_pack}, $slot );

    return unless $location;

    return $self->load_tag( $obj, $location );
}
768 | |
6736c116 |
sub traverse_index {
    ##
    # Scan index and recursively step into deeper levels, looking for next key.
    #
    # $offset is the location of the tag to examine and $ch the position in
    # the digest that selects a slot at this level. Progress is kept in
    # $obj->{prev_md5} (the digest to resume after) and $obj->{return_next}
    # (true once the next key found is the one to return).
    #
    # Returns the plain key found, or nothing when there is none in this
    # part of the index.
    ##
    my $self = shift;
    my ($obj, $offset, $ch, $force_return_next) = @_;

    my $tag = $self->load_tag($obj, $offset );

    my $fh = $obj->_fh;

    if ($tag->{signature} ne SIG_BLIST) {
        my $content = $tag->{content};
        # Start from the first slot when any key will do, otherwise from the
        # slot the previous digest falls in.
        my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));

        for (my $idx = $start; $idx < (2**8); $idx++) {
            my $subloc = unpack(
                $self->{long_pack},
                substr(
                    $content,
                    $idx * $self->{long_size},
                    $self->{long_size},
                ),
            );

            # Slot is in use -- look for the key one level down
            if ($subloc) {
                my $result = $self->traverse_index(
                    $obj, $subloc, $ch + 1, $force_return_next,
                );

                if (defined($result)) { return $result; }
            }
        } # index loop

        # Nothing found at this level; whatever comes next is to be returned
        $obj->{return_next} = 1;
    } # tag is an index

    else {
        my $keys = $tag->{content};
        if ($force_return_next) { $obj->{return_next} = 1; }

        ##
        # Iterate through buckets, looking for a key match
        ##
        for (my $i = 0; $i < $self->{max_buckets}; $i++) {
            my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );

            # End of bucket list -- return to outer loop
            if (!$subloc) {
                $obj->{return_next} = 1;
                last;
            }
            # Located previous key -- return next one found
            elsif ($key eq $obj->{prev_md5}) {
                $obj->{return_next} = 1;
                next;
            }
            # Seek to bucket location and skip over signature
            elsif ($obj->{return_next}) {
                seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET);

                # Skip over value to get to plain key
                my $sig;
                read( $fh, $sig, SIG_SIZE );

                my $size;
                read( $fh, $size, $self->{data_size});
                $size = unpack($self->{data_pack}, $size);
                if ($size) { seek($fh, $size, SEEK_CUR); }

                # Read in plain key and return as scalar
                # NOTE(review): a key of length 0 leaves $plain_key
                # undefined, which the index branch above treats as "nothing
                # found" -- confirm empty keys cannot be stored.
                my $plain_key;
                read( $fh, $size, $self->{data_size});
                $size = unpack($self->{data_pack}, $size);
                if ($size) { read( $fh, $plain_key, $size); }

                return $plain_key;
            }
        }

        # Bucket list exhausted; whatever comes next is to be returned
        $obj->{return_next} = 1;
    } # tag is a bucket list

    return;
}
854 | |
sub get_next_key {
    ##
    # Locate next key, given digested previous one. The digest is the
    # argument after $obj; leave it out (or pass a false value) to get the
    # first key.
    ##
    my ($self, $obj, $prev_md5) = @_;

    if ($prev_md5) {
        # Resume after the key with this digest.
        $obj->{prev_md5}    = $prev_md5;
        $obj->{return_next} = 0;
    }
    else {
        ##
        # If the previous key was not specified, start at the top and
        # return the first one found.
        ##
        $obj->{prev_md5}    = chr(0) x $self->{hash_size};
        $obj->{return_next} = 1;
    }

    return $self->traverse_index( $obj, $obj->_base_offset, 0 );
}
876 | |
75be6413 |
877 | # Utilities |
878 | |
9cec1360 |
sub _get_key_subloc {
    ##
    # Pick bucket number $idx out of the bucket list content $keys and
    # return its three fields: the digest, the location and the size.
    ##
    my ($self, $keys, $idx) = @_;

    my $bucket_size = $self->{bucket_size};

    # A digest of hash_size bytes followed by two longs.
    my $template = join ' ', "a$self->{hash_size}", ($self->{long_pack}) x 2;
    my $bucket   = substr( $keys, $idx * $bucket_size, $bucket_size );

    my ($key, $subloc, $size) = unpack( $template, $bucket );

    return ($key, $subloc, $size);
}
894 | |
d608b06e |
sub _find_in_buckets {
    ##
    # Search the bucket list in $tag->{content} for the digest $md5.
    #
    # Returns (location, offset of the bucket within the list, size) for the
    # matching bucket. When an empty bucket is reached first, the same list
    # is returned for that bucket, so the location is false and the offset
    # says where a new entry can go. Returns nothing when the list is full
    # and holds no match.
    ##
    my ($self, $tag, $md5) = @_;

    for my $i ( 0 .. $self->{max_buckets} - 1 ) {
        my ($key, $subloc, $size) = $self->_get_key_subloc(
            $tag->{content}, $i,
        );

        # Occupied by some other digest -- keep looking.
        next if $subloc && $key ne $md5;

        return ($subloc, $i * $self->{bucket_size}, $size);
    }

    return;
}
914 | |
7b1e1aa1 |
915 | #sub _print_at { |
916 | # my $self = shift; |
917 | # my ($obj, $spot, $data) = @_; |
918 | # |
919 | # my $fh = $obj->_fh; |
920 | # seek( $fh, $spot, SEEK_SET ); |
921 | # print( $fh $data ); |
922 | # |
923 | # return; |
924 | #} |
925 | |
994ccd8e |
sub _request_space {
    ##
    # Reserve $size bytes at the end of the file. Returns the location of
    # the reserved space, which is where the end was before the call.
    ##
    my ($self, $obj, $size) = @_;

    my $root = $obj->_root;
    my $loc  = $root->{end};

    $root->{end} += $size;

    return $loc;
}
935 | |
sub _release_space {
    ##
    # Mark the $size bytes at $loc as free by writing a free-space record
    # there: the SIG_FREE signature, the size and a next location, which is
    # always 0 here. Returns nothing.
    ##
    my ($self, $obj, $size, $loc) = @_;

    my $next_loc  = 0;
    my $long_pack = $self->{long_pack};

    my $fh = $obj->_fh;
    seek( $fh, $loc + $obj->_root->{file_offset}, SEEK_SET );

    my $record = SIG_FREE
               . pack($long_pack, $size )
               . pack($long_pack, $next_loc );
    print( $fh $record );

    return;
}
951 | |
a20d9a3f |
952 | 1; |
953 | __END__ |
d5d7c51d |
954 | |
955 | # This will be added in later, after more refactoring is done. This is an early |
956 | # attempt at refactoring on the physical level instead of the virtual level. |
sub _read_at {
    ##
    # Read $amount bytes at $spot and, when $unpack is given, unpack them
    # with that template. Returns the result in scalar context and the
    # result plus the number of bytes read in list context.
    ##
    my ($self, $obj, $spot, $amount, $unpack) = @_;

    my $fh = $obj->_fh;
    seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET );

    my $buffer;
    my $bytes_read = read( $fh, $buffer, $amount );

    $buffer = unpack( $unpack, $buffer ) if $unpack;

    return wantarray ? ($buffer, $bytes_read) : $buffer;
}
977 | } |