Commit | Line | Data |
a20d9a3f |
1 | package DBM::Deep::Engine; |
2 | |
3 | use strict; |
4 | |
5 | use Fcntl qw( :DEFAULT :flock :seek ); |
6 | |
8db25060 |
7 | ## |
8 | # Setup file and tag signatures. These should never change. |
9 | ## |
10 | sub SIG_FILE () { 'DPDB' } |
11 | sub SIG_INTERNAL () { 'i' } |
12 | sub SIG_HASH () { 'H' } |
13 | sub SIG_ARRAY () { 'A' } |
8db25060 |
14 | sub SIG_NULL () { 'N' } |
15 | sub SIG_DATA () { 'D' } |
16 | sub SIG_INDEX () { 'I' } |
17 | sub SIG_BLIST () { 'B' } |
7b1e1aa1 |
18 | sub SIG_FREE () { 'F' } |
8db25060 |
19 | sub SIG_SIZE () { 1 } |
20 | |
612969fb |
21 | sub new { |
22 | my $class = shift; |
23 | my ($args) = @_; |
24 | |
25 | my $self = bless { |
26 | long_size => 4, |
27 | long_pack => 'N', |
28 | data_size => 4, |
29 | data_pack => 'N', |
251dfd0e |
30 | |
612969fb |
31 | digest => \&Digest::MD5::md5, |
32 | hash_size => 16, |
251dfd0e |
33 | |
81d16922 |
34 | ## |
d5d7c51d |
35 | # Maximum number of buckets per list before another level of indexing is |
e0098e7f |
36 | # done. Increase this value for slightly greater speed, but larger database |
d5d7c51d |
37 | # files. DO NOT decrease this value below 16, due to risk of recursive |
38 | # reindex overrun. |
81d16922 |
39 | ## |
612969fb |
40 | max_buckets => 16, |
41 | }, $class; |
42 | |
e0098e7f |
43 | if ( defined $args->{pack_size} ) { |
44 | if ( lc $args->{pack_size} eq 'small' ) { |
45 | $args->{long_size} = 2; |
46 | $args->{long_pack} = 'S'; |
47 | } |
48 | elsif ( lc $args->{pack_size} eq 'medium' ) { |
49 | $args->{long_size} = 4; |
50 | $args->{long_pack} = 'N'; |
51 | } |
52 | elsif ( lc $args->{pack_size} eq 'large' ) { |
53 | $args->{long_size} = 8; |
54 | $args->{long_pack} = 'Q'; |
55 | } |
56 | else { |
57 | die "Unknown pack_size value: '$args->{pack_size}'\n"; |
58 | } |
59 | } |
60 | |
fde3db1a |
61 | # Grab the parameters we want to use |
62 | foreach my $param ( keys %$self ) { |
63 | next unless exists $args->{$param}; |
3e9498a1 |
64 | $self->{$param} = $args->{$param}; |
fde3db1a |
65 | } |
66 | |
e0098e7f |
67 | if ( $self->{max_buckets} < 16 ) { |
68 | warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n"; |
69 | $self->{max_buckets} = 16; |
70 | } |
71 | |
260a80b4 |
72 | return $self; |
73 | } |
74 | |
75 | sub calculate_sizes { |
76 | my $self = shift; |
77 | |
e0098e7f |
78 | $self->{index_size} = (2**8) * $self->{long_size}; |
79 | $self->{bucket_size} = $self->{hash_size} + $self->{long_size} * 2; |
80 | $self->{bucket_list_size} = $self->{max_buckets} * $self->{bucket_size}; |
612969fb |
81 | |
260a80b4 |
82 | return; |
1bf65be7 |
83 | } |
84 | |
fde3db1a |
85 | sub write_file_header { |
0d0f3d5d |
86 | my $self = shift; |
87 | my ($obj) = @_; |
88 | |
89 | my $fh = $obj->_fh; |
90 | |
42f79e07 |
91 | my $loc = $self->_request_space( |
260a80b4 |
92 | $obj, length( SIG_FILE ) + 12, |
42f79e07 |
93 | ); |
0d0f3d5d |
94 | seek($fh, $loc + $obj->_root->{file_offset}, SEEK_SET); |
260a80b4 |
95 | print( $fh |
96 | SIG_FILE, |
40fc9162 |
97 | pack('S', 1), |
260a80b4 |
98 | pack('S', $self->{long_size}), |
99 | pack('A', $self->{long_pack}), |
100 | pack('S', $self->{data_size}), |
101 | pack('A', $self->{data_pack}), |
102 | pack('S', $self->{max_buckets}), |
103 | ); |
0d0f3d5d |
104 | |
105 | return; |
106 | } |
107 | |
fde3db1a |
108 | sub read_file_header { |
e064ccd1 |
109 | my $self = shift; |
110 | my ($obj) = @_; |
111 | |
112 | my $fh = $obj->_fh; |
113 | |
114 | seek($fh, 0 + $obj->_root->{file_offset}, SEEK_SET); |
42f79e07 |
115 | my $buffer; |
116 | my $bytes_read = read( |
260a80b4 |
117 | $fh, $buffer, length(SIG_FILE) + 12, |
42f79e07 |
118 | ); |
e064ccd1 |
119 | |
120 | if ( $bytes_read ) { |
9b2370e0 |
121 | my ($signature, $version, @values) = unpack( 'A4 S S A S A S', $buffer ); |
e064ccd1 |
122 | unless ($signature eq SIG_FILE) { |
123 | $self->close_fh( $obj ); |
124 | $obj->_throw_error("Signature not found -- file is not a Deep DB"); |
125 | } |
260a80b4 |
126 | |
9b2370e0 |
127 | if ( @values < 5 || grep { !defined } @values ) { |
260a80b4 |
128 | die "DBM::Deep: Corrupted file - bad header\n"; |
129 | } |
9b2370e0 |
130 | |
131 | #XXX Add warnings if values weren't set right |
260a80b4 |
132 | @{$self}{qw( long_size long_pack data_size data_pack max_buckets )} = @values; |
e064ccd1 |
133 | } |
134 | |
135 | return $bytes_read; |
136 | } |
137 | |
70b55428 |
138 | sub setup_fh { |
139 | my $self = shift; |
140 | my ($obj) = @_; |
141 | |
142 | $self->open( $obj ) if !defined $obj->_fh; |
143 | |
6fde4ed2 |
144 | my $fh = $obj->_fh; |
145 | flock $fh, LOCK_EX; |
118ba343 |
146 | |
260a80b4 |
147 | #XXX The duplication of calculate_sizes needs to go away |
6fde4ed2 |
148 | unless ( $obj->{base_offset} ) { |
fde3db1a |
149 | my $bytes_read = $self->read_file_header( $obj ); |
118ba343 |
150 | |
260a80b4 |
151 | $self->calculate_sizes; |
152 | |
118ba343 |
153 | ## |
fde3db1a |
154 | # File is empty -- write header and master index |
118ba343 |
155 | ## |
156 | if (!$bytes_read) { |
fde3db1a |
157 | $self->write_file_header( $obj ); |
118ba343 |
158 | |
c9ec091a |
159 | $obj->{base_offset} = $self->_request_space( |
16d1ad9b |
160 | $obj, $self->tag_size( $self->{index_size} ), |
c9ec091a |
161 | ); |
118ba343 |
162 | |
9e4f83a0 |
163 | $self->write_tag( |
c9ec091a |
164 | $obj, $obj->_base_offset, $obj->_type, |
f37c15ab |
165 | chr(0)x$self->{index_size}, |
118ba343 |
166 | ); |
167 | |
168 | # Flush the filehandle |
169 | my $old_fh = select $fh; |
170 | my $old_af = $|; $| = 1; $| = $old_af; |
171 | select $old_fh; |
172 | } |
173 | else { |
174 | $obj->{base_offset} = $bytes_read; |
175 | |
176 | ## |
fde3db1a |
177 | # Get our type from master index header |
118ba343 |
178 | ## |
179 | my $tag = $self->load_tag($obj, $obj->_base_offset) |
180 | or $obj->_throw_error("Corrupted file, no master index record"); |
181 | |
182 | unless ($obj->{type} eq $tag->{signature}) { |
183 | $obj->_throw_error("File type mismatch"); |
184 | } |
185 | } |
118ba343 |
186 | } |
260a80b4 |
187 | else { |
188 | $self->calculate_sizes; |
189 | } |
e06824f8 |
190 | |
673464d9 |
191 | #XXX We have to make sure we don't mess up when autoflush isn't turned on |
70b55428 |
192 | unless ( $obj->_root->{inode} ) { |
193 | my @stats = stat($obj->_fh); |
194 | $obj->_root->{inode} = $stats[1]; |
195 | $obj->_root->{end} = $stats[7]; |
196 | } |
197 | |
6fde4ed2 |
198 | flock $fh, LOCK_UN; |
199 | |
70b55428 |
200 | return 1; |
201 | } |
202 | |
a20d9a3f |
203 | sub open { |
20f7b20c |
204 | ## |
205 | # Open a fh to the database, create if nonexistent. |
206 | # Make sure file signature matches DBM::Deep spec. |
207 | ## |
a20d9a3f |
208 | my $self = shift; |
70b55428 |
209 | my ($obj) = @_; |
a20d9a3f |
210 | |
673464d9 |
211 | # Theoretically, adding O_BINARY should remove the need for the binmode |
212 | # Of course, testing it is going to be ... interesting. |
213 | my $flags = O_RDWR | O_CREAT | O_BINARY; |
a20d9a3f |
214 | |
673464d9 |
215 | my $fh; |
d5d7c51d |
216 | my $filename = $obj->_root->{file}; |
217 | sysopen( $fh, $filename, $flags ) |
218 | or $obj->_throw_error("Cannot sysopen file '$filename': $!"); |
673464d9 |
219 | $obj->_root->{fh} = $fh; |
a20d9a3f |
220 | |
42f79e07 |
221 | # Even though we use O_BINARY, better be safe than sorry. |
222 | binmode $fh; |
a20d9a3f |
223 | |
cd59cad8 |
224 | if ($obj->_root->{autoflush}) { |
a20d9a3f |
225 | my $old = select $fh; |
226 | $|=1; |
227 | select $old; |
228 | } |
20f7b20c |
229 | |
a20d9a3f |
230 | return 1; |
231 | } |
232 | |
3d1b8be9 |
233 | sub close_fh { |
cd59cad8 |
234 | my $self = shift; |
a21f2d90 |
235 | my ($obj) = @_; |
cd59cad8 |
236 | |
237 | if ( my $fh = $obj->_root->{fh} ) { |
238 | close $fh; |
239 | } |
240 | $obj->_root->{fh} = undef; |
241 | |
242 | return 1; |
243 | } |
244 | |
16d1ad9b |
245 | sub tag_size { |
246 | my $self = shift; |
247 | my ($size) = @_; |
248 | return SIG_SIZE + $self->{data_size} + $size; |
249 | } |
250 | |
9e4f83a0 |
251 | sub write_tag { |
20f7b20c |
252 | ## |
253 | # Given offset, signature and content, create tag and write to disk |
254 | ## |
d4b1166e |
255 | my $self = shift; |
20f7b20c |
256 | my ($obj, $offset, $sig, $content) = @_; |
f37c15ab |
257 | my $size = length( $content ); |
20f7b20c |
258 | |
d4b1166e |
259 | my $fh = $obj->_fh; |
260 | |
f37c15ab |
261 | if ( defined $offset ) { |
262 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
263 | } |
264 | |
251dfd0e |
265 | print( $fh $sig . pack($self->{data_pack}, $size) . $content ); |
20f7b20c |
266 | |
f37c15ab |
267 | return unless defined $offset; |
268 | |
20f7b20c |
269 | return { |
270 | signature => $sig, |
271 | size => $size, |
8db25060 |
272 | offset => $offset + SIG_SIZE + $self->{data_size}, |
20f7b20c |
273 | content => $content |
274 | }; |
d4b1166e |
275 | } |
276 | |
277 | sub load_tag { |
20f7b20c |
278 | ## |
279 | # Given offset, load single tag and return signature, size and data |
280 | ## |
d4b1166e |
281 | my $self = shift; |
20f7b20c |
282 | my ($obj, $offset) = @_; |
283 | |
e06824f8 |
284 | # print join(':',map{$_||''}caller(1)), $/; |
285 | |
d4b1166e |
286 | my $fh = $obj->_fh; |
287 | |
20f7b20c |
288 | seek($fh, $offset + $obj->_root->{file_offset}, SEEK_SET); |
e5fc7e69 |
289 | |
75be6413 |
290 | #XXX I'm not sure this check will work if autoflush isn't enabled ... |
e5fc7e69 |
291 | return if eof $fh; |
20f7b20c |
292 | |
d4b1166e |
293 | my $b; |
8db25060 |
294 | read( $fh, $b, SIG_SIZE + $self->{data_size} ); |
251dfd0e |
295 | my ($sig, $size) = unpack( "A $self->{data_pack}", $b ); |
20f7b20c |
296 | |
297 | my $buffer; |
298 | read( $fh, $buffer, $size); |
299 | |
300 | return { |
301 | signature => $sig, |
302 | size => $size, |
8db25060 |
303 | offset => $offset + SIG_SIZE + $self->{data_size}, |
20f7b20c |
304 | content => $buffer |
305 | }; |
d4b1166e |
306 | } |
307 | |
56ec4340 |
308 | sub _get_dbm_object { |
309 | my $item = shift; |
310 | |
311 | my $obj = eval { |
312 | local $SIG{__DIE__}; |
313 | if ($item->isa( 'DBM::Deep' )) { |
314 | return $item; |
315 | } |
316 | return; |
317 | }; |
318 | return $obj if $obj; |
319 | |
320 | my $r = Scalar::Util::reftype( $item ) || ''; |
321 | if ( $r eq 'HASH' ) { |
322 | my $obj = eval { |
323 | local $SIG{__DIE__}; |
324 | my $obj = tied(%$item); |
325 | if ($obj->isa( 'DBM::Deep' )) { |
326 | return $obj; |
327 | } |
328 | return; |
329 | }; |
330 | return $obj if $obj; |
331 | } |
332 | elsif ( $r eq 'ARRAY' ) { |
333 | my $obj = eval { |
334 | local $SIG{__DIE__}; |
335 | my $obj = tied(@$item); |
336 | if ($obj->isa( 'DBM::Deep' )) { |
337 | return $obj; |
338 | } |
339 | return; |
340 | }; |
341 | return $obj if $obj; |
342 | } |
343 | |
344 | return; |
345 | } |
346 | |
29b01632 |
347 | sub _length_needed { |
348 | my $self = shift; |
f37c15ab |
349 | my ($obj, $value, $key) = @_; |
29b01632 |
350 | |
351 | my $is_dbm_deep = eval { |
352 | local $SIG{'__DIE__'}; |
353 | $value->isa( 'DBM::Deep' ); |
354 | }; |
355 | |
f37c15ab |
356 | my $len = SIG_SIZE + $self->{data_size} |
357 | + $self->{data_size} + length( $key ); |
29b01632 |
358 | |
f37c15ab |
359 | if ( $is_dbm_deep && $value->_root eq $obj->_root ) { |
360 | return $len + $self->{long_size}; |
29b01632 |
361 | } |
362 | |
363 | my $r = Scalar::Util::reftype( $value ) || ''; |
9a187d8c |
364 | if ( $obj->_root->{autobless} ) { |
365 | # This is for the bit saying whether or not this thing is blessed. |
366 | $len += 1; |
367 | } |
368 | |
29b01632 |
369 | unless ( $r eq 'HASH' || $r eq 'ARRAY' ) { |
f37c15ab |
370 | if ( defined $value ) { |
371 | $len += length( $value ); |
372 | } |
373 | return $len; |
29b01632 |
374 | } |
375 | |
f37c15ab |
376 | $len += $self->{index_size}; |
29b01632 |
377 | |
378 | # if autobless is enabled, must also take into consideration |
f37c15ab |
379 | # the class name as it is stored after the key. |
29b01632 |
380 | if ( $obj->_root->{autobless} ) { |
56ec4340 |
381 | my $c = Scalar::Util::blessed($value); |
382 | if ( defined $c && !$is_dbm_deep ) { |
383 | $len += $self->{data_size} + length($c); |
29b01632 |
384 | } |
385 | } |
386 | |
f37c15ab |
387 | return $len; |
29b01632 |
388 | } |
389 | |
20f7b20c |
390 | sub add_bucket { |
391 | ## |
392 | # Adds one key/value pair to bucket list, given offset, MD5 digest of key, |
393 | # plain (undigested) key and value. |
394 | ## |
d4b1166e |
395 | my $self = shift; |
20f7b20c |
396 | my ($obj, $tag, $md5, $plain_key, $value) = @_; |
75be6413 |
397 | |
eea0d863 |
398 | # This verifies that only supported values will be stored. |
399 | { |
400 | my $r = Scalar::Util::reftype( $value ); |
401 | last if !defined $r; |
402 | |
403 | last if $r eq 'HASH'; |
404 | last if $r eq 'ARRAY'; |
405 | |
406 | $obj->_throw_error( |
407 | "Storage of variables of type '$r' is not supported." |
408 | ); |
409 | } |
410 | |
20f7b20c |
411 | my $location = 0; |
412 | my $result = 2; |
413 | |
414 | my $root = $obj->_root; |
f37c15ab |
415 | my $fh = $obj->_fh; |
20f7b20c |
416 | |
f37c15ab |
417 | my $actual_length = $self->_length_needed( $obj, $value, $plain_key ); |
20f7b20c |
418 | |
9a187d8c |
419 | my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); |
75be6413 |
420 | |
f9c33187 |
421 | # $self->_release_space( $obj, $size, $subloc ); |
386bab6c |
422 | # Updating a known md5 |
f9c33187 |
423 | #XXX This needs updating to use _release_space |
386bab6c |
424 | if ( $subloc ) { |
425 | $result = 1; |
20f7b20c |
426 | |
386bab6c |
427 | if ($actual_length <= $size) { |
428 | $location = $subloc; |
20f7b20c |
429 | } |
75be6413 |
430 | else { |
f37c15ab |
431 | $location = $self->_request_space( $obj, $actual_length ); |
386bab6c |
432 | seek( |
433 | $fh, |
9a187d8c |
434 | $tag->{offset} + $offset |
435 | + $self->{hash_size} + $root->{file_offset}, |
386bab6c |
436 | SEEK_SET, |
437 | ); |
9a187d8c |
438 | print( $fh pack($self->{long_pack}, $location ) ); |
439 | print( $fh pack($self->{long_pack}, $actual_length ) ); |
75be6413 |
440 | } |
75be6413 |
441 | } |
386bab6c |
442 | # Adding a new md5 |
443 | elsif ( defined $offset ) { |
f37c15ab |
444 | $location = $self->_request_space( $obj, $actual_length ); |
386bab6c |
445 | |
446 | seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET ); |
9a187d8c |
447 | print( $fh $md5 . pack($self->{long_pack}, $location ) ); |
448 | print( $fh pack($self->{long_pack}, $actual_length ) ); |
386bab6c |
449 | } |
450 | # If bucket didn't fit into list, split into a new index level |
f9c33187 |
451 | # split_index() will do the _request_space() call |
386bab6c |
452 | else { |
f9c33187 |
453 | $location = $self->split_index( $obj, $md5, $tag ); |
386bab6c |
454 | } |
20f7b20c |
455 | |
d5d7c51d |
456 | $self->write_value( $obj, $location, $plain_key, $value ); |
457 | |
458 | return $result; |
459 | } |
460 | |
461 | sub write_value { |
462 | my $self = shift; |
463 | my ($obj, $location, $key, $value) = @_; |
464 | |
465 | my $fh = $obj->_fh; |
466 | my $root = $obj->_root; |
467 | |
9d4fa373 |
468 | my $dbm_deep_obj = _get_dbm_object( $value ); |
469 | if ( $dbm_deep_obj && $dbm_deep_obj->_root ne $obj->_root ) { |
470 | $obj->_throw_error( "Cannot cross-reference. Use export() instead" ); |
471 | } |
d5d7c51d |
472 | |
473 | seek($fh, $location + $root->{file_offset}, SEEK_SET); |
474 | |
20f7b20c |
475 | ## |
d5d7c51d |
476 | # Write signature based on content type, set content length and write |
477 | # actual value. |
20f7b20c |
478 | ## |
9d4fa373 |
479 | my $r = Scalar::Util::reftype( $value ) || ''; |
480 | if ( $dbm_deep_obj ) { |
481 | $self->write_tag( $obj, undef, SIG_INTERNAL,pack($self->{long_pack}, $dbm_deep_obj->_base_offset) ); |
f37c15ab |
482 | } |
483 | elsif ($r eq 'HASH') { |
9d4fa373 |
484 | if ( !$dbm_deep_obj && tied %{$value} ) { |
019ab3a1 |
485 | $obj->_throw_error( "Cannot store something that is tied" ); |
486 | } |
9e4f83a0 |
487 | $self->write_tag( $obj, undef, SIG_HASH, chr(0)x$self->{index_size} ); |
f37c15ab |
488 | } |
489 | elsif ($r eq 'ARRAY') { |
9d4fa373 |
490 | if ( !$dbm_deep_obj && tied @{$value} ) { |
019ab3a1 |
491 | $obj->_throw_error( "Cannot store something that is tied" ); |
492 | } |
9e4f83a0 |
493 | $self->write_tag( $obj, undef, SIG_ARRAY, chr(0)x$self->{index_size} ); |
f37c15ab |
494 | } |
495 | elsif (!defined($value)) { |
9e4f83a0 |
496 | $self->write_tag( $obj, undef, SIG_NULL, '' ); |
d5d7c51d |
497 | } |
498 | else { |
9e4f83a0 |
499 | $self->write_tag( $obj, undef, SIG_DATA, $value ); |
d5d7c51d |
500 | } |
20f7b20c |
501 | |
d5d7c51d |
502 | ## |
503 | # Plain key is stored AFTER value, as keys are typically fetched less often. |
504 | ## |
505 | print( $fh pack($self->{data_pack}, length($key)) . $key ); |
20f7b20c |
506 | |
9a187d8c |
507 | # Internal references don't care about autobless |
9d4fa373 |
508 | return 1 if $dbm_deep_obj; |
9a187d8c |
509 | |
d5d7c51d |
510 | ## |
511 | # If value is blessed, preserve class name |
512 | ## |
513 | if ( $root->{autobless} ) { |
56ec4340 |
514 | my $c = Scalar::Util::blessed($value); |
515 | if ( defined $c && !$dbm_deep_obj ) { |
d5d7c51d |
516 | print( $fh chr(1) ); |
56ec4340 |
517 | print( $fh pack($self->{data_pack}, length($c)) . $c ); |
20f7b20c |
518 | } |
d5d7c51d |
519 | else { |
520 | print( $fh chr(0) ); |
20f7b20c |
521 | } |
d5d7c51d |
522 | } |
20f7b20c |
523 | |
d5d7c51d |
524 | ## |
56ec4340 |
525 | # Tie the passed in reference so that changes to it are reflected in the |
526 | # datafile. The use of $location as the base_offset will act as the |
527 | # the linkage between parent and child. |
528 | # |
529 | # The overall assignment is a hack around the fact that just tying doesn't |
530 | # store the values. This may not be the wrong thing to do. |
d5d7c51d |
531 | ## |
9d4fa373 |
532 | if ($r eq 'HASH') { |
533 | my %x = %$value; |
534 | tie %$value, 'DBM::Deep', { |
535 | base_offset => $location, |
536 | root => $root, |
537 | }; |
538 | %$value = %x; |
539 | } |
540 | elsif ($r eq 'ARRAY') { |
541 | my @x = @$value; |
542 | tie @$value, 'DBM::Deep', { |
543 | base_offset => $location, |
544 | root => $root, |
545 | }; |
546 | @$value = @x; |
20f7b20c |
547 | } |
d4b1166e |
548 | |
d5d7c51d |
549 | return 1; |
d4b1166e |
550 | } |
551 | |
75be6413 |
552 | sub split_index { |
553 | my $self = shift; |
554 | my ($obj, $md5, $tag) = @_; |
555 | |
556 | my $fh = $obj->_fh; |
557 | my $root = $obj->_root; |
16d1ad9b |
558 | |
559 | my $loc = $self->_request_space( |
560 | $obj, $self->tag_size( $self->{index_size} ), |
561 | ); |
562 | |
7b1e1aa1 |
563 | seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); |
16d1ad9b |
564 | print( $fh pack($self->{long_pack}, $loc) ); |
75be6413 |
565 | |
9e4f83a0 |
566 | my $index_tag = $self->write_tag( |
16d1ad9b |
567 | $obj, $loc, SIG_INDEX, |
f37c15ab |
568 | chr(0)x$self->{index_size}, |
75be6413 |
569 | ); |
570 | |
f9c33187 |
571 | my $newtag_loc = $self->_request_space( |
572 | $obj, $self->tag_size( $self->{bucket_list_size} ), |
573 | ); |
75be6413 |
574 | |
7b1e1aa1 |
575 | my $keys = $tag->{content} |
f9c33187 |
576 | . $md5 . pack($self->{long_pack}, $newtag_loc) |
577 | . pack($self->{long_pack}, 0); |
75be6413 |
578 | |
f9c33187 |
579 | my @newloc = (); |
75be6413 |
580 | BUCKET: |
581 | for (my $i = 0; $i <= $self->{max_buckets}; $i++) { |
9a187d8c |
582 | my ($key, $old_subloc, $size) = $self->_get_key_subloc( $keys, $i ); |
75be6413 |
583 | |
f9c33187 |
584 | die "[INTERNAL ERROR]: No key in split_index()\n" unless $key; |
585 | die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc; |
75be6413 |
586 | |
75be6413 |
587 | my $num = ord(substr($key, $tag->{ch} + 1, 1)); |
588 | |
f9c33187 |
589 | if ($newloc[$num]) { |
590 | seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET); |
75be6413 |
591 | my $subkeys; |
592 | read( $fh, $subkeys, $self->{bucket_list_size}); |
593 | |
f9c33187 |
594 | # This is looking for the first empty spot |
7b1e1aa1 |
595 | my ($subloc, $offset, $size) = $self->_find_in_buckets( |
f9c33187 |
596 | { content => $subkeys }, '', |
7b1e1aa1 |
597 | ); |
75be6413 |
598 | |
f9c33187 |
599 | seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET); |
600 | print( $fh $key . pack($self->{long_pack}, $old_subloc) ); |
7b1e1aa1 |
601 | |
602 | next; |
75be6413 |
603 | } |
75be6413 |
604 | |
7b1e1aa1 |
605 | seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET); |
2603d86e |
606 | |
7b1e1aa1 |
607 | my $loc = $self->_request_space( |
608 | $obj, $self->tag_size( $self->{bucket_list_size} ), |
609 | ); |
2603d86e |
610 | |
7b1e1aa1 |
611 | print( $fh pack($self->{long_pack}, $loc) ); |
75be6413 |
612 | |
7b1e1aa1 |
613 | my $blist_tag = $self->write_tag( |
614 | $obj, $loc, SIG_BLIST, |
615 | chr(0)x$self->{bucket_list_size}, |
616 | ); |
617 | |
618 | seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); |
f9c33187 |
619 | print( $fh $key . pack($self->{long_pack}, $old_subloc) ); |
7b1e1aa1 |
620 | |
f9c33187 |
621 | $newloc[$num] = $blist_tag->{offset}; |
7b1e1aa1 |
622 | } |
623 | |
624 | $self->_release_space( |
f9c33187 |
625 | $obj, $self->tag_size( $self->{bucket_list_size} ), |
7b1e1aa1 |
626 | $tag->{offset} - SIG_SIZE - $self->{data_size}, |
627 | ); |
75be6413 |
628 | |
f9c33187 |
629 | return $newtag_loc; |
75be6413 |
630 | } |
631 | |
8db25060 |
632 | sub read_from_loc { |
633 | my $self = shift; |
634 | my ($obj, $subloc) = @_; |
635 | |
636 | my $fh = $obj->_fh; |
637 | |
638 | ## |
639 | # Found match -- seek to offset and read signature |
640 | ## |
641 | my $signature; |
642 | seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); |
643 | read( $fh, $signature, SIG_SIZE); |
644 | |
645 | ## |
646 | # If value is a hash or array, return new DBM::Deep object with correct offset |
647 | ## |
648 | if (($signature eq SIG_HASH) || ($signature eq SIG_ARRAY)) { |
685e40f1 |
649 | my $new_obj = DBM::Deep->new({ |
8db25060 |
650 | type => $signature, |
651 | base_offset => $subloc, |
652 | root => $obj->_root, |
685e40f1 |
653 | }); |
8db25060 |
654 | |
685e40f1 |
655 | if ($new_obj->_root->{autobless}) { |
8db25060 |
656 | ## |
657 | # Skip over value and plain key to see if object needs |
658 | # to be re-blessed |
659 | ## |
660 | seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR); |
661 | |
662 | my $size; |
c6ea6b6c |
663 | read( $fh, $size, $self->{data_size}); |
664 | $size = unpack($self->{data_pack}, $size); |
8db25060 |
665 | if ($size) { seek($fh, $size, SEEK_CUR); } |
666 | |
667 | my $bless_bit; |
668 | read( $fh, $bless_bit, 1); |
669 | if (ord($bless_bit)) { |
670 | ## |
671 | # Yes, object needs to be re-blessed |
672 | ## |
673 | my $class_name; |
c6ea6b6c |
674 | read( $fh, $size, $self->{data_size}); |
675 | $size = unpack($self->{data_pack}, $size); |
8db25060 |
676 | if ($size) { read( $fh, $class_name, $size); } |
685e40f1 |
677 | if ($class_name) { $new_obj = bless( $new_obj, $class_name ); } |
8db25060 |
678 | } |
679 | } |
680 | |
685e40f1 |
681 | return $new_obj; |
8db25060 |
682 | } |
683 | elsif ( $signature eq SIG_INTERNAL ) { |
684 | my $size; |
685 | read( $fh, $size, $self->{data_size}); |
686 | $size = unpack($self->{data_pack}, $size); |
687 | |
688 | if ( $size ) { |
689 | my $new_loc; |
690 | read( $fh, $new_loc, $size ); |
691 | $new_loc = unpack( $self->{long_pack}, $new_loc ); |
692 | |
693 | return $self->read_from_loc( $obj, $new_loc ); |
694 | } |
695 | else { |
696 | return; |
697 | } |
698 | } |
699 | ## |
700 | # Otherwise return actual value |
701 | ## |
702 | elsif ($signature eq SIG_DATA) { |
703 | my $size; |
704 | read( $fh, $size, $self->{data_size}); |
705 | $size = unpack($self->{data_pack}, $size); |
706 | |
707 | my $value = ''; |
708 | if ($size) { read( $fh, $value, $size); } |
709 | return $value; |
710 | } |
711 | |
712 | ## |
713 | # Key exists, but content is null |
714 | ## |
715 | return; |
716 | } |
717 | |
9020ee8c |
718 | sub get_bucket_value { |
beac1dff |
719 | ## |
720 | # Fetch single value given tag and MD5 digested key. |
721 | ## |
722 | my $self = shift; |
723 | my ($obj, $tag, $md5) = @_; |
9020ee8c |
724 | |
9a187d8c |
725 | my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); |
386bab6c |
726 | if ( $subloc ) { |
8db25060 |
727 | return $self->read_from_loc( $obj, $subloc ); |
386bab6c |
728 | } |
beac1dff |
729 | return; |
9020ee8c |
730 | } |
ab0e4957 |
731 | |
732 | sub delete_bucket { |
beac1dff |
733 | ## |
734 | # Delete single key/value pair given tag and MD5 digested key. |
735 | ## |
736 | my $self = shift; |
737 | my ($obj, $tag, $md5) = @_; |
ab0e4957 |
738 | |
9a187d8c |
739 | my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); |
f9c33187 |
740 | #XXX This needs _release_space() |
386bab6c |
741 | if ( $subloc ) { |
742 | my $fh = $obj->_fh; |
743 | seek($fh, $tag->{offset} + $offset + $obj->_root->{file_offset}, SEEK_SET); |
744 | print( $fh substr($tag->{content}, $offset + $self->{bucket_size} ) ); |
251dfd0e |
745 | print( $fh chr(0) x $self->{bucket_size} ); |
d0b74c17 |
746 | |
ab0e4957 |
747 | return 1; |
386bab6c |
748 | } |
beac1dff |
749 | return; |
ab0e4957 |
750 | } |
751 | |
912d50b1 |
752 | sub bucket_exists { |
beac1dff |
753 | ## |
754 | # Check existence of single key given tag and MD5 digested key. |
755 | ## |
756 | my $self = shift; |
757 | my ($obj, $tag, $md5) = @_; |
912d50b1 |
758 | |
9a187d8c |
759 | my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 ); |
d5d7c51d |
760 | return $subloc && 1; |
912d50b1 |
761 | } |
762 | |
6736c116 |
763 | sub find_bucket_list { |
beac1dff |
764 | ## |
765 | # Locate offset for bucket list, given digested key |
766 | ## |
767 | my $self = shift; |
d0b74c17 |
768 | my ($obj, $md5, $args) = @_; |
769 | $args = {} unless $args; |
770 | |
beac1dff |
771 | ## |
772 | # Locate offset for bucket list using digest index system |
773 | ## |
e5fc7e69 |
774 | my $tag = $self->load_tag($obj, $obj->_base_offset) |
d5d7c51d |
775 | or $obj->_throw_error( "INTERNAL ERROR - Cannot find tag" ); |
d0b74c17 |
776 | |
e5fc7e69 |
777 | my $ch = 0; |
8db25060 |
778 | while ($tag->{signature} ne SIG_BLIST) { |
d0b74c17 |
779 | my $num = ord substr($md5, $ch, 1); |
780 | |
781 | my $ref_loc = $tag->{offset} + ($num * $self->{long_size}); |
782 | $tag = $self->index_lookup( $obj, $tag, $num ); |
783 | |
784 | if (!$tag) { |
29b01632 |
785 | return if !$args->{create}; |
d0b74c17 |
786 | |
16d1ad9b |
787 | my $loc = $self->_request_space( |
788 | $obj, $self->tag_size( $self->{bucket_list_size} ), |
789 | ); |
790 | |
7b1e1aa1 |
791 | my $fh = $obj->_fh; |
792 | seek($fh, $ref_loc + $obj->_root->{file_offset}, SEEK_SET); |
16d1ad9b |
793 | print( $fh pack($self->{long_pack}, $loc) ); |
d0b74c17 |
794 | |
9e4f83a0 |
795 | $tag = $self->write_tag( |
16d1ad9b |
796 | $obj, $loc, SIG_BLIST, |
f37c15ab |
797 | chr(0)x$self->{bucket_list_size}, |
d5d7c51d |
798 | ); |
799 | |
800 | $tag->{ref_loc} = $ref_loc; |
801 | $tag->{ch} = $ch; |
802 | |
803 | last; |
d0b74c17 |
804 | } |
805 | |
16d1ad9b |
806 | $tag->{ch} = $ch++; |
d0b74c17 |
807 | $tag->{ref_loc} = $ref_loc; |
beac1dff |
808 | } |
d0b74c17 |
809 | |
beac1dff |
810 | return $tag; |
6736c116 |
811 | } |
812 | |
d0b74c17 |
813 | sub index_lookup { |
814 | ## |
815 | # Given index tag, lookup single entry in index and return . |
816 | ## |
817 | my $self = shift; |
818 | my ($obj, $tag, $index) = @_; |
819 | |
820 | my $location = unpack( |
821 | $self->{long_pack}, |
822 | substr( |
823 | $tag->{content}, |
824 | $index * $self->{long_size}, |
825 | $self->{long_size}, |
826 | ), |
827 | ); |
828 | |
829 | if (!$location) { return; } |
830 | |
831 | return $self->load_tag( $obj, $location ); |
832 | } |
833 | |
6736c116 |
834 | sub traverse_index { |
beac1dff |
835 | ## |
836 | # Scan index and recursively step into deeper levels, looking for next key. |
837 | ## |
6736c116 |
838 | my $self = shift; |
839 | my ($obj, $offset, $ch, $force_return_next) = @_; |
d0b74c17 |
840 | |
beac1dff |
841 | my $tag = $self->load_tag($obj, $offset ); |
6736c116 |
842 | |
843 | my $fh = $obj->_fh; |
d0b74c17 |
844 | |
8db25060 |
845 | if ($tag->{signature} ne SIG_BLIST) { |
beac1dff |
846 | my $content = $tag->{content}; |
e5fc7e69 |
847 | my $start = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1)); |
d0b74c17 |
848 | |
d5d7c51d |
849 | for (my $idx = $start; $idx < (2**8); $idx++) { |
e5fc7e69 |
850 | my $subloc = unpack( |
851 | $self->{long_pack}, |
e06824f8 |
852 | substr( |
853 | $content, |
854 | $idx * $self->{long_size}, |
855 | $self->{long_size}, |
856 | ), |
e5fc7e69 |
857 | ); |
858 | |
beac1dff |
859 | if ($subloc) { |
e5fc7e69 |
860 | my $result = $self->traverse_index( |
861 | $obj, $subloc, $ch + 1, $force_return_next, |
862 | ); |
863 | |
beac1dff |
864 | if (defined($result)) { return $result; } |
865 | } |
866 | } # index loop |
d0b74c17 |
867 | |
beac1dff |
868 | $obj->{return_next} = 1; |
869 | } # tag is an index |
d0b74c17 |
870 | |
e5fc7e69 |
871 | else { |
beac1dff |
872 | my $keys = $tag->{content}; |
873 | if ($force_return_next) { $obj->{return_next} = 1; } |
d0b74c17 |
874 | |
beac1dff |
875 | ## |
876 | # Iterate through buckets, looking for a key match |
877 | ## |
8db25060 |
878 | for (my $i = 0; $i < $self->{max_buckets}; $i++) { |
9cec1360 |
879 | my ($key, $subloc) = $self->_get_key_subloc( $keys, $i ); |
d0b74c17 |
880 | |
8db25060 |
881 | # End of bucket list -- return to outer loop |
beac1dff |
882 | if (!$subloc) { |
beac1dff |
883 | $obj->{return_next} = 1; |
884 | last; |
885 | } |
8db25060 |
886 | # Located previous key -- return next one found |
beac1dff |
887 | elsif ($key eq $obj->{prev_md5}) { |
beac1dff |
888 | $obj->{return_next} = 1; |
889 | next; |
890 | } |
8db25060 |
891 | # Seek to bucket location and skip over signature |
beac1dff |
892 | elsif ($obj->{return_next}) { |
8db25060 |
893 | seek($fh, $subloc + $obj->_root->{file_offset}, SEEK_SET); |
d0b74c17 |
894 | |
beac1dff |
895 | # Skip over value to get to plain key |
8db25060 |
896 | my $sig; |
897 | read( $fh, $sig, SIG_SIZE ); |
898 | |
beac1dff |
899 | my $size; |
e5fc7e69 |
900 | read( $fh, $size, $self->{data_size}); |
901 | $size = unpack($self->{data_pack}, $size); |
beac1dff |
902 | if ($size) { seek($fh, $size, SEEK_CUR); } |
d0b74c17 |
903 | |
beac1dff |
904 | # Read in plain key and return as scalar |
beac1dff |
905 | my $plain_key; |
e5fc7e69 |
906 | read( $fh, $size, $self->{data_size}); |
907 | $size = unpack($self->{data_pack}, $size); |
beac1dff |
908 | if ($size) { read( $fh, $plain_key, $size); } |
d0b74c17 |
909 | |
beac1dff |
910 | return $plain_key; |
911 | } |
8db25060 |
912 | } |
d0b74c17 |
913 | |
beac1dff |
914 | $obj->{return_next} = 1; |
915 | } # tag is a bucket list |
d0b74c17 |
916 | |
beac1dff |
917 | return; |
6736c116 |
918 | } |
919 | |
920 | sub get_next_key { |
beac1dff |
921 | ## |
922 | # Locate next key, given digested previous one |
923 | ## |
6736c116 |
924 | my $self = shift; |
925 | my ($obj) = @_; |
d0b74c17 |
926 | |
beac1dff |
927 | $obj->{prev_md5} = $_[1] ? $_[1] : undef; |
928 | $obj->{return_next} = 0; |
d0b74c17 |
929 | |
beac1dff |
930 | ## |
931 | # If the previous key was not specifed, start at the top and |
932 | # return the first one found. |
933 | ## |
934 | if (!$obj->{prev_md5}) { |
935 | $obj->{prev_md5} = chr(0) x $self->{hash_size}; |
936 | $obj->{return_next} = 1; |
937 | } |
d0b74c17 |
938 | |
beac1dff |
939 | return $self->traverse_index( $obj, $obj->_base_offset, 0 ); |
6736c116 |
940 | } |
941 | |
75be6413 |
942 | # Utilities |
943 | |
9cec1360 |
944 | sub _get_key_subloc { |
75be6413 |
945 | my $self = shift; |
946 | my ($keys, $idx) = @_; |
947 | |
6ed2f3df |
948 | my ($key, $subloc, $size) = unpack( |
949 | "a$self->{hash_size} $self->{long_pack} $self->{long_pack}", |
75be6413 |
950 | substr( |
951 | $keys, |
9cec1360 |
952 | ($idx * $self->{bucket_size}), |
953 | $self->{bucket_size}, |
75be6413 |
954 | ), |
955 | ); |
956 | |
6ed2f3df |
957 | return ($key, $subloc, $size); |
75be6413 |
958 | } |
959 | |
d608b06e |
960 | sub _find_in_buckets { |
961 | my $self = shift; |
962 | my ($tag, $md5) = @_; |
963 | |
964 | BUCKET: |
965 | for ( my $i = 0; $i < $self->{max_buckets}; $i++ ) { |
9a187d8c |
966 | my ($key, $subloc, $size) = $self->_get_key_subloc( |
967 | $tag->{content}, $i, |
968 | ); |
d608b06e |
969 | |
9a187d8c |
970 | return ($subloc, $i * $self->{bucket_size}, $size) unless $subloc; |
d608b06e |
971 | |
972 | next BUCKET if $key ne $md5; |
973 | |
9a187d8c |
974 | return ($subloc, $i * $self->{bucket_size}, $size); |
d608b06e |
975 | } |
976 | |
977 | return; |
978 | } |
979 | |
7b1e1aa1 |
980 | #sub _print_at { |
981 | # my $self = shift; |
982 | # my ($obj, $spot, $data) = @_; |
983 | # |
984 | # my $fh = $obj->_fh; |
985 | # seek( $fh, $spot, SEEK_SET ); |
986 | # print( $fh $data ); |
987 | # |
988 | # return; |
989 | #} |
990 | |
994ccd8e |
991 | sub _request_space { |
992 | my $self = shift; |
993 | my ($obj, $size) = @_; |
994 | |
995 | my $loc = $obj->_root->{end}; |
c9ec091a |
996 | $obj->_root->{end} += $size; |
994ccd8e |
997 | |
998 | return $loc; |
999 | } |
1000 | |
1001 | sub _release_space { |
1002 | my $self = shift; |
1003 | my ($obj, $size, $loc) = @_; |
1004 | |
7b1e1aa1 |
1005 | my $next_loc = 0; |
1006 | |
1007 | my $fh = $obj->_fh; |
1008 | seek( $fh, $loc + $obj->_root->{file_offset}, SEEK_SET ); |
1009 | print( $fh SIG_FREE |
1010 | . pack($self->{long_pack}, $size ) |
1011 | . pack($self->{long_pack}, $next_loc ) |
1012 | ); |
1013 | |
994ccd8e |
1014 | return; |
1015 | } |
1016 | |
a20d9a3f |
1017 | 1; |
1018 | __END__ |
d5d7c51d |
1019 | |
1020 | # This will be added in later, after more refactoring is done. This is an early |
1021 | # attempt at refactoring on the physical level instead of the virtual level. |
1022 | sub _read_at { |
1023 | my $self = shift; |
1024 | my ($obj, $spot, $amount, $unpack) = @_; |
1025 | |
1026 | my $fh = $obj->_fh; |
1027 | seek( $fh, $spot + $obj->_root->{file_offset}, SEEK_SET ); |
1028 | |
1029 | my $buffer; |
1030 | my $bytes_read = read( $fh, $buffer, $amount ); |
1031 | |
1032 | if ( $unpack ) { |
1033 | $buffer = unpack( $unpack, $buffer ); |
1034 | } |
1035 | |
1036 | if ( wantarray ) { |
1037 | return ($buffer, $bytes_read); |
1038 | } |
1039 | else { |
1040 | return $buffer; |
1041 | } |
1042 | } |