Commit | Line | Data |
a20d9a3f |
1 | package DBM::Deep::Engine; |
2 | |
460b1067 |
3 | use 5.6.0; |
4 | |
a20d9a3f |
5 | use strict; |
460b1067 |
6 | use warnings; |
a20d9a3f |
7 | |
8 | use Fcntl qw( :DEFAULT :flock :seek ); |
9 | |
8db25060 |
##
# File and tag signatures.  These values are written into the data file
# itself, so they should never change.
##
sub SIG_FILE     () { 'DPDB' }  # written first in the file header
sub SIG_HEADER   () { 'h'    }  # follows SIG_FILE in the file header
sub SIG_INTERNAL () { 'i'    }  # value that points at another object in this file
sub SIG_HASH     () { 'H'    }  # hash value
sub SIG_ARRAY    () { 'A'    }  # array value
sub SIG_NULL     () { 'N'    }  # undefined value
sub SIG_DATA     () { 'D'    }  # plain scalar value
sub SIG_INDEX    () { 'I'    }  # index tag
sub SIG_BLIST    () { 'B'    }  # bucket-list tag
sub SIG_FREE     () { 'F'    }  # presumably marks released space -- not used in the code visible here
sub SIG_SIZE     () {  1     }  # number of bytes a signature occupies in a tag
24 | |
612969fb |
##
# Constructor.  Builds an engine object from built-in defaults, optionally
# overridden by entries of the hashref $args.
#
# Only keys that exist in the default set below (long_size, long_pack,
# data_size, data_pack, digest, hash_size, max_buckets, fileobj) are copied
# from $args; anything else is ignored.  The extra key 'pack_size' ('small',
# 'medium' or 'large', case-insensitive) selects long_size and long_pack as a
# pair and takes precedence over explicitly passed long_size/long_pack.
#
# Dies on an unknown pack_size.  Warns and clamps max_buckets to 16 when a
# smaller value is requested.  The caller's hashref is never modified.
##
sub new {
    my $class = shift;
    my ($args) = @_;

    # Work on a shallow copy: expanding pack_size must not write
    # long_size/long_pack back into the hash the caller handed us.
    my %opts = $args ? %$args : ();

    my $self = bless {
        long_size => 4,
        long_pack => 'N',
        data_size => 4,
        data_pack => 'N',

        digest    => \&Digest::MD5::md5,
        hash_size => 16,

        ##
        # Maximum number of buckets per list before another level of indexing is
        # done. Increase this value for slightly greater speed, but larger database
        # files. DO NOT decrease this value below 16, due to risk of recursive
        # reindex overrun.
        ##
        max_buckets => 16,

        fileobj => undef,
    }, $class;

    if ( defined $opts{pack_size} ) {
        # pack_size name => [ long_size, long_pack ]
        my %pack_for = (
            small  => [ 2, 'S' ],
            medium => [ 4, 'N' ],
            large  => [ 8, 'Q' ],
        );

        my $choice = $pack_for{ lc $opts{pack_size} }
            or die "Unknown pack_size value: '$opts{pack_size}'\n";

        @opts{qw( long_size long_pack )} = @$choice;
    }

    # Grab the parameters we want to use
    foreach my $param ( keys %$self ) {
        next unless exists $opts{$param};
        $self->{$param} = $opts{$param};
    }

    if ( $self->{max_buckets} < 16 ) {
        warn "Floor of max_buckets is 16. Setting it to 16 from '$self->{max_buckets}'\n";
        $self->{max_buckets} = 16;
    }

    return $self;
}
80 | |
460b1067 |
# Accessor: returns whatever is stored under this engine's 'fileobj' key.
sub _fileobj {
    my $self = shift;
    return $self->{fileobj};
}

# Accessor: returns the 'fh' entry of the file object.
sub _fh {
    my $self = shift;
    return $self->_fileobj->{fh};
}
83 | |
260a80b4 |
##
# Derives the on-disk structure sizes from the current long_size, hash_size
# and max_buckets settings and stores them on the object:
#   index_size       - 256 slots of long_size bytes each
#   bucket_size      - one digest plus three long_size fields
#   bucket_list_size - max_buckets buckets
# Returns nothing.
##
sub calculate_sizes {
    my $self = shift;

    my ($long, $hash, $buckets)
        = @{$self}{qw( long_size hash_size max_buckets )};

    #XXX Does this need to be updated with different hashing algorithms?
    $self->{index_size} = 256 * $long;

    #ACID This needs modified - DONE
    $self->{bucket_size} = $hash + 3 * $long;

    $self->{bucket_list_size} = $buckets * $self->{bucket_size};

    return;
}
95 | |
fde3db1a |
##
# Reserves room for the file header and writes it: the file signature, the
# header signature, three 32-bit fields (header version, header size, file
# version) and then the engine's long_size, long_pack, data_size, data_pack
# and max_buckets settings.  Returns nothing.
##
sub write_file_header {
    my $self = shift;

    my $fh = $self->_fh;

    # 21 = 1 (header signature) + 3 * 4 ('N' fields) + 2+1+2+1+2 (settings)
    my $loc = $self->_request_space( length( SIG_FILE ) + 21 );
    seek($fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET);

    my @header = (
        SIG_FILE,
        SIG_HEADER,
        pack('N', 1),  # header version
        pack('N', 12), # header size
        pack('N', 0),  # file version
        pack('S', $self->{long_size}),
        pack('A', $self->{long_pack}),
        pack('S', $self->{data_size}),
        pack('A', $self->{data_pack}),
        pack('S', $self->{max_buckets}),
    );
    print( $fh @header );

    return;
}
118 | |
fde3db1a |
##
# Reads and validates the file header, then loads the stored long_size,
# long_pack, data_size, data_pack and max_buckets settings into the engine.
#
# Returns nothing when no bytes could be read from the header position,
# otherwise the total number of header bytes read.  Closes the file object and
# throws via _throw_error() on a bad signature, an old file version or a
# corrupted header.
##
sub read_file_header {
    my $self = shift;

    my $fh = $self->_fh;

    seek($fh, 0 + $self->_fileobj->{file_offset}, SEEK_SET);

    # Fixed-length preamble: file signature, header signature, header
    # version and the size of the remainder of the header.
    my $preamble;
    my $bytes_read = read( $fh, $preamble, length(SIG_FILE) + 9 );

    return unless $bytes_read;

    my ($file_signature, $sig_header, $header_version, $size) = unpack(
        'A4 A N N', $preamble
    );

    if ( $file_signature ne SIG_FILE ) {
        $self->{fileobj}->close;
        $self->_throw_error( "Signature not found -- file is not a Deep DB" );
    }

    if ( $sig_header ne SIG_HEADER ) {
        $self->{fileobj}->close;
        $self->_throw_error( "Old file version found." );
    }

    # Variable part of the header, $size bytes as announced by the preamble.
    my $settings;
    $bytes_read += read( $fh, $settings, $size );
    my ($file_version, @values) = unpack( 'N S A S A S', $settings );
    if ( @values < 5 || grep { !defined } @values ) {
        $self->{fileobj}->close;
        $self->_throw_error("Corrupted file - bad header");
    }

    #XXX Add warnings if values weren't set right
    @{$self}{qw(long_size long_pack data_size data_pack max_buckets)} = @values;

    return $bytes_read;
}
157 | |
460b1067 |
##
# Prepares the filehandle for use by $obj, under an exclusive flock.
#
# When $obj has no base_offset yet, the file header is read.  An empty file
# gets a fresh header and a zero-filled master index tagged with $obj's type;
# an existing file has its master index tag checked against $obj's type.
# Sizes are (re)calculated either way.  The file's inode and size are cached
# on the file object the first time through.  Always returns 1.
##
sub setup_fh {
    my $self = shift;
    my ($obj) = @_;

    my $fh = $self->_fh;
    flock $fh, LOCK_EX;

    #XXX The duplication of calculate_sizes needs to go away
    if ( $obj->{base_offset} ) {
        $self->calculate_sizes;
    }
    else {
        my $bytes_read = $self->read_file_header;

        $self->calculate_sizes;

        if ( $bytes_read ) {
            # The master index starts right after the header.
            $obj->{base_offset} = $bytes_read;

            ##
            # Get our type from master index header
            ##
            my $tag = $self->load_tag($obj->_base_offset)
                or $self->_throw_error("Corrupted file, no master index record");

            if ($obj->_type ne $tag->{signature}) {
                $self->_throw_error("File type mismatch");
            }
        }
        else {
            ##
            # File is empty -- write header and master index
            ##
            $self->write_file_header;

            $obj->{base_offset} = $self->_request_space( $self->tag_size( $self->{index_size} ) );

            $self->write_tag(
                $obj->_base_offset, $obj->_type,
                chr(0)x$self->{index_size},
            );

            # Flush the filehandle: switching autoflush on for the selected
            # handle forces pending output out; the old setting is restored.
            my $old_fh = select $fh;
            my $old_af = $|; $| = 1; $| = $old_af;
            select $old_fh;
        }
    }

    #XXX We have to make sure we don't mess up when autoflush isn't turned on
    my $fileobj = $self->_fileobj;
    if ( !$fileobj->{inode} ) {
        my @stats = stat($fh);
        $fileobj->{inode} = $stats[1];
        $fileobj->{end}   = $stats[7];
    }

    flock $fh, LOCK_UN;

    return 1;
}
218 | |
16d1ad9b |
# Returns the total on-disk size of a tag whose content is $size bytes long:
# the signature, the data_size-byte length field and the content itself.
sub tag_size {
    my ($self, $size) = @_;

    my $overhead = SIG_SIZE + $self->{data_size};
    return $overhead + $size;
}
224 | |
9e4f83a0 |
##
# Given offset, signature and content, create tag and write to disk.
#
# With a defined $offset the handle is first positioned there and a hashref
# describing the tag (signature, size, offset of the content, content) is
# returned.  With an undefined $offset the tag is written at the current
# position and nothing is returned.
##
sub write_tag {
    my $self = shift;
    my ($offset, $sig, $content) = @_;

    my $size = length( $content );
    my $fh   = $self->_fh;

    my $positioned = defined $offset;
    if ( $positioned ) {
        seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);
    }

    # Tag layout: signature, packed content length, content.
    print( $fh $sig . pack($self->{data_pack}, $size) . $content );

    return unless $positioned;

    my %tag = (
        signature => $sig,
        size      => $size,
        offset    => $offset + SIG_SIZE + $self->{data_size},
        content   => $content,
    );
    return \%tag;
}
250 | |
##
# Given offset, load single tag and return signature, size and data.
#
# Returns nothing when the handle is at end-of-file after seeking to $offset;
# otherwise a hashref with the keys signature, size, offset (position of the
# content, i.e. just past the tag header) and content.
##
sub load_tag {
    my $self = shift;
    my ($offset) = @_;

#    print join(':',map{$_||''}caller(1)), $/;

    my $fh = $self->_fh;

    seek($fh, $offset + $self->_fileobj->{file_offset}, SEEK_SET);

    #XXX I'm not sure this check will work if autoflush isn't enabled ...
    return if eof $fh;

    # Tag header: signature followed by the packed content length.
    my $header_length = SIG_SIZE + $self->{data_size};

    my $header;
    read( $fh, $header, $header_length );
    my ($sig, $size) = unpack( "A $self->{data_pack}", $header );

    my $content;
    read( $fh, $content, $size);

    my %tag = (
        signature => $sig,
        size      => $size,
        offset    => $offset + $header_length,
        content   => $content,
    );
    return \%tag;
}
281 | |
56ec4340 |
##
# Plain function (not a method).  Returns the DBM::Deep object behind $item:
# $item itself when it isa DBM::Deep, or the object $item is tied to when
# $item is a hash or array reference tied to a DBM::Deep.  Returns nothing
# otherwise.  Failures of the ->isa() probes are swallowed.
##
sub _get_dbm_object {
    my $item = shift;

    # Is $item itself a DBM::Deep object?  ->isa() dies on non-objects, hence
    # the eval; the local __DIE__ handler keeps global handlers from firing.
    my $direct = eval {
        local $SIG{__DIE__};
        $item->isa( 'DBM::Deep' ) ? $item : undef;
    };
    return $direct if $direct;

    # Otherwise only tied hashes and arrays can qualify.
    my $kind = Scalar::Util::reftype( $item ) || '';
    return unless $kind eq 'HASH' || $kind eq 'ARRAY';

    my $tied_obj = eval {
        local $SIG{__DIE__};
        my $impl = $kind eq 'HASH' ? tied(%$item) : tied(@$item);
        $impl->isa( 'DBM::Deep' ) ? $impl : undef;
    };
    return $tied_obj if $tied_obj;

    return;
}
320 | |
29b01632 |
##
# Returns the number of bytes needed to store $value together with its plain
# $key: the value tag header, the key's length field and the key, plus
#   - long_size bytes when $value is a DBM::Deep object of this same file,
#   - otherwise one bless-flag byte when autobless is on, and then either
#     the scalar's length, or index_size for a hash/array (plus a length
#     field and the class name when autobless is on and $value is blessed
#     into something other than DBM::Deep).
##
sub _length_needed {
    my $self = shift;
    my ($value, $key) = @_;

    my $is_dbm_deep = eval {
        local $SIG{'__DIE__'};
        $value->isa( 'DBM::Deep' );
    };

    my $len = SIG_SIZE + $self->{data_size}
            + $self->{data_size} + length( $key );

    # Internal reference: only a location is stored.
    if ( $is_dbm_deep && $value->_fileobj eq $self->_fileobj ) {
        return $len + $self->{long_size};
    }

    my $r         = Scalar::Util::reftype( $value ) || '';
    my $autobless = $self->_fileobj->{autobless};

    # This is for the bit saying whether or not this thing is blessed.
    $len += 1 if $autobless;

    my $is_container = $r eq 'HASH' || $r eq 'ARRAY';
    if ( !$is_container ) {
        $len += length( $value ) if defined $value;
        return $len;
    }

    $len += $self->{index_size};

    # if autobless is enabled, must also take into consideration
    # the class name as it is stored after the key.
    if ( $autobless ) {
        my $c = Scalar::Util::blessed($value);
        if ( defined $c && !$is_dbm_deep ) {
            $len += $self->{data_size} + length($c);
        }
    }

    return $len;
}
363 | |
20f7b20c |
##
# Adds one key/value pair to bucket list, given offset, MD5 digest of key,
# plain (undigested) key and value.
#
# $tag is the bucket-list tag to store into.  Only plain scalars, undef, and
# hash/array references are accepted; any other reference type is rejected
# via _throw_error().
#
# Returns 1 when an existing entry for $md5 was updated and 2 when a new
# entry was created (either in a free slot or after splitting the index).
##
sub add_bucket {
    my $self = shift;
    my ($tag, $md5, $plain_key, $value) = @_;

    # This verifies that only supported values will be stored.
    {
        my $r = Scalar::Util::reftype( $value );
        last if !defined $r;

        last if $r eq 'HASH';
        last if $r eq 'ARRAY';

        $self->_throw_error(
            "Storage of variables of type '$r' is not supported."
        );
    }

    my $location = 0;
    my $result = 2;

    my $root = $self->_fileobj;
    my $fh   = $self->_fh;

    my $actual_length = $self->_length_needed( $value, $plain_key );

    # $subloc: location of the stored value (false when the slot is free)
    # $offset: byte offset of the slot inside the bucket list (undefined
    #          when the list is full and holds no match)
    # $size:   space recorded for the stored value
    my ($subloc, $offset, $size) = $self->_find_in_buckets( $tag, $md5 );

    # NOTE: a debugging print of "$subloc - $offset - $size" used to sit
    # here.  It wrote to the currently selected output handle on every store
    # and warned about undefined values when the list was full, so it has
    # been removed.

#    $self->_release_space( $size, $subloc );
    # Updating a known md5
#XXX This needs updating to use _release_space
    if ( $subloc ) {
        $result = 1;

        if ($actual_length <= $size) {
            # The new value fits into the old space: overwrite in place.
            $location = $subloc;
        }
        else {
            # Too big: allocate fresh space and repoint the bucket entry,
            # skipping over the digest at the start of the slot.
            $location = $self->_request_space( $actual_length );
            seek(
                $fh,
                $tag->{offset} + $offset
              + $self->{hash_size} + $root->{file_offset},
                SEEK_SET,
            );
            print( $fh pack($self->{long_pack}, $location ) );
            print( $fh pack($self->{long_pack}, $actual_length ) );
            print( $fh pack($self->{long_pack}, $root->transaction_id ) );
        }
    }
    # Adding a new md5
    elsif ( defined $offset ) {
        $location = $self->_request_space( $actual_length );

        seek( $fh, $tag->{offset} + $offset + $root->{file_offset}, SEEK_SET );
        print( $fh $md5 . pack($self->{long_pack}, $location ) );
        print( $fh pack($self->{long_pack}, $actual_length ) );
        print( $fh pack($self->{long_pack}, $root->transaction_id ) );
    }
    # If bucket didn't fit into list, split into a new index level
    # split_index() will do the _request_space() call
    else {
        $location = $self->split_index( $md5, $tag );
    }

    $self->write_value( $location, $plain_key, $value );

    return $result;
}
437 | |
##
# Writes $value, followed by its plain $key, at $location in the file.
#
# The value tag's signature depends on the content: an internal reference
# for a DBM::Deep object of this same file, a zero-filled index for a hash
# or array, an empty tag for undef, or the scalar data itself.  With
# autobless enabled a bless flag (and class name, if any) follows the key.
# Hash and array references are then tied to DBM::Deep at $location and
# refilled with their former contents.
#
# Throws via _throw_error() for DBM::Deep objects belonging to another file
# and for hashes/arrays tied to something else.  Returns 1.
##
sub write_value {
    my $self = shift;
    my ($location, $key, $value) = @_;

    my $fh   = $self->_fh;
    my $root = $self->_fileobj;

    my $dbm_deep_obj = _get_dbm_object( $value );
    if ( $dbm_deep_obj && $dbm_deep_obj->_fileobj ne $self->_fileobj ) {
        $self->_throw_error( "Cannot cross-reference. Use export() instead" );
    }

    seek($fh, $location + $root->{file_offset}, SEEK_SET);

    ##
    # Write signature based on content type, set content length and write
    # actual value.
    ##
    my $r = Scalar::Util::reftype( $value ) || '';
    if ( $dbm_deep_obj ) {
        my $target = pack($self->{long_pack}, $dbm_deep_obj->_base_offset);
        $self->write_tag( undef, SIG_INTERNAL, $target );
    }
    elsif ( $r eq 'HASH' ) {
        # Not a DBM::Deep tie (handled above), so any tie is foreign.
        if ( tied %{$value} ) {
            $self->_throw_error( "Cannot store something that is tied" );
        }
        $self->write_tag( undef, SIG_HASH, chr(0)x$self->{index_size} );
    }
    elsif ( $r eq 'ARRAY' ) {
        if ( tied @{$value} ) {
            $self->_throw_error( "Cannot store something that is tied" );
        }
        $self->write_tag( undef, SIG_ARRAY, chr(0)x$self->{index_size} );
    }
    elsif ( defined $value ) {
        $self->write_tag( undef, SIG_DATA, $value );
    }
    else {
        $self->write_tag( undef, SIG_NULL, '' );
    }

    ##
    # Plain key is stored AFTER value, as keys are typically fetched less often.
    ##
    print( $fh pack($self->{data_pack}, length($key)) . $key );

    # Internal references don't care about autobless
    return 1 if $dbm_deep_obj;

    ##
    # If value is blessed, preserve class name
    ##
    if ( $root->{autobless} ) {
        my $c = Scalar::Util::blessed($value);
        if ( defined $c ) {
            print( $fh chr(1) );
            print( $fh pack($self->{data_pack}, length($c)) . $c );
        }
        else {
            print( $fh chr(0) );
        }
    }

    ##
    # Tie the passed in reference so that changes to it are reflected in the
    # datafile. The use of $location as the base_offset will act as the
    # the linkage between parent and child.
    #
    # The overall assignment is a hack around the fact that just tying doesn't
    # store the values. This may not be the wrong thing to do.
    ##
    if ( $r eq 'HASH' ) {
        my %contents = %$value;
        tie %$value, 'DBM::Deep', {
            base_offset => $location,
            fileobj     => $root,
        };
        %$value = %contents;
    }
    elsif ( $r eq 'ARRAY' ) {
        my @contents = @$value;
        tie @$value, 'DBM::Deep', {
            base_offset => $location,
            fileobj     => $root,
        };
        @$value = @contents;
    }

    return 1;
}
528 | |
75be6413 |
##
# Replaces the full bucket list $tag with a new index level and redistributes
# its entries, plus one new entry for $md5, into fresh bucket lists chosen by
# the digest byte at position $tag->{ch} + 1.
#
# $tag must carry ref_loc (where the pointer to this list is stored) and ch
# in addition to the usual tag fields.  The old list's space is handed to
# _release_space().  Returns the location reserved for the new entry's value.
#
# NOTE(review): redistributed entries are written as digest + location only,
# so their size and transaction fields stay zero -- confirm this is intended.
##
sub split_index {
    my $self = shift;
    my ($md5, $tag) = @_;

    my $fh   = $self->_fh;
    my $root = $self->_fileobj;

    my $index_loc = $self->_request_space(
        $self->tag_size( $self->{index_size} ),
    );

    # Repoint the parent's slot from the old bucket list to the new index.
    seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET);
    print( $fh pack($self->{long_pack}, $index_loc) );

    my $index_tag = $self->write_tag(
        $index_loc, SIG_INDEX,
        chr(0)x$self->{index_size},
    );

    my $newtag_loc = $self->_request_space(
        $self->tag_size( $self->{bucket_list_size} ),
    );

    # The old entries followed by the entry being added.
    my $keys = $tag->{content}
             . $md5 . pack($self->{long_pack}, $newtag_loc)
                    . pack($self->{long_pack}, 0)  # size
                    . pack($self->{long_pack}, 0); # transaction #

    # Content offset of the bucket list created for each index slot so far.
    my @newloc = ();

    BUCKET:
    for my $i ( 0 .. $self->{max_buckets} ) {
        my ($key, $old_subloc) = $self->_get_key_subloc( $keys, $i );

        die "[INTERNAL ERROR]: No key in split_index()\n" unless $key;
        die "[INTERNAL ERROR]: No subloc in split_index()\n" unless $old_subloc;

        my $num = ord(substr($key, $tag->{ch} + 1, 1));

        if ( $newloc[$num] ) {
            # A bucket list already exists for this slot: append to it.
            seek($fh, $newloc[$num] + $root->{file_offset}, SEEK_SET);
            my $subkeys;
            read( $fh, $subkeys, $self->{bucket_list_size});

            # This is looking for the first empty spot
            my (undef, $offset) = $self->_find_in_buckets(
                { content => $subkeys }, '',
            );

            seek($fh, $newloc[$num] + $offset + $root->{file_offset}, SEEK_SET);
            print( $fh $key . pack($self->{long_pack}, $old_subloc) );

            next BUCKET;
        }

        # First entry for this slot: create its bucket list and hook it
        # into the new index.
        seek($fh, $index_tag->{offset} + ($num * $self->{long_size}) + $root->{file_offset}, SEEK_SET);

        my $blist_loc = $self->_request_space(
            $self->tag_size( $self->{bucket_list_size} ),
        );

        print( $fh pack($self->{long_pack}, $blist_loc) );

        my $blist_tag = $self->write_tag(
            $blist_loc, SIG_BLIST,
            chr(0)x$self->{bucket_list_size},
        );

        seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET);
        print( $fh $key . pack($self->{long_pack}, $old_subloc) );

        $newloc[$num] = $blist_tag->{offset};
    }

    # Give back the old bucket list, tag header included.
    $self->_release_space(
        $self->tag_size( $self->{bucket_list_size} ),
        $tag->{offset} - SIG_SIZE - $self->{data_size},
    );

    return $newtag_loc;
}
609 | |
8db25060 |
##
# Reads the value stored at $subloc and returns it:
#   - hash/array: a new DBM::Deep object based at $subloc, re-blessed into
#     the stored class name when autobless is on and one was recorded;
#   - internal reference: whatever is stored at the referenced location
#     (nothing when the reference is empty);
#   - scalar data: the string ('' when its length is zero);
#   - anything else: nothing.
##
sub read_from_loc {
    my $self = shift;
    my ($subloc) = @_;

    my $fh = $self->_fh;

    # Reads one data_size-byte length field at the current position.
    my $read_length = sub {
        my $packed;
        read( $fh, $packed, $self->{data_size});
        my $length = unpack($self->{data_pack}, $packed);
        return $length;
    };

    ##
    # Found match -- seek to offset and read signature
    ##
    my $signature;
    seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);
    read( $fh, $signature, SIG_SIZE);

    ##
    # If value is a hash or array, return new DBM::Deep object with correct offset
    ##
    if ( $signature eq SIG_HASH || $signature eq SIG_ARRAY ) {
        my $new_obj = DBM::Deep->new({
            type        => $signature,
            base_offset => $subloc,
            fileobj     => $self->_fileobj,
        });

        return $new_obj unless $new_obj->_fileobj->{autobless};

        ##
        # Skip over value and plain key to see if object needs
        # to be re-blessed
        ##
        seek($fh, $self->{data_size} + $self->{index_size}, SEEK_CUR);

        my $key_length = $read_length->();
        if ($key_length) { seek($fh, $key_length, SEEK_CUR); }

        my $bless_bit;
        read( $fh, $bless_bit, 1);
        if ( ord($bless_bit) ) {
            ##
            # Yes, object needs to be re-blessed
            ##
            my $class_name;
            my $class_length = $read_length->();
            if ($class_length) { read( $fh, $class_name, $class_length); }
            if ($class_name) { $new_obj = bless( $new_obj, $class_name ); }
        }

        return $new_obj;
    }

    if ( $signature eq SIG_INTERNAL ) {
        my $size = $read_length->();
        return unless $size;

        # The content is the packed location of the real value.
        my $new_loc;
        read( $fh, $new_loc, $size );
        $new_loc = unpack( $self->{long_pack}, $new_loc );

        return $self->read_from_loc( $new_loc );
    }

    ##
    # Otherwise return actual value
    ##
    if ( $signature eq SIG_DATA ) {
        my $size = $read_length->();

        my $value = '';
        if ($size) { read( $fh, $value, $size); }
        return $value;
    }

    ##
    # Key exists, but content is null
    ##
    return;
}
695 | |
9020ee8c |
##
# Fetch single value given tag and MD5 digested key.
# Returns nothing when the bucket list holds no stored location for $md5.
##
sub get_bucket_value {
    my $self = shift;
    my ($tag, $md5) = @_;

    my ($subloc) = $self->_find_in_buckets( $tag, $md5 );
    return unless $subloc;

    return $self->read_from_loc( $subloc );
}
ab0e4957 |
709 | |
##
# Delete single key/value pair given tag and MD5 digested key.
#
# The entries that follow the deleted one are written back one slot earlier
# and a zeroed slot is written after them.  Returns 1 when an entry was
# removed, nothing when $md5 has no stored location in the list.
##
sub delete_bucket {
    my $self = shift;
    my ($tag, $md5) = @_;

    my ($subloc, $offset) = $self->_find_in_buckets( $tag, $md5 );
#XXX This needs _release_space()
    return unless $subloc;

    my $fh = $self->_fh;
    seek($fh, $tag->{offset} + $offset + $self->_fileobj->{file_offset}, SEEK_SET);

    # Everything after the deleted slot moves up by one slot ...
    my $following = substr($tag->{content}, $offset + $self->{bucket_size} );
    print( $fh $following );
    # ... and a blank slot takes its place at the end.
    print( $fh chr(0) x $self->{bucket_size} );

    return 1;
}
729 | |
912d50b1 |
##
# Check existence of single key given tag and MD5 digested key.
# Returns 1 when a stored location was found, otherwise the false value
# reported for the location by _find_in_buckets().
##
sub bucket_exists {
    my $self = shift;
    my ($tag, $md5) = @_;

    my ($subloc) = $self->_find_in_buckets( $tag, $md5 );
    return $subloc ? 1 : $subloc;
}
740 | |
6736c116 |
##
# Locate offset for bucket list, given digested key.
#
# Starting from the tag at $offset, follows one index level per digest byte
# until a bucket-list tag is reached.  Each tag returned from a lookup is
# annotated with ref_loc (where the pointer to it is stored) and ch (the
# digest byte position used to reach it).
#
# When an index slot is empty, returns nothing unless $args->{create} is
# true, in which case a new zero-filled bucket list is allocated, linked in
# and returned.  Throws via _throw_error() when no tag exists at $offset.
##
sub find_bucket_list {
    my $self = shift;
    my ($offset, $md5, $args) = @_;
    $args ||= {};

    ##
    # Locate offset for bucket list using digest index system
    ##
    my $tag = $self->load_tag( $offset )
        or $self->_throw_error( "INTERNAL ERROR - Cannot find tag" );

    my $ch = 0;
    while ( $tag->{signature} ne SIG_BLIST ) {
        my $num     = ord substr($md5, $ch, 1);
        my $ref_loc = $tag->{offset} + ($num * $self->{long_size});

        $tag = $self->index_lookup( $tag, $num );

        if ( $tag ) {
            # Found the next level: record how we got here and descend.
            $tag->{ch}      = $ch++;
            $tag->{ref_loc} = $ref_loc;
            next;
        }

        return if !$args->{create};

        # Empty slot: allocate a bucket list and store its location there.
        my $loc = $self->_request_space(
            $self->tag_size( $self->{bucket_list_size} ),
        );

        my $fh = $self->_fh;
        seek($fh, $ref_loc + $self->_fileobj->{file_offset}, SEEK_SET);
        print( $fh pack($self->{long_pack}, $loc) );

        $tag = $self->write_tag(
            $loc, SIG_BLIST,
            chr(0)x$self->{bucket_list_size},
        );

        $tag->{ref_loc} = $ref_loc;
        $tag->{ch}      = $ch;

        last;
    }

    return $tag;
}
790 | |
d0b74c17 |
##
# Given index tag, lookup single entry in index and return the tag stored at
# the location found there.  Returns nothing when the slot holds no location.
##
sub index_lookup {
    my $self = shift;
    my ($tag, $index) = @_;

    # Slot number $index is long_size bytes wide inside the index content.
    my $width  = $self->{long_size};
    my $packed = substr( $tag->{content}, $index * $width, $width );

    my $location = unpack( $self->{long_pack}, $packed );
    return if !$location;

    return $self->load_tag( $location );
}
811 | |
6736c116 |
##
# Scan index and recursively step into deeper levels, looking for next key.
#
# $obj carries the iteration state: prev_md5 (digest of the previous key)
# and return_next (true once the next key found should be returned).
# $offset is the tag to scan and $ch the digest byte position for this
# level.  Returns the plain key found, or nothing when this subtree holds no
# further key.
##
sub traverse_index {
    my $self = shift;
    my ($obj, $offset, $ch, $force_return_next) = @_;

    my $tag = $self->load_tag( $offset );

    my $fh = $self->_fh;

    if ( $tag->{signature} ne SIG_BLIST ) {
        # Index level: walk the slots, starting at the one the previous key
        # went through unless we are already past that key.
        my $content = $tag->{content};
        my $width   = $self->{long_size};
        my $start   = $obj->{return_next} ? 0 : ord(substr($obj->{prev_md5}, $ch, 1));

        for my $idx ( $start .. (2**8) - 1 ) {
            my $subloc = unpack(
                $self->{long_pack},
                substr( $content, $idx * $width, $width ),
            );
            next unless $subloc;

            my $result = $self->traverse_index(
                $obj, $subloc, $ch + 1, $force_return_next,
            );
            return $result if defined($result);
        } # index loop

        $obj->{return_next} = 1;
    } # tag is an index

    else {
        my $keys = $tag->{content};
        $obj->{return_next} = 1 if $force_return_next;

        ##
        # Iterate through buckets, looking for a key match
        ##
        BUCKET:
        for my $i ( 0 .. $self->{max_buckets} - 1 ) {
            my ($key, $subloc) = $self->_get_key_subloc( $keys, $i );

            # End of bucket list -- return to outer loop
            if ( !$subloc ) {
                $obj->{return_next} = 1;
                last BUCKET;
            }

            # Located previous key -- return next one found
            if ( $key eq $obj->{prev_md5} ) {
                $obj->{return_next} = 1;
                next BUCKET;
            }

            next BUCKET unless $obj->{return_next};

            # Seek to bucket location and skip over signature
            seek($fh, $subloc + $self->_fileobj->{file_offset}, SEEK_SET);

            my $sig;
            read( $fh, $sig, SIG_SIZE );

            # Skip over value to get to plain key
            my $size;
            read( $fh, $size, $self->{data_size});
            $size = unpack($self->{data_pack}, $size);
            if ($size) { seek($fh, $size, SEEK_CUR); }

            # Read in plain key and return as scalar
            my $plain_key;
            read( $fh, $size, $self->{data_size});
            $size = unpack($self->{data_pack}, $size);
            if ($size) { read( $fh, $plain_key, $size); }

            return $plain_key;
        }

        $obj->{return_next} = 1;
    } # tag is a bucket list

    return;
}
897 | |
##
# Locate next key, given digested previous one.
#
# $obj is used to hold the iteration state (prev_md5, return_next); the
# optional second argument is the digest of the previous key.  Returns what
# traverse_index() finds starting from $obj's base offset.
##
sub get_next_key {
    my $self = shift;
    my ($obj, $prev_md5) = @_;

    $obj->{prev_md5}    = $prev_md5 ? $prev_md5 : undef;
    $obj->{return_next} = 0;

    ##
    # If the previous key was not specifed, start at the top and
    # return the first one found.
    ##
    unless ( $obj->{prev_md5} ) {
        $obj->{prev_md5}    = chr(0) x $self->{hash_size};
        $obj->{return_next} = 1;
    }

    return $self->traverse_index( $obj, $obj->_base_offset, 0 );
}
919 | |
75be6413 |
920 | # Utilities |
921 | |
28394a1a |
922 | #ACID This needs modified - DONE |
9cec1360 |
sub _get_key_subloc {
    ##
    # Decode one bucket out of a packed bucket-list string.
    #
    # Arguments:
    #   $keys - string holding consecutive buckets of bucket_size bytes each
    #   $idx  - zero-based index of the bucket to decode
    #
    # Returns the list ($key, $subloc, $size, $transaction).
    ##
    my ($self, $keys, $idx) = @_;

    my $bucket_width = $self->{bucket_size};
    my $raw_bucket   = substr( $keys, $idx * $bucket_width, $bucket_width );

    # This is 'a', not 'A'. Please read the pack() documentation for the
    # difference between the two and why it's important.
    my $template =
        "a$self->{hash_size} $self->{long_pack} $self->{long_pack} $self->{long_pack}";

    my ($key, $subloc, $size, $transaction) = unpack( $template, $raw_bucket );

    return ($key, $subloc, $size, $transaction);
}
940 | |
d608b06e |
sub _find_in_buckets {
    ##
    # Scan a bucket list for a digest belonging to the current transaction.
    #
    # Arguments:
    #   $tag - hashref whose {content} holds the packed bucket list
    #   $md5 - digest to look for
    #
    # Returns ($subloc, $offset, $size), where $offset is the byte offset of
    # the bucket inside the list, for the first bucket that is either unused
    # (false $subloc) or matches both $md5 and the current transaction id.
    # Returns nothing if max_buckets buckets were scanned without a hit.
    ##
    my ($self, $tag, $md5) = @_;

    my $current_trans = $self->_fileobj->transaction_id;

    for my $idx ( 0 .. $self->{max_buckets} - 1 ) {
        my ($key, $subloc, $size, $bucket_trans)
            = $self->_get_key_subloc( $tag->{content}, $idx );

        # An unused bucket ends the search; otherwise both the digest and
        # the transaction id have to agree. The empty check comes first so
        # the comparisons are never run against an unused bucket.
        if ( !$subloc || ( $key eq $md5 && $bucket_trans == $current_trans ) ) {
            return ($subloc, $idx * $self->{bucket_size}, $size);
        }
    }

    return;
}
962 | |
994ccd8e |
sub _request_space {
    ##
    # Reserve $size bytes at the current end of the file object.
    #
    # Advances the file object's {end} marker by $size and returns the
    # value the marker had before the call.
    ##
    my ($self, $size) = @_;

    my $fileobj  = $self->_fileobj;
    my $start_at = $fileobj->{end};

    $fileobj->{end} += $size;

    return $start_at;
}
972 | |
sub _release_space {
    ##
    # Overwrite the start of a region with a free-space record.
    #
    # Arguments:
    #   $size - size value stored in the record
    #   $loc  - location of the region; file_offset is added before seeking
    #
    # The record is SIG_FREE followed by $size and a next-location field
    # (always 0 here), each packed with long_pack. Returns nothing.
    ##
    my ($self, $size, $loc) = @_;

    my $next_loc = 0;

    my $fh = $self->_fh;
    seek( $fh, $loc + $self->_fileobj->{file_offset}, SEEK_SET );

    my $free_record = join '',
        SIG_FREE(),
        pack( $self->{long_pack}, $size ),
        pack( $self->{long_pack}, $next_loc );

    print {$fh} $free_record;

    return;
}
988 | |
e96daec8 |
sub _throw_error {
    ##
    # Die with the given message, prefixed with "DBM::Deep: ". The trailing
    # newline keeps Perl from appending file/line information.
    ##
    my (undef, $message) = @_;

    die "DBM::Deep: $message\n";
}
992 | |
a20d9a3f |
993 | 1; |
994 | __END__ |
d5d7c51d |
995 | |
996 | # This will be added in later, after more refactoring is done. This is an early |
997 | # attempt at refactoring on the physical level instead of the virtual level. |
sub _read_at {
    ##
    # Read $amount bytes starting at $spot (file_offset is added before
    # seeking), optionally decoding them with the unpack template $unpack.
    #
    # In list context returns ($buffer, $bytes_read); in scalar context
    # returns just $buffer.
    ##
    my ($self, $spot, $amount, $unpack) = @_;

    my $fh = $self->_fh;
    seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );

    my $buffer;
    my $bytes_read = read( $fh, $buffer, $amount );

    # Scalar assignment: only the first unpacked value is kept.
    $buffer = unpack( $unpack, $buffer ) if $unpack;

    return wantarray ? ($buffer, $bytes_read) : $buffer;
}
e96daec8 |
1019 | |
sub _print_at {
    ##
    # Write $data at position $spot.
    #
    # Arguments:
    #   $spot - location to write at; file_offset is added before seeking
    #   $data - string to write
    #
    # Returns nothing.
    ##
    my $self = shift;
    my ($spot, $data) = @_;

    my $fh = $self->_fh;

    # Apply file_offset like every other seek in this file; without it a
    # non-zero offset would send the write to the wrong place.
    seek( $fh, $spot + $self->_fileobj->{file_offset}, SEEK_SET );
    print( $fh $data );

    return;
}
1030 | |
sub get_file_version {
    ##
    # Read the 4-byte file version stored at position 13 (plus file_offset)
    # and return it decoded as a big-endian 32-bit integer ('N').
    #
    # Calls _throw_error if 4 bytes could not be read.
    ##
    my ($self) = @_;

    my $fh = $self->_fh;

    my $version_pos = 13 + $self->_fileobj->{file_offset};
    seek( $fh, $version_pos, SEEK_SET );

    my $raw_version;
    my $got = read( $fh, $raw_version, 4 );
    if ( $got != 4 ) {
        $self->_throw_error( "Cannot read file version" );
    }

    return unpack( 'N', $raw_version );
}
1045 | |
sub write_file_version {
    ##
    # Store $new_version as a big-endian 32-bit integer ('N') at position 13
    # (plus file_offset). Returns nothing.
    ##
    my ($self, $new_version) = @_;

    my $fh = $self->_fh;

    my $version_pos = 13 + $self->_fileobj->{file_offset};
    seek( $fh, $version_pos, SEEK_SET );

    my $packed_version = pack( 'N', $new_version );
    print( $fh $packed_version );

    return;
}
1057 | |