Commit | Line | Data |
a0d0e21e |
1 | # DB_File.pm -- Perl 5 interface to Berkeley DB |
2 | # |
3 | # written by Paul Marquess (pmarquess@bfsec.bt.co.uk) |
f6b705ef |
4 | # last modified 4th Sept 1996 |
5 | # version 1.03 |
8e07c86e |
6 | |
7 | package DB_File::HASHINFO ; |
785da04d |
8 | |
9 | use strict; |
8e07c86e |
10 | use Carp; |
88108326 |
11 | require Tie::Hash; |
12 | @DB_File::HASHINFO::ISA = qw(Tie::Hash); |
8e07c86e |
13 | |
88108326 |
# Constructor.  Returns a reference to a fresh hash that is both tied to
# and blessed into the class the method was invoked on, so element
# access through the returned reference is routed to that class's
# FETCH/STORE/DELETE/EXISTS methods.
sub new
{
    my ($class) = @_ ;
    my %info ;

    tie %info, $class ;

    return bless \%info, $class ;
}
21 | |
88108326 |
# tie() hook.  Builds the object behind the tied hash: one slot for each
# element name this class accepts, every one starting out as undef.
# FETCH/STORE/DELETE treat the presence of a slot as "this name is known".
sub TIEHASH
{
    my ($class) = @_ ;

    my %fields ;
    @fields{ qw(bsize ffactor nelem cachesize hash lorder) } = () ;

    return bless \%fields, $class ;
}
8e07c86e |
34 | |
# tie() hook for reading an element.
#   $key - name of the element to read.
# Returns the stored value (possibly undef) for a known element name and
# croaks, naming the object's class, for any other name.
sub FETCH
{
    my ($self, $key) = @_ ;

    unless ( exists $self->{$key} ) {
        my $class = ref $self ;
        croak "${class}::FETCH - Unknown element '$key'" ;
    }

    return $self->{$key} ;
}
45 | |
46 | |
# tie() hook for assigning to an element.
#   $key   - name of the element to set.
#   $value - new value.
# Only element names that already have a slot in the object may be set;
# any other name croaks, naming the object's class.  Returns nothing.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    unless ( exists $self->{$key} ) {
        my $class = ref $self ;
        croak "${class}::STORE - Unknown element '$key'" ;
    }

    $self->{$key} = $value ;
    return ;
}
62 | |
# tie() hook for deleting an element.
#   $key - name of the element to delete.
# Removes the slot for a known element name and returns nothing; croaks
# for any other name.  The error message names the object's actual class
# (as FETCH and STORE do) so that subclasses such as RECNOINFO and
# BTREEINFO are reported correctly.
#
# NOTE(review): deleting removes the slot itself, so a later FETCH or
# STORE of the same name will croak with "Unknown element" - confirm
# that this is the intended behaviour.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{$key} )
    {
        delete $self->{$key} ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}
77 | |
88108326 |
# tie() hook for exists().
#   $key - element name to test.
# Returns true when the object currently has a slot for $key.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{$key} ;
}
85 | |
88108326 |
# Croak with "<class> does not define the method <name>".
#
# The first argument is the invocant (an object or a class name) and the
# LAST argument is the method name, so all of these call styles produce
# the same, correct message:
#     $obj->NotHere("CLEAR")
#     $obj->NotHere(ref $obj, "CLEAR")      # older three-argument form
#     NotHere($class, "CLEAR")
# Previously the three-argument method call made the object itself the
# "package" and the class name the "method" in the message.
sub NotHere
{
    my $invocant = shift ;
    my $method   = pop ;
    my $pkg      = ref($invocant) || $invocant ;

    croak "${pkg} does not define the method ${method}" ;
}

# Empty the object's hash when it is destroyed.
sub DESTROY  { undef %{$_[0]} }

# Iterating over or clearing an info hash is not supported: each of
# these tie() hooks just reports that the method is not defined.
sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e |
98 | |
99 | package DB_File::RECNOINFO ; |
785da04d |
100 | |
88108326 |
101 | use strict ; |
102 | |
103 | @DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ; |
8e07c86e |
104 | |
# tie() hook.  Builds the object behind a RECNOINFO hash: one slot per
# accepted element name.  Every slot starts as undef except 'bfname',
# which starts as the empty string.
sub TIEHASH
{
    my ($class) = @_ ;

    my %fields = map { $_ => undef }
                 qw(bval cachesize psize flags lorder reclen) ;
    $fields{'bfname'} = "" ;

    return bless \%fields, $class ;
}
118 | |
88108326 |
119 | package DB_File::BTREEINFO ; |
8e07c86e |
120 | |
88108326 |
121 | use strict ; |
8e07c86e |
122 | |
88108326 |
123 | @DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ; |
8e07c86e |
124 | |
88108326 |
# tie() hook.  Builds the object behind a BTREEINFO hash: one slot for
# each accepted element name, every one starting out as undef.
sub TIEHASH
{
    my ($class) = @_ ;

    my %fields ;
    @fields{ qw(flags cachesize maxkeypage minkeypage
                psize compare prefix lorder) } = () ;

    return bless \%fields, $class ;
}
139 | |
140 | |
8e07c86e |
141 | package DB_File ; |
785da04d |
142 | |
143 | use strict; |
144 | use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO) ; |
8e07c86e |
145 | use Carp; |
146 | |
785da04d |
147 | |
f6b705ef |
148 | $VERSION = "1.03" ; |
8e07c86e |
149 | |
150 | #typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; |
88108326 |
151 | #$DB_BTREE = TIEHASH DB_File::BTREEINFO ; |
152 | #$DB_HASH = TIEHASH DB_File::HASHINFO ; |
153 | #$DB_RECNO = TIEHASH DB_File::RECNOINFO ; |
154 | |
# Create the three pre-defined info objects that this module exports.
# Class->new is used instead of the indirect-object form "new Class",
# whose parsing depends on which subs happen to be visible at compile
# time.
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;
8e07c86e |
158 | |
785da04d |
159 | require Tie::Hash; |
8e07c86e |
160 | require Exporter; |
161 | use AutoLoader; |
162 | require DynaLoader; |
785da04d |
163 | @ISA = qw(Tie::Hash Exporter DynaLoader); |
8e07c86e |
164 | @EXPORT = qw( |
165 | $DB_BTREE $DB_HASH $DB_RECNO |
88108326 |
166 | |
8e07c86e |
167 | BTREEMAGIC |
168 | BTREEVERSION |
169 | DB_LOCK |
170 | DB_SHMEM |
171 | DB_TXN |
172 | HASHMAGIC |
173 | HASHVERSION |
174 | MAX_PAGE_NUMBER |
175 | MAX_PAGE_OFFSET |
176 | MAX_REC_NUMBER |
177 | RET_ERROR |
178 | RET_SPECIAL |
179 | RET_SUCCESS |
180 | R_CURSOR |
181 | R_DUP |
182 | R_FIRST |
183 | R_FIXEDLEN |
184 | R_IAFTER |
185 | R_IBEFORE |
186 | R_LAST |
187 | R_NEXT |
188 | R_NOKEY |
189 | R_NOOVERWRITE |
190 | R_PREV |
191 | R_RECNOSYNC |
192 | R_SETCURSOR |
193 | R_SNAPSHOT |
194 | __R_UNUSED |
88108326 |
195 | |
8e07c86e |
196 | ); |
197 | |
# Resolve an undefined sub in this package as a DB constant.
#
# The unqualified name is handed to constant() together with the first
# call argument (or 0 when there is none).  What happens next depends on
# $! after that call:
#   * non-zero with an error text containing "Invalid": the call is
#     passed on to AutoLoader's AUTOLOAD;
#   * non-zero otherwise: croak, reporting the caller's file and line;
#   * zero: a sub returning the value is compiled under the requested
#     name and then entered via goto, so later calls bypass AUTOLOAD.
#
# NOTE(review): the "Invalid" test matches the textual form of $!, which
# presumably varies with locale/platform - confirm before relying on it.
sub AUTOLOAD {
    (my $constname = $AUTOLOAD) =~ s/.*:://;
    my $val = constant($constname, @_ ? $_[0] : 0);

    if ($! != 0) {
        unless ($! =~ /Invalid/) {
            my ($caller_pkg, $file, $line) = caller;
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.
";
        }

        # Not a constant we know about: let AutoLoader try to find it.
        $AutoLoader::AUTOLOAD = $AUTOLOAD;
        goto &AutoLoader::AUTOLOAD;
    }

    eval "sub $AUTOLOAD { $val }";
    goto &$AUTOLOAD;
}
216 | |
f6b705ef |
217 | |
218 | # import borrowed from IO::File |
219 | # exports Fcntl constants if available. |
# import borrowed from IO::File
#   exports Fcntl constants if available.
#
# Exports this package's symbols into the calling package, honouring any
# import list given on the "use" line (an empty list means the default
# @EXPORT set).  Previously the import list was silently ignored.
# Afterwards Fcntl's default exports are pushed into the caller as well;
# that step is best-effort, so a failure to load Fcntl is ignored.
sub import {
    my $pkg = shift;
    my $callpkg = caller;

    # Explicit parentheses: the call does not rely on Exporter::export
    # being known at compile time.
    Exporter::export($pkg, $callpkg, @_);

    eval {
        require Fcntl;
        Exporter::export('Fcntl', $callpkg);
    };
}
229 | |
785da04d |
230 | bootstrap DB_File $VERSION; |
8e07c86e |
231 | |
232 | # Preloaded methods go here. Autoload methods go after __END__, and are |
233 | # processed by the autosplit program. |
234 | |
88108326 |
235 | |
# Collect the values stored under a (possibly duplicated) key.
#
#   $db->get_dup($key [, $flag])
#
# Records are read with seq(), starting with R_CURSOR at $key and
# continuing with R_NEXT for as long as seq() returns 0 and the key it
# hands back is still string-equal to the requested one.
#
# Returns:
#   scalar context            - the number of matching records;
#   list context, $flag false - the matching values, in the order read;
#   list context, $flag true  - a flattened hash mapping each distinct
#                               value to the number of times it was seen.
# Croaks with a usage message unless called with 2 or 3 arguments
# (including the object).
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;

    my $wanted   = $key ;       # seq() overwrites $key, so keep a copy
    my $list_ctx = wantarray ;
    my $value    = 0 ;
    my $matches  = 0 ;
    my @found    = () ;
    my %tally    = () ;

    # Stop at end of data / error (non-zero status) or as soon as a
    # different key comes back.
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $wanted) {

        if (not $list_ctx)
          { ++ $matches }
        elsif ($flag)
          { ++ $tally{$value} }
        else
          { push (@found, $value) }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $matches unless $list_ctx ;
    return $flag ? %tally : @found ;
}
275 | |
276 | |
8e07c86e |
277 | 1; |
278 | __END__ |
279 | |
280 | =cut |
3b35bae3 |
281 | |
282 | =head1 NAME |
283 | |
284 | DB_File - Perl5 access to Berkeley DB |
285 | |
286 | =head1 SYNOPSIS |
287 | |
288 | use DB_File ; |
f6b705ef |
289 | use strict 'untie' ; |
88108326 |
290 | |
291 | [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ; |
292 | [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ; |
293 | [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ; |
760ac839 |
294 | |
3b35bae3 |
295 | $status = $X->del($key [, $flags]) ; |
296 | $status = $X->put($key, $value [, $flags]) ; |
297 | $status = $X->get($key, $value [, $flags]) ; |
760ac839 |
298 | $status = $X->seq($key, $value, $flags) ; |
3b35bae3 |
299 | $status = $X->sync([$flags]) ; |
300 | $status = $X->fd ; |
760ac839 |
301 | |
f6b705ef |
302 | # BTREE only |
88108326 |
303 | $count = $X->get_dup($key) ; |
304 | @list = $X->get_dup($key) ; |
305 | %list = $X->get_dup($key, 1) ; |
306 | |
f6b705ef |
307 | # RECNO only |
308 | $a = $X->length; |
309 | $a = $X->pop ; |
310 | $X->push(list); |
311 | $a = $X->shift; |
312 | $X->unshift(list); |
313 | |
3b35bae3 |
314 | untie %hash ; |
315 | untie @array ; |
316 | |
317 | =head1 DESCRIPTION |
318 | |
8e07c86e |
319 | B<DB_File> is a module which allows Perl programs to make use of the |
320 | facilities provided by Berkeley DB. If you intend to use this |
f6b705ef |
321 | module you should really have a copy of the Berkeley DB manual pages at |
8e07c86e |
322 | hand. The interface defined here mirrors the Berkeley DB interface |
323 | closely. |
3b35bae3 |
324 | |
8e07c86e |
325 | Berkeley DB is a C library which provides a consistent interface to a |
326 | number of database formats. B<DB_File> provides an interface to all |
327 | three of the database types currently supported by Berkeley DB. |
3b35bae3 |
328 | |
329 | The file types are: |
330 | |
331 | =over 5 |
332 | |
88108326 |
333 | =item B<DB_HASH> |
3b35bae3 |
334 | |
88108326 |
335 | This database type allows arbitrary key/value pairs to be stored in data |
8e07c86e |
336 | files. This is equivalent to the functionality provided by other |
337 | hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though, |
338 | the files created using DB_HASH are not compatible with any of the |
339 | other packages mentioned. |
3b35bae3 |
340 | |
8e07c86e |
341 | A default hashing algorithm, which will be adequate for most |
342 | applications, is built into Berkeley DB. If you do need to use your own |
343 | hashing algorithm it is possible to write your own in Perl and have |
344 | B<DB_File> use it instead. |
3b35bae3 |
345 | |
88108326 |
346 | =item B<DB_BTREE> |
347 | |
348 | The btree format allows arbitrary key/value pairs to be stored in a |
8e07c86e |
349 | sorted, balanced binary tree. |
3b35bae3 |
350 | |
8e07c86e |
351 | As with the DB_HASH format, it is possible to provide a user defined |
352 | Perl routine to perform the comparison of keys. By default, though, the |
353 | keys are stored in lexical order. |
3b35bae3 |
354 | |
88108326 |
355 | =item B<DB_RECNO> |
3b35bae3 |
356 | |
8e07c86e |
357 | DB_RECNO allows both fixed-length and variable-length flat text files |
358 | to be manipulated using the same key/value pair interface as in DB_HASH |
359 | and DB_BTREE. In this case the key will consist of a record (line) |
360 | number. |
3b35bae3 |
361 | |
362 | =back |
363 | |
364 | =head2 How does DB_File interface to Berkeley DB? |
365 | |
366 | B<DB_File> allows access to Berkeley DB files using the tie() mechanism |
8e07c86e |
367 | in Perl 5 (for full details, see L<perlfunc/tie()>). This facility |
368 | allows B<DB_File> to access Berkeley DB files using either an |
369 | associative array (for DB_HASH & DB_BTREE file types) or an ordinary |
370 | array (for the DB_RECNO file type). |
3b35bae3 |
371 | |
88108326 |
372 | In addition to the tie() interface, it is also possible to access most |
373 | of the functions provided in the Berkeley DB API directly. |
f6b705ef |
374 | See L<THE API INTERFACE>. |
3b35bae3 |
375 | |
88108326 |
376 | =head2 Opening a Berkeley DB Database File |
3b35bae3 |
377 | |
8e07c86e |
378 | Berkeley DB uses the function dbopen() to open or create a database. |
f6b705ef |
379 | Here is the C prototype for dbopen(): |
3b35bae3 |
380 | |
381 | DB* |
382 | dbopen (const char * file, int flags, int mode, |
383 | DBTYPE type, const void * openinfo) |
384 | |
385 | The parameter C<type> is an enumeration which specifies which of the 3 |
386 | interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used. |
387 | Depending on which of these is actually chosen, the final parameter, |
388 | I<openinfo> points to a data structure which allows tailoring of the |
389 | specific interface method. |
390 | |
8e07c86e |
391 | This interface is handled slightly differently in B<DB_File>. Here is |
88108326 |
392 | an equivalent call using B<DB_File>: |
3b35bae3 |
393 | |
88108326 |
394 | tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ; |
3b35bae3 |
395 | |
8e07c86e |
396 | The C<filename>, C<flags> and C<mode> parameters are the direct |
397 | equivalent of their dbopen() counterparts. The final parameter $DB_HASH |
398 | performs the function of both the C<type> and C<openinfo> parameters in |
399 | dbopen(). |
3b35bae3 |
400 | |
88108326 |
401 | In the example above $DB_HASH is actually a pre-defined reference to a |
402 | hash object. B<DB_File> has three of these pre-defined references. |
403 | Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO. |
3b35bae3 |
404 | |
8e07c86e |
405 | The keys allowed in each of these pre-defined references are limited to |
406 | the names used in the equivalent C structure. So, for example, the |
407 | $DB_HASH reference will only allow keys called C<bsize>, C<cachesize>, |
88108326 |
408 | C<ffactor>, C<hash>, C<lorder> and C<nelem>. |
409 | |
410 | To change one of these elements, just assign to it like this: |
411 | |
412 | $DB_HASH->{'cachesize'} = 10000 ; |
413 | |
414 | The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are |
415 | usually adequate for most applications. If you do need to create extra |
416 | instances of these objects, constructors are available for each file |
417 | type. |
418 | |
419 | Here are examples of the constructors and the valid options available |
420 | for DB_HASH, DB_BTREE and DB_RECNO respectively. |
421 | |
422 | $a = new DB_File::HASHINFO ; |
423 | $a->{'bsize'} ; |
424 | $a->{'cachesize'} ; |
425 | $a->{'ffactor'}; |
426 | $a->{'hash'} ; |
427 | $a->{'lorder'} ; |
428 | $a->{'nelem'} ; |
429 | |
430 | $b = new DB_File::BTREEINFO ; |
431 | $b->{'flags'} ; |
432 | $b->{'cachesize'} ; |
433 | $b->{'maxkeypage'} ; |
434 | $b->{'minkeypage'} ; |
435 | $b->{'psize'} ; |
436 | $b->{'compare'} ; |
437 | $b->{'prefix'} ; |
438 | $b->{'lorder'} ; |
439 | |
440 | $c = new DB_File::RECNOINFO ; |
441 | $c->{'bval'} ; |
442 | $c->{'cachesize'} ; |
443 | $c->{'psize'} ; |
444 | $c->{'flags'} ; |
445 | $c->{'lorder'} ; |
446 | $c->{'reclen'} ; |
447 | $c->{'bfname'} ; |
448 | |
449 | The values stored in the hashes above are mostly the direct equivalent |
450 | of their C counterpart. Like their C counterparts, all are set to |
f6b705ef |
451 | default values - that means you don't have to set I<all> of the |
88108326 |
452 | values when you only want to change one. Here is an example: |
453 | |
454 | $a = new DB_File::HASHINFO ; |
455 | $a->{'cachesize'} = 12345 ; |
456 | tie %y, 'DB_File', "filename", $flags, 0777, $a ; |
457 | |
458 | A few of the values need extra discussion here. When used, the C |
459 | equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers |
460 | to C functions. In B<DB_File> these keys are used to store references |
461 | to Perl subs. Below are templates for each of the subs: |
462 | |
463 | sub hash |
464 | { |
465 | my ($data) = @_ ; |
466 | ... |
467 | # return the hash value for $data |
468 | return $hash ; |
469 | } |
3b35bae3 |
470 | |
88108326 |
471 | sub compare |
472 | { |
473 | my ($key1, $key2) = @_ ; |
474 | ... |
475 | # return 0 if $key1 eq $key2 |
476 | # -1 if $key1 lt $key2 |
477 | # 1 if $key1 gt $key2 |
478 | return (-1 , 0 or 1) ; |
479 | } |
3b35bae3 |
480 | |
88108326 |
481 | sub prefix |
482 | { |
483 | my ($key1, $key2) = @_ ; |
484 | ... |
485 | # return number of bytes of $key2 which are |
486 | # necessary to determine that it is greater than $key1 |
487 | return $bytes ; |
488 | } |
3b35bae3 |
489 | |
f6b705ef |
490 | See L<Changing the BTREE sort order> for an example of using the |
491 | C<compare> template. |
88108326 |
492 | |
493 | =head2 Default Parameters |
494 | |
495 | It is possible to omit some or all of the final 4 parameters in the |
496 | call to C<tie> and let them take default values. As DB_HASH is the most |
497 | common file format used, the call: |
498 | |
499 | tie %A, "DB_File", "filename" ; |
500 | |
501 | is equivalent to: |
502 | |
503 | tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0640, $DB_HASH ; |
504 | |
505 | It is also possible to omit the filename parameter as well, so the |
506 | call: |
507 | |
508 | tie %A, "DB_File" ; |
509 | |
510 | is equivalent to: |
511 | |
512 | tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0640, $DB_HASH ; |
513 | |
f6b705ef |
514 | See L<In Memory Databases> for a discussion on the use of C<undef> |
88108326 |
515 | in place of a filename. |
516 | |
f6b705ef |
517 | =head2 In Memory Databases |
518 | |
519 | Berkeley DB allows the creation of in-memory databases by using NULL |
520 | (that is, a C<(char *)0> in C) in place of the filename. B<DB_File> |
521 | uses C<undef> instead of NULL to provide this functionality. |
522 | |
523 | =head1 DB_HASH |
524 | |
525 | The DB_HASH file format is probably the most commonly used of the three |
526 | file formats that B<DB_File> supports. It is also very straightforward |
527 | to use. |
528 | |
529 | =head2 A Simple Example. |
530 | |
531 | This example shows how to create a database, add key/value pairs to the |
532 | database, delete key/value pairs and finally how to enumerate the |
533 | contents of the database. |
534 | |
535 | use DB_File ; |
536 | use strict 'untie' ; |
537 | |
538 | tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH |
539 | or die "Cannot open file 'fruit': $!\n"; |
540 | |
541 | # Add a few key/value pairs to the file |
542 | $h{"apple"} = "red" ; |
543 | $h{"orange"} = "orange" ; |
544 | $h{"banana"} = "yellow" ; |
545 | $h{"tomato"} = "red" ; |
546 | |
547 | # Check for existence of a key |
548 | print "Banana Exists\n\n" if $h{"banana"} ; |
549 | |
550 | # Delete a key/value pair. |
551 | delete $h{"apple"} ; |
552 | |
553 | # print the contents of the file |
554 | while (($k, $v) = each %h) |
555 | { print "$k -> $v\n" } |
556 | |
557 | untie %h ; |
558 | |
559 | Here is the output: |
560 | |
561 | Banana Exists |
562 | |
563 | orange -> orange |
564 | tomato -> red |
565 | banana -> yellow |
566 | |
567 | Note that, like ordinary associative arrays, the order of the keys |
568 | retrieved is in an apparently random order. |
569 | |
570 | =head1 DB_BTREE |
571 | |
572 | The DB_BTREE format is useful when you want to store data in a given |
573 | order. By default the keys will be stored in lexical order, but as you |
574 | will see from the example shown in the next section, it is very easy to |
575 | define your own sorting function. |
576 | |
577 | =head2 Changing the BTREE sort order |
578 | |
579 | This script shows how to override the default sorting algorithm that |
580 | BTREE uses. Instead of using the normal lexical ordering, a case |
581 | insensitive compare function will be used. |
88108326 |
582 | |
f6b705ef |
583 | use DB_File ; |
584 | use strict 'untie' ; |
585 | |
586 | sub Compare |
587 | { |
588 | my ($key1, $key2) = @_ ; |
589 | "\L$key1" cmp "\L$key2" ; |
590 | } |
591 | |
592 | # specify the Perl sub that will do the comparison |
593 | $DB_BTREE->{'compare'} = \&Compare ; |
594 | |
595 | tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE |
596 | or die "Cannot open file 'tree': $!\n" ; |
597 | |
598 | # Add a key/value pair to the file |
599 | $h{'Wall'} = 'Larry' ; |
600 | $h{'Smith'} = 'John' ; |
601 | $h{'mouse'} = 'mickey' ; |
602 | $h{'duck'} = 'donald' ; |
603 | |
604 | # Delete |
605 | delete $h{"duck"} ; |
606 | |
607 | # Cycle through the keys printing them in order. |
608 | # Note it is not necessary to sort the keys as |
609 | # the btree will have kept them in order automatically. |
610 | foreach (keys %h) |
611 | { print "$_\n" } |
612 | |
613 | untie %h ; |
614 | |
615 | Here is the output from the code above. |
616 | |
617 | mouse |
618 | Smith |
619 | Wall |
620 | |
621 | There are a few points to bear in mind if you want to change the |
622 | ordering in a BTREE database: |
623 | |
624 | =over 5 |
625 | |
626 | =item 1. |
627 | |
628 | The new compare function must be specified when you create the database. |
629 | |
630 | =item 2. |
631 | |
632 | You cannot change the ordering once the database has been created. Thus |
633 | you must use the same compare function every time you access the |
88108326 |
634 | database. |
635 | |
f6b705ef |
636 | =back |
637 | |
638 | =head2 Handling duplicate keys |
639 | |
640 | The BTREE file type optionally allows a single key to be associated |
641 | with an arbitrary number of values. This option is enabled by setting |
642 | the flags element of C<$DB_BTREE> to R_DUP when creating the database. |
643 | |
88108326 |
644 | There are some difficulties in using the tied hash interface if you |
645 | want to manipulate a BTREE database with duplicate keys. Consider this |
646 | code: |
647 | |
648 | use DB_File ; |
f6b705ef |
649 | use strict 'untie' ; |
88108326 |
650 | |
651 | $filename = "tree" ; |
652 | unlink $filename ; |
653 | |
654 | # Enable duplicate records |
655 | $DB_BTREE->{'flags'} = R_DUP ; |
656 | |
657 | tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
658 | or die "Cannot open $filename: $!\n"; |
659 | |
660 | # Add some key/value pairs to the file |
661 | $h{'Wall'} = 'Larry' ; |
662 | $h{'Wall'} = 'Brick' ; # Note the duplicate key |
f6b705ef |
663 | $h{'Wall'} = 'Brick' ; # Note the duplicate key and value |
88108326 |
664 | $h{'Smith'} = 'John' ; |
665 | $h{'mouse'} = 'mickey' ; |
666 | |
667 | # iterate through the associative array |
668 | # and print each key/value pair. |
669 | foreach (keys %h) |
670 | { print "$_ -> $h{$_}\n" } |
671 | |
f6b705ef |
672 | untie %h ; |
673 | |
88108326 |
674 | Here is the output: |
675 | |
676 | Smith -> John |
677 | Wall -> Larry |
678 | Wall -> Larry |
f6b705ef |
679 | Wall -> Larry |
88108326 |
680 | mouse -> mickey |
681 | |
f6b705ef |
682 | As you can see 3 records have been successfully created with key C<Wall> |
88108326 |
683 | - the only thing is, when they are retrieved from the database they |
f6b705ef |
684 | I<seem> to have the same value, namely C<Larry>. The problem is caused |
685 | by the way that the associative array interface works. Basically, when |
686 | the associative array interface is used to fetch the value associated |
687 | with a given key, it will only ever retrieve the first value. |
88108326 |
688 | |
689 | Although it may not be immediately obvious from the code above, the |
690 | associative array interface can be used to write values with duplicate |
691 | keys, but it cannot be used to read them back from the database. |
692 | |
693 | The way to get around this problem is to use the Berkeley DB API method |
694 | called C<seq>. This method allows sequential access to key/value |
f6b705ef |
695 | pairs. See L<THE API INTERFACE> for details of both the C<seq> method |
696 | and the API in general. |
88108326 |
697 | |
698 | Here is the script above rewritten using the C<seq> API method. |
699 | |
700 | use DB_File ; |
f6b705ef |
701 | use strict 'untie' ; |
88108326 |
702 | |
703 | $filename = "tree" ; |
704 | unlink $filename ; |
705 | |
706 | # Enable duplicate records |
707 | $DB_BTREE->{'flags'} = R_DUP ; |
708 | |
709 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
710 | or die "Cannot open $filename: $!\n"; |
711 | |
712 | # Add some key/value pairs to the file |
713 | $h{'Wall'} = 'Larry' ; |
714 | $h{'Wall'} = 'Brick' ; # Note the duplicate key |
f6b705ef |
715 | $h{'Wall'} = 'Brick' ; # Note the duplicate key and value |
88108326 |
716 | $h{'Smith'} = 'John' ; |
717 | $h{'mouse'} = 'mickey' ; |
718 | |
f6b705ef |
719 | # iterate through the btree using seq |
88108326 |
720 | # and print each key/value pair. |
f6b705ef |
721 | for ($status = $x->seq($key, $value, R_FIRST) ; |
722 | $status == 0 ; |
723 | $status = $x->seq($key, $value, R_NEXT) ) |
88108326 |
724 | { print "$key -> $value\n" } |
725 | |
726 | undef $x ; |
727 | untie %h ; |
728 | |
729 | that prints: |
730 | |
731 | Smith -> John |
732 | Wall -> Brick |
f6b705ef |
733 | Wall -> Brick |
88108326 |
734 | Wall -> Larry |
735 | mouse -> mickey |
736 | |
f6b705ef |
737 | This time we have got all the key/value pairs, including the multiple |
88108326 |
738 | values associated with the key C<Wall>. |
739 | |
f6b705ef |
740 | =head2 The get_dup method. |
741 | |
742 | B<DB_File> comes with a utility method, called C<get_dup>, to assist in |
88108326 |
743 | reading duplicate values from BTREE databases. The method can take the |
744 | following forms: |
745 | |
746 | $count = $x->get_dup($key) ; |
747 | @list = $x->get_dup($key) ; |
748 | %list = $x->get_dup($key, 1) ; |
749 | |
750 | In a scalar context the method returns the number of values associated |
751 | with the key, C<$key>. |
752 | |
753 | In list context, it returns all the values which match C<$key>. Note |
f6b705ef |
754 | that the values will be returned in an apparently random order. |
88108326 |
755 | |
f6b705ef |
756 | In list context, if the second parameter is present and evaluates TRUE, |
757 | the method returns an associative array. The keys of the associative |
758 | array correspond to the values that matched in the BTREE and the |
759 | values of the array are a count of the number of times that particular |
760 | value occurred in the BTREE. |
88108326 |
761 | |
f6b705ef |
762 | So assuming the database created above, we can use C<get_dup> like |
88108326 |
763 | this: |
764 | |
f6b705ef |
765 | $cnt = $x->get_dup("Wall") ; |
88108326 |
766 | print "Wall occurred $cnt times\n" ; |
767 | |
f6b705ef |
768 | %hash = $x->get_dup("Wall", 1) ; |
88108326 |
769 | print "Larry is there\n" if $hash{'Larry'} ; |
f6b705ef |
770 | print "There are $hash{'Brick'} Brick Walls\n" ; |
88108326 |
771 | |
f6b705ef |
772 | @list = $x->get_dup("Wall") ; |
88108326 |
773 | print "Wall => [@list]\n" ; |
774 | |
f6b705ef |
775 | @list = $x->get_dup("Smith") ; |
88108326 |
776 | print "Smith => [@list]\n" ; |
777 | |
f6b705ef |
778 | @list = $x->get_dup("Dog") ; |
88108326 |
779 | print "Dog => [@list]\n" ; |
780 | |
781 | |
782 | and it will print: |
783 | |
f6b705ef |
784 | Wall occurred 3 times |
88108326 |
785 | Larry is there |
f6b705ef |
786 | There are 2 Brick Walls |
787 | Wall => [Brick Brick Larry] |
88108326 |
788 | Smith => [John] |
789 | Dog => [] |
3b35bae3 |
790 | |
f6b705ef |
791 | =head2 Matching Partial Keys |
792 | |
793 | The BTREE interface has a feature which allows partial keys to be |
794 | matched. This functionality is I<only> available when the C<seq> method |
795 | is used along with the R_CURSOR flag. |
796 | |
797 | $x->seq($key, $value, R_CURSOR) ; |
798 | |
799 | Here is the relevant quote from the dbopen man page where it defines |
800 | the use of the R_CURSOR flag with seq: |
801 | |
802 | |
803 | Note, for the DB_BTREE access method, the returned key is not |
804 | necessarily an exact match for the specified key. The returned key |
805 | is the smallest key greater than or equal to the specified key, |
806 | permitting partial key matches and range searches. |
807 | |
808 | |
809 | In the example script below, the C<match> sub uses this feature to find |
810 | and print the first matching key/value pair given a partial key. |
811 | |
812 | use DB_File ; |
813 | use Fcntl ; |
814 | use strict 'untie' ; |
815 | |
816 | sub match |
817 | { |
818 | my $key = shift ; |
819 | my $value ; |
820 | my $orig_key = $key ; |
821 | $x->seq($key, $value, R_CURSOR) ; |
822 | print "$orig_key\t-> $key\t-> $value\n" ; |
823 | } |
824 | |
825 | $filename = "tree" ; |
826 | unlink $filename ; |
827 | |
828 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
829 | or die "Cannot open $filename: $!\n"; |
830 | |
831 | # Add some key/value pairs to the file |
832 | $h{'mouse'} = 'mickey' ; |
833 | $h{'Wall'} = 'Larry' ; |
834 | $h{'Walls'} = 'Brick' ; |
835 | $h{'Smith'} = 'John' ; |
836 | |
837 | |
838 | print "IN ORDER\n" ; |
839 | for ($st = $x->seq($key, $value, R_FIRST) ; |
840 | $st == 0 ; |
841 | $st = $x->seq($key, $value, R_NEXT) ) |
842 | |
843 | { print "$key -> $value\n" } |
844 | |
845 | print "\nPARTIAL MATCH\n" ; |
846 | |
847 | match "Wa" ; |
848 | match "A" ; |
849 | match "a" ; |
850 | |
851 | undef $x ; |
852 | untie %h ; |
853 | |
854 | Here is the output: |
855 | |
856 | IN ORDER |
857 | Smith -> John |
858 | Wall -> Larry |
859 | Walls -> Brick |
860 | mouse -> mickey |
861 | |
862 | PARTIAL MATCH |
863 | Wa -> Wall -> Larry |
864 | A -> Smith -> John |
865 | a -> mouse -> mickey |
866 | |
867 | =head1 DB_RECNO |
868 | |
869 | DB_RECNO provides an interface to flat text files. Both variable and |
870 | fixed length records are supported. |
3b35bae3 |
871 | |
88108326 |
872 | In order to make RECNO more compatible with Perl the array offset for |
873 | all RECNO arrays begins at 0 rather than 1 as in Berkeley DB. |
3b35bae3 |
874 | |
88108326 |
875 | As with normal Perl arrays, a RECNO array can be accessed using |
876 | negative indexes. The index -1 refers to the last element of the array, |
877 | -2 the second last, and so on. Attempting to access an element before |
878 | the start of the array will raise a fatal run-time error. |
3b35bae3 |
879 | |
f6b705ef |
880 | =head2 A Simple Example |
3b35bae3 |
881 | |
f6b705ef |
882 | Here is a simple example that uses RECNO. |
883 | |
884 | use DB_File ; |
885 | use strict 'untie' ; |
886 | |
887 | tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO |
888 | or die "Cannot open file 'text': $!\n" ; |
889 | |
890 | # Add a few key/value pairs to the file |
891 | $h[0] = "orange" ; |
892 | $h[1] = "blue" ; |
893 | $h[2] = "yellow" ; |
894 | |
895 | # Check for existence of a key |
896 | print "Element 1 Exists with value $h[1]\n" if $h[1] ; |
897 | |
898 | # use a negative index |
899 | print "The last element is $h[-1]\n" ; |
900 | print "The 2nd last element is $h[-2]\n" ; |
901 | |
902 | untie @h ; |
3b35bae3 |
903 | |
f6b705ef |
904 | Here is the output from the script: |
905 | |
906 | |
907 | Element 1 Exists with value blue |
908 | The last element is yellow |
909 | The 2nd last element is blue |
910 | |
911 | =head2 Extra Methods |
912 | |
913 | As you can see from the example above, the tied array interface is |
914 | quite limited. To make the interface more useful, a number of methods |
915 | are supplied with B<DB_File> to simulate the standard array operations |
916 | that are not currently implemented in Perl's tied array interface. All |
917 | these methods are accessed via the object returned from the tie call. |
918 | |
919 | Here are the methods: |
920 | |
921 | =over 5 |
3b35bae3 |
922 | |
f6b705ef |
923 | =item B<$X-E<gt>push(list) ;> |
924 | |
925 | Pushes the elements of C<list> to the end of the array. |
926 | |
927 | =item B<$value = $X-E<gt>pop ;> |
928 | |
929 | Removes and returns the last element of the array. |
930 | |
931 | =item B<$X-E<gt>shift> |
932 | |
933 | Removes and returns the first element of the array. |
934 | |
935 | =item B<$X-E<gt>unshift(list) ;> |
936 | |
937 | Pushes the elements of C<list> to the start of the array. |
938 | |
939 | =item B<$X-E<gt>length> |
940 | |
941 | Returns the number of elements in the array. |
942 | |
943 | =back |
944 | |
945 | =head2 Another Example |
946 | |
947 | Here is a more complete example that makes use of some of the methods |
948 | described above. It also makes use of the API interface directly (see |
949 | L<THE API INTERFACE>). |
950 | |
951 | use strict ; |
952 | use vars qw(@h $H $file $i) ; |
953 | use DB_File ; |
954 | use Fcntl ; |
955 | |
956 | $file = "text" ; |
957 | |
958 | unlink $file ; |
959 | |
960 | $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO |
961 | or die "Cannot open file $file: $!\n" ; |
962 | |
963 | # first create a text file to play with |
964 | $h[0] = "zero" ; |
965 | $h[1] = "one" ; |
966 | $h[2] = "two" ; |
967 | $h[3] = "three" ; |
968 | $h[4] = "four" ; |
969 | |
970 | |
971 | # Print the records in order. |
972 | # |
973 | # The length method is needed here because evaluating a tied |
974 | # array in a scalar context does not return the number of |
975 | # elements in the array. |
976 | |
977 | print "\nORIGINAL\n" ; |
978 | foreach $i (0 .. $H->length - 1) { |
979 | print "$i: $h[$i]\n" ; |
980 | } |
981 | |
982 | # use the push & pop methods |
983 | $a = $H->pop ; |
984 | $H->push("last") ; |
985 | print "\nThe last record was [$a]\n" ; |
986 | |
987 | # and the shift & unshift methods |
988 | $a = $H->shift ; |
989 | $H->unshift("first") ; |
990 | print "The first record was [$a]\n" ; |
991 | |
992 | # Use the API to add a new record after record 2. |
993 | $i = 2 ; |
994 | $H->put($i, "Newbie", R_IAFTER) ; |
995 | |
996 | # and a new record before record 1. |
997 | $i = 1 ; |
998 | $H->put($i, "New One", R_IBEFORE) ; |
999 | |
1000 | # delete record 3 |
1001 | $H->del(3) ; |
1002 | |
1003 | # now print the records in reverse order |
1004 | print "\nREVERSE\n" ; |
1005 | for ($i = $H->length - 1 ; $i >= 0 ; -- $i) |
1006 | { print "$i: $h[$i]\n" } |
1007 | |
1008 | # same again, but use the API functions instead |
1009 | print "\nREVERSE again\n" ; |
1010 | my ($s, $k, $v) ; |
1011 | for ($s = $H->seq($k, $v, R_LAST) ; |
1012 | $s == 0 ; |
1013 | $s = $H->seq($k, $v, R_PREV)) |
1014 | { print "$k: $v\n" } |
1015 | |
1016 | undef $H ; |
1017 | untie @h ; |
1018 | |
1019 | and this is what it outputs: |
1020 | |
1021 | ORIGINAL |
1022 | 0: zero |
1023 | 1: one |
1024 | 2: two |
1025 | 3: three |
1026 | 4: four |
1027 | |
1028 | The last record was [four] |
1029 | The first record was [zero] |
1030 | |
1031 | REVERSE |
1032 | 5: last |
1033 | 4: three |
1034 | 3: Newbie |
1035 | 2: one |
1036 | 1: New One |
1037 | 0: first |
1038 | |
1039 | REVERSE again |
1040 | 5: last |
1041 | 4: three |
1042 | 3: Newbie |
1043 | 2: one |
1044 | 1: New One |
1045 | 0: first |
1046 | |
1047 | Notes: |
1048 | |
1049 | =over 5 |
1050 | |
1051 | =item 1. |
1052 | |
1053 | Rather than iterating through the array C<@h> like this: |
1054 | |
1055 | foreach $i (@h) |
1056 | |
1057 | it is necessary to use either this: |
1058 | |
1059 | foreach $i (0 .. $H->length - 1) |
1060 | |
1061 | or this: |
1062 | |
1063 | for ($a = $H->seq($k, $v, R_FIRST) ; |
1064 | $a == 0 ; |
1065 | $a = $H->seq($k, $v, R_NEXT) ) |
1066 | |
1067 | =item 2. |
1068 | |
1069 | Notice that both times the C<put> method was used the record index was |
1070 | specified using a variable, C<$i>, rather than the literal value |
1071 | itself. This is because C<put> will return the record number of the |
1072 | inserted line via that parameter. |
1073 | |
1074 | =back |
1075 | |
1076 | =head1 THE API INTERFACE |
3b35bae3 |
1077 | |
1078 | As well as accessing Berkeley DB using a tied hash or array, it is also |
88108326 |
1079 | possible to make direct use of most of the API functions defined in the |
8e07c86e |
1080 | Berkeley DB documentation. |
3b35bae3 |
1081 | |
88108326 |
1082 | To do this you need to store a copy of the object returned from the tie. |
3b35bae3 |
1083 | |
88108326 |
1084 | $db = tie %hash, "DB_File", "filename" ; |
3b35bae3 |
1085 | |
8e07c86e |
1086 | Once you have done that, you can access the Berkeley DB API functions |
88108326 |
1087 | as B<DB_File> methods directly like this: |
3b35bae3 |
1088 | |
1089 | $db->put($key, $value, R_NOOVERWRITE) ; |
1090 | |
88108326 |
1091 | B<Important:> If you have saved a copy of the object returned from |
1092 | C<tie>, the underlying database file will I<not> be closed until both |
1093 | the tied variable is untied and all copies of the saved object are |
f6b705ef |
1094 | destroyed. See L<The strict untie pragma> for more details. |
88108326 |
1095 | |
1096 | use DB_File ; |
1097 | $db = tie %hash, "DB_File", "filename" |
1098 | or die "Cannot tie filename: $!" ; |
1099 | ... |
1100 | undef $db ; |
1101 | untie %hash ; |
1102 | |
1103 | All the functions defined in L<dbopen> are available except for |
1104 | close() and dbopen() itself. The B<DB_File> method interface to the |
1105 | supported functions has been implemented to mirror the way Berkeley DB |
1106 | works whenever possible. In particular note that: |
1107 | |
1108 | =over 5 |
1109 | |
1110 | =item * |
1111 | |
1112 | The methods return a status value. All return 0 on success. |
1113 | All return -1 to signify an error and set C<$!> to the exact |
1114 | error code. The return code 1 generally (but not always) means that the |
1115 | key specified did not exist in the database. |
1116 | |
1117 | Other return codes are defined. See below and in the Berkeley DB |
1118 | documentation for details. The Berkeley DB documentation should be used |
1119 | as the definitive source. |
1120 | |
1121 | =item * |
3b35bae3 |
1122 | |
88108326 |
1123 | Whenever a Berkeley DB function returns data via one of its parameters, |
1124 | the equivalent B<DB_File> method does exactly the same. |
3b35bae3 |
1125 | |
88108326 |
1126 | =item * |
1127 | |
1128 | If you are careful, it is possible to mix API calls with the tied |
1129 | hash/array interface in the same piece of code. Although only a few of |
1130 | the methods used to implement the tied interface currently make use of |
1131 | the cursor, you should always assume that the cursor has been changed |
1132 | any time the tied hash/array interface is used. As an example, this |
1133 | code will probably not do what you expect: |
1134 | |
1135 | $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE |
1136 | or die "Cannot tie $filename: $!" ; |
1137 | |
1138 | # Get the first key/value pair and set the cursor |
1139 | $X->seq($key, $value, R_FIRST) ; |
1140 | |
1141 | # this line will modify the cursor |
1142 | $count = scalar keys %x ; |
1143 | |
1144 | # Get the second key/value pair. |
1145 | # oops, it didn't, it got the last key/value pair! |
1146 | $X->seq($key, $value, R_NEXT) ; |
1147 | |
1148 | The code above can be rearranged to get around the problem, like this: |
1149 | |
1150 | $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE |
1151 | or die "Cannot tie $filename: $!" ; |
1152 | |
1153 | # this line will modify the cursor |
1154 | $count = scalar keys %x ; |
1155 | |
1156 | # Get the first key/value pair and set the cursor |
1157 | $X->seq($key, $value, R_FIRST) ; |
1158 | |
1159 | # Get the second key/value pair. |
1160 | # worked this time. |
1161 | $X->seq($key, $value, R_NEXT) ; |
1162 | |
1163 | =back |
1164 | |
1165 | All the constants defined in L<dbopen> for use in the flags parameters |
1166 | in the methods defined below are also available. Refer to the Berkeley |
1167 | DB documentation for the precise meaning of the flags values. |
1168 | |
1169 | Below is a list of the methods available. |
3b35bae3 |
1170 | |
1171 | =over 5 |
1172 | |
f6b705ef |
1173 | =item B<$status = $X-E<gt>get($key, $value [, $flags]) ;> |
88108326 |
1174 | |
1175 | Given a key (C<$key>) this method reads the value associated with it |
1176 | from the database. The value read from the database is returned in the |
1177 | C<$value> parameter. |
3b35bae3 |
1178 | |
88108326 |
1179 | If the key does not exist the method returns 1. |
3b35bae3 |
1180 | |
88108326 |
1181 | No flags are currently defined for this method. |
3b35bae3 |
1182 | |
f6b705ef |
1183 | =item B<$status = $X-E<gt>put($key, $value [, $flags]) ;> |
3b35bae3 |
1184 | |
88108326 |
1185 | Stores the key/value pair in the database. |
1186 | |
1187 | If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter |
8e07c86e |
1188 | will be set to the record number of the inserted key/value pair. |
3b35bae3 |
1189 | |
88108326 |
1190 | Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and |
1191 | R_SETCURSOR. |
1192 | |
f6b705ef |
1193 | =item B<$status = $X-E<gt>del($key [, $flags]) ;> |
3b35bae3 |
1194 | |
88108326 |
1195 | Removes all key/value pairs with key C<$key> from the database. |
3b35bae3 |
1196 | |
88108326 |
1197 | A return code of 1 means that the requested key was not in the |
1198 | database. |
3b35bae3 |
1199 | |
88108326 |
1200 | R_CURSOR is the only valid flag at present. |
3b35bae3 |
1201 | |
f6b705ef |
1202 | =item B<$fd = $X-E<gt>fd ;> |
3b35bae3 |
1203 | |
88108326 |
1204 | Returns the file descriptor for the underlying database. |
3b35bae3 |
1205 | |
f6b705ef |
1206 | See L<Locking Databases> for an example of how to make use of the |
88108326 |
1207 | C<fd> method to lock your database. |
3b35bae3 |
1208 | |
f6b705ef |
1209 | =item B<$status = $X-E<gt>seq($key, $value, $flags) ;> |
3b35bae3 |
1210 | |
88108326 |
1211 | This interface allows sequential retrieval from the database. See |
1212 | L<dbopen> for full details. |
1213 | |
1214 | Both the C<$key> and C<$value> parameters will be set to the key/value |
1215 | pair read from the database. |
1216 | |
1217 | The flags parameter is mandatory. The valid flag values are R_CURSOR, |
1218 | R_FIRST, R_LAST, R_NEXT and R_PREV. |
1219 | |
f6b705ef |
1220 | =item B<$status = $X-E<gt>sync([$flags]) ;> |
88108326 |
1221 | |
1222 | Flushes any cached buffers to disk. |
1223 | |
1224 | R_RECNOSYNC is the only valid flag at present. |
3b35bae3 |
1225 | |
1226 | =back |
1227 | |
f6b705ef |
1228 | =head1 HINTS AND TIPS |
3b35bae3 |
1229 | |
f6b705ef |
1230 | =head2 The strict untie pragma |
3b35bae3 |
1231 | |
f6b705ef |
1232 | If you run Perl version 5.004 or later (actually any version from the |
1233 | 5.003_01 development release on will suffice) and you make use of the |
1234 | Berkeley DB API, it is I<very> strongly recommended that you always |
1235 | include the C<use strict 'untie'> pragma in any of your scripts that |
1236 | make use of B<DB_File>. |
3b35bae3 |
1237 | |
f6b705ef |
1238 | Even if you don't currently make use of the API interface, it is still |
1239 | a good idea to include the pragma. It won't affect the performance of |
1240 | your script, but it will prevent problems in the future. |
88108326 |
1241 | |
f6b705ef |
1242 | If possible you should try to run with the full strict pragma, but that |
1243 | is another story. For further details see L<strict> and |
1244 | L<perldsc/WHY YOU SHOULD ALWAYS C<use strict>>. |
88108326 |
1245 | |
f6b705ef |
1246 | To illustrate the importance of including the untie pragma, here is an |
1247 | example script that fails in an unexpected place because it doesn't use |
1248 | it: |
88108326 |
1249 | |
f6b705ef |
1250 | use DB_File ; |
1251 | use Fcntl ; |
1252 | |
1253 | $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT |
1254 | or die "Cannot tie first time: $!" ; |
1255 | |
1256 | $x{123} = 456 ; |
1257 | |
1258 | untie %x ; |
1259 | |
1260 | $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT |
1261 | or die "Cannot tie second time: $!" ; |
88108326 |
1262 | |
f6b705ef |
1263 | untie %x ; |
88108326 |
1264 | |
f6b705ef |
1265 | When run the script will produce this error message: |
88108326 |
1266 | |
f6b705ef |
1267 | Cannot tie second time: Invalid argument at bad.file line 12. |
88108326 |
1268 | |
f6b705ef |
1269 | Although the error message above refers to the second tie statement in |
1270 | the script, the source of the problem is really with the untie |
1271 | statement that precedes it. |
88108326 |
1272 | |
f6b705ef |
1273 | To understand why there is a problem at all with the untie statement, |
1274 | consider what the tie does for a moment. |
3b35bae3 |
1275 | |
f6b705ef |
1276 | Whenever the tie is executed, it creates a logical link between a Perl |
1277 | variable, the associative array C<%x> in this case, and a Berkeley DB |
1278 | database, C<tst.fil>. The logical link ensures that all operations on |
1279 | the associative array are automatically mirrored to the database file. |
3b35bae3 |
1280 | |
f6b705ef |
1281 | In normal circumstances the untie is enough to break the logical link |
1282 | and also close the database. In this particular case there is another |
1283 | logical link, namely the API object returned from the tie and stored in |
1284 | C<$X>. Whenever the untie is executed in this case, only the link |
1285 | between the associative array and the database will be broken. The API |
1286 | object in C<$X> is still valid, so the database will not be closed. |
3b35bae3 |
1287 | |
f6b705ef |
1288 | The end result of this is that when the second tie is executed, the |
1289 | database will be in an inconsistent state (i.e. it is still opened by |
1290 | the first tie) - thus the second tie will fail. |
3b35bae3 |
1291 | |
f6b705ef |
1292 | If the C<use strict 'untie'> pragma is included in the script, like |
1293 | this: |
88108326 |
1294 | |
f6b705ef |
1295 | use DB_File ; |
1296 | use Fcntl ; |
1297 | use strict 'untie' ; |
1298 | |
1299 | $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT |
1300 | or die "Cannot tie first time: $!" ; |
1301 | |
1302 | $x{123} = 456 ; |
1303 | |
1304 | untie %x ; |
1305 | |
1306 | $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT |
1307 | or die "Cannot tie second time: $!" ; |
88108326 |
1308 | |
f6b705ef |
1309 | then the error message becomes: |
88108326 |
1310 | |
f6b705ef |
1311 | Can't untie: 1 inner references still exist at bad.file line 11. |
88108326 |
1312 | |
f6b705ef |
1313 | which pinpoints the real problem. Finally the script can now be |
1314 | modified to fix the original problem by destroying the API object |
1315 | before the untie: |
88108326 |
1316 | |
f6b705ef |
1317 | ... |
1318 | $x{123} = 456 ; |
3b35bae3 |
1319 | |
f6b705ef |
1320 | undef $X ; |
1321 | untie %x ; |
1322 | |
1323 | $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT |
1324 | ... |
3b35bae3 |
1325 | |
cb1a09d0 |
1326 | =head2 Locking Databases |
3b35bae3 |
1327 | |
cb1a09d0 |
1328 | Concurrent access of a read-write database by several parties requires |
1329 | them all to use some kind of locking. Here's an example of Tom's that |
1330 | uses the I<fd> method to get the file descriptor, and then a careful |
1331 | open() to give something Perl will flock() for you. Run this repeatedly |
1332 | in the background to watch the locks granted in proper order. |
3b35bae3 |
1333 | |
f6b705ef |
1334 | use strict 'untie'; |
cb1a09d0 |
1335 | use DB_File; |
1336 | |
1337 | use strict; |
1338 | |
1339 | sub LOCK_SH { 1 } |
1340 | sub LOCK_EX { 2 } |
1341 | sub LOCK_NB { 4 } |
1342 | sub LOCK_UN { 8 } |
1343 | |
1344 | my($oldval, $fd, $db, %db, $value, $key); |
1345 | |
1346 | $key = shift || 'default'; |
1347 | $value = shift || 'magic'; |
1348 | |
1349 | $value .= " $$"; |
1350 | |
1351 | $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644) |
1352 | || die "dbcreat /tmp/foo.db $!"; |
1353 | $fd = $db->fd; |
1354 | print "$$: db fd is $fd\n"; |
1355 | open(DB_FH, "+<&=$fd") || die "dup $!"; |
1356 | |
1357 | |
1358 | unless (flock (DB_FH, LOCK_SH | LOCK_NB)) { |
1359 | print "$$: CONTENTION; can't read during write update! |
1360 | Waiting for read lock ($!) ...."; |
1361 | unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" } |
1362 | } |
1363 | print "$$: Read lock granted\n"; |
1364 | |
1365 | $oldval = $db{$key}; |
1366 | print "$$: Old value was $oldval\n"; |
1367 | flock(DB_FH, LOCK_UN); |
1368 | |
1369 | unless (flock (DB_FH, LOCK_EX | LOCK_NB)) { |
1370 | print "$$: CONTENTION; must have exclusive lock! |
1371 | Waiting for write lock ($!) ...."; |
1372 | unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" } |
1373 | } |
1374 | |
1375 | print "$$: Write lock granted\n"; |
1376 | $db{$key} = $value; |
88108326 |
1377 | $db->sync; |
cb1a09d0 |
1378 | sleep 10; |
1379 | |
1380 | flock(DB_FH, LOCK_UN); |
88108326 |
1381 | undef $db; |
cb1a09d0 |
1382 | untie %db; |
1383 | close(DB_FH); |
1384 | print "$$: Updated db to $key=$value\n"; |
1385 | |
f6b705ef |
1386 | =head2 Sharing databases with C applications |
1387 | |
1388 | There is no technical reason why a Berkeley DB database cannot be |
1389 | shared by both a Perl and a C application. |
1390 | |
1391 | The vast majority of problems that are reported in this area boil down |
1392 | to the fact that C strings are NULL terminated, whilst Perl strings are |
1393 | not. |
1394 | |
1395 | Here is a real example. Netscape 2.0 keeps a record of the locations you |
1396 | visit along with the time you last visited them in a DB_HASH database. |
1397 | This is usually stored in the file F<~/.netscape/history.db>. The key |
1398 | field in the database is the location string and the value field is the |
1399 | time the location was last visited stored as a 4 byte binary value. |
1400 | |
1401 | If you haven't already guessed, the location string is stored with a |
1402 | terminating NULL. This means you need to be careful when accessing the |
1403 | database. |
1404 | |
1405 | Here is a snippet of code that is loosely based on Tom Christiansen's |
1406 | I<ggh> script (available from your nearest CPAN archive in |
1407 | F<authors/id/TOMC/scripts/nshist.gz>). |
1408 | |
1409 | use DB_File ; |
1410 | use Fcntl ; |
1411 | use strict 'untie' ; |
1412 | |
1413 | $dotdir = $ENV{HOME} || $ENV{LOGNAME}; |
1414 | |
1415 | $HISTORY = "$dotdir/.netscape/history.db"; |
1416 | |
1417 | tie %hist_db, 'DB_File', $HISTORY |
1418 | or die "Cannot open $HISTORY: $!\n" ; |
1419 | |
1420 | # Dump the complete database |
1421 | while ( ($href, $binary_time) = each %hist_db ) { |
1422 | |
1423 | # remove the terminating NULL |
1424 | $href =~ s/\x00$// ; |
1425 | |
1426 | # convert the binary time into a user friendly string |
1427 | $date = localtime unpack("V", $binary_time); |
1428 | print "$date $href\n" ; |
1429 | } |
1430 | |
1431 | # check for the existence of a specific key |
1432 | # remember to add the NULL |
1433 | if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) { |
1434 | $date = localtime unpack("V", $binary_time) ; |
1435 | print "Last visited mox.perl.com on $date\n" ; |
1436 | } |
1437 | else { |
1438 | print "Never visited mox.perl.com\n" |
1439 | } |
1440 | |
1441 | untie %hist_db ; |
1442 | |
1443 | |
1444 | =head1 COMMON QUESTIONS |
1445 | |
1446 | =head2 Why is there Perl source in my database? |
1447 | |
1448 | If you look at the contents of a database file created by DB_File, |
1449 | there can sometimes be part of a Perl script included in it. |
1450 | |
1451 | This happens because Berkeley DB uses dynamic memory to allocate |
1452 | buffers which will subsequently be written to the database file. Being |
1453 | dynamic, the memory could have been used for anything before DB |
1454 | malloced it. As Berkeley DB doesn't clear the memory once it has been |
1455 | allocated, the unused portions will contain random junk. In the case |
1456 | where a Perl script gets written to the database, the random junk will |
1457 | correspond to an area of dynamic memory that happened to be used during |
1458 | the compilation of the script. |
1459 | |
1460 | Unless you don't like the possibility of there being part of your Perl |
1461 | scripts embedded in a database file, this is nothing to worry about. |
1462 | |
1463 | =head2 How do I store complex data structures with DB_File? |
1464 | |
1465 | Although B<DB_File> cannot do this directly, there is a module which |
1466 | can layer transparently over B<DB_File> to accomplish this feat. |
1467 | |
1468 | Check out the MLDBM module, available on CPAN in the directory |
1469 | F<modules/by-module/MLDBM>. |
1470 | |
1471 | =head2 What does "Invalid Argument" mean? |
1472 | |
1473 | You will get this error message when one of the parameters in the |
1474 | C<tie> call is wrong. Unfortunately there are quite a few parameters to |
1475 | get wrong, so it can be difficult to figure out which one it is. |
1476 | |
1477 | Here are a couple of possibilities: |
1478 | |
1479 | =over 5 |
1480 | |
1481 | =item 1. |
1482 | |
1483 | Attempting to reopen a database without closing it. See |
1484 | L<The strict untie pragma> for an example. |
1485 | |
1486 | =item 2. |
1487 | |
1488 | Using the O_WRONLY flag. |
1489 | |
1490 | =back |
1491 | |
1492 | =head2 What does "Bareword 'DB_File' not allowed" mean? |
1493 | |
1494 | You will encounter this particular error message when you have the |
1495 | C<strict 'subs'> pragma (or the full strict pragma) in your script. |
1496 | Consider this script: |
1497 | |
1498 | use strict ; |
1499 | use DB_File ; |
1500 | use vars qw(%x) ; |
1501 | tie %x, DB_File, "filename" ; |
1502 | |
1503 | Running it produces the error in question: |
1504 | |
1505 | Bareword "DB_File" not allowed while "strict subs" in use |
1506 | |
1507 | To get around the error, place the word C<DB_File> in either single or |
1508 | double quotes, like this: |
1509 | |
1510 | tie %x, "DB_File", "filename" ; |
1511 | |
1512 | Although it might seem like a real pain, it is really worth the effort |
1513 | of having a C<use strict> in all your scripts. |
1514 | |
cb1a09d0 |
1515 | =head1 HISTORY |
1516 | |
1517 | =over |
1518 | |
1519 | =item 0.1 |
3b35bae3 |
1520 | |
1521 | First Release. |
1522 | |
cb1a09d0 |
1523 | =item 0.2 |
3b35bae3 |
1524 | |
1525 | When B<DB_File> is opening a database file it no longer terminates the |
1526 | process if I<dbopen> returned an error. This allows file protection |
1527 | errors to be caught at run time. Thanks to Judith Grass |
cb1a09d0 |
1528 | E<lt>grass@cybercash.comE<gt> for spotting the bug. |
3b35bae3 |
1529 | |
cb1a09d0 |
1530 | =item 0.3 |
8e07c86e |
1531 | |
1532 | Added prototype support for multiple btree compare callbacks. |
1533 | |
cb1a09d0 |
1534 | =item 1.0 |
8e07c86e |
1535 | |
1536 | B<DB_File> has been in use for over a year. To reflect that, the |
1537 | version number has been incremented to 1.0. |
1538 | |
1539 | Added complete support for multiple concurrent callbacks. |
1540 | |
1541 | Using the I<push> method on an empty list didn't work properly. This |
1542 | has been fixed. |
1543 | |
cb1a09d0 |
1544 | =item 1.01 |
4633a7c4 |
1545 | |
1546 | Fixed a core dump problem with SunOS. |
1547 | |
1548 | The return value from TIEHASH wasn't set to NULL when dbopen returned |
1549 | an error. |
1550 | |
88108326 |
1551 | =item 1.02 |
1552 | |
f6b705ef |
1553 | Merged OS/2 specific code into DB_File.xs. |
88108326 |
1554 | |
1555 | Removed some redundant code in DB_File.xs. |
1556 | |
1557 | Documentation update. |
1558 | |
1559 | Allow negative subscripts with RECNO interface. |
1560 | |
1561 | Changed the default flags from O_RDWR to O_CREAT|O_RDWR. |
1562 | |
1563 | The example code which showed how to lock a database needed a call to |
1564 | C<sync> added. Without it the resultant database file was empty. |
1565 | |
f6b705ef |
1566 | Added get_dup method. |
88108326 |
1567 | |
f6b705ef |
1568 | =item 1.03 |
1569 | |
1570 | Documentation update. |
3b35bae3 |
1571 | |
f6b705ef |
1572 | B<DB_File> now imports the constants (O_RDWR, O_CREAT etc.) from Fcntl |
1573 | automatically. |
3b35bae3 |
1574 | |
f6b705ef |
1575 | The standard hash function C<exists> is now supported. |
1576 | |
1577 | Modified the behavior of get_dup. When it returns an associative |
1578 | array, the value is the count of the number of matching BTREE values. |
3b35bae3 |
1579 | |
1580 | =head1 BUGS |
1581 | |
8e07c86e |
1582 | Some older versions of Berkeley DB had problems with fixed length |
1583 | records using the RECNO file format. The newest version at the time of |
1584 | writing was 1.85 - this seems to have fixed the problems with RECNO. |
3b35bae3 |
1585 | |
8e07c86e |
1586 | I am sure there are bugs in the code. If you do find any, or can |
1587 | suggest any enhancements, I would welcome your comments. |
3b35bae3 |
1588 | |
1589 | =head1 AVAILABILITY |
1590 | |
f6b705ef |
1591 | B<DB_File> comes with the standard Perl source distribution. Look in |
1592 | the directory F<ext/DB_File>. |
1593 | |
cb1a09d0 |
1594 | Berkeley DB is available at your nearest CPAN archive (see |
1595 | L<perlmod/"CPAN"> for a list) in F<src/misc/db.1.85.tar.gz>, or via the |
1596 | host F<ftp.cs.berkeley.edu> in F</ucb/4bsd/db.tar.gz>. It is I<not> under |
1597 | the GPL. |
3b35bae3 |
1598 | |
88108326 |
1599 | If you are running IRIX, then get Berkeley DB from |
1600 | F<http://reality.sgi.com/ariel>. It has the patches necessary to |
1601 | compile properly on IRIX 5.3. |
1602 | |
3b35bae3 |
1603 | =head1 SEE ALSO |
1604 | |
1605 | L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)> |
1606 | |
3b35bae3 |
1607 | =head1 AUTHOR |
1608 | |
8e07c86e |
1609 | The DB_File interface was written by Paul Marquess |
88108326 |
1610 | E<lt>pmarquess@bfsec.bt.co.ukE<gt>. |
8e07c86e |
1611 | Questions about the DB system itself may be addressed to Keith Bostic |
88108326 |
1612 | E<lt>bostic@cs.berkeley.eduE<gt>. |
3b35bae3 |
1613 | |
1614 | =cut |