Commit | Line | Data |
a0d0e21e |
1 | # DB_File.pm -- Perl 5 interface to Berkeley DB |
2 | # |
3 | # written by Paul Marquess (pmarquess@bfsec.bt.co.uk) |
88108326 |
4 | # last modified 28th June 1996 |
5 | # version 1.02 |
8e07c86e |
6 | |
7 | package DB_File::HASHINFO ; |
785da04d |
8 | |
9 | use strict; |
8e07c86e |
10 | use Carp; |
88108326 |
11 | require Tie::Hash; |
12 | @DB_File::HASHINFO::ISA = qw(Tie::Hash); |
8e07c86e |
13 | |
88108326 |
14 | sub new |
8e07c86e |
15 | { |
88108326 |
16 | my $pkg = shift ; |
17 | my %x ; |
18 | tie %x, $pkg ; |
19 | bless \%x, $pkg ; |
8e07c86e |
20 | } |
21 | |
88108326 |
22 | sub TIEHASH |
23 | { |
24 | my $pkg = shift ; |
25 | |
26 | bless { 'bsize' => undef, |
27 | 'ffactor' => undef, |
28 | 'nelem' => undef, |
29 | 'cachesize' => undef, |
30 | 'hash' => undef, |
31 | 'lorder' => undef, |
32 | }, $pkg ; |
33 | } |
8e07c86e |
34 | |
35 | sub FETCH |
36 | { |
88108326 |
37 | my $self = shift ; |
38 | my $key = shift ; |
8e07c86e |
39 | |
88108326 |
40 | return $self->{$key} if exists $self->{$key} ; |
41 | |
42 | my $pkg = ref $self ; |
43 | croak "${pkg}::FETCH - Unknown element '$key'" ; |
8e07c86e |
44 | } |
45 | |
46 | |
47 | sub STORE |
48 | { |
88108326 |
49 | my $self = shift ; |
50 | my $key = shift ; |
51 | my $value = shift ; |
52 | |
53 | if ( exists $self->{$key} ) |
8e07c86e |
54 | { |
88108326 |
55 | $self->{$key} = $value ; |
8e07c86e |
56 | return ; |
57 | } |
58 | |
88108326 |
59 | my $pkg = ref $self ; |
60 | croak "${pkg}::STORE - Unknown element '$key'" ; |
8e07c86e |
61 | } |
62 | |
63 | sub DELETE |
64 | { |
88108326 |
65 | my $self = shift ; |
66 | my $key = shift ; |
67 | # Remove $key when it is a recognised element; otherwise croak. |
68 | if ( exists $self->{$key} ) |
8e07c86e |
69 | { |
88108326 |
70 | delete $self->{$key} ; |
8e07c86e |
71 | return ; |
72 | } |
73 | # Name the object's actual class in the message, as FETCH and STORE do. |
88108326 |
74 | my $pkg = ref $self ; |
75 | croak "${pkg}::DELETE - Unknown element '$key'" ; |
8e07c86e |
76 | } |
77 | |
88108326 |
78 | sub EXISTS |
8e07c86e |
79 | { |
88108326 |
80 | my $self = shift ; |
81 | my $key = shift ; |
8e07c86e |
82 | |
88108326 |
83 | exists $self->{$key} ; |
8e07c86e |
84 | } |
85 | |
88108326 |
86 | sub NotHere |
8e07c86e |
87 | { |
88108326 |
88 | my $pkg = shift ; |
89 | my $method = shift ; |
8e07c86e |
90 | |
88108326 |
91 | croak "${pkg} does not define the method ${method}" ; |
8e07c86e |
92 | } |
93 | |
88108326 |
94 | sub DESTROY { undef %{$_[0]} } |
95 | sub FIRSTKEY { my $self = shift ; $self->NotHere(ref $self, "FIRSTKEY") } |
96 | sub NEXTKEY { my $self = shift ; $self->NotHere(ref $self, "NEXTKEY") } |
97 | sub CLEAR { my $self = shift ; $self->NotHere(ref $self, "CLEAR") } |
8e07c86e |
98 | |
99 | package DB_File::RECNOINFO ; |
785da04d |
100 | |
88108326 |
101 | use strict ; |
102 | |
103 | @DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ; |
8e07c86e |
104 | |
105 | sub TIEHASH |
106 | { |
88108326 |
107 | my $pkg = shift ; |
108 | |
109 | bless { 'bval' => undef, |
110 | 'cachesize' => undef, |
111 | 'psize' => undef, |
112 | 'flags' => undef, |
113 | 'lorder' => undef, |
114 | 'reclen' => undef, |
115 | 'bfname' => "", |
116 | }, $pkg ; |
8e07c86e |
117 | } |
118 | |
88108326 |
119 | package DB_File::BTREEINFO ; |
8e07c86e |
120 | |
88108326 |
121 | use strict ; |
8e07c86e |
122 | |
88108326 |
123 | @DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ; |
8e07c86e |
124 | |
88108326 |
125 | sub TIEHASH |
8e07c86e |
126 | { |
88108326 |
127 | my $pkg = shift ; |
128 | |
129 | bless { 'flags' => undef, |
130 | 'cachesize' => undef, |
131 | 'maxkeypage' => undef, |
132 | 'minkeypage' => undef, |
133 | 'psize' => undef, |
134 | 'compare' => undef, |
135 | 'prefix' => undef, |
136 | 'lorder' => undef, |
137 | }, $pkg ; |
8e07c86e |
138 | } |
139 | |
140 | |
8e07c86e |
141 | package DB_File ; |
785da04d |
142 | |
143 | use strict; |
144 | use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO) ; |
8e07c86e |
145 | use Carp; |
146 | |
785da04d |
147 | |
88108326 |
148 | $VERSION = "1.02" ; |
8e07c86e |
149 | |
150 | #typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; |
88108326 |
151 | #$DB_BTREE = TIEHASH DB_File::BTREEINFO ; |
152 | #$DB_HASH = TIEHASH DB_File::HASHINFO ; |
153 | #$DB_RECNO = TIEHASH DB_File::RECNOINFO ; |
154 | |
155 | $DB_BTREE = new DB_File::BTREEINFO ; |
156 | $DB_HASH = new DB_File::HASHINFO ; |
157 | $DB_RECNO = new DB_File::RECNOINFO ; |
8e07c86e |
158 | |
785da04d |
159 | require Tie::Hash; |
8e07c86e |
160 | require Exporter; |
161 | use AutoLoader; |
162 | require DynaLoader; |
785da04d |
163 | @ISA = qw(Tie::Hash Exporter DynaLoader); |
8e07c86e |
164 | @EXPORT = qw( |
165 | $DB_BTREE $DB_HASH $DB_RECNO |
88108326 |
166 | |
8e07c86e |
167 | BTREEMAGIC |
168 | BTREEVERSION |
169 | DB_LOCK |
170 | DB_SHMEM |
171 | DB_TXN |
172 | HASHMAGIC |
173 | HASHVERSION |
174 | MAX_PAGE_NUMBER |
175 | MAX_PAGE_OFFSET |
176 | MAX_REC_NUMBER |
177 | RET_ERROR |
178 | RET_SPECIAL |
179 | RET_SUCCESS |
180 | R_CURSOR |
181 | R_DUP |
182 | R_FIRST |
183 | R_FIXEDLEN |
184 | R_IAFTER |
185 | R_IBEFORE |
186 | R_LAST |
187 | R_NEXT |
188 | R_NOKEY |
189 | R_NOOVERWRITE |
190 | R_PREV |
191 | R_RECNOSYNC |
192 | R_SETCURSOR |
193 | R_SNAPSHOT |
194 | __R_UNUSED |
88108326 |
195 | |
8e07c86e |
196 | ); |
197 | |
198 | sub AUTOLOAD { |
785da04d |
199 | my($constname); |
8e07c86e |
200 | ($constname = $AUTOLOAD) =~ s/.*:://; |
785da04d |
201 | my $val = constant($constname, @_ ? $_[0] : 0); |
8e07c86e |
202 | if ($! != 0) { |
203 | if ($! =~ /Invalid/) { |
204 | $AutoLoader::AUTOLOAD = $AUTOLOAD; |
205 | goto &AutoLoader::AUTOLOAD; |
206 | } |
207 | else { |
785da04d |
208 | my($pack,$file,$line) = caller; |
8e07c86e |
209 | croak "Your vendor has not defined DB macro $constname, used at $file line $line. |
210 | "; |
211 | } |
212 | } |
213 | eval "sub $AUTOLOAD { $val }"; |
214 | goto &$AUTOLOAD; |
215 | } |
216 | |
785da04d |
217 | bootstrap DB_File $VERSION; |
8e07c86e |
218 | |
219 | # Preloaded methods go here. Autoload methods go after __END__, and are |
220 | # processed by the autosplit program. |
221 | |
88108326 |
222 | |
223 | sub get_dup |
224 | { |
225 | croak "Usage: \$db->get_dup(key [,flag])\n" |
226 | unless @_ == 2 or @_ == 3 ; |
227 | # Scalar context: number of values stored under key. List context: the values (each followed by 1 when flag is true). |
228 | my $db = shift ; |
229 | my $key = shift ; |
230 | my $flag = shift ; |
231 | my $value ; |
232 | my $origkey = $key ; |
233 | my $wantarray = wantarray ; |
234 | my @values = () ; |
235 | my $counter = 0 ; |
236 | # A non-zero status means no record was fetched, so $key cannot be trusted as a match. |
237 | # get the first value associated with the key, $key |
238 | my $status = $db->seq($key, $value, R_CURSOR()) ; |
239 | |
240 | if ( $status == 0 and $key eq $origkey) { |
241 | |
242 | while (1) { |
243 | # save the value or count matches |
244 | if ($wantarray) |
245 | { push (@values, $value) ; push(@values, 1) if $flag } |
246 | else |
247 | { ++ $counter } |
248 | |
249 | # iterate through the database until either EOF |
250 | # or a different key is encountered. |
251 | last if $db->seq($key, $value, R_NEXT()) != 0 or $key ne $origkey ; |
252 | } |
253 | } |
254 | |
255 | $wantarray ? @values : $counter ; |
256 | } |
257 | |
258 | |
8e07c86e |
259 | 1; |
260 | __END__ |
261 | |
262 | =cut |
3b35bae3 |
263 | |
264 | =head1 NAME |
265 | |
266 | DB_File - Perl5 access to Berkeley DB |
267 | |
268 | =head1 SYNOPSIS |
269 | |
270 | use DB_File ; |
88108326 |
271 | use Fcntl ; |
272 | |
273 | [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ; |
274 | [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ; |
275 | [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ; |
3b35bae3 |
276 | |
88108326 |
277 | [$X =] tie %hash, DB_File, $filename [, $flags, $mode, $DB_HASH ] ; |
3b35bae3 |
278 | [$X =] tie %hash, DB_File, $filename, $flags, $mode, $DB_BTREE ; |
279 | [$X =] tie @array, DB_File, $filename, $flags, $mode, $DB_RECNO ; |
280 | |
281 | $status = $X->del($key [, $flags]) ; |
282 | $status = $X->put($key, $value [, $flags]) ; |
283 | $status = $X->get($key, $value [, $flags]) ; |
88108326 |
284 | $status = $X->seq($key, $value , $flags) ; |
3b35bae3 |
285 | $status = $X->sync([$flags]) ; |
286 | $status = $X->fd ; |
287 | |
88108326 |
288 | $count = $X->get_dup($key) ; |
289 | @list = $X->get_dup($key) ; |
290 | %list = $X->get_dup($key, 1) ; |
291 | |
3b35bae3 |
292 | untie %hash ; |
293 | untie @array ; |
294 | |
295 | =head1 DESCRIPTION |
296 | |
8e07c86e |
297 | B<DB_File> is a module which allows Perl programs to make use of the |
298 | facilities provided by Berkeley DB. If you intend to use this |
88108326 |
299 | module you should really have a copy of the Berkeley DB manual page at |
8e07c86e |
300 | hand. The interface defined here mirrors the Berkeley DB interface |
301 | closely. |
3b35bae3 |
302 | |
8e07c86e |
303 | Berkeley DB is a C library which provides a consistent interface to a |
304 | number of database formats. B<DB_File> provides an interface to all |
305 | three of the database types currently supported by Berkeley DB. |
3b35bae3 |
306 | |
307 | The file types are: |
308 | |
309 | =over 5 |
310 | |
88108326 |
311 | =item B<DB_HASH> |
3b35bae3 |
312 | |
88108326 |
313 | This database type allows arbitrary key/value pairs to be stored in data |
8e07c86e |
314 | files. This is equivalent to the functionality provided by other |
315 | hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though, |
316 | the files created using DB_HASH are not compatible with any of the |
317 | other packages mentioned. |
3b35bae3 |
318 | |
8e07c86e |
319 | A default hashing algorithm, which will be adequate for most |
320 | applications, is built into Berkeley DB. If you do need to use your own |
321 | hashing algorithm it is possible to write your own in Perl and have |
322 | B<DB_File> use it instead. |
3b35bae3 |
323 | |
88108326 |
324 | When opening an existing database, you may omit the final three arguments |
325 | to C<tie>; they default to O_RDWR, 0644, and $DB_HASH. If you're |
326 | creating a new file, you need to specify at least the C<$flags> |
327 | argument, which must include O_CREAT. |
3b35bae3 |
328 | |
88108326 |
329 | =item B<DB_BTREE> |
330 | |
331 | The btree format allows arbitrary key/value pairs to be stored in a |
8e07c86e |
332 | sorted, balanced binary tree. |
3b35bae3 |
333 | |
8e07c86e |
334 | As with the DB_HASH format, it is possible to provide a user defined |
335 | Perl routine to perform the comparison of keys. By default, though, the |
336 | keys are stored in lexical order. |
3b35bae3 |
337 | |
88108326 |
338 | =item B<DB_RECNO> |
3b35bae3 |
339 | |
8e07c86e |
340 | DB_RECNO allows both fixed-length and variable-length flat text files |
341 | to be manipulated using the same key/value pair interface as in DB_HASH |
342 | and DB_BTREE. In this case the key will consist of a record (line) |
343 | number. |
3b35bae3 |
344 | |
345 | =back |
346 | |
347 | =head2 How does DB_File interface to Berkeley DB? |
348 | |
349 | B<DB_File> allows access to Berkeley DB files using the tie() mechanism |
8e07c86e |
350 | in Perl 5 (for full details, see L<perlfunc/tie()>). This facility |
351 | allows B<DB_File> to access Berkeley DB files using either an |
352 | associative array (for DB_HASH & DB_BTREE file types) or an ordinary |
353 | array (for the DB_RECNO file type). |
3b35bae3 |
354 | |
88108326 |
355 | In addition to the tie() interface, it is also possible to access most |
356 | of the functions provided in the Berkeley DB API directly. |
357 | See L<"Using the Berkeley DB API Directly">. |
3b35bae3 |
358 | |
88108326 |
359 | =head2 Opening a Berkeley DB Database File |
3b35bae3 |
360 | |
8e07c86e |
361 | Berkeley DB uses the function dbopen() to open or create a database. |
362 | Below is the C prototype for dbopen(). |
3b35bae3 |
363 | |
364 | DB* |
365 | dbopen (const char * file, int flags, int mode, |
366 | DBTYPE type, const void * openinfo) |
367 | |
368 | The parameter C<type> is an enumeration which specifies which of the 3 |
369 | interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used. |
370 | Depending on which of these is actually chosen, the final parameter, |
371 | I<openinfo> points to a data structure which allows tailoring of the |
372 | specific interface method. |
373 | |
8e07c86e |
374 | This interface is handled slightly differently in B<DB_File>. Here is |
88108326 |
375 | an equivalent call using B<DB_File>: |
3b35bae3 |
376 | |
88108326 |
377 | tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ; |
3b35bae3 |
378 | |
8e07c86e |
379 | The C<filename>, C<flags> and C<mode> parameters are the direct |
380 | equivalent of their dbopen() counterparts. The final parameter $DB_HASH |
381 | performs the function of both the C<type> and C<openinfo> parameters in |
382 | dbopen(). |
3b35bae3 |
383 | |
88108326 |
384 | In the example above $DB_HASH is actually a pre-defined reference to a |
385 | hash object. B<DB_File> has three of these pre-defined references. |
386 | Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO. |
3b35bae3 |
387 | |
8e07c86e |
388 | The keys allowed in each of these pre-defined references are limited to |
389 | the names used in the equivalent C structure. So, for example, the |
390 | $DB_HASH reference will only allow keys called C<bsize>, C<cachesize>, |
88108326 |
391 | C<ffactor>, C<hash>, C<lorder> and C<nelem>. |
392 | |
393 | To change one of these elements, just assign to it like this: |
394 | |
395 | $DB_HASH->{'cachesize'} = 10000 ; |
396 | |
397 | The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are |
398 | usually adequate for most applications. If you do need to create extra |
399 | instances of these objects, constructors are available for each file |
400 | type. |
401 | |
402 | Here are examples of the constructors and the valid options available |
403 | for DB_HASH, DB_BTREE and DB_RECNO respectively. |
404 | |
405 | $a = new DB_File::HASHINFO ; |
406 | $a->{'bsize'} ; |
407 | $a->{'cachesize'} ; |
408 | $a->{'ffactor'}; |
409 | $a->{'hash'} ; |
410 | $a->{'lorder'} ; |
411 | $a->{'nelem'} ; |
412 | |
413 | $b = new DB_File::BTREEINFO ; |
414 | $b->{'flags'} ; |
415 | $b->{'cachesize'} ; |
416 | $b->{'maxkeypage'} ; |
417 | $b->{'minkeypage'} ; |
418 | $b->{'psize'} ; |
419 | $b->{'compare'} ; |
420 | $b->{'prefix'} ; |
421 | $b->{'lorder'} ; |
422 | |
423 | $c = new DB_File::RECNOINFO ; |
424 | $c->{'bval'} ; |
425 | $c->{'cachesize'} ; |
426 | $c->{'psize'} ; |
427 | $c->{'flags'} ; |
428 | $c->{'lorder'} ; |
429 | $c->{'reclen'} ; |
430 | $c->{'bfname'} ; |
431 | |
432 | The values stored in the hashes above are mostly the direct equivalent |
433 | of their C counterpart. Like their C counterparts, all are set to a |
434 | default set of values - that means you don't have to set I<all> of the |
435 | values when you only want to change one. Here is an example: |
436 | |
437 | $a = new DB_File::HASHINFO ; |
438 | $a->{'cachesize'} = 12345 ; |
439 | tie %y, 'DB_File', "filename", $flags, 0777, $a ; |
440 | |
441 | A few of the values need extra discussion here. When used, the C |
442 | equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers |
443 | to C functions. In B<DB_File> these keys are used to store references |
444 | to Perl subs. Below are templates for each of the subs: |
445 | |
446 | sub hash |
447 | { |
448 | my ($data) = @_ ; |
449 | ... |
450 | # return the hash value for $data |
451 | return $hash ; |
452 | } |
3b35bae3 |
453 | |
88108326 |
454 | sub compare |
455 | { |
456 | my ($key1, $key2) = @_ ; |
457 | ... |
458 | # return 0 if $key1 eq $key2 |
459 | # -1 if $key1 lt $key2 |
460 | # 1 if $key1 gt $key2 |
461 | return (-1 , 0 or 1) ; |
462 | } |
3b35bae3 |
463 | |
88108326 |
464 | sub prefix |
465 | { |
466 | my ($key1, $key2) = @_ ; |
467 | ... |
468 | # return number of bytes of $key2 which are |
469 | # necessary to determine that it is greater than $key1 |
470 | return $bytes ; |
471 | } |
3b35bae3 |
472 | |
88108326 |
473 | See L<"Using BTREE"> for an example of using the C<compare> template. |
474 | |
475 | =head2 Default Parameters |
476 | |
477 | It is possible to omit some or all of the final 4 parameters in the |
478 | call to C<tie> and let them take default values. As DB_HASH is the most |
479 | common file format used, the call: |
480 | |
481 | tie %A, "DB_File", "filename" ; |
482 | |
483 | is equivalent to: |
484 | |
485 | tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0640, $DB_HASH ; |
486 | |
487 | It is also possible to omit the filename parameter as well, so the |
488 | call: |
489 | |
490 | tie %A, "DB_File" ; |
491 | |
492 | is equivalent to: |
493 | |
494 | tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0640, $DB_HASH ; |
495 | |
496 | See L<"In Memory Databases"> for a discussion on the use of C<undef> |
497 | in place of a filename. |
498 | |
499 | =head2 Handling duplicate keys in BTREE databases |
500 | |
501 | The BTREE file type in Berkeley DB optionally allows a single key to be |
502 | associated with an arbitrary number of values. This option is enabled by |
503 | setting the flags element of C<$DB_BTREE> to R_DUP when creating the |
504 | database. |
505 | |
506 | There are some difficulties in using the tied hash interface if you |
507 | want to manipulate a BTREE database with duplicate keys. Consider this |
508 | code: |
509 | |
510 | use DB_File ; |
511 | use Fcntl ; |
512 | |
513 | $filename = "tree" ; |
514 | unlink $filename ; |
515 | |
516 | # Enable duplicate records |
517 | $DB_BTREE->{'flags'} = R_DUP ; |
518 | |
519 | tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
520 | or die "Cannot open $filename: $!\n"; |
521 | |
522 | # Add some key/value pairs to the file |
523 | $h{'Wall'} = 'Larry' ; |
524 | $h{'Wall'} = 'Brick' ; # Note the duplicate key |
525 | $h{'Smith'} = 'John' ; |
526 | $h{'mouse'} = 'mickey' ; |
527 | |
528 | # iterate through the associative array |
529 | # and print each key/value pair. |
530 | foreach (keys %h) |
531 | { print "$_ -> $h{$_}\n" } |
532 | |
533 | Here is the output: |
534 | |
535 | Smith -> John |
536 | Wall -> Larry |
537 | Wall -> Larry |
538 | mouse -> mickey |
539 | |
540 | As you can see 2 records have been successfully created with key C<Wall> |
541 | - the only thing is, when they are retrieved from the database they |
542 | both I<seem> to have the same value, namely C<Larry>. The problem is |
543 | caused by the way that the associative array interface works. |
544 | Basically, when the associative array interface is used to fetch the |
545 | value associated with a given key, it will only ever retrieve the first |
546 | value. |
547 | |
548 | Although it may not be immediately obvious from the code above, the |
549 | associative array interface can be used to write values with duplicate |
550 | keys, but it cannot be used to read them back from the database. |
551 | |
552 | The way to get around this problem is to use the Berkeley DB API method |
553 | called C<seq>. This method allows sequential access to key/value |
554 | pairs. See L<"Using the Berkeley DB API Directly"> for details of both |
555 | the C<seq> method and the API in general. |
556 | |
557 | Here is the script above rewritten using the C<seq> API method. |
558 | |
559 | use DB_File ; |
560 | use Fcntl ; |
561 | |
562 | $filename = "tree" ; |
563 | unlink $filename ; |
564 | |
565 | # Enable duplicate records |
566 | $DB_BTREE->{'flags'} = R_DUP ; |
567 | |
568 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
569 | or die "Cannot open $filename: $!\n"; |
570 | |
571 | # Add some key/value pairs to the file |
572 | $h{'Wall'} = 'Larry' ; |
573 | $h{'Wall'} = 'Brick' ; # Note the duplicate key |
574 | $h{'Smith'} = 'John' ; |
575 | $h{'mouse'} = 'mickey' ; |
576 | |
577 | # Point to the first record in the btree |
578 | $x->seq($key, $value, R_FIRST) ; |
579 | |
580 | # now iterate through the rest of the btree |
581 | # and print each key/value pair. |
582 | print "$key -> $value\n" ; |
583 | while ( $x->seq($key, $value, R_NEXT) == 0) |
584 | { print "$key -> $value\n" } |
585 | |
586 | undef $x ; |
587 | untie %h ; |
588 | |
589 | that prints: |
590 | |
591 | Smith -> John |
592 | Wall -> Brick |
593 | Wall -> Larry |
594 | mouse -> mickey |
595 | |
596 | This time we have got all the key/value pairs, including both the |
597 | values associated with the key C<Wall>. |
598 | |
599 | C<DB_File> comes with a utility method, called C<get_dup>, to assist in |
600 | reading duplicate values from BTREE databases. The method can take the |
601 | following forms: |
602 | |
603 | $count = $x->get_dup($key) ; |
604 | @list = $x->get_dup($key) ; |
605 | %list = $x->get_dup($key, 1) ; |
606 | |
607 | In a scalar context the method returns the number of values associated |
608 | with the key, C<$key>. |
609 | |
610 | In list context, it returns all the values which match C<$key>. Note |
611 | that the values returned will be in an apparently random order. |
612 | |
613 | If the second parameter is present and evaluates TRUE, the method |
614 | returns an associative array whose keys correspond to the values |
615 | from the BTREE and whose values are all C<1>. |
616 | |
617 | So assuming the database created above, we can use C<get_dup> like |
618 | this: |
619 | |
620 | $cnt = $x->get_dup("Wall") ; |
621 | print "Wall occurred $cnt times\n" ; |
622 | |
623 | %hash = $x->get_dup("Wall", 1) ; |
624 | print "Larry is there\n" if $hash{'Larry'} ; |
625 | |
626 | @list = $x->get_dup("Wall") ; |
627 | print "Wall => [@list]\n" ; |
628 | |
629 | @list = $x->get_dup("Smith") ; |
630 | print "Smith => [@list]\n" ; |
631 | |
632 | @list = $x->get_dup("Dog") ; |
633 | print "Dog => [@list]\n" ; |
634 | |
635 | |
636 | and it will print: |
637 | |
638 | Wall occurred 2 times |
639 | Larry is there |
640 | Wall => [Brick Larry] |
641 | Smith => [John] |
642 | Dog => [] |
3b35bae3 |
643 | |
644 | =head2 RECNO |
645 | |
88108326 |
646 | In order to make RECNO more compatible with Perl the array offset for |
647 | all RECNO arrays begins at 0 rather than 1 as in Berkeley DB. |
3b35bae3 |
648 | |
88108326 |
649 | As with normal Perl arrays, a RECNO array can be accessed using |
650 | negative indexes. The index -1 refers to the last element of the array, |
651 | -2 the second last, and so on. Attempting to access an element before |
652 | the start of the array will raise a fatal run-time error. |
3b35bae3 |
653 | |
654 | =head2 In Memory Databases |
655 | |
8e07c86e |
656 | Berkeley DB allows the creation of in-memory databases by using NULL |
785da04d |
657 | (that is, a C<(char *)0> in C) in place of the filename. B<DB_File> |
8e07c86e |
658 | uses C<undef> instead of NULL to provide this functionality. |
3b35bae3 |
659 | |
660 | |
88108326 |
661 | =head2 Using the Berkeley DB API Directly |
3b35bae3 |
662 | |
663 | As well as accessing Berkeley DB using a tied hash or array, it is also |
88108326 |
664 | possible to make direct use of most of the API functions defined in the |
8e07c86e |
665 | Berkeley DB documentation. |
3b35bae3 |
666 | |
88108326 |
667 | To do this you need to store a copy of the object returned from the tie. |
3b35bae3 |
668 | |
88108326 |
669 | $db = tie %hash, "DB_File", "filename" ; |
3b35bae3 |
670 | |
8e07c86e |
671 | Once you have done that, you can access the Berkeley DB API functions |
88108326 |
672 | as B<DB_File> methods directly like this: |
3b35bae3 |
673 | |
674 | $db->put($key, $value, R_NOOVERWRITE) ; |
675 | |
88108326 |
676 | B<Important:> If you have saved a copy of the object returned from |
677 | C<tie>, the underlying database file will I<not> be closed until both |
678 | the tied variable is untied and all copies of the saved object are |
679 | destroyed. |
680 | |
681 | use DB_File ; |
682 | $db = tie %hash, "DB_File", "filename" |
683 | or die "Cannot tie filename: $!" ; |
684 | ... |
685 | undef $db ; |
686 | untie %hash ; |
687 | |
688 | All the functions defined in L<dbopen> are available except for |
689 | close() and dbopen() itself. The B<DB_File> method interface to the |
690 | supported functions has been implemented to mirror the way Berkeley DB |
691 | works whenever possible. In particular note that: |
692 | |
693 | =over 5 |
694 | |
695 | =item * |
696 | |
697 | The methods return a status value. All return 0 on success. |
698 | All return -1 to signify an error and set C<$!> to the exact |
699 | error code. The return code 1 generally (but not always) means that the |
700 | key specified did not exist in the database. |
701 | |
702 | Other return codes are defined. See below and in the Berkeley DB |
703 | documentation for details. The Berkeley DB documentation should be used |
704 | as the definitive source. |
705 | |
706 | =item * |
3b35bae3 |
707 | |
88108326 |
708 | Whenever a Berkeley DB function returns data via one of its parameters, |
709 | the equivalent B<DB_File> method does exactly the same. |
3b35bae3 |
710 | |
88108326 |
711 | =item * |
712 | |
713 | If you are careful, it is possible to mix API calls with the tied |
714 | hash/array interface in the same piece of code. Although only a few of |
715 | the methods used to implement the tied interface currently make use of |
716 | the cursor, you should always assume that the cursor has been changed |
717 | any time the tied hash/array interface is used. As an example, this |
718 | code will probably not do what you expect: |
719 | |
720 | $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE |
721 | or die "Cannot tie $filename: $!" ; |
722 | |
723 | # Get the first key/value pair and set the cursor |
724 | $X->seq($key, $value, R_FIRST) ; |
725 | |
726 | # this line will modify the cursor |
727 | $count = scalar keys %x ; |
728 | |
729 | # Get the second key/value pair. |
730 | # oops, it didn't, it got the last key/value pair! |
731 | $X->seq($key, $value, R_NEXT) ; |
732 | |
733 | The code above can be rearranged to get around the problem, like this: |
734 | |
735 | $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE |
736 | or die "Cannot tie $filename: $!" ; |
737 | |
738 | # this line will modify the cursor |
739 | $count = scalar keys %x ; |
740 | |
741 | # Get the first key/value pair and set the cursor |
742 | $X->seq($key, $value, R_FIRST) ; |
743 | |
744 | # Get the second key/value pair. |
745 | # worked this time. |
746 | $X->seq($key, $value, R_NEXT) ; |
747 | |
748 | =back |
749 | |
750 | All the constants defined in L<dbopen> for use in the flags parameters |
751 | in the methods defined below are also available. Refer to the Berkeley |
752 | DB documentation for the precise meaning of the flags values. |
753 | |
754 | Below is a list of the methods available. |
3b35bae3 |
755 | |
756 | =over 5 |
757 | |
88108326 |
758 | =item C<$status = $X-E<gt>get($key, $value [, $flags]) ;> |
759 | |
760 | Given a key (C<$key>) this method reads the value associated with it |
761 | from the database. The value read from the database is returned in the |
762 | C<$value> parameter. |
3b35bae3 |
763 | |
88108326 |
764 | If the key does not exist the method returns 1. |
3b35bae3 |
765 | |
88108326 |
766 | No flags are currently defined for this method. |
3b35bae3 |
767 | |
88108326 |
768 | =item C<$status = $X-E<gt>put($key, $value [, $flags]) ;> |
3b35bae3 |
769 | |
88108326 |
770 | Stores the key/value pair in the database. |
771 | |
772 | If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter |
8e07c86e |
773 | will have the record number of the inserted key/value pair set. |
3b35bae3 |
774 | |
88108326 |
775 | Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and |
776 | R_SETCURSOR. |
777 | |
778 | =item C<$status = $X-E<gt>del($key [, $flags]) ;> |
3b35bae3 |
779 | |
88108326 |
780 | Removes all key/value pairs with key C<$key> from the database. |
3b35bae3 |
781 | |
88108326 |
782 | A return code of 1 means that the requested key was not in the |
783 | database. |
3b35bae3 |
784 | |
88108326 |
785 | R_CURSOR is the only valid flag at present. |
3b35bae3 |
786 | |
88108326 |
787 | =item C<$status = $X-E<gt>fd ;> |
3b35bae3 |
788 | |
88108326 |
789 | Returns the file descriptor for the underlying database. |
3b35bae3 |
790 | |
88108326 |
791 | See L<"Locking Databases"> for an example of how to make use of the |
792 | C<fd> method to lock your database. |
3b35bae3 |
793 | |
88108326 |
794 | =item C<$status = $X-E<gt>seq($key, $value, $flags) ;> |
3b35bae3 |
795 | |
88108326 |
796 | This interface allows sequential retrieval from the database. See |
797 | L<dbopen> for full details. |
798 | |
799 | Both the C<$key> and C<$value> parameters will be set to the key/value |
800 | pair read from the database. |
801 | |
802 | The flags parameter is mandatory. The valid flag values are R_CURSOR, |
803 | R_FIRST, R_LAST, R_NEXT and R_PREV. |
804 | |
805 | =item C<$status = $X-E<gt>sync([$flags]) ;> |
806 | |
807 | Flushes any cached buffers to disk. |
808 | |
809 | R_RECNOSYNC is the only valid flag at present. |
3b35bae3 |
810 | |
811 | =back |
812 | |
813 | =head1 EXAMPLES |
814 | |
8e07c86e |
815 | It is always a lot easier to understand something when you see a real |
816 | example. So here are a few. |
3b35bae3 |
817 | |
818 | =head2 Using HASH |
819 | |
820 | use DB_File ; |
821 | use Fcntl ; |
88108326 |
822 | |
823 | tie %h, "DB_File", "hashed", O_RDWR|O_CREAT, 0640, $DB_HASH |
824 | or die "Cannot open file 'hashed': $!\n"; |
825 | |
3b35bae3 |
826 | # Add a key/value pair to the file |
827 | $h{"apple"} = "orange" ; |
88108326 |
828 | |
3b35bae3 |
829 | # Check for existence of a key |
830 | print "Exists\n" if $h{"banana"} ; |
88108326 |
831 | |
3b35bae3 |
832 | # Delete |
833 | delete $h{"apple"} ; |
88108326 |
834 | |
3b35bae3 |
835 | untie %h ; |
836 | |
837 | =head2 Using BTREE |
838 | |
88108326 |
839 | Here is a sample of code which uses BTREE. Just to make life more |
840 | interesting the default comparison function will not be used. Instead |
8e07c86e |
841 | a Perl sub, C<Compare()>, will be used to do a case insensitive |
842 | comparison. |
3b35bae3 |
843 | |
844 | use DB_File ; |
845 | use Fcntl ; |
88108326 |
846 | |
3b35bae3 |
847 | sub Compare |
848 | { |
849 | my ($key1, $key2) = @_ ; |
88108326 |
850 | |
3b35bae3 |
851 | "\L$key1" cmp "\L$key2" ; |
852 | } |
88108326 |
853 | |
854 | $DB_BTREE->{'compare'} = \&Compare ; |
855 | |
856 | tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE |
857 | or die "Cannot open file 'tree': $!\n" ; |
858 | |
3b35bae3 |
859 | # Add a key/value pair to the file |
860 | $h{'Wall'} = 'Larry' ; |
861 | $h{'Smith'} = 'John' ; |
862 | $h{'mouse'} = 'mickey' ; |
863 | $h{'duck'} = 'donald' ; |
88108326 |
864 | |
3b35bae3 |
865 | # Delete |
866 | delete $h{"duck"} ; |
88108326 |
867 | |
3b35bae3 |
868 | # Cycle through the keys printing them in order. |
869 | # Note it is not necessary to sort the keys as |
870 | # the btree will have kept them in order automatically. |
871 | foreach (keys %h) |
872 | { print "$_\n" } |
88108326 |
873 | |
3b35bae3 |
874 | untie %h ; |
875 | |
876 | Here is the output from the code above. |
877 | |
878 | mouse |
879 | Smith |
880 | Wall |
881 | |
882 | |
883 | =head2 Using RECNO |
884 | |
88108326 |
885 | Here is a simple example that uses RECNO. |
886 | |
3b35bae3 |
887 | use DB_File ; |
888 | use Fcntl ; |
88108326 |
889 | |
890 | $DB_RECNO->{'psize'} = 3000 ; |
891 | |
892 | tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO |
893 | or die "Cannot open file 'text': $!\n" ; |
894 | |
3b35bae3 |
895 | # Add a key/value pair to the file |
896 | $h[0] = "orange" ; |
88108326 |
897 | |
3b35bae3 |
898 | # Check for existence of a key |
899 | print "Exists\n" if $h[1] ; |
3b35bae3 |
900 | |
88108326 |
901 | untie @h ; |
3b35bae3 |
902 | |
cb1a09d0 |
903 | =head2 Locking Databases |
3b35bae3 |
904 | |
cb1a09d0 |
905 | Concurrent access to a read-write database by several parties requires |
906 | them all to use some kind of locking. Here's an example of Tom's that |
907 | uses the I<fd> method to get the file descriptor, and then a careful |
908 | open() to obtain a filehandle that Perl can flock() for you. Run this |
909 | repeatedly in the background to watch the locks granted in proper order. |
3b35bae3 |
910 | |
cb1a09d0 |
911 | use Fcntl; |
912 | use DB_File; |
913 | |
914 | use strict; |
915 | |
916 | sub LOCK_SH { 1 } |
917 | sub LOCK_EX { 2 } |
918 | sub LOCK_NB { 4 } |
919 | sub LOCK_UN { 8 } |
920 | |
921 | my($oldval, $fd, $db, %db, $value, $key); |
922 | |
923 | $key = shift || 'default'; |
924 | $value = shift || 'magic'; |
925 | |
926 | $value .= " $$"; |
927 | |
928 | $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644) |
929 | || die "dbcreat /tmp/foo.db $!"; |
930 | $fd = $db->fd; |
931 | print "$$: db fd is $fd\n"; |
932 | open(DB_FH, "+<&=$fd") || die "dup $!"; |
933 | |
934 | |
935 | unless (flock (DB_FH, LOCK_SH | LOCK_NB)) { |
936 | print "$$: CONTENTION; can't read during write update! |
937 | Waiting for read lock ($!) ...."; |
938 | unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" } |
939 | } |
940 | print "$$: Read lock granted\n"; |
941 | |
942 | $oldval = $db{$key}; |
943 | print "$$: Old value was $oldval\n"; |
944 | flock(DB_FH, LOCK_UN); |
945 | |
946 | unless (flock (DB_FH, LOCK_EX | LOCK_NB)) { |
947 | print "$$: CONTENTION; must have exclusive lock! |
948 | Waiting for write lock ($!) ...."; |
949 | unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" } |
950 | } |
951 | |
952 | print "$$: Write lock granted\n"; |
953 | $db{$key} = $value; |
88108326 |
954 | $db->sync; |
cb1a09d0 |
955 | sleep 10; |
956 | |
957 | flock(DB_FH, LOCK_UN); |
88108326 |
958 | undef $db; |
cb1a09d0 |
959 | untie %db; |
960 | close(DB_FH); |
961 | print "$$: Updated db to $key=$value\n"; |
962 | |
963 | =head1 HISTORY |
964 | |
965 | =over |
966 | |
967 | =item 0.1 |
3b35bae3 |
968 | |
969 | First Release. |
970 | |
cb1a09d0 |
971 | =item 0.2 |
3b35bae3 |
972 | |
973 | When B<DB_File> is opening a database file it no longer terminates the |
974 | process if I<dbopen> returned an error. This allows file protection |
975 | errors to be caught at run time. Thanks to Judith Grass |
cb1a09d0 |
976 | E<lt>grass@cybercash.comE<gt> for spotting the bug. |
3b35bae3 |
977 | |
cb1a09d0 |
978 | =item 0.3 |
8e07c86e |
979 | |
980 | Added prototype support for multiple btree compare callbacks. |
981 | |
cb1a09d0 |
982 | =item 1.0 |
8e07c86e |
983 | |
984 | B<DB_File> has been in use for over a year. To reflect that, the |
985 | version number has been incremented to 1.0. |
986 | |
987 | Added complete support for multiple concurrent callbacks. |
988 | |
989 | Using the I<push> method on an empty list didn't work properly. This |
990 | has been fixed. |
991 | |
cb1a09d0 |
992 | =item 1.01 |
4633a7c4 |
993 | |
994 | Fixed a core dump problem with SunOS. |
995 | |
996 | The return value from TIEHASH wasn't set to NULL when dbopen returned |
997 | an error. |
998 | |
88108326 |
999 | =item 1.02 |
1000 | |
1001 | Merged OS/2 specific code into DB_File.xs. |
1002 | |
1003 | Removed some redundant code in DB_File.xs. |
1004 | |
1005 | Documentation update. |
1006 | |
1007 | Allow negative subscripts with RECNO interface. |
1008 | |
1009 | Changed the default flags from O_RDWR to O_CREAT|O_RDWR. |
1010 | |
1011 | The example code which showed how to lock a database needed a call to |
1012 | C<sync> added. Without it the resultant database file was empty. |
1013 | |
1014 | Added get_dups method. |
1015 | |
3b35bae3 |
1016 | =head1 WARNINGS |
1017 | |
88108326 |
1018 | If you happen to find any other functions defined in the source for |
1019 | this module that have not been mentioned in this document -- beware. I |
1020 | may drop them at a moment's notice. |
3b35bae3 |
1021 | |
8e07c86e |
1022 | If you cannot find any, then either you didn't look very hard or the |
1023 | moment has passed and I have dropped them. |
3b35bae3 |
1024 | |
1025 | =head1 BUGS |
1026 | |
8e07c86e |
1027 | Some older versions of Berkeley DB had problems with fixed length |
1028 | records using the RECNO file format. The newest version at the time of |
1029 | writing was 1.85 - this seems to have fixed the problems with RECNO. |
3b35bae3 |
1030 | |
8e07c86e |
1031 | I am sure there are bugs in the code. If you do find any, or can |
1032 | suggest any enhancements, I would welcome your comments. |
3b35bae3 |
1033 | |
1034 | =head1 AVAILABILITY |
1035 | |
cb1a09d0 |
1036 | Berkeley DB is available at your nearest CPAN archive (see |
1037 | L<perlmod/"CPAN"> for a list) in F<src/misc/db.1.85.tar.gz>, or via the |
1038 | host F<ftp.cs.berkeley.edu> in F</ucb/4bsd/db.tar.gz>. It is I<not> under |
1039 | the GPL. |
3b35bae3 |
1040 | |
88108326 |
1041 | If you are running IRIX, then get Berkeley DB from |
1042 | F<http://reality.sgi.com/ariel>. It has the patches necessary to |
1043 | compile properly on IRIX 5.3. |
1044 | |
3b35bae3 |
1045 | =head1 SEE ALSO |
1046 | |
1047 | L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)> |
1048 | |
8e07c86e |
1049 | Berkeley DB is available from F<ftp.cs.berkeley.edu> in the directory |
1050 | F</ucb/4bsd>. |
3b35bae3 |
1051 | |
1052 | =head1 AUTHOR |
1053 | |
8e07c86e |
1054 | The DB_File interface was written by Paul Marquess |
88108326 |
1055 | E<lt>pmarquess@bfsec.bt.co.ukE<gt>. |
8e07c86e |
1056 | Questions about the DB system itself may be addressed to Keith Bostic |
88108326 |
1057 | E<lt>bostic@cs.berkeley.eduE<gt>. |
3b35bae3 |
1058 | |
1059 | =cut |