Update to version 1.02
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
3# written by Paul Marquess (pmarquess@bfsec.bt.co.uk)
88108326 4# last modified 28th June 1996
5# version 1.02
8e07c86e 6
7package DB_File::HASHINFO ;
785da04d 8
9use strict;
8e07c86e 10use Carp;
88108326 11require Tie::Hash;
12@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 13
# Constructor for an info object.
#
# Creates a fresh hash, ties it to the requested package (which invokes that
# package's TIEHASH) and returns a reference to the tied hash, itself blessed
# into the same package.
sub new
{
    my ($class) = @_ ;
    my %info ;

    tie %info, $class ;

    return bless \%info, $class ;
}
21
# tie() hook.  Returns a hash reference blessed into the requested package
# holding one slot, initially undefined, for each of the element names
# bsize, ffactor, nelem, cachesize, hash and lorder.
sub TIEHASH
{
    my ($class) = @_ ;
    my %element ;

    # assigning an empty list to a hash slice creates every key with an
    # undefined value
    @element{ qw(bsize ffactor nelem cachesize hash lorder) } = () ;

    return bless \%element, $class ;
}
8e07c86e 34
# Tie hook for reading an element.
#
#   $self - the blessed hash reference holding the elements
#   $key  - name of the element to read
#
# Returns the stored value.  Croaks, naming the object's class, when $key is
# not one of the keys present in the object.
sub FETCH
{
    my ($self, $key) = @_ ;

    unless ( exists $self->{$key} )
    {
        my $pkg = ref $self ;
        croak "${pkg}::FETCH - Unknown element '$key'" ;
    }

    return $self->{$key} ;
}
45
46
# Tie hook for assigning to an element.
#
#   $self  - the blessed hash reference holding the elements
#   $key   - name of the element to set
#   $value - new value for that element
#
# Returns nothing.  Only keys already present in the object may be assigned;
# any other key makes the method croak, naming the object's class.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    if ( ! exists $self->{$key} )
    {
        my $pkg = ref $self ;
        croak "${pkg}::STORE - Unknown element '$key'" ;
    }

    $self->{$key} = $value ;
    return ;
}
62
# Tie hook for delete(): removes the element named $key from the object.
#
#   $self - the blessed hash reference holding the elements
#   $key  - name of the element to remove
#
# Returns nothing.  Croaks if $key is not currently present in the object.
#
# NOTE: the key itself is removed, so FETCH and STORE (which both test for
# the key with exists) will reject that element name afterwards.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{$key} )
    {
        delete $self->{$key} ;
        return ;
    }

    # Report the object's real class, exactly as FETCH and STORE do, so that
    # classes inheriting this method are not misreported as DB_File::HASHINFO.
    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}
77
# Tie hook for exists(): true when $key is currently a key of the object's
# underlying hash, false otherwise.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{$key} ;
}
85
# Croaks to report that a tie method is not supported.
#
# Accepts both calling conventions:
#
#   NotHere($pkg, $method)          - plain function call
#   $obj->NotHere($pkg, $method)    - method call, as used by the
#                                     FIRSTKEY/NEXTKEY/CLEAR stubs
#
# In the method form Perl prepends the invocant to the argument list, so
# taking the first two arguments positionally would put the object in $pkg
# and the class name in $method.  The method name is therefore always read
# from the END of the list, and the class name is derived from the first
# argument (its class when it is an object, otherwise the string itself).
sub NotHere
{
    my $self   = shift ;
    my $method = pop ;

    my $pkg = ref($self) || $self ;

    croak "${pkg} does not define the method ${method}" ;
}
93
# Destructor: empties the hash behind the object being destroyed.
sub DESTROY
{
    my ($self) = @_ ;
    undef %$self ;
}
# Iterating over, or clearing, an info object is not supported: each of these
# tie hooks only croaks (via NotHere) with a message naming the object's
# class and the unsupported method.
#
# NotHere is called as a plain function on purpose.  Its parameters are
# ($pkg, $method); a method call would prepend the object to the argument
# list, shifting every argument one place and garbling the message.
sub FIRSTKEY { my $self = shift ; NotHere(ref $self, "FIRSTKEY") }
sub NEXTKEY  { my $self = shift ; NotHere(ref $self, "NEXTKEY") }
sub CLEAR    { my $self = shift ; NotHere(ref $self, "CLEAR") }
8e07c86e 98
99package DB_File::RECNOINFO ;
785da04d 100
88108326 101use strict ;
102
103@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 104
# tie() hook for DB_File::RECNOINFO.  Returns a hash reference blessed into
# the requested package with one slot per element name: bval, cachesize,
# psize, flags, lorder and reclen start out undefined, bfname starts out as
# the empty string.
sub TIEHASH
{
    my ($class) = @_ ;

    my %element = map { ( $_ => undef ) }
                  qw(bval cachesize psize flags lorder reclen) ;
    $element{'bfname'} = "" ;

    return bless \%element, $class ;
}
118
88108326 119package DB_File::BTREEINFO ;
8e07c86e 120
88108326 121use strict ;
8e07c86e 122
88108326 123@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 124
# tie() hook for DB_File::BTREEINFO.  Returns a hash reference blessed into
# the requested package with an undefined slot for each of the element names
# flags, cachesize, maxkeypage, minkeypage, psize, compare, prefix and lorder.
sub TIEHASH
{
    my $class = $_[0] ;
    my $self  = bless {}, $class ;

    foreach my $name ( qw(flags cachesize maxkeypage minkeypage
                          psize compare prefix lorder) )
    {
        $self->{$name} = undef ;
    }

    return $self ;
}
139
140
8e07c86e 141package DB_File ;
785da04d 142
143use strict;
144use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO) ;
8e07c86e 145use Carp;
146
785da04d 147
88108326 148$VERSION = "1.02" ;
8e07c86e 149
150#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 151#$DB_BTREE = TIEHASH DB_File::BTREEINFO ;
152#$DB_HASH = TIEHASH DB_File::HASHINFO ;
153#$DB_RECNO = TIEHASH DB_File::RECNOINFO ;
154
155$DB_BTREE = new DB_File::BTREEINFO ;
156$DB_HASH = new DB_File::HASHINFO ;
157$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 158
785da04d 159require Tie::Hash;
8e07c86e 160require Exporter;
161use AutoLoader;
162require DynaLoader;
785da04d 163@ISA = qw(Tie::Hash Exporter DynaLoader);
8e07c86e 164@EXPORT = qw(
165 $DB_BTREE $DB_HASH $DB_RECNO
88108326 166
8e07c86e 167 BTREEMAGIC
168 BTREEVERSION
169 DB_LOCK
170 DB_SHMEM
171 DB_TXN
172 HASHMAGIC
173 HASHVERSION
174 MAX_PAGE_NUMBER
175 MAX_PAGE_OFFSET
176 MAX_REC_NUMBER
177 RET_ERROR
178 RET_SPECIAL
179 RET_SUCCESS
180 R_CURSOR
181 R_DUP
182 R_FIRST
183 R_FIXEDLEN
184 R_IAFTER
185 R_IBEFORE
186 R_LAST
187 R_NEXT
188 R_NOKEY
189 R_NOOVERWRITE
190 R_PREV
191 R_RECNOSYNC
192 R_SETCURSOR
193 R_SNAPSHOT
194 __R_UNUSED
88108326 195
8e07c86e 196);
197
# Fallback for calls to subs that are not (yet) defined in this package.
#
# The unqualified name of the called sub is passed to constant(); on success
# a real sub returning that value is compiled under the called name and the
# call is re-dispatched to it.
#
# NOTE(review): constant() is not defined in this file -- presumably it is
# supplied by the XS code loaded via bootstrap and reports failure through
# $!; confirm against DB_File.xs.
sub AUTOLOAD {
    my($constname);
    # strip the package qualifier, leaving the bare name that was called
    ($constname = $AUTOLOAD) =~ s/.*:://;
    # the first call argument, if any, is forwarded; otherwise 0 is passed
    my $val = constant($constname, @_ ? $_[0] : 0);
    if ($! != 0) {
        if ($! =~ /Invalid/) {
            # error text matched /Invalid/: hand the call over to AutoLoader
            # (presumably the name is not a constant at all -- TODO confirm)
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }
        else {
            # any other error: report it from the caller's file and line
            my($pack,$file,$line) = caller;
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.
";
        }
    }
    # compile a sub with the called name that returns the value, then jump to
    # it; once it exists, calls to that name no longer reach AUTOLOAD
    eval "sub $AUTOLOAD { $val }";
    goto &$AUTOLOAD;
}
216
785da04d 217bootstrap DB_File $VERSION;
8e07c86e 218
219# Preloaded methods go here. Autoload methods go after __END__, and are
220# processed by the autosplit program.
221
88108326 222
# Collects the values stored under one key of a database that allows
# duplicate keys.
#
#   $db   - the database object; its seq() method is used to walk the records
#   $key  - the key whose values are wanted
#   $flag - optional; when true (list context only) each value is followed
#           by a 1, so the result can be assigned directly to a hash
#
# Returns, in scalar context, the number of records found for $key; in list
# context, the list of their values (or value => 1 pairs when $flag is
# true).  Croaks with a usage message when called with the wrong number of
# arguments.
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my $db        = shift ;
    my $key       = shift ;
    my $flag      = shift ;
    my $value ;
    my $origkey   = $key ;
    my $wantarray = wantarray ;
    my @values    = () ;
    my $counter   = 0 ;
    my $status ;

    # Position on the first record for $key, then step through the records
    # that follow.  The loop ends when seq() reports anything other than
    # success or when a different key is reached.  The status of the very
    # first seq() call is checked too: if that call fails, $key and $value
    # cannot be trusted to describe a real record, so nothing may be
    # recorded for it.
    for ($status = $db->seq($key, $value, R_CURSOR()) ;
         $status == 0 and $key eq $origkey ;
         $status = $db->seq($key, $value, R_NEXT()) )
    {
        # save the value or count matches
        if ($wantarray)
          { push (@values, $value) ; push(@values, 1) if $flag }
        else
          { ++ $counter }
    }

    $wantarray ? @values : $counter ;
}
257
258
8e07c86e 2591;
260__END__
261
262=cut
3b35bae3 263
264=head1 NAME
265
266DB_File - Perl5 access to Berkeley DB
267
268=head1 SYNOPSIS
269
270 use DB_File ;
88108326 271 use Fcntl ;
272
273 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
274 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
275 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
3b35bae3 276
88108326 277 [$X =] tie %hash, DB_File, $filename [, $flags, $mode, $DB_HASH ] ;
3b35bae3 278 [$X =] tie %hash, DB_File, $filename, $flags, $mode, $DB_BTREE ;
279 [$X =] tie @array, DB_File, $filename, $flags, $mode, $DB_RECNO ;
280
281 $status = $X->del($key [, $flags]) ;
282 $status = $X->put($key, $value [, $flags]) ;
283 $status = $X->get($key, $value [, $flags]) ;
88108326 284 $status = $X->seq($key, $value , $flags) ;
3b35bae3 285 $status = $X->sync([$flags]) ;
286 $status = $X->fd ;
287
88108326 288 $count = $X->get_dup($key) ;
289 @list = $X->get_dup($key) ;
290 %list = $X->get_dup($key, 1) ;
291
3b35bae3 292 untie %hash ;
293 untie @array ;
294
295=head1 DESCRIPTION
296
8e07c86e 297B<DB_File> is a module which allows Perl programs to make use of the
298facilities provided by Berkeley DB. If you intend to use this
88108326 299module you should really have a copy of the Berkeley DB manual page at
8e07c86e 300hand. The interface defined here mirrors the Berkeley DB interface
301closely.
3b35bae3 302
8e07c86e 303Berkeley DB is a C library which provides a consistent interface to a
304number of database formats. B<DB_File> provides an interface to all
305three of the database types currently supported by Berkeley DB.
3b35bae3 306
307The file types are:
308
309=over 5
310
88108326 311=item B<DB_HASH>
3b35bae3 312
88108326 313This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 314files. This is equivalent to the functionality provided by other
315hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
316the files created using DB_HASH are not compatible with any of the
317other packages mentioned.
3b35bae3 318
8e07c86e 319A default hashing algorithm, which will be adequate for most
320applications, is built into Berkeley DB. If you do need to use your own
321hashing algorithm it is possible to write your own in Perl and have
322B<DB_File> use it instead.
3b35bae3 323
You may omit the final three arguments to C<tie>, in which case they take
default values: C<$flags> defaults to O_CREAT|O_RDWR and the file type to
$DB_HASH, so a database which does not yet exist will be created. If you
supply the C<$flags> argument yourself and want a new file to be created,
it must include O_CREAT. See L<"Default Parameters"> for details.
3b35bae3 328
88108326 329=item B<DB_BTREE>
330
331The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 332sorted, balanced binary tree.
3b35bae3 333
8e07c86e 334As with the DB_HASH format, it is possible to provide a user defined
335Perl routine to perform the comparison of keys. By default, though, the
336keys are stored in lexical order.
3b35bae3 337
88108326 338=item B<DB_RECNO>
3b35bae3 339
8e07c86e 340DB_RECNO allows both fixed-length and variable-length flat text files
341to be manipulated using the same key/value pair interface as in DB_HASH
342and DB_BTREE. In this case the key will consist of a record (line)
343number.
3b35bae3 344
345=back
346
347=head2 How does DB_File interface to Berkeley DB?
348
349B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 350in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
351allows B<DB_File> to access Berkeley DB files using either an
352associative array (for DB_HASH & DB_BTREE file types) or an ordinary
353array (for the DB_RECNO file type).
3b35bae3 354
88108326 355In addition to the tie() interface, it is also possible to access most
356of the functions provided in the Berkeley DB API directly.
357See L<"Using the Berkeley DB API Directly">.
3b35bae3 358
88108326 359=head2 Opening a Berkeley DB Database File
3b35bae3 360
8e07c86e 361Berkeley DB uses the function dbopen() to open or create a database.
362Below is the C prototype for dbopen().
3b35bae3 363
364 DB*
365 dbopen (const char * file, int flags, int mode,
366 DBTYPE type, const void * openinfo)
367
368The parameter C<type> is an enumeration which specifies which of the 3
369interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
370Depending on which of these is actually chosen, the final parameter,
371I<openinfo> points to a data structure which allows tailoring of the
372specific interface method.
373
8e07c86e 374This interface is handled slightly differently in B<DB_File>. Here is
88108326 375an equivalent call using B<DB_File>:
3b35bae3 376
88108326 377 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 378
8e07c86e 379The C<filename>, C<flags> and C<mode> parameters are the direct
380equivalent of their dbopen() counterparts. The final parameter $DB_HASH
381performs the function of both the C<type> and C<openinfo> parameters in
382dbopen().
3b35bae3 383
88108326 384In the example above $DB_HASH is actually a pre-defined reference to a
385hash object. B<DB_File> has three of these pre-defined references.
386Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 387
The keys allowed in each of these pre-defined references are limited to
389the names used in the equivalent C structure. So, for example, the
390$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 391C<ffactor>, C<hash>, C<lorder> and C<nelem>.
392
393To change one of these elements, just assign to it like this:
394
395 $DB_HASH->{'cachesize'} = 10000 ;
396
397The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
398usually adequate for most applications. If you do need to create extra
399instances of these objects, constructors are available for each file
400type.
401
402Here are examples of the constructors and the valid options available
403for DB_HASH, DB_BTREE and DB_RECNO respectively.
404
405 $a = new DB_File::HASHINFO ;
406 $a->{'bsize'} ;
407 $a->{'cachesize'} ;
408 $a->{'ffactor'};
409 $a->{'hash'} ;
410 $a->{'lorder'} ;
411 $a->{'nelem'} ;
412
413 $b = new DB_File::BTREEINFO ;
414 $b->{'flags'} ;
415 $b->{'cachesize'} ;
416 $b->{'maxkeypage'} ;
417 $b->{'minkeypage'} ;
418 $b->{'psize'} ;
419 $b->{'compare'} ;
420 $b->{'prefix'} ;
421 $b->{'lorder'} ;
422
423 $c = new DB_File::RECNOINFO ;
424 $c->{'bval'} ;
425 $c->{'cachesize'} ;
426 $c->{'psize'} ;
427 $c->{'flags'} ;
428 $c->{'lorder'} ;
429 $c->{'reclen'} ;
430 $c->{'bfname'} ;
431
432The values stored in the hashes above are mostly the direct equivalent
433of their C counterpart. Like their C counterparts, all are set to a
434default set of values - that means you don't have to set I<all> of the
435values when you only want to change one. Here is an example:
436
437 $a = new DB_File::HASHINFO ;
438 $a->{'cachesize'} = 12345 ;
439 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
440
441A few of the values need extra discussion here. When used, the C
442equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
443to C functions. In B<DB_File> these keys are used to store references
444to Perl subs. Below are templates for each of the subs:
445
446 sub hash
447 {
448 my ($data) = @_ ;
449 ...
450 # return the hash value for $data
451 return $hash ;
452 }
3b35bae3 453
    sub compare
    {
        my ($key1, $key2) = @_ ;
        ...
        # return  0 if $key1 eq $key2
        #        -1 if $key1 lt $key2
        #         1 if $key1 gt $key2
        return (-1 , 0 or 1) ;
    }
3b35bae3 463
    sub prefix
    {
        my ($key1, $key2) = @_ ;
        ...
        # return number of bytes of $key2 which are
        # necessary to determine that it is greater than $key1
        return $bytes ;
    }
3b35bae3 472
See L<"Using BTREE"> for an example of using the C<compare> template.
474
475=head2 Default Parameters
476
477It is possible to omit some or all of the final 4 parameters in the
478call to C<tie> and let them take default values. As DB_HASH is the most
479common file format used, the call:
480
481 tie %A, "DB_File", "filename" ;
482
483is equivalent to:
484
485 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0640, $DB_HASH ;
486
487It is also possible to omit the filename parameter as well, so the
488call:
489
490 tie %A, "DB_File" ;
491
492is equivalent to:
493
494 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0640, $DB_HASH ;
495
496See L<"In Memory Databases"> for a discussion on the use of C<undef>
497in place of a filename.
498
499=head2 Handling duplicate keys in BTREE databases
500
501The BTREE file type in Berkeley DB optionally allows a single key to be
502associated with an arbitrary number of values. This option is enabled by
503setting the flags element of C<$DB_BTREE> to R_DUP when creating the
504database.
505
506There are some difficulties in using the tied hash interface if you
507want to manipulate a BTREE database with duplicate keys. Consider this
508code:
509
510 use DB_File ;
511 use Fcntl ;
512
513 $filename = "tree" ;
514 unlink $filename ;
515
516 # Enable duplicate records
517 $DB_BTREE->{'flags'} = R_DUP ;
518
519 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
520 or die "Cannot open $filename: $!\n";
521
522 # Add some key/value pairs to the file
523 $h{'Wall'} = 'Larry' ;
524 $h{'Wall'} = 'Brick' ; # Note the duplicate key
525 $h{'Smith'} = 'John' ;
526 $h{'mouse'} = 'mickey' ;
527
528 # iterate through the associative array
529 # and print each key/value pair.
530 foreach (keys %h)
531 { print "$_ -> $h{$_}\n" }
532
533Here is the output:
534
535 Smith -> John
536 Wall -> Larry
537 Wall -> Larry
538 mouse -> mickey
539
540As you can see 2 records have been successfully created with key C<Wall>
541- the only thing is, when they are retrieved from the database they
542both I<seem> to have the same value, namely C<Larry>. The problem is
543caused by the way that the associative array interface works.
544Basically, when the associative array interface is used to fetch the
545value associated with a given key, it will only ever retrieve the first
546value.
547
548Although it may not be immediately obvious from the code above, the
549associative array interface can be used to write values with duplicate
550keys, but it cannot be used to read them back from the database.
551
552The way to get around this problem is to use the Berkeley DB API method
553called C<seq>. This method allows sequential access to key/value
554pairs. See L<"Using the Berkeley DB API Directly"> for details of both
555the C<seq> method and the API in general.
556
557Here is the script above rewritten using the C<seq> API method.
558
559 use DB_File ;
560 use Fcntl ;
561
562 $filename = "tree" ;
563 unlink $filename ;
564
565 # Enable duplicate records
566 $DB_BTREE->{'flags'} = R_DUP ;
567
568 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
569 or die "Cannot open $filename: $!\n";
570
571 # Add some key/value pairs to the file
572 $h{'Wall'} = 'Larry' ;
573 $h{'Wall'} = 'Brick' ; # Note the duplicate key
574 $h{'Smith'} = 'John' ;
575 $h{'mouse'} = 'mickey' ;
576
577 # Point to the first record in the btree
578 $x->seq($key, $value, R_FIRST) ;
579
580 # now iterate through the rest of the btree
581 # and print each key/value pair.
582 print "$key -> $value\n" ;
583 while ( $x->seq($key, $value, R_NEXT) == 0)
584 { print "$key -> $value\n" }
585
586 undef $x ;
587 untie %h ;
588
589that prints:
590
591 Smith -> John
592 Wall -> Brick
593 Wall -> Larry
594 mouse -> mickey
595
596This time we have got all the key/value pairs, including both the
597values associated with the key C<Wall>.
598
599C<DB_File> comes with a utility method, called C<get_dup>, to assist in
600reading duplicate values from BTREE databases. The method can take the
601following forms:
602
603 $count = $x->get_dup($key) ;
604 @list = $x->get_dup($key) ;
605 %list = $x->get_dup($key, 1) ;
606
607In a scalar context the method returns the number of values associated
608with the key, C<$key>.
609
610In list context, it returns all the values which match C<$key>. Note
611that the values returned will be in an apparently random order.
612
613If the second parameter is present and evaluates TRUE, the method
614returns an associative array whose keys correspond to the the values
615from the BTREE and whose values are all C<1>.
616
617So assuming the database created above, we can use C<get_dups> like
618this:
619
620 $cnt = $x->get_dups("Wall") ;
621 print "Wall occurred $cnt times\n" ;
622
623 %hash = $x->get_dups("Wall", 1) ;
624 print "Larry is there\n" if $hash{'Larry'} ;
625
626 @list = $x->get_dups("Wall") ;
627 print "Wall => [@list]\n" ;
628
629 @list = $x->get_dups("Smith") ;
630 print "Smith => [@list]\n" ;
631
632 @list = $x->get_dups("Dog") ;
633 print "Dog => [@list]\n" ;
634
635
636and it will print:
637
638 Wall occurred 2 times
639 Larry is there
640 Wall => [Brick Larry]
641 Smith => [John]
642 Dog => []
3b35bae3 643
644=head2 RECNO
645
88108326 646In order to make RECNO more compatible with Perl the array offset for
647all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 648
88108326 649As with normal Perl arrays, a RECNO array can be accessed using
650negative indexes. The index -1 refers to the last element of the array,
651-2 the second last, and so on. Attempting to access an element before
652the start of the array will raise a fatal run-time error.
3b35bae3 653
654=head2 In Memory Databases
655
8e07c86e 656Berkeley DB allows the creation of in-memory databases by using NULL
785da04d 657(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
8e07c86e 658uses C<undef> instead of NULL to provide this functionality.
3b35bae3 659
660
88108326 661=head2 Using the Berkeley DB API Directly
3b35bae3 662
663As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 664possible to make direct use of most of the API functions defined in the
8e07c86e 665Berkeley DB documentation.
3b35bae3 666
88108326 667To do this you need to store a copy of the object returned from the tie.
3b35bae3 668
88108326 669 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 670
8e07c86e 671Once you have done that, you can access the Berkeley DB API functions
88108326 672as B<DB_File> methods directly like this:
3b35bae3 673
674 $db->put($key, $value, R_NOOVERWRITE) ;
675
88108326 676B<Important:> If you have saved a copy of the object returned from
677C<tie>, the underlying database file will I<not> be closed until both
678the tied variable is untied and all copies of the saved object are
679destroyed.
680
681 use DB_File ;
682 $db = tie %hash, "DB_File", "filename"
683 or die "Cannot tie filename: $!" ;
684 ...
685 undef $db ;
686 untie %hash ;
687
688All the functions defined in L<dbopen> are available except for
689close() and dbopen() itself. The B<DB_File> method interface to the
690supported functions have been implemented to mirror the way Berkeley DB
691works whenever possible. In particular note that:
692
693=over 5
694
695=item *
696
697The methods return a status value. All return 0 on success.
698All return -1 to signify an error and set C<$!> to the exact
699error code. The return code 1 generally (but not always) means that the
700key specified did not exist in the database.
701
702Other return codes are defined. See below and in the Berkeley DB
703documentation for details. The Berkeley DB documentation should be used
704as the definitive source.
705
706=item *
3b35bae3 707
88108326 708Whenever a Berkeley DB function returns data via one of its parameters,
709the equivalent B<DB_File> method does exactly the same.
3b35bae3 710
88108326 711=item *
712
713If you are careful, it is possible to mix API calls with the tied
714hash/array interface in the same piece of code. Although only a few of
715the methods used to implement the tied interface currently make use of
716the cursor, you should always assume that the cursor has been changed
717any time the tied hash/array interface is used. As an example, this
718code will probably not do what you expect:
719
720 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
721 or die "Cannot tie $filename: $!" ;
722
723 # Get the first key/value pair and set the cursor
724 $X->seq($key, $value, R_FIRST) ;
725
726 # this line will modify the cursor
727 $count = scalar keys %x ;
728
729 # Get the second key/value pair.
730 # oops, it didn't, it got the last key/value pair!
731 $X->seq($key, $value, R_NEXT) ;
732
733The code above can be rearranged to get around the problem, like this:
734
735 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
736 or die "Cannot tie $filename: $!" ;
737
738 # this line will modify the cursor
739 $count = scalar keys %x ;
740
741 # Get the first key/value pair and set the cursor
742 $X->seq($key, $value, R_FIRST) ;
743
744 # Get the second key/value pair.
745 # worked this time.
746 $X->seq($key, $value, R_NEXT) ;
747
748=back
749
750All the constants defined in L<dbopen> for use in the flags parameters
751in the methods defined below are also available. Refer to the Berkeley
752DB documentation for the precise meaning of the flags values.
753
754Below is a list of the methods available.
3b35bae3 755
756=over 5
757
88108326 758=item C<$status = $X-E<gt>get($key, $value [, $flags]) ;>
759
760Given a key (C<$key>) this method reads the value associated with it
761from the database. The value read from the database is returned in the
762C<$value> parameter.
3b35bae3 763
88108326 764If the key does not exist the method returns 1.
3b35bae3 765
88108326 766No flags are currently defined for this method.
3b35bae3 767
88108326 768=item C<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 769
88108326 770Stores the key/value pair in the database.
771
772If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 773will have the record number of the inserted key/value pair set.
3b35bae3 774
88108326 775Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
776R_SETCURSOR.
777
778=item C<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 779
88108326 780Removes all key/value pairs with key C<$key> from the database.
3b35bae3 781
88108326 782A return code of 1 means that the requested key was not in the
783database.
3b35bae3 784
88108326 785R_CURSOR is the only valid flag at present.
3b35bae3 786
88108326 787=item C<$status = $X-E<gt>fd ;>
3b35bae3 788
88108326 789Returns the file descriptor for the underlying database.
3b35bae3 790
88108326 791See L<"Locking Databases"> for an example of how to make use of the
792C<fd> method to lock your database.
3b35bae3 793
88108326 794=item C<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 795
88108326 796This interface allows sequential retrieval from the database. See
797L<dbopen> for full details.
798
799Both the C<$key> and C<$value> parameters will be set to the key/value
800pair read from the database.
801
802The flags parameter is mandatory. The valid flag values are R_CURSOR,
803R_FIRST, R_LAST, R_NEXT and R_PREV.
804
805=item C<$status = $X-E<gt>sync([$flags]) ;>
806
807Flushes any cached buffers to disk.
808
809R_RECNOSYNC is the only valid flag at present.
3b35bae3 810
811=back
812
813=head1 EXAMPLES
814
8e07c86e 815It is always a lot easier to understand something when you see a real
816example. So here are a few.
3b35bae3 817
818=head2 Using HASH
819
820 use DB_File ;
821 use Fcntl ;
88108326 822
823 tie %h, "DB_File", "hashed", O_RDWR|O_CREAT, 0640, $DB_HASH
824 or die "Cannot open file 'hashed': $!\n";
825
3b35bae3 826 # Add a key/value pair to the file
827 $h{"apple"} = "orange" ;
88108326 828
3b35bae3 829 # Check for existence of a key
830 print "Exists\n" if $h{"banana"} ;
88108326 831
3b35bae3 832 # Delete
833 delete $h{"apple"} ;
88108326 834
3b35bae3 835 untie %h ;
836
837=head2 Using BTREE
838
88108326 839Here is a sample of code which uses BTREE. Just to make life more
840interesting the default comparison function will not be used. Instead
8e07c86e 841a Perl sub, C<Compare()>, will be used to do a case insensitive
842comparison.
3b35bae3 843
844 use DB_File ;
845 use Fcntl ;
88108326 846
3b35bae3 847 sub Compare
848 {
849 my ($key1, $key2) = @_ ;
88108326 850
3b35bae3 851 "\L$key1" cmp "\L$key2" ;
852 }
88108326 853
854 $DB_BTREE->{'compare'} = 'Compare' ;
855
856 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
857 or die "Cannot open file 'tree': $!\n" ;
858
3b35bae3 859 # Add a key/value pair to the file
860 $h{'Wall'} = 'Larry' ;
861 $h{'Smith'} = 'John' ;
862 $h{'mouse'} = 'mickey' ;
863 $h{'duck'} = 'donald' ;
88108326 864
3b35bae3 865 # Delete
866 delete $h{"duck"} ;
88108326 867
3b35bae3 868 # Cycle through the keys printing them in order.
869 # Note it is not necessary to sort the keys as
870 # the btree will have kept them in order automatically.
871 foreach (keys %h)
872 { print "$_\n" }
88108326 873
3b35bae3 874 untie %h ;
875
876Here is the output from the code above.
877
878 mouse
879 Smith
880 Wall
881
882
883=head2 Using RECNO
884
88108326 885Here is a simple example that uses RECNO.
886
3b35bae3 887 use DB_File ;
888 use Fcntl ;
88108326 889
890 $DB_RECNO->{'psize'} = 3000 ;
891
892 tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
893 or die "Cannot open file 'text': $!\n" ;
894
3b35bae3 895 # Add a key/value pair to the file
896 $h[0] = "orange" ;
88108326 897
3b35bae3 898 # Check for existence of a key
899 print "Exists\n" if $h[1] ;
3b35bae3 900
88108326 901 untie @h ;
3b35bae3 902
cb1a09d0 903=head2 Locking Databases
3b35bae3 904
cb1a09d0 905Concurrent access of a read-write database by several parties requires
906them all to use some kind of locking. Here's an example of Tom's that
907uses the I<fd> method to get the file descriptor, and then a careful
908open() to give something Perl will flock() for you. Run this repeatedly
909in the background to watch the locks granted in proper order.
3b35bae3 910
cb1a09d0 911 use Fcntl;
912 use DB_File;
913
914 use strict;
915
916 sub LOCK_SH { 1 }
917 sub LOCK_EX { 2 }
918 sub LOCK_NB { 4 }
919 sub LOCK_UN { 8 }
920
921 my($oldval, $fd, $db, %db, $value, $key);
922
923 $key = shift || 'default';
924 $value = shift || 'magic';
925
926 $value .= " $$";
927
928 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
929 || die "dbcreat /tmp/foo.db $!";
930 $fd = $db->fd;
931 print "$$: db fd is $fd\n";
932 open(DB_FH, "+<&=$fd") || die "dup $!";
933
934
935 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
936 print "$$: CONTENTION; can't read during write update!
937 Waiting for read lock ($!) ....";
938 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
939 }
940 print "$$: Read lock granted\n";
941
942 $oldval = $db{$key};
943 print "$$: Old value was $oldval\n";
944 flock(DB_FH, LOCK_UN);
945
946 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
947 print "$$: CONTENTION; must have exclusive lock!
948 Waiting for write lock ($!) ....";
949 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
950 }
951
952 print "$$: Write lock granted\n";
953 $db{$key} = $value;
88108326 954 $db->sync;
cb1a09d0 955 sleep 10;
956
957 flock(DB_FH, LOCK_UN);
88108326 958 undef $db;
cb1a09d0 959 untie %db;
960 close(DB_FH);
961 print "$$: Updated db to $key=$value\n";
962
963=head1 HISTORY
964
965=over
966
967=item 0.1
3b35bae3 968
969First Release.
970
cb1a09d0 971=item 0.2
3b35bae3 972
973When B<DB_File> is opening a database file it no longer terminates the
974process if I<dbopen> returned an error. This allows file protection
975errors to be caught at run time. Thanks to Judith Grass
cb1a09d0 976E<lt>grass@cybercash.comE<gt> for spotting the bug.
3b35bae3 977
cb1a09d0 978=item 0.3
8e07c86e 979
980Added prototype support for multiple btree compare callbacks.
981
cb1a09d0 982=item 1.0
8e07c86e 983
984B<DB_File> has been in use for over a year. To reflect that, the
985version number has been incremented to 1.0.
986
987Added complete support for multiple concurrent callbacks.
988
989Using the I<push> method on an empty list didn't work properly. This
990has been fixed.
991
cb1a09d0 992=item 1.01
4633a7c4 993
994Fixed a core dump problem with SunOS.
995
996The return value from TIEHASH wasn't set to NULL when dbopen returned
997an error.
998
88108326 999=item 1.02
1000
1001Merged OS2 specific code into DB_File.xs
1002
1003Removed some redundant code in DB_File.xs.
1004
1005Documentation update.
1006
1007Allow negative subscripts with RECNO interface.
1008
1009Changed the default flags from O_RDWR to O_CREAT|O_RDWR.
1010
1011The example code which showed how to lock a database needed a call to
1012C<sync> added. Without it the resultant database file was empty.
1013
Added get_dup method.
1015
3b35bae3 1016=head1 WARNINGS
1017
88108326 1018If you happen to find any other functions defined in the source for
1019this module that have not been mentioned in this document -- beware. I
may drop them at a moment's notice.
3b35bae3 1021
8e07c86e 1022If you cannot find any, then either you didn't look very hard or the
1023moment has passed and I have dropped them.
3b35bae3 1024
1025=head1 BUGS
1026
8e07c86e 1027Some older versions of Berkeley DB had problems with fixed length
1028records using the RECNO file format. The newest version at the time of
1029writing was 1.85 - this seems to have fixed the problems with RECNO.
3b35bae3 1030
8e07c86e 1031I am sure there are bugs in the code. If you do find any, or can
1032suggest any enhancements, I would welcome your comments.
3b35bae3 1033
1034=head1 AVAILABILITY
1035
cb1a09d0 1036Berkeley DB is available at your nearest CPAN archive (see
1037L<perlmod/"CPAN"> for a list) in F<src/misc/db.1.85.tar.gz>, or via the
1038host F<ftp.cs.berkeley.edu> in F</ucb/4bsd/db.tar.gz>. It is I<not> under
1039the GPL.
3b35bae3 1040
88108326 1041If you are running IRIX, then get Berkeley DB from
1042F<http://reality.sgi.com/ariel>. It has the patches necessary to
1043compile properly on IRIX 5.3.
1044
3b35bae3 1045=head1 SEE ALSO
1046
1047L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>
1048
8e07c86e 1049Berkeley DB is available from F<ftp.cs.berkeley.edu> in the directory
1050F</ucb/4bsd>.
3b35bae3 1051
1052=head1 AUTHOR
1053
8e07c86e 1054The DB_File interface was written by Paul Marquess
88108326 1055E<lt>pmarquess@bfsec.bt.co.ukE<gt>.
8e07c86e 1056Questions about the DB system itself may be addressed to Keith Bostic
88108326 1057E<lt>bostic@cs.berkeley.eduE<gt>.
3b35bae3 1058
1059=cut