Put back the cygwin32 Configure fix of 3582 undone by 3597.
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6ca2e664 3# written by Paul Marquess (Paul.Marquess@btinternet.com)
ca63f0d2 4# last modified 6th March 1999
9fe6733a 5# version 1.66
36477c24 6#
20896112 7# Copyright (c) 1995-9 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
package DB_File::HASHINFO ;

# Tied-hash class holding the "openinfo" options for a DB_HASH database.
#
# An object is a blessed reference to a hash that is tied to this same
# class, so $obj->{option} is routed through FETCH/STORE/DELETE/EXISTS.
# The object behind the tie has two members:
#   VALID - the set of option names that may be used
#   GOT   - the option values that have been stored so far
# Using any name outside VALID croaks.  DB_File::RECNOINFO and
# DB_File::BTREEINFO inherit from this class and only replace TIEHASH.

require 5.003 ;

use strict;
use Carp;
require Tie::Hash;
@DB_File::HASHINFO::ISA = qw(Tie::Hash);

# Constructor.  Ties a fresh hash to $pkg (which runs $pkg's TIEHASH)
# and returns a reference to that hash blessed into $pkg.
sub new
{
    my $pkg = shift ;
    my %x ;
    tie %x, $pkg ;
    bless \%x, $pkg ;
}


# Create the object behind the tie, with the option names valid for
# DB_HASH and no values set.
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map {$_, 1}
                       qw( bsize ffactor nelem cachesize hash lorder)
                     },
            GOT   => {}
          }, $pkg ;
}


# Return the stored value for a valid option (undef if it has not been
# set); croak for a name that is not in VALID.
sub FETCH
{
    my $self = shift ;
    my $key  = shift ;

    return $self->{GOT}{$key} if exists $self->{VALID}{$key}  ;

    my $pkg = ref $self ;
    croak "${pkg}::FETCH - Unknown element '$key'" ;
}


# Store a value for a valid option; croak for a name that is not in
# VALID.
sub STORE
{
    my $self  = shift ;
    my $key   = shift ;
    my $value = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        $self->{GOT}{$key} = $value ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::STORE - Unknown element '$key'" ;
}

# Forget the value stored for a valid option; croak for a name that is
# not in VALID.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    # Report the object's own class, as FETCH and STORE do, so that the
    # message is also right for the RECNOINFO and BTREEINFO subclasses.
    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}

# True when $key is a permitted option name.  Note this tests VALID, not
# GOT, so it does not say whether a value has been stored.
sub EXISTS
{
    my $self = shift ;
    my $key  = shift ;

    exists $self->{VALID}{$key} ;
}

# Helper for the tie methods this class deliberately does not support:
# croaks naming the object's class and the unsupported method.
sub NotHere
{
    my $self   = shift ;
    my $method = shift ;

    croak ref($self) . " does not define the method ${method}" ;
}

# Iterating over or clearing an info hash is not supported.
sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 104
package DB_File::RECNOINFO ;

# Option holder for DB_RECNO databases.  Everything except the list of
# permitted option names comes from DB_File::HASHINFO.

use strict ;

@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;

# Create the object behind the tie: VALID marks each permitted option
# name with 1, GOT starts out empty.
sub TIEHASH
{
    my $class = shift ;

    my @names = qw( bval cachesize psize flags lorder reclen bfname ) ;
    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
121
package DB_File::BTREEINFO ;

# Option holder for DB_BTREE databases.  Everything except the list of
# permitted option names comes from DB_File::HASHINFO.

use strict ;

@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;

# Create the object behind the tie: VALID marks each permitted option
# name with 1, GOT starts out empty.
sub TIEHASH
{
    my $class = shift ;

    my @names = qw( flags cachesize maxkeypage minkeypage psize
                    compare prefix lorder ) ;
    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
139
140
8e07c86e 141package DB_File ;
785da04d 142
143use strict;
1f70e1ea 144use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_version) ;
8e07c86e 145use Carp;
146
785da04d 147
9fe6733a 148$VERSION = "1.66" ;
8e07c86e 149
# One ready-made info object per database type.  They take the place of
# the C enumeration
#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;
8e07c86e 154
785da04d 155require Tie::Hash;
8e07c86e 156require Exporter;
157use AutoLoader;
158require DynaLoader;
785da04d 159@ISA = qw(Tie::Hash Exporter DynaLoader);
8e07c86e 160@EXPORT = qw(
161 $DB_BTREE $DB_HASH $DB_RECNO
88108326 162
8e07c86e 163 BTREEMAGIC
164 BTREEVERSION
165 DB_LOCK
166 DB_SHMEM
167 DB_TXN
168 HASHMAGIC
169 HASHVERSION
170 MAX_PAGE_NUMBER
171 MAX_PAGE_OFFSET
172 MAX_REC_NUMBER
173 RET_ERROR
174 RET_SPECIAL
175 RET_SUCCESS
176 R_CURSOR
177 R_DUP
178 R_FIRST
179 R_FIXEDLEN
180 R_IAFTER
181 R_IBEFORE
182 R_LAST
183 R_NEXT
184 R_NOKEY
185 R_NOOVERWRITE
186 R_PREV
187 R_RECNOSYNC
188 R_SETCURSOR
189 R_SNAPSHOT
190 __R_UNUSED
88108326 191
045291aa 192);
8e07c86e 193
# Resolve a call to an undefined DB_File sub.
#
# The unqualified name is passed to constant() (defined outside this
# section of the file).  On success a real sub returning the value is
# compiled under the requested name and control jumps to it, so later
# calls never come back here.  On failure the call is either handed on
# to AutoLoader or reported with croak.
sub AUTOLOAD {
    # strip the package qualifier to leave the bare name
    (my $constname = $AUTOLOAD) =~ s/.*:://;
    my $val = constant($constname, @_ ? $_[0] : 0);

    # $! is examined straight after the call, before anything else has
    # a chance to change it.
    if ($! != 0) {
        unless ($! =~ /Invalid/) {
            my($pack,$file,$line) = caller;
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.
";
        }

        # An "Invalid ..." error is taken to mean the name is not a
        # constant at all: let AutoLoader look for an autosplit sub.
        $AutoLoader::AUTOLOAD = $AUTOLOAD;
        goto &AutoLoader::AUTOLOAD;
    }

    eval "sub $AUTOLOAD { $val }";
    goto &$AUTOLOAD;
}
212
f6b705ef 213
# Make all Fcntl O_XXX constants available for importing.
# Done inside eval so that a failure to load Fcntl is ignored and the
# module simply goes without the extra exports.
eval {
    require Fcntl;
    my @open_flags = grep { /^O_/ } @Fcntl::EXPORT;
    Fcntl->import(@open_flags);     # bring them into this package ...
    push @EXPORT, @open_flags;      # ... then pass them on to our users
};
f6b705ef 221
1f70e1ea 222## import borrowed from IO::File
223## exports Fcntl constants if available.
224#sub import {
225# my $pkg = shift;
226# my $callpkg = caller;
227# Exporter::export $pkg, $callpkg, @_;
228# eval {
229# require Fcntl;
230# Exporter::export 'Fcntl', $callpkg, '/^O_/';
231# };
232#}
233
785da04d 234bootstrap DB_File $VERSION;
8e07c86e 235
236# Preloaded methods go here. Autoload methods go after __END__, and are
237# processed by the autosplit program.
238
# Shared implementation behind TIEHASH and TIEARRAY.
#
# Receives the argument list of the TIE* method unchanged.  Going by
# the indexes used below: [1] is the filename, [3] the file mode and
# [4] the info object ($DB_HASH, $DB_BTREE, $DB_RECNO or similar).
# Returns whatever DoTie_ returns, or undef when a missing RECNO file
# cannot be created.
sub tie_hash_or_array
{
    my @args = @_ ;

    # Name of the sub one frame above this one: true when we were
    # reached from TIEHASH.  This relies on being called directly from
    # TIEHASH/TIEARRAY.
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    # An info object is a blessed reference to a tied hash; replace it
    # with the object that sits behind the tie.
    if (@args >= 5 && ref($args[4]) && $args[4] =~ /=HASH/) {
        my $behind_tie = tied %{ $args[4] } ;
        $args[4] = $behind_tie if $behind_tie ;
    }

    # make recno in Berkeley DB version 2 work like recno in version 1.
    # (creates the named file, empty, when it does not exist yet)
    if ($db_version > 1
        and defined $args[4] and $args[4] =~ /RECNO/
        and $args[1] and ! -e $args[1])
    {
        open(FH, ">$args[1]") or return undef ;
        close FH ;
        chmod( ($args[3] || 0666), $args[1] ) ;
    }

    return DoTie_($tieHASH, @args) ;
}
257
# tie %hash, 'DB_File', ... arrives here.  All the work is done by
# tie_hash_or_array, which looks at its caller's name to tell hash ties
# from array ties, so it has to be called as an ordinary sub from here.
sub TIEHASH
{
    my @tie_args = @_ ;
    return tie_hash_or_array(@tie_args) ;
}
262
# tie @array, 'DB_File', ... arrives here.  All the work is done by
# tie_hash_or_array, which looks at its caller's name to tell hash ties
# from array ties, so it has to be called as an ordinary sub from here.
sub TIEARRAY
{
    my @tie_args = @_ ;
    return tie_hash_or_array(@tie_args) ;
}
88108326 267
# Remove every record from the database (used when the tied variable is
# emptied).  Works in two passes: collect all the keys with seq, then
# delete them one by one, last key first.
sub CLEAR
{
    my $self = shift ;
    my ($key, $value) = ("", "") ;
    my @found ;

    # pass 1: seq fills in $key/$value and returns 0 while records remain
    my $status = $self->seq($key, $value, R_FIRST()) ;
    until ($status != 0) {
        push @found, $key ;
        $status = $self->seq($key, $value, R_NEXT()) ;
    }

    # pass 2: delete in reverse order of discovery
    foreach my $doomed (reverse @found) {
        $self->del($doomed) ;
    }
}
284
# Tied-array hook; deliberately does nothing.
sub EXTEND { }

# Tied-array hook: make the array exactly $length elements long.
# Shrinking deletes the surplus records from the highest index down;
# growing stores an empty string in the new last slot.  An unchanged
# length is a no-op.
sub STORESIZE
{
    my ($self, $length) = @_ ;
    my $have = $self->length() ;

    if ($length > $have) {
        $self->put($length - 1, "") ;
    }
    elsif ($length < $have) {
        my $index = $have ;
        while (--$index >= $length) {
            $self->del($index) ;
        }
    }
}
302
# $status = $db->find_dup($key, $value)
#
# Look for one specific key/value pair.  Starts a cursor search at $key
# and steps forward record by record; returns 0 as soon as a record
# matches both the key and the value.  If the scan runs out first, the
# non-zero status of the last seq call is returned.
sub find_dup
{
    croak "Usage: \$db->find_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $origkey, $value_wanted) = @_ ;

    # seq overwrites $key and $value with the record it lands on
    my $key    = $origkey ;
    my $value  = 0 ;
    my $status = $db->seq($key, $value, R_CURSOR() ) ;

    while ($status == 0) {
        return 0 if $key eq $origkey and $value eq $value_wanted ;
        $status = $db->seq($key, $value, R_NEXT() ) ;
    }

    return $status ;
}
322
# $status = $db->del_dup($key, $value)
#
# Delete one specific key/value pair.  find_dup is used to locate the
# pair; if it reports anything but 0 that status is returned unchanged,
# otherwise the record is deleted with the R_CURSOR flag and the status
# of the delete is returned.
sub del_dup
{
    croak "Usage: \$db->del_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $key, $value) = @_ ;

    my ($found) = $db->find_dup($key, $value) ;
    return $found if $found != 0 ;

    my $deleted = $db->del($key, R_CURSOR() ) ;
    return $deleted ;
}
336
# $count = $db->get_dup($key)
# @list  = $db->get_dup($key)
# %tally = $db->get_dup($key, 1)
#
# Collect the values stored under a key that has duplicates.
#   scalar context        - the number of matching records
#   list context          - the matching values, in database order
#   list context + $flag  - value => number-of-occurrences pairs
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;
    my $origkey     = $key ;
    my $value       = 0 ;
    my $list_wanted = wantarray ;
    my %tally       = () ;
    my @matches     = () ;
    my $count       = 0 ;

    # Walk forward from the first record for $key.  Stop when seq
    # reports a non-zero status (nothing more to read) or when it lands
    # on a different key.
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $origkey) {

        # only do the bookkeeping the caller's context calls for
        if (! $list_wanted) {
            ++ $count ;
        }
        elsif ($flag) {
            ++ $tally{$value} ;
        }
        else {
            push (@matches, $value) ;
        }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $count unless $list_wanted ;
    return $flag ? %tally : @matches ;
}
373
374
8e07c86e 3751;
376__END__
377
3b35bae3 378=head1 NAME
379
1f70e1ea 380DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 381
382=head1 SYNOPSIS
383
384 use DB_File ;
88108326 385
386 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
387 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
388 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 389
3b35bae3 390 $status = $X->del($key [, $flags]) ;
391 $status = $X->put($key, $value [, $flags]) ;
392 $status = $X->get($key, $value [, $flags]) ;
760ac839 393 $status = $X->seq($key, $value, $flags) ;
3b35bae3 394 $status = $X->sync([$flags]) ;
395 $status = $X->fd ;
760ac839 396
f6b705ef 397 # BTREE only
88108326 398 $count = $X->get_dup($key) ;
399 @list = $X->get_dup($key) ;
400 %list = $X->get_dup($key, 1) ;
6ca2e664 401 $status = $X->find_dup($key, $value) ;
402 $status = $X->del_dup($key, $value) ;
88108326 403
f6b705ef 404 # RECNO only
405 $a = $X->length;
406 $a = $X->pop ;
407 $X->push(list);
408 $a = $X->shift;
409 $X->unshift(list);
410
3b35bae3 411 untie %hash ;
412 untie @array ;
413
414=head1 DESCRIPTION
415
8e07c86e 416B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 417facilities provided by Berkeley DB version 1.x (if you have a newer
418version of DB, see L<Using DB_File with Berkeley DB version 2>). It is
419assumed that you have a copy of the Berkeley DB manual pages at hand
420when reading this documentation. The interface defined here mirrors the
421Berkeley DB interface closely.
68dc0745 422
8e07c86e 423Berkeley DB is a C library which provides a consistent interface to a
424number of database formats. B<DB_File> provides an interface to all
425three of the database types currently supported by Berkeley DB.
3b35bae3 426
427The file types are:
428
429=over 5
430
88108326 431=item B<DB_HASH>
3b35bae3 432
88108326 433This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 434files. This is equivalent to the functionality provided by other
435hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
436the files created using DB_HASH are not compatible with any of the
437other packages mentioned.
3b35bae3 438
8e07c86e 439A default hashing algorithm, which will be adequate for most
440applications, is built into Berkeley DB. If you do need to use your own
441hashing algorithm it is possible to write your own in Perl and have
442B<DB_File> use it instead.
3b35bae3 443
88108326 444=item B<DB_BTREE>
445
446The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 447sorted, balanced binary tree.
3b35bae3 448
8e07c86e 449As with the DB_HASH format, it is possible to provide a user defined
450Perl routine to perform the comparison of keys. By default, though, the
451keys are stored in lexical order.
3b35bae3 452
88108326 453=item B<DB_RECNO>
3b35bae3 454
8e07c86e 455DB_RECNO allows both fixed-length and variable-length flat text files
456to be manipulated using the same key/value pair interface as in DB_HASH
457and DB_BTREE. In this case the key will consist of a record (line)
458number.
3b35bae3 459
460=back
461
1f70e1ea 462=head2 Using DB_File with Berkeley DB version 2
463
464Although B<DB_File> is intended to be used with Berkeley DB version 1,
465it can also be used with version 2. In this case the interface is
466limited to the functionality provided by Berkeley DB 1.x. Anywhere the
467version 2 interface differs, B<DB_File> arranges for it to work like
468version 1. This feature allows B<DB_File> scripts that were built with
469version 1 to be migrated to version 2 without any changes.
470
471If you want to make use of the new features available in Berkeley DB
4722.x, use the Perl module B<BerkeleyDB> instead.
473
474At the time of writing this document the B<BerkeleyDB> module is still
475alpha quality (the version number is < 1.0), and so unsuitable for use
476in any serious development work. Once its version number is >= 1.0, it
477is considered stable enough for real work.
478
479B<Note:> The database file format has changed in Berkeley DB version 2.
480If you cannot recreate your databases, you must dump any existing
481databases with the C<db_dump185> utility that comes with Berkeley DB.
6ca2e664 482Once you have rebuilt DB_File to use Berkeley DB version 2, your
1f70e1ea 483databases can be recreated using C<db_load>. Refer to the Berkeley DB
484documentation for further details.
485
6ca2e664 486Please read L<"COPYRIGHT"> before using version 2.x of Berkeley DB with
1f70e1ea 487DB_File.
488
68dc0745 489=head2 Interface to Berkeley DB
3b35bae3 490
491B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 492in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
493allows B<DB_File> to access Berkeley DB files using either an
494associative array (for DB_HASH & DB_BTREE file types) or an ordinary
495array (for the DB_RECNO file type).
3b35bae3 496
88108326 497In addition to the tie() interface, it is also possible to access most
498of the functions provided in the Berkeley DB API directly.
f6b705ef 499See L<THE API INTERFACE>.
3b35bae3 500
88108326 501=head2 Opening a Berkeley DB Database File
3b35bae3 502
8e07c86e 503Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 504Here is the C prototype for dbopen():
3b35bae3 505
506 DB*
507 dbopen (const char * file, int flags, int mode,
508 DBTYPE type, const void * openinfo)
509
510The parameter C<type> is an enumeration which specifies which of the 3
511interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
512Depending on which of these is actually chosen, the final parameter,
513I<openinfo> points to a data structure which allows tailoring of the
514specific interface method.
515
8e07c86e 516This interface is handled slightly differently in B<DB_File>. Here is
88108326 517an equivalent call using B<DB_File>:
3b35bae3 518
88108326 519 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 520
8e07c86e 521The C<filename>, C<flags> and C<mode> parameters are the direct
522equivalent of their dbopen() counterparts. The final parameter $DB_HASH
523performs the function of both the C<type> and C<openinfo> parameters in
524dbopen().
3b35bae3 525
88108326 526In the example above $DB_HASH is actually a pre-defined reference to a
527hash object. B<DB_File> has three of these pre-defined references.
528Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 529
The keys allowed in each of these pre-defined references are limited to
531the names used in the equivalent C structure. So, for example, the
532$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 533C<ffactor>, C<hash>, C<lorder> and C<nelem>.
534
535To change one of these elements, just assign to it like this:
536
537 $DB_HASH->{'cachesize'} = 10000 ;
538
539The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
540usually adequate for most applications. If you do need to create extra
541instances of these objects, constructors are available for each file
542type.
543
544Here are examples of the constructors and the valid options available
545for DB_HASH, DB_BTREE and DB_RECNO respectively.
546
547 $a = new DB_File::HASHINFO ;
548 $a->{'bsize'} ;
549 $a->{'cachesize'} ;
550 $a->{'ffactor'};
551 $a->{'hash'} ;
552 $a->{'lorder'} ;
553 $a->{'nelem'} ;
554
555 $b = new DB_File::BTREEINFO ;
556 $b->{'flags'} ;
557 $b->{'cachesize'} ;
558 $b->{'maxkeypage'} ;
559 $b->{'minkeypage'} ;
560 $b->{'psize'} ;
561 $b->{'compare'} ;
562 $b->{'prefix'} ;
563 $b->{'lorder'} ;
564
565 $c = new DB_File::RECNOINFO ;
566 $c->{'bval'} ;
567 $c->{'cachesize'} ;
568 $c->{'psize'} ;
569 $c->{'flags'} ;
570 $c->{'lorder'} ;
571 $c->{'reclen'} ;
572 $c->{'bfname'} ;
573
574The values stored in the hashes above are mostly the direct equivalent
of their C counterpart. Like their C counterparts, all are set to
default values - that means you don't have to set I<all> of the
88108326 577values when you only want to change one. Here is an example:
578
579 $a = new DB_File::HASHINFO ;
580 $a->{'cachesize'} = 12345 ;
581 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
582
36477c24 583A few of the options need extra discussion here. When used, the C
88108326 584equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
585to C functions. In B<DB_File> these keys are used to store references
586to Perl subs. Below are templates for each of the subs:
587
588 sub hash
589 {
590 my ($data) = @_ ;
591 ...
592 # return the hash value for $data
593 return $hash ;
594 }
3b35bae3 595
    sub compare
    {
        my ($key1, $key2) = @_ ;
        ...
        # return  0 if $key1 eq $key2
        #        -1 if $key1 lt $key2
        #         1 if $key1 gt $key2
        return (-1 , 0 or 1) ;
    }
3b35bae3 605
    sub prefix
    {
        my ($key1, $key2) = @_ ;
        ...
        # return number of bytes of $key2 which are
        # necessary to determine that it is greater than $key1
        return $bytes ;
    }
3b35bae3 614
f6b705ef 615See L<Changing the BTREE sort order> for an example of using the
616C<compare> template.
88108326 617
36477c24 618If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 619C<bval>, you should check out L<The 'bval' Option>.
36477c24 620
88108326 621=head2 Default Parameters
622
623It is possible to omit some or all of the final 4 parameters in the
624call to C<tie> and let them take default values. As DB_HASH is the most
625common file format used, the call:
626
627 tie %A, "DB_File", "filename" ;
628
629is equivalent to:
630
18d2dc8c 631 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 632
633It is also possible to omit the filename parameter as well, so the
634call:
635
636 tie %A, "DB_File" ;
637
638is equivalent to:
639
18d2dc8c 640 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 641
f6b705ef 642See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 643in place of a filename.
644
f6b705ef 645=head2 In Memory Databases
646
647Berkeley DB allows the creation of in-memory databases by using NULL
648(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
649uses C<undef> instead of NULL to provide this functionality.
650
651=head1 DB_HASH
652
653The DB_HASH file format is probably the most commonly used of the three
654file formats that B<DB_File> supports. It is also very straightforward
655to use.
656
68dc0745 657=head2 A Simple Example
f6b705ef 658
659This example shows how to create a database, add key/value pairs to the
660database, delete keys/value pairs and finally how to enumerate the
661contents of the database.
662
610ab055 663 use strict ;
f6b705ef 664 use DB_File ;
610ab055 665 use vars qw( %h $k $v ) ;
f6b705ef 666
667 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
668 or die "Cannot open file 'fruit': $!\n";
669
670 # Add a few key/value pairs to the file
671 $h{"apple"} = "red" ;
672 $h{"orange"} = "orange" ;
673 $h{"banana"} = "yellow" ;
674 $h{"tomato"} = "red" ;
675
676 # Check for existence of a key
677 print "Banana Exists\n\n" if $h{"banana"} ;
678
679 # Delete a key/value pair.
680 delete $h{"apple"} ;
681
682 # print the contents of the file
683 while (($k, $v) = each %h)
684 { print "$k -> $v\n" }
685
686 untie %h ;
687
Here is the output:
689
690 Banana Exists
691
692 orange -> orange
693 tomato -> red
694 banana -> yellow
695
Note that, as with ordinary associative arrays, the keys are retrieved
in an apparently random order.
698
699=head1 DB_BTREE
700
701The DB_BTREE format is useful when you want to store data in a given
702order. By default the keys will be stored in lexical order, but as you
703will see from the example shown in the next section, it is very easy to
704define your own sorting function.
705
706=head2 Changing the BTREE sort order
707
708This script shows how to override the default sorting algorithm that
709BTREE uses. Instead of using the normal lexical ordering, a case
710insensitive compare function will be used.
88108326 711
610ab055 712 use strict ;
f6b705ef 713 use DB_File ;
610ab055 714
715 my %h ;
f6b705ef 716
717 sub Compare
718 {
719 my ($key1, $key2) = @_ ;
720 "\L$key1" cmp "\L$key2" ;
721 }
722
723 # specify the Perl sub that will do the comparison
724 $DB_BTREE->{'compare'} = \&Compare ;
725
726 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
727 or die "Cannot open file 'tree': $!\n" ;
728
729 # Add a key/value pair to the file
730 $h{'Wall'} = 'Larry' ;
731 $h{'Smith'} = 'John' ;
732 $h{'mouse'} = 'mickey' ;
733 $h{'duck'} = 'donald' ;
734
735 # Delete
736 delete $h{"duck"} ;
737
738 # Cycle through the keys printing them in order.
739 # Note it is not necessary to sort the keys as
740 # the btree will have kept them in order automatically.
741 foreach (keys %h)
742 { print "$_\n" }
743
744 untie %h ;
745
746Here is the output from the code above.
747
748 mouse
749 Smith
750 Wall
751
There are a few points to bear in mind if you want to change the
753ordering in a BTREE database:
754
755=over 5
756
757=item 1.
758
759The new compare function must be specified when you create the database.
760
761=item 2.
762
763You cannot change the ordering once the database has been created. Thus
764you must use the same compare function every time you access the
88108326 765database.
766
f6b705ef 767=back
768
68dc0745 769=head2 Handling Duplicate Keys
f6b705ef 770
771The BTREE file type optionally allows a single key to be associated
772with an arbitrary number of values. This option is enabled by setting
773the flags element of C<$DB_BTREE> to R_DUP when creating the database.
774
88108326 775There are some difficulties in using the tied hash interface if you
776want to manipulate a BTREE database with duplicate keys. Consider this
777code:
778
610ab055 779 use strict ;
88108326 780 use DB_File ;
610ab055 781
782 use vars qw($filename %h ) ;
783
88108326 784 $filename = "tree" ;
785 unlink $filename ;
786
787 # Enable duplicate records
788 $DB_BTREE->{'flags'} = R_DUP ;
789
790 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
791 or die "Cannot open $filename: $!\n";
792
793 # Add some key/value pairs to the file
794 $h{'Wall'} = 'Larry' ;
795 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 796 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 797 $h{'Smith'} = 'John' ;
798 $h{'mouse'} = 'mickey' ;
799
800 # iterate through the associative array
801 # and print each key/value pair.
802 foreach (keys %h)
803 { print "$_ -> $h{$_}\n" }
804
f6b705ef 805 untie %h ;
806
88108326 807Here is the output:
808
809 Smith -> John
810 Wall -> Larry
811 Wall -> Larry
f6b705ef 812 Wall -> Larry
88108326 813 mouse -> mickey
814
f6b705ef 815As you can see 3 records have been successfully created with key C<Wall>
88108326 816- the only thing is, when they are retrieved from the database they
f6b705ef 817I<seem> to have the same value, namely C<Larry>. The problem is caused
818by the way that the associative array interface works. Basically, when
819the associative array interface is used to fetch the value associated
820with a given key, it will only ever retrieve the first value.
88108326 821
822Although it may not be immediately obvious from the code above, the
823associative array interface can be used to write values with duplicate
824keys, but it cannot be used to read them back from the database.
825
826The way to get around this problem is to use the Berkeley DB API method
827called C<seq>. This method allows sequential access to key/value
f6b705ef 828pairs. See L<THE API INTERFACE> for details of both the C<seq> method
829and the API in general.
88108326 830
831Here is the script above rewritten using the C<seq> API method.
832
610ab055 833 use strict ;
88108326 834 use DB_File ;
88108326 835
610ab055 836 use vars qw($filename $x %h $status $key $value) ;
837
88108326 838 $filename = "tree" ;
839 unlink $filename ;
840
841 # Enable duplicate records
842 $DB_BTREE->{'flags'} = R_DUP ;
843
844 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
845 or die "Cannot open $filename: $!\n";
846
847 # Add some key/value pairs to the file
848 $h{'Wall'} = 'Larry' ;
849 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 850 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 851 $h{'Smith'} = 'John' ;
852 $h{'mouse'} = 'mickey' ;
853
f6b705ef 854 # iterate through the btree using seq
88108326 855 # and print each key/value pair.
610ab055 856 $key = $value = 0 ;
f6b705ef 857 for ($status = $x->seq($key, $value, R_FIRST) ;
858 $status == 0 ;
859 $status = $x->seq($key, $value, R_NEXT) )
88108326 860 { print "$key -> $value\n" }
861
862 undef $x ;
863 untie %h ;
864
865that prints:
866
867 Smith -> John
868 Wall -> Brick
f6b705ef 869 Wall -> Brick
88108326 870 Wall -> Larry
871 mouse -> mickey
872
f6b705ef 873This time we have got all the key/value pairs, including the multiple
88108326 874values associated with the key C<Wall>.
875
6ca2e664 876To make life easier when dealing with duplicate keys, B<DB_File> comes with
877a few utility methods.
878
68dc0745 879=head2 The get_dup() Method
f6b705ef 880
6ca2e664 881The C<get_dup> method assists in
88108326 882reading duplicate values from BTREE databases. The method can take the
883following forms:
884
885 $count = $x->get_dup($key) ;
886 @list = $x->get_dup($key) ;
887 %list = $x->get_dup($key, 1) ;
888
889In a scalar context the method returns the number of values associated
890with the key, C<$key>.
891
892In list context, it returns all the values which match C<$key>. Note
f6b705ef 893that the values will be returned in an apparently random order.
88108326 894
7a2e2cd6 895In list context, if the second parameter is present and evaluates
896TRUE, the method returns an associative array. The keys of the
897associative array correspond to the values that matched in the BTREE
898and the values of the array are a count of the number of times that
899particular value occurred in the BTREE.
88108326 900
f6b705ef 901So assuming the database created above, we can use C<get_dup> like
88108326 902this:
903
610ab055 904 my $cnt = $x->get_dup("Wall") ;
88108326 905 print "Wall occurred $cnt times\n" ;
906
610ab055 907 my %hash = $x->get_dup("Wall", 1) ;
88108326 908 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 909 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 910
610ab055 911 my @list = $x->get_dup("Wall") ;
88108326 912 print "Wall => [@list]\n" ;
913
f6b705ef 914 @list = $x->get_dup("Smith") ;
88108326 915 print "Smith => [@list]\n" ;
916
f6b705ef 917 @list = $x->get_dup("Dog") ;
88108326 918 print "Dog => [@list]\n" ;
919
920
921and it will print:
922
f6b705ef 923 Wall occurred 3 times
88108326 924 Larry is there
f6b705ef 925 There are 2 Brick Walls
926 Wall => [Brick Brick Larry]
88108326 927 Smith => [John]
928 Dog => []
3b35bae3 929
6ca2e664 930=head2 The find_dup() Method
931
932 $status = $X->find_dup($key, $value) ;
933
934This method checks for the existance of a specific key/value pair. If the
935pair exists, the cursor is left pointing to the pair and the method
936returns 0. Otherwise the method returns a non-zero value.
937
938Assuming the database from the previous example:
939
940 use strict ;
941 use DB_File ;
942
943 use vars qw($filename $x %h $found) ;
944
945 my $filename = "tree" ;
946
947 # Enable duplicate records
948 $DB_BTREE->{'flags'} = R_DUP ;
949
950 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
951 or die "Cannot open $filename: $!\n";
952
953 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
954 print "Larry Wall is $found there\n" ;
955
956 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
957 print "Harry Wall is $found there\n" ;
958
959 undef $x ;
960 untie %h ;
961
962prints this
963
964 Larry Wall is there
965 Harry Wall is not there
966
967
968=head2 The del_dup() Method
969
970 $status = $X->del_dup($key, $value) ;
971
972This method deletes a specific key/value pair. It returns
9730 if they exist and have been deleted successfully.
974Otherwise the method returns a non-zero value.
975
Again assuming the existence of the C<tree> database
977
978 use strict ;
979 use DB_File ;
980
981 use vars qw($filename $x %h $found) ;
982
983 my $filename = "tree" ;
984
985 # Enable duplicate records
986 $DB_BTREE->{'flags'} = R_DUP ;
987
988 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
989 or die "Cannot open $filename: $!\n";
990
991 $x->del_dup("Wall", "Larry") ;
992
993 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
994 print "Larry Wall is $found there\n" ;
995
996 undef $x ;
997 untie %h ;
998
999prints this
1000
1001 Larry Wall is not there
1002
f6b705ef 1003=head2 Matching Partial Keys
1004
1005The BTREE interface has a feature which allows partial keys to be
1006matched. This functionality is I<only> available when the C<seq> method
1007is used along with the R_CURSOR flag.
1008
1009 $x->seq($key, $value, R_CURSOR) ;
1010
1011Here is the relevant quote from the dbopen man page where it defines
1012the use of the R_CURSOR flag with seq:
1013
f6b705ef 1014 Note, for the DB_BTREE access method, the returned key is not
1015 necessarily an exact match for the specified key. The returned key
1016 is the smallest key greater than or equal to the specified key,
1017 permitting partial key matches and range searches.
1018
f6b705ef 1019In the example script below, the C<match> sub uses this feature to find
1020and print the first matching key/value pair given a partial key.
1021
610ab055 1022 use strict ;
f6b705ef 1023 use DB_File ;
1024 use Fcntl ;
610ab055 1025
1026 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 1027
1028 sub match
1029 {
1030 my $key = shift ;
610ab055 1031 my $value = 0;
f6b705ef 1032 my $orig_key = $key ;
1033 $x->seq($key, $value, R_CURSOR) ;
1034 print "$orig_key\t-> $key\t-> $value\n" ;
1035 }
1036
1037 $filename = "tree" ;
1038 unlink $filename ;
1039
1040 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1041 or die "Cannot open $filename: $!\n";
1042
1043 # Add some key/value pairs to the file
1044 $h{'mouse'} = 'mickey' ;
1045 $h{'Wall'} = 'Larry' ;
1046 $h{'Walls'} = 'Brick' ;
1047 $h{'Smith'} = 'John' ;
1048
1049
610ab055 1050 $key = $value = 0 ;
f6b705ef 1051 print "IN ORDER\n" ;
1052 for ($st = $x->seq($key, $value, R_FIRST) ;
1053 $st == 0 ;
1054 $st = $x->seq($key, $value, R_NEXT) )
1055
1056 { print "$key -> $value\n" }
1057
1058 print "\nPARTIAL MATCH\n" ;
1059
1060 match "Wa" ;
1061 match "A" ;
1062 match "a" ;
1063
1064 undef $x ;
1065 untie %h ;
1066
1067Here is the output:
1068
1069 IN ORDER
1070 Smith -> John
1071 Wall -> Larry
1072 Walls -> Brick
1073 mouse -> mickey
1074
1075 PARTIAL MATCH
1076 Wa -> Wall -> Larry
1077 A -> Smith -> John
1078 a -> mouse -> mickey
1079
1080=head1 DB_RECNO
1081
1082DB_RECNO provides an interface to flat text files. Both variable and
1083fixed length records are supported.
3b35bae3 1084
6ca2e664 1085In order to make RECNO more compatible with Perl, the array offset for
88108326 1086all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1087
88108326 1088As with normal Perl arrays, a RECNO array can be accessed using
1089negative indexes. The index -1 refers to the last element of the array,
1090-2 the second last, and so on. Attempting to access an element before
1091the start of the array will raise a fatal run-time error.
3b35bae3 1092
68dc0745 1093=head2 The 'bval' Option
36477c24 1094
1095The operation of the bval option warrants some discussion. Here is the
1096definition of bval from the Berkeley DB 1.85 recno manual page:
1097
1098 The delimiting byte to be used to mark the end of a
1099 record for variable-length records, and the pad charac-
1100 ter for fixed-length records. If no value is speci-
1101 fied, newlines (``\n'') are used to mark the end of
1102 variable-length records and fixed-length records are
1103 padded with spaces.
1104
1105The second sentence is wrong. In actual fact bval will only default to
1106C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1107openinfo parameter is used at all, the value that happens to be in bval
1108will be used. That means you always have to specify bval when making
1109use of any of the options in the openinfo parameter. This documentation
1110error will be fixed in the next release of Berkeley DB.
1111
1112That clarifies the situation with regard to Berkeley DB itself. What
1113about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1114quite useful, so B<DB_File> conforms to it.
36477c24 1115
1116That means that you can specify other options (e.g. cachesize) and
1117still have bval default to C<"\n"> for variable length records, and
1118space for fixed length records.
1119
f6b705ef 1120=head2 A Simple Example
3b35bae3 1121
6ca2e664 1122Here is a simple example that uses RECNO (if you are using a version
1123of Perl earlier than 5.004_57 this example won't work -- see
1124L<Extra RECNO Methods> for a workaround).
f6b705ef 1125
610ab055 1126 use strict ;
f6b705ef 1127 use DB_File ;
f6b705ef 1128
610ab055 1129 my @h ;
f6b705ef 1130 tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
1131 or die "Cannot open file 'text': $!\n" ;
1132
1133 # Add a few key/value pairs to the file
1134 $h[0] = "orange" ;
1135 $h[1] = "blue" ;
1136 $h[2] = "yellow" ;
1137
6ca2e664 1138 push @h, "green", "black" ;
1139
1140 my $elements = scalar @h ;
1141 print "The array contains $elements entries\n" ;
1142
1143 my $last = pop @h ;
1144 print "popped $last\n" ;
1145
1146 unshift @h, "white" ;
1147 my $first = shift @h ;
1148 print "shifted $first\n" ;
1149
f6b705ef 1150 # Check for existence of a key
1151 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1152
1153 # use a negative index
1154 print "The last element is $h[-1]\n" ;
1155 print "The 2nd last element is $h[-2]\n" ;
1156
1157 untie @h ;
3b35bae3 1158
f6b705ef 1159Here is the output from the script:
1160
6ca2e664 1161 The array contains 5 entries
1162 popped black
1163 shifted white
f6b705ef 1164 Element 1 Exists with value blue
6ca2e664 1165 The last element is green
1166 The 2nd last element is yellow
f6b705ef 1167
6ca2e664 1168=head2 Extra RECNO Methods
f6b705ef 1169
045291aa 1170If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664 1171array interface is quite limited. In the example script above
1172C<push>, C<pop>, C<shift>, C<unshift>
1173or determining the array length will not work with a tied array.
045291aa 1174
1175To make the interface more useful for older versions of Perl, a number
1176of methods are supplied with B<DB_File> to simulate the missing array
1177operations. All these methods are accessed via the object returned from
1178the tie call.
f6b705ef 1179
1180Here are the methods:
1181
1182=over 5
3b35bae3 1183
f6b705ef 1184=item B<$X-E<gt>push(list) ;>
1185
1186Pushes the elements of C<list> to the end of the array.
1187
1188=item B<$value = $X-E<gt>pop ;>
1189
1190Removes and returns the last element of the array.
1191
1192=item B<$X-E<gt>shift>
1193
1194Removes and returns the first element of the array.
1195
1196=item B<$X-E<gt>unshift(list) ;>
1197
1198Pushes the elements of C<list> to the start of the array.
1199
1200=item B<$X-E<gt>length>
1201
1202Returns the number of elements in the array.
1203
1204=back
1205
1206=head2 Another Example
1207
1208Here is a more complete example that makes use of some of the methods
1209described above. It also makes use of the API interface directly (see
1210L<THE API INTERFACE>).
1211
1212 use strict ;
1213 use vars qw(@h $H $file $i) ;
1214 use DB_File ;
1215 use Fcntl ;
1216
1217 $file = "text" ;
1218
1219 unlink $file ;
1220
1221 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1222 or die "Cannot open file $file: $!\n" ;
1223
1224 # first create a text file to play with
1225 $h[0] = "zero" ;
1226 $h[1] = "one" ;
1227 $h[2] = "two" ;
1228 $h[3] = "three" ;
1229 $h[4] = "four" ;
1230
1231
1232 # Print the records in order.
1233 #
1234 # The length method is needed here because evaluating a tied
1235 # array in a scalar context does not return the number of
1236 # elements in the array.
1237
1238 print "\nORIGINAL\n" ;
1239 foreach $i (0 .. $H->length - 1) {
1240 print "$i: $h[$i]\n" ;
1241 }
1242
1243 # use the push & pop methods
1244 $a = $H->pop ;
1245 $H->push("last") ;
1246 print "\nThe last record was [$a]\n" ;
1247
1248 # and the shift & unshift methods
1249 $a = $H->shift ;
1250 $H->unshift("first") ;
1251 print "The first record was [$a]\n" ;
1252
1253 # Use the API to add a new record after record 2.
1254 $i = 2 ;
1255 $H->put($i, "Newbie", R_IAFTER) ;
1256
1257 # and a new record before record 1.
1258 $i = 1 ;
1259 $H->put($i, "New One", R_IBEFORE) ;
1260
1261 # delete record 3
1262 $H->del(3) ;
1263
1264 # now print the records in reverse order
1265 print "\nREVERSE\n" ;
1266 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1267 { print "$i: $h[$i]\n" }
1268
1269 # same again, but use the API functions instead
1270 print "\nREVERSE again\n" ;
610ab055 1271 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1272 for ($s = $H->seq($k, $v, R_LAST) ;
1273 $s == 0 ;
1274 $s = $H->seq($k, $v, R_PREV))
1275 { print "$k: $v\n" }
1276
1277 undef $H ;
1278 untie @h ;
1279
1280and this is what it outputs:
1281
1282 ORIGINAL
1283 0: zero
1284 1: one
1285 2: two
1286 3: three
1287 4: four
1288
1289 The last record was [four]
1290 The first record was [zero]
1291
1292 REVERSE
1293 5: last
1294 4: three
1295 3: Newbie
1296 2: one
1297 1: New One
1298 0: first
1299
1300 REVERSE again
1301 5: last
1302 4: three
1303 3: Newbie
1304 2: one
1305 1: New One
1306 0: first
1307
1308Notes:
1309
1310=over 5
1311
1312=item 1.
1313
1314Rather than iterating through the array, C<@h> like this:
1315
1316 foreach $i (@h)
1317
1318it is necessary to use either this:
1319
1320 foreach $i (0 .. $H->length - 1)
1321
1322or this:
1323
1324 for ($a = $H->seq($k, $v, R_FIRST) ;
1325 $a == 0 ;
1326 $a = $H->seq($k, $v, R_NEXT) )
1327
1328=item 2.
1329
1330Notice that both times the C<put> method was used the record index was
1331specified using a variable, C<$i>, rather than the literal value
1332itself. This is because C<put> will return the record number of the
1333inserted line via that parameter.
1334
1335=back
1336
1337=head1 THE API INTERFACE
3b35bae3 1338
1339As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1340possible to make direct use of most of the API functions defined in the
8e07c86e 1341Berkeley DB documentation.
3b35bae3 1342
88108326 1343To do this you need to store a copy of the object returned from the tie.
3b35bae3 1344
88108326 1345 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1346
8e07c86e 1347Once you have done that, you can access the Berkeley DB API functions
88108326 1348as B<DB_File> methods directly like this:
3b35bae3 1349
1350 $db->put($key, $value, R_NOOVERWRITE) ;
1351
88108326 1352B<Important:> If you have saved a copy of the object returned from
1353C<tie>, the underlying database file will I<not> be closed until both
1354the tied variable is untied and all copies of the saved object are
610ab055 1355destroyed.
88108326 1356
1357 use DB_File ;
1358 $db = tie %hash, "DB_File", "filename"
1359 or die "Cannot tie filename: $!" ;
1360 ...
1361 undef $db ;
1362 untie %hash ;
1363
9a2c4ce3 1364See L<The untie() Gotcha> for more details.
778183f3 1365
88108326 1366All the functions defined in L<dbopen> are available except for
1367close() and dbopen() itself. The B<DB_File> method interface to the
1368supported functions has been implemented to mirror the way Berkeley DB
1369works whenever possible. In particular note that:
1370
1371=over 5
1372
1373=item *
1374
1375The methods return a status value. All return 0 on success.
1376All return -1 to signify an error and set C<$!> to the exact
1377error code. The return code 1 generally (but not always) means that the
1378key specified did not exist in the database.
1379
1380Other return codes are defined. See below and in the Berkeley DB
1381documentation for details. The Berkeley DB documentation should be used
1382as the definitive source.
1383
1384=item *
3b35bae3 1385
88108326 1386Whenever a Berkeley DB function returns data via one of its parameters,
1387the equivalent B<DB_File> method does exactly the same.
3b35bae3 1388
88108326 1389=item *
1390
1391If you are careful, it is possible to mix API calls with the tied
1392hash/array interface in the same piece of code. Although only a few of
1393the methods used to implement the tied interface currently make use of
1394the cursor, you should always assume that the cursor has been changed
1395any time the tied hash/array interface is used. As an example, this
1396code will probably not do what you expect:
1397
1398 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1399 or die "Cannot tie $filename: $!" ;
1400
1401 # Get the first key/value pair and set the cursor
1402 $X->seq($key, $value, R_FIRST) ;
1403
1404 # this line will modify the cursor
1405 $count = scalar keys %x ;
1406
1407 # Get the second key/value pair.
1408 # oops, it didn't, it got the last key/value pair!
1409 $X->seq($key, $value, R_NEXT) ;
1410
1411The code above can be rearranged to get around the problem, like this:
1412
1413 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1414 or die "Cannot tie $filename: $!" ;
1415
1416 # this line will modify the cursor
1417 $count = scalar keys %x ;
1418
1419 # Get the first key/value pair and set the cursor
1420 $X->seq($key, $value, R_FIRST) ;
1421
1422 # Get the second key/value pair.
1423 # worked this time.
1424 $X->seq($key, $value, R_NEXT) ;
1425
1426=back
1427
1428All the constants defined in L<dbopen> for use in the flags parameters
1429in the methods defined below are also available. Refer to the Berkeley
1430DB documentation for the precise meaning of the flags values.
1431
1432Below is a list of the methods available.
3b35bae3 1433
1434=over 5
1435
f6b705ef 1436=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1437
1438Given a key (C<$key>) this method reads the value associated with it
1439from the database. The value read from the database is returned in the
1440C<$value> parameter.
3b35bae3 1441
88108326 1442If the key does not exist the method returns 1.
3b35bae3 1443
88108326 1444No flags are currently defined for this method.
3b35bae3 1445
f6b705ef 1446=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1447
88108326 1448Stores the key/value pair in the database.
1449
1450If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1451will have the record number of the inserted key/value pair set.
3b35bae3 1452
88108326 1453Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1454R_SETCURSOR.
1455
f6b705ef 1456=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1457
88108326 1458Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1459
88108326 1460A return code of 1 means that the requested key was not in the
1461database.
3b35bae3 1462
88108326 1463R_CURSOR is the only valid flag at present.
3b35bae3 1464
f6b705ef 1465=item B<$status = $X-E<gt>fd ;>
3b35bae3 1466
88108326 1467Returns the file descriptor for the underlying database.
3b35bae3 1468
f6b705ef 1469See L<Locking Databases> for an example of how to make use of the
88108326 1470C<fd> method to lock your database.
3b35bae3 1471
f6b705ef 1472=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1473
88108326 1474This interface allows sequential retrieval from the database. See
1475L<dbopen> for full details.
1476
1477Both the C<$key> and C<$value> parameters will be set to the key/value
1478pair read from the database.
1479
1480The flags parameter is mandatory. The valid flag values are R_CURSOR,
1481R_FIRST, R_LAST, R_NEXT and R_PREV.
1482
f6b705ef 1483=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1484
1485Flushes any cached buffers to disk.
1486
1487R_RECNOSYNC is the only valid flag at present.
3b35bae3 1488
1489=back
1490
f6b705ef 1491=head1 HINTS AND TIPS
3b35bae3 1492
3b35bae3 1493
cb1a09d0 1494=head2 Locking Databases
3b35bae3 1495
cb1a09d0 1496Concurrent access of a read-write database by several parties requires
1497them all to use some kind of locking. Here's an example of Tom's that
1498uses the I<fd> method to get the file descriptor, and then a careful
1499open() to give something Perl will flock() for you. Run this repeatedly
1500in the background to watch the locks granted in proper order.
3b35bae3 1501
cb1a09d0 1502 use DB_File;
1503
1504 use strict;
1505
1506 sub LOCK_SH { 1 }
1507 sub LOCK_EX { 2 }
1508 sub LOCK_NB { 4 }
1509 sub LOCK_UN { 8 }
1510
1511 my($oldval, $fd, $db, %db, $value, $key);
1512
1513 $key = shift || 'default';
1514 $value = shift || 'magic';
1515
1516 $value .= " $$";
1517
1518 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1519 || die "dbcreat /tmp/foo.db $!";
1520 $fd = $db->fd;
1521 print "$$: db fd is $fd\n";
1522 open(DB_FH, "+<&=$fd") || die "dup $!";
1523
1524
1525 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1526 print "$$: CONTENTION; can't read during write update!
1527 Waiting for read lock ($!) ....";
1528 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1529 }
1530 print "$$: Read lock granted\n";
1531
1532 $oldval = $db{$key};
1533 print "$$: Old value was $oldval\n";
1534 flock(DB_FH, LOCK_UN);
1535
1536 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1537 print "$$: CONTENTION; must have exclusive lock!
1538 Waiting for write lock ($!) ....";
1539 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1540 }
1541
1542 print "$$: Write lock granted\n";
1543 $db{$key} = $value;
610ab055 1544 $db->sync; # to flush
cb1a09d0 1545 sleep 10;
1546
1547 flock(DB_FH, LOCK_UN);
88108326 1548 undef $db;
cb1a09d0 1549 untie %db;
1550 close(DB_FH);
1551 print "$$: Updated db to $key=$value\n";
1552
68dc0745 1553=head2 Sharing Databases With C Applications
f6b705ef 1554
1555There is no technical reason why a Berkeley DB database cannot be
1556shared by both a Perl and a C application.
1557
1558The vast majority of problems that are reported in this area boil down
1559to the fact that C strings are NULL terminated, whilst Perl strings are
1560not.
1561
1562Here is a real example. Netscape 2.0 keeps a record of the locations you
1563visit along with the time you last visited them in a DB_HASH database.
1564This is usually stored in the file F<~/.netscape/history.db>. The key
1565field in the database is the location string and the value field is the
1566time the location was last visited stored as a 4 byte binary value.
1567
1568If you haven't already guessed, the location string is stored with a
1569terminating NULL. This means you need to be careful when accessing the
1570database.
1571
1572Here is a snippet of code that is loosely based on Tom Christiansen's
1573I<ggh> script (available from your nearest CPAN archive in
1574F<authors/id/TOMC/scripts/nshist.gz>).
1575
610ab055 1576 use strict ;
f6b705ef 1577 use DB_File ;
1578 use Fcntl ;
f6b705ef 1579
610ab055 1580 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1581 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1582
1583 $HISTORY = "$dotdir/.netscape/history.db";
1584
1585 tie %hist_db, 'DB_File', $HISTORY
1586 or die "Cannot open $HISTORY: $!\n" ;
1587
1588 # Dump the complete database
1589 while ( ($href, $binary_time) = each %hist_db ) {
1590
1591 # remove the terminating NULL
1592 $href =~ s/\x00$// ;
1593
1594 # convert the binary time into a user friendly string
1595 $date = localtime unpack("V", $binary_time);
1596 print "$date $href\n" ;
1597 }
1598
1599 # check for the existence of a specific key
1600 # remember to add the NULL
1601 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1602 $date = localtime unpack("V", $binary_time) ;
1603 print "Last visited mox.perl.com on $date\n" ;
1604 }
1605 else {
1606 print "Never visited mox.perl.com\n"
1607 }
1608
1609 untie %hist_db ;
1610
68dc0745 1611=head2 The untie() Gotcha
778183f3 1612
7a2e2cd6 1613If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1614recommended that you read L<perltie/The untie Gotcha>.
778183f3 1615
1616Even if you don't currently make use of the API interface, it is still
1617worth reading it.
1618
1619Here is an example which illustrates the problem from a B<DB_File>
1620perspective:
1621
1622 use DB_File ;
1623 use Fcntl ;
1624
1625 my %x ;
1626 my $X ;
1627
1628 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1629 or die "Cannot tie first time: $!" ;
1630
1631 $x{123} = 456 ;
1632
1633 untie %x ;
1634
1635 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1636 or die "Cannot tie second time: $!" ;
1637
1638 untie %x ;
1639
1640When run, the script will produce this error message:
1641
1642 Cannot tie second time: Invalid argument at bad.file line 14.
1643
1644Although the error message above refers to the second tie() statement
1645in the script, the source of the problem is really with the untie()
1646statement that precedes it.
1647
1648Having read L<perltie> you will probably have already guessed that the
1649error is caused by the extra copy of the tied object stored in C<$X>.
1650If you haven't, then the problem boils down to the fact that the
1651B<DB_File> destructor, DESTROY, will not be called until I<all>
1652references to the tied object are destroyed. Both the tied variable,
1653C<%x>, and C<$X> above hold a reference to the object. The call to
1654untie() will destroy the first, but C<$X> still holds a valid
1655reference, so the destructor will not get called and the database file
1656F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1657attempt to open a database that is already open via the catch-all
1658"Invalid argument" doesn't help.
1659
1660If you run the script with the C<-w> flag the error message becomes:
1661
1662 untie attempted while 1 inner references still exist at bad.file line 12.
1663 Cannot tie second time: Invalid argument at bad.file line 14.
1664
1665which pinpoints the real problem. Finally the script can now be
1666modified to fix the original problem by destroying the API object
1667before the untie:
1668
1669 ...
1670 $x{123} = 456 ;
1671
1672 undef $X ;
1673 untie %x ;
1674
1675 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1676 ...
1677
f6b705ef 1678
1679=head1 COMMON QUESTIONS
1680
1681=head2 Why is there Perl source in my database?
1682
1683If you look at the contents of a database file created by DB_File,
1684there can sometimes be part of a Perl script included in it.
1685
1686This happens because Berkeley DB uses dynamic memory to allocate
1687buffers which will subsequently be written to the database file. Being
1688dynamic, the memory could have been used for anything before DB
1689malloced it. As Berkeley DB doesn't clear the memory once it has been
1690allocated, the unused portions will contain random junk. In the case
1691where a Perl script gets written to the database, the random junk will
1692correspond to an area of dynamic memory that happened to be used during
1693the compilation of the script.
1694
1695Unless you don't like the possibility of there being part of your Perl
1696scripts embedded in a database file, this is nothing to worry about.
1697
1698=head2 How do I store complex data structures with DB_File?
1699
1700Although B<DB_File> cannot do this directly, there is a module which
1701can layer transparently over B<DB_File> to accomplish this feat.
1702
1703Check out the MLDBM module, available on CPAN in the directory
1704F<modules/by-module/MLDBM>.
1705
1706=head2 What does "Invalid Argument" mean?
1707
1708You will get this error message when one of the parameters in the
1709C<tie> call is wrong. Unfortunately there are quite a few parameters to
1710get wrong, so it can be difficult to figure out which one it is.
1711
1712Here are a couple of possibilities:
1713
1714=over 5
1715
1716=item 1.
1717
610ab055 1718Attempting to reopen a database without closing it.
f6b705ef 1719
1720=item 2.
1721
1722Using the O_WRONLY flag.
1723
1724=back
1725
1726=head2 What does "Bareword 'DB_File' not allowed" mean?
1727
1728You will encounter this particular error message when you have the
1729C<strict 'subs'> pragma (or the full strict pragma) in your script.
1730Consider this script:
1731
1732 use strict ;
1733 use DB_File ;
1734 use vars qw(%x) ;
1735 tie %x, DB_File, "filename" ;
1736
1737Running it produces the error in question:
1738
1739 Bareword "DB_File" not allowed while "strict subs" in use
1740
1741To get around the error, place the word C<DB_File> in either single or
1742double quotes, like this:
1743
1744 tie %x, "DB_File", "filename" ;
1745
1746Although it might seem like a real pain, it is really worth the effort
1747of having a C<use strict> in all your scripts.
1748
cb1a09d0 1749=head1 HISTORY
1750
1f70e1ea 1751Moved to the Changes file.
610ab055 1752
1f70e1ea 1753=head1 BUGS
05475680 1754
1f70e1ea 1755Some older versions of Berkeley DB had problems with fixed length
1756records using the RECNO file format. This problem has been fixed since
1757version 1.85 of Berkeley DB.
e858de61 1758
1f70e1ea 1759I am sure there are bugs in the code. If you do find any, or can
1760suggest any enhancements, I would welcome your comments.
a6ed719b 1761
1f70e1ea 1762=head1 AVAILABILITY
a6ed719b 1763
1f70e1ea 1764B<DB_File> comes with the standard Perl source distribution. Look in
1765the directory F<ext/DB_File>. Given the amount of time between releases
1766of Perl the version that ships with Perl is quite likely to be out of
1767date, so the most recent version can always be found on CPAN (see
1768L<perlmod/CPAN> for details), in the directory
1769F<modules/by-module/DB_File>.
a6ed719b 1770
1f70e1ea 1771This version of B<DB_File> will work with either version 1.x or 2.x of
1772Berkeley DB, but is limited to the functionality provided by version 1.
a6ed719b 1773
1f70e1ea 1774The official web site for Berkeley DB is
1775F<http://www.sleepycat.com/db>. The ftp equivalent is
1776F<ftp.sleepycat.com:/pub>. Both versions 1 and 2 of Berkeley DB are
1777available there.
93af7a87 1778
1f70e1ea 1779Alternatively, Berkeley DB version 1 is available at your nearest CPAN
1780archive in F<src/misc/db.1.85.tar.gz>.
e858de61 1781
1f70e1ea 1782If you are running IRIX, then get Berkeley DB version 1 from
1783F<http://reality.sgi.com/ariel>. It has the patches necessary to
1784compile properly on IRIX 5.3.
610ab055 1785
1f70e1ea 1786=head1 COPYRIGHT
3b35bae3 1787
20896112 1788Copyright (c) 1995-9 Paul Marquess. All rights reserved. This program
a9fd575d 1789is free software; you can redistribute it and/or modify it under the
1790same terms as Perl itself.
3b35bae3 1791
1f70e1ea 1792Although B<DB_File> is covered by the Perl license, the library it
1793makes use of, namely Berkeley DB, is not. Berkeley DB has its own
1794copyright and its own license. Please take the time to read it.
3b35bae3 1795
a9fd575d 1796Here are a few words taken from the Berkeley DB FAQ (at
1797http://www.sleepycat.com) regarding the license:
68dc0745 1798
a9fd575d 1799 Do I have to license DB to use it in Perl scripts?
3b35bae3 1800
a9fd575d 1801 No. The Berkeley DB license requires that software that uses
1802 Berkeley DB be freely redistributable. In the case of Perl, that
1803 software is Perl, and not your scripts. Any Perl scripts that you
1804 write are your property, including scripts that make use of
1805 Berkeley DB. Neither the Perl license nor the Berkeley DB license
1806 place any restriction on what you may do with them.
88108326 1807
1f70e1ea 1808If you are in any doubt about the license situation, contact either the
1809Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 1810
1811
3b35bae3 1812=head1 SEE ALSO
1813
9fe6733a 1814L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
1815L<dbmfilter>
3b35bae3 1816
3b35bae3 1817=head1 AUTHOR
1818
8e07c86e 1819The DB_File interface was written by Paul Marquess
6ca2e664 1820E<lt>Paul.Marquess@btinternet.comE<gt>.
d3ef3b8a 1821Questions about the DB system itself may be addressed to
1822E<lt>db@sleepycat.comE<gt>.
3b35bae3 1823
1824=cut