DB_File 1.58 patch
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
3# written by Paul Marquess (pmarquess@bfsec.bt.co.uk)
045291aa 4# last modified 20th Dec 1997
5# version 1.58
36477c24 6#
a0b8c8c1 7# Copyright (c) 1995, 1996, 1997 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
12package DB_File::HASHINFO ;
785da04d 13
610ab055 14require 5.003 ;
15
785da04d 16use strict;
8e07c86e 17use Carp;
88108326 18require Tie::Hash;
19@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 20
# Constructor shared by all the INFO classes.
#
# Returns a reference to a hash that is tied to the invoking class (so
# every element access is routed through that class's FETCH/STORE/...
# methods) and that is also blessed into the same class.
sub new
{
    my ($class) = @_ ;
    my %info ;

    tie %info, $class ;
    return bless \%info, $class ;
}
28
610ab055 29
# tie() constructor for HASHINFO objects.
#
# The object holds two hashes:
#   VALID - the set of element names this INFO type accepts
#   GOT   - the values that have actually been stored so far
sub TIEHASH
{
    my ($class) = @_ ;

    my @names = qw( bsize ffactor nelem cachesize hash lorder ) ;
    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
8e07c86e 40
610ab055 41
# Tied-hash read: returns the value stored for $key (undef if the
# element is valid but has never been set).
# Croaks if $key is not one of the element names listed in VALID.
sub FETCH
{
    my ($self, $key) = @_ ;

    unless ( exists $self->{VALID}{$key} )
    {
        my $pkg = ref $self ;
        croak "${pkg}::FETCH - Unknown element '$key'" ;
    }

    return $self->{GOT}{$key} ;
}
52
53
# Tied-hash write: records $value for $key in GOT and returns nothing.
# Croaks if $key is not one of the element names listed in VALID.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    unless ( exists $self->{VALID}{$key} )
    {
        my $pkg = ref $self ;
        croak "${pkg}::STORE - Unknown element '$key'" ;
    }

    $self->{GOT}{$key} = $value ;
    return ;
}
69
# Tied-hash delete: removes any value stored for $key from GOT and
# returns nothing.
# Croaks if $key is not one of the element names listed in VALID.
sub DELETE
{
    my $self = shift ;
    my $key = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    # Name the object's real class in the message, exactly as FETCH and
    # STORE do, so that classes inheriting this method are reported
    # correctly instead of always as DB_File::HASHINFO.
    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}
84
# Tied-hash exists: true when $key is a recognised element name for this
# INFO type. Note that this consults VALID, not GOT, so it does not say
# whether a value has actually been stored.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{VALID}{$key} ;
}
92
# Helper for the unsupported tied-hash operations: croaks with a message
# naming the object's class and the method that was requested.
sub NotHere
{
    my ($self, $method) = @_ ;
    my $class = ref $self ;

    croak "$class does not define the method ${method}" ;
}
100
# Iterating over, or clearing, an INFO hash is not supported: each of
# these tied-hash hooks simply reports itself through NotHere().
sub FIRSTKEY { $_[0]->NotHere("FIRSTKEY") }
sub NEXTKEY  { $_[0]->NotHere("NEXTKEY") }
sub CLEAR    { $_[0]->NotHere("CLEAR") }
8e07c86e 104
105package DB_File::RECNOINFO ;
785da04d 106
88108326 107use strict ;
108
045291aa 109@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 110
# tie() constructor for RECNOINFO objects.
#
# Same object layout as the other INFO classes: VALID lists the element
# names accepted for a RECNO database, GOT holds the values set so far.
sub TIEHASH
{
    my ($class) = @_ ;

    my @names = qw( bval cachesize psize flags lorder reclen bfname ) ;
    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
121
88108326 122package DB_File::BTREEINFO ;
8e07c86e 123
88108326 124use strict ;
8e07c86e 125
88108326 126@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 127
# tie() constructor for BTREEINFO objects.
#
# Same object layout as the other INFO classes: VALID lists the element
# names accepted for a BTREE database, GOT holds the values set so far.
sub TIEHASH
{
    my ($class) = @_ ;

    my @names = qw( flags cachesize maxkeypage minkeypage psize
                    compare prefix lorder ) ;
    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
139
140
8e07c86e 141package DB_File ;
785da04d 142
143use strict;
1f70e1ea 144use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_version) ;
8e07c86e 145use Carp;
146
785da04d 147
045291aa 148$VERSION = "1.58" ;
8e07c86e 149
150#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 151$DB_BTREE = new DB_File::BTREEINFO ;
152$DB_HASH = new DB_File::HASHINFO ;
153$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 154
785da04d 155require Tie::Hash;
8e07c86e 156require Exporter;
157use AutoLoader;
158require DynaLoader;
785da04d 159@ISA = qw(Tie::Hash Exporter DynaLoader);
8e07c86e 160@EXPORT = qw(
161 $DB_BTREE $DB_HASH $DB_RECNO
88108326 162
8e07c86e 163 BTREEMAGIC
164 BTREEVERSION
165 DB_LOCK
166 DB_SHMEM
167 DB_TXN
168 HASHMAGIC
169 HASHVERSION
170 MAX_PAGE_NUMBER
171 MAX_PAGE_OFFSET
172 MAX_REC_NUMBER
173 RET_ERROR
174 RET_SPECIAL
175 RET_SUCCESS
176 R_CURSOR
177 R_DUP
178 R_FIRST
179 R_FIXEDLEN
180 R_IAFTER
181 R_IBEFORE
182 R_LAST
183 R_NEXT
184 R_NOKEY
185 R_NOOVERWRITE
186 R_PREV
187 R_RECNOSYNC
188 R_SETCURSOR
189 R_SNAPSHOT
190 __R_UNUSED
88108326 191
045291aa 192);
8e07c86e 193
# Resolves an undefined DB_File::NAME call as a Berkeley DB constant.
#
# constant() is not defined in the visible source (presumably the XS
# lookup routine - confirm); this code relies on it reporting failure
# through $!, so $! must be inspected immediately after the call.
#
#   * $! clear            : define a sub returning the value, then jump to it
#   * $! matches /Invalid/ : not a constant at all - defer to AutoLoader
#   * any other $!        : known name that this platform's DB lacks - croak
sub AUTOLOAD {
    # strip the package qualifier to leave the bare constant name
    (my $constname = $AUTOLOAD) =~ s/.*:://;
    my $val = constant($constname, @_ ? $_[0] : 0);

    if ($! != 0) {
        unless ($! =~ /Invalid/) {
            my($pack,$file,$line) = caller;
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.
";
        }
        $AutoLoader::AUTOLOAD = $AUTOLOAD;
        goto &AutoLoader::AUTOLOAD;
    }

    # cache the constant as a real sub so AUTOLOAD is not hit again
    eval "sub $AUTOLOAD { $val }";
    goto &$AUTOLOAD;
}
212
f6b705ef 213
# Make all Fcntl O_XXX constants available for importing from DB_File.
# Wrapped in eval so that a failure to load Fcntl is silently ignored.
eval {
    require Fcntl ;
    my @open_flags = grep { /^O_/ } @Fcntl::EXPORT ;

    # first we import what we want to export ...
    Fcntl->import(@open_flags) ;

    # ... then add the same names to our own export list
    push @EXPORT, @open_flags ;
} ;
f6b705ef 221
1f70e1ea 222## import borrowed from IO::File
223## exports Fcntl constants if available.
224#sub import {
225# my $pkg = shift;
226# my $callpkg = caller;
227# Exporter::export $pkg, $callpkg, @_;
228# eval {
229# require Fcntl;
230# Exporter::export 'Fcntl', $callpkg, '/^O_/';
231# };
232#}
233
785da04d 234bootstrap DB_File $VERSION;
8e07c86e 235
236# Preloaded methods go here. Autoload methods go after __END__, and are
237# processed by the autosplit program.
238
# Common back end for TIEHASH and TIEARRAY.
#
# Arguments are those given to tie(): class, filename, flags, mode and
# the INFO object. They are passed on to DoTie_(), which is not defined
# in the visible source (presumably the XS entry point - confirm).
# Returns whatever DoTie_() returns, or undef if the RECNO file could
# not be created.
sub tie_hash_or_array
{
    my (@arg) = @_ ;

    # The name of the sub that called us says which kind of tie this is,
    # so this routine must be called directly from TIEHASH/TIEARRAY.
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    # If the INFO argument is a reference to a tied hash, pass the
    # underlying tied object on instead.
    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    # make recno in Berkeley DB version 2 work like recno in version 1.
    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
        $arg[1] and ! -e $arg[1]) {
        # Create the file with sysopen() so the caller's filename is
        # used verbatim: a two-argument open(FH, ">$name") would strip
        # surrounding whitespace and interpret leading mode characters.
        # The glob is localized so an existing FH is left untouched.
        require Fcntl ;
        local *FH ;
        sysopen(FH, $arg[1],
                Fcntl::O_WRONLY() | Fcntl::O_CREAT() | Fcntl::O_TRUNC())
            or return undef ;
        close FH ;
        chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
    }

    DoTie_($tieHASH, @arg) ;
}
257
# tie() entry points for hashes and arrays.
#
# Both hand their arguments straight to tie_hash_or_array(). They must
# call it directly, as an ordinary sub call, because it looks at
# caller(1) to see whether it was reached through TIEHASH or not.
sub TIEHASH
{
    return tie_hash_or_array(@_) ;
}

sub TIEARRAY
{
    return tie_hash_or_array(@_) ;
}
88108326 267
# Tied CLEAR: removes every record from the database.
#
# Works in two passes - first collect all the keys with seq(), then
# delete them - so records are not removed while the cursor is walking.
sub CLEAR
{
    my ($self) = @_ ;
    my ($key, $value) = ("", "") ;

    # pass 1: remember every key, in database order
    my $status = $self->seq($key, $value, R_FIRST()) ;
    my @found ;
    until ($status != 0) {
        push @found, $key ;
        $status = $self->seq($key, $value, R_NEXT()) ;
    }

    # pass 2: delete from the last key back to the first
    my $doomed ;
    foreach $doomed (reverse @found) {
        $self->del($doomed) ;
    }
}
284
# EXTEND does nothing and returns nothing; it appears to be the
# tied-array EXTEND hook, kept only so that calls to it succeed.
sub EXTEND
{
    return ;
}
286
# Tied-array resize: make the array hold exactly $length records.
#
#   growing   - write an empty record at index $length-1
#   shrinking - delete records from the current last index down to
#               index $length
#   unchanged - do nothing
sub STORESIZE
{
    my ($self, $wanted) = @_ ;
    my $have = $self->length() ;

    if ($wanted > $have) {
        $self->put($wanted - 1, "") ;
    }
    elsif ($wanted < $have) {
        my $index = $have ;
        while (--$index >= $wanted) {
            $self->del($index) ;
        }
    }
}
301
# Reads the duplicate values stored under one key.
#
# Usage: $db->get_dup(key [,flag])
#
#   scalar context       - the number of records whose key equals key
#   list context         - the list of values for those records
#   list context + flag  - a hash mapping each value to the number of
#                          times it occurs
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;
    my $want_list  = wantarray ;
    my $wanted_key = $key ;     # seq() overwrites $key, so keep a copy
    my $value      = 0 ;
    my %tally      = () ;
    my @matches    = () ;
    my $matched    = 0 ;

    # Position the cursor at the key, then step forward until either
    # seq() fails/reaches EOF ($status != 0) or a different key is
    # encountered ($key ne $wanted_key).
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $wanted_key) {

        # save the value or count number of matches
        if (! $want_list)
            { ++ $matched }
        elsif ($flag)
            { ++ $tally{$value} }
        else
            { push (@matches, $value) }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $matched unless $want_list ;
    return $flag ? %tally : @matches ;
}
338
339
8e07c86e 3401;
341__END__
342
3b35bae3 343=head1 NAME
344
1f70e1ea 345DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 346
347=head1 SYNOPSIS
348
349 use DB_File ;
88108326 350
351 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
352 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
353 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 354
3b35bae3 355 $status = $X->del($key [, $flags]) ;
356 $status = $X->put($key, $value [, $flags]) ;
357 $status = $X->get($key, $value [, $flags]) ;
760ac839 358 $status = $X->seq($key, $value, $flags) ;
3b35bae3 359 $status = $X->sync([$flags]) ;
360 $status = $X->fd ;
760ac839 361
f6b705ef 362 # BTREE only
88108326 363 $count = $X->get_dup($key) ;
364 @list = $X->get_dup($key) ;
365 %list = $X->get_dup($key, 1) ;
366
f6b705ef 367 # RECNO only
368 $a = $X->length;
369 $a = $X->pop ;
370 $X->push(list);
371 $a = $X->shift;
372 $X->unshift(list);
373
3b35bae3 374 untie %hash ;
375 untie @array ;
376
377=head1 DESCRIPTION
378
8e07c86e 379B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 380facilities provided by Berkeley DB version 1.x (if you have a newer
381version of DB, see L<Using DB_File with Berkeley DB version 2>). It is
382assumed that you have a copy of the Berkeley DB manual pages at hand
383when reading this documentation. The interface defined here mirrors the
384Berkeley DB interface closely.
68dc0745 385
8e07c86e 386Berkeley DB is a C library which provides a consistent interface to a
387number of database formats. B<DB_File> provides an interface to all
388three of the database types currently supported by Berkeley DB.
3b35bae3 389
390The file types are:
391
392=over 5
393
88108326 394=item B<DB_HASH>
3b35bae3 395
88108326 396This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 397files. This is equivalent to the functionality provided by other
398hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
399the files created using DB_HASH are not compatible with any of the
400other packages mentioned.
3b35bae3 401
8e07c86e 402A default hashing algorithm, which will be adequate for most
403applications, is built into Berkeley DB. If you do need to use your own
404hashing algorithm it is possible to write your own in Perl and have
405B<DB_File> use it instead.
3b35bae3 406
88108326 407=item B<DB_BTREE>
408
409The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 410sorted, balanced binary tree.
3b35bae3 411
8e07c86e 412As with the DB_HASH format, it is possible to provide a user defined
413Perl routine to perform the comparison of keys. By default, though, the
414keys are stored in lexical order.
3b35bae3 415
88108326 416=item B<DB_RECNO>
3b35bae3 417
8e07c86e 418DB_RECNO allows both fixed-length and variable-length flat text files
419to be manipulated using the same key/value pair interface as in DB_HASH
420and DB_BTREE. In this case the key will consist of a record (line)
421number.
3b35bae3 422
423=back
424
1f70e1ea 425=head2 Using DB_File with Berkeley DB version 2
426
427Although B<DB_File> is intended to be used with Berkeley DB version 1,
428it can also be used with version 2. In this case the interface is
429limited to the functionality provided by Berkeley DB 1.x. Anywhere the
430version 2 interface differs, B<DB_File> arranges for it to work like
431version 1. This feature allows B<DB_File> scripts that were built with
432version 1 to be migrated to version 2 without any changes.
433
434If you want to make use of the new features available in Berkeley DB
4352.x, use the Perl module B<BerkeleyDB> instead.
436
437At the time of writing this document the B<BerkeleyDB> module is still
438alpha quality (the version number is < 1.0), and so unsuitable for use
439in any serious development work. Once its version number is >= 1.0, it
440is considered stable enough for real work.
441
442B<Note:> The database file format has changed in Berkeley DB version 2.
443If you cannot recreate your databases, you must dump any existing
444databases with the C<db_dump185> utility that comes with Berkeley DB.
445Once you have upgraded DB_File to use Berkeley DB version 2, your
446databases can be recreated using C<db_load>. Refer to the Berkeley DB
447documentation for further details.
448
449Please read L<COPYRIGHT> before using version 2.x of Berkeley DB with
450DB_File.
451
68dc0745 452=head2 Interface to Berkeley DB
3b35bae3 453
454B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 455in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
456allows B<DB_File> to access Berkeley DB files using either an
457associative array (for DB_HASH & DB_BTREE file types) or an ordinary
458array (for the DB_RECNO file type).
3b35bae3 459
88108326 460In addition to the tie() interface, it is also possible to access most
461of the functions provided in the Berkeley DB API directly.
f6b705ef 462See L<THE API INTERFACE>.
3b35bae3 463
88108326 464=head2 Opening a Berkeley DB Database File
3b35bae3 465
8e07c86e 466Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 467Here is the C prototype for dbopen():
3b35bae3 468
469 DB*
470 dbopen (const char * file, int flags, int mode,
471 DBTYPE type, const void * openinfo)
472
473The parameter C<type> is an enumeration which specifies which of the 3
474interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
475Depending on which of these is actually chosen, the final parameter,
476I<openinfo> points to a data structure which allows tailoring of the
477specific interface method.
478
8e07c86e 479This interface is handled slightly differently in B<DB_File>. Here is
88108326 480an equivalent call using B<DB_File>:
3b35bae3 481
88108326 482 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 483
8e07c86e 484The C<filename>, C<flags> and C<mode> parameters are the direct
485equivalent of their dbopen() counterparts. The final parameter $DB_HASH
486performs the function of both the C<type> and C<openinfo> parameters in
487dbopen().
3b35bae3 488
88108326 489In the example above $DB_HASH is actually a pre-defined reference to a
490hash object. B<DB_File> has three of these pre-defined references.
491Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 492
8e07c86e 493The keys allowed in each of these pre-defined references are limited to
494the names used in the equivalent C structure. So, for example, the
495$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 496C<ffactor>, C<hash>, C<lorder> and C<nelem>.
497
498To change one of these elements, just assign to it like this:
499
500 $DB_HASH->{'cachesize'} = 10000 ;
501
502The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
503usually adequate for most applications. If you do need to create extra
504instances of these objects, constructors are available for each file
505type.
506
507Here are examples of the constructors and the valid options available
508for DB_HASH, DB_BTREE and DB_RECNO respectively.
509
510 $a = new DB_File::HASHINFO ;
511 $a->{'bsize'} ;
512 $a->{'cachesize'} ;
513 $a->{'ffactor'};
514 $a->{'hash'} ;
515 $a->{'lorder'} ;
516 $a->{'nelem'} ;
517
518 $b = new DB_File::BTREEINFO ;
519 $b->{'flags'} ;
520 $b->{'cachesize'} ;
521 $b->{'maxkeypage'} ;
522 $b->{'minkeypage'} ;
523 $b->{'psize'} ;
524 $b->{'compare'} ;
525 $b->{'prefix'} ;
526 $b->{'lorder'} ;
527
528 $c = new DB_File::RECNOINFO ;
529 $c->{'bval'} ;
530 $c->{'cachesize'} ;
531 $c->{'psize'} ;
532 $c->{'flags'} ;
533 $c->{'lorder'} ;
534 $c->{'reclen'} ;
535 $c->{'bfname'} ;
536
537The values stored in the hashes above are mostly the direct equivalent
538of their C counterparts. Like their C counterparts, all are set to
f6b705ef 539default values - that means you don't have to set I<all> of the
88108326 540values when you only want to change one. Here is an example:
541
542 $a = new DB_File::HASHINFO ;
543 $a->{'cachesize'} = 12345 ;
544 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
545
36477c24 546A few of the options need extra discussion here. When used, the C
88108326 547equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
548to C functions. In B<DB_File> these keys are used to store references
549to Perl subs. Below are templates for each of the subs:
550
551 sub hash
552 {
553 my ($data) = @_ ;
554 ...
555 # return the hash value for $data
556 return $hash ;
557 }
3b35bae3 558
88108326 559 sub compare
560 {
561 my ($key1, $key2) = @_ ;
562 ...
563 # return 0 if $key1 eq $key2
564 # -1 if $key1 lt $key2
565 # 1 if $key1 gt $key2
566 return (-1 , 0 or 1) ;
567 }
3b35bae3 568
88108326 569 sub prefix
570 {
571 my ($key1, $key2) = @_ ;
572 ...
573 # return number of bytes of $key2 which are
574 # necessary to determine that it is greater than $key1
575 return $bytes ;
576 }
3b35bae3 577
f6b705ef 578See L<Changing the BTREE sort order> for an example of using the
579C<compare> template.
88108326 580
36477c24 581If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 582C<bval>, you should check out L<The 'bval' Option>.
36477c24 583
88108326 584=head2 Default Parameters
585
586It is possible to omit some or all of the final 4 parameters in the
587call to C<tie> and let them take default values. As DB_HASH is the most
588common file format used, the call:
589
590 tie %A, "DB_File", "filename" ;
591
592is equivalent to:
593
18d2dc8c 594 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 595
596It is also possible to omit the filename parameter as well, so the
597call:
598
599 tie %A, "DB_File" ;
600
601is equivalent to:
602
18d2dc8c 603 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 604
f6b705ef 605See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 606in place of a filename.
607
f6b705ef 608=head2 In Memory Databases
609
610Berkeley DB allows the creation of in-memory databases by using NULL
611(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
612uses C<undef> instead of NULL to provide this functionality.
613
614=head1 DB_HASH
615
616The DB_HASH file format is probably the most commonly used of the three
617file formats that B<DB_File> supports. It is also very straightforward
618to use.
619
68dc0745 620=head2 A Simple Example
f6b705ef 621
622This example shows how to create a database, add key/value pairs to the
623database, delete key/value pairs and finally how to enumerate the
624contents of the database.
625
610ab055 626 use strict ;
f6b705ef 627 use DB_File ;
610ab055 628 use vars qw( %h $k $v ) ;
f6b705ef 629
630 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
631 or die "Cannot open file 'fruit': $!\n";
632
633 # Add a few key/value pairs to the file
634 $h{"apple"} = "red" ;
635 $h{"orange"} = "orange" ;
636 $h{"banana"} = "yellow" ;
637 $h{"tomato"} = "red" ;
638
639 # Check for existence of a key
640 print "Banana Exists\n\n" if $h{"banana"} ;
641
642 # Delete a key/value pair.
643 delete $h{"apple"} ;
644
645 # print the contents of the file
646 while (($k, $v) = each %h)
647 { print "$k -> $v\n" }
648
649 untie %h ;
650
651Here is the output:
652
653 Banana Exists
654
655 orange -> orange
656 tomato -> red
657 banana -> yellow
658
659Note that, as with ordinary associative arrays, the keys are
660retrieved in an apparently random order.
661
662=head1 DB_BTREE
663
664The DB_BTREE format is useful when you want to store data in a given
665order. By default the keys will be stored in lexical order, but as you
666will see from the example shown in the next section, it is very easy to
667define your own sorting function.
668
669=head2 Changing the BTREE sort order
670
671This script shows how to override the default sorting algorithm that
672BTREE uses. Instead of using the normal lexical ordering, a case
673insensitive compare function will be used.
88108326 674
610ab055 675 use strict ;
f6b705ef 676 use DB_File ;
610ab055 677
678 my %h ;
f6b705ef 679
680 sub Compare
681 {
682 my ($key1, $key2) = @_ ;
683 "\L$key1" cmp "\L$key2" ;
684 }
685
686 # specify the Perl sub that will do the comparison
687 $DB_BTREE->{'compare'} = \&Compare ;
688
689 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
690 or die "Cannot open file 'tree': $!\n" ;
691
692 # Add a key/value pair to the file
693 $h{'Wall'} = 'Larry' ;
694 $h{'Smith'} = 'John' ;
695 $h{'mouse'} = 'mickey' ;
696 $h{'duck'} = 'donald' ;
697
698 # Delete
699 delete $h{"duck"} ;
700
701 # Cycle through the keys printing them in order.
702 # Note it is not necessary to sort the keys as
703 # the btree will have kept them in order automatically.
704 foreach (keys %h)
705 { print "$_\n" }
706
707 untie %h ;
708
709Here is the output from the code above.
710
711 mouse
712 Smith
713 Wall
714
715There are a few points to bear in mind if you want to change the
716ordering in a BTREE database:
717
718=over 5
719
720=item 1.
721
722The new compare function must be specified when you create the database.
723
724=item 2.
725
726You cannot change the ordering once the database has been created. Thus
727you must use the same compare function every time you access the
88108326 728database.
729
f6b705ef 730=back
731
68dc0745 732=head2 Handling Duplicate Keys
f6b705ef 733
734The BTREE file type optionally allows a single key to be associated
735with an arbitrary number of values. This option is enabled by setting
736the flags element of C<$DB_BTREE> to R_DUP when creating the database.
737
88108326 738There are some difficulties in using the tied hash interface if you
739want to manipulate a BTREE database with duplicate keys. Consider this
740code:
741
610ab055 742 use strict ;
88108326 743 use DB_File ;
610ab055 744
745 use vars qw($filename %h ) ;
746
88108326 747 $filename = "tree" ;
748 unlink $filename ;
749
750 # Enable duplicate records
751 $DB_BTREE->{'flags'} = R_DUP ;
752
753 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
754 or die "Cannot open $filename: $!\n";
755
756 # Add some key/value pairs to the file
757 $h{'Wall'} = 'Larry' ;
758 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 759 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 760 $h{'Smith'} = 'John' ;
761 $h{'mouse'} = 'mickey' ;
762
763 # iterate through the associative array
764 # and print each key/value pair.
765 foreach (keys %h)
766 { print "$_ -> $h{$_}\n" }
767
f6b705ef 768 untie %h ;
769
88108326 770Here is the output:
771
772 Smith -> John
773 Wall -> Larry
774 Wall -> Larry
f6b705ef 775 Wall -> Larry
88108326 776 mouse -> mickey
777
f6b705ef 778As you can see 3 records have been successfully created with key C<Wall>
88108326 779- the only thing is, when they are retrieved from the database they
f6b705ef 780I<seem> to have the same value, namely C<Larry>. The problem is caused
781by the way that the associative array interface works. Basically, when
782the associative array interface is used to fetch the value associated
783with a given key, it will only ever retrieve the first value.
88108326 784
785Although it may not be immediately obvious from the code above, the
786associative array interface can be used to write values with duplicate
787keys, but it cannot be used to read them back from the database.
788
789The way to get around this problem is to use the Berkeley DB API method
790called C<seq>. This method allows sequential access to key/value
f6b705ef 791pairs. See L<THE API INTERFACE> for details of both the C<seq> method
792and the API in general.
88108326 793
794Here is the script above rewritten using the C<seq> API method.
795
610ab055 796 use strict ;
88108326 797 use DB_File ;
88108326 798
610ab055 799 use vars qw($filename $x %h $status $key $value) ;
800
88108326 801 $filename = "tree" ;
802 unlink $filename ;
803
804 # Enable duplicate records
805 $DB_BTREE->{'flags'} = R_DUP ;
806
807 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
808 or die "Cannot open $filename: $!\n";
809
810 # Add some key/value pairs to the file
811 $h{'Wall'} = 'Larry' ;
812 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 813 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 814 $h{'Smith'} = 'John' ;
815 $h{'mouse'} = 'mickey' ;
816
f6b705ef 817 # iterate through the btree using seq
88108326 818 # and print each key/value pair.
610ab055 819 $key = $value = 0 ;
f6b705ef 820 for ($status = $x->seq($key, $value, R_FIRST) ;
821 $status == 0 ;
822 $status = $x->seq($key, $value, R_NEXT) )
88108326 823 { print "$key -> $value\n" }
824
825 undef $x ;
826 untie %h ;
827
828that prints:
829
830 Smith -> John
831 Wall -> Brick
f6b705ef 832 Wall -> Brick
88108326 833 Wall -> Larry
834 mouse -> mickey
835
f6b705ef 836This time we have got all the key/value pairs, including the multiple
88108326 837values associated with the key C<Wall>.
838
68dc0745 839=head2 The get_dup() Method
f6b705ef 840
841B<DB_File> comes with a utility method, called C<get_dup>, to assist in
88108326 842reading duplicate values from BTREE databases. The method can take the
843following forms:
844
845 $count = $x->get_dup($key) ;
846 @list = $x->get_dup($key) ;
847 %list = $x->get_dup($key, 1) ;
848
849In a scalar context the method returns the number of values associated
850with the key, C<$key>.
851
852In list context, it returns all the values which match C<$key>. Note
f6b705ef 853that the values will be returned in an apparently random order.
88108326 854
7a2e2cd6 855In list context, if the second parameter is present and evaluates
856TRUE, the method returns an associative array. The keys of the
857associative array correspond to the values that matched in the BTREE
858and the values of the array are a count of the number of times that
859particular value occurred in the BTREE.
88108326 860
f6b705ef 861So assuming the database created above, we can use C<get_dup> like
88108326 862this:
863
610ab055 864 my $cnt = $x->get_dup("Wall") ;
88108326 865 print "Wall occurred $cnt times\n" ;
866
610ab055 867 my %hash = $x->get_dup("Wall", 1) ;
88108326 868 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 869 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 870
610ab055 871 my @list = $x->get_dup("Wall") ;
88108326 872 print "Wall => [@list]\n" ;
873
f6b705ef 874 @list = $x->get_dup("Smith") ;
88108326 875 print "Smith => [@list]\n" ;
876
f6b705ef 877 @list = $x->get_dup("Dog") ;
88108326 878 print "Dog => [@list]\n" ;
879
880
881and it will print:
882
f6b705ef 883 Wall occurred 3 times
88108326 884 Larry is there
f6b705ef 885 There are 2 Brick Walls
886 Wall => [Brick Brick Larry]
88108326 887 Smith => [John]
888 Dog => []
3b35bae3 889
f6b705ef 890=head2 Matching Partial Keys
891
892The BTREE interface has a feature which allows partial keys to be
893matched. This functionality is I<only> available when the C<seq> method
894is used along with the R_CURSOR flag.
895
896 $x->seq($key, $value, R_CURSOR) ;
897
898Here is the relevant quote from the dbopen man page where it defines
899the use of the R_CURSOR flag with seq:
900
f6b705ef 901 Note, for the DB_BTREE access method, the returned key is not
902 necessarily an exact match for the specified key. The returned key
903 is the smallest key greater than or equal to the specified key,
904 permitting partial key matches and range searches.
905
f6b705ef 906In the example script below, the C<match> sub uses this feature to find
907and print the first matching key/value pair given a partial key.
908
610ab055 909 use strict ;
f6b705ef 910 use DB_File ;
911 use Fcntl ;
610ab055 912
913 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 914
915 sub match
916 {
917 my $key = shift ;
610ab055 918 my $value = 0;
f6b705ef 919 my $orig_key = $key ;
920 $x->seq($key, $value, R_CURSOR) ;
921 print "$orig_key\t-> $key\t-> $value\n" ;
922 }
923
924 $filename = "tree" ;
925 unlink $filename ;
926
927 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
928 or die "Cannot open $filename: $!\n";
929
930 # Add some key/value pairs to the file
931 $h{'mouse'} = 'mickey' ;
932 $h{'Wall'} = 'Larry' ;
933 $h{'Walls'} = 'Brick' ;
934 $h{'Smith'} = 'John' ;
935
936
610ab055 937 $key = $value = 0 ;
f6b705ef 938 print "IN ORDER\n" ;
939 for ($st = $x->seq($key, $value, R_FIRST) ;
940 $st == 0 ;
941 $st = $x->seq($key, $value, R_NEXT) )
942
943 { print "$key -> $value\n" }
944
945 print "\nPARTIAL MATCH\n" ;
946
947 match "Wa" ;
948 match "A" ;
949 match "a" ;
950
951 undef $x ;
952 untie %h ;
953
954Here is the output:
955
956 IN ORDER
957 Smith -> John
958 Wall -> Larry
959 Walls -> Brick
960 mouse -> mickey
961
962 PARTIAL MATCH
963 Wa -> Wall -> Larry
964 A -> Smith -> John
965 a -> mouse -> mickey
966
967=head1 DB_RECNO
968
969DB_RECNO provides an interface to flat text files. Both variable and
970fixed length records are supported.
3b35bae3 971
88108326 972In order to make RECNO more compatible with Perl the array offset for
973all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 974
88108326 975As with normal Perl arrays, a RECNO array can be accessed using
976negative indexes. The index -1 refers to the last element of the array,
977-2 the second last, and so on. Attempting to access an element before
978the start of the array will raise a fatal run-time error.
3b35bae3 979
68dc0745 980=head2 The 'bval' Option
36477c24 981
982The operation of the bval option warrants some discussion. Here is the
983definition of bval from the Berkeley DB 1.85 recno manual page:
984
985 The delimiting byte to be used to mark the end of a
986 record for variable-length records, and the pad charac-
987 ter for fixed-length records. If no value is speci-
988 fied, newlines (``\n'') are used to mark the end of
989 variable-length records and fixed-length records are
990 padded with spaces.
991
992The second sentence is wrong. In actual fact bval will only default to
993C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
994openinfo parameter is used at all, the value that happens to be in bval
995will be used. That means you always have to specify bval when making
996use of any of the options in the openinfo parameter. This documentation
997error will be fixed in the next release of Berkeley DB.
998
999That clarifies the situation with regard to Berkeley DB itself. What
1000about B<DB_File>? Well, the behavior defined in the quote above is
1001quite useful, so B<DB_File> conforms to it.
1002
1003That means that you can specify other options (e.g. cachesize) and
1004still have bval default to C<"\n"> for variable length records, and
1005space for fixed length records.
1006
f6b705ef 1007=head2 A Simple Example
3b35bae3 1008
f6b705ef 1009Here is a simple example that uses RECNO.
1010
610ab055 1011 use strict ;
f6b705ef 1012 use DB_File ;
f6b705ef 1013
610ab055 1014 my @h ;
f6b705ef 1015 tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
1016 or die "Cannot open file 'text': $!\n" ;
1017
1018 # Add a few key/value pairs to the file
1019 $h[0] = "orange" ;
1020 $h[1] = "blue" ;
1021 $h[2] = "yellow" ;
1022
1023 # Check for existence of a key
1024 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1025
1026 # use a negative index
1027 print "The last element is $h[-1]\n" ;
1028 print "The 2nd last element is $h[-2]\n" ;
1029
1030 untie @h ;
3b35bae3 1031
f6b705ef 1032Here is the output from the script:
1033
1034
1035 Element 1 Exists with value blue
1036 The last element is yellow
1037 The 2nd last element is blue
1038
1039=head2 Extra Methods
1040
045291aa 1041If you are using a version of Perl earlier than 5.004_57, the tied
1042array interface is quite limited. The example script above will work,
1043but you won't be able to use C<push>, C<pop>, C<shift>, C<unshift>
1044etc. with the tied array.
1045
1046To make the interface more useful for older versions of Perl, a number
1047of methods are supplied with B<DB_File> to simulate the missing array
1048operations. All these methods are accessed via the object returned from
1049the tie call.
f6b705ef 1050
1051Here are the methods:
1052
1053=over 5
3b35bae3 1054
f6b705ef 1055=item B<$X-E<gt>push(list) ;>
1056
1057Pushes the elements of C<list> to the end of the array.
1058
1059=item B<$value = $X-E<gt>pop ;>
1060
1061Removes and returns the last element of the array.
1062
1063=item B<$X-E<gt>shift>
1064
1065Removes and returns the first element of the array.
1066
1067=item B<$X-E<gt>unshift(list) ;>
1068
1069Pushes the elements of C<list> to the start of the array.
1070
1071=item B<$X-E<gt>length>
1072
1073Returns the number of elements in the array.
1074
1075=back
1076
1077=head2 Another Example
1078
1079Here is a more complete example that makes use of some of the methods
1080described above. It also makes use of the API interface directly (see
1081L<THE API INTERFACE>).
1082
1083 use strict ;
1084 use vars qw(@h $H $file $i) ;
1085 use DB_File ;
1086 use Fcntl ;
1087
1088 $file = "text" ;
1089
1090 unlink $file ;
1091
1092 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1093 or die "Cannot open file $file: $!\n" ;
1094
1095 # first create a text file to play with
1096 $h[0] = "zero" ;
1097 $h[1] = "one" ;
1098 $h[2] = "two" ;
1099 $h[3] = "three" ;
1100 $h[4] = "four" ;
1101
1102
1103 # Print the records in order.
1104 #
1105 # The length method is needed here because evaluating a tied
1106 # array in a scalar context does not return the number of
1107 # elements in the array.
1108
1109 print "\nORIGINAL\n" ;
1110 foreach $i (0 .. $H->length - 1) {
1111 print "$i: $h[$i]\n" ;
1112 }
1113
1114 # use the push & pop methods
1115 $a = $H->pop ;
1116 $H->push("last") ;
1117 print "\nThe last record was [$a]\n" ;
1118
1119 # and the shift & unshift methods
1120 $a = $H->shift ;
1121 $H->unshift("first") ;
1122 print "The first record was [$a]\n" ;
1123
1124 # Use the API to add a new record after record 2.
1125 $i = 2 ;
1126 $H->put($i, "Newbie", R_IAFTER) ;
1127
1128 # and a new record before record 1.
1129 $i = 1 ;
1130 $H->put($i, "New One", R_IBEFORE) ;
1131
1132 # delete record 3
1133 $H->del(3) ;
1134
1135 # now print the records in reverse order
1136 print "\nREVERSE\n" ;
1137 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1138 { print "$i: $h[$i]\n" }
1139
1140 # same again, but use the API functions instead
1141 print "\nREVERSE again\n" ;
610ab055 1142 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1143 for ($s = $H->seq($k, $v, R_LAST) ;
1144 $s == 0 ;
1145 $s = $H->seq($k, $v, R_PREV))
1146 { print "$k: $v\n" }
1147
1148 undef $H ;
1149 untie @h ;
1150
1151and this is what it outputs:
1152
1153 ORIGINAL
1154 0: zero
1155 1: one
1156 2: two
1157 3: three
1158 4: four
1159
1160 The last record was [four]
1161 The first record was [zero]
1162
1163 REVERSE
1164 5: last
1165 4: three
1166 3: Newbie
1167 2: one
1168 1: New One
1169 0: first
1170
1171 REVERSE again
1172 5: last
1173 4: three
1174 3: Newbie
1175 2: one
1176 1: New One
1177 0: first
1178
1179Notes:
1180
1181=over 5
1182
1183=item 1.
1184
1185Rather than iterating through the array C<@h> like this:
1186
1187 foreach $i (@h)
1188
1189it is necessary to use either this:
1190
1191 foreach $i (0 .. $H->length - 1)
1192
1193or this:
1194
1195 for ($a = $H->seq($k, $v, R_FIRST) ;
1196 $a == 0 ;
1197 $a = $H->seq($k, $v, R_NEXT) )
1198
1199=item 2.
1200
1201Notice that both times the C<put> method was used the record index was
1202specified using a variable, C<$i>, rather than the literal value
1203itself. This is because C<put> will return the record number of the
1204inserted line via that parameter.
1205
1206=back
1207
1208=head1 THE API INTERFACE
3b35bae3 1209
1210As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1211possible to make direct use of most of the API functions defined in the
8e07c86e 1212Berkeley DB documentation.
3b35bae3 1213
88108326 1214To do this you need to store a copy of the object returned from the tie.
3b35bae3 1215
88108326 1216 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1217
8e07c86e 1218Once you have done that, you can access the Berkeley DB API functions
88108326 1219as B<DB_File> methods directly like this:
3b35bae3 1220
1221 $db->put($key, $value, R_NOOVERWRITE) ;
1222
88108326 1223B<Important:> If you have saved a copy of the object returned from
1224C<tie>, the underlying database file will I<not> be closed until both
1225the tied variable is untied and all copies of the saved object are
610ab055 1226destroyed.
88108326 1227
1228 use DB_File ;
1229 $db = tie %hash, "DB_File", "filename"
1230 or die "Cannot tie filename: $!" ;
1231 ...
1232 undef $db ;
1233 untie %hash ;
1234
9a2c4ce3 1235See L<The untie() Gotcha> for more details.
778183f3 1236
88108326 1237All the functions defined in L<dbopen> are available except for
1238close() and dbopen() itself. The B<DB_File> method interface to the
1239supported functions has been implemented to mirror the way Berkeley DB
1240works whenever possible. In particular note that:
1241
1242=over 5
1243
1244=item *
1245
1246The methods return a status value. All return 0 on success.
1247All return -1 to signify an error and set C<$!> to the exact
1248error code. The return code 1 generally (but not always) means that the
1249key specified did not exist in the database.
1250
1251Other return codes are defined. See below and in the Berkeley DB
1252documentation for details. The Berkeley DB documentation should be used
1253as the definitive source.
1254
1255=item *
3b35bae3 1256
88108326 1257Whenever a Berkeley DB function returns data via one of its parameters,
1258the equivalent B<DB_File> method does exactly the same.
3b35bae3 1259
88108326 1260=item *
1261
1262If you are careful, it is possible to mix API calls with the tied
1263hash/array interface in the same piece of code. Although only a few of
1264the methods used to implement the tied interface currently make use of
1265the cursor, you should always assume that the cursor has been changed
1266any time the tied hash/array interface is used. As an example, this
1267code will probably not do what you expect:
1268
1269 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1270 or die "Cannot tie $filename: $!" ;
1271
1272 # Get the first key/value pair and set the cursor
1273 $X->seq($key, $value, R_FIRST) ;
1274
1275 # this line will modify the cursor
1276 $count = scalar keys %x ;
1277
1278 # Get the second key/value pair.
1279 # oops, it didn't, it got the last key/value pair!
1280 $X->seq($key, $value, R_NEXT) ;
1281
1282The code above can be rearranged to get around the problem, like this:
1283
1284 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1285 or die "Cannot tie $filename: $!" ;
1286
1287 # this line will modify the cursor
1288 $count = scalar keys %x ;
1289
1290 # Get the first key/value pair and set the cursor
1291 $X->seq($key, $value, R_FIRST) ;
1292
1293 # Get the second key/value pair.
1294 # worked this time.
1295 $X->seq($key, $value, R_NEXT) ;
1296
1297=back
1298
1299All the constants defined in L<dbopen> for use in the flags parameters
1300in the methods defined below are also available. Refer to the Berkeley
1301DB documentation for the precise meaning of the flags values.
1302
1303Below is a list of the methods available.
3b35bae3 1304
1305=over 5
1306
f6b705ef 1307=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1308
1309Given a key (C<$key>) this method reads the value associated with it
1310from the database. The value read from the database is returned in the
1311C<$value> parameter.
3b35bae3 1312
88108326 1313If the key does not exist the method returns 1.
3b35bae3 1314
88108326 1315No flags are currently defined for this method.
3b35bae3 1316
f6b705ef 1317=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1318
88108326 1319Stores the key/value pair in the database.
1320
1321If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1322will have the record number of the inserted key/value pair set.
3b35bae3 1323
88108326 1324Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1325R_SETCURSOR.
1326
f6b705ef 1327=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1328
88108326 1329Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1330
88108326 1331A return code of 1 means that the requested key was not in the
1332database.
3b35bae3 1333
88108326 1334R_CURSOR is the only valid flag at present.
3b35bae3 1335
f6b705ef 1336=item B<$status = $X-E<gt>fd ;>
3b35bae3 1337
88108326 1338Returns the file descriptor for the underlying database.
3b35bae3 1339
f6b705ef 1340See L<Locking Databases> for an example of how to make use of the
88108326 1341C<fd> method to lock your database.
3b35bae3 1342
f6b705ef 1343=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1344
88108326 1345This interface allows sequential retrieval from the database. See
1346L<dbopen> for full details.
1347
1348Both the C<$key> and C<$value> parameters will be set to the key/value
1349pair read from the database.
1350
1351The flags parameter is mandatory. The valid flag values are R_CURSOR,
1352R_FIRST, R_LAST, R_NEXT and R_PREV.
1353
f6b705ef 1354=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1355
1356Flushes any cached buffers to disk.
1357
1358R_RECNOSYNC is the only valid flag at present.
3b35bae3 1359
1360=back
1361
f6b705ef 1362=head1 HINTS AND TIPS
3b35bae3 1363
3b35bae3 1364
cb1a09d0 1365=head2 Locking Databases
3b35bae3 1366
cb1a09d0 1367Concurrent access of a read-write database by several parties requires
1368them all to use some kind of locking. Here's an example of Tom's that
1369uses the I<fd> method to get the file descriptor, and then a careful
1370open() to give something Perl will flock() for you. Run this repeatedly
1371in the background to watch the locks granted in proper order.
3b35bae3 1372
cb1a09d0 1373 use DB_File;
1374
1375 use strict;
1376
1377 sub LOCK_SH { 1 }
1378 sub LOCK_EX { 2 }
1379 sub LOCK_NB { 4 }
1380 sub LOCK_UN { 8 }
1381
1382 my($oldval, $fd, $db, %db, $value, $key);
1383
1384 $key = shift || 'default';
1385 $value = shift || 'magic';
1386
1387 $value .= " $$";
1388
1389 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1390 || die "dbcreat /tmp/foo.db $!";
1391 $fd = $db->fd;
1392 print "$$: db fd is $fd\n";
1393 open(DB_FH, "+<&=$fd") || die "dup $!";
1394
1395
1396 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1397 print "$$: CONTENTION; can't read during write update!
1398 Waiting for read lock ($!) ....";
1399 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1400 }
1401 print "$$: Read lock granted\n";
1402
1403 $oldval = $db{$key};
1404 print "$$: Old value was $oldval\n";
1405 flock(DB_FH, LOCK_UN);
1406
1407 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1408 print "$$: CONTENTION; must have exclusive lock!
1409 Waiting for write lock ($!) ....";
1410 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1411 }
1412
1413 print "$$: Write lock granted\n";
1414 $db{$key} = $value;
610ab055 1415 $db->sync; # to flush
cb1a09d0 1416 sleep 10;
1417
1418 flock(DB_FH, LOCK_UN);
88108326 1419 undef $db;
cb1a09d0 1420 untie %db;
1421 close(DB_FH);
1422 print "$$: Updated db to $key=$value\n";
1423
68dc0745 1424=head2 Sharing Databases With C Applications
f6b705ef 1425
1426There is no technical reason why a Berkeley DB database cannot be
1427shared by both a Perl and a C application.
1428
1429The vast majority of problems that are reported in this area boil down
1430to the fact that C strings are NULL terminated, whilst Perl strings are
1431not.
1432
1433Here is a real example. Netscape 2.0 keeps a record of the locations you
1434visit along with the time you last visited them in a DB_HASH database.
1435This is usually stored in the file F<~/.netscape/history.db>. The key
1436field in the database is the location string and the value field is the
1437time the location was last visited stored as a 4 byte binary value.
1438
1439If you haven't already guessed, the location string is stored with a
1440terminating NULL. This means you need to be careful when accessing the
1441database.
1442
1443Here is a snippet of code that is loosely based on Tom Christiansen's
1444I<ggh> script (available from your nearest CPAN archive in
1445F<authors/id/TOMC/scripts/nshist.gz>).
1446
610ab055 1447 use strict ;
f6b705ef 1448 use DB_File ;
1449 use Fcntl ;
f6b705ef 1450
610ab055 1451 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1452 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1453
1454 $HISTORY = "$dotdir/.netscape/history.db";
1455
1456 tie %hist_db, 'DB_File', $HISTORY
1457 or die "Cannot open $HISTORY: $!\n" ;
1458
1459 # Dump the complete database
1460 while ( ($href, $binary_time) = each %hist_db ) {
1461
1462 # remove the terminating NULL
1463 $href =~ s/\x00$// ;
1464
1465 # convert the binary time into a user friendly string
1466 $date = localtime unpack("V", $binary_time);
1467 print "$date $href\n" ;
1468 }
1469
1470 # check for the existence of a specific key
1471 # remember to add the NULL
1472 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1473 $date = localtime unpack("V", $binary_time) ;
1474 print "Last visited mox.perl.com on $date\n" ;
1475 }
1476 else {
1477 print "Never visited mox.perl.com\n"
1478 }
1479
1480 untie %hist_db ;
1481
68dc0745 1482=head2 The untie() Gotcha
778183f3 1483
7a2e2cd6 1484If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1485recommended that you read L<perltie/The untie Gotcha>.
778183f3 1486
1487Even if you don't currently make use of the API interface, it is still
1488worth reading it.
1489
1490Here is an example which illustrates the problem from a B<DB_File>
1491perspective:
1492
1493 use DB_File ;
1494 use Fcntl ;
1495
1496 my %x ;
1497 my $X ;
1498
1499 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1500 or die "Cannot tie first time: $!" ;
1501
1502 $x{123} = 456 ;
1503
1504 untie %x ;
1505
1506 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1507 or die "Cannot tie second time: $!" ;
1508
1509 untie %x ;
1510
1511When run, the script will produce this error message:
1512
1513 Cannot tie second time: Invalid argument at bad.file line 14.
1514
1515Although the error message above refers to the second tie() statement
1516in the script, the source of the problem is really with the untie()
1517statement that precedes it.
1518
1519Having read L<perltie> you will probably have already guessed that the
1520error is caused by the extra copy of the tied object stored in C<$X>.
1521If you haven't, then the problem boils down to the fact that the
1522B<DB_File> destructor, DESTROY, will not be called until I<all>
1523references to the tied object are destroyed. Both the tied variable,
1524C<%x>, and C<$X> above hold a reference to the object. The call to
1525untie() will destroy the first, but C<$X> still holds a valid
1526reference, so the destructor will not get called and the database file
1527F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1528attempt to open a database that is already open via the catch-all
1529"Invalid argument" doesn't help.
1530
1531If you run the script with the C<-w> flag the error message becomes:
1532
1533 untie attempted while 1 inner references still exist at bad.file line 12.
1534 Cannot tie second time: Invalid argument at bad.file line 14.
1535
1536which pinpoints the real problem. Finally the script can now be
1537modified to fix the original problem by destroying the API object
1538before the untie:
1539
1540 ...
1541 $x{123} = 456 ;
1542
1543 undef $X ;
1544 untie %x ;
1545
1546 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1547 ...
1548
f6b705ef 1549
1550=head1 COMMON QUESTIONS
1551
1552=head2 Why is there Perl source in my database?
1553
1554If you look at the contents of a database file created by DB_File,
1555there can sometimes be part of a Perl script included in it.
1556
1557This happens because Berkeley DB uses dynamic memory to allocate
1558buffers which will subsequently be written to the database file. Being
1559dynamic, the memory could have been used for anything before DB
1560malloced it. As Berkeley DB doesn't clear the memory once it has been
1561allocated, the unused portions will contain random junk. In the case
1562where a Perl script gets written to the database, the random junk will
1563correspond to an area of dynamic memory that happened to be used during
1564the compilation of the script.
1565
1566Unless you don't like the possibility of there being part of your Perl
1567scripts embedded in a database file, this is nothing to worry about.
1568
1569=head2 How do I store complex data structures with DB_File?
1570
1571Although B<DB_File> cannot do this directly, there is a module which
1572can layer transparently over B<DB_File> to accomplish this feat.
1573
1574Check out the MLDBM module, available on CPAN in the directory
1575F<modules/by-module/MLDBM>.
1576
1577=head2 What does "Invalid Argument" mean?
1578
1579You will get this error message when one of the parameters in the
1580C<tie> call is wrong. Unfortunately there are quite a few parameters to
1581get wrong, so it can be difficult to figure out which one it is.
1582
1583Here are a couple of possibilities:
1584
1585=over 5
1586
1587=item 1.
1588
610ab055 1589Attempting to reopen a database without closing it.
f6b705ef 1590
1591=item 2.
1592
1593Using the O_WRONLY flag.
1594
1595=back
1596
1597=head2 What does "Bareword 'DB_File' not allowed" mean?
1598
1599You will encounter this particular error message when you have the
1600C<strict 'subs'> pragma (or the full strict pragma) in your script.
1601Consider this script:
1602
1603 use strict ;
1604 use DB_File ;
1605 use vars qw(%x) ;
1606 tie %x, DB_File, "filename" ;
1607
1608Running it produces the error in question:
1609
1610 Bareword "DB_File" not allowed while "strict subs" in use
1611
1612To get around the error, place the word C<DB_File> in either single or
1613double quotes, like this:
1614
1615 tie %x, "DB_File", "filename" ;
1616
1617Although it might seem like a real pain, it is really worth the effort
1618of having a C<use strict> in all your scripts.
1619
cb1a09d0 1620=head1 HISTORY
1621
1f70e1ea 1622Moved to the Changes file.
610ab055 1623
1f70e1ea 1624=head1 BUGS
05475680 1625
1f70e1ea 1626Some older versions of Berkeley DB had problems with fixed length
1627records using the RECNO file format. This problem has been fixed since
1628version 1.85 of Berkeley DB.
e858de61 1629
1f70e1ea 1630I am sure there are bugs in the code. If you do find any, or can
1631suggest any enhancements, I would welcome your comments.
a6ed719b 1632
1f70e1ea 1633=head1 AVAILABILITY
a6ed719b 1634
1f70e1ea 1635B<DB_File> comes with the standard Perl source distribution. Look in
1636the directory F<ext/DB_File>. Given the amount of time between releases
1637of Perl the version that ships with Perl is quite likely to be out of
1638date, so the most recent version can always be found on CPAN (see
1639L<perlmod/CPAN> for details), in the directory
1640F<modules/by-module/DB_File>.
a6ed719b 1641
1f70e1ea 1642This version of B<DB_File> will work with either version 1.x or 2.x of
1643Berkeley DB, but is limited to the functionality provided by version 1.
a6ed719b 1644
1f70e1ea 1645The official web site for Berkeley DB is
1646F<http://www.sleepycat.com/db>. The ftp equivalent is
1647F<ftp.sleepycat.com:/pub>. Both versions 1 and 2 of Berkeley DB are
1648available there.
93af7a87 1649
1f70e1ea 1650Alternatively, Berkeley DB version 1 is available at your nearest CPAN
1651archive in F<src/misc/db.1.85.tar.gz>.
e858de61 1652
1f70e1ea 1653If you are running IRIX, then get Berkeley DB version 1 from
1654F<http://reality.sgi.com/ariel>. It has the patches necessary to
1655compile properly on IRIX 5.3.
610ab055 1656
1f70e1ea 1657=head1 COPYRIGHT
3b35bae3 1658
1f70e1ea 1659Copyright (c) 1997 Paul Marquess. All rights reserved. This program is
1660free software; you can redistribute it and/or modify it under the same
1661terms as Perl itself.
3b35bae3 1662
1f70e1ea 1663Although B<DB_File> is covered by the Perl license, the library it
1664makes use of, namely Berkeley DB, is not. Berkeley DB has its own
1665copyright and its own license. Please take the time to read it.
3b35bae3 1666
1f70e1ea 1667The license for Berkeley DB version 2, and how it relates to DB_File
1668does need some extra clarification. Here are a few words taken from
1669the Berkeley DB FAQ regarding the version 2 license:
3b35bae3 1670
1f70e1ea 1671 The major difference is that the license for DB 2.0, when
1672 downloaded from the net, requires that the software that
1673 uses DB 2.0 be freely redistributable.
f6b705ef 1674
1f70e1ea 1675That means that if you want to use DB_File, and you have changed either
1676the source for Berkeley DB or Perl, then the changes must be freely
1677available.
68dc0745 1678
1f70e1ea 1679In the case of Perl, the term source refers to the complete source
1680code for Perl (e.g. sv.c, toke.c, perl.h) and any external modules that
1681you are using (e.g. DB_File, Tk).
3b35bae3 1682
1f70e1ea 1683Note that any Perl scripts that you write are your property - this
1684includes scripts that make use of DB_File. Neither the Perl license nor
1685the Berkeley DB license place any restriction on what you have to do
1686with them.
88108326 1687
1f70e1ea 1688If you are in any doubt about the license situation, contact either the
1689Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 1690
1691
3b35bae3 1692=head1 SEE ALSO
1693
1694L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>
1695
3b35bae3 1696=head1 AUTHOR
1697
8e07c86e 1698The DB_File interface was written by Paul Marquess
88108326 1699E<lt>pmarquess@bfsec.bt.co.ukE<gt>.
d3ef3b8a 1700Questions about the DB system itself may be addressed to
1701E<lt>db@sleepycat.comE<gt>.
3b35bae3 1702
1703=cut