Sys::Hostname should localize $SIG{__DIE__}
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
3# written by Paul Marquess (pmarquess@bfsec.bt.co.uk)
05475680 4# last modified 30th Apr 1997
5# version 1.14
36477c24 6#
a0b8c8c1 7# Copyright (c) 1995, 1996, 1997 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
12package DB_File::HASHINFO ;
785da04d 13
610ab055 14require 5.003 ;
15
785da04d 16use strict;
8e07c86e 17use Carp;
88108326 18require Tie::Hash;
19@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 20
# Constructor.  Builds a fresh hash, ties it to the invoking class (so that
# every element access is routed through that class's TIEHASH/FETCH/STORE
# methods) and returns a reference to it blessed into the same class.
sub new
{
    my ($class) = @_ ;

    my %info ;
    tie %info, $class ;

    return bless \%info, $class ;
}
28
610ab055 29
# tie() constructor for the HASH option set.
# The returned object holds two hashes:
#   VALID - the option names that may be used as keys (each mapped to 1)
#   GOT   - initially empty
sub TIEHASH
{
    my ($class) = @_ ;

    my @names = qw( bsize ffactor nelem cachesize hash lorder) ;

    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
8e07c86e 40
610ab055 41
# Tied-hash read.  Returns the value held in GOT for $key (undef when nothing
# is held there).  Croaks, naming the object's class, when $key is not one of
# the names listed in VALID.
sub FETCH
{
    my ($self, $key) = @_ ;

    unless ( exists $self->{VALID}{$key} )
    {
        my $pkg = ref $self ;
        croak "${pkg}::FETCH - Unknown element '$key'" ;
    }

    return $self->{GOT}{$key} ;
}
52
53
# Tied-hash write.  Records $value under $key in GOT and returns nothing.
# Croaks, naming the object's class, when $key is not one of the names
# listed in VALID; in that case nothing is stored.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    if ( ! exists $self->{VALID}{$key} )
    {
        my $pkg = ref $self ;
        croak "${pkg}::STORE - Unknown element '$key'" ;
    }

    $self->{GOT}{$key} = $value ;
    return ;
}
69
# Tied-hash delete.  Removes any value held in GOT for $key and returns
# nothing.  Croaks when $key is not one of the names listed in VALID.
#
# The error message is prefixed with the class of the object the method was
# invoked on (as FETCH and STORE do), so that classes inheriting this method
# are reported under their own name rather than a fixed one.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}
84
# Tied-hash exists().  Reports whether $key is one of the names listed in
# VALID; it does not look at whether a value has been stored for it.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{VALID}{$key} ;
}
92
# Helper for the tie methods this class does not support: always croaks,
# naming the object's class and the method that was requested.
sub NotHere
{
    my ($self, $method) = @_ ;

    croak ref($self) . " does not define the method ${method}" ;
}
100
# DESTROY empties the object's own hash.
sub DESTROY  { my ($self) = @_ ; undef %$self }

# Iteration and clearing are not supported: each of these simply reports
# itself through the object's NotHere method.
sub FIRSTKEY { my ($self) = @_ ; $self->NotHere("FIRSTKEY") }
sub NEXTKEY  { my ($self) = @_ ; $self->NotHere("NEXTKEY") }
sub CLEAR    { my ($self) = @_ ; $self->NotHere("CLEAR") }
8e07c86e 105
106package DB_File::RECNOINFO ;
785da04d 107
88108326 108use strict ;
109
110@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 111
# tie() constructor for the RECNO option set.
# The returned object holds two hashes:
#   VALID - the option names that may be used as keys (each mapped to 1)
#   GOT   - initially empty
sub TIEHASH
{
    my ($class) = @_ ;

    my @names = qw( bval cachesize psize flags lorder reclen bfname ) ;

    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
122
88108326 123package DB_File::BTREEINFO ;
8e07c86e 124
88108326 125use strict ;
8e07c86e 126
88108326 127@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 128
# tie() constructor for the BTREE option set.
# The returned object holds two hashes:
#   VALID - the option names that may be used as keys (each mapped to 1)
#   GOT   - initially empty
sub TIEHASH
{
    my ($class) = @_ ;

    my @names = qw( flags cachesize maxkeypage minkeypage psize
                    compare prefix lorder ) ;

    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
140
141
8e07c86e 142package DB_File ;
785da04d 143
144use strict;
145use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO) ;
8e07c86e 146use Carp;
147
785da04d 148
05475680 149$VERSION = "1.14" ;
8e07c86e 150
151#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 152$DB_BTREE = new DB_File::BTREEINFO ;
153$DB_HASH = new DB_File::HASHINFO ;
154$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 155
785da04d 156require Tie::Hash;
8e07c86e 157require Exporter;
158use AutoLoader;
159require DynaLoader;
785da04d 160@ISA = qw(Tie::Hash Exporter DynaLoader);
8e07c86e 161@EXPORT = qw(
162 $DB_BTREE $DB_HASH $DB_RECNO
88108326 163
8e07c86e 164 BTREEMAGIC
165 BTREEVERSION
166 DB_LOCK
167 DB_SHMEM
168 DB_TXN
169 HASHMAGIC
170 HASHVERSION
171 MAX_PAGE_NUMBER
172 MAX_PAGE_OFFSET
173 MAX_REC_NUMBER
174 RET_ERROR
175 RET_SPECIAL
176 RET_SUCCESS
177 R_CURSOR
178 R_DUP
179 R_FIRST
180 R_FIXEDLEN
181 R_IAFTER
182 R_IBEFORE
183 R_LAST
184 R_NEXT
185 R_NOKEY
186 R_NOOVERWRITE
187 R_PREV
188 R_RECNOSYNC
189 R_SETCURSOR
190 R_SNAPSHOT
191 __R_UNUSED
88108326 192
8e07c86e 193);
194
# Resolves calls to subs that are not yet defined in this package.
#
# The unqualified name is passed to constant() (not defined in this block;
# presumably supplied by the compiled part of the module - confirm), and $!
# is inspected straight afterwards:
#   - $! set and its text matches /Invalid/: the call is handed on to
#     AutoLoader::AUTOLOAD;
#   - $! set to anything else: croak, reporting the caller's file and line;
#   - $! clear: a sub returning the value is compiled under the requested
#     name and control jumps to it.
sub AUTOLOAD {
    (my $constname = $AUTOLOAD) =~ s/.*:://;

    # Nothing may run between this call and the $! test below, otherwise
    # the error indication could be overwritten.
    my $val = constant($constname, @_ ? $_[0] : 0);

    if ($! != 0) {
        unless ($! =~ /Invalid/) {
            my ($pack, $file, $line) = caller;
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.
";
        }

        $AutoLoader::AUTOLOAD = $AUTOLOAD;
        goto &AutoLoader::AUTOLOAD;
    }

    # Define the sub once so later calls bypass AUTOLOAD altogether.
    eval "sub $AUTOLOAD { $val }";
    goto &$AUTOLOAD;
}
213
f6b705ef 214
215# import borrowed from IO::File
216# exports Fcntl constants if available.
sub import {
    my ($pkg, @symbols) = @_;
    my $callpkg = caller;

    # Export this package's own symbols to the caller first.
    Exporter::export($pkg, $callpkg, @symbols);

    # Best effort: also give the caller Fcntl's O_* names.  Any failure in
    # here (for instance Fcntl not being loadable) is swallowed by the eval.
    eval {
        require Fcntl;
        Exporter::export('Fcntl', $callpkg, '/^O_/');
    };
}
226
785da04d 227bootstrap DB_File $VERSION;
8e07c86e 228
229# Preloaded methods go here. Autoload methods go after __END__, and are
230# processed by the autosplit program.
231
# Shared implementation behind this package's TIEHASH and TIEARRAY.
# Works on a copy of the argument list and passes it to DoTie_ (not defined
# in this block), preceded by a flag that is true when the sub that called
# us has TIEHASH in its name.
sub tie_hash_or_array
{
    my @arg = @_ ;

    # Must be evaluated in this frame: caller(1) names the sub that called
    # us, which is how hash ties are told apart from array ties.
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    # When the fifth argument is a blessed hash reference whose hash is
    # tied, pass the object behind the tie instead of the reference.
    if ( @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ )
    {
        my $info = tied %{ $arg[4] } ;
        $arg[4] = $info if $info ;
    }

    return DoTie_($tieHASH, @arg) ;
}
242
# tie() entry point for hashes.  Delegates to tie_hash_or_array, which looks
# at the name of the sub that called it; it must therefore be reached through
# an ordinary call, as here.
sub TIEHASH
{
    return tie_hash_or_array(@_) ;
}

# tie() entry point for arrays.  Same delegation as TIEHASH.
sub TIEARRAY
{
    return tie_hash_or_array(@_) ;
}
88108326 252
# $db->get_dup(key [,flag])
#
# Walks the records of $db starting at the position seq() reports for $key
# and continuing for as long as seq() succeeds (status 0) and the key it
# hands back is still equal to the key asked for.
#
# Returns:
#   scalar context             - the number of records visited
#   list context, flag false   - the list of values, in the order visited
#   list context, flag true    - a value => occurrence-count list
#
# Croaks with a usage message unless called with 2 or 3 arguments.
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;

    my $wantarray = wantarray ;
    my $origkey   = $key ;
    my $value     = 0 ;
    my $counter   = 0 ;
    my @values    = () ;
    my %values    = () ;

    # seq() is expected to update $key and $value in place; the loop
    # condition depends on that to notice when a different key is reached.
    my $status = $db->seq($key, $value, R_CURSOR()) ;

    while ($status == 0 and $key eq $origkey) {

        # save the value or count number of matches
        if    (! $wantarray) { ++ $counter }
        elsif ($flag)        { ++ $values{$value} }
        else                 { push @values, $value }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $counter unless $wantarray ;
    return $flag ? %values : @values ;
}
289
290
8e07c86e 2911;
292__END__
293
3b35bae3 294=head1 NAME
295
296DB_File - Perl5 access to Berkeley DB
297
298=head1 SYNOPSIS
299
300 use DB_File ;
88108326 301
302 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
303 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
304 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 305
3b35bae3 306 $status = $X->del($key [, $flags]) ;
307 $status = $X->put($key, $value [, $flags]) ;
308 $status = $X->get($key, $value [, $flags]) ;
760ac839 309 $status = $X->seq($key, $value, $flags) ;
3b35bae3 310 $status = $X->sync([$flags]) ;
311 $status = $X->fd ;
760ac839 312
f6b705ef 313 # BTREE only
88108326 314 $count = $X->get_dup($key) ;
315 @list = $X->get_dup($key) ;
316 %list = $X->get_dup($key, 1) ;
317
f6b705ef 318 # RECNO only
319 $a = $X->length;
320 $a = $X->pop ;
321 $X->push(list);
322 $a = $X->shift;
323 $X->unshift(list);
324
3b35bae3 325 untie %hash ;
326 untie @array ;
327
328=head1 DESCRIPTION
329
8e07c86e 330B<DB_File> is a module which allows Perl programs to make use of the
331facilities provided by Berkeley DB. If you intend to use this
f6b705ef 332module you should really have a copy of the Berkeley DB manual pages at
8e07c86e 333hand. The interface defined here mirrors the Berkeley DB interface
334closely.
3b35bae3 335
68dc0745 336Please note that this module will only work with version 1.x of
337Berkeley DB. Once Berkeley DB version 2 is released, B<DB_File> will be
338upgraded to work with it.
339
8e07c86e 340Berkeley DB is a C library which provides a consistent interface to a
341number of database formats. B<DB_File> provides an interface to all
342three of the database types currently supported by Berkeley DB.
3b35bae3 343
344The file types are:
345
346=over 5
347
88108326 348=item B<DB_HASH>
3b35bae3 349
88108326 350This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 351files. This is equivalent to the functionality provided by other
352hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
353the files created using DB_HASH are not compatible with any of the
354other packages mentioned.
3b35bae3 355
8e07c86e 356A default hashing algorithm, which will be adequate for most
357applications, is built into Berkeley DB. If you do need to use your own
358hashing algorithm it is possible to write your own in Perl and have
359B<DB_File> use it instead.
3b35bae3 360
88108326 361=item B<DB_BTREE>
362
363The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 364sorted, balanced binary tree.
3b35bae3 365
8e07c86e 366As with the DB_HASH format, it is possible to provide a user defined
367Perl routine to perform the comparison of keys. By default, though, the
368keys are stored in lexical order.
3b35bae3 369
88108326 370=item B<DB_RECNO>
3b35bae3 371
8e07c86e 372DB_RECNO allows both fixed-length and variable-length flat text files
373to be manipulated using the same key/value pair interface as in DB_HASH
374and DB_BTREE. In this case the key will consist of a record (line)
375number.
3b35bae3 376
377=back
378
68dc0745 379=head2 Interface to Berkeley DB
3b35bae3 380
381B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 382in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
383allows B<DB_File> to access Berkeley DB files using either an
384associative array (for DB_HASH & DB_BTREE file types) or an ordinary
385array (for the DB_RECNO file type).
3b35bae3 386
88108326 387In addition to the tie() interface, it is also possible to access most
388of the functions provided in the Berkeley DB API directly.
f6b705ef 389See L<THE API INTERFACE>.
3b35bae3 390
88108326 391=head2 Opening a Berkeley DB Database File
3b35bae3 392
8e07c86e 393Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 394Here is the C prototype for dbopen():
3b35bae3 395
396 DB*
397 dbopen (const char * file, int flags, int mode,
398 DBTYPE type, const void * openinfo)
399
400The parameter C<type> is an enumeration which specifies which of the 3
401interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
402Depending on which of these is actually chosen, the final parameter,
403I<openinfo> points to a data structure which allows tailoring of the
404specific interface method.
405
8e07c86e 406This interface is handled slightly differently in B<DB_File>. Here is
88108326 407an equivalent call using B<DB_File>:
3b35bae3 408
88108326 409 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 410
8e07c86e 411The C<filename>, C<flags> and C<mode> parameters are the direct
412equivalent of their dbopen() counterparts. The final parameter $DB_HASH
413performs the function of both the C<type> and C<openinfo> parameters in
414dbopen().
3b35bae3 415
88108326 416In the example above $DB_HASH is actually a pre-defined reference to a
417hash object. B<DB_File> has three of these pre-defined references.
418Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 419
The keys allowed in each of these pre-defined references are limited to
the names used in the equivalent C structure. So, for example, the
422$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 423C<ffactor>, C<hash>, C<lorder> and C<nelem>.
424
425To change one of these elements, just assign to it like this:
426
427 $DB_HASH->{'cachesize'} = 10000 ;
428
429The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
430usually adequate for most applications. If you do need to create extra
431instances of these objects, constructors are available for each file
432type.
433
434Here are examples of the constructors and the valid options available
435for DB_HASH, DB_BTREE and DB_RECNO respectively.
436
437 $a = new DB_File::HASHINFO ;
438 $a->{'bsize'} ;
439 $a->{'cachesize'} ;
440 $a->{'ffactor'};
441 $a->{'hash'} ;
442 $a->{'lorder'} ;
443 $a->{'nelem'} ;
444
445 $b = new DB_File::BTREEINFO ;
446 $b->{'flags'} ;
447 $b->{'cachesize'} ;
448 $b->{'maxkeypage'} ;
449 $b->{'minkeypage'} ;
450 $b->{'psize'} ;
451 $b->{'compare'} ;
452 $b->{'prefix'} ;
453 $b->{'lorder'} ;
454
455 $c = new DB_File::RECNOINFO ;
456 $c->{'bval'} ;
457 $c->{'cachesize'} ;
458 $c->{'psize'} ;
459 $c->{'flags'} ;
460 $c->{'lorder'} ;
461 $c->{'reclen'} ;
462 $c->{'bfname'} ;
463
464The values stored in the hashes above are mostly the direct equivalent
of their C counterpart. Like their C counterparts, all are set to
default values - that means you don't have to set I<all> of the
88108326 467values when you only want to change one. Here is an example:
468
469 $a = new DB_File::HASHINFO ;
470 $a->{'cachesize'} = 12345 ;
471 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
472
36477c24 473A few of the options need extra discussion here. When used, the C
88108326 474equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
475to C functions. In B<DB_File> these keys are used to store references
476to Perl subs. Below are templates for each of the subs:
477
478 sub hash
479 {
480 my ($data) = @_ ;
481 ...
482 # return the hash value for $data
483 return $hash ;
484 }
3b35bae3 485
88108326 486 sub compare
487 {
        my ($key1, $key2) = @_ ;
489 ...
490 # return 0 if $key1 eq $key2
491 # -1 if $key1 lt $key2
492 # 1 if $key1 gt $key2
493 return (-1 , 0 or 1) ;
494 }
3b35bae3 495
88108326 496 sub prefix
497 {
        my ($key1, $key2) = @_ ;
499 ...
500 # return number of bytes of $key2 which are
501 # necessary to determine that it is greater than $key1
502 return $bytes ;
503 }
3b35bae3 504
f6b705ef 505See L<Changing the BTREE sort order> for an example of using the
506C<compare> template.
88108326 507
36477c24 508If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 509C<bval>, you should check out L<The 'bval' Option>.
36477c24 510
88108326 511=head2 Default Parameters
512
513It is possible to omit some or all of the final 4 parameters in the
514call to C<tie> and let them take default values. As DB_HASH is the most
515common file format used, the call:
516
517 tie %A, "DB_File", "filename" ;
518
519is equivalent to:
520
18d2dc8c 521 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 522
523It is also possible to omit the filename parameter as well, so the
524call:
525
526 tie %A, "DB_File" ;
527
528is equivalent to:
529
18d2dc8c 530 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 531
f6b705ef 532See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 533in place of a filename.
534
f6b705ef 535=head2 In Memory Databases
536
537Berkeley DB allows the creation of in-memory databases by using NULL
538(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
539uses C<undef> instead of NULL to provide this functionality.
540
541=head1 DB_HASH
542
543The DB_HASH file format is probably the most commonly used of the three
544file formats that B<DB_File> supports. It is also very straightforward
545to use.
546
68dc0745 547=head2 A Simple Example
f6b705ef 548
549This example shows how to create a database, add key/value pairs to the
550database, delete keys/value pairs and finally how to enumerate the
551contents of the database.
552
610ab055 553 use strict ;
f6b705ef 554 use DB_File ;
610ab055 555 use vars qw( %h $k $v ) ;
f6b705ef 556
557 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
558 or die "Cannot open file 'fruit': $!\n";
559
560 # Add a few key/value pairs to the file
561 $h{"apple"} = "red" ;
562 $h{"orange"} = "orange" ;
563 $h{"banana"} = "yellow" ;
564 $h{"tomato"} = "red" ;
565
566 # Check for existence of a key
567 print "Banana Exists\n\n" if $h{"banana"} ;
568
569 # Delete a key/value pair.
570 delete $h{"apple"} ;
571
572 # print the contents of the file
573 while (($k, $v) = each %h)
574 { print "$k -> $v\n" }
575
576 untie %h ;
577
Here is the output:
579
580 Banana Exists
581
582 orange -> orange
583 tomato -> red
584 banana -> yellow
585
Note that, as with ordinary associative arrays, the keys are retrieved
in an apparently random order.
588
589=head1 DB_BTREE
590
591The DB_BTREE format is useful when you want to store data in a given
592order. By default the keys will be stored in lexical order, but as you
593will see from the example shown in the next section, it is very easy to
594define your own sorting function.
595
596=head2 Changing the BTREE sort order
597
598This script shows how to override the default sorting algorithm that
599BTREE uses. Instead of using the normal lexical ordering, a case
600insensitive compare function will be used.
88108326 601
610ab055 602 use strict ;
f6b705ef 603 use DB_File ;
610ab055 604
605 my %h ;
f6b705ef 606
607 sub Compare
608 {
609 my ($key1, $key2) = @_ ;
610 "\L$key1" cmp "\L$key2" ;
611 }
612
613 # specify the Perl sub that will do the comparison
614 $DB_BTREE->{'compare'} = \&Compare ;
615
616 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
617 or die "Cannot open file 'tree': $!\n" ;
618
619 # Add a key/value pair to the file
620 $h{'Wall'} = 'Larry' ;
621 $h{'Smith'} = 'John' ;
622 $h{'mouse'} = 'mickey' ;
623 $h{'duck'} = 'donald' ;
624
625 # Delete
626 delete $h{"duck"} ;
627
628 # Cycle through the keys printing them in order.
629 # Note it is not necessary to sort the keys as
630 # the btree will have kept them in order automatically.
631 foreach (keys %h)
632 { print "$_\n" }
633
634 untie %h ;
635
636Here is the output from the code above.
637
638 mouse
639 Smith
640 Wall
641
There are a few points to bear in mind if you want to change the
643ordering in a BTREE database:
644
645=over 5
646
647=item 1.
648
649The new compare function must be specified when you create the database.
650
651=item 2.
652
653You cannot change the ordering once the database has been created. Thus
654you must use the same compare function every time you access the
88108326 655database.
656
f6b705ef 657=back
658
68dc0745 659=head2 Handling Duplicate Keys
f6b705ef 660
661The BTREE file type optionally allows a single key to be associated
662with an arbitrary number of values. This option is enabled by setting
663the flags element of C<$DB_BTREE> to R_DUP when creating the database.
664
88108326 665There are some difficulties in using the tied hash interface if you
666want to manipulate a BTREE database with duplicate keys. Consider this
667code:
668
610ab055 669 use strict ;
88108326 670 use DB_File ;
610ab055 671
672 use vars qw($filename %h ) ;
673
88108326 674 $filename = "tree" ;
675 unlink $filename ;
676
677 # Enable duplicate records
678 $DB_BTREE->{'flags'} = R_DUP ;
679
680 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
681 or die "Cannot open $filename: $!\n";
682
683 # Add some key/value pairs to the file
684 $h{'Wall'} = 'Larry' ;
685 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 686 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 687 $h{'Smith'} = 'John' ;
688 $h{'mouse'} = 'mickey' ;
689
690 # iterate through the associative array
691 # and print each key/value pair.
692 foreach (keys %h)
693 { print "$_ -> $h{$_}\n" }
694
f6b705ef 695 untie %h ;
696
88108326 697Here is the output:
698
699 Smith -> John
700 Wall -> Larry
701 Wall -> Larry
f6b705ef 702 Wall -> Larry
88108326 703 mouse -> mickey
704
f6b705ef 705As you can see 3 records have been successfully created with key C<Wall>
88108326 706- the only thing is, when they are retrieved from the database they
f6b705ef 707I<seem> to have the same value, namely C<Larry>. The problem is caused
708by the way that the associative array interface works. Basically, when
709the associative array interface is used to fetch the value associated
710with a given key, it will only ever retrieve the first value.
88108326 711
712Although it may not be immediately obvious from the code above, the
713associative array interface can be used to write values with duplicate
714keys, but it cannot be used to read them back from the database.
715
716The way to get around this problem is to use the Berkeley DB API method
717called C<seq>. This method allows sequential access to key/value
f6b705ef 718pairs. See L<THE API INTERFACE> for details of both the C<seq> method
719and the API in general.
88108326 720
721Here is the script above rewritten using the C<seq> API method.
722
610ab055 723 use strict ;
88108326 724 use DB_File ;
88108326 725
610ab055 726 use vars qw($filename $x %h $status $key $value) ;
727
88108326 728 $filename = "tree" ;
729 unlink $filename ;
730
731 # Enable duplicate records
732 $DB_BTREE->{'flags'} = R_DUP ;
733
734 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
735 or die "Cannot open $filename: $!\n";
736
737 # Add some key/value pairs to the file
738 $h{'Wall'} = 'Larry' ;
739 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 740 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 741 $h{'Smith'} = 'John' ;
742 $h{'mouse'} = 'mickey' ;
743
f6b705ef 744 # iterate through the btree using seq
88108326 745 # and print each key/value pair.
610ab055 746 $key = $value = 0 ;
f6b705ef 747 for ($status = $x->seq($key, $value, R_FIRST) ;
748 $status == 0 ;
749 $status = $x->seq($key, $value, R_NEXT) )
88108326 750 { print "$key -> $value\n" }
751
752 undef $x ;
753 untie %h ;
754
755that prints:
756
757 Smith -> John
758 Wall -> Brick
f6b705ef 759 Wall -> Brick
88108326 760 Wall -> Larry
761 mouse -> mickey
762
f6b705ef 763This time we have got all the key/value pairs, including the multiple
88108326 764values associated with the key C<Wall>.
765
68dc0745 766=head2 The get_dup() Method
f6b705ef 767
768B<DB_File> comes with a utility method, called C<get_dup>, to assist in
88108326 769reading duplicate values from BTREE databases. The method can take the
770following forms:
771
772 $count = $x->get_dup($key) ;
773 @list = $x->get_dup($key) ;
774 %list = $x->get_dup($key, 1) ;
775
776In a scalar context the method returns the number of values associated
777with the key, C<$key>.
778
779In list context, it returns all the values which match C<$key>. Note
f6b705ef 780that the values will be returned in an apparently random order.
88108326 781
7a2e2cd6 782In list context, if the second parameter is present and evaluates
783TRUE, the method returns an associative array. The keys of the
784associative array correspond to the values that matched in the BTREE
785and the values of the array are a count of the number of times that
786particular value occurred in the BTREE.
88108326 787
f6b705ef 788So assuming the database created above, we can use C<get_dup> like
88108326 789this:
790
610ab055 791 my $cnt = $x->get_dup("Wall") ;
88108326 792 print "Wall occurred $cnt times\n" ;
793
610ab055 794 my %hash = $x->get_dup("Wall", 1) ;
88108326 795 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 796 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 797
610ab055 798 my @list = $x->get_dup("Wall") ;
88108326 799 print "Wall => [@list]\n" ;
800
f6b705ef 801 @list = $x->get_dup("Smith") ;
88108326 802 print "Smith => [@list]\n" ;
803
f6b705ef 804 @list = $x->get_dup("Dog") ;
88108326 805 print "Dog => [@list]\n" ;
806
807
808and it will print:
809
f6b705ef 810 Wall occurred 3 times
88108326 811 Larry is there
f6b705ef 812 There are 2 Brick Walls
813 Wall => [Brick Brick Larry]
88108326 814 Smith => [John]
815 Dog => []
3b35bae3 816
f6b705ef 817=head2 Matching Partial Keys
818
819The BTREE interface has a feature which allows partial keys to be
820matched. This functionality is I<only> available when the C<seq> method
821is used along with the R_CURSOR flag.
822
823 $x->seq($key, $value, R_CURSOR) ;
824
825Here is the relevant quote from the dbopen man page where it defines
826the use of the R_CURSOR flag with seq:
827
f6b705ef 828 Note, for the DB_BTREE access method, the returned key is not
829 necessarily an exact match for the specified key. The returned key
830 is the smallest key greater than or equal to the specified key,
831 permitting partial key matches and range searches.
832
f6b705ef 833In the example script below, the C<match> sub uses this feature to find
834and print the first matching key/value pair given a partial key.
835
610ab055 836 use strict ;
f6b705ef 837 use DB_File ;
838 use Fcntl ;
610ab055 839
840 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 841
842 sub match
843 {
844 my $key = shift ;
610ab055 845 my $value = 0;
f6b705ef 846 my $orig_key = $key ;
847 $x->seq($key, $value, R_CURSOR) ;
848 print "$orig_key\t-> $key\t-> $value\n" ;
849 }
850
851 $filename = "tree" ;
852 unlink $filename ;
853
854 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
855 or die "Cannot open $filename: $!\n";
856
857 # Add some key/value pairs to the file
858 $h{'mouse'} = 'mickey' ;
859 $h{'Wall'} = 'Larry' ;
860 $h{'Walls'} = 'Brick' ;
861 $h{'Smith'} = 'John' ;
862
863
610ab055 864 $key = $value = 0 ;
f6b705ef 865 print "IN ORDER\n" ;
866 for ($st = $x->seq($key, $value, R_FIRST) ;
867 $st == 0 ;
868 $st = $x->seq($key, $value, R_NEXT) )
869
870 { print "$key -> $value\n" }
871
872 print "\nPARTIAL MATCH\n" ;
873
874 match "Wa" ;
875 match "A" ;
876 match "a" ;
877
878 undef $x ;
879 untie %h ;
880
881Here is the output:
882
883 IN ORDER
884 Smith -> John
885 Wall -> Larry
886 Walls -> Brick
887 mouse -> mickey
888
889 PARTIAL MATCH
890 Wa -> Wall -> Larry
891 A -> Smith -> John
892 a -> mouse -> mickey
893
894=head1 DB_RECNO
895
896DB_RECNO provides an interface to flat text files. Both variable and
897fixed length records are supported.
3b35bae3 898
88108326 899In order to make RECNO more compatible with Perl the array offset for
900all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 901
88108326 902As with normal Perl arrays, a RECNO array can be accessed using
903negative indexes. The index -1 refers to the last element of the array,
904-2 the second last, and so on. Attempting to access an element before
905the start of the array will raise a fatal run-time error.
3b35bae3 906
68dc0745 907=head2 The 'bval' Option
36477c24 908
909The operation of the bval option warrants some discussion. Here is the
910definition of bval from the Berkeley DB 1.85 recno manual page:
911
912 The delimiting byte to be used to mark the end of a
913 record for variable-length records, and the pad charac-
914 ter for fixed-length records. If no value is speci-
915 fied, newlines (``\n'') are used to mark the end of
916 variable-length records and fixed-length records are
917 padded with spaces.
918
919The second sentence is wrong. In actual fact bval will only default to
920C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
921openinfo parameter is used at all, the value that happens to be in bval
922will be used. That means you always have to specify bval when making
923use of any of the options in the openinfo parameter. This documentation
924error will be fixed in the next release of Berkeley DB.
925
That clarifies the situation with regard to Berkeley DB itself. What
about B<DB_File>? Well, the behavior defined in the quote above is
quite useful, so B<DB_File> conforms to it.
929
930That means that you can specify other options (e.g. cachesize) and
931still have bval default to C<"\n"> for variable length records, and
932space for fixed length records.
933
f6b705ef 934=head2 A Simple Example
3b35bae3 935
f6b705ef 936Here is a simple example that uses RECNO.
937
610ab055 938 use strict ;
f6b705ef 939 use DB_File ;
f6b705ef 940
610ab055 941 my @h ;
f6b705ef 942 tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
943 or die "Cannot open file 'text': $!\n" ;
944
945 # Add a few key/value pairs to the file
946 $h[0] = "orange" ;
947 $h[1] = "blue" ;
948 $h[2] = "yellow" ;
949
950 # Check for existence of a key
951 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
952
953 # use a negative index
954 print "The last element is $h[-1]\n" ;
955 print "The 2nd last element is $h[-2]\n" ;
956
957 untie @h ;
3b35bae3 958
f6b705ef 959Here is the output from the script:
960
961
962 Element 1 Exists with value blue
963 The last element is yellow
964 The 2nd last element is blue
965
966=head2 Extra Methods
967
968As you can see from the example above, the tied array interface is
969quite limited. To make the interface more useful, a number of methods
970are supplied with B<DB_File> to simulate the standard array operations
971that are not currently implemented in Perl's tied array interface. All
972these methods are accessed via the object returned from the tie call.
973
974Here are the methods:
975
976=over 5
3b35bae3 977
f6b705ef 978=item B<$X-E<gt>push(list) ;>
979
980Pushes the elements of C<list> to the end of the array.
981
982=item B<$value = $X-E<gt>pop ;>
983
984Removes and returns the last element of the array.
985
986=item B<$X-E<gt>shift>
987
988Removes and returns the first element of the array.
989
990=item B<$X-E<gt>unshift(list) ;>
991
992Pushes the elements of C<list> to the start of the array.
993
994=item B<$X-E<gt>length>
995
996Returns the number of elements in the array.
997
998=back
999
1000=head2 Another Example
1001
1002Here is a more complete example that makes use of some of the methods
1003described above. It also makes use of the API interface directly (see
1004L<THE API INTERFACE>).
1005
1006 use strict ;
1007 use vars qw(@h $H $file $i) ;
1008 use DB_File ;
1009 use Fcntl ;
1010
1011 $file = "text" ;
1012
1013 unlink $file ;
1014
1015 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1016 or die "Cannot open file $file: $!\n" ;
1017
1018 # first create a text file to play with
1019 $h[0] = "zero" ;
1020 $h[1] = "one" ;
1021 $h[2] = "two" ;
1022 $h[3] = "three" ;
1023 $h[4] = "four" ;
1024
1025
1026 # Print the records in order.
1027 #
1028 # The length method is needed here because evaluating a tied
1029 # array in a scalar context does not return the number of
1030 # elements in the array.
1031
1032 print "\nORIGINAL\n" ;
1033 foreach $i (0 .. $H->length - 1) {
1034 print "$i: $h[$i]\n" ;
1035 }
1036
1037 # use the push & pop methods
1038 $a = $H->pop ;
1039 $H->push("last") ;
1040 print "\nThe last record was [$a]\n" ;
1041
1042 # and the shift & unshift methods
1043 $a = $H->shift ;
1044 $H->unshift("first") ;
1045 print "The first record was [$a]\n" ;
1046
1047 # Use the API to add a new record after record 2.
1048 $i = 2 ;
1049 $H->put($i, "Newbie", R_IAFTER) ;
1050
1051 # and a new record before record 1.
1052 $i = 1 ;
1053 $H->put($i, "New One", R_IBEFORE) ;
1054
1055 # delete record 3
1056 $H->del(3) ;
1057
1058 # now print the records in reverse order
1059 print "\nREVERSE\n" ;
1060 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1061 { print "$i: $h[$i]\n" }
1062
1063 # same again, but use the API functions instead
1064 print "\nREVERSE again\n" ;
610ab055 1065 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1066 for ($s = $H->seq($k, $v, R_LAST) ;
1067 $s == 0 ;
1068 $s = $H->seq($k, $v, R_PREV))
1069 { print "$k: $v\n" }
1070
1071 undef $H ;
1072 untie @h ;
1073
1074and this is what it outputs:
1075
1076 ORIGINAL
1077 0: zero
1078 1: one
1079 2: two
1080 3: three
1081 4: four
1082
1083 The last record was [four]
1084 The first record was [zero]
1085
1086 REVERSE
1087 5: last
1088 4: three
1089 3: Newbie
1090 2: one
1091 1: New One
1092 0: first
1093
1094 REVERSE again
1095 5: last
1096 4: three
1097 3: Newbie
1098 2: one
1099 1: New One
1100 0: first
1101
1102Notes:
1103
1104=over 5
1105
1106=item 1.
1107
1108Rather than iterating through the array, C<@h> like this:
1109
1110 foreach $i (@h)
1111
1112it is necessary to use either this:
1113
1114 foreach $i (0 .. $H->length - 1)
1115
1116or this:
1117
1118 for ($a = $H->seq($k, $v, R_FIRST) ;
1119 $a == 0 ;
1120 $a = $H->seq($k, $v, R_NEXT) )
1121
1122=item 2.
1123
1124Notice that both times the C<put> method was used the record index was
1125specified using a variable, C<$i>, rather than the literal value
1126itself. This is because C<put> will return the record number of the
1127inserted line via that parameter.
1128
1129=back
1130
1131=head1 THE API INTERFACE
3b35bae3 1132
1133As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1134possible to make direct use of most of the API functions defined in the
8e07c86e 1135Berkeley DB documentation.
3b35bae3 1136
88108326 1137To do this you need to store a copy of the object returned from the tie.
3b35bae3 1138
88108326 1139 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1140
8e07c86e 1141Once you have done that, you can access the Berkeley DB API functions
88108326 1142as B<DB_File> methods directly like this:
3b35bae3 1143
1144 $db->put($key, $value, R_NOOVERWRITE) ;
1145
88108326 1146B<Important:> If you have saved a copy of the object returned from
1147C<tie>, the underlying database file will I<not> be closed until both
1148the tied variable is untied and all copies of the saved object are
610ab055 1149destroyed.
88108326 1150
1151 use DB_File ;
1152 $db = tie %hash, "DB_File", "filename"
1153 or die "Cannot tie filename: $!" ;
1154 ...
1155 undef $db ;
1156 untie %hash ;
1157
9a2c4ce3 1158See L<The untie() Gotcha> for more details.
778183f3 1159
88108326 1160All the functions defined in L<dbopen> are available except for
1161close() and dbopen() itself. The B<DB_File> method interface to the
1162supported functions has been implemented to mirror the way Berkeley DB
1163works whenever possible. In particular note that:
1164
1165=over 5
1166
1167=item *
1168
1169The methods return a status value. All return 0 on success.
1170All return -1 to signify an error and set C<$!> to the exact
1171error code. The return code 1 generally (but not always) means that the
1172key specified did not exist in the database.
1173
1174Other return codes are defined. See below and in the Berkeley DB
1175documentation for details. The Berkeley DB documentation should be used
1176as the definitive source.
1177
1178=item *
3b35bae3 1179
88108326 1180Whenever a Berkeley DB function returns data via one of its parameters,
1181the equivalent B<DB_File> method does exactly the same.
3b35bae3 1182
88108326 1183=item *
1184
1185If you are careful, it is possible to mix API calls with the tied
1186hash/array interface in the same piece of code. Although only a few of
1187the methods used to implement the tied interface currently make use of
1188the cursor, you should always assume that the cursor has been changed
1189any time the tied hash/array interface is used. As an example, this
1190code will probably not do what you expect:
1191
1192 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1193 or die "Cannot tie $filename: $!" ;
1194
1195 # Get the first key/value pair and set the cursor
1196 $X->seq($key, $value, R_FIRST) ;
1197
1198 # this line will modify the cursor
1199 $count = scalar keys %x ;
1200
1201 # Get the second key/value pair.
1202 # oops, it didn't, it got the last key/value pair!
1203 $X->seq($key, $value, R_NEXT) ;
1204
1205The code above can be rearranged to get around the problem, like this:
1206
1207 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1208 or die "Cannot tie $filename: $!" ;
1209
1210 # this line will modify the cursor
1211 $count = scalar keys %x ;
1212
1213 # Get the first key/value pair and set the cursor
1214 $X->seq($key, $value, R_FIRST) ;
1215
1216 # Get the second key/value pair.
1217 # worked this time.
1218 $X->seq($key, $value, R_NEXT) ;
1219
1220=back
1221
1222All the constants defined in L<dbopen> for use in the flags parameters
1223in the methods defined below are also available. Refer to the Berkeley
1224DB documentation for the precise meaning of the flags values.
1225
1226Below is a list of the methods available.
3b35bae3 1227
1228=over 5
1229
f6b705ef 1230=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1231
1232Given a key (C<$key>) this method reads the value associated with it
1233from the database. The value read from the database is returned in the
1234C<$value> parameter.
3b35bae3 1235
88108326 1236If the key does not exist the method returns 1.
3b35bae3 1237
88108326 1238No flags are currently defined for this method.
3b35bae3 1239
f6b705ef 1240=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1241
88108326 1242Stores the key/value pair in the database.
1243
1244If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1245will have the record number of the inserted key/value pair set.
3b35bae3 1246
88108326 1247Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1248R_SETCURSOR.
1249
f6b705ef 1250=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1251
88108326 1252Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1253
88108326 1254A return code of 1 means that the requested key was not in the
1255database.
3b35bae3 1256
88108326 1257R_CURSOR is the only valid flag at present.
3b35bae3 1258
f6b705ef 1259=item B<$status = $X-E<gt>fd ;>
3b35bae3 1260
88108326 1261Returns the file descriptor for the underlying database.
3b35bae3 1262
f6b705ef 1263See L<Locking Databases> for an example of how to make use of the
88108326 1264C<fd> method to lock your database.
3b35bae3 1265
f6b705ef 1266=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1267
88108326 1268This interface allows sequential retrieval from the database. See
1269L<dbopen> for full details.
1270
1271Both the C<$key> and C<$value> parameters will be set to the key/value
1272pair read from the database.
1273
1274The flags parameter is mandatory. The valid flag values are R_CURSOR,
1275R_FIRST, R_LAST, R_NEXT and R_PREV.
1276
f6b705ef 1277=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1278
1279Flushes any cached buffers to disk.
1280
1281R_RECNOSYNC is the only valid flag at present.
3b35bae3 1282
1283=back
1284
f6b705ef 1285=head1 HINTS AND TIPS
3b35bae3 1286
3b35bae3 1287
cb1a09d0 1288=head2 Locking Databases
3b35bae3 1289
cb1a09d0 1290Concurrent access of a read-write database by several parties requires
1291them all to use some kind of locking. Here's an example of Tom's that
1292uses the I<fd> method to get the file descriptor, and then a careful
1293open() to give something Perl will flock() for you. Run this repeatedly
1294in the background to watch the locks granted in proper order.
3b35bae3 1295
cb1a09d0 1296 use DB_File;
1297
1298 use strict;
1299
1300 sub LOCK_SH { 1 }
1301 sub LOCK_EX { 2 }
1302 sub LOCK_NB { 4 }
1303 sub LOCK_UN { 8 }
1304
1305 my($oldval, $fd, $db, %db, $value, $key);
1306
1307 $key = shift || 'default';
1308 $value = shift || 'magic';
1309
1310 $value .= " $$";
1311
1312 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1313 || die "dbcreat /tmp/foo.db $!";
1314 $fd = $db->fd;
1315 print "$$: db fd is $fd\n";
1316 open(DB_FH, "+<&=$fd") || die "dup $!";
1317
1318
1319 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1320 print "$$: CONTENTION; can't read during write update!
1321 Waiting for read lock ($!) ....";
1322 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1323 }
1324 print "$$: Read lock granted\n";
1325
1326 $oldval = $db{$key};
1327 print "$$: Old value was $oldval\n";
1328 flock(DB_FH, LOCK_UN);
1329
1330 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1331 print "$$: CONTENTION; must have exclusive lock!
1332 Waiting for write lock ($!) ....";
1333 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1334 }
1335
1336 print "$$: Write lock granted\n";
1337 $db{$key} = $value;
610ab055 1338 $db->sync; # to flush
cb1a09d0 1339 sleep 10;
1340
1341 flock(DB_FH, LOCK_UN);
88108326 1342 undef $db;
cb1a09d0 1343 untie %db;
1344 close(DB_FH);
1345 print "$$: Updated db to $key=$value\n";
1346
68dc0745 1347=head2 Sharing Databases With C Applications
f6b705ef 1348
1349There is no technical reason why a Berkeley DB database cannot be
1350shared by both a Perl and a C application.
1351
1352The vast majority of problems that are reported in this area boil down
1353to the fact that C strings are NULL terminated, whilst Perl strings are
1354not.
1355
1356Here is a real example. Netscape 2.0 keeps a record of the locations you
1357visit along with the time you last visited them in a DB_HASH database.
1358This is usually stored in the file F<~/.netscape/history.db>. The key
1359field in the database is the location string and the value field is the
1360time the location was last visited stored as a 4 byte binary value.
1361
1362If you haven't already guessed, the location string is stored with a
1363terminating NULL. This means you need to be careful when accessing the
1364database.
1365
1366Here is a snippet of code that is loosely based on Tom Christiansen's
1367I<ggh> script (available from your nearest CPAN archive in
1368F<authors/id/TOMC/scripts/nshist.gz>).
1369
610ab055 1370 use strict ;
f6b705ef 1371 use DB_File ;
1372 use Fcntl ;
f6b705ef 1373
610ab055 1374 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1375 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1376
1377 $HISTORY = "$dotdir/.netscape/history.db";
1378
1379 tie %hist_db, 'DB_File', $HISTORY
1380 or die "Cannot open $HISTORY: $!\n" ;
1381
1382 # Dump the complete database
1383 while ( ($href, $binary_time) = each %hist_db ) {
1384
1385 # remove the terminating NULL
1386 $href =~ s/\x00$// ;
1387
1388 # convert the binary time into a user friendly string
1389 $date = localtime unpack("V", $binary_time);
1390 print "$date $href\n" ;
1391 }
1392
1393 # check for the existence of a specific key
1394 # remember to add the NULL
1395 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1396 $date = localtime unpack("V", $binary_time) ;
1397 print "Last visited mox.perl.com on $date\n" ;
1398 }
1399 else {
1400 print "Never visited mox.perl.com\n"
1401 }
1402
1403 untie %hist_db ;
1404
68dc0745 1405=head2 The untie() Gotcha
778183f3 1406
7a2e2cd6 1407If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1408recommended that you read L<perltie/The untie Gotcha>.
778183f3 1409
1410Even if you don't currently make use of the API interface, it is still
1411worth reading it.
1412
1413Here is an example which illustrates the problem from a B<DB_File>
1414perspective:
1415
1416 use DB_File ;
1417 use Fcntl ;
1418
1419 my %x ;
1420 my $X ;
1421
1422 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1423 or die "Cannot tie first time: $!" ;
1424
1425 $x{123} = 456 ;
1426
1427 untie %x ;
1428
1429 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1430 or die "Cannot tie second time: $!" ;
1431
1432 untie %x ;
1433
1434When run, the script will produce this error message:
1435
1436 Cannot tie second time: Invalid argument at bad.file line 14.
1437
1438Although the error message above refers to the second tie() statement
1439in the script, the source of the problem is really with the untie()
1440statement that precedes it.
1441
1442Having read L<perltie> you will probably have already guessed that the
1443error is caused by the extra copy of the tied object stored in C<$X>.
1444If you haven't, then the problem boils down to the fact that the
1445B<DB_File> destructor, DESTROY, will not be called until I<all>
1446references to the tied object are destroyed. Both the tied variable,
1447C<%x>, and C<$X> above hold a reference to the object. The call to
1448untie() will destroy the first, but C<$X> still holds a valid
1449reference, so the destructor will not get called and the database file
1450F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1451attempt to open a database that is already open via the catch-all
1452"Invalid argument" doesn't help.
1453
1454If you run the script with the C<-w> flag the error message becomes:
1455
1456 untie attempted while 1 inner references still exist at bad.file line 12.
1457 Cannot tie second time: Invalid argument at bad.file line 14.
1458
1459which pinpoints the real problem. Finally the script can now be
1460modified to fix the original problem by destroying the API object
1461before the untie:
1462
1463 ...
1464 $x{123} = 456 ;
1465
1466 undef $X ;
1467 untie %x ;
1468
1469 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1470 ...
1471
f6b705ef 1472
1473=head1 COMMON QUESTIONS
1474
1475=head2 Why is there Perl source in my database?
1476
1477If you look at the contents of a database file created by DB_File,
1478there can sometimes be part of a Perl script included in it.
1479
1480This happens because Berkeley DB uses dynamic memory to allocate
1481buffers which will subsequently be written to the database file. Being
1482dynamic, the memory could have been used for anything before DB
1483malloced it. As Berkeley DB doesn't clear the memory once it has been
1484allocated, the unused portions will contain random junk. In the case
1485where a Perl script gets written to the database, the random junk will
1486correspond to an area of dynamic memory that happened to be used during
1487the compilation of the script.
1488
1489Unless you don't like the possibility of there being part of your Perl
1490scripts embedded in a database file, this is nothing to worry about.
1491
1492=head2 How do I store complex data structures with DB_File?
1493
1494Although B<DB_File> cannot do this directly, there is a module which
1495can layer transparently over B<DB_File> to accomplish this feat.
1496
1497Check out the MLDBM module, available on CPAN in the directory
1498F<modules/by-module/MLDBM>.
1499
1500=head2 What does "Invalid Argument" mean?
1501
1502You will get this error message when one of the parameters in the
1503C<tie> call is wrong. Unfortunately there are quite a few parameters to
1504get wrong, so it can be difficult to figure out which one it is.
1505
1506Here are a couple of possibilities:
1507
1508=over 5
1509
1510=item 1.
1511
610ab055 1512Attempting to reopen a database without closing it.
f6b705ef 1513
1514=item 2.
1515
1516Using the O_WRONLY flag.
1517
1518=back
1519
1520=head2 What does "Bareword 'DB_File' not allowed" mean?
1521
1522You will encounter this particular error message when you have the
1523C<strict 'subs'> pragma (or the full strict pragma) in your script.
1524Consider this script:
1525
1526 use strict ;
1527 use DB_File ;
1528 use vars qw(%x) ;
1529 tie %x, DB_File, "filename" ;
1530
1531Running it produces the error in question:
1532
1533 Bareword "DB_File" not allowed while "strict subs" in use
1534
1535To get around the error, place the word C<DB_File> in either single or
1536double quotes, like this:
1537
1538 tie %x, "DB_File", "filename" ;
1539
1540Although it might seem like a real pain, it is really worth the effort
1541of having a C<use strict> in all your scripts.
1542
cb1a09d0 1543=head1 HISTORY
1544
1545=over
1546
1547=item 0.1
3b35bae3 1548
1549First Release.
1550
cb1a09d0 1551=item 0.2
3b35bae3 1552
1553When B<DB_File> is opening a database file it no longer terminates the
1554process if I<dbopen> returned an error. This allows file protection
1555errors to be caught at run time. Thanks to Judith Grass
cb1a09d0 1556E<lt>grass@cybercash.comE<gt> for spotting the bug.
3b35bae3 1557
cb1a09d0 1558=item 0.3
8e07c86e 1559
1560Added prototype support for multiple btree compare callbacks.
1561
cb1a09d0 1562=item 1.0
8e07c86e 1563
1564B<DB_File> has been in use for over a year. To reflect that, the
1565version number has been incremented to 1.0.
1566
1567Added complete support for multiple concurrent callbacks.
1568
1569Using the I<push> method on an empty list didn't work properly. This
1570has been fixed.
1571
cb1a09d0 1572=item 1.01
4633a7c4 1573
1574Fixed a core dump problem with SunOS.
1575
1576The return value from TIEHASH wasn't set to NULL when dbopen returned
1577an error.
1578
88108326 1579=item 1.02
1580
f6b705ef 1581Merged OS/2 specific code into DB_File.xs
88108326 1582
1583Removed some redundant code in DB_File.xs.
1584
1585Documentation update.
1586
1587Allow negative subscripts with RECNO interface.
1588
1589Changed the default flags from O_RDWR to O_CREAT|O_RDWR.
1590
1591The example code which showed how to lock a database needed a call to
1592C<sync> added. Without it the resultant database file was empty.
1593
f6b705ef 1594Added get_dup method.
88108326 1595
f6b705ef 1596=item 1.03
1597
1598Documentation update.
3b35bae3 1599
f6b705ef 1600B<DB_File> now imports the constants (O_RDWR, O_CREAT etc.) from Fcntl
1601automatically.
3b35bae3 1602
f6b705ef 1603The standard hash function C<exists> is now supported.
1604
1605Modified the behavior of get_dup. When it returns an associative
1606array, the value is the count of the number of matching BTREE values.
3b35bae3 1607
610ab055 1608=item 1.04
1609
1610Minor documentation changes.
1611
1612Fixed a bug in hash_cb. Patches supplied by Dave Hammen,
1613E<lt>hammen@gothamcity.jsc.nasa.govE<gt>.
1614
1615Fixed a bug with the constructors for DB_File::HASHINFO,
1616DB_File::BTREEINFO and DB_File::RECNOINFO. Also tidied up the
1617constructors to make them C<-w> clean.
1618
1619Reworked part of the test harness to be more locale friendly.
1620
1621=item 1.05
1622
1623Made all scripts in the documentation C<strict> and C<-w> clean.
1624
1625Added logic to F<DB_File.xs> to allow the module to be built after Perl
1626is installed.
1627
ff68c719 1628=item 1.06
1629
1630Minor namespace cleanup: Localized C<PrintBtree>.
1631
36477c24 1632=item 1.07
1633
1634Fixed bug with RECNO, where bval wasn't defaulting to "\n".
1635
1636=item 1.08
1637
1638Documented operation of bval.
1639
18d2dc8c 1640=item 1.09
1641
1642Minor bug fix in DB_File::HASHINFO, DB_File::RECNOINFO and
1643DB_File::BTREEINFO.
1644
1645Changed default mode to 0666.
1646
a0b8c8c1 1647=item 1.10
1648
1649Fixed fd method so that it still returns -1 for in-memory files when db
16501.86 is used.
1651
778183f3 1652=item 1.11
1653
1654Documented the untie gotcha.
1655
68dc0745 1656=item 1.12
1657
1658Documented the incompatibility with version 2 of Berkeley DB.
1659
d3ef3b8a 1660=item 1.13
1661
1662Minor changes to DB_File.xs and DB_File.pm
1663
05475680 1664=item 1.14
1665
1666Made it illegal to tie an associative array to a RECNO database and an
1667ordinary array to a HASH or BTREE database.
1668
610ab055 1669=back
1670
3b35bae3 1671=head1 BUGS
1672
8e07c86e 1673Some older versions of Berkeley DB had problems with fixed length
1674records using the RECNO file format. The newest version at the time of
1675writing was 1.85 - this seems to have fixed the problems with RECNO.
3b35bae3 1676
8e07c86e 1677I am sure there are bugs in the code. If you do find any, or can
1678suggest any enhancements, I would welcome your comments.
3b35bae3 1679
1680=head1 AVAILABILITY
1681
f6b705ef 1682B<DB_File> comes with the standard Perl source distribution. Look in
1683the directory F<ext/DB_File>.
1684
68dc0745 1685This version of B<DB_File> will only work with version 1.x of Berkeley
1686DB. It is I<not> yet compatible with version 2.
1687
1688Version 1 of Berkeley DB is available at your nearest CPAN archive (see
cb1a09d0 1689L<perlmod/"CPAN"> for a list) in F<src/misc/db.1.85.tar.gz>, or via the
610ab055 1690host F<ftp.cs.berkeley.edu> in F</ucb/4bsd/db.tar.gz>. Alternatively,
1691check out the Berkeley DB home page at F<http://www.bostic.com/db>. It
1692is I<not> under the GPL.
3b35bae3 1693
88108326 1694If you are running IRIX, then get Berkeley DB from
1695F<http://reality.sgi.com/ariel>. It has the patches necessary to
1696compile properly on IRIX 5.3.
1697
a0b8c8c1 1698As of January 1997, version 1.86 of Berkeley DB is available from the
1699Berkeley DB home page. Although this release does fix a number of bugs
778183f3 1700that were present in 1.85 you should be aware of the following
a0b8c8c1 1701information (taken from the Berkeley DB home page) before you consider
1702using it:
1703
1704 DB version 1.86 includes a new implementation of the hash access
1705 method that fixes a variety of hashing problems found in DB version
1706 1.85. We are making it available as an interim solution until DB
1707 2.0 is available.
1708
1709 PLEASE NOTE: the underlying file format for the hash access method
1710 changed between version 1.85 and version 1.86, so you will have to
1711 dump and reload all of your databases to convert from version 1.85
1712 to version 1.86. If you do not absolutely require the fixes from
1713 version 1.86, we strongly urge you to wait until DB 2.0 is released
1714 before upgrading from 1.85.
1715
1716
3b35bae3 1717=head1 SEE ALSO
1718
1719L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>
1720
3b35bae3 1721=head1 AUTHOR
1722
8e07c86e 1723The DB_File interface was written by Paul Marquess
88108326 1724E<lt>pmarquess@bfsec.bt.co.ukE<gt>.
d3ef3b8a 1725Questions about the DB system itself may be addressed to
1726E<lt>db@sleepycat.comE<gt>.
3b35bae3 1727
1728=cut