Refresh DB_File to 1.13
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
3# written by Paul Marquess (pmarquess@bfsec.bt.co.uk)
d3ef3b8a 4# last modified 27th Apr 1997
5# version 1.13
36477c24 6#
a0b8c8c1 7# Copyright (c) 1995, 1996, 1997 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
package DB_File::HASHINFO ;

# Tied-hash class that holds the tunable open parameters for a DB_HASH
# database.  Only the element names registered in VALID may be read,
# written or deleted; using any other key is a fatal error.
# DB_File::RECNOINFO and DB_File::BTREEINFO inherit everything here
# except TIEHASH, so error messages are built from the object's own class.

require 5.003 ;

use strict;
use Carp;
require Tie::Hash;
@DB_File::HASHINFO::ISA = qw(Tie::Hash);

# Constructor.  Returns a reference to a hash that is both tied to $pkg
# and blessed into $pkg, so element access on the returned reference is
# routed through the FETCH/STORE/DELETE/EXISTS methods below.
sub new
{
    my $pkg = shift ;
    my %x ;
    tie %x, $pkg ;
    return bless \%x, $pkg ;
}


# Build the object behind the tie: VALID is the set of permitted element
# names, GOT holds the values that have actually been stored.
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map {$_, 1}
                       qw( bsize ffactor nelem cachesize hash lorder)
                     },
            GOT   => {}
          }, $pkg ;
}


# Return the stored value for $key (undef if none has been stored yet).
# Croaks if $key is not a permitted element name.
sub FETCH
{
    my $self = shift ;
    my $key  = shift ;

    return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;

    my $pkg = ref $self ;
    croak "${pkg}::FETCH - Unknown element '$key'" ;
}


# Store $value under $key.  Croaks if $key is not a permitted element name.
sub STORE
{
    my $self  = shift ;
    my $key   = shift ;
    my $value = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        $self->{GOT}{$key} = $value ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::STORE - Unknown element '$key'" ;
}

# Forget any stored value for $key.  Croaks if $key is not a permitted
# element name.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    # Name the object's real class, as FETCH and STORE do, so that the
    # message is correct when the method is inherited by a subclass.
    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}

# True when $key is a permitted element name.  Note that this consults
# VALID only; it does not say whether a value has been stored.
sub EXISTS
{
    my $self = shift ;
    my $key  = shift ;

    exists $self->{VALID}{$key} ;
}

# Shared failure path for the tie methods this class refuses to support.
sub NotHere
{
    my $self   = shift ;
    my $method = shift ;

    croak ref($self) . " does not define the method ${method}" ;
}

# Empty the hash the object refers to when it is destroyed.
sub DESTROY  { undef %{$_[0]} }

# Iterating over, or wholesale clearing of, an info hash is not supported.
sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 105
package DB_File::RECNOINFO ;

# Info class for DB_RECNO databases.  All behaviour comes from
# DB_File::HASHINFO; only the list of permitted element names differs.

use strict ;

@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;

# Build the object behind the tie with the RECNO element names marked
# as valid and no values stored yet.
sub TIEHASH
{
    my ($class) = @_ ;

    my @names = qw( bval cachesize psize flags lorder reclen bfname ) ;
    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
122
package DB_File::BTREEINFO ;

# Info class for DB_BTREE databases.  All behaviour comes from
# DB_File::HASHINFO; only the list of permitted element names differs.

use strict ;

@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;

# Build the object behind the tie with the BTREE element names marked
# as valid and no values stored yet.
sub TIEHASH
{
    my ($class) = @_ ;

    my @names = qw( flags cachesize maxkeypage minkeypage psize
                    compare prefix lorder ) ;
    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
140
141
package DB_File ;

use strict;
use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO) ;
use Carp;


$VERSION = "1.13" ;

# One ready-made info object per database type; these are exported below
# and stand in for the C-level DBTYPE value plus its openinfo structure.
#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;

require Tie::Hash;
require Exporter;
use AutoLoader;
require DynaLoader;
@ISA = qw(Tie::Hash Exporter DynaLoader);

# Exported by default: the three info objects, then the Berkeley DB
# constants (each constant is resolved on first use by AUTOLOAD).
@EXPORT = (
    qw( $DB_BTREE $DB_HASH $DB_RECNO ),

    qw( BTREEMAGIC BTREEVERSION
        DB_LOCK DB_SHMEM DB_TXN
        HASHMAGIC HASHVERSION
        MAX_PAGE_NUMBER MAX_PAGE_OFFSET MAX_REC_NUMBER
        RET_ERROR RET_SPECIAL RET_SUCCESS
        R_CURSOR R_DUP R_FIRST R_FIXEDLEN R_IAFTER R_IBEFORE
        R_LAST R_NEXT R_NOKEY R_NOOVERWRITE R_PREV R_RECNOSYNC
        R_SETCURSOR R_SNAPSHOT
        __R_UNUSED ),
);
194
# Resolve a Berkeley DB constant on first use.  The unqualified name is
# looked up with constant() (not defined in this part of the file;
# presumably supplied by the XS code loaded via bootstrap).  On success a
# real sub returning the value is compiled, so later calls bypass AUTOLOAD.
sub AUTOLOAD {
    my $constname = $AUTOLOAD;
    $constname =~ s/.*:://;

    my $val = constant($constname, @_ ? $_[0] : 0);

    if ($! != 0) {
        # An "Invalid" errno is treated as "not a constant at all": hand
        # the call over to AutoLoader instead.
        if ($! =~ /Invalid/) {
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }

        # Any other errno: a known macro name that this build lacks.
        my ($pack, $file, $line) = caller;
        croak "Your vendor has not defined DB macro $constname, used at $file line $line.
";
    }

    eval "sub $AUTOLOAD { $val }";
    goto &$AUTOLOAD;
}
213
f6b705ef 214
# import borrowed from IO::File
#   Exports the requested (or default) DB_File symbols into the calling
#   package, then additionally exports the Fcntl O_* constants when Fcntl
#   can be loaded.  Failure to load Fcntl is deliberately ignored.
sub import {
    my ($class, @symbols) = @_;
    my $target = caller;

    Exporter::export($class, $target, @symbols);

    eval {
        require Fcntl;
        Exporter::export('Fcntl', $target, '/^O_/');
    };
}
226
# Load the compiled half of the module via the inherited bootstrap
# method (DynaLoader is in @ISA).
DB_File->bootstrap($VERSION);

# Preloaded methods go here.  Autoload methods go after __END__, and are
# processed by the autosplit program.
231
# Entry point for tie().  If the fifth argument (the info object) is a
# reference to a tied hash, it is replaced by the object behind that tie
# before everything is handed to DoTie_ (not defined in this part of the
# file; presumably supplied by the XS code).
sub TIEHASH
{
    my @arg = @_ ;

    if (@arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/) {
        my $behind = tied %{ $arg[4] } ;
        $arg[4] = $behind if $behind ;
    }

    DoTie_(@arg) ;
}

# Tied arrays (DB_RECNO) are opened in exactly the same way.
*TIEARRAY = \&TIEHASH ;
88108326 243
# $db->get_dup(key [,flag])
#
# Walks the records whose key equals the given key, starting from an
# R_CURSOR seek and stepping with R_NEXT.
#   scalar/void context : returns the number of matching records
#   list context        : returns the matching values, in the order seen
#   list context + flag : returns value => occurrence-count pairs
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;

    my $want_list = wantarray ;
    my $wanted    = $key ;      # seq() overwrites $key, so keep the target
    my $value     = 0 ;
    my $matches   = 0 ;
    my (%tally, @found) ;

    # Stop at the first failing seq() or at the first key that differs.
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $wanted) {

        if    (! $want_list) { ++ $matches }
        elsif ($flag)        { ++ $tally{$value} }
        else                 { push @found, $value }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $matches unless $want_list ;
    return $flag ? %tally : @found ;
}
280
281
8e07c86e 2821;
283__END__
284
3b35bae3 285=head1 NAME
286
287DB_File - Perl5 access to Berkeley DB
288
289=head1 SYNOPSIS
290
291 use DB_File ;
88108326 292
293 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
294 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
295 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 296
3b35bae3 297 $status = $X->del($key [, $flags]) ;
298 $status = $X->put($key, $value [, $flags]) ;
299 $status = $X->get($key, $value [, $flags]) ;
760ac839 300 $status = $X->seq($key, $value, $flags) ;
3b35bae3 301 $status = $X->sync([$flags]) ;
302 $status = $X->fd ;
760ac839 303
f6b705ef 304 # BTREE only
88108326 305 $count = $X->get_dup($key) ;
306 @list = $X->get_dup($key) ;
307 %list = $X->get_dup($key, 1) ;
308
f6b705ef 309 # RECNO only
310 $a = $X->length;
311 $a = $X->pop ;
312 $X->push(list);
313 $a = $X->shift;
314 $X->unshift(list);
315
3b35bae3 316 untie %hash ;
317 untie @array ;
318
319=head1 DESCRIPTION
320
8e07c86e 321B<DB_File> is a module which allows Perl programs to make use of the
322facilities provided by Berkeley DB. If you intend to use this
f6b705ef 323module you should really have a copy of the Berkeley DB manual pages at
8e07c86e 324hand. The interface defined here mirrors the Berkeley DB interface
325closely.
3b35bae3 326
68dc0745 327Please note that this module will only work with version 1.x of
328Berkeley DB. Once Berkeley DB version 2 is released, B<DB_File> will be
329upgraded to work with it.
330
8e07c86e 331Berkeley DB is a C library which provides a consistent interface to a
332number of database formats. B<DB_File> provides an interface to all
333three of the database types currently supported by Berkeley DB.
3b35bae3 334
335The file types are:
336
337=over 5
338
88108326 339=item B<DB_HASH>
3b35bae3 340
88108326 341This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 342files. This is equivalent to the functionality provided by other
343hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
344the files created using DB_HASH are not compatible with any of the
345other packages mentioned.
3b35bae3 346
8e07c86e 347A default hashing algorithm, which will be adequate for most
348applications, is built into Berkeley DB. If you do need to use your own
349hashing algorithm it is possible to write your own in Perl and have
350B<DB_File> use it instead.
3b35bae3 351
88108326 352=item B<DB_BTREE>
353
354The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 355sorted, balanced binary tree.
3b35bae3 356
8e07c86e 357As with the DB_HASH format, it is possible to provide a user defined
358Perl routine to perform the comparison of keys. By default, though, the
359keys are stored in lexical order.
3b35bae3 360
88108326 361=item B<DB_RECNO>
3b35bae3 362
8e07c86e 363DB_RECNO allows both fixed-length and variable-length flat text files
364to be manipulated using the same key/value pair interface as in DB_HASH
365and DB_BTREE. In this case the key will consist of a record (line)
366number.
3b35bae3 367
368=back
369
68dc0745 370=head2 Interface to Berkeley DB
3b35bae3 371
372B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 373in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
374allows B<DB_File> to access Berkeley DB files using either an
375associative array (for DB_HASH & DB_BTREE file types) or an ordinary
376array (for the DB_RECNO file type).
3b35bae3 377
88108326 378In addition to the tie() interface, it is also possible to access most
379of the functions provided in the Berkeley DB API directly.
f6b705ef 380See L<THE API INTERFACE>.
3b35bae3 381
88108326 382=head2 Opening a Berkeley DB Database File
3b35bae3 383
8e07c86e 384Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 385Here is the C prototype for dbopen():
3b35bae3 386
387 DB*
388 dbopen (const char * file, int flags, int mode,
389 DBTYPE type, const void * openinfo)
390
391The parameter C<type> is an enumeration which specifies which of the 3
392interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
393Depending on which of these is actually chosen, the final parameter,
394I<openinfo> points to a data structure which allows tailoring of the
395specific interface method.
396
8e07c86e 397This interface is handled slightly differently in B<DB_File>. Here is
88108326 398an equivalent call using B<DB_File>:
3b35bae3 399
88108326 400 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 401
8e07c86e 402The C<filename>, C<flags> and C<mode> parameters are the direct
403equivalent of their dbopen() counterparts. The final parameter $DB_HASH
404performs the function of both the C<type> and C<openinfo> parameters in
405dbopen().
3b35bae3 406
88108326 407In the example above $DB_HASH is actually a pre-defined reference to a
408hash object. B<DB_File> has three of these pre-defined references.
409Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 410
8e07c86e 411The keys allowed in each of these pre-defined references are limited to
412the names used in the equivalent C structure. So, for example, the
413$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 414C<ffactor>, C<hash>, C<lorder> and C<nelem>.
415
416To change one of these elements, just assign to it like this:
417
418 $DB_HASH->{'cachesize'} = 10000 ;
419
420The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
421usually adequate for most applications. If you do need to create extra
422instances of these objects, constructors are available for each file
423type.
424
425Here are examples of the constructors and the valid options available
426for DB_HASH, DB_BTREE and DB_RECNO respectively.
427
428 $a = new DB_File::HASHINFO ;
429 $a->{'bsize'} ;
430 $a->{'cachesize'} ;
431 $a->{'ffactor'};
432 $a->{'hash'} ;
433 $a->{'lorder'} ;
434 $a->{'nelem'} ;
435
436 $b = new DB_File::BTREEINFO ;
437 $b->{'flags'} ;
438 $b->{'cachesize'} ;
439 $b->{'maxkeypage'} ;
440 $b->{'minkeypage'} ;
441 $b->{'psize'} ;
442 $b->{'compare'} ;
443 $b->{'prefix'} ;
444 $b->{'lorder'} ;
445
446 $c = new DB_File::RECNOINFO ;
447 $c->{'bval'} ;
448 $c->{'cachesize'} ;
449 $c->{'psize'} ;
450 $c->{'flags'} ;
451 $c->{'lorder'} ;
452 $c->{'reclen'} ;
453 $c->{'bfname'} ;
454
455The values stored in the hashes above are mostly the direct equivalent
456of their C counterparts. Like their C counterparts, all are set to
f6b705ef 457default values - that means you don't have to set I<all> of the
88108326 458values when you only want to change one. Here is an example:
459
460 $a = new DB_File::HASHINFO ;
461 $a->{'cachesize'} = 12345 ;
462 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
463
36477c24 464A few of the options need extra discussion here. When used, the C
88108326 465equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
466to C functions. In B<DB_File> these keys are used to store references
467to Perl subs. Below are templates for each of the subs:
468
469 sub hash
470 {
471 my ($data) = @_ ;
472 ...
473 # return the hash value for $data
474 return $hash ;
475 }
3b35bae3 476
88108326 477 sub compare
478 {
479 my ($key1, $key2) = @_ ;
480 ...
481 # return 0 if $key1 eq $key2
482 # -1 if $key1 lt $key2
483 # 1 if $key1 gt $key2
484 return (-1 , 0 or 1) ;
485 }
3b35bae3 486
88108326 487 sub prefix
488 {
489 my ($key1, $key2) = @_ ;
490 ...
491 # return number of bytes of $key2 which are
492 # necessary to determine that it is greater than $key1
493 return $bytes ;
494 }
3b35bae3 495
f6b705ef 496See L<Changing the BTREE sort order> for an example of using the
497C<compare> template.
88108326 498
36477c24 499If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 500C<bval>, you should check out L<The 'bval' Option>.
36477c24 501
88108326 502=head2 Default Parameters
503
504It is possible to omit some or all of the final 4 parameters in the
505call to C<tie> and let them take default values. As DB_HASH is the most
506common file format used, the call:
507
508 tie %A, "DB_File", "filename" ;
509
510is equivalent to:
511
18d2dc8c 512 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 513
514It is also possible to omit the filename parameter as well, so the
515call:
516
517 tie %A, "DB_File" ;
518
519is equivalent to:
520
18d2dc8c 521 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 522
f6b705ef 523See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 524in place of a filename.
525
f6b705ef 526=head2 In Memory Databases
527
528Berkeley DB allows the creation of in-memory databases by using NULL
529(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
530uses C<undef> instead of NULL to provide this functionality.
531
532=head1 DB_HASH
533
534The DB_HASH file format is probably the most commonly used of the three
535file formats that B<DB_File> supports. It is also very straightforward
536to use.
537
68dc0745 538=head2 A Simple Example
f6b705ef 539
540This example shows how to create a database, add key/value pairs to the
541database, delete keys/value pairs and finally how to enumerate the
542contents of the database.
543
610ab055 544 use strict ;
f6b705ef 545 use DB_File ;
610ab055 546 use vars qw( %h $k $v ) ;
f6b705ef 547
548 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
549 or die "Cannot open file 'fruit': $!\n";
550
551 # Add a few key/value pairs to the file
552 $h{"apple"} = "red" ;
553 $h{"orange"} = "orange" ;
554 $h{"banana"} = "yellow" ;
555 $h{"tomato"} = "red" ;
556
557 # Check for existence of a key
558 print "Banana Exists\n\n" if $h{"banana"} ;
559
560 # Delete a key/value pair.
561 delete $h{"apple"} ;
562
563 # print the contents of the file
564 while (($k, $v) = each %h)
565 { print "$k -> $v\n" }
566
567 untie %h ;
568
569Here is the output:
570
571 Banana Exists
572
573 orange -> orange
574 tomato -> red
575 banana -> yellow
576
577Note that, like ordinary associative arrays, the keys are retrieved
578in an apparently random order.
579
580=head1 DB_BTREE
581
582The DB_BTREE format is useful when you want to store data in a given
583order. By default the keys will be stored in lexical order, but as you
584will see from the example shown in the next section, it is very easy to
585define your own sorting function.
586
587=head2 Changing the BTREE sort order
588
589This script shows how to override the default sorting algorithm that
590BTREE uses. Instead of using the normal lexical ordering, a case
591insensitive compare function will be used.
88108326 592
610ab055 593 use strict ;
f6b705ef 594 use DB_File ;
610ab055 595
596 my %h ;
f6b705ef 597
598 sub Compare
599 {
600 my ($key1, $key2) = @_ ;
601 "\L$key1" cmp "\L$key2" ;
602 }
603
604 # specify the Perl sub that will do the comparison
605 $DB_BTREE->{'compare'} = \&Compare ;
606
607 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
608 or die "Cannot open file 'tree': $!\n" ;
609
610 # Add a key/value pair to the file
611 $h{'Wall'} = 'Larry' ;
612 $h{'Smith'} = 'John' ;
613 $h{'mouse'} = 'mickey' ;
614 $h{'duck'} = 'donald' ;
615
616 # Delete
617 delete $h{"duck"} ;
618
619 # Cycle through the keys printing them in order.
620 # Note it is not necessary to sort the keys as
621 # the btree will have kept them in order automatically.
622 foreach (keys %h)
623 { print "$_\n" }
624
625 untie %h ;
626
627Here is the output from the code above.
628
629 mouse
630 Smith
631 Wall
632
633There are a few points to bear in mind if you want to change the
634ordering in a BTREE database:
635
636=over 5
637
638=item 1.
639
640The new compare function must be specified when you create the database.
641
642=item 2.
643
644You cannot change the ordering once the database has been created. Thus
645you must use the same compare function every time you access the
88108326 646database.
647
f6b705ef 648=back
649
68dc0745 650=head2 Handling Duplicate Keys
f6b705ef 651
652The BTREE file type optionally allows a single key to be associated
653with an arbitrary number of values. This option is enabled by setting
654the flags element of C<$DB_BTREE> to R_DUP when creating the database.
655
88108326 656There are some difficulties in using the tied hash interface if you
657want to manipulate a BTREE database with duplicate keys. Consider this
658code:
659
610ab055 660 use strict ;
88108326 661 use DB_File ;
610ab055 662
663 use vars qw($filename %h ) ;
664
88108326 665 $filename = "tree" ;
666 unlink $filename ;
667
668 # Enable duplicate records
669 $DB_BTREE->{'flags'} = R_DUP ;
670
671 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
672 or die "Cannot open $filename: $!\n";
673
674 # Add some key/value pairs to the file
675 $h{'Wall'} = 'Larry' ;
676 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 677 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 678 $h{'Smith'} = 'John' ;
679 $h{'mouse'} = 'mickey' ;
680
681 # iterate through the associative array
682 # and print each key/value pair.
683 foreach (keys %h)
684 { print "$_ -> $h{$_}\n" }
685
f6b705ef 686 untie %h ;
687
88108326 688Here is the output:
689
690 Smith -> John
691 Wall -> Larry
692 Wall -> Larry
f6b705ef 693 Wall -> Larry
88108326 694 mouse -> mickey
695
f6b705ef 696As you can see 3 records have been successfully created with key C<Wall>
88108326 697- the only thing is, when they are retrieved from the database they
f6b705ef 698I<seem> to have the same value, namely C<Larry>. The problem is caused
699by the way that the associative array interface works. Basically, when
700the associative array interface is used to fetch the value associated
701with a given key, it will only ever retrieve the first value.
88108326 702
703Although it may not be immediately obvious from the code above, the
704associative array interface can be used to write values with duplicate
705keys, but it cannot be used to read them back from the database.
706
707The way to get around this problem is to use the Berkeley DB API method
708called C<seq>. This method allows sequential access to key/value
f6b705ef 709pairs. See L<THE API INTERFACE> for details of both the C<seq> method
710and the API in general.
88108326 711
712Here is the script above rewritten using the C<seq> API method.
713
610ab055 714 use strict ;
88108326 715 use DB_File ;
88108326 716
610ab055 717 use vars qw($filename $x %h $status $key $value) ;
718
88108326 719 $filename = "tree" ;
720 unlink $filename ;
721
722 # Enable duplicate records
723 $DB_BTREE->{'flags'} = R_DUP ;
724
725 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
726 or die "Cannot open $filename: $!\n";
727
728 # Add some key/value pairs to the file
729 $h{'Wall'} = 'Larry' ;
730 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 731 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 732 $h{'Smith'} = 'John' ;
733 $h{'mouse'} = 'mickey' ;
734
f6b705ef 735 # iterate through the btree using seq
88108326 736 # and print each key/value pair.
610ab055 737 $key = $value = 0 ;
f6b705ef 738 for ($status = $x->seq($key, $value, R_FIRST) ;
739 $status == 0 ;
740 $status = $x->seq($key, $value, R_NEXT) )
88108326 741 { print "$key -> $value\n" }
742
743 undef $x ;
744 untie %h ;
745
746that prints:
747
748 Smith -> John
749 Wall -> Brick
f6b705ef 750 Wall -> Brick
88108326 751 Wall -> Larry
752 mouse -> mickey
753
f6b705ef 754This time we have got all the key/value pairs, including the multiple
88108326 755values associated with the key C<Wall>.
756
68dc0745 757=head2 The get_dup() Method
f6b705ef 758
759B<DB_File> comes with a utility method, called C<get_dup>, to assist in
88108326 760reading duplicate values from BTREE databases. The method can take the
761following forms:
762
763 $count = $x->get_dup($key) ;
764 @list = $x->get_dup($key) ;
765 %list = $x->get_dup($key, 1) ;
766
767In a scalar context the method returns the number of values associated
768with the key, C<$key>.
769
770In list context, it returns all the values which match C<$key>. Note
f6b705ef 771that the values will be returned in an apparently random order.
88108326 772
7a2e2cd6 773In list context, if the second parameter is present and evaluates
774TRUE, the method returns an associative array. The keys of the
775associative array correspond to the values that matched in the BTREE
776and the values of the array are a count of the number of times that
777particular value occurred in the BTREE.
88108326 778
f6b705ef 779So assuming the database created above, we can use C<get_dup> like
88108326 780this:
781
610ab055 782 my $cnt = $x->get_dup("Wall") ;
88108326 783 print "Wall occurred $cnt times\n" ;
784
610ab055 785 my %hash = $x->get_dup("Wall", 1) ;
88108326 786 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 787 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 788
610ab055 789 my @list = $x->get_dup("Wall") ;
88108326 790 print "Wall => [@list]\n" ;
791
f6b705ef 792 @list = $x->get_dup("Smith") ;
88108326 793 print "Smith => [@list]\n" ;
794
f6b705ef 795 @list = $x->get_dup("Dog") ;
88108326 796 print "Dog => [@list]\n" ;
797
798
799and it will print:
800
f6b705ef 801 Wall occurred 3 times
88108326 802 Larry is there
f6b705ef 803 There are 2 Brick Walls
804 Wall => [Brick Brick Larry]
88108326 805 Smith => [John]
806 Dog => []
3b35bae3 807
f6b705ef 808=head2 Matching Partial Keys
809
810The BTREE interface has a feature which allows partial keys to be
811matched. This functionality is I<only> available when the C<seq> method
812is used along with the R_CURSOR flag.
813
814 $x->seq($key, $value, R_CURSOR) ;
815
816Here is the relevant quote from the dbopen man page where it defines
817the use of the R_CURSOR flag with seq:
818
f6b705ef 819 Note, for the DB_BTREE access method, the returned key is not
820 necessarily an exact match for the specified key. The returned key
821 is the smallest key greater than or equal to the specified key,
822 permitting partial key matches and range searches.
823
f6b705ef 824In the example script below, the C<match> sub uses this feature to find
825and print the first matching key/value pair given a partial key.
826
610ab055 827 use strict ;
f6b705ef 828 use DB_File ;
829 use Fcntl ;
610ab055 830
831 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 832
833 sub match
834 {
835 my $key = shift ;
610ab055 836 my $value = 0;
f6b705ef 837 my $orig_key = $key ;
838 $x->seq($key, $value, R_CURSOR) ;
839 print "$orig_key\t-> $key\t-> $value\n" ;
840 }
841
842 $filename = "tree" ;
843 unlink $filename ;
844
845 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
846 or die "Cannot open $filename: $!\n";
847
848 # Add some key/value pairs to the file
849 $h{'mouse'} = 'mickey' ;
850 $h{'Wall'} = 'Larry' ;
851 $h{'Walls'} = 'Brick' ;
852 $h{'Smith'} = 'John' ;
853
854
610ab055 855 $key = $value = 0 ;
f6b705ef 856 print "IN ORDER\n" ;
857 for ($st = $x->seq($key, $value, R_FIRST) ;
858 $st == 0 ;
859 $st = $x->seq($key, $value, R_NEXT) )
860
861 { print "$key -> $value\n" }
862
863 print "\nPARTIAL MATCH\n" ;
864
865 match "Wa" ;
866 match "A" ;
867 match "a" ;
868
869 undef $x ;
870 untie %h ;
871
872Here is the output:
873
874 IN ORDER
875 Smith -> John
876 Wall -> Larry
877 Walls -> Brick
878 mouse -> mickey
879
880 PARTIAL MATCH
881 Wa -> Wall -> Larry
882 A -> Smith -> John
883 a -> mouse -> mickey
884
885=head1 DB_RECNO
886
887DB_RECNO provides an interface to flat text files. Both variable and
888fixed length records are supported.
3b35bae3 889
88108326 890In order to make RECNO more compatible with Perl the array offset for
891all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 892
88108326 893As with normal Perl arrays, a RECNO array can be accessed using
894negative indexes. The index -1 refers to the last element of the array,
895-2 the second last, and so on. Attempting to access an element before
896the start of the array will raise a fatal run-time error.
3b35bae3 897
68dc0745 898=head2 The 'bval' Option
36477c24 899
900The operation of the bval option warrants some discussion. Here is the
901definition of bval from the Berkeley DB 1.85 recno manual page:
902
903 The delimiting byte to be used to mark the end of a
904 record for variable-length records, and the pad charac-
905 ter for fixed-length records. If no value is speci-
906 fied, newlines (``\n'') are used to mark the end of
907 variable-length records and fixed-length records are
908 padded with spaces.
909
910The second sentence is wrong. In actual fact bval will only default to
911C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
912openinfo parameter is used at all, the value that happens to be in bval
913will be used. That means you always have to specify bval when making
914use of any of the options in the openinfo parameter. This documentation
915error will be fixed in the next release of Berkeley DB.
916
917That clarifies the situation with regard to Berkeley DB itself. What
918about B<DB_File>? Well, the behavior defined in the quote above is
919quite useful, so B<DB_File> conforms to it.
920
921That means that you can specify other options (e.g. cachesize) and
922still have bval default to C<"\n"> for variable length records, and
923space for fixed length records.
924
f6b705ef 925=head2 A Simple Example
3b35bae3 926
f6b705ef 927Here is a simple example that uses RECNO.
928
610ab055 929 use strict ;
f6b705ef 930 use DB_File ;
f6b705ef 931
610ab055 932 my @h ;
f6b705ef 933 tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
934 or die "Cannot open file 'text': $!\n" ;
935
936 # Add a few key/value pairs to the file
937 $h[0] = "orange" ;
938 $h[1] = "blue" ;
939 $h[2] = "yellow" ;
940
941 # Check for existence of a key
942 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
943
944 # use a negative index
945 print "The last element is $h[-1]\n" ;
946 print "The 2nd last element is $h[-2]\n" ;
947
948 untie @h ;
3b35bae3 949
f6b705ef 950Here is the output from the script:
951
952
953 Element 1 Exists with value blue
954 The last element is yellow
955 The 2nd last element is blue
956
957=head2 Extra Methods
958
959As you can see from the example above, the tied array interface is
960quite limited. To make the interface more useful, a number of methods
961are supplied with B<DB_File> to simulate the standard array operations
962that are not currently implemented in Perl's tied array interface. All
963these methods are accessed via the object returned from the tie call.
964
965Here are the methods:
966
967=over 5
3b35bae3 968
f6b705ef 969=item B<$X-E<gt>push(list) ;>
970
971Pushes the elements of C<list> to the end of the array.
972
973=item B<$value = $X-E<gt>pop ;>
974
975Removes and returns the last element of the array.
976
977=item B<$X-E<gt>shift>
978
979Removes and returns the first element of the array.
980
981=item B<$X-E<gt>unshift(list) ;>
982
983Pushes the elements of C<list> to the start of the array.
984
985=item B<$X-E<gt>length>
986
987Returns the number of elements in the array.
988
989=back
990
991=head2 Another Example
992
993Here is a more complete example that makes use of some of the methods
994described above. It also makes use of the API interface directly (see
995L<THE API INTERFACE>).
996
997 use strict ;
998 use vars qw(@h $H $file $i) ;
999 use DB_File ;
1000 use Fcntl ;
1001
1002 $file = "text" ;
1003
1004 unlink $file ;
1005
1006 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1007 or die "Cannot open file $file: $!\n" ;
1008
1009 # first create a text file to play with
1010 $h[0] = "zero" ;
1011 $h[1] = "one" ;
1012 $h[2] = "two" ;
1013 $h[3] = "three" ;
1014 $h[4] = "four" ;
1015
1016
1017 # Print the records in order.
1018 #
1019 # The length method is needed here because evaluating a tied
1020 # array in a scalar context does not return the number of
1021 # elements in the array.
1022
1023 print "\nORIGINAL\n" ;
1024 foreach $i (0 .. $H->length - 1) {
1025 print "$i: $h[$i]\n" ;
1026 }
1027
1028 # use the push & pop methods
1029 $a = $H->pop ;
1030 $H->push("last") ;
1031 print "\nThe last record was [$a]\n" ;
1032
1033 # and the shift & unshift methods
1034 $a = $H->shift ;
1035 $H->unshift("first") ;
1036 print "The first record was [$a]\n" ;
1037
1038 # Use the API to add a new record after record 2.
1039 $i = 2 ;
1040 $H->put($i, "Newbie", R_IAFTER) ;
1041
1042 # and a new record before record 1.
1043 $i = 1 ;
1044 $H->put($i, "New One", R_IBEFORE) ;
1045
1046 # delete record 3
1047 $H->del(3) ;
1048
1049 # now print the records in reverse order
1050 print "\nREVERSE\n" ;
1051 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1052 { print "$i: $h[$i]\n" }
1053
1054 # same again, but use the API functions instead
1055 print "\nREVERSE again\n" ;
610ab055 1056 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1057 for ($s = $H->seq($k, $v, R_LAST) ;
1058 $s == 0 ;
1059 $s = $H->seq($k, $v, R_PREV))
1060 { print "$k: $v\n" }
1061
1062 undef $H ;
1063 untie @h ;
1064
1065and this is what it outputs:
1066
1067 ORIGINAL
1068 0: zero
1069 1: one
1070 2: two
1071 3: three
1072 4: four
1073
1074 The last record was [four]
1075 The first record was [zero]
1076
1077 REVERSE
1078 5: last
1079 4: three
1080 3: Newbie
1081 2: one
1082 1: New One
1083 0: first
1084
1085 REVERSE again
1086 5: last
1087 4: three
1088 3: Newbie
1089 2: one
1090 1: New One
1091 0: first
1092
1093Notes:
1094
1095=over 5
1096
1097=item 1.
1098
1099Rather than iterating through the array C<@h> like this:
1100
1101 foreach $i (@h)
1102
1103it is necessary to use either this:
1104
1105 foreach $i (0 .. $H->length - 1)
1106
1107or this:
1108
1109 for ($a = $H->seq($k, $v, R_FIRST) ;
1110 $a == 0 ;
1111 $a = $H->seq($k, $v, R_NEXT) )
1112
1113=item 2.
1114
1115Notice that both times the C<put> method was used the record index was
1116specified using a variable, C<$i>, rather than the literal value
1117itself. This is because C<put> will return the record number of the
1118inserted line via that parameter.
1119
1120=back
1121
1122=head1 THE API INTERFACE
3b35bae3 1123
1124As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1125possible to make direct use of most of the API functions defined in the
8e07c86e 1126Berkeley DB documentation.
3b35bae3 1127
88108326 1128To do this you need to store a copy of the object returned from the tie.
3b35bae3 1129
88108326 1130 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1131
8e07c86e 1132Once you have done that, you can access the Berkeley DB API functions
88108326 1133as B<DB_File> methods directly like this:
3b35bae3 1134
1135 $db->put($key, $value, R_NOOVERWRITE) ;
1136
88108326 1137B<Important:> If you have saved a copy of the object returned from
1138C<tie>, the underlying database file will I<not> be closed until both
1139the tied variable is untied and all copies of the saved object are
610ab055 1140destroyed.
88108326 1141
1142 use DB_File ;
1143 $db = tie %hash, "DB_File", "filename"
1144 or die "Cannot tie filename: $!" ;
1145 ...
1146 undef $db ;
1147 untie %hash ;
1148
9a2c4ce3 1149See L<The untie() Gotcha> for more details.
778183f3 1150
88108326 1151All the functions defined in L<dbopen> are available except for
1152close() and dbopen() itself. The B<DB_File> method interface to the
1153supported functions has been implemented to mirror the way Berkeley DB
1154works whenever possible. In particular note that:
1155
1156=over 5
1157
1158=item *
1159
1160The methods return a status value. All return 0 on success.
1161All return -1 to signify an error and set C<$!> to the exact
1162error code. The return code 1 generally (but not always) means that the
1163key specified did not exist in the database.
1164
1165Other return codes are defined. See below and in the Berkeley DB
1166documentation for details. The Berkeley DB documentation should be used
1167as the definitive source.
1168
1169=item *
3b35bae3 1170
88108326 1171Whenever a Berkeley DB function returns data via one of its parameters,
1172the equivalent B<DB_File> method does exactly the same.
3b35bae3 1173
88108326 1174=item *
1175
1176If you are careful, it is possible to mix API calls with the tied
1177hash/array interface in the same piece of code. Although only a few of
1178the methods used to implement the tied interface currently make use of
1179the cursor, you should always assume that the cursor has been changed
1180any time the tied hash/array interface is used. As an example, this
1181code will probably not do what you expect:
1182
1183 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1184 or die "Cannot tie $filename: $!" ;
1185
1186 # Get the first key/value pair and set the cursor
1187 $X->seq($key, $value, R_FIRST) ;
1188
1189 # this line will modify the cursor
1190 $count = scalar keys %x ;
1191
1192 # Get the second key/value pair.
1193 # oops, it didn't, it got the last key/value pair!
1194 $X->seq($key, $value, R_NEXT) ;
1195
1196The code above can be rearranged to get around the problem, like this:
1197
1198 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1199 or die "Cannot tie $filename: $!" ;
1200
1201 # this line will modify the cursor
1202 $count = scalar keys %x ;
1203
1204 # Get the first key/value pair and set the cursor
1205 $X->seq($key, $value, R_FIRST) ;
1206
1207 # Get the second key/value pair.
1208 # worked this time.
1209 $X->seq($key, $value, R_NEXT) ;
1210
1211=back
1212
1213All the constants defined in L<dbopen> for use in the flags parameters
1214in the methods defined below are also available. Refer to the Berkeley
1215DB documentation for the precise meaning of the flags values.
1216
1217Below is a list of the methods available.
3b35bae3 1218
1219=over 5
1220
f6b705ef 1221=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1222
1223Given a key (C<$key>) this method reads the value associated with it
1224from the database. The value read from the database is returned in the
1225C<$value> parameter.
3b35bae3 1226
88108326 1227If the key does not exist the method returns 1.
3b35bae3 1228
88108326 1229No flags are currently defined for this method.
3b35bae3 1230
f6b705ef 1231=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1232
88108326 1233Stores the key/value pair in the database.
1234
1235If you use either the R_IAFTER or R_IBEFORE flag, the C<$key> parameter
8e07c86e 1236will be set to the record number of the inserted key/value pair.
3b35bae3 1237
88108326 1238Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1239R_SETCURSOR.
1240
f6b705ef 1241=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1242
88108326 1243Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1244
88108326 1245A return code of 1 means that the requested key was not in the
1246database.
3b35bae3 1247
88108326 1248R_CURSOR is the only valid flag at present.
3b35bae3 1249
f6b705ef 1250=item B<$status = $X-E<gt>fd ;>
3b35bae3 1251
88108326 1252Returns the file descriptor for the underlying database.
3b35bae3 1253
f6b705ef 1254See L<Locking Databases> for an example of how to make use of the
88108326 1255C<fd> method to lock your database.
3b35bae3 1256
f6b705ef 1257=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1258
88108326 1259This interface allows sequential retrieval from the database. See
1260L<dbopen> for full details.
1261
1262Both the C<$key> and C<$value> parameters will be set to the key/value
1263pair read from the database.
1264
1265The flags parameter is mandatory. The valid flag values are R_CURSOR,
1266R_FIRST, R_LAST, R_NEXT and R_PREV.
1267
f6b705ef 1268=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1269
1270Flushes any cached buffers to disk.
1271
1272R_RECNOSYNC is the only valid flag at present.
3b35bae3 1273
1274=back
1275
f6b705ef 1276=head1 HINTS AND TIPS
3b35bae3 1277
3b35bae3 1278
cb1a09d0 1279=head2 Locking Databases
3b35bae3 1280
cb1a09d0 1281Concurrent access of a read-write database by several parties requires
1282them all to use some kind of locking. Here's an example of Tom's that
1283uses the I<fd> method to get the file descriptor, and then a careful
1284open() to give something Perl will flock() for you. Run this repeatedly
1285in the background to watch the locks granted in proper order.
3b35bae3 1286
cb1a09d0 1287 use DB_File;
1288
1289 use strict;
1290
1291 sub LOCK_SH { 1 }
1292 sub LOCK_EX { 2 }
1293 sub LOCK_NB { 4 }
1294 sub LOCK_UN { 8 }
1295
1296 my($oldval, $fd, $db, %db, $value, $key);
1297
1298 $key = shift || 'default';
1299 $value = shift || 'magic';
1300
1301 $value .= " $$";
1302
1303 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1304 || die "dbcreat /tmp/foo.db $!";
1305 $fd = $db->fd;
1306 print "$$: db fd is $fd\n";
1307 open(DB_FH, "+<&=$fd") || die "dup $!";
1308
1309
1310 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1311 print "$$: CONTENTION; can't read during write update!
1312 Waiting for read lock ($!) ....";
1313 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1314 }
1315 print "$$: Read lock granted\n";
1316
1317 $oldval = $db{$key};
1318 print "$$: Old value was $oldval\n";
1319 flock(DB_FH, LOCK_UN);
1320
1321 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1322 print "$$: CONTENTION; must have exclusive lock!
1323 Waiting for write lock ($!) ....";
1324 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1325 }
1326
1327 print "$$: Write lock granted\n";
1328 $db{$key} = $value;
610ab055 1329 $db->sync; # to flush
cb1a09d0 1330 sleep 10;
1331
1332 flock(DB_FH, LOCK_UN);
88108326 1333 undef $db;
cb1a09d0 1334 untie %db;
1335 close(DB_FH);
1336 print "$$: Updated db to $key=$value\n";
1337
68dc0745 1338=head2 Sharing Databases With C Applications
f6b705ef 1339
1340There is no technical reason why a Berkeley DB database cannot be
1341shared by both a Perl and a C application.
1342
1343The vast majority of problems that are reported in this area boil down
1344to the fact that C strings are NULL terminated, whilst Perl strings are
1345not.
1346
1347Here is a real example. Netscape 2.0 keeps a record of the locations you
1348visit along with the time you last visited them in a DB_HASH database.
1349This is usually stored in the file F<~/.netscape/history.db>. The key
1350field in the database is the location string and the value field is the
1351time the location was last visited stored as a 4 byte binary value.
1352
1353If you haven't already guessed, the location string is stored with a
1354terminating NULL. This means you need to be careful when accessing the
1355database.
1356
1357Here is a snippet of code that is loosely based on Tom Christiansen's
1358I<ggh> script (available from your nearest CPAN archive in
1359F<authors/id/TOMC/scripts/nshist.gz>).
1360
610ab055 1361 use strict ;
f6b705ef 1362 use DB_File ;
1363 use Fcntl ;
f6b705ef 1364
610ab055 1365 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1366 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1367
1368 $HISTORY = "$dotdir/.netscape/history.db";
1369
1370 tie %hist_db, 'DB_File', $HISTORY
1371 or die "Cannot open $HISTORY: $!\n" ;
1372
1373 # Dump the complete database
1374 while ( ($href, $binary_time) = each %hist_db ) {
1375
1376 # remove the terminating NULL
1377 $href =~ s/\x00$// ;
1378
1379 # convert the binary time into a user friendly string
1380 $date = localtime unpack("V", $binary_time);
1381 print "$date $href\n" ;
1382 }
1383
1384 # check for the existence of a specific key
1385 # remember to add the NULL
1386 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1387 $date = localtime unpack("V", $binary_time) ;
1388 print "Last visited mox.perl.com on $date\n" ;
1389 }
1390 else {
1391 print "Never visited mox.perl.com\n"
1392 }
1393
1394 untie %hist_db ;
1395
68dc0745 1396=head2 The untie() Gotcha
778183f3 1397
7a2e2cd6 1398If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1399recommended that you read L<perltie/The untie Gotcha>.
778183f3 1400
1401Even if you don't currently make use of the API interface, it is still
1402worth reading it.
1403
1404Here is an example which illustrates the problem from a B<DB_File>
1405perspective:
1406
1407 use DB_File ;
1408 use Fcntl ;
1409
1410 my %x ;
1411 my $X ;
1412
1413 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1414 or die "Cannot tie first time: $!" ;
1415
1416 $x{123} = 456 ;
1417
1418 untie %x ;
1419
1420 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1421 or die "Cannot tie second time: $!" ;
1422
1423 untie %x ;
1424
1425When run, the script will produce this error message:
1426
1427 Cannot tie second time: Invalid argument at bad.file line 14.
1428
1429Although the error message above refers to the second tie() statement
1430in the script, the source of the problem is really with the untie()
1431statement that precedes it.
1432
1433Having read L<perltie> you will probably have already guessed that the
1434error is caused by the extra copy of the tied object stored in C<$X>.
1435If you haven't, then the problem boils down to the fact that the
1436B<DB_File> destructor, DESTROY, will not be called until I<all>
1437references to the tied object are destroyed. Both the tied variable,
1438C<%x>, and C<$X> above hold a reference to the object. The call to
1439untie() will destroy the first, but C<$X> still holds a valid
1440reference, so the destructor will not get called and the database file
1441F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1442attempt to open a database that is already open via the catch-all
1443"Invalid argument" doesn't help.
1444
1445If you run the script with the C<-w> flag the error message becomes:
1446
1447 untie attempted while 1 inner references still exist at bad.file line 12.
1448 Cannot tie second time: Invalid argument at bad.file line 14.
1449
1450which pinpoints the real problem. Finally the script can now be
1451modified to fix the original problem by destroying the API object
1452before the untie:
1453
1454 ...
1455 $x{123} = 456 ;
1456
1457 undef $X ;
1458 untie %x ;
1459
1460 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1461 ...
1462
f6b705ef 1463
1464=head1 COMMON QUESTIONS
1465
1466=head2 Why is there Perl source in my database?
1467
1468If you look at the contents of a database file created by DB_File,
1469there can sometimes be part of a Perl script included in it.
1470
1471This happens because Berkeley DB uses dynamic memory to allocate
1472buffers which will subsequently be written to the database file. Being
1473dynamic, the memory could have been used for anything before DB
1474malloced it. As Berkeley DB doesn't clear the memory once it has been
1475allocated, the unused portions will contain random junk. In the case
1476where a Perl script gets written to the database, the random junk will
1477correspond to an area of dynamic memory that happened to be used during
1478the compilation of the script.
1479
1480Unless you don't like the possibility of there being part of your Perl
1481scripts embedded in a database file, this is nothing to worry about.
1482
1483=head2 How do I store complex data structures with DB_File?
1484
1485Although B<DB_File> cannot do this directly, there is a module which
1486can layer transparently over B<DB_File> to accomplish this feat.
1487
1488Check out the MLDBM module, available on CPAN in the directory
1489F<modules/by-module/MLDBM>.
1490
1491=head2 What does "Invalid Argument" mean?
1492
1493You will get this error message when one of the parameters in the
1494C<tie> call is wrong. Unfortunately there are quite a few parameters to
1495get wrong, so it can be difficult to figure out which one it is.
1496
1497Here are a couple of possibilities:
1498
1499=over 5
1500
1501=item 1.
1502
610ab055 1503Attempting to reopen a database without closing it.
f6b705ef 1504
1505=item 2.
1506
1507Using the O_WRONLY flag.
1508
1509=back
1510
1511=head2 What does "Bareword 'DB_File' not allowed" mean?
1512
1513You will encounter this particular error message when you have the
1514C<strict 'subs'> pragma (or the full strict pragma) in your script.
1515Consider this script:
1516
1517 use strict ;
1518 use DB_File ;
1519 use vars qw(%x) ;
1520 tie %x, DB_File, "filename" ;
1521
1522Running it produces the error in question:
1523
1524 Bareword "DB_File" not allowed while "strict subs" in use
1525
1526To get around the error, place the word C<DB_File> in either single or
1527double quotes, like this:
1528
1529 tie %x, "DB_File", "filename" ;
1530
1531Although it might seem like a real pain, it is really worth the effort
1532of having a C<use strict> in all your scripts.
1533
cb1a09d0 1534=head1 HISTORY
1535
1536=over
1537
1538=item 0.1
3b35bae3 1539
1540First Release.
1541
cb1a09d0 1542=item 0.2
3b35bae3 1543
1544When B<DB_File> is opening a database file it no longer terminates the
1545process if I<dbopen> returned an error. This allows file protection
1546errors to be caught at run time. Thanks to Judith Grass
cb1a09d0 1547E<lt>grass@cybercash.comE<gt> for spotting the bug.
3b35bae3 1548
cb1a09d0 1549=item 0.3
8e07c86e 1550
1551Added prototype support for multiple btree compare callbacks.
1552
cb1a09d0 1553=item 1.0
8e07c86e 1554
1555B<DB_File> has been in use for over a year. To reflect that, the
1556version number has been incremented to 1.0.
1557
1558Added complete support for multiple concurrent callbacks.
1559
1560Using the I<push> method on an empty list didn't work properly. This
1561has been fixed.
1562
cb1a09d0 1563=item 1.01
4633a7c4 1564
1565Fixed a core dump problem with SunOS.
1566
1567The return value from TIEHASH wasn't set to NULL when dbopen returned
1568an error.
1569
88108326 1570=item 1.02
1571
f6b705ef 1572Merged OS/2 specific code into DB_File.xs
88108326 1573
1574Removed some redundant code in DB_File.xs.
1575
1576Documentation update.
1577
1578Allow negative subscripts with RECNO interface.
1579
1580Changed the default flags from O_RDWR to O_CREAT|O_RDWR.
1581
1582The example code which showed how to lock a database needed a call to
1583C<sync> added. Without it the resultant database file was empty.
1584
f6b705ef 1585Added get_dup method.
88108326 1586
f6b705ef 1587=item 1.03
1588
1589Documentation update.
3b35bae3 1590
f6b705ef 1591B<DB_File> now imports the constants (O_RDWR, O_CREAT etc.) from Fcntl
1592automatically.
3b35bae3 1593
f6b705ef 1594The standard hash function C<exists> is now supported.
1595
1596Modified the behavior of get_dup. When it returns an associative
1597array, the value is the count of the number of matching BTREE values.
3b35bae3 1598
610ab055 1599=item 1.04
1600
1601Minor documentation changes.
1602
1603Fixed a bug in hash_cb. Patches supplied by Dave Hammen,
1604E<lt>hammen@gothamcity.jsc.nasa.govE<gt>.
1605
1606Fixed a bug with the constructors for DB_File::HASHINFO,
1607DB_File::BTREEINFO and DB_File::RECNOINFO. Also tidied up the
1608constructors to make them C<-w> clean.
1609
1610Reworked part of the test harness to be more locale friendly.
1611
1612=item 1.05
1613
1614Made all scripts in the documentation C<strict> and C<-w> clean.
1615
1616Added logic to F<DB_File.xs> to allow the module to be built after Perl
1617is installed.
1618
ff68c719 1619=item 1.06
1620
1621Minor namespace cleanup: Localized C<PrintBtree>.
1622
36477c24 1623=item 1.07
1624
1625Fixed bug with RECNO, where bval wasn't defaulting to "\n".
1626
1627=item 1.08
1628
1629Documented operation of bval.
1630
18d2dc8c 1631=item 1.09
1632
1633Minor bug fix in DB_File::HASHINFO, DB_File::RECNOINFO and
1634DB_File::BTREEINFO.
1635
1636Changed default mode to 0666.
1637
a0b8c8c1 1638=item 1.10
1639
1640Fixed fd method so that it still returns -1 for in-memory files when db
16411.86 is used.
1642
778183f3 1643=item 1.11
1644
1645Documented the untie gotcha.
1646
68dc0745 1647=item 1.12
1648
1649Documented the incompatibility with version 2 of Berkeley DB.
1650
d3ef3b8a 1651=item 1.13
1652
1653Minor changes to DB_File.xs and DB_File.pm
1654
610ab055 1655=back
1656
3b35bae3 1657=head1 BUGS
1658
8e07c86e 1659Some older versions of Berkeley DB had problems with fixed length
1660records using the RECNO file format. The newest version at the time of
1661writing was 1.85 - this seems to have fixed the problems with RECNO.
3b35bae3 1662
8e07c86e 1663I am sure there are bugs in the code. If you do find any, or can
1664suggest any enhancements, I would welcome your comments.
3b35bae3 1665
1666=head1 AVAILABILITY
1667
f6b705ef 1668B<DB_File> comes with the standard Perl source distribution. Look in
1669the directory F<ext/DB_File>.
1670
68dc0745 1671This version of B<DB_File> will only work with version 1.x of Berkeley
1672DB. It is I<not> yet compatible with version 2.
1673
1674Version 1 of Berkeley DB is available at your nearest CPAN archive (see
cb1a09d0 1675L<perlmod/"CPAN"> for a list) in F<src/misc/db.1.85.tar.gz>, or via the
610ab055 1676host F<ftp.cs.berkeley.edu> in F</ucb/4bsd/db.tar.gz>. Alternatively,
1677check out the Berkeley DB home page at F<http://www.bostic.com/db>. It
1678is I<not> under the GPL.
3b35bae3 1679
88108326 1680If you are running IRIX, then get Berkeley DB from
1681F<http://reality.sgi.com/ariel>. It has the patches necessary to
1682compile properly on IRIX 5.3.
1683
a0b8c8c1 1684As of January 1997, version 1.86 of Berkeley DB is available from the
1685Berkeley DB home page. Although this release does fix a number of bugs
778183f3 1686that were present in 1.85 you should be aware of the following
a0b8c8c1 1687information (taken from the Berkeley DB home page) before you consider
1688using it:
1689
1690 DB version 1.86 includes a new implementation of the hash access
1691 method that fixes a variety of hashing problems found in DB version
1692 1.85. We are making it available as an interim solution until DB
1693 2.0 is available.
1694
1695 PLEASE NOTE: the underlying file format for the hash access method
1696 changed between version 1.85 and version 1.86, so you will have to
1697 dump and reload all of your databases to convert from version 1.85
1698 to version 1.86. If you do not absolutely require the fixes from
1699 version 1.86, we strongly urge you to wait until DB 2.0 is released
1700 before upgrading from 1.85.
1701
1702
3b35bae3 1703=head1 SEE ALSO
1704
1705L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>
1706
3b35bae3 1707=head1 AUTHOR
1708
8e07c86e 1709The DB_File interface was written by Paul Marquess
88108326 1710E<lt>pmarquess@bfsec.bt.co.ukE<gt>.
d3ef3b8a 1711Questions about the DB system itself may be addressed to
1712E<lt>db@sleepycat.comE<gt>.
3b35bae3 1713
1714=cut