DB_File 1.15 patch
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
# DB_File.pm -- Perl 5 interface to Berkeley DB
#
# written by Paul Marquess (pmarquess@bfsec.bt.co.uk)
# last modified 29th Jun 1997
# version 1.15
#
# Copyright (c) 1995, 1996, 1997 Paul Marquess. All rights reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
10
8e07c86e 11
package DB_File::HASHINFO ;

# Option object for a DB_HASH database.
#
# An instance is a blessed reference to a hash that is itself tied to
# this package, so every element access goes through the FETCH, STORE,
# DELETE and EXISTS methods below. Those methods reject any key that is
# not listed in the object's VALID set.

require 5.003 ;

use strict;
use Carp;
require Tie::Hash;
@DB_File::HASHINFO::ISA = qw(Tie::Hash);

# new -- constructor.
# Ties a fresh hash to $pkg and returns a reference to it blessed into
# $pkg.
sub new
{
    my $pkg = shift ;
    my %x ;
    tie %x, $pkg ;
    bless \%x, $pkg ;
}


# TIEHASH -- build the object that sits behind the tie.
#   VALID  the element names that may be used (name => 1)
#   GOT    the elements that have been stored so far (name => value)
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map {$_, 1}
                       qw( bsize ffactor nelem cachesize hash lorder)
                     },
            GOT   => {}
          }, $pkg ;
}


# FETCH -- return the value stored for $key (undef if nothing has been
# stored). Croaks if $key is not in VALID.
sub FETCH
{
    my $self = shift ;
    my $key  = shift ;

    return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;

    my $pkg = ref $self ;
    croak "${pkg}::FETCH - Unknown element '$key'" ;
}


# STORE -- remember $value for $key. Croaks if $key is not in VALID.
sub STORE
{
    my $self  = shift ;
    my $key   = shift ;
    my $value = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        $self->{GOT}{$key} = $value ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::STORE - Unknown element '$key'" ;
}

# DELETE -- forget any value stored for $key. Croaks if $key is not in
# VALID.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    # Report the object's real class, as FETCH and STORE do, so that the
    # message is correct for classes that inherit this method.
    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}

# EXISTS -- true when $key is a valid element name for this object,
# whether or not a value has been stored for it.
sub EXISTS
{
    my $self = shift ;
    my $key  = shift ;

    exists $self->{VALID}{$key} ;
}

# NotHere -- croak with a message saying that the object's class does
# not define the method named $method.
sub NotHere
{
    my $self   = shift ;
    my $method = shift ;

    croak ref($self) . " does not define the method ${method}" ;
}

# Iterating over the option hash and clearing it are not supported.
sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 104
package DB_File::RECNOINFO ;

# Option object for a DB_RECNO database. Everything except the set of
# valid element names is inherited from DB_File::HASHINFO.

use strict ;

@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;

# TIEHASH -- build the object that sits behind the tie.
#   VALID  the element names that may be used (name => 1)
#   GOT    the elements that have been stored so far (name => value)
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map {$_, 1}
                       qw( bval cachesize psize flags lorder reclen bfname )
                     },
            GOT   => {},
          }, $pkg ;
}
121
package DB_File::BTREEINFO ;

# Option object for a DB_BTREE database. Everything except the set of
# valid element names is inherited from DB_File::HASHINFO.

use strict ;

@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;

# TIEHASH -- build the object that sits behind the tie.
#   VALID  the element names that may be used (name => 1)
#   GOT    the elements that have been stored so far (name => value)
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map {$_, 1}
                       qw( flags cachesize maxkeypage minkeypage psize
                           compare prefix lorder )
                     },
            GOT   => {},
          }, $pkg ;
}
139
140
package DB_File ;

use strict;
use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO) ;
use Carp;


$VERSION = "1.15" ;

# One pre-built option object per database type; all three are exported.
#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;

require Tie::Hash;
require Exporter;
use AutoLoader;
require DynaLoader;
@ISA = qw(Tie::Hash Exporter DynaLoader);
@EXPORT = qw(
        $DB_BTREE $DB_HASH $DB_RECNO

        BTREEMAGIC
        BTREEVERSION
        DB_LOCK
        DB_SHMEM
        DB_TXN
        HASHMAGIC
        HASHVERSION
        MAX_PAGE_NUMBER
        MAX_PAGE_OFFSET
        MAX_REC_NUMBER
        RET_ERROR
        RET_SPECIAL
        RET_SUCCESS
        R_CURSOR
        R_DUP
        R_FIRST
        R_FIXEDLEN
        R_IAFTER
        R_IBEFORE
        R_LAST
        R_NEXT
        R_NOKEY
        R_NOOVERWRITE
        R_PREV
        R_RECNOSYNC
        R_SETCURSOR
        R_SNAPSHOT
        __R_UNUSED

);
193
# AUTOLOAD -- resolve an undefined sub name as a DB constant.
#
# The unqualified name is passed to constant(), together with the first
# argument of the call (or 0 when there is none). If $! is non-zero
# afterwards:
#   - when its text matches /Invalid/, the call is handed on to
#     AutoLoader's AUTOLOAD;
#   - otherwise the sub croaks, naming the caller's file and line.
# On success a sub returning the value is compiled under the requested
# name, so later calls never reach AUTOLOAD, and control jumps to it.
# NOTE(review): constant() is not defined in the visible part of this
# file; presumably it is supplied by the compiled part of the module.
sub AUTOLOAD {
    my($constname);
    ($constname = $AUTOLOAD) =~ s/.*:://;
    my $val = constant($constname, @_ ? $_[0] : 0);
    if ($! != 0) {
        if ($! =~ /Invalid/) {
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }
        else {
            my($pack,$file,$line) = caller;
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.
";
        }
    }
    eval "sub $AUTOLOAD { $val }";
    goto &$AUTOLOAD;
}
212
f6b705ef 213
# Re-export the Fcntl O_XXX constants so that callers get O_RDWR,
# O_CREAT and friends just by loading this module. Wrapped in eval so
# that a failure here is silently ignored rather than fatal.
eval {
    # Make all Fcntl O_XXX constants available for importing
    require Fcntl;
    my @O = grep /^O_/, @Fcntl::EXPORT;
    Fcntl->import(@O);  # first we import what we want to export
    push(@EXPORT, @O);
};
f6b705ef 221
# Invoke the inherited bootstrap method, passing the module version.
DB_File->bootstrap($VERSION) ;

# Preloaded methods go here.  Autoload methods go after __END__, and are
# processed by the autosplit program.
226
# tie_hash_or_array -- common body of TIEHASH and TIEARRAY.
#
# Works out which of the two it is serving by looking at the name of the
# sub that called it (caller(1)), so it must be reached by an ordinary
# call from a sub whose name contains TIEHASH or not, as appropriate.
#
# If a fifth argument is supplied and it is a reference to a tied hash
# (its string form contains "=HASH"), it is replaced by the object the
# hash is tied to before everything is passed on to DoTie_.
# NOTE(review): DoTie_ is not defined in the visible part of this file;
# presumably it is supplied by the compiled part of the module.
sub tie_hash_or_array
{
    my (@arg) = @_ ;
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    DoTie_($tieHASH, @arg) ;
}
237
# TIEHASH -- entry point used when a hash is tied to DB_File.
# Must stay a plain call: tie_hash_or_array inspects its caller's name
# to tell a hash tie from an array tie.
sub TIEHASH
{
    tie_hash_or_array(@_) ;
}
242
# TIEARRAY -- entry point used when an array is tied to DB_File.
# Must stay a plain call: tie_hash_or_array inspects its caller's name
# to tell a hash tie from an array tie.
sub TIEARRAY
{
    tie_hash_or_array(@_) ;
}
88108326 247
# get_dup -- collect the values stored under one key.
#
#   $count = $db->get_dup($key) ;       scalar context: number of matches
#   @list  = $db->get_dup($key) ;       list context: the matching values
#   %list  = $db->get_dup($key, 1) ;    list context, true flag:
#                                       value => number of occurrences
#
# Croaks with a usage message unless called with a key and at most one
# flag.
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my $db        = shift ;
    my $key       = shift ;
    my $flag      = shift ;
    my $value     = 0 ;
    my $origkey   = $key ;
    my $wantarray = wantarray ;
    my %values    = () ;
    my @values    = () ;
    my $counter   = 0 ;
    my $status    = 0 ;

    # Position on $key with R_CURSOR, then step with R_NEXT. Stop as soon
    # as seq returns a non-zero status or hands back a key that differs
    # from the one asked for. seq updates $key and $value in place.
    for ($status = $db->seq($key, $value, R_CURSOR()) ;
         $status == 0 and $key eq $origkey ;
         $status = $db->seq($key, $value, R_NEXT()) ) {

        # save the value or count number of matches
        if ($wantarray) {
            if ($flag)
                { ++ $values{$value} }
            else
                { push (@values, $value) }
        }
        else
            { ++ $counter }

    }

    return ($wantarray ? ($flag ? %values : @values) : $counter) ;
}
284
285
8e07c86e 2861;
287__END__
288
3b35bae3 289=head1 NAME
290
291DB_File - Perl5 access to Berkeley DB
292
293=head1 SYNOPSIS
294
295 use DB_File ;
88108326 296
297 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
298 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
299 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 300
3b35bae3 301 $status = $X->del($key [, $flags]) ;
302 $status = $X->put($key, $value [, $flags]) ;
303 $status = $X->get($key, $value [, $flags]) ;
760ac839 304 $status = $X->seq($key, $value, $flags) ;
3b35bae3 305 $status = $X->sync([$flags]) ;
306 $status = $X->fd ;
760ac839 307
f6b705ef 308 # BTREE only
88108326 309 $count = $X->get_dup($key) ;
310 @list = $X->get_dup($key) ;
311 %list = $X->get_dup($key, 1) ;
312
f6b705ef 313 # RECNO only
314 $a = $X->length;
315 $a = $X->pop ;
316 $X->push(list);
317 $a = $X->shift;
318 $X->unshift(list);
319
3b35bae3 320 untie %hash ;
321 untie @array ;
322
323=head1 DESCRIPTION
324
8e07c86e 325B<DB_File> is a module which allows Perl programs to make use of the
326facilities provided by Berkeley DB. If you intend to use this
f6b705ef 327module you should really have a copy of the Berkeley DB manual pages at
8e07c86e 328hand. The interface defined here mirrors the Berkeley DB interface
329closely.
3b35bae3 330
68dc0745 331Please note that this module will only work with version 1.x of
332Berkeley DB. Once Berkeley DB version 2 is released, B<DB_File> will be
333upgraded to work with it.
334
8e07c86e 335Berkeley DB is a C library which provides a consistent interface to a
336number of database formats. B<DB_File> provides an interface to all
337three of the database types currently supported by Berkeley DB.
3b35bae3 338
339The file types are:
340
341=over 5
342
88108326 343=item B<DB_HASH>
3b35bae3 344
88108326 345This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 346files. This is equivalent to the functionality provided by other
347hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
348the files created using DB_HASH are not compatible with any of the
349other packages mentioned.
3b35bae3 350
8e07c86e 351A default hashing algorithm, which will be adequate for most
352applications, is built into Berkeley DB. If you do need to use your own
353hashing algorithm it is possible to write your own in Perl and have
354B<DB_File> use it instead.
3b35bae3 355
88108326 356=item B<DB_BTREE>
357
358The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 359sorted, balanced binary tree.
3b35bae3 360
8e07c86e 361As with the DB_HASH format, it is possible to provide a user defined
362Perl routine to perform the comparison of keys. By default, though, the
363keys are stored in lexical order.
3b35bae3 364
88108326 365=item B<DB_RECNO>
3b35bae3 366
8e07c86e 367DB_RECNO allows both fixed-length and variable-length flat text files
368to be manipulated using the same key/value pair interface as in DB_HASH
369and DB_BTREE. In this case the key will consist of a record (line)
370number.
3b35bae3 371
372=back
373
68dc0745 374=head2 Interface to Berkeley DB
3b35bae3 375
376B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 377in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
378allows B<DB_File> to access Berkeley DB files using either an
379associative array (for DB_HASH & DB_BTREE file types) or an ordinary
380array (for the DB_RECNO file type).
3b35bae3 381
88108326 382In addition to the tie() interface, it is also possible to access most
383of the functions provided in the Berkeley DB API directly.
f6b705ef 384See L<THE API INTERFACE>.
3b35bae3 385
88108326 386=head2 Opening a Berkeley DB Database File
3b35bae3 387
8e07c86e 388Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 389Here is the C prototype for dbopen():
3b35bae3 390
391 DB*
392 dbopen (const char * file, int flags, int mode,
393 DBTYPE type, const void * openinfo)
394
395The parameter C<type> is an enumeration which specifies which of the 3
396interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
397Depending on which of these is actually chosen, the final parameter,
398I<openinfo> points to a data structure which allows tailoring of the
399specific interface method.
400
8e07c86e 401This interface is handled slightly differently in B<DB_File>. Here is
88108326 402an equivalent call using B<DB_File>:
3b35bae3 403
88108326 404 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 405
8e07c86e 406The C<filename>, C<flags> and C<mode> parameters are the direct
407equivalent of their dbopen() counterparts. The final parameter $DB_HASH
408performs the function of both the C<type> and C<openinfo> parameters in
409dbopen().
3b35bae3 410
88108326 411In the example above $DB_HASH is actually a pre-defined reference to a
412hash object. B<DB_File> has three of these pre-defined references.
413Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 414
The keys allowed in each of these pre-defined references are limited to
the names used in the equivalent C structure. So, for example, the
$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
C<ffactor>, C<hash>, C<lorder> and C<nelem>.
419
420To change one of these elements, just assign to it like this:
421
422 $DB_HASH->{'cachesize'} = 10000 ;
423
424The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
425usually adequate for most applications. If you do need to create extra
426instances of these objects, constructors are available for each file
427type.
428
429Here are examples of the constructors and the valid options available
430for DB_HASH, DB_BTREE and DB_RECNO respectively.
431
432 $a = new DB_File::HASHINFO ;
433 $a->{'bsize'} ;
434 $a->{'cachesize'} ;
435 $a->{'ffactor'};
436 $a->{'hash'} ;
437 $a->{'lorder'} ;
438 $a->{'nelem'} ;
439
440 $b = new DB_File::BTREEINFO ;
441 $b->{'flags'} ;
442 $b->{'cachesize'} ;
443 $b->{'maxkeypage'} ;
444 $b->{'minkeypage'} ;
445 $b->{'psize'} ;
446 $b->{'compare'} ;
447 $b->{'prefix'} ;
448 $b->{'lorder'} ;
449
450 $c = new DB_File::RECNOINFO ;
451 $c->{'bval'} ;
452 $c->{'cachesize'} ;
453 $c->{'psize'} ;
454 $c->{'flags'} ;
455 $c->{'lorder'} ;
456 $c->{'reclen'} ;
457 $c->{'bfname'} ;
458
The values stored in the hashes above are mostly the direct equivalent
of their C counterpart. Like their C counterparts, all are set to
default values - that means you don't have to set I<all> of the
values when you only want to change one. Here is an example:
463
464 $a = new DB_File::HASHINFO ;
465 $a->{'cachesize'} = 12345 ;
466 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
467
36477c24 468A few of the options need extra discussion here. When used, the C
88108326 469equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
470to C functions. In B<DB_File> these keys are used to store references
471to Perl subs. Below are templates for each of the subs:
472
473 sub hash
474 {
475 my ($data) = @_ ;
476 ...
477 # return the hash value for $data
478 return $hash ;
479 }
3b35bae3 480
    sub compare
    {
        my ($key1, $key2) = @_ ;
        ...
        # return  0 if $key1 eq $key2
        #        -1 if $key1 lt $key2
        #         1 if $key1 gt $key2
        return (-1 , 0 or 1) ;
    }
3b35bae3 490
    sub prefix
    {
        my ($key1, $key2) = @_ ;
        ...
        # return number of bytes of $key2 which are
        # necessary to determine that it is greater than $key1
        return $bytes ;
    }
3b35bae3 499
f6b705ef 500See L<Changing the BTREE sort order> for an example of using the
501C<compare> template.
88108326 502
36477c24 503If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 504C<bval>, you should check out L<The 'bval' Option>.
36477c24 505
88108326 506=head2 Default Parameters
507
508It is possible to omit some or all of the final 4 parameters in the
509call to C<tie> and let them take default values. As DB_HASH is the most
510common file format used, the call:
511
512 tie %A, "DB_File", "filename" ;
513
514is equivalent to:
515
18d2dc8c 516 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 517
518It is also possible to omit the filename parameter as well, so the
519call:
520
521 tie %A, "DB_File" ;
522
523is equivalent to:
524
18d2dc8c 525 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 526
f6b705ef 527See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 528in place of a filename.
529
f6b705ef 530=head2 In Memory Databases
531
532Berkeley DB allows the creation of in-memory databases by using NULL
533(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
534uses C<undef> instead of NULL to provide this functionality.
535
536=head1 DB_HASH
537
538The DB_HASH file format is probably the most commonly used of the three
539file formats that B<DB_File> supports. It is also very straightforward
540to use.
541
68dc0745 542=head2 A Simple Example
f6b705ef 543
This example shows how to create a database, add key/value pairs to the
database, delete key/value pairs and finally how to enumerate the
contents of the database.
547
610ab055 548 use strict ;
f6b705ef 549 use DB_File ;
610ab055 550 use vars qw( %h $k $v ) ;
f6b705ef 551
552 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
553 or die "Cannot open file 'fruit': $!\n";
554
555 # Add a few key/value pairs to the file
556 $h{"apple"} = "red" ;
557 $h{"orange"} = "orange" ;
558 $h{"banana"} = "yellow" ;
559 $h{"tomato"} = "red" ;
560
561 # Check for existence of a key
562 print "Banana Exists\n\n" if $h{"banana"} ;
563
564 # Delete a key/value pair.
565 delete $h{"apple"} ;
566
567 # print the contents of the file
568 while (($k, $v) = each %h)
569 { print "$k -> $v\n" }
570
571 untie %h ;
572
Here is the output:
574
575 Banana Exists
576
577 orange -> orange
578 tomato -> red
579 banana -> yellow
580
Note that, like ordinary associative arrays, the keys are retrieved in
an apparently random order.
583
584=head1 DB_BTREE
585
586The DB_BTREE format is useful when you want to store data in a given
587order. By default the keys will be stored in lexical order, but as you
588will see from the example shown in the next section, it is very easy to
589define your own sorting function.
590
591=head2 Changing the BTREE sort order
592
593This script shows how to override the default sorting algorithm that
594BTREE uses. Instead of using the normal lexical ordering, a case
595insensitive compare function will be used.
88108326 596
610ab055 597 use strict ;
f6b705ef 598 use DB_File ;
610ab055 599
600 my %h ;
f6b705ef 601
602 sub Compare
603 {
604 my ($key1, $key2) = @_ ;
605 "\L$key1" cmp "\L$key2" ;
606 }
607
608 # specify the Perl sub that will do the comparison
609 $DB_BTREE->{'compare'} = \&Compare ;
610
611 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
612 or die "Cannot open file 'tree': $!\n" ;
613
614 # Add a key/value pair to the file
615 $h{'Wall'} = 'Larry' ;
616 $h{'Smith'} = 'John' ;
617 $h{'mouse'} = 'mickey' ;
618 $h{'duck'} = 'donald' ;
619
620 # Delete
621 delete $h{"duck"} ;
622
623 # Cycle through the keys printing them in order.
624 # Note it is not necessary to sort the keys as
625 # the btree will have kept them in order automatically.
626 foreach (keys %h)
627 { print "$_\n" }
628
629 untie %h ;
630
631Here is the output from the code above.
632
633 mouse
634 Smith
635 Wall
636
There are a few points to bear in mind if you want to change the
ordering in a BTREE database:
639
640=over 5
641
642=item 1.
643
644The new compare function must be specified when you create the database.
645
646=item 2.
647
648You cannot change the ordering once the database has been created. Thus
649you must use the same compare function every time you access the
88108326 650database.
651
f6b705ef 652=back
653
68dc0745 654=head2 Handling Duplicate Keys
f6b705ef 655
656The BTREE file type optionally allows a single key to be associated
657with an arbitrary number of values. This option is enabled by setting
658the flags element of C<$DB_BTREE> to R_DUP when creating the database.
659
88108326 660There are some difficulties in using the tied hash interface if you
661want to manipulate a BTREE database with duplicate keys. Consider this
662code:
663
610ab055 664 use strict ;
88108326 665 use DB_File ;
610ab055 666
667 use vars qw($filename %h ) ;
668
88108326 669 $filename = "tree" ;
670 unlink $filename ;
671
672 # Enable duplicate records
673 $DB_BTREE->{'flags'} = R_DUP ;
674
675 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
676 or die "Cannot open $filename: $!\n";
677
678 # Add some key/value pairs to the file
679 $h{'Wall'} = 'Larry' ;
680 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 681 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 682 $h{'Smith'} = 'John' ;
683 $h{'mouse'} = 'mickey' ;
684
685 # iterate through the associative array
686 # and print each key/value pair.
687 foreach (keys %h)
688 { print "$_ -> $h{$_}\n" }
689
f6b705ef 690 untie %h ;
691
88108326 692Here is the output:
693
694 Smith -> John
695 Wall -> Larry
696 Wall -> Larry
f6b705ef 697 Wall -> Larry
88108326 698 mouse -> mickey
699
f6b705ef 700As you can see 3 records have been successfully created with key C<Wall>
88108326 701- the only thing is, when they are retrieved from the database they
f6b705ef 702I<seem> to have the same value, namely C<Larry>. The problem is caused
703by the way that the associative array interface works. Basically, when
704the associative array interface is used to fetch the value associated
705with a given key, it will only ever retrieve the first value.
88108326 706
707Although it may not be immediately obvious from the code above, the
708associative array interface can be used to write values with duplicate
709keys, but it cannot be used to read them back from the database.
710
711The way to get around this problem is to use the Berkeley DB API method
712called C<seq>. This method allows sequential access to key/value
f6b705ef 713pairs. See L<THE API INTERFACE> for details of both the C<seq> method
714and the API in general.
88108326 715
716Here is the script above rewritten using the C<seq> API method.
717
610ab055 718 use strict ;
88108326 719 use DB_File ;
88108326 720
610ab055 721 use vars qw($filename $x %h $status $key $value) ;
722
88108326 723 $filename = "tree" ;
724 unlink $filename ;
725
726 # Enable duplicate records
727 $DB_BTREE->{'flags'} = R_DUP ;
728
729 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
730 or die "Cannot open $filename: $!\n";
731
732 # Add some key/value pairs to the file
733 $h{'Wall'} = 'Larry' ;
734 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 735 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 736 $h{'Smith'} = 'John' ;
737 $h{'mouse'} = 'mickey' ;
738
f6b705ef 739 # iterate through the btree using seq
88108326 740 # and print each key/value pair.
610ab055 741 $key = $value = 0 ;
f6b705ef 742 for ($status = $x->seq($key, $value, R_FIRST) ;
743 $status == 0 ;
744 $status = $x->seq($key, $value, R_NEXT) )
88108326 745 { print "$key -> $value\n" }
746
747 undef $x ;
748 untie %h ;
749
750that prints:
751
752 Smith -> John
753 Wall -> Brick
f6b705ef 754 Wall -> Brick
88108326 755 Wall -> Larry
756 mouse -> mickey
757
f6b705ef 758This time we have got all the key/value pairs, including the multiple
88108326 759values associated with the key C<Wall>.
760
68dc0745 761=head2 The get_dup() Method
f6b705ef 762
763B<DB_File> comes with a utility method, called C<get_dup>, to assist in
88108326 764reading duplicate values from BTREE databases. The method can take the
765following forms:
766
767 $count = $x->get_dup($key) ;
768 @list = $x->get_dup($key) ;
769 %list = $x->get_dup($key, 1) ;
770
771In a scalar context the method returns the number of values associated
772with the key, C<$key>.
773
774In list context, it returns all the values which match C<$key>. Note
f6b705ef 775that the values will be returned in an apparently random order.
88108326 776
7a2e2cd6 777In list context, if the second parameter is present and evaluates
778TRUE, the method returns an associative array. The keys of the
779associative array correspond to the values that matched in the BTREE
780and the values of the array are a count of the number of times that
781particular value occurred in the BTREE.
88108326 782
f6b705ef 783So assuming the database created above, we can use C<get_dup> like
88108326 784this:
785
610ab055 786 my $cnt = $x->get_dup("Wall") ;
88108326 787 print "Wall occurred $cnt times\n" ;
788
610ab055 789 my %hash = $x->get_dup("Wall", 1) ;
88108326 790 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 791 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 792
610ab055 793 my @list = $x->get_dup("Wall") ;
88108326 794 print "Wall => [@list]\n" ;
795
f6b705ef 796 @list = $x->get_dup("Smith") ;
88108326 797 print "Smith => [@list]\n" ;
798
f6b705ef 799 @list = $x->get_dup("Dog") ;
88108326 800 print "Dog => [@list]\n" ;
801
802
803and it will print:
804
f6b705ef 805 Wall occurred 3 times
88108326 806 Larry is there
f6b705ef 807 There are 2 Brick Walls
808 Wall => [Brick Brick Larry]
88108326 809 Smith => [John]
810 Dog => []
3b35bae3 811
f6b705ef 812=head2 Matching Partial Keys
813
814The BTREE interface has a feature which allows partial keys to be
815matched. This functionality is I<only> available when the C<seq> method
816is used along with the R_CURSOR flag.
817
818 $x->seq($key, $value, R_CURSOR) ;
819
820Here is the relevant quote from the dbopen man page where it defines
821the use of the R_CURSOR flag with seq:
822
f6b705ef 823 Note, for the DB_BTREE access method, the returned key is not
824 necessarily an exact match for the specified key. The returned key
825 is the smallest key greater than or equal to the specified key,
826 permitting partial key matches and range searches.
827
f6b705ef 828In the example script below, the C<match> sub uses this feature to find
829and print the first matching key/value pair given a partial key.
830
610ab055 831 use strict ;
f6b705ef 832 use DB_File ;
833 use Fcntl ;
610ab055 834
835 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 836
837 sub match
838 {
839 my $key = shift ;
610ab055 840 my $value = 0;
f6b705ef 841 my $orig_key = $key ;
842 $x->seq($key, $value, R_CURSOR) ;
843 print "$orig_key\t-> $key\t-> $value\n" ;
844 }
845
846 $filename = "tree" ;
847 unlink $filename ;
848
849 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
850 or die "Cannot open $filename: $!\n";
851
852 # Add some key/value pairs to the file
853 $h{'mouse'} = 'mickey' ;
854 $h{'Wall'} = 'Larry' ;
855 $h{'Walls'} = 'Brick' ;
856 $h{'Smith'} = 'John' ;
857
858
610ab055 859 $key = $value = 0 ;
f6b705ef 860 print "IN ORDER\n" ;
861 for ($st = $x->seq($key, $value, R_FIRST) ;
862 $st == 0 ;
863 $st = $x->seq($key, $value, R_NEXT) )
864
865 { print "$key -> $value\n" }
866
867 print "\nPARTIAL MATCH\n" ;
868
869 match "Wa" ;
870 match "A" ;
871 match "a" ;
872
873 undef $x ;
874 untie %h ;
875
876Here is the output:
877
878 IN ORDER
879 Smith -> John
880 Wall -> Larry
881 Walls -> Brick
882 mouse -> mickey
883
884 PARTIAL MATCH
885 Wa -> Wall -> Larry
886 A -> Smith -> John
887 a -> mouse -> mickey
888
889=head1 DB_RECNO
890
891DB_RECNO provides an interface to flat text files. Both variable and
892fixed length records are supported.
3b35bae3 893
88108326 894In order to make RECNO more compatible with Perl the array offset for
895all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 896
88108326 897As with normal Perl arrays, a RECNO array can be accessed using
898negative indexes. The index -1 refers to the last element of the array,
899-2 the second last, and so on. Attempting to access an element before
900the start of the array will raise a fatal run-time error.
3b35bae3 901
68dc0745 902=head2 The 'bval' Option
36477c24 903
904The operation of the bval option warrants some discussion. Here is the
905definition of bval from the Berkeley DB 1.85 recno manual page:
906
907 The delimiting byte to be used to mark the end of a
908 record for variable-length records, and the pad charac-
909 ter for fixed-length records. If no value is speci-
910 fied, newlines (``\n'') are used to mark the end of
911 variable-length records and fixed-length records are
912 padded with spaces.
913
914The second sentence is wrong. In actual fact bval will only default to
915C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
916openinfo parameter is used at all, the value that happens to be in bval
917will be used. That means you always have to specify bval when making
918use of any of the options in the openinfo parameter. This documentation
919error will be fixed in the next release of Berkeley DB.
920
That clarifies the situation with regard to Berkeley DB itself. What
about B<DB_File>? Well, the behavior defined in the quote above is
quite useful, so B<DB_File> conforms to it.
924
925That means that you can specify other options (e.g. cachesize) and
926still have bval default to C<"\n"> for variable length records, and
927space for fixed length records.
928
f6b705ef 929=head2 A Simple Example
3b35bae3 930
f6b705ef 931Here is a simple example that uses RECNO.
932
610ab055 933 use strict ;
f6b705ef 934 use DB_File ;
f6b705ef 935
610ab055 936 my @h ;
f6b705ef 937 tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
938 or die "Cannot open file 'text': $!\n" ;
939
940 # Add a few key/value pairs to the file
941 $h[0] = "orange" ;
942 $h[1] = "blue" ;
943 $h[2] = "yellow" ;
944
945 # Check for existence of a key
946 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
947
948 # use a negative index
949 print "The last element is $h[-1]\n" ;
950 print "The 2nd last element is $h[-2]\n" ;
951
952 untie @h ;
3b35bae3 953
f6b705ef 954Here is the output from the script:
955
956
957 Element 1 Exists with value blue
958 The last element is yellow
959 The 2nd last element is blue
960
961=head2 Extra Methods
962
963As you can see from the example above, the tied array interface is
964quite limited. To make the interface more useful, a number of methods
965are supplied with B<DB_File> to simulate the standard array operations
966that are not currently implemented in Perl's tied array interface. All
967these methods are accessed via the object returned from the tie call.
968
969Here are the methods:
970
971=over 5
3b35bae3 972
f6b705ef 973=item B<$X-E<gt>push(list) ;>
974
975Pushes the elements of C<list> to the end of the array.
976
977=item B<$value = $X-E<gt>pop ;>
978
979Removes and returns the last element of the array.
980
981=item B<$X-E<gt>shift>
982
983Removes and returns the first element of the array.
984
985=item B<$X-E<gt>unshift(list) ;>
986
987Pushes the elements of C<list> to the start of the array.
988
989=item B<$X-E<gt>length>
990
991Returns the number of elements in the array.
992
993=back
994
995=head2 Another Example
996
997Here is a more complete example that makes use of some of the methods
998described above. It also makes use of the API interface directly (see
999L<THE API INTERFACE>).
1000
1001 use strict ;
1002 use vars qw(@h $H $file $i) ;
1003 use DB_File ;
1004 use Fcntl ;
1005
1006 $file = "text" ;
1007
1008 unlink $file ;
1009
1010 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1011 or die "Cannot open file $file: $!\n" ;
1012
1013 # first create a text file to play with
1014 $h[0] = "zero" ;
1015 $h[1] = "one" ;
1016 $h[2] = "two" ;
1017 $h[3] = "three" ;
1018 $h[4] = "four" ;
1019
1020
1021 # Print the records in order.
1022 #
1023 # The length method is needed here because evaluating a tied
1024 # array in a scalar context does not return the number of
1025 # elements in the array.
1026
1027 print "\nORIGINAL\n" ;
1028 foreach $i (0 .. $H->length - 1) {
1029 print "$i: $h[$i]\n" ;
1030 }
1031
1032 # use the push & pop methods
1033 $a = $H->pop ;
1034 $H->push("last") ;
1035 print "\nThe last record was [$a]\n" ;
1036
1037 # and the shift & unshift methods
1038 $a = $H->shift ;
1039 $H->unshift("first") ;
1040 print "The first record was [$a]\n" ;
1041
1042 # Use the API to add a new record after record 2.
1043 $i = 2 ;
1044 $H->put($i, "Newbie", R_IAFTER) ;
1045
1046 # and a new record before record 1.
1047 $i = 1 ;
1048 $H->put($i, "New One", R_IBEFORE) ;
1049
1050 # delete record 3
1051 $H->del(3) ;
1052
1053 # now print the records in reverse order
1054 print "\nREVERSE\n" ;
1055 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1056 { print "$i: $h[$i]\n" }
1057
1058 # same again, but use the API functions instead
1059 print "\nREVERSE again\n" ;
610ab055 1060 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1061 for ($s = $H->seq($k, $v, R_LAST) ;
1062 $s == 0 ;
1063 $s = $H->seq($k, $v, R_PREV))
1064 { print "$k: $v\n" }
1065
1066 undef $H ;
1067 untie @h ;
1068
1069and this is what it outputs:
1070
1071 ORIGINAL
1072 0: zero
1073 1: one
1074 2: two
1075 3: three
1076 4: four
1077
1078 The last record was [four]
1079 The first record was [zero]
1080
1081 REVERSE
1082 5: last
1083 4: three
1084 3: Newbie
1085 2: one
1086 1: New One
1087 0: first
1088
1089 REVERSE again
1090 5: last
1091 4: three
1092 3: Newbie
1093 2: one
1094 1: New One
1095 0: first
1096
1097Notes:
1098
1099=over 5
1100
1101=item 1.
1102
1103Rather than iterating through the array C<@h> like this:
1104
1105 foreach $i (@h)
1106
1107it is necessary to use either this:
1108
1109 foreach $i (0 .. $H->length - 1)
1110
1111or this:
1112
1113 for ($a = $H->seq($k, $v, R_FIRST) ;
1114 $a == 0 ;
1115 $a = $H->seq($k, $v, R_NEXT) )
1116
1117=item 2.
1118
1119Notice that both times the C<put> method was used the record index was
1120specified using a variable, C<$i>, rather than the literal value
1121itself. This is because C<put> will return the record number of the
1122inserted line via that parameter.
1123
1124=back
1125
1126=head1 THE API INTERFACE
3b35bae3 1127
1128As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1129possible to make direct use of most of the API functions defined in the
8e07c86e 1130Berkeley DB documentation.
3b35bae3 1131
88108326 1132To do this you need to store a copy of the object returned from the tie.
3b35bae3 1133
88108326 1134 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1135
8e07c86e 1136Once you have done that, you can access the Berkeley DB API functions
88108326 1137as B<DB_File> methods directly like this:
3b35bae3 1138
1139 $db->put($key, $value, R_NOOVERWRITE) ;
1140
88108326 1141B<Important:> If you have saved a copy of the object returned from
1142C<tie>, the underlying database file will I<not> be closed until both
1143the tied variable is untied and all copies of the saved object are
610ab055 1144destroyed.
88108326 1145
1146 use DB_File ;
1147 $db = tie %hash, "DB_File", "filename"
1148 or die "Cannot tie filename: $!" ;
1149 ...
1150 undef $db ;
1151 untie %hash ;
1152
9a2c4ce3 1153See L<The untie() Gotcha> for more details.
778183f3 1154
88108326 1155All the functions defined in L<dbopen> are available except for
1156close() and dbopen() itself. The B<DB_File> method interface to the
1157supported functions has been implemented to mirror the way Berkeley DB
1158works whenever possible. In particular note that:
1159
1160=over 5
1161
1162=item *
1163
1164The methods return a status value. All return 0 on success.
1165All return -1 to signify an error and set C<$!> to the exact
1166error code. The return code 1 generally (but not always) means that the
1167key specified did not exist in the database.
1168
1169Other return codes are defined. See below and in the Berkeley DB
1170documentation for details. The Berkeley DB documentation should be used
1171as the definitive source.
1172
1173=item *
3b35bae3 1174
88108326 1175Whenever a Berkeley DB function returns data via one of its parameters,
1176the equivalent B<DB_File> method does exactly the same.
3b35bae3 1177
88108326 1178=item *
1179
1180If you are careful, it is possible to mix API calls with the tied
1181hash/array interface in the same piece of code. Although only a few of
1182the methods used to implement the tied interface currently make use of
1183the cursor, you should always assume that the cursor has been changed
1184any time the tied hash/array interface is used. As an example, this
1185code will probably not do what you expect:
1186
1187 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1188 or die "Cannot tie $filename: $!" ;
1189
1190 # Get the first key/value pair and set the cursor
1191 $X->seq($key, $value, R_FIRST) ;
1192
1193 # this line will modify the cursor
1194 $count = scalar keys %x ;
1195
1196 # Get the second key/value pair.
1197 # oops, it didn't, it got the last key/value pair!
1198 $X->seq($key, $value, R_NEXT) ;
1199
1200The code above can be rearranged to get around the problem, like this:
1201
1202 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1203 or die "Cannot tie $filename: $!" ;
1204
1205 # this line will modify the cursor
1206 $count = scalar keys %x ;
1207
1208 # Get the first key/value pair and set the cursor
1209 $X->seq($key, $value, R_FIRST) ;
1210
1211 # Get the second key/value pair.
1212 # worked this time.
1213 $X->seq($key, $value, R_NEXT) ;
1214
1215=back
1216
1217All the constants defined in L<dbopen> for use in the flags parameters
1218in the methods defined below are also available. Refer to the Berkeley
1219DB documentation for the precise meaning of the flags values.
1220
1221Below is a list of the methods available.
3b35bae3 1222
1223=over 5
1224
f6b705ef 1225=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1226
1227Given a key (C<$key>) this method reads the value associated with it
1228from the database. The value read from the database is returned in the
1229C<$value> parameter.
3b35bae3 1230
88108326 1231If the key does not exist the method returns 1.
3b35bae3 1232
88108326 1233No flags are currently defined for this method.
3b35bae3 1234
f6b705ef 1235=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1236
88108326 1237Stores the key/value pair in the database.
1238
1239If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1240will have the record number of the inserted key/value pair set.
3b35bae3 1241
88108326 1242Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1243R_SETCURSOR.
1244
f6b705ef 1245=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1246
88108326 1247Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1248
88108326 1249A return code of 1 means that the requested key was not in the
1250database.
3b35bae3 1251
88108326 1252R_CURSOR is the only valid flag at present.
3b35bae3 1253
f6b705ef 1254=item B<$status = $X-E<gt>fd ;>
3b35bae3 1255
88108326 1256Returns the file descriptor for the underlying database.
3b35bae3 1257
f6b705ef 1258See L<Locking Databases> for an example of how to make use of the
88108326 1259C<fd> method to lock your database.
3b35bae3 1260
f6b705ef 1261=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1262
88108326 1263This interface allows sequential retrieval from the database. See
1264L<dbopen> for full details.
1265
1266Both the C<$key> and C<$value> parameters will be set to the key/value
1267pair read from the database.
1268
1269The flags parameter is mandatory. The valid flag values are R_CURSOR,
1270R_FIRST, R_LAST, R_NEXT and R_PREV.
1271
f6b705ef 1272=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1273
1274Flushes any cached buffers to disk.
1275
1276R_RECNOSYNC is the only valid flag at present.
3b35bae3 1277
1278=back
1279
f6b705ef 1280=head1 HINTS AND TIPS
3b35bae3 1281
3b35bae3 1282
cb1a09d0 1283=head2 Locking Databases
3b35bae3 1284
cb1a09d0 1285Concurrent access of a read-write database by several parties requires
1286them all to use some kind of locking. Here's an example of Tom's that
1287uses the I<fd> method to get the file descriptor, and then a careful
1288open() to give something Perl will flock() for you. Run this repeatedly
1289in the background to watch the locks granted in proper order.
3b35bae3 1290
cb1a09d0 1291 use DB_File;
1292
1293 use strict;
1294
1295 sub LOCK_SH { 1 }
1296 sub LOCK_EX { 2 }
1297 sub LOCK_NB { 4 }
1298 sub LOCK_UN { 8 }
1299
1300 my($oldval, $fd, $db, %db, $value, $key);
1301
1302 $key = shift || 'default';
1303 $value = shift || 'magic';
1304
1305 $value .= " $$";
1306
1307 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1308 || die "dbcreat /tmp/foo.db $!";
1309 $fd = $db->fd;
1310 print "$$: db fd is $fd\n";
1311 open(DB_FH, "+<&=$fd") || die "dup $!";
1312
1313
1314 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1315 print "$$: CONTENTION; can't read during write update!
1316 Waiting for read lock ($!) ....";
1317 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1318 }
1319 print "$$: Read lock granted\n";
1320
1321 $oldval = $db{$key};
1322 print "$$: Old value was $oldval\n";
1323 flock(DB_FH, LOCK_UN);
1324
1325 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1326 print "$$: CONTENTION; must have exclusive lock!
1327 Waiting for write lock ($!) ....";
1328 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1329 }
1330
1331 print "$$: Write lock granted\n";
1332 $db{$key} = $value;
610ab055 1333 $db->sync; # to flush
cb1a09d0 1334 sleep 10;
1335
1336 flock(DB_FH, LOCK_UN);
88108326 1337 undef $db;
cb1a09d0 1338 untie %db;
1339 close(DB_FH);
1340 print "$$: Updated db to $key=$value\n";
1341
68dc0745 1342=head2 Sharing Databases With C Applications
f6b705ef 1343
1344There is no technical reason why a Berkeley DB database cannot be
1345shared by both a Perl and a C application.
1346
1347The vast majority of problems that are reported in this area boil down
1348to the fact that C strings are NULL terminated, whilst Perl strings are
1349not.
1350
1351Here is a real example. Netscape 2.0 keeps a record of the locations you
1352visit along with the time you last visited them in a DB_HASH database.
1353This is usually stored in the file F<~/.netscape/history.db>. The key
1354field in the database is the location string and the value field is the
1355time the location was last visited stored as a 4 byte binary value.
1356
1357If you haven't already guessed, the location string is stored with a
1358terminating NULL. This means you need to be careful when accessing the
1359database.
1360
1361Here is a snippet of code that is loosely based on Tom Christiansen's
1362I<ggh> script (available from your nearest CPAN archive in
1363F<authors/id/TOMC/scripts/nshist.gz>).
1364
610ab055 1365 use strict ;
f6b705ef 1366 use DB_File ;
1367 use Fcntl ;
f6b705ef 1368
610ab055 1369 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1370 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1371
1372 $HISTORY = "$dotdir/.netscape/history.db";
1373
1374 tie %hist_db, 'DB_File', $HISTORY
1375 or die "Cannot open $HISTORY: $!\n" ;
1376
1377 # Dump the complete database
1378 while ( ($href, $binary_time) = each %hist_db ) {
1379
1380 # remove the terminating NULL
1381 $href =~ s/\x00$// ;
1382
1383 # convert the binary time into a user friendly string
1384 $date = localtime unpack("V", $binary_time);
1385 print "$date $href\n" ;
1386 }
1387
1388 # check for the existence of a specific key
1389 # remember to add the NULL
1390 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1391 $date = localtime unpack("V", $binary_time) ;
1392 print "Last visited mox.perl.com on $date\n" ;
1393 }
1394 else {
1395 print "Never visited mox.perl.com\n"
1396 }
1397
1398 untie %hist_db ;
1399
68dc0745 1400=head2 The untie() Gotcha
778183f3 1401
7a2e2cd6 1402If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1403recommended that you read L<perltie/The untie Gotcha>.
778183f3 1404
1405Even if you don't currently make use of the API interface, it is still
1406worth reading it.
1407
1408Here is an example which illustrates the problem from a B<DB_File>
1409perspective:
1410
1411 use DB_File ;
1412 use Fcntl ;
1413
1414 my %x ;
1415 my $X ;
1416
1417 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1418 or die "Cannot tie first time: $!" ;
1419
1420 $x{123} = 456 ;
1421
1422 untie %x ;
1423
1424 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1425 or die "Cannot tie second time: $!" ;
1426
1427 untie %x ;
1428
1429When run, the script will produce this error message:
1430
1431 Cannot tie second time: Invalid argument at bad.file line 14.
1432
1433Although the error message above refers to the second tie() statement
1434in the script, the source of the problem is really with the untie()
1435statement that precedes it.
1436
1437Having read L<perltie> you will probably have already guessed that the
1438error is caused by the extra copy of the tied object stored in C<$X>.
1439If you haven't, then the problem boils down to the fact that the
1440B<DB_File> destructor, DESTROY, will not be called until I<all>
1441references to the tied object are destroyed. Both the tied variable,
1442C<%x>, and C<$X> above hold a reference to the object. The call to
1443untie() will destroy the first, but C<$X> still holds a valid
1444reference, so the destructor will not get called and the database file
1445F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1446attempt to open a database that is already open via the catch-all
1447"Invalid argument" doesn't help.
1448
1449If you run the script with the C<-w> flag the error message becomes:
1450
1451 untie attempted while 1 inner references still exist at bad.file line 12.
1452 Cannot tie second time: Invalid argument at bad.file line 14.
1453
1454which pinpoints the real problem. Finally the script can now be
1455modified to fix the original problem by destroying the API object
1456before the untie:
1457
1458 ...
1459 $x{123} = 456 ;
1460
1461 undef $X ;
1462 untie %x ;
1463
1464 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1465 ...
1466
f6b705ef 1467
1468=head1 COMMON QUESTIONS
1469
1470=head2 Why is there Perl source in my database?
1471
1472If you look at the contents of a database file created by DB_File,
1473there can sometimes be part of a Perl script included in it.
1474
1475This happens because Berkeley DB uses dynamic memory to allocate
1476buffers which will subsequently be written to the database file. Being
1477dynamic, the memory could have been used for anything before DB
1478malloced it. As Berkeley DB doesn't clear the memory once it has been
1479allocated, the unused portions will contain random junk. In the case
1480where a Perl script gets written to the database, the random junk will
1481correspond to an area of dynamic memory that happened to be used during
1482the compilation of the script.
1483
1484Unless you don't like the possibility of there being part of your Perl
1485scripts embedded in a database file, this is nothing to worry about.
1486
1487=head2 How do I store complex data structures with DB_File?
1488
1489Although B<DB_File> cannot do this directly, there is a module which
1490can layer transparently over B<DB_File> to accomplish this feat.
1491
1492Check out the MLDBM module, available on CPAN in the directory
1493F<modules/by-module/MLDBM>.
1494
1495=head2 What does "Invalid Argument" mean?
1496
1497You will get this error message when one of the parameters in the
1498C<tie> call is wrong. Unfortunately there are quite a few parameters to
1499get wrong, so it can be difficult to figure out which one it is.
1500
1501Here are a couple of possibilities:
1502
1503=over 5
1504
1505=item 1.
1506
610ab055 1507Attempting to reopen a database without closing it.
f6b705ef 1508
1509=item 2.
1510
1511Using the O_WRONLY flag.
1512
1513=back
1514
1515=head2 What does "Bareword 'DB_File' not allowed" mean?
1516
1517You will encounter this particular error message when you have the
1518C<strict 'subs'> pragma (or the full strict pragma) in your script.
1519Consider this script:
1520
1521 use strict ;
1522 use DB_File ;
1523 use vars qw(%x) ;
1524 tie %x, DB_File, "filename" ;
1525
1526Running it produces the error in question:
1527
1528 Bareword "DB_File" not allowed while "strict subs" in use
1529
1530To get around the error, place the word C<DB_File> in either single or
1531double quotes, like this:
1532
1533 tie %x, "DB_File", "filename" ;
1534
1535Although it might seem like a real pain, it is really worth the effort
1536of having a C<use strict> in all your scripts.
1537
cb1a09d0 1538=head1 HISTORY
1539
1540=over
1541
1542=item 0.1
3b35bae3 1543
1544First Release.
1545
cb1a09d0 1546=item 0.2
3b35bae3 1547
1548When B<DB_File> is opening a database file it no longer terminates the
1549process if I<dbopen> returned an error. This allows file protection
1550errors to be caught at run time. Thanks to Judith Grass
cb1a09d0 1551E<lt>grass@cybercash.comE<gt> for spotting the bug.
3b35bae3 1552
cb1a09d0 1553=item 0.3
8e07c86e 1554
1555Added prototype support for multiple btree compare callbacks.
1556
cb1a09d0 1557=item 1.0
8e07c86e 1558
1559B<DB_File> has been in use for over a year. To reflect that, the
1560version number has been incremented to 1.0.
1561
1562Added complete support for multiple concurrent callbacks.
1563
1564Using the I<push> method on an empty list didn't work properly. This
1565has been fixed.
1566
cb1a09d0 1567=item 1.01
4633a7c4 1568
1569Fixed a core dump problem with SunOS.
1570
1571The return value from TIEHASH wasn't set to NULL when dbopen returned
1572an error.
1573
88108326 1574=item 1.02
1575
f6b705ef 1576Merged OS/2 specific code into DB_File.xs
88108326 1577
1578Removed some redundant code in DB_File.xs.
1579
1580Documentation update.
1581
1582Allow negative subscripts with RECNO interface.
1583
1584Changed the default flags from O_RDWR to O_CREAT|O_RDWR.
1585
1586The example code which showed how to lock a database needed a call to
1587C<sync> added. Without it the resultant database file was empty.
1588
f6b705ef 1589Added get_dup method.
88108326 1590
f6b705ef 1591=item 1.03
1592
1593Documentation update.
3b35bae3 1594
f6b705ef 1595B<DB_File> now imports the constants (O_RDWR, O_CREAT etc.) from Fcntl
1596automatically.
3b35bae3 1597
f6b705ef 1598The standard hash function C<exists> is now supported.
1599
1600Modified the behavior of get_dup. When it returns an associative
1601array, the value is the count of the number of matching BTREE values.
3b35bae3 1602
610ab055 1603=item 1.04
1604
1605Minor documentation changes.
1606
1607Fixed a bug in hash_cb. Patches supplied by Dave Hammen,
1608E<lt>hammen@gothamcity.jsc.nasa.govE<gt>.
1609
1610Fixed a bug with the constructors for DB_File::HASHINFO,
1611DB_File::BTREEINFO and DB_File::RECNOINFO. Also tidied up the
1612constructors to make them C<-w> clean.
1613
1614Reworked part of the test harness to be more locale friendly.
1615
1616=item 1.05
1617
1618Made all scripts in the documentation C<strict> and C<-w> clean.
1619
1620Added logic to F<DB_File.xs> to allow the module to be built after Perl
1621is installed.
1622
ff68c719 1623=item 1.06
1624
1625Minor namespace cleanup: Localized C<PrintBtree>.
1626
36477c24 1627=item 1.07
1628
1629Fixed bug with RECNO, where bval wasn't defaulting to "\n".
1630
1631=item 1.08
1632
1633Documented operation of bval.
1634
18d2dc8c 1635=item 1.09
1636
1637Minor bug fix in DB_File::HASHINFO, DB_File::RECNOINFO and
1638DB_File::BTREEINFO.
1639
1640Changed default mode to 0666.
1641
a0b8c8c1 1642=item 1.10
1643
1644Fixed fd method so that it still returns -1 for in-memory files when db
16451.86 is used.
1646
778183f3 1647=item 1.11
1648
1649Documented the untie gotcha.
1650
68dc0745 1651=item 1.12
1652
1653Documented the incompatibility with version 2 of Berkeley DB.
1654
d3ef3b8a 1655=item 1.13
1656
1657Minor changes to DB_File.xs and DB_File.pm
1658
05475680 1659=item 1.14
1660
1661Made it illegal to tie an associative array to a RECNO database and an
1662ordinary array to a HASH or BTREE database.
1663
a6ed719b 1664=item 1.15
1665
1666Patch from Gisle Aas <gisle@aas.no> to suppress "use of undefined
1667value" warning with db_get and db_seq.
1668
1669Patch from Gisle Aas <gisle@aas.no> to make DB_File export only the O_*
1670constants from Fcntl.
1671
1672Removed the DESTROY method from the DB_File::HASHINFO module.
1673
1674Previously DB_File hard-wired the class name of any object that it
1675created to "DB_File". This makes sub-classing difficult. Now DB_File
1676creates objects in the namespace of the package it has been inherited
1677into.
1678
610ab055 1679=back
1680
3b35bae3 1681=head1 BUGS
1682
8e07c86e 1683Some older versions of Berkeley DB had problems with fixed length
1684records using the RECNO file format. The newest version at the time of
1685writing was 1.85 - this seems to have fixed the problems with RECNO.
3b35bae3 1686
8e07c86e 1687I am sure there are bugs in the code. If you do find any, or can
1688suggest any enhancements, I would welcome your comments.
3b35bae3 1689
1690=head1 AVAILABILITY
1691
f6b705ef 1692B<DB_File> comes with the standard Perl source distribution. Look in
1693the directory F<ext/DB_File>.
1694
68dc0745 1695This version of B<DB_File> will only work with version 1.x of Berkeley
1696DB. It is I<not> yet compatible with version 2.
1697
1698Version 1 of Berkeley DB is available at your nearest CPAN archive (see
cb1a09d0 1699L<perlmod/"CPAN"> for a list) in F<src/misc/db.1.85.tar.gz>, or via the
610ab055 1700host F<ftp.cs.berkeley.edu> in F</ucb/4bsd/db.tar.gz>. Alternatively,
1701check out the Berkeley DB home page at F<http://www.bostic.com/db>. It
1702is I<not> under the GPL.
3b35bae3 1703
88108326 1704If you are running IRIX, then get Berkeley DB from
1705F<http://reality.sgi.com/ariel>. It has the patches necessary to
1706compile properly on IRIX 5.3.
1707
a0b8c8c1 1708As of January 1997, version 1.86 of Berkeley DB is available from the
1709Berkeley DB home page. Although this release does fix a number of bugs
778183f3 1710that were present in 1.85 you should be aware of the following
a0b8c8c1 1711information (taken from the Berkeley DB home page) before you consider
1712using it:
1713
1714 DB version 1.86 includes a new implementation of the hash access
1715 method that fixes a variety of hashing problems found in DB version
1716 1.85. We are making it available as an interim solution until DB
1717 2.0 is available.
1718
1719 PLEASE NOTE: the underlying file format for the hash access method
1720 changed between version 1.85 and version 1.86, so you will have to
1721 dump and reload all of your databases to convert from version 1.85
1722 to version 1.86. If you do not absolutely require the fixes from
1723 version 1.86, we strongly urge you to wait until DB 2.0 is released
1724 before upgrading from 1.85.
1725
1726
3b35bae3 1727=head1 SEE ALSO
1728
1729L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>
1730
3b35bae3 1731=head1 AUTHOR
1732
8e07c86e 1733The DB_File interface was written by Paul Marquess
88108326 1734E<lt>pmarquess@bfsec.bt.co.ukE<gt>.
d3ef3b8a 1735Questions about the DB system itself may be addressed to
1736E<lt>db@sleepycat.comE<gt>.
3b35bae3 1737
1738=cut