Little patch for perl5.003_97c/pod/perlpod.pod
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
3# written by Paul Marquess (pmarquess@bfsec.bt.co.uk)
68dc0745 4# last modified 12th Mar 1997
5# version 1.12
36477c24 6#
a0b8c8c1 7# Copyright (c) 1995, 1996, 1997 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
package DB_File::HASHINFO ;

# Option object for the DB_HASH file type.
#
# An instance is a reference to a hash tied to this class.  Only the option
# names listed in VALID may be read, written or deleted; any other key makes
# the accessor croak.  The RECNOINFO and BTREEINFO classes inherit everything
# here and only override TIEHASH to supply their own list of names.

require 5.003 ;

use strict;
use Carp;
require Tie::Hash;
@DB_File::HASHINFO::ISA = qw(Tie::Hash);

# Constructor: returns a reference to a hash that is both tied to and
# blessed into $pkg, so options are accessed as $obj->{name}.
sub new
{
    my $pkg = shift ;
    my %x ;
    tie %x, $pkg ;
    bless \%x, $pkg ;
}


# Build the object behind the tie.
#   VALID - set of option names this file type accepts
#   GOT   - the options that have actually been stored
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map { ($_, 1) }
                       qw( bsize ffactor nelem cachesize hash lorder)
                     },
            GOT   => {}
          }, $pkg ;
}


# Return the stored value for a valid option name (undef when it has not
# been set); croak for a name that is not in VALID.
sub FETCH
{
    my $self = shift ;
    my $key  = shift ;

    return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;

    my $pkg = ref $self ;
    croak "${pkg}::FETCH - Unknown element '$key'" ;
}


# Record a value for a valid option name; croak for an unknown name.
sub STORE
{
    my $self  = shift ;
    my $key   = shift ;
    my $value = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        $self->{GOT}{$key} = $value ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::STORE - Unknown element '$key'" ;
}

# Forget the value of a valid option name; croak for an unknown name.
# The message names the object's own class, as FETCH and STORE do, so a
# subclass reports itself rather than DB_File::HASHINFO.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}

# True when $key is a permitted option name.  Note that this tests VALID,
# not GOT: it does not tell you whether the option has been given a value.
sub EXISTS
{
    my $self = shift ;
    my $key  = shift ;

    exists $self->{VALID}{$key} ;
}

# Shared failure path for the tied-hash operations this class refuses.
sub NotHere
{
    my $self   = shift ;
    my $method = shift ;

    croak ref($self) . " does not define the method ${method}" ;
}

# Empty the hash the object is built on when it is destroyed.
sub DESTROY  { undef %{$_[0]} }

# Iterating over, or clearing, an option object is not supported.
sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 105
package DB_File::RECNOINFO ;

# Option object for the DB_RECNO file type.  All behaviour is inherited
# from DB_File::HASHINFO; only the list of permitted option names differs.

use strict ;

@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;

# Build the object behind the tie: VALID is the set of option names that
# may be used, GOT holds the options stored so far.
sub TIEHASH
{
    my $pkg = shift ;

    my %valid = map { ($_, 1) }
                qw( bval cachesize psize flags lorder reclen bfname ) ;

    bless { VALID => \%valid,
            GOT   => {},
          }, $pkg ;
}
122
package DB_File::BTREEINFO ;

# Option object for the DB_BTREE file type.  All behaviour is inherited
# from DB_File::HASHINFO; only the list of permitted option names differs.

use strict ;

@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;

# Build the object behind the tie: VALID is the set of option names that
# may be used, GOT holds the options stored so far.
sub TIEHASH
{
    my $pkg = shift ;

    my %valid = map { ($_, 1) }
                qw( flags cachesize maxkeypage minkeypage psize
                    compare prefix lorder ) ;

    bless { VALID => \%valid,
            GOT   => {},
          }, $pkg ;
}
140
141
package DB_File ;

use strict;
use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO) ;
use Carp;


$VERSION = "1.12" ;

# One pre-built option object per file type; these are exported and are
# what callers normally pass as the last argument to tie().
#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;

require Tie::Hash;
require Exporter;
use AutoLoader;
require DynaLoader;
@ISA = qw(Tie::Hash Exporter DynaLoader);
@EXPORT = qw(
        $DB_BTREE $DB_HASH $DB_RECNO

        BTREEMAGIC
        BTREEVERSION
        DB_LOCK
        DB_SHMEM
        DB_TXN
        HASHMAGIC
        HASHVERSION
        MAX_PAGE_NUMBER
        MAX_PAGE_OFFSET
        MAX_REC_NUMBER
        RET_ERROR
        RET_SPECIAL
        RET_SUCCESS
        R_CURSOR
        R_DUP
        R_FIRST
        R_FIXEDLEN
        R_IAFTER
        R_IBEFORE
        R_LAST
        R_NEXT
        R_NOKEY
        R_NOOVERWRITE
        R_PREV
        R_RECNOSYNC
        R_SETCURSOR
        R_SNAPSHOT
        __R_UNUSED

);
194
# Called for any DB_File function that is not yet defined.
#
# The unqualified name is passed to constant() together with the first
# call argument (or 0).  If $! is clear afterwards the value is installed
# as a sub of that name, so later calls bypass AUTOLOAD, and control is
# transferred to it.  If $! is set and its text contains "Invalid" the
# request is handed on to AutoLoader; any other error croaks, reporting
# the caller's file and line.
sub AUTOLOAD {
    my $constname ;
    ($constname = $AUTOLOAD) =~ s/.*:://;
    my $val = constant($constname, @_ ? $_[0] : 0);
    if ($! != 0) {
        if ($! =~ /Invalid/) {
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }
        else {
            my ($file, $line) = (caller)[1, 2];
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.\n";
        }
    }

    # Install a closure rather than compiling source text built from $val.
    {
        no strict 'refs' ;
        *{$AUTOLOAD} = sub { $val } ;
    }
    goto &$AUTOLOAD;
}
213
f6b705ef 214
# import borrowed from IO::File
#   exports Fcntl constants if available.
#
# First performs the normal Exporter export from this package into the
# caller, honouring any import list.  Then tries to export every Fcntl
# symbol matching /^O_/ into the caller as well; that step is best effort,
# so a failure to load Fcntl (or to export from it) is deliberately ignored.
sub import {
    my $pkg     = shift;
    my $callpkg = caller;

    Exporter::export($pkg, $callpkg, @_);

    eval {
        require Fcntl;
        Exporter::export('Fcntl', $callpkg, '/^O_/');
    };
}
226
# Load the compiled part of the module (bootstrap comes from DynaLoader,
# which is in @ISA).
DB_File->bootstrap($VERSION) ;

# Preloaded methods go here.  Autoload methods go after __END__, and are
# processed by the autosplit program.
231
# Constructor invoked by tie() for both hashes and arrays (TIEARRAY below is
# an alias).  The arguments are (class, filename, flags, mode, info) as in
#     tie %h, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
#
# When the fifth argument is a blessed hash reference whose hash is itself
# tied (as the objects made by DB_File::HASHINFO::new are), it is replaced
# by the object underlying that tie before everything is passed to DoTie_.
sub TIEHASH
{
    my @arg = @_ ;

    if (@arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/) {
        my $underlying = tied %{ $arg[4] } ;
        $arg[4] = $underlying if $underlying ;
    }

    DoTie_(@arg) ;
}

*TIEARRAY = \&TIEHASH ;
88108326 243
# Report on the records stored under a single key.
#
#   $count = $db->get_dup($key) ;      scalar context: number of records
#   @list  = $db->get_dup($key) ;      list context: the values themselves
#   %list  = $db->get_dup($key, 1) ;   list context, true flag: a list of
#                                      value => occurrence-count pairs
#
# Croaks with a usage message unless called with one or two arguments
# after the object.  Relies on $db->seq updating its key and value
# arguments in place and returning 0 while a record was found.
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;
    my $origkey   = $key ;
    my $wantarray = wantarray ;
    my $value     = 0 ;
    my %values    = () ;
    my @values    = () ;
    my $counter   = 0 ;
    my $status    = 0 ;

    # Position on the key with R_CURSOR, then step with R_NEXT.  Stop when
    # seq reports failure ($status != 0, e.g. end of data) or when it has
    # moved on to a different key ($key ne $origkey).
    for ($status = $db->seq($key, $value, R_CURSOR()) ;
         $status == 0 and $key eq $origkey ;
         $status = $db->seq($key, $value, R_NEXT()) ) {

        # count the match, or save / tally its value
        if (! $wantarray)
          { ++ $counter }
        elsif ($flag)
          { ++ $values{$value} }
        else
          { push (@values, $value) }
    }

    return ($wantarray ? ($flag ? %values : @values) : $counter) ;
}
280
281
8e07c86e 2821;
283__END__
284
285=cut
3b35bae3 286
287=head1 NAME
288
289DB_File - Perl5 access to Berkeley DB
290
291=head1 SYNOPSIS
292
293 use DB_File ;
88108326 294
295 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
296 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
297 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 298
3b35bae3 299 $status = $X->del($key [, $flags]) ;
300 $status = $X->put($key, $value [, $flags]) ;
301 $status = $X->get($key, $value [, $flags]) ;
760ac839 302 $status = $X->seq($key, $value, $flags) ;
3b35bae3 303 $status = $X->sync([$flags]) ;
304 $status = $X->fd ;
760ac839 305
f6b705ef 306 # BTREE only
88108326 307 $count = $X->get_dup($key) ;
308 @list = $X->get_dup($key) ;
309 %list = $X->get_dup($key, 1) ;
310
f6b705ef 311 # RECNO only
312 $a = $X->length;
313 $a = $X->pop ;
314 $X->push(list);
315 $a = $X->shift;
316 $X->unshift(list);
317
3b35bae3 318 untie %hash ;
319 untie @array ;
320
321=head1 DESCRIPTION
322
8e07c86e 323B<DB_File> is a module which allows Perl programs to make use of the
324facilities provided by Berkeley DB. If you intend to use this
f6b705ef 325module you should really have a copy of the Berkeley DB manual pages at
8e07c86e 326hand. The interface defined here mirrors the Berkeley DB interface
327closely.
3b35bae3 328
68dc0745 329Please note that this module will only work with version 1.x of
330Berkeley DB. Once Berkeley DB version 2 is released, B<DB_File> will be
331upgraded to work with it.
332
8e07c86e 333Berkeley DB is a C library which provides a consistent interface to a
334number of database formats. B<DB_File> provides an interface to all
335three of the database types currently supported by Berkeley DB.
3b35bae3 336
337The file types are:
338
339=over 5
340
88108326 341=item B<DB_HASH>
3b35bae3 342
88108326 343This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 344files. This is equivalent to the functionality provided by other
345hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
346the files created using DB_HASH are not compatible with any of the
347other packages mentioned.
3b35bae3 348
8e07c86e 349A default hashing algorithm, which will be adequate for most
350applications, is built into Berkeley DB. If you do need to use your own
351hashing algorithm it is possible to write your own in Perl and have
352B<DB_File> use it instead.
3b35bae3 353
88108326 354=item B<DB_BTREE>
355
356The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 357sorted, balanced binary tree.
3b35bae3 358
8e07c86e 359As with the DB_HASH format, it is possible to provide a user defined
360Perl routine to perform the comparison of keys. By default, though, the
361keys are stored in lexical order.
3b35bae3 362
88108326 363=item B<DB_RECNO>
3b35bae3 364
8e07c86e 365DB_RECNO allows both fixed-length and variable-length flat text files
366to be manipulated using the same key/value pair interface as in DB_HASH
367and DB_BTREE. In this case the key will consist of a record (line)
368number.
3b35bae3 369
370=back
371
68dc0745 372=head2 Interface to Berkeley DB
3b35bae3 373
374B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 375in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
376allows B<DB_File> to access Berkeley DB files using either an
377associative array (for DB_HASH & DB_BTREE file types) or an ordinary
378array (for the DB_RECNO file type).
3b35bae3 379
88108326 380In addition to the tie() interface, it is also possible to access most
381of the functions provided in the Berkeley DB API directly.
f6b705ef 382See L<THE API INTERFACE>.
3b35bae3 383
88108326 384=head2 Opening a Berkeley DB Database File
3b35bae3 385
8e07c86e 386Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 387Here is the C prototype for dbopen():
3b35bae3 388
389 DB*
390 dbopen (const char * file, int flags, int mode,
391 DBTYPE type, const void * openinfo)
392
393The parameter C<type> is an enumeration which specifies which of the 3
394interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
395Depending on which of these is actually chosen, the final parameter,
396I<openinfo> points to a data structure which allows tailoring of the
397specific interface method.
398
8e07c86e 399This interface is handled slightly differently in B<DB_File>. Here is
88108326 400an equivalent call using B<DB_File>:
3b35bae3 401
88108326 402 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 403
8e07c86e 404The C<filename>, C<flags> and C<mode> parameters are the direct
405equivalent of their dbopen() counterparts. The final parameter $DB_HASH
406performs the function of both the C<type> and C<openinfo> parameters in
407dbopen().
3b35bae3 408
88108326 409In the example above $DB_HASH is actually a pre-defined reference to a
410hash object. B<DB_File> has three of these pre-defined references.
411Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 412
8e07c86e 413The keys allowed in each of these pre-defined references are limited to
414the names used in the equivalent C structure. So, for example, the
415$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 416C<ffactor>, C<hash>, C<lorder> and C<nelem>.
417
418To change one of these elements, just assign to it like this:
419
420 $DB_HASH->{'cachesize'} = 10000 ;
421
422The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
423usually adequate for most applications. If you do need to create extra
424instances of these objects, constructors are available for each file
425type.
426
427Here are examples of the constructors and the valid options available
428for DB_HASH, DB_BTREE and DB_RECNO respectively.
429
430 $a = new DB_File::HASHINFO ;
431 $a->{'bsize'} ;
432 $a->{'cachesize'} ;
433 $a->{'ffactor'};
434 $a->{'hash'} ;
435 $a->{'lorder'} ;
436 $a->{'nelem'} ;
437
438 $b = new DB_File::BTREEINFO ;
439 $b->{'flags'} ;
440 $b->{'cachesize'} ;
441 $b->{'maxkeypage'} ;
442 $b->{'minkeypage'} ;
443 $b->{'psize'} ;
444 $b->{'compare'} ;
445 $b->{'prefix'} ;
446 $b->{'lorder'} ;
447
448 $c = new DB_File::RECNOINFO ;
449 $c->{'bval'} ;
450 $c->{'cachesize'} ;
451 $c->{'psize'} ;
452 $c->{'flags'} ;
453 $c->{'lorder'} ;
454 $c->{'reclen'} ;
455 $c->{'bfname'} ;
456
457The values stored in the hashes above are mostly the direct equivalent
458of their C counterpart. Like their C counterparts, all are set to
f6b705ef 459default values - that means you don't have to set I<all> of the
88108326 460values when you only want to change one. Here is an example:
461
462 $a = new DB_File::HASHINFO ;
463 $a->{'cachesize'} = 12345 ;
464 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
465
36477c24 466A few of the options need extra discussion here. When used, the C
88108326 467equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
468to C functions. In B<DB_File> these keys are used to store references
469to Perl subs. Below are templates for each of the subs:
470
471 sub hash
472 {
473 my ($data) = @_ ;
474 ...
475 # return the hash value for $data
476 return $hash ;
477 }
3b35bae3 478
88108326 479 sub compare
480 {
481 my ($key1, $key2) = @_ ;
482 ...
483 # return 0 if $key1 eq $key2
484 # -1 if $key1 lt $key2
485 # 1 if $key1 gt $key2
486 return (-1 , 0 or 1) ;
487 }
3b35bae3 488
88108326 489 sub prefix
490 {
491 my ($key1, $key2) = @_ ;
492 ...
493 # return number of bytes of $key2 which are
494 # necessary to determine that it is greater than $key1
495 return $bytes ;
496 }
3b35bae3 497
f6b705ef 498See L<Changing the BTREE sort order> for an example of using the
499C<compare> template.
88108326 500
36477c24 501If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 502C<bval>, you should check out L<The 'bval' Option>.
36477c24 503
88108326 504=head2 Default Parameters
505
506It is possible to omit some or all of the final 4 parameters in the
507call to C<tie> and let them take default values. As DB_HASH is the most
508common file format used, the call:
509
510 tie %A, "DB_File", "filename" ;
511
512is equivalent to:
513
18d2dc8c 514 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 515
516It is also possible to omit the filename parameter as well, so the
517call:
518
519 tie %A, "DB_File" ;
520
521is equivalent to:
522
18d2dc8c 523 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 524
f6b705ef 525See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 526in place of a filename.
527
f6b705ef 528=head2 In Memory Databases
529
530Berkeley DB allows the creation of in-memory databases by using NULL
531(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
532uses C<undef> instead of NULL to provide this functionality.
533
534=head1 DB_HASH
535
536The DB_HASH file format is probably the most commonly used of the three
537file formats that B<DB_File> supports. It is also very straightforward
538to use.
539
68dc0745 540=head2 A Simple Example
f6b705ef 541
542This example shows how to create a database, add key/value pairs to the
543database, delete keys/value pairs and finally how to enumerate the
544contents of the database.
545
610ab055 546 use strict ;
f6b705ef 547 use DB_File ;
610ab055 548 use vars qw( %h $k $v ) ;
f6b705ef 549
550 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
551 or die "Cannot open file 'fruit': $!\n";
552
553 # Add a few key/value pairs to the file
554 $h{"apple"} = "red" ;
555 $h{"orange"} = "orange" ;
556 $h{"banana"} = "yellow" ;
557 $h{"tomato"} = "red" ;
558
559 # Check for existence of a key
560 print "Banana Exists\n\n" if $h{"banana"} ;
561
562 # Delete a key/value pair.
563 delete $h{"apple"} ;
564
565 # print the contents of the file
566 while (($k, $v) = each %h)
567 { print "$k -> $v\n" }
568
569 untie %h ;
570
571Here is the output:
572
573 Banana Exists
574
575 orange -> orange
576 tomato -> red
577 banana -> yellow
578
579Note that, like ordinary associative arrays, the keys are retrieved
580in an apparently random order.
581
582=head1 DB_BTREE
583
584The DB_BTREE format is useful when you want to store data in a given
585order. By default the keys will be stored in lexical order, but as you
586will see from the example shown in the next section, it is very easy to
587define your own sorting function.
588
589=head2 Changing the BTREE sort order
590
591This script shows how to override the default sorting algorithm that
592BTREE uses. Instead of using the normal lexical ordering, a case
593insensitive compare function will be used.
88108326 594
610ab055 595 use strict ;
f6b705ef 596 use DB_File ;
610ab055 597
598 my %h ;
f6b705ef 599
600 sub Compare
601 {
602 my ($key1, $key2) = @_ ;
603 "\L$key1" cmp "\L$key2" ;
604 }
605
606 # specify the Perl sub that will do the comparison
607 $DB_BTREE->{'compare'} = \&Compare ;
608
609 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
610 or die "Cannot open file 'tree': $!\n" ;
611
612 # Add a key/value pair to the file
613 $h{'Wall'} = 'Larry' ;
614 $h{'Smith'} = 'John' ;
615 $h{'mouse'} = 'mickey' ;
616 $h{'duck'} = 'donald' ;
617
618 # Delete
619 delete $h{"duck"} ;
620
621 # Cycle through the keys printing them in order.
622 # Note it is not necessary to sort the keys as
623 # the btree will have kept them in order automatically.
624 foreach (keys %h)
625 { print "$_\n" }
626
627 untie %h ;
628
629Here is the output from the code above.
630
631 mouse
632 Smith
633 Wall
634
635There are a few points to bear in mind if you want to change the
636ordering in a BTREE database:
637
638=over 5
639
640=item 1.
641
642The new compare function must be specified when you create the database.
643
644=item 2.
645
646You cannot change the ordering once the database has been created. Thus
647you must use the same compare function every time you access the
88108326 648database.
649
f6b705ef 650=back
651
68dc0745 652=head2 Handling Duplicate Keys
f6b705ef 653
654The BTREE file type optionally allows a single key to be associated
655with an arbitrary number of values. This option is enabled by setting
656the flags element of C<$DB_BTREE> to R_DUP when creating the database.
657
88108326 658There are some difficulties in using the tied hash interface if you
659want to manipulate a BTREE database with duplicate keys. Consider this
660code:
661
610ab055 662 use strict ;
88108326 663 use DB_File ;
610ab055 664
665 use vars qw($filename %h ) ;
666
88108326 667 $filename = "tree" ;
668 unlink $filename ;
669
670 # Enable duplicate records
671 $DB_BTREE->{'flags'} = R_DUP ;
672
673 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
674 or die "Cannot open $filename: $!\n";
675
676 # Add some key/value pairs to the file
677 $h{'Wall'} = 'Larry' ;
678 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 679 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 680 $h{'Smith'} = 'John' ;
681 $h{'mouse'} = 'mickey' ;
682
683 # iterate through the associative array
684 # and print each key/value pair.
685 foreach (keys %h)
686 { print "$_ -> $h{$_}\n" }
687
f6b705ef 688 untie %h ;
689
88108326 690Here is the output:
691
692 Smith -> John
693 Wall -> Larry
694 Wall -> Larry
f6b705ef 695 Wall -> Larry
88108326 696 mouse -> mickey
697
f6b705ef 698As you can see 3 records have been successfully created with key C<Wall>
88108326 699- the only thing is, when they are retrieved from the database they
f6b705ef 700I<seem> to have the same value, namely C<Larry>. The problem is caused
701by the way that the associative array interface works. Basically, when
702the associative array interface is used to fetch the value associated
703with a given key, it will only ever retrieve the first value.
88108326 704
705Although it may not be immediately obvious from the code above, the
706associative array interface can be used to write values with duplicate
707keys, but it cannot be used to read them back from the database.
708
709The way to get around this problem is to use the Berkeley DB API method
710called C<seq>. This method allows sequential access to key/value
f6b705ef 711pairs. See L<THE API INTERFACE> for details of both the C<seq> method
712and the API in general.
88108326 713
714Here is the script above rewritten using the C<seq> API method.
715
610ab055 716 use strict ;
88108326 717 use DB_File ;
88108326 718
610ab055 719 use vars qw($filename $x %h $status $key $value) ;
720
88108326 721 $filename = "tree" ;
722 unlink $filename ;
723
724 # Enable duplicate records
725 $DB_BTREE->{'flags'} = R_DUP ;
726
727 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
728 or die "Cannot open $filename: $!\n";
729
730 # Add some key/value pairs to the file
731 $h{'Wall'} = 'Larry' ;
732 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 733 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 734 $h{'Smith'} = 'John' ;
735 $h{'mouse'} = 'mickey' ;
736
f6b705ef 737 # iterate through the btree using seq
88108326 738 # and print each key/value pair.
610ab055 739 $key = $value = 0 ;
f6b705ef 740 for ($status = $x->seq($key, $value, R_FIRST) ;
741 $status == 0 ;
742 $status = $x->seq($key, $value, R_NEXT) )
88108326 743 { print "$key -> $value\n" }
744
745 undef $x ;
746 untie %h ;
747
748that prints:
749
750 Smith -> John
751 Wall -> Brick
f6b705ef 752 Wall -> Brick
88108326 753 Wall -> Larry
754 mouse -> mickey
755
f6b705ef 756This time we have got all the key/value pairs, including the multiple
88108326 757values associated with the key C<Wall>.
758
68dc0745 759=head2 The get_dup() Method
f6b705ef 760
761B<DB_File> comes with a utility method, called C<get_dup>, to assist in
88108326 762reading duplicate values from BTREE databases. The method can take the
763following forms:
764
765 $count = $x->get_dup($key) ;
766 @list = $x->get_dup($key) ;
767 %list = $x->get_dup($key, 1) ;
768
769In a scalar context the method returns the number of values associated
770with the key, C<$key>.
771
772In list context, it returns all the values which match C<$key>. Note
f6b705ef 773that the values will be returned in an apparently random order.
88108326 774
f6b705ef 775In list context, if the second parameter is present and evaluates TRUE,
776the method returns an associative array. The keys of the associative
777array correspond to the values that matched in the BTREE and the
778values of the array are a count of the number of times that particular
779value occurred in the BTREE.
88108326 780
f6b705ef 781So assuming the database created above, we can use C<get_dup> like
88108326 782this:
783
610ab055 784 my $cnt = $x->get_dup("Wall") ;
88108326 785 print "Wall occurred $cnt times\n" ;
786
610ab055 787 my %hash = $x->get_dup("Wall", 1) ;
88108326 788 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 789 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 790
610ab055 791 my @list = $x->get_dup("Wall") ;
88108326 792 print "Wall => [@list]\n" ;
793
f6b705ef 794 @list = $x->get_dup("Smith") ;
88108326 795 print "Smith => [@list]\n" ;
796
f6b705ef 797 @list = $x->get_dup("Dog") ;
88108326 798 print "Dog => [@list]\n" ;
799
800
801and it will print:
802
f6b705ef 803 Wall occurred 3 times
88108326 804 Larry is there
f6b705ef 805 There are 2 Brick Walls
806 Wall => [Brick Brick Larry]
88108326 807 Smith => [John]
808 Dog => []
3b35bae3 809
f6b705ef 810=head2 Matching Partial Keys
811
812The BTREE interface has a feature which allows partial keys to be
813matched. This functionality is I<only> available when the C<seq> method
814is used along with the R_CURSOR flag.
815
816 $x->seq($key, $value, R_CURSOR) ;
817
818Here is the relevant quote from the dbopen man page where it defines
819the use of the R_CURSOR flag with seq:
820
f6b705ef 821 Note, for the DB_BTREE access method, the returned key is not
822 necessarily an exact match for the specified key. The returned key
823 is the smallest key greater than or equal to the specified key,
824 permitting partial key matches and range searches.
825
f6b705ef 826In the example script below, the C<match> sub uses this feature to find
827and print the first matching key/value pair given a partial key.
828
610ab055 829 use strict ;
f6b705ef 830 use DB_File ;
831 use Fcntl ;
610ab055 832
833 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 834
835 sub match
836 {
837 my $key = shift ;
610ab055 838 my $value = 0;
f6b705ef 839 my $orig_key = $key ;
840 $x->seq($key, $value, R_CURSOR) ;
841 print "$orig_key\t-> $key\t-> $value\n" ;
842 }
843
844 $filename = "tree" ;
845 unlink $filename ;
846
847 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
848 or die "Cannot open $filename: $!\n";
849
850 # Add some key/value pairs to the file
851 $h{'mouse'} = 'mickey' ;
852 $h{'Wall'} = 'Larry' ;
853 $h{'Walls'} = 'Brick' ;
854 $h{'Smith'} = 'John' ;
855
856
610ab055 857 $key = $value = 0 ;
f6b705ef 858 print "IN ORDER\n" ;
859 for ($st = $x->seq($key, $value, R_FIRST) ;
860 $st == 0 ;
861 $st = $x->seq($key, $value, R_NEXT) )
862
863 { print "$key -> $value\n" }
864
865 print "\nPARTIAL MATCH\n" ;
866
867 match "Wa" ;
868 match "A" ;
869 match "a" ;
870
871 undef $x ;
872 untie %h ;
873
874Here is the output:
875
876 IN ORDER
877 Smith -> John
878 Wall -> Larry
879 Walls -> Brick
880 mouse -> mickey
881
882 PARTIAL MATCH
883 Wa -> Wall -> Larry
884 A -> Smith -> John
885 a -> mouse -> mickey
886
887=head1 DB_RECNO
888
889DB_RECNO provides an interface to flat text files. Both variable and
890fixed length records are supported.
3b35bae3 891
88108326 892In order to make RECNO more compatible with Perl the array offset for
893all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 894
88108326 895As with normal Perl arrays, a RECNO array can be accessed using
896negative indexes. The index -1 refers to the last element of the array,
897-2 the second last, and so on. Attempting to access an element before
898the start of the array will raise a fatal run-time error.
3b35bae3 899
68dc0745 900=head2 The 'bval' Option
36477c24 901
902The operation of the bval option warrants some discussion. Here is the
903definition of bval from the Berkeley DB 1.85 recno manual page:
904
905 The delimiting byte to be used to mark the end of a
906 record for variable-length records, and the pad charac-
907 ter for fixed-length records. If no value is speci-
908 fied, newlines (``\n'') are used to mark the end of
909 variable-length records and fixed-length records are
910 padded with spaces.
911
912The second sentence is wrong. In actual fact bval will only default to
913C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
914openinfo parameter is used at all, the value that happens to be in bval
915will be used. That means you always have to specify bval when making
916use of any of the options in the openinfo parameter. This documentation
917error will be fixed in the next release of Berkeley DB.
918
919That clarifies the situation with regard to Berkeley DB itself. What
920about B<DB_File>? Well, the behavior defined in the quote above is
921quite useful, so B<DB_File> conforms to it.
922
923That means that you can specify other options (e.g. cachesize) and
924still have bval default to C<"\n"> for variable length records, and
925space for fixed length records.
926
f6b705ef 927=head2 A Simple Example
3b35bae3 928
f6b705ef 929Here is a simple example that uses RECNO.
930
610ab055 931 use strict ;
f6b705ef 932 use DB_File ;
f6b705ef 933
610ab055 934 my @h ;
f6b705ef 935 tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
936 or die "Cannot open file 'text': $!\n" ;
937
938 # Add a few key/value pairs to the file
939 $h[0] = "orange" ;
940 $h[1] = "blue" ;
941 $h[2] = "yellow" ;
942
943 # Check for existence of a key
944 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
945
946 # use a negative index
947 print "The last element is $h[-1]\n" ;
948 print "The 2nd last element is $h[-2]\n" ;
949
950 untie @h ;
3b35bae3 951
f6b705ef 952Here is the output from the script:
953
954
955 Element 1 Exists with value blue
956 The last element is yellow
957 The 2nd last element is blue
958
959=head2 Extra Methods
960
961As you can see from the example above, the tied array interface is
962quite limited. To make the interface more useful, a number of methods
963are supplied with B<DB_File> to simulate the standard array operations
964that are not currently implemented in Perl's tied array interface. All
965these methods are accessed via the object returned from the tie call.
966
967Here are the methods:
968
969=over 5
3b35bae3 970
f6b705ef 971=item B<$X-E<gt>push(list) ;>
972
973Pushes the elements of C<list> to the end of the array.
974
975=item B<$value = $X-E<gt>pop ;>
976
977Removes and returns the last element of the array.
978
979=item B<$X-E<gt>shift>
980
981Removes and returns the first element of the array.
982
983=item B<$X-E<gt>unshift(list) ;>
984
985Pushes the elements of C<list> to the start of the array.
986
987=item B<$X-E<gt>length>
988
989Returns the number of elements in the array.
990
991=back
992
993=head2 Another Example
994
995Here is a more complete example that makes use of some of the methods
996described above. It also makes use of the API interface directly (see
997L<THE API INTERFACE>).
998
999 use strict ;
1000 use vars qw(@h $H $file $i) ;
1001 use DB_File ;
1002 use Fcntl ;
1003
1004 $file = "text" ;
1005
1006 unlink $file ;
1007
1008 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1009 or die "Cannot open file $file: $!\n" ;
1010
1011 # first create a text file to play with
1012 $h[0] = "zero" ;
1013 $h[1] = "one" ;
1014 $h[2] = "two" ;
1015 $h[3] = "three" ;
1016 $h[4] = "four" ;
1017
1018
1019 # Print the records in order.
1020 #
1021 # The length method is needed here because evaluating a tied
1022 # array in a scalar context does not return the number of
1023 # elements in the array.
1024
1025 print "\nORIGINAL\n" ;
1026 foreach $i (0 .. $H->length - 1) {
1027 print "$i: $h[$i]\n" ;
1028 }
1029
1030 # use the push & pop methods
1031 $a = $H->pop ;
1032 $H->push("last") ;
1033 print "\nThe last record was [$a]\n" ;
1034
1035 # and the shift & unshift methods
1036 $a = $H->shift ;
1037 $H->unshift("first") ;
1038 print "The first record was [$a]\n" ;
1039
1040 # Use the API to add a new record after record 2.
1041 $i = 2 ;
1042 $H->put($i, "Newbie", R_IAFTER) ;
1043
1044 # and a new record before record 1.
1045 $i = 1 ;
1046 $H->put($i, "New One", R_IBEFORE) ;
1047
1048 # delete record 3
1049 $H->del(3) ;
1050
1051 # now print the records in reverse order
1052 print "\nREVERSE\n" ;
1053 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1054 { print "$i: $h[$i]\n" }
1055
1056 # same again, but use the API functions instead
1057 print "\nREVERSE again\n" ;
610ab055 1058 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1059 for ($s = $H->seq($k, $v, R_LAST) ;
1060 $s == 0 ;
1061 $s = $H->seq($k, $v, R_PREV))
1062 { print "$k: $v\n" }
1063
1064 undef $H ;
1065 untie @h ;
1066
1067and this is what it outputs:
1068
1069 ORIGINAL
1070 0: zero
1071 1: one
1072 2: two
1073 3: three
1074 4: four
1075
1076 The last record was [four]
1077 The first record was [zero]
1078
1079 REVERSE
1080 5: last
1081 4: three
1082 3: Newbie
1083 2: one
1084 1: New One
1085 0: first
1086
1087 REVERSE again
1088 5: last
1089 4: three
1090 3: Newbie
1091 2: one
1092 1: New One
1093 0: first
1094
1095Notes:
1096
1097=over 5
1098
1099=item 1.
1100
1101Rather than iterating through the array C<@h> like this:
1102
1103 foreach $i (@h)
1104
1105it is necessary to use either this:
1106
1107 foreach $i (0 .. $H->length - 1)
1108
1109or this:
1110
1111 for ($a = $H->seq($k, $v, R_FIRST) ;
1112 $a == 0 ;
1113 $a = $H->seq($k, $v, R_NEXT) )
1114
1115=item 2.
1116
1117Notice that both times the C<put> method was used the record index was
1118specified using a variable, C<$i>, rather than the literal value
1119itself. This is because C<put> will return the record number of the
1120inserted line via that parameter.
1121
1122=back
1123
1124=head1 THE API INTERFACE
3b35bae3 1125
1126As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1127possible to make direct use of most of the API functions defined in the
8e07c86e 1128Berkeley DB documentation.
3b35bae3 1129
88108326 1130To do this you need to store a copy of the object returned from the tie.
3b35bae3 1131
88108326 1132 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1133
8e07c86e 1134Once you have done that, you can access the Berkeley DB API functions
88108326 1135as B<DB_File> methods directly like this:
3b35bae3 1136
1137 $db->put($key, $value, R_NOOVERWRITE) ;
1138
88108326 1139B<Important:> If you have saved a copy of the object returned from
1140C<tie>, the underlying database file will I<not> be closed until both
1141the tied variable is untied and all copies of the saved object are
610ab055 1142destroyed.
88108326 1143
1144 use DB_File ;
1145 $db = tie %hash, "DB_File", "filename"
1146 or die "Cannot tie filename: $!" ;
1147 ...
1148 undef $db ;
1149 untie %hash ;
1150
9a2c4ce3 1151See L<The untie() Gotcha> for more details.
778183f3 1152
88108326 1153All the functions defined in L<dbopen> are available except for
1154close() and dbopen() itself. The B<DB_File> method interface to the
1155supported functions has been implemented to mirror the way Berkeley DB
1156works whenever possible. In particular note that:
1157
1158=over 5
1159
1160=item *
1161
1162The methods return a status value. All return 0 on success.
1163All return -1 to signify an error and set C<$!> to the exact
1164error code. The return code 1 generally (but not always) means that the
1165key specified did not exist in the database.
1166
1167Other return codes are defined. See below and in the Berkeley DB
1168documentation for details. The Berkeley DB documentation should be used
1169as the definitive source.
1170
1171=item *
3b35bae3 1172
88108326 1173Whenever a Berkeley DB function returns data via one of its parameters,
1174the equivalent B<DB_File> method does exactly the same.
3b35bae3 1175
88108326 1176=item *
1177
1178If you are careful, it is possible to mix API calls with the tied
1179hash/array interface in the same piece of code. Although only a few of
1180the methods used to implement the tied interface currently make use of
1181the cursor, you should always assume that the cursor has been changed
1182any time the tied hash/array interface is used. As an example, this
1183code will probably not do what you expect:
1184
1185 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1186 or die "Cannot tie $filename: $!" ;
1187
1188 # Get the first key/value pair and set the cursor
1189 $X->seq($key, $value, R_FIRST) ;
1190
1191 # this line will modify the cursor
1192 $count = scalar keys %x ;
1193
1194 # Get the second key/value pair.
1195 # oops, it didn't, it got the last key/value pair!
1196 $X->seq($key, $value, R_NEXT) ;
1197
1198The code above can be rearranged to get around the problem, like this:
1199
1200 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1201 or die "Cannot tie $filename: $!" ;
1202
1203 # this line will modify the cursor
1204 $count = scalar keys %x ;
1205
1206 # Get the first key/value pair and set the cursor
1207 $X->seq($key, $value, R_FIRST) ;
1208
1209 # Get the second key/value pair.
1210 # worked this time.
1211 $X->seq($key, $value, R_NEXT) ;
1212
1213=back
1214
1215All the constants defined in L<dbopen> for use in the flags parameters
1216in the methods defined below are also available. Refer to the Berkeley
1217DB documentation for the precise meaning of the flags values.
1218
1219Below is a list of the methods available.
3b35bae3 1220
1221=over 5
1222
f6b705ef 1223=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1224
1225Given a key (C<$key>) this method reads the value associated with it
1226from the database. The value read from the database is returned in the
1227C<$value> parameter.
3b35bae3 1228
88108326 1229If the key does not exist the method returns 1.
3b35bae3 1230
88108326 1231No flags are currently defined for this method.
3b35bae3 1232
f6b705ef 1233=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1234
88108326 1235Stores the key/value pair in the database.
1236
1237If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1238will be set to the record number of the inserted key/value pair.
3b35bae3 1239
88108326 1240Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1241R_SETCURSOR.
1242
f6b705ef 1243=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1244
88108326 1245Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1246
88108326 1247A return code of 1 means that the requested key was not in the
1248database.
3b35bae3 1249
88108326 1250R_CURSOR is the only valid flag at present.
3b35bae3 1251
f6b705ef 1252=item B<$status = $X-E<gt>fd ;>
3b35bae3 1253
88108326 1254Returns the file descriptor for the underlying database.
3b35bae3 1255
f6b705ef 1256See L<Locking Databases> for an example of how to make use of the
88108326 1257C<fd> method to lock your database.
3b35bae3 1258
f6b705ef 1259=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1260
88108326 1261This interface allows sequential retrieval from the database. See
1262L<dbopen> for full details.
1263
1264Both the C<$key> and C<$value> parameters will be set to the key/value
1265pair read from the database.
1266
1267The flags parameter is mandatory. The valid flag values are R_CURSOR,
1268R_FIRST, R_LAST, R_NEXT and R_PREV.
1269
f6b705ef 1270=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1271
1272Flushes any cached buffers to disk.
1273
1274R_RECNOSYNC is the only valid flag at present.
3b35bae3 1275
1276=back
1277
f6b705ef 1278=head1 HINTS AND TIPS
3b35bae3 1279
3b35bae3 1280
cb1a09d0 1281=head2 Locking Databases
3b35bae3 1282
cb1a09d0 1283Concurrent access of a read-write database by several parties requires
1284them all to use some kind of locking. Here's an example of Tom's that
1285uses the I<fd> method to get the file descriptor, and then a careful
1286open() to give something Perl will flock() for you. Run this repeatedly
1287in the background to watch the locks granted in proper order.
3b35bae3 1288
cb1a09d0 1289 use DB_File;
1290
1291 use strict;
1292
1293 sub LOCK_SH { 1 }
1294 sub LOCK_EX { 2 }
1295 sub LOCK_NB { 4 }
1296 sub LOCK_UN { 8 }
1297
1298 my($oldval, $fd, $db, %db, $value, $key);
1299
1300 $key = shift || 'default';
1301 $value = shift || 'magic';
1302
1303 $value .= " $$";
1304
1305 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1306 || die "dbcreat /tmp/foo.db $!";
1307 $fd = $db->fd;
1308 print "$$: db fd is $fd\n";
1309 open(DB_FH, "+<&=$fd") || die "dup $!";
1310
1311
1312 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1313 print "$$: CONTENTION; can't read during write update!
1314 Waiting for read lock ($!) ....";
1315 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1316 }
1317 print "$$: Read lock granted\n";
1318
1319 $oldval = $db{$key};
1320 print "$$: Old value was $oldval\n";
1321 flock(DB_FH, LOCK_UN);
1322
1323 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1324 print "$$: CONTENTION; must have exclusive lock!
1325 Waiting for write lock ($!) ....";
1326 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1327 }
1328
1329 print "$$: Write lock granted\n";
1330 $db{$key} = $value;
610ab055 1331 $db->sync; # to flush
cb1a09d0 1332 sleep 10;
1333
1334 flock(DB_FH, LOCK_UN);
88108326 1335 undef $db;
cb1a09d0 1336 untie %db;
1337 close(DB_FH);
1338 print "$$: Updated db to $key=$value\n";
1339
68dc0745 1340=head2 Sharing Databases With C Applications
f6b705ef 1341
1342There is no technical reason why a Berkeley DB database cannot be
1343shared by both a Perl and a C application.
1344
1345The vast majority of problems that are reported in this area boil down
1346to the fact that C strings are NULL terminated, whilst Perl strings are
1347not.
1348
1349Here is a real example. Netscape 2.0 keeps a record of the locations you
1350visit along with the time you last visited them in a DB_HASH database.
1351This is usually stored in the file F<~/.netscape/history.db>. The key
1352field in the database is the location string and the value field is the
1353time the location was last visited stored as a 4 byte binary value.
1354
1355If you haven't already guessed, the location string is stored with a
1356terminating NULL. This means you need to be careful when accessing the
1357database.
1358
1359Here is a snippet of code that is loosely based on Tom Christiansen's
1360I<ggh> script (available from your nearest CPAN archive in
1361F<authors/id/TOMC/scripts/nshist.gz>).
1362
610ab055 1363 use strict ;
f6b705ef 1364 use DB_File ;
1365 use Fcntl ;
f6b705ef 1366
610ab055 1367 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1368 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1369
1370 $HISTORY = "$dotdir/.netscape/history.db";
1371
1372 tie %hist_db, 'DB_File', $HISTORY
1373 or die "Cannot open $HISTORY: $!\n" ;
1374
1375 # Dump the complete database
1376 while ( ($href, $binary_time) = each %hist_db ) {
1377
1378 # remove the terminating NULL
1379 $href =~ s/\x00$// ;
1380
1381 # convert the binary time into a user friendly string
1382 $date = localtime unpack("V", $binary_time);
1383 print "$date $href\n" ;
1384 }
1385
1386 # check for the existence of a specific key
1387 # remember to add the NULL
1388 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1389 $date = localtime unpack("V", $binary_time) ;
1390 print "Last visited mox.perl.com on $date\n" ;
1391 }
1392 else {
1393 print "Never visited mox.perl.com\n"
1394 }
1395
1396 untie %hist_db ;
1397
68dc0745 1398=head2 The untie() Gotcha
778183f3 1399
1400If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1401recommended that you read L<perltie/The untie Gotcha>.
778183f3 1402
1403Even if you don't currently make use of the API interface, it is still
1404worth reading it.
1405
1406Here is an example which illustrates the problem from a B<DB_File>
1407perspective:
1408
1409 use DB_File ;
1410 use Fcntl ;
1411
1412 my %x ;
1413 my $X ;
1414
1415 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1416 or die "Cannot tie first time: $!" ;
1417
1418 $x{123} = 456 ;
1419
1420 untie %x ;
1421
1422 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1423 or die "Cannot tie second time: $!" ;
1424
1425 untie %x ;
1426
1427When run, the script will produce this error message:
1428
1429 Cannot tie second time: Invalid argument at bad.file line 14.
1430
1431Although the error message above refers to the second tie() statement
1432in the script, the source of the problem is really with the untie()
1433statement that precedes it.
1434
1435Having read L<perltie> you will probably have already guessed that the
1436error is caused by the extra copy of the tied object stored in C<$X>.
1437If you haven't, then the problem boils down to the fact that the
1438B<DB_File> destructor, DESTROY, will not be called until I<all>
1439references to the tied object are destroyed. Both the tied variable,
1440C<%x>, and C<$X> above hold a reference to the object. The call to
1441untie() will destroy the first, but C<$X> still holds a valid
1442reference, so the destructor will not get called and the database file
1443F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1444attempt to open a database that is already open via the catch-all
1445"Invalid argument" doesn't help.
1446
1447If you run the script with the C<-w> flag the error message becomes:
1448
1449 untie attempted while 1 inner references still exist at bad.file line 12.
1450 Cannot tie second time: Invalid argument at bad.file line 14.
1451
1452which pinpoints the real problem. Finally the script can now be
1453modified to fix the original problem by destroying the API object
1454before the untie:
1455
1456 ...
1457 $x{123} = 456 ;
1458
1459 undef $X ;
1460 untie %x ;
1461
1462 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1463 ...
1464
f6b705ef 1465
1466=head1 COMMON QUESTIONS
1467
1468=head2 Why is there Perl source in my database?
1469
1470If you look at the contents of a database file created by DB_File,
1471there can sometimes be part of a Perl script included in it.
1472
1473This happens because Berkeley DB uses dynamic memory to allocate
1474buffers which will subsequently be written to the database file. Being
1475dynamic, the memory could have been used for anything before DB
1476malloced it. As Berkeley DB doesn't clear the memory once it has been
1477allocated, the unused portions will contain random junk. In the case
1478where a Perl script gets written to the database, the random junk will
1479correspond to an area of dynamic memory that happened to be used during
1480the compilation of the script.
1481
1482Unless you don't like the possibility of there being part of your Perl
1483scripts embedded in a database file, this is nothing to worry about.
1484
1485=head2 How do I store complex data structures with DB_File?
1486
1487Although B<DB_File> cannot do this directly, there is a module which
1488can layer transparently over B<DB_File> to accomplish this feat.
1489
1490Check out the MLDBM module, available on CPAN in the directory
1491F<modules/by-module/MLDBM>.
1492
1493=head2 What does "Invalid Argument" mean?
1494
1495You will get this error message when one of the parameters in the
1496C<tie> call is wrong. Unfortunately there are quite a few parameters to
1497get wrong, so it can be difficult to figure out which one it is.
1498
1499Here are a couple of possibilities:
1500
1501=over 5
1502
1503=item 1.
1504
610ab055 1505Attempting to reopen a database without closing it.
f6b705ef 1506
1507=item 2.
1508
1509Using the O_WRONLY flag.
1510
1511=back
1512
1513=head2 What does "Bareword 'DB_File' not allowed" mean?
1514
1515You will encounter this particular error message when you have the
1516C<strict 'subs'> pragma (or the full strict pragma) in your script.
1517Consider this script:
1518
1519 use strict ;
1520 use DB_File ;
1521 use vars qw(%x) ;
1522 tie %x, DB_File, "filename" ;
1523
1524Running it produces the error in question:
1525
1526 Bareword "DB_File" not allowed while "strict subs" in use
1527
1528To get around the error, place the word C<DB_File> in either single or
1529double quotes, like this:
1530
1531 tie %x, "DB_File", "filename" ;
1532
1533Although it might seem like a real pain, it is really worth the effort
1534of having a C<use strict> in all your scripts.
1535
cb1a09d0 1536=head1 HISTORY
1537
1538=over
1539
1540=item 0.1
3b35bae3 1541
1542First Release.
1543
cb1a09d0 1544=item 0.2
3b35bae3 1545
1546When B<DB_File> is opening a database file it no longer terminates the
1547process if I<dbopen> returned an error. This allows file protection
1548errors to be caught at run time. Thanks to Judith Grass
cb1a09d0 1549E<lt>grass@cybercash.comE<gt> for spotting the bug.
3b35bae3 1550
cb1a09d0 1551=item 0.3
8e07c86e 1552
1553Added prototype support for multiple btree compare callbacks.
1554
cb1a09d0 1555=item 1.0
8e07c86e 1556
1557B<DB_File> has been in use for over a year. To reflect that, the
1558version number has been incremented to 1.0.
1559
1560Added complete support for multiple concurrent callbacks.
1561
1562Using the I<push> method on an empty list didn't work properly. This
1563has been fixed.
1564
cb1a09d0 1565=item 1.01
4633a7c4 1566
1567Fixed a core dump problem with SunOS.
1568
1569The return value from TIEHASH wasn't set to NULL when dbopen returned
1570an error.
1571
88108326 1572=item 1.02
1573
f6b705ef 1574Merged OS/2 specific code into DB_File.xs
88108326 1575
1576Removed some redundant code in DB_File.xs.
1577
1578Documentation update.
1579
1580Allow negative subscripts with RECNO interface.
1581
1582Changed the default flags from O_RDWR to O_CREAT|O_RDWR.
1583
1584The example code which showed how to lock a database needed a call to
1585C<sync> added. Without it the resultant database file was empty.
1586
f6b705ef 1587Added get_dup method.
88108326 1588
f6b705ef 1589=item 1.03
1590
1591Documentation update.
3b35bae3 1592
f6b705ef 1593B<DB_File> now imports the constants (O_RDWR, O_CREAT etc.) from Fcntl
1594automatically.
3b35bae3 1595
f6b705ef 1596The standard hash function C<exists> is now supported.
1597
1598Modified the behavior of get_dup. When it returns an associative
1599array, the value is the count of the number of matching BTREE values.
3b35bae3 1600
610ab055 1601=item 1.04
1602
1603Minor documentation changes.
1604
1605Fixed a bug in hash_cb. Patches supplied by Dave Hammen,
1606E<lt>hammen@gothamcity.jsc.nasa.govE<gt>.
1607
1608Fixed a bug with the constructors for DB_File::HASHINFO,
1609DB_File::BTREEINFO and DB_File::RECNOINFO. Also tidied up the
1610constructors to make them C<-w> clean.
1611
1612Reworked part of the test harness to be more locale friendly.
1613
1614=item 1.05
1615
1616Made all scripts in the documentation C<strict> and C<-w> clean.
1617
1618Added logic to F<DB_File.xs> to allow the module to be built after Perl
1619is installed.
1620
ff68c719 1621=item 1.06
1622
1623Minor namespace cleanup: Localized C<PrintBtree>.
1624
36477c24 1625=item 1.07
1626
1627Fixed bug with RECNO, where bval wasn't defaulting to "\n".
1628
1629=item 1.08
1630
1631Documented operation of bval.
1632
18d2dc8c 1633=item 1.09
1634
1635Minor bug fix in DB_File::HASHINFO, DB_File::RECNOINFO and
1636DB_File::BTREEINFO.
1637
1638Changed default mode to 0666.
1639
a0b8c8c1 1640=item 1.10
1641
1642Fixed fd method so that it still returns -1 for in-memory files when db
16431.86 is used.
1644
778183f3 1645=item 1.11
1646
1647Documented the untie gotcha.
1648
68dc0745 1649=item 1.12
1650
1651Documented the incompatibility with version 2 of Berkeley DB.
1652
610ab055 1653=back
1654
3b35bae3 1655=head1 BUGS
1656
8e07c86e 1657Some older versions of Berkeley DB had problems with fixed length
1658records using the RECNO file format. The newest version at the time of
1659writing was 1.85 - this seems to have fixed the problems with RECNO.
3b35bae3 1660
8e07c86e 1661I am sure there are bugs in the code. If you do find any, or can
1662suggest any enhancements, I would welcome your comments.
3b35bae3 1663
1664=head1 AVAILABILITY
1665
f6b705ef 1666B<DB_File> comes with the standard Perl source distribution. Look in
1667the directory F<ext/DB_File>.
1668
68dc0745 1669This version of B<DB_File> will only work with version 1.x of Berkeley
1670DB. It is I<not> yet compatible with version 2.
1671
1672Version 1 of Berkeley DB is available at your nearest CPAN archive (see
cb1a09d0 1673L<perlmod/"CPAN"> for a list) in F<src/misc/db.1.85.tar.gz>, or via the
610ab055 1674host F<ftp.cs.berkeley.edu> in F</ucb/4bsd/db.tar.gz>. Alternatively,
1675check out the Berkeley DB home page at F<http://www.bostic.com/db>. It
1676is I<not> under the GPL.
3b35bae3 1677
88108326 1678If you are running IRIX, then get Berkeley DB from
1679F<http://reality.sgi.com/ariel>. It has the patches necessary to
1680compile properly on IRIX 5.3.
1681
a0b8c8c1 1682As of January 1997, version 1.86 of Berkeley DB is available from the
1683Berkeley DB home page. Although this release does fix a number of bugs
778183f3 1684that were present in 1.85 you should be aware of the following
a0b8c8c1 1685information (taken from the Berkeley DB home page) before you consider
1686using it:
1687
1688 DB version 1.86 includes a new implementation of the hash access
1689 method that fixes a variety of hashing problems found in DB version
1690 1.85. We are making it available as an interim solution until DB
1691 2.0 is available.
1692
1693 PLEASE NOTE: the underlying file format for the hash access method
1694 changed between version 1.85 and version 1.86, so you will have to
1695 dump and reload all of your databases to convert from version 1.85
1696 to version 1.86. If you do not absolutely require the fixes from
1697 version 1.86, we strongly urge you to wait until DB 2.0 is released
1698 before upgrading from 1.85.
1699
1700
3b35bae3 1701=head1 SEE ALSO
1702
1703L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>
1704
3b35bae3 1705=head1 AUTHOR
1706
8e07c86e 1707The DB_File interface was written by Paul Marquess
88108326 1708E<lt>pmarquess@bfsec.bt.co.ukE<gt>.
8e07c86e 1709Questions about the DB system itself may be addressed to Keith Bostic
88108326 1710E<lt>bostic@cs.berkeley.eduE<gt>.
3b35bae3 1711
1712=cut