TIEARRAY updates - almost works ...
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
# DB_File.pm -- Perl 5 interface to Berkeley DB
#
# written by Paul Marquess (pmarquess@bfsec.bt.co.uk)
# last modified 20th Nov 1997
# version 1.56
#
#     Copyright (c) 1995, 1996, 1997 Paul Marquess. All rights reserved.
#     This program is free software; you can redistribute it and/or
#     modify it under the same terms as Perl itself.


package DB_File::HASHINFO ;

# Option container for the DB_HASH file type.
#
# An instance is a reference to a tied hash.  Only the option names listed
# in VALID (see TIEHASH) may be read, written or deleted; any other key makes
# the accessor croak.  The values actually set are kept in GOT.
# DB_File::RECNOINFO and DB_File::BTREEINFO inherit everything here and only
# replace TIEHASH to supply their own list of valid option names.

require 5.003 ;

use strict;
use Carp;
require Tie::Hash;
@DB_File::HASHINFO::ISA = qw(Tie::Hash);

# Constructor.  Ties a fresh hash to $pkg (which invokes $pkg->TIEHASH) and
# returns a reference to that hash blessed into $pkg, so that
# $obj->{option} is routed through FETCH/STORE/DELETE/EXISTS below.
sub new
{
    my $pkg = shift ;
    my %x ;
    tie %x, $pkg ;
    bless \%x, $pkg ;
}


# Build the object behind the tie:
#   VALID - set (name => 1) of the option names this info type accepts
#   GOT   - the option values that have been stored so far
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map {$_, 1}
                       qw( bsize ffactor nelem cachesize hash lorder)
                     },
            GOT   => {}
          }, $pkg ;
}


# Return the stored value for a valid option name (undef if it has not been
# set); croak for a name that is not in VALID.
sub FETCH
{
    my $self = shift ;
    my $key  = shift ;

    return $self->{GOT}{$key} if exists $self->{VALID}{$key}  ;

    my $pkg = ref $self ;
    croak "${pkg}::FETCH - Unknown element '$key'" ;
}


# Store a value for a valid option name; croak for a name not in VALID.
sub STORE
{
    my $self  = shift ;
    my $key   = shift ;
    my $value = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        $self->{GOT}{$key} = $value ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::STORE - Unknown element '$key'" ;
}

# Forget the stored value for a valid option name; croak for a name not in
# VALID.  The error message names the object's own class, exactly as FETCH
# and STORE do, so subclasses report themselves correctly.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}

# True when $key is a valid option name for this info type.  Note that this
# tests VALID, not GOT: it does not say whether a value has been stored.
sub EXISTS
{
    my $self = shift ;
    my $key  = shift ;

    exists $self->{VALID}{$key} ;
}

# Shared failure path for the tied-hash operations that are not supported.
sub NotHere
{
    my $self   = shift ;
    my $method = shift ;

    croak ref($self) . " does not define the method ${method}" ;
}

# Iterating over, or clearing, an info object is not supported.
sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }

package DB_File::RECNOINFO ;

# Option container for the DB_RECNO file type.  All behaviour is inherited
# from DB_File::HASHINFO; only the set of valid option names differs.

use strict ;

@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;

# Build the object behind the tie:
#   VALID - set (name => 1) of the option names accepted for DB_RECNO
#   GOT   - the option values that have been stored so far
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map {$_, 1}
                       qw( bval cachesize psize flags lorder reclen bfname )
                     },
            GOT   => {},
          }, $pkg ;
}

package DB_File::BTREEINFO ;

# Option container for the DB_BTREE file type.  All behaviour is inherited
# from DB_File::HASHINFO; only the set of valid option names differs.

use strict ;

@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;

# Build the object behind the tie:
#   VALID - set (name => 1) of the option names accepted for DB_BTREE
#   GOT   - the option values that have been stored so far
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map {$_, 1}
                       qw( flags cachesize maxkeypage minkeypage psize
                           compare prefix lorder )
                     },
            GOT   => {},
          }, $pkg ;
}


package DB_File ;

use strict;
use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_version) ;
use Carp;


$VERSION = "1.56" ;

# The three pre-defined info objects, one per file type.  They are exported
# by default and passed as the last argument to tie().
#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;

require Tie::Hash;
require Exporter;
use AutoLoader;
require DynaLoader;
@ISA = qw(Tie::Hash Exporter DynaLoader);

# Names exported by default: the three info objects plus the Berkeley DB
# constants.  None of the constants is defined as a sub in this part of the
# file; they are resolved on first use by AUTOLOAD.
@EXPORT = qw(
        $DB_BTREE $DB_HASH $DB_RECNO

        BTREEMAGIC
        BTREEVERSION
        DB_LOCK
        DB_SHMEM
        DB_TXN
        HASHMAGIC
        HASHVERSION
        MAX_PAGE_NUMBER
        MAX_PAGE_OFFSET
        MAX_REC_NUMBER
        RET_ERROR
        RET_SPECIAL
        RET_SUCCESS
        R_CURSOR
        R_DUP
        R_FIRST
        R_FIXEDLEN
        R_IAFTER
        R_IBEFORE
        R_LAST
        R_NEXT
        R_NOKEY
        R_NOOVERWRITE
        R_PREV
        R_RECNOSYNC
        R_SETCURSOR
        R_SNAPSHOT
        __R_UNUSED

);

# Tied-array FETCHSIZE is an alias for the length method.  DB_File::length is
# not defined in the Perl code visible here - presumably it is supplied by
# the XS code loaded by bootstrap (TODO confirm).
*FETCHSIZE = \&length;

# Resolve a call to an undefined DB_File function.
#
# The unqualified name is handed to constant() (not defined in the Perl code
# visible here - presumably the XS constant lookup; TODO confirm), together
# with the first call argument, or 0 when there is none.
#   - If $! is clear afterwards, the value is compiled into a real sub of
#     that name, so AUTOLOAD is not consulted for it again, and control is
#     transferred to the new sub.
#   - If $! matches /Invalid/, the name is passed on to AutoLoader's AUTOLOAD.
#   - For any other error, croak reporting the macro name and the caller's
#     file and line.
sub AUTOLOAD {
    my($constname);
    ($constname = $AUTOLOAD) =~ s/.*:://;
    my $val = constant($constname, @_ ? $_[0] : 0);
    if ($! != 0) {
        if ($! =~ /Invalid/) {
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }
        else {
            my($pack,$file,$line) = caller;
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.
";
        }
    }
    # Define the constant as a sub, then jump to it with the original @_.
    eval "sub $AUTOLOAD { $val }";
    goto &$AUTOLOAD;
}


# Best effort: when Fcntl can be loaded, import its O_XXX constants into this
# package and add them to @EXPORT so that callers get them from DB_File too.
# A failure inside the eval is deliberately ignored.
eval {
    # Make all Fcntl O_XXX constants available for importing
    require Fcntl;
    my @O = grep /^O_/, @Fcntl::EXPORT;
    Fcntl->import(@O);  # first we import what we want to export
    push(@EXPORT, @O);
};

## import borrowed from IO::File
##   exports Fcntl constants if available.
#sub import {
#    my $pkg = shift;
#    my $callpkg = caller;
#    Exporter::export $pkg, $callpkg, @_;
#    eval {
#        require Fcntl;
#        Exporter::export 'Fcntl', $callpkg, '/^O_/';
#    };
#}

bootstrap DB_File $VERSION;

# Preloaded methods go here.  Autoload methods go after __END__, and are
# processed by the autosplit program.

# Common implementation behind TIEHASH and TIEARRAY.
#
# @_ is the argument list that tie() passes to the TIE* method:
#   $arg[0] class, $arg[1] filename, $arg[2] flags, $arg[3] mode,
#   $arg[4] info object ($DB_HASH, $DB_BTREE, $DB_RECNO or a new instance).
#
# Returns whatever DoTie_ returns (DoTie_ is not defined in the Perl code
# visible here), or undef when the RECNO file could not be created.
sub tie_hash_or_array
{
    my (@arg) = @_ ;

    # Work out which TIE* method called us.  This relies on being called
    # directly from TIEHASH/TIEARRAY, since it inspects the caller's sub name.
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    # The info objects are references to tied hashes; replace the reference
    # with the underlying tie object before passing it on.
    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    # make recno in Berkeley DB version 2 work like recno in version 1.
    # (creates the named file, empty, if it does not exist yet)
    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
        $arg[1] and ! -e $arg[1]) {
        open(FH, ">$arg[1]") or return undef ;
        close FH ;
        chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
    }

    DoTie_($tieHASH, @arg) ;
}

# tie %hash, 'DB_File', ...  lands here.  This must remain a real call frame
# (not a glob alias or goto): tie_hash_or_array looks at the name of its
# caller to tell a hash tie from an array tie.
sub TIEHASH
{
    tie_hash_or_array(@_) ;
}

# tie @array, 'DB_File', ...  lands here.  This must remain a real call frame
# (not a glob alias or goto): tie_hash_or_array looks at the name of its
# caller to tell a hash tie from an array tie.
sub TIEARRAY
{
    tie_hash_or_array(@_) ;
}

# Remove every record from the database (used for  %h = ()  /  @a = ()).
#
# The keys are collected first by walking the database with seq(), and only
# then deleted, so the walk is never disturbed by the deletions.  Keys are
# deleted last-to-first - presumably so that RECNO record numbers collected
# earlier are still valid when they are deleted (TODO confirm).
sub CLEAR {
    my $self = shift;
    my $key = "" ;
    my $value = "" ;
    my $status = $self->seq($key, $value, R_FIRST());
    my @keys;

    # seq() returns 0 for as long as it delivers another record
    while ($status == 0) {
        push @keys, $key;
        $status = $self->seq($key, $value, R_NEXT());
    }
    foreach $key (reverse @keys) {
        my $s = $self->del($key);
    }
}

# Read the values stored under a (possibly duplicated) key.
#
#   $count = $db->get_dup($key) ;     # scalar context: number of values
#   @list  = $db->get_dup($key) ;     # list context: the values themselves
#   %list  = $db->get_dup($key, 1) ;  # list context, true flag:
#                                     #   value => number of occurrences
#
# Croaks with a usage message unless called with one or two arguments after
# the object.
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my $db        = shift ;
    my $key       = shift ;
    my $flag      = shift ;
    my $value     = 0 ;
    my $origkey   = $key ;
    my $wantarray = wantarray ;
    my %values    = () ;
    my @values    = () ;
    my $counter   = 0 ;
    my $status    = 0 ;

    # iterate through the database until either the end is reached
    # ($status != 0) or a different key is encountered ($key ne $origkey).
    # seq() overwrites $key and $value with the record it has found.
    for ($status = $db->seq($key, $value, R_CURSOR()) ;
         $status == 0 and $key eq $origkey ;
         $status = $db->seq($key, $value, R_NEXT()) ) {

        # save the value or count number of matches
        if ($wantarray) {
            if ($flag)
                { ++ $values{$value} }
            else
                { push (@values, $value) }
        }
        else
            { ++ $counter }

    }

    return ($wantarray ? ($flag ? %values : @values) : $counter) ;
}


1;
__END__
326
3b35bae3 327=head1 NAME
328
1f70e1ea 329DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 330
331=head1 SYNOPSIS
332
333 use DB_File ;
88108326 334
335 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
336 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
337 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 338
3b35bae3 339 $status = $X->del($key [, $flags]) ;
340 $status = $X->put($key, $value [, $flags]) ;
341 $status = $X->get($key, $value [, $flags]) ;
760ac839 342 $status = $X->seq($key, $value, $flags) ;
3b35bae3 343 $status = $X->sync([$flags]) ;
344 $status = $X->fd ;
760ac839 345
f6b705ef 346 # BTREE only
88108326 347 $count = $X->get_dup($key) ;
348 @list = $X->get_dup($key) ;
349 %list = $X->get_dup($key, 1) ;
350
f6b705ef 351 # RECNO only
352 $a = $X->length;
353 $a = $X->pop ;
354 $X->push(list);
355 $a = $X->shift;
356 $X->unshift(list);
357
3b35bae3 358 untie %hash ;
359 untie @array ;
360
361=head1 DESCRIPTION
362
8e07c86e 363B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 364facilities provided by Berkeley DB version 1.x (if you have a newer
365version of DB, see L<Using DB_File with Berkeley DB version 2>). It is
366assumed that you have a copy of the Berkeley DB manual pages at hand
367when reading this documentation. The interface defined here mirrors the
368Berkeley DB interface closely.
68dc0745 369
8e07c86e 370Berkeley DB is a C library which provides a consistent interface to a
371number of database formats. B<DB_File> provides an interface to all
372three of the database types currently supported by Berkeley DB.
3b35bae3 373
374The file types are:
375
376=over 5
377
88108326 378=item B<DB_HASH>
3b35bae3 379
88108326 380This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 381files. This is equivalent to the functionality provided by other
382hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
383the files created using DB_HASH are not compatible with any of the
384other packages mentioned.
3b35bae3 385
8e07c86e 386A default hashing algorithm, which will be adequate for most
387applications, is built into Berkeley DB. If you do need to use your own
388hashing algorithm it is possible to write your own in Perl and have
389B<DB_File> use it instead.
3b35bae3 390
88108326 391=item B<DB_BTREE>
392
393The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 394sorted, balanced binary tree.
3b35bae3 395
8e07c86e 396As with the DB_HASH format, it is possible to provide a user defined
397Perl routine to perform the comparison of keys. By default, though, the
398keys are stored in lexical order.
3b35bae3 399
88108326 400=item B<DB_RECNO>
3b35bae3 401
8e07c86e 402DB_RECNO allows both fixed-length and variable-length flat text files
403to be manipulated using the same key/value pair interface as in DB_HASH
404and DB_BTREE. In this case the key will consist of a record (line)
405number.
3b35bae3 406
407=back
408
1f70e1ea 409=head2 Using DB_File with Berkeley DB version 2
410
411Although B<DB_File> is intended to be used with Berkeley DB version 1,
412it can also be used with version 2. In this case the interface is
413limited to the functionality provided by Berkeley DB 1.x. Anywhere the
414version 2 interface differs, B<DB_File> arranges for it to work like
415version 1. This feature allows B<DB_File> scripts that were built with
416version 1 to be migrated to version 2 without any changes.
417
418If you want to make use of the new features available in Berkeley DB
4192.x, use the Perl module B<BerkeleyDB> instead.
420
421At the time of writing this document the B<BerkeleyDB> module is still
422alpha quality (the version number is < 1.0), and so unsuitable for use
423in any serious development work. Once its version number is >= 1.0, it
424is considered stable enough for real work.
425
426B<Note:> The database file format has changed in Berkeley DB version 2.
427If you cannot recreate your databases, you must dump any existing
428databases with the C<db_dump185> utility that comes with Berkeley DB.
429Once you have upgraded DB_File to use Berkeley DB version 2, your
430databases can be recreated using C<db_load>. Refer to the Berkeley DB
431documentation for further details.
432
433Please read L<COPYRIGHT> before using version 2.x of Berkeley DB with
434DB_File.
435
68dc0745 436=head2 Interface to Berkeley DB
3b35bae3 437
438B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 439in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
440allows B<DB_File> to access Berkeley DB files using either an
441associative array (for DB_HASH & DB_BTREE file types) or an ordinary
442array (for the DB_RECNO file type).
3b35bae3 443
88108326 444In addition to the tie() interface, it is also possible to access most
445of the functions provided in the Berkeley DB API directly.
f6b705ef 446See L<THE API INTERFACE>.
3b35bae3 447
88108326 448=head2 Opening a Berkeley DB Database File
3b35bae3 449
8e07c86e 450Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 451Here is the C prototype for dbopen():
3b35bae3 452
453 DB*
454 dbopen (const char * file, int flags, int mode,
455 DBTYPE type, const void * openinfo)
456
457The parameter C<type> is an enumeration which specifies which of the 3
458interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
459Depending on which of these is actually chosen, the final parameter,
460I<openinfo> points to a data structure which allows tailoring of the
461specific interface method.
462
8e07c86e 463This interface is handled slightly differently in B<DB_File>. Here is
88108326 464an equivalent call using B<DB_File>:
3b35bae3 465
88108326 466 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 467
8e07c86e 468The C<filename>, C<flags> and C<mode> parameters are the direct
469equivalent of their dbopen() counterparts. The final parameter $DB_HASH
470performs the function of both the C<type> and C<openinfo> parameters in
471dbopen().
3b35bae3 472
88108326 473In the example above $DB_HASH is actually a pre-defined reference to a
474hash object. B<DB_File> has three of these pre-defined references.
475Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 476
The keys allowed in each of these pre-defined references are limited to
the names used in the equivalent C structure. So, for example, the
$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
C<ffactor>, C<hash>, C<lorder> and C<nelem>.
481
482To change one of these elements, just assign to it like this:
483
484 $DB_HASH->{'cachesize'} = 10000 ;
485
486The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
487usually adequate for most applications. If you do need to create extra
488instances of these objects, constructors are available for each file
489type.
490
491Here are examples of the constructors and the valid options available
492for DB_HASH, DB_BTREE and DB_RECNO respectively.
493
494 $a = new DB_File::HASHINFO ;
495 $a->{'bsize'} ;
496 $a->{'cachesize'} ;
497 $a->{'ffactor'};
498 $a->{'hash'} ;
499 $a->{'lorder'} ;
500 $a->{'nelem'} ;
501
502 $b = new DB_File::BTREEINFO ;
503 $b->{'flags'} ;
504 $b->{'cachesize'} ;
505 $b->{'maxkeypage'} ;
506 $b->{'minkeypage'} ;
507 $b->{'psize'} ;
508 $b->{'compare'} ;
509 $b->{'prefix'} ;
510 $b->{'lorder'} ;
511
512 $c = new DB_File::RECNOINFO ;
513 $c->{'bval'} ;
514 $c->{'cachesize'} ;
515 $c->{'psize'} ;
516 $c->{'flags'} ;
517 $c->{'lorder'} ;
518 $c->{'reclen'} ;
519 $c->{'bfname'} ;
520
The values stored in the hashes above are mostly the direct equivalent
of their C counterpart. Like their C counterparts, all are set to
default values - that means you don't have to set I<all> of the
values when you only want to change one. Here is an example:
525
526 $a = new DB_File::HASHINFO ;
527 $a->{'cachesize'} = 12345 ;
528 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
529
36477c24 530A few of the options need extra discussion here. When used, the C
88108326 531equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
532to C functions. In B<DB_File> these keys are used to store references
533to Perl subs. Below are templates for each of the subs:
534
535 sub hash
536 {
537 my ($data) = @_ ;
538 ...
539 # return the hash value for $data
540 return $hash ;
541 }
3b35bae3 542
    sub compare
    {
        my ($key1, $key2) = @_ ;
        ...
        # return  0 if $key1 eq $key2
        #        -1 if $key1 lt $key2
        #         1 if $key1 gt $key2
        return (-1 , 0 or 1) ;
    }
3b35bae3 552
    sub prefix
    {
        my ($key1, $key2) = @_ ;
        ...
        # return number of bytes of $key2 which are
        # necessary to determine that it is greater than $key1
        return $bytes ;
    }
3b35bae3 561
f6b705ef 562See L<Changing the BTREE sort order> for an example of using the
563C<compare> template.
88108326 564
36477c24 565If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 566C<bval>, you should check out L<The 'bval' Option>.
36477c24 567
88108326 568=head2 Default Parameters
569
570It is possible to omit some or all of the final 4 parameters in the
571call to C<tie> and let them take default values. As DB_HASH is the most
572common file format used, the call:
573
574 tie %A, "DB_File", "filename" ;
575
576is equivalent to:
577
18d2dc8c 578 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 579
580It is also possible to omit the filename parameter as well, so the
581call:
582
583 tie %A, "DB_File" ;
584
585is equivalent to:
586
18d2dc8c 587 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 588
f6b705ef 589See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 590in place of a filename.
591
f6b705ef 592=head2 In Memory Databases
593
594Berkeley DB allows the creation of in-memory databases by using NULL
595(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
596uses C<undef> instead of NULL to provide this functionality.
597
598=head1 DB_HASH
599
600The DB_HASH file format is probably the most commonly used of the three
601file formats that B<DB_File> supports. It is also very straightforward
602to use.
603
68dc0745 604=head2 A Simple Example
f6b705ef 605
This example shows how to create a database, add key/value pairs to the
database, delete key/value pairs and finally how to enumerate the
contents of the database.
609
610ab055 610 use strict ;
f6b705ef 611 use DB_File ;
610ab055 612 use vars qw( %h $k $v ) ;
f6b705ef 613
614 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
615 or die "Cannot open file 'fruit': $!\n";
616
617 # Add a few key/value pairs to the file
618 $h{"apple"} = "red" ;
619 $h{"orange"} = "orange" ;
620 $h{"banana"} = "yellow" ;
621 $h{"tomato"} = "red" ;
622
623 # Check for existence of a key
624 print "Banana Exists\n\n" if $h{"banana"} ;
625
626 # Delete a key/value pair.
627 delete $h{"apple"} ;
628
629 # print the contents of the file
630 while (($k, $v) = each %h)
631 { print "$k -> $v\n" }
632
633 untie %h ;
634
Here is the output:
636
637 Banana Exists
638
639 orange -> orange
640 tomato -> red
641 banana -> yellow
642
Note that, like ordinary associative arrays, the keys are retrieved in an
apparently random order.
645
646=head1 DB_BTREE
647
648The DB_BTREE format is useful when you want to store data in a given
649order. By default the keys will be stored in lexical order, but as you
650will see from the example shown in the next section, it is very easy to
651define your own sorting function.
652
653=head2 Changing the BTREE sort order
654
655This script shows how to override the default sorting algorithm that
656BTREE uses. Instead of using the normal lexical ordering, a case
657insensitive compare function will be used.
88108326 658
610ab055 659 use strict ;
f6b705ef 660 use DB_File ;
610ab055 661
662 my %h ;
f6b705ef 663
664 sub Compare
665 {
666 my ($key1, $key2) = @_ ;
667 "\L$key1" cmp "\L$key2" ;
668 }
669
670 # specify the Perl sub that will do the comparison
671 $DB_BTREE->{'compare'} = \&Compare ;
672
673 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
674 or die "Cannot open file 'tree': $!\n" ;
675
676 # Add a key/value pair to the file
677 $h{'Wall'} = 'Larry' ;
678 $h{'Smith'} = 'John' ;
679 $h{'mouse'} = 'mickey' ;
680 $h{'duck'} = 'donald' ;
681
682 # Delete
683 delete $h{"duck"} ;
684
685 # Cycle through the keys printing them in order.
686 # Note it is not necessary to sort the keys as
687 # the btree will have kept them in order automatically.
688 foreach (keys %h)
689 { print "$_\n" }
690
691 untie %h ;
692
693Here is the output from the code above.
694
695 mouse
696 Smith
697 Wall
698
There are a few points to bear in mind if you want to change the
ordering in a BTREE database:
701
702=over 5
703
704=item 1.
705
706The new compare function must be specified when you create the database.
707
708=item 2.
709
710You cannot change the ordering once the database has been created. Thus
711you must use the same compare function every time you access the
88108326 712database.
713
f6b705ef 714=back
715
68dc0745 716=head2 Handling Duplicate Keys
f6b705ef 717
718The BTREE file type optionally allows a single key to be associated
719with an arbitrary number of values. This option is enabled by setting
720the flags element of C<$DB_BTREE> to R_DUP when creating the database.
721
88108326 722There are some difficulties in using the tied hash interface if you
723want to manipulate a BTREE database with duplicate keys. Consider this
724code:
725
610ab055 726 use strict ;
88108326 727 use DB_File ;
610ab055 728
729 use vars qw($filename %h ) ;
730
88108326 731 $filename = "tree" ;
732 unlink $filename ;
733
734 # Enable duplicate records
735 $DB_BTREE->{'flags'} = R_DUP ;
736
737 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
738 or die "Cannot open $filename: $!\n";
739
740 # Add some key/value pairs to the file
741 $h{'Wall'} = 'Larry' ;
742 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 743 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 744 $h{'Smith'} = 'John' ;
745 $h{'mouse'} = 'mickey' ;
746
747 # iterate through the associative array
748 # and print each key/value pair.
749 foreach (keys %h)
750 { print "$_ -> $h{$_}\n" }
751
f6b705ef 752 untie %h ;
753
88108326 754Here is the output:
755
756 Smith -> John
757 Wall -> Larry
758 Wall -> Larry
f6b705ef 759 Wall -> Larry
88108326 760 mouse -> mickey
761
f6b705ef 762As you can see 3 records have been successfully created with key C<Wall>
88108326 763- the only thing is, when they are retrieved from the database they
f6b705ef 764I<seem> to have the same value, namely C<Larry>. The problem is caused
765by the way that the associative array interface works. Basically, when
766the associative array interface is used to fetch the value associated
767with a given key, it will only ever retrieve the first value.
88108326 768
769Although it may not be immediately obvious from the code above, the
770associative array interface can be used to write values with duplicate
771keys, but it cannot be used to read them back from the database.
772
773The way to get around this problem is to use the Berkeley DB API method
774called C<seq>. This method allows sequential access to key/value
f6b705ef 775pairs. See L<THE API INTERFACE> for details of both the C<seq> method
776and the API in general.
88108326 777
778Here is the script above rewritten using the C<seq> API method.
779
610ab055 780 use strict ;
88108326 781 use DB_File ;
88108326 782
610ab055 783 use vars qw($filename $x %h $status $key $value) ;
784
88108326 785 $filename = "tree" ;
786 unlink $filename ;
787
788 # Enable duplicate records
789 $DB_BTREE->{'flags'} = R_DUP ;
790
791 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
792 or die "Cannot open $filename: $!\n";
793
794 # Add some key/value pairs to the file
795 $h{'Wall'} = 'Larry' ;
796 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 797 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 798 $h{'Smith'} = 'John' ;
799 $h{'mouse'} = 'mickey' ;
800
f6b705ef 801 # iterate through the btree using seq
88108326 802 # and print each key/value pair.
610ab055 803 $key = $value = 0 ;
f6b705ef 804 for ($status = $x->seq($key, $value, R_FIRST) ;
805 $status == 0 ;
806 $status = $x->seq($key, $value, R_NEXT) )
88108326 807 { print "$key -> $value\n" }
808
809 undef $x ;
810 untie %h ;
811
812that prints:
813
814 Smith -> John
815 Wall -> Brick
f6b705ef 816 Wall -> Brick
88108326 817 Wall -> Larry
818 mouse -> mickey
819
f6b705ef 820This time we have got all the key/value pairs, including the multiple
88108326 821values associated with the key C<Wall>.
822
68dc0745 823=head2 The get_dup() Method
f6b705ef 824
825B<DB_File> comes with a utility method, called C<get_dup>, to assist in
88108326 826reading duplicate values from BTREE databases. The method can take the
827following forms:
828
829 $count = $x->get_dup($key) ;
830 @list = $x->get_dup($key) ;
831 %list = $x->get_dup($key, 1) ;
832
833In a scalar context the method returns the number of values associated
834with the key, C<$key>.
835
836In list context, it returns all the values which match C<$key>. Note
f6b705ef 837that the values will be returned in an apparently random order.
88108326 838
7a2e2cd6 839In list context, if the second parameter is present and evaluates
840TRUE, the method returns an associative array. The keys of the
841associative array correspond to the values that matched in the BTREE
842and the values of the array are a count of the number of times that
843particular value occurred in the BTREE.
88108326 844
f6b705ef 845So assuming the database created above, we can use C<get_dup> like
88108326 846this:
847
610ab055 848 my $cnt = $x->get_dup("Wall") ;
88108326 849 print "Wall occurred $cnt times\n" ;
850
610ab055 851 my %hash = $x->get_dup("Wall", 1) ;
88108326 852 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 853 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 854
610ab055 855 my @list = $x->get_dup("Wall") ;
88108326 856 print "Wall => [@list]\n" ;
857
f6b705ef 858 @list = $x->get_dup("Smith") ;
88108326 859 print "Smith => [@list]\n" ;
860
f6b705ef 861 @list = $x->get_dup("Dog") ;
88108326 862 print "Dog => [@list]\n" ;
863
864
865and it will print:
866
f6b705ef 867 Wall occurred 3 times
88108326 868 Larry is there
f6b705ef 869 There are 2 Brick Walls
870 Wall => [Brick Brick Larry]
88108326 871 Smith => [John]
872 Dog => []
3b35bae3 873
f6b705ef 874=head2 Matching Partial Keys
875
876The BTREE interface has a feature which allows partial keys to be
877matched. This functionality is I<only> available when the C<seq> method
878is used along with the R_CURSOR flag.
879
880 $x->seq($key, $value, R_CURSOR) ;
881
882Here is the relevant quote from the dbopen man page where it defines
883the use of the R_CURSOR flag with seq:
884
f6b705ef 885 Note, for the DB_BTREE access method, the returned key is not
886 necessarily an exact match for the specified key. The returned key
887 is the smallest key greater than or equal to the specified key,
888 permitting partial key matches and range searches.
889
f6b705ef 890In the example script below, the C<match> sub uses this feature to find
891and print the first matching key/value pair given a partial key.
892
610ab055 893 use strict ;
f6b705ef 894 use DB_File ;
895 use Fcntl ;
610ab055 896
897 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 898
899 sub match
900 {
901 my $key = shift ;
610ab055 902 my $value = 0;
f6b705ef 903 my $orig_key = $key ;
904 $x->seq($key, $value, R_CURSOR) ;
905 print "$orig_key\t-> $key\t-> $value\n" ;
906 }
907
908 $filename = "tree" ;
909 unlink $filename ;
910
911 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
912 or die "Cannot open $filename: $!\n";
913
914 # Add some key/value pairs to the file
915 $h{'mouse'} = 'mickey' ;
916 $h{'Wall'} = 'Larry' ;
917 $h{'Walls'} = 'Brick' ;
918 $h{'Smith'} = 'John' ;
919
920
610ab055 921 $key = $value = 0 ;
f6b705ef 922 print "IN ORDER\n" ;
923 for ($st = $x->seq($key, $value, R_FIRST) ;
924 $st == 0 ;
925 $st = $x->seq($key, $value, R_NEXT) )
926
927 { print "$key -> $value\n" }
928
929 print "\nPARTIAL MATCH\n" ;
930
931 match "Wa" ;
932 match "A" ;
933 match "a" ;
934
935 undef $x ;
936 untie %h ;
937
938Here is the output:
939
940 IN ORDER
941 Smith -> John
942 Wall -> Larry
943 Walls -> Brick
944 mouse -> mickey
945
946 PARTIAL MATCH
947 Wa -> Wall -> Larry
948 A -> Smith -> John
949 a -> mouse -> mickey
950
951=head1 DB_RECNO
952
953DB_RECNO provides an interface to flat text files. Both variable and
954fixed length records are supported.
3b35bae3 955
88108326 956In order to make RECNO more compatible with Perl the array offset for
957all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 958
88108326 959As with normal Perl arrays, a RECNO array can be accessed using
960negative indexes. The index -1 refers to the last element of the array,
961-2 the second last, and so on. Attempting to access an element before
962the start of the array will raise a fatal run-time error.
3b35bae3 963
68dc0745 964=head2 The 'bval' Option
36477c24 965
966The operation of the bval option warrants some discussion. Here is the
967definition of bval from the Berkeley DB 1.85 recno manual page:
968
969 The delimiting byte to be used to mark the end of a
970 record for variable-length records, and the pad charac-
971 ter for fixed-length records. If no value is speci-
972 fied, newlines (``\n'') are used to mark the end of
973 variable-length records and fixed-length records are
974 padded with spaces.
975
976The second sentence is wrong. In actual fact bval will only default to
977C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
978openinfo parameter is used at all, the value that happens to be in bval
979will be used. That means you always have to specify bval when making
980use of any of the options in the openinfo parameter. This documentation
981error will be fixed in the next release of Berkeley DB.
982
That clarifies the situation with regard to Berkeley DB itself. What
about B<DB_File>? Well, the behavior defined in the quote above is
quite useful, so B<DB_File> conforms to it.
986
987That means that you can specify other options (e.g. cachesize) and
988still have bval default to C<"\n"> for variable length records, and
989space for fixed length records.
990
f6b705ef 991=head2 A Simple Example
3b35bae3 992
f6b705ef 993Here is a simple example that uses RECNO.
994
610ab055 995 use strict ;
f6b705ef 996 use DB_File ;
f6b705ef 997
610ab055 998 my @h ;
f6b705ef 999 tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
1000 or die "Cannot open file 'text': $!\n" ;
1001
1002 # Add a few key/value pairs to the file
1003 $h[0] = "orange" ;
1004 $h[1] = "blue" ;
1005 $h[2] = "yellow" ;
1006
1007 # Check for existence of a key
1008 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1009
1010 # use a negative index
1011 print "The last element is $h[-1]\n" ;
1012 print "The 2nd last element is $h[-2]\n" ;
1013
1014 untie @h ;
3b35bae3 1015
f6b705ef 1016Here is the output from the script:
1017
1018
1019 Element 1 Exists with value blue
1020 The last element is yellow
1021 The 2nd last element is blue
1022
1023=head2 Extra Methods
1024
1025As you can see from the example above, the tied array interface is
1026quite limited. To make the interface more useful, a number of methods
1027are supplied with B<DB_File> to simulate the standard array operations
1028that are not currently implemented in Perl's tied array interface. All
1029these methods are accessed via the object returned from the tie call.
1030
1031Here are the methods:
1032
1033=over 5
3b35bae3 1034
f6b705ef 1035=item B<$X-E<gt>push(list) ;>
1036
1037Pushes the elements of C<list> to the end of the array.
1038
1039=item B<$value = $X-E<gt>pop ;>
1040
1041Removes and returns the last element of the array.
1042
1043=item B<$value = $X-E<gt>shift ;>
1044
1045Removes and returns the first element of the array.
1046
1047=item B<$X-E<gt>unshift(list) ;>
1048
1049Pushes the elements of C<list> to the start of the array.
1050
1051=item B<$X-E<gt>length>
1052
1053Returns the number of elements in the array.
1054
1055=back
1056
1057=head2 Another Example
1058
1059Here is a more complete example that makes use of some of the methods
1060described above. It also makes use of the API interface directly (see
1061L<THE API INTERFACE>).
1062
1063 use strict ;
1064 use vars qw(@h $H $file $i) ;
1065 use DB_File ;
1066 use Fcntl ;
1067
1068 $file = "text" ;
1069
1070 unlink $file ;
1071
1072 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1073 or die "Cannot open file $file: $!\n" ;
1074
1075 # first create a text file to play with
1076 $h[0] = "zero" ;
1077 $h[1] = "one" ;
1078 $h[2] = "two" ;
1079 $h[3] = "three" ;
1080 $h[4] = "four" ;
1081
1082
1083 # Print the records in order.
1084 #
1085 # The length method is needed here because evaluating a tied
1086 # array in a scalar context does not return the number of
1087 # elements in the array.
1088
1089 print "\nORIGINAL\n" ;
1090 foreach $i (0 .. $H->length - 1) {
1091 print "$i: $h[$i]\n" ;
1092 }
1093
1094 # use the push & pop methods
1095 $a = $H->pop ;
1096 $H->push("last") ;
1097 print "\nThe last record was [$a]\n" ;
1098
1099 # and the shift & unshift methods
1100 $a = $H->shift ;
1101 $H->unshift("first") ;
1102 print "The first record was [$a]\n" ;
1103
1104 # Use the API to add a new record after record 2.
1105 $i = 2 ;
1106 $H->put($i, "Newbie", R_IAFTER) ;
1107
1108 # and a new record before record 1.
1109 $i = 1 ;
1110 $H->put($i, "New One", R_IBEFORE) ;
1111
1112 # delete record 3
1113 $H->del(3) ;
1114
1115 # now print the records in reverse order
1116 print "\nREVERSE\n" ;
1117 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1118 { print "$i: $h[$i]\n" }
1119
1120 # same again, but use the API functions instead
1121 print "\nREVERSE again\n" ;
610ab055 1122 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1123 for ($s = $H->seq($k, $v, R_LAST) ;
1124 $s == 0 ;
1125 $s = $H->seq($k, $v, R_PREV))
1126 { print "$k: $v\n" }
1127
1128 undef $H ;
1129 untie @h ;
1130
1131and this is what it outputs:
1132
1133 ORIGINAL
1134 0: zero
1135 1: one
1136 2: two
1137 3: three
1138 4: four
1139
1140 The last record was [four]
1141 The first record was [zero]
1142
1143 REVERSE
1144 5: last
1145 4: three
1146 3: Newbie
1147 2: one
1148 1: New One
1149 0: first
1150
1151 REVERSE again
1152 5: last
1153 4: three
1154 3: Newbie
1155 2: one
1156 1: New One
1157 0: first
1158
1159Notes:
1160
1161=over 5
1162
1163=item 1.
1164
1165Rather than iterating through the array, C<@h> like this:
1166
1167 foreach $i (@h)
1168
1169it is necessary to use either this:
1170
1171 foreach $i (0 .. $H->length - 1)
1172
1173or this:
1174
1175 for ($a = $H->seq($k, $v, R_FIRST) ;
1176 $a == 0 ;
1177 $a = $H->seq($k, $v, R_NEXT) )
1178
1179=item 2.
1180
1181Notice that both times the C<put> method was used the record index was
1182specified using a variable, C<$i>, rather than the literal value
1183itself. This is because C<put> will return the record number of the
1184inserted line via that parameter.
1185
1186=back
1187
1188=head1 THE API INTERFACE
3b35bae3 1189
1190As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1191possible to make direct use of most of the API functions defined in the
8e07c86e 1192Berkeley DB documentation.
3b35bae3 1193
88108326 1194To do this you need to store a copy of the object returned from the tie.
3b35bae3 1195
88108326 1196 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1197
8e07c86e 1198Once you have done that, you can access the Berkeley DB API functions
88108326 1199as B<DB_File> methods directly like this:
3b35bae3 1200
1201 $db->put($key, $value, R_NOOVERWRITE) ;
1202
88108326 1203B<Important:> If you have saved a copy of the object returned from
1204C<tie>, the underlying database file will I<not> be closed until both
1205the tied variable is untied and all copies of the saved object are
610ab055 1206destroyed.
88108326 1207
1208 use DB_File ;
1209 $db = tie %hash, "DB_File", "filename"
1210 or die "Cannot tie filename: $!" ;
1211 ...
1212 undef $db ;
1213 untie %hash ;
1214
9a2c4ce3 1215See L<The untie() Gotcha> for more details.
778183f3 1216
88108326 1217All the functions defined in L<dbopen> are available except for
1218close() and dbopen() itself. The B<DB_File> method interface to the
1219supported functions has been implemented to mirror the way Berkeley DB
1220works whenever possible. In particular note that:
1221
1222=over 5
1223
1224=item *
1225
1226The methods return a status value. All return 0 on success.
1227All return -1 to signify an error and set C<$!> to the exact
1228error code. The return code 1 generally (but not always) means that the
1229key specified did not exist in the database.
1230
1231Other return codes are defined. See below and in the Berkeley DB
1232documentation for details. The Berkeley DB documentation should be used
1233as the definitive source.
1234
1235=item *
3b35bae3 1236
88108326 1237Whenever a Berkeley DB function returns data via one of its parameters,
1238the equivalent B<DB_File> method does exactly the same.
3b35bae3 1239
88108326 1240=item *
1241
1242If you are careful, it is possible to mix API calls with the tied
1243hash/array interface in the same piece of code. Although only a few of
1244the methods used to implement the tied interface currently make use of
1245the cursor, you should always assume that the cursor has been changed
1246any time the tied hash/array interface is used. As an example, this
1247code will probably not do what you expect:
1248
1249 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1250 or die "Cannot tie $filename: $!" ;
1251
1252 # Get the first key/value pair and set the cursor
1253 $X->seq($key, $value, R_FIRST) ;
1254
1255 # this line will modify the cursor
1256 $count = scalar keys %x ;
1257
1258 # Get the second key/value pair.
1259 # oops, it didn't, it got the last key/value pair!
1260 $X->seq($key, $value, R_NEXT) ;
1261
1262The code above can be rearranged to get around the problem, like this:
1263
1264 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1265 or die "Cannot tie $filename: $!" ;
1266
1267 # this line will modify the cursor
1268 $count = scalar keys %x ;
1269
1270 # Get the first key/value pair and set the cursor
1271 $X->seq($key, $value, R_FIRST) ;
1272
1273 # Get the second key/value pair.
1274 # worked this time.
1275 $X->seq($key, $value, R_NEXT) ;
1276
1277=back
1278
1279All the constants defined in L<dbopen> for use in the flags parameters
1280in the methods defined below are also available. Refer to the Berkeley
1281DB documentation for the precise meaning of the flags values.
1282
1283Below is a list of the methods available.
3b35bae3 1284
1285=over 5
1286
f6b705ef 1287=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1288
1289Given a key (C<$key>) this method reads the value associated with it
1290from the database. The value read from the database is returned in the
1291C<$value> parameter.
3b35bae3 1292
88108326 1293If the key does not exist the method returns 1.
3b35bae3 1294
88108326 1295No flags are currently defined for this method.
3b35bae3 1296
f6b705ef 1297=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1298
88108326 1299Stores the key/value pair in the database.
1300
1301If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1302will have the record number of the inserted key/value pair set.
3b35bae3 1303
88108326 1304Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1305R_SETCURSOR.
1306
f6b705ef 1307=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1308
88108326 1309Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1310
88108326 1311A return code of 1 means that the requested key was not in the
1312database.
3b35bae3 1313
88108326 1314R_CURSOR is the only valid flag at present.
3b35bae3 1315
f6b705ef 1316=item B<$status = $X-E<gt>fd ;>
3b35bae3 1317
88108326 1318Returns the file descriptor for the underlying database.
3b35bae3 1319
f6b705ef 1320See L<Locking Databases> for an example of how to make use of the
88108326 1321C<fd> method to lock your database.
3b35bae3 1322
f6b705ef 1323=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1324
88108326 1325This interface allows sequential retrieval from the database. See
1326L<dbopen> for full details.
1327
1328Both the C<$key> and C<$value> parameters will be set to the key/value
1329pair read from the database.
1330
1331The flags parameter is mandatory. The valid flag values are R_CURSOR,
1332R_FIRST, R_LAST, R_NEXT and R_PREV.
1333
f6b705ef 1334=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1335
1336Flushes any cached buffers to disk.
1337
1338R_RECNOSYNC is the only valid flag at present.
3b35bae3 1339
1340=back
1341
f6b705ef 1342=head1 HINTS AND TIPS
3b35bae3 1343
3b35bae3 1344
cb1a09d0 1345=head2 Locking Databases
3b35bae3 1346
cb1a09d0 1347Concurrent access of a read-write database by several parties requires
1348them all to use some kind of locking. Here's an example of Tom's that
1349uses the I<fd> method to get the file descriptor, and then a careful
1350open() to give something Perl will flock() for you. Run this repeatedly
1351in the background to watch the locks granted in proper order.
3b35bae3 1352
cb1a09d0 1353 use DB_File;
1354
1355 use strict;
1356
1357 sub LOCK_SH { 1 }
1358 sub LOCK_EX { 2 }
1359 sub LOCK_NB { 4 }
1360 sub LOCK_UN { 8 }
1361
1362 my($oldval, $fd, $db, %db, $value, $key);
1363
1364 $key = shift || 'default';
1365 $value = shift || 'magic';
1366
1367 $value .= " $$";
1368
1369 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1370 || die "dbcreat /tmp/foo.db $!";
1371 $fd = $db->fd;
1372 print "$$: db fd is $fd\n";
1373 open(DB_FH, "+<&=$fd") || die "dup $!";
1374
1375
1376 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1377 print "$$: CONTENTION; can't read during write update!
1378 Waiting for read lock ($!) ....";
1379 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1380 }
1381 print "$$: Read lock granted\n";
1382
1383 $oldval = $db{$key};
1384 print "$$: Old value was $oldval\n";
1385 flock(DB_FH, LOCK_UN);
1386
1387 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1388 print "$$: CONTENTION; must have exclusive lock!
1389 Waiting for write lock ($!) ....";
1390 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1391 }
1392
1393 print "$$: Write lock granted\n";
1394 $db{$key} = $value;
610ab055 1395 $db->sync; # to flush
cb1a09d0 1396 sleep 10;
1397
1398 flock(DB_FH, LOCK_UN);
88108326 1399 undef $db;
cb1a09d0 1400 untie %db;
1401 close(DB_FH);
1402 print "$$: Updated db to $key=$value\n";
1403
68dc0745 1404=head2 Sharing Databases With C Applications
f6b705ef 1405
1406There is no technical reason why a Berkeley DB database cannot be
1407shared by both a Perl and a C application.
1408
1409The vast majority of problems that are reported in this area boil down
1410to the fact that C strings are NULL terminated, whilst Perl strings are
1411not.
1412
1413Here is a real example. Netscape 2.0 keeps a record of the locations you
1414visit along with the time you last visited them in a DB_HASH database.
1415This is usually stored in the file F<~/.netscape/history.db>. The key
1416field in the database is the location string and the value field is the
1417time the location was last visited stored as a 4 byte binary value.
1418
1419If you haven't already guessed, the location string is stored with a
1420terminating NULL. This means you need to be careful when accessing the
1421database.
1422
1423Here is a snippet of code that is loosely based on Tom Christiansen's
1424I<ggh> script (available from your nearest CPAN archive in
1425F<authors/id/TOMC/scripts/nshist.gz>).
1426
610ab055 1427 use strict ;
f6b705ef 1428 use DB_File ;
1429 use Fcntl ;
f6b705ef 1430
610ab055 1431 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1432 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1433
1434 $HISTORY = "$dotdir/.netscape/history.db";
1435
1436 tie %hist_db, 'DB_File', $HISTORY
1437 or die "Cannot open $HISTORY: $!\n" ;;
1438
1439 # Dump the complete database
1440 while ( ($href, $binary_time) = each %hist_db ) {
1441
1442 # remove the terminating NULL
1443 $href =~ s/\x00$// ;
1444
1445 # convert the binary time into a user friendly string
1446 $date = localtime unpack("V", $binary_time);
1447 print "$date $href\n" ;
1448 }
1449
1450 # check for the existence of a specific key
1451 # remember to add the NULL
1452 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1453 $date = localtime unpack("V", $binary_time) ;
1454 print "Last visited mox.perl.com on $date\n" ;
1455 }
1456 else {
1457 print "Never visited mox.perl.com\n"
1458 }
1459
1460 untie %hist_db ;
1461
68dc0745 1462=head2 The untie() Gotcha
778183f3 1463
7a2e2cd6 1464If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1465recommended that you read L<perltie/The untie Gotcha>.
778183f3 1466
1467Even if you don't currently make use of the API interface, it is still
1468worth reading it.
1469
1470Here is an example which illustrates the problem from a B<DB_File>
1471perspective:
1472
1473 use DB_File ;
1474 use Fcntl ;
1475
1476 my %x ;
1477 my $X ;
1478
1479 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1480 or die "Cannot tie first time: $!" ;
1481
1482 $x{123} = 456 ;
1483
1484 untie %x ;
1485
1486 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1487 or die "Cannot tie second time: $!" ;
1488
1489 untie %x ;
1490
1491When run, the script will produce this error message:
1492
1493 Cannot tie second time: Invalid argument at bad.file line 14.
1494
1495Although the error message above refers to the second tie() statement
1496in the script, the source of the problem is really with the untie()
1497statement that precedes it.
1498
1499Having read L<perltie> you will probably have already guessed that the
1500error is caused by the extra copy of the tied object stored in C<$X>.
1501If you haven't, then the problem boils down to the fact that the
1502B<DB_File> destructor, DESTROY, will not be called until I<all>
1503references to the tied object are destroyed. Both the tied variable,
1504C<%x>, and C<$X> above hold a reference to the object. The call to
1505untie() will destroy the first, but C<$X> still holds a valid
1506reference, so the destructor will not get called and the database file
1507F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1508attempt to open a database that is already open via the catch-all
1509"Invalid argument" doesn't help.
1510
1511If you run the script with the C<-w> flag the error message becomes:
1512
1513 untie attempted while 1 inner references still exist at bad.file line 12.
1514 Cannot tie second time: Invalid argument at bad.file line 14.
1515
1516which pinpoints the real problem. Finally the script can now be
1517modified to fix the original problem by destroying the API object
1518before the untie:
1519
1520 ...
1521 $x{123} = 456 ;
1522
1523 undef $X ;
1524 untie %x ;
1525
1526 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1527 ...
1528
f6b705ef 1529
1530=head1 COMMON QUESTIONS
1531
1532=head2 Why is there Perl source in my database?
1533
1534If you look at the contents of a database file created by DB_File,
1535there can sometimes be part of a Perl script included in it.
1536
1537This happens because Berkeley DB uses dynamic memory to allocate
1538buffers which will subsequently be written to the database file. Being
1539dynamic, the memory could have been used for anything before DB
1540malloced it. As Berkeley DB doesn't clear the memory once it has been
1541allocated, the unused portions will contain random junk. In the case
1542where a Perl script gets written to the database, the random junk will
1543correspond to an area of dynamic memory that happened to be used during
1544the compilation of the script.
1545
1546Unless you don't like the possibility of there being part of your Perl
1547scripts embedded in a database file, this is nothing to worry about.
1548
1549=head2 How do I store complex data structures with DB_File?
1550
1551Although B<DB_File> cannot do this directly, there is a module which
1552can layer transparently over B<DB_File> to accomplish this feat.
1553
1554Check out the MLDBM module, available on CPAN in the directory
1555F<modules/by-module/MLDBM>.
1556
1557=head2 What does "Invalid Argument" mean?
1558
1559You will get this error message when one of the parameters in the
1560C<tie> call is wrong. Unfortunately there are quite a few parameters to
1561get wrong, so it can be difficult to figure out which one it is.
1562
1563Here are a couple of possibilities:
1564
1565=over 5
1566
1567=item 1.
1568
610ab055 1569Attempting to reopen a database without closing it.
f6b705ef 1570
1571=item 2.
1572
1573Using the O_WRONLY flag.
1574
1575=back
1576
1577=head2 What does "Bareword 'DB_File' not allowed" mean?
1578
1579You will encounter this particular error message when you have the
1580C<strict 'subs'> pragma (or the full strict pragma) in your script.
1581Consider this script:
1582
1583 use strict ;
1584 use DB_File ;
1585 use vars qw(%x) ;
1586 tie %x, DB_File, "filename" ;
1587
1588Running it produces the error in question:
1589
1590 Bareword "DB_File" not allowed while "strict subs" in use
1591
1592To get around the error, place the word C<DB_File> in either single or
1593double quotes, like this:
1594
1595 tie %x, "DB_File", "filename" ;
1596
1597Although it might seem like a real pain, it is really worth the effort
1598of having a C<use strict> in all your scripts.
1599
cb1a09d0 1600=head1 HISTORY
1601
1f70e1ea 1602Moved to the Changes file.
610ab055 1603
1f70e1ea 1604=head1 BUGS
05475680 1605
1f70e1ea 1606Some older versions of Berkeley DB had problems with fixed length
1607records using the RECNO file format. This problem has been fixed since
1608version 1.85 of Berkeley DB.
e858de61 1609
1f70e1ea 1610I am sure there are bugs in the code. If you do find any, or can
1611suggest any enhancements, I would welcome your comments.
a6ed719b 1612
1f70e1ea 1613=head1 AVAILABILITY
a6ed719b 1614
1f70e1ea 1615B<DB_File> comes with the standard Perl source distribution. Look in
1616the directory F<ext/DB_File>. Given the amount of time between releases
1617of Perl, the version that ships with Perl is quite likely to be out of
1618date; the most recent version can always be found on CPAN (see
1619L<perlmod/CPAN> for details), in the directory
1620F<modules/by-module/DB_File>.
a6ed719b 1621
1f70e1ea 1622This version of B<DB_File> will work with either version 1.x or 2.x of
1623Berkeley DB, but is limited to the functionality provided by version 1.
a6ed719b 1624
1f70e1ea 1625The official web site for Berkeley DB is
1626F<http://www.sleepycat.com/db>. The ftp equivalent is
1627F<ftp.sleepycat.com:/pub>. Both versions 1 and 2 of Berkeley DB are
1628available there.
93af7a87 1629
1f70e1ea 1630Alternatively, Berkeley DB version 1 is available at your nearest CPAN
1631archive in F<src/misc/db.1.85.tar.gz>.
e858de61 1632
1f70e1ea 1633If you are running IRIX, then get Berkeley DB version 1 from
1634F<http://reality.sgi.com/ariel>. It has the patches necessary to
1635compile properly on IRIX 5.3.
610ab055 1636
1f70e1ea 1637=head1 COPYRIGHT
3b35bae3 1638
1f70e1ea 1639Copyright (c) 1997 Paul Marquess. All rights reserved. This program is
1640free software; you can redistribute it and/or modify it under the same
1641terms as Perl itself.
3b35bae3 1642
1f70e1ea 1643Although B<DB_File> is covered by the Perl license, the library it
1644makes use of, namely Berkeley DB, is not. Berkeley DB has its own
1645copyright and its own license. Please take the time to read it.
3b35bae3 1646
1f70e1ea 1647The license for Berkeley DB version 2, and how it relates to DB_File
1648does need some extra clarification. Here are a few words taken from
1649the Berkeley DB FAQ regarding the version 2 license:
3b35bae3 1650
1f70e1ea 1651 The major difference is that the license for DB 2.0, when
1652 downloaded from the net, requires that the software that
1653 uses DB 2.0 be freely redistributable.
f6b705ef 1654
1f70e1ea 1655That means that if you want to use DB_File, and you have changed either
1656the source for Berkeley DB or Perl, then the changes must be freely
1657available.
68dc0745 1658
1f70e1ea 1659In the case of Perl, the term source refers to the complete source
1660code for Perl (e.g. sv.c, toke.c, perl.h) and any external modules that
1661you are using (e.g. DB_File, Tk).
3b35bae3 1662
1f70e1ea 1663Note that any Perl scripts that you write are your property - this
1664includes scripts that make use of DB_File. Neither the Perl license nor
1665the Berkeley DB license places any restriction on what you have to do
1666with them.
88108326 1667
1f70e1ea 1668If you are in any doubt about the license situation, contact either the
1669Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 1670
1671
3b35bae3 1672=head1 SEE ALSO
1673
1674L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>
1675
3b35bae3 1676=head1 AUTHOR
1677
8e07c86e 1678The DB_File interface was written by Paul Marquess
88108326 1679E<lt>pmarquess@bfsec.bt.co.ukE<gt>.
d3ef3b8a 1680Questions about the DB system itself may be addressed to
1681E<lt>db@sleepycat.comE<gt>.
3b35bae3 1682
1683=cut