(Replaced by change #3921)
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6ca2e664 3# written by Paul Marquess (Paul.Marquess@btinternet.com)
a62982a8 4# last modified 3rd August 1999
5# version 1.69
36477c24 6#
cad2e5aa 7# Copyright (c) 1995-1999 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
12package DB_File::HASHINFO ;
785da04d 13
610ab055 14require 5.003 ;
15
785da04d 16use strict;
8e07c86e 17use Carp;
88108326 18require Tie::Hash;
19@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 20
88108326 21sub new
8e07c86e 22{
88108326 23 my $pkg = shift ;
24 my %x ;
25 tie %x, $pkg ;
26 bless \%x, $pkg ;
8e07c86e 27}
28
610ab055 29
88108326 30sub TIEHASH
31{
32 my $pkg = shift ;
33
36477c24 34 bless { VALID => { map {$_, 1}
35 qw( bsize ffactor nelem cachesize hash lorder)
36 },
37 GOT => {}
38 }, $pkg ;
88108326 39}
8e07c86e 40
610ab055 41
8e07c86e 42sub FETCH
43{
88108326 44 my $self = shift ;
45 my $key = shift ;
8e07c86e 46
36477c24 47 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
88108326 48
49 my $pkg = ref $self ;
50 croak "${pkg}::FETCH - Unknown element '$key'" ;
8e07c86e 51}
52
53
54sub STORE
55{
88108326 56 my $self = shift ;
57 my $key = shift ;
58 my $value = shift ;
59
36477c24 60 if ( exists $self->{VALID}{$key} )
8e07c86e 61 {
36477c24 62 $self->{GOT}{$key} = $value ;
8e07c86e 63 return ;
64 }
65
88108326 66 my $pkg = ref $self ;
67 croak "${pkg}::STORE - Unknown element '$key'" ;
8e07c86e 68}
69
70sub DELETE
71{
88108326 72 my $self = shift ;
73 my $key = shift ;
74
36477c24 75 if ( exists $self->{VALID}{$key} )
8e07c86e 76 {
36477c24 77 delete $self->{GOT}{$key} ;
8e07c86e 78 return ;
79 }
80
88108326 81 my $pkg = ref $self ;
82 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
8e07c86e 83}
84
88108326 85sub EXISTS
8e07c86e 86{
88108326 87 my $self = shift ;
88 my $key = shift ;
8e07c86e 89
36477c24 90 exists $self->{VALID}{$key} ;
8e07c86e 91}
92
88108326 93sub NotHere
8e07c86e 94{
18d2dc8c 95 my $self = shift ;
88108326 96 my $method = shift ;
8e07c86e 97
18d2dc8c 98 croak ref($self) . " does not define the method ${method}" ;
8e07c86e 99}
100
18d2dc8c 101sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
102sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
103sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 104
105package DB_File::RECNOINFO ;
785da04d 106
88108326 107use strict ;
108
045291aa 109@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 110
111sub TIEHASH
112{
88108326 113 my $pkg = shift ;
114
36477c24 115 bless { VALID => { map {$_, 1}
116 qw( bval cachesize psize flags lorder reclen bfname )
117 },
118 GOT => {},
119 }, $pkg ;
8e07c86e 120}
121
88108326 122package DB_File::BTREEINFO ;
8e07c86e 123
88108326 124use strict ;
8e07c86e 125
88108326 126@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 127
88108326 128sub TIEHASH
8e07c86e 129{
88108326 130 my $pkg = shift ;
131
36477c24 132 bless { VALID => { map {$_, 1}
133 qw( flags cachesize maxkeypage minkeypage psize
134 compare prefix lorder )
135 },
136 GOT => {},
137 }, $pkg ;
8e07c86e 138}
139
140
8e07c86e 141package DB_File ;
785da04d 142
143use strict;
1f70e1ea 144use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_version) ;
8e07c86e 145use Carp;
146
785da04d 147
a62982a8 148$VERSION = "1.69" ;
8e07c86e 149
150#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 151$DB_BTREE = new DB_File::BTREEINFO ;
152$DB_HASH = new DB_File::HASHINFO ;
153$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 154
785da04d 155require Tie::Hash;
8e07c86e 156require Exporter;
157use AutoLoader;
158require DynaLoader;
785da04d 159@ISA = qw(Tie::Hash Exporter DynaLoader);
8e07c86e 160@EXPORT = qw(
161 $DB_BTREE $DB_HASH $DB_RECNO
88108326 162
8e07c86e 163 BTREEMAGIC
164 BTREEVERSION
165 DB_LOCK
166 DB_SHMEM
167 DB_TXN
168 HASHMAGIC
169 HASHVERSION
170 MAX_PAGE_NUMBER
171 MAX_PAGE_OFFSET
172 MAX_REC_NUMBER
173 RET_ERROR
174 RET_SPECIAL
175 RET_SUCCESS
176 R_CURSOR
177 R_DUP
178 R_FIRST
179 R_FIXEDLEN
180 R_IAFTER
181 R_IBEFORE
182 R_LAST
183 R_NEXT
184 R_NOKEY
185 R_NOOVERWRITE
186 R_PREV
187 R_RECNOSYNC
188 R_SETCURSOR
189 R_SNAPSHOT
190 __R_UNUSED
88108326 191
045291aa 192);
8e07c86e 193
194sub AUTOLOAD {
785da04d 195 my($constname);
8e07c86e 196 ($constname = $AUTOLOAD) =~ s/.*:://;
785da04d 197 my $val = constant($constname, @_ ? $_[0] : 0);
8e07c86e 198 if ($! != 0) {
265f5c4a 199 if ($! =~ /Invalid/ || $!{EINVAL}) {
8e07c86e 200 $AutoLoader::AUTOLOAD = $AUTOLOAD;
201 goto &AutoLoader::AUTOLOAD;
202 }
203 else {
785da04d 204 my($pack,$file,$line) = caller;
8e07c86e 205 croak "Your vendor has not defined DB macro $constname, used at $file line $line.
206";
207 }
208 }
209 eval "sub $AUTOLOAD { $val }";
210 goto &$AUTOLOAD;
211}
212
f6b705ef 213
a6ed719b 214eval {
1f70e1ea 215 # Make all Fcntl O_XXX constants available for importing
216 require Fcntl;
217 my @O = grep /^O_/, @Fcntl::EXPORT;
218 Fcntl->import(@O); # first we import what we want to export
219 push(@EXPORT, @O);
a6ed719b 220};
f6b705ef 221
1f70e1ea 222## import borrowed from IO::File
223## exports Fcntl constants if available.
224#sub import {
225# my $pkg = shift;
226# my $callpkg = caller;
227# Exporter::export $pkg, $callpkg, @_;
228# eval {
229# require Fcntl;
230# Exporter::export 'Fcntl', $callpkg, '/^O_/';
231# };
232#}
233
785da04d 234bootstrap DB_File $VERSION;
8e07c86e 235
236# Preloaded methods go here. Autoload methods go after __END__, and are
237# processed by the autosplit program.
238
05475680 239sub tie_hash_or_array
610ab055 240{
241 my (@arg) = @_ ;
05475680 242 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
610ab055 243
244 $arg[4] = tied %{ $arg[4] }
245 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
246
1f70e1ea 247 # make recno in Berkeley DB version 2 work like recno in version 1.
248 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
249 $arg[1] and ! -e $arg[1]) {
250 open(FH, ">$arg[1]") or return undef ;
251 close FH ;
252 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
253 }
254
05475680 255 DoTie_($tieHASH, @arg) ;
610ab055 256}
257
05475680 258sub TIEHASH
259{
260 tie_hash_or_array(@_) ;
261}
262
263sub TIEARRAY
264{
265 tie_hash_or_array(@_) ;
266}
88108326 267
045291aa 268sub CLEAR
269{
1f70e1ea 270 my $self = shift;
271 my $key = "" ;
272 my $value = "" ;
273 my $status = $self->seq($key, $value, R_FIRST());
274 my @keys;
275
276 while ($status == 0) {
277 push @keys, $key;
278 $status = $self->seq($key, $value, R_NEXT());
279 }
280 foreach $key (reverse @keys) {
281 my $s = $self->del($key);
282 }
283}
284
045291aa 285sub EXTEND { }
286
287sub STORESIZE
288{
289 my $self = shift;
290 my $length = shift ;
291 my $current_length = $self->length() ;
292
293 if ($length < $current_length) {
294 my $key ;
295 for ($key = $current_length - 1 ; $key >= $length ; -- $key)
296 { $self->del($key) }
297 }
a9fd575d 298 elsif ($length > $current_length) {
299 $self->put($length-1, "") ;
300 }
045291aa 301}
302
6ca2e664 303sub find_dup
304{
305 croak "Usage: \$db->find_dup(key,value)\n"
306 unless @_ == 3 ;
307
308 my $db = shift ;
309 my ($origkey, $value_wanted) = @_ ;
310 my ($key, $value) = ($origkey, 0);
311 my ($status) = 0 ;
312
313 for ($status = $db->seq($key, $value, R_CURSOR() ) ;
314 $status == 0 ;
315 $status = $db->seq($key, $value, R_NEXT() ) ) {
316
317 return 0 if $key eq $origkey and $value eq $value_wanted ;
318 }
319
320 return $status ;
321}
322
323sub del_dup
324{
325 croak "Usage: \$db->del_dup(key,value)\n"
326 unless @_ == 3 ;
327
328 my $db = shift ;
329 my ($key, $value) = @_ ;
330 my ($status) = $db->find_dup($key, $value) ;
331 return $status if $status != 0 ;
332
333 $status = $db->del($key, R_CURSOR() ) ;
334 return $status ;
335}
336
88108326 337sub get_dup
338{
339 croak "Usage: \$db->get_dup(key [,flag])\n"
340 unless @_ == 2 or @_ == 3 ;
341
342 my $db = shift ;
343 my $key = shift ;
344 my $flag = shift ;
f6b705ef 345 my $value = 0 ;
88108326 346 my $origkey = $key ;
347 my $wantarray = wantarray ;
f6b705ef 348 my %values = () ;
88108326 349 my @values = () ;
350 my $counter = 0 ;
f6b705ef 351 my $status = 0 ;
88108326 352
f6b705ef 353 # iterate through the database until either EOF ($status == 0)
354 # or a different key is encountered ($key ne $origkey).
355 for ($status = $db->seq($key, $value, R_CURSOR()) ;
356 $status == 0 and $key eq $origkey ;
357 $status = $db->seq($key, $value, R_NEXT()) ) {
88108326 358
f6b705ef 359 # save the value or count number of matches
360 if ($wantarray) {
361 if ($flag)
362 { ++ $values{$value} }
363 else
364 { push (@values, $value) }
365 }
366 else
367 { ++ $counter }
88108326 368
88108326 369 }
370
f6b705ef 371 return ($wantarray ? ($flag ? %values : @values) : $counter) ;
88108326 372}
373
374
8e07c86e 3751;
376__END__
377
3b35bae3 378=head1 NAME
379
1f70e1ea 380DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 381
382=head1 SYNOPSIS
383
384 use DB_File ;
88108326 385
386 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
387 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
388 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 389
3b35bae3 390 $status = $X->del($key [, $flags]) ;
391 $status = $X->put($key, $value [, $flags]) ;
392 $status = $X->get($key, $value [, $flags]) ;
760ac839 393 $status = $X->seq($key, $value, $flags) ;
3b35bae3 394 $status = $X->sync([$flags]) ;
395 $status = $X->fd ;
760ac839 396
f6b705ef 397 # BTREE only
88108326 398 $count = $X->get_dup($key) ;
399 @list = $X->get_dup($key) ;
400 %list = $X->get_dup($key, 1) ;
6ca2e664 401 $status = $X->find_dup($key, $value) ;
402 $status = $X->del_dup($key, $value) ;
88108326 403
f6b705ef 404 # RECNO only
405 $a = $X->length;
406 $a = $X->pop ;
407 $X->push(list);
408 $a = $X->shift;
409 $X->unshift(list);
410
cad2e5aa 411 # DBM Filters
412 $old_filter = $db->filter_store_key ( sub { ... } ) ;
413 $old_filter = $db->filter_store_value( sub { ... } ) ;
414 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
415 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
416
3b35bae3 417 untie %hash ;
418 untie @array ;
419
420=head1 DESCRIPTION
421
8e07c86e 422B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 423facilities provided by Berkeley DB version 1.x (if you have a newer
424version of DB, see L<Using DB_File with Berkeley DB version 2>). It is
425assumed that you have a copy of the Berkeley DB manual pages at hand
426when reading this documentation. The interface defined here mirrors the
427Berkeley DB interface closely.
68dc0745 428
8e07c86e 429Berkeley DB is a C library which provides a consistent interface to a
430number of database formats. B<DB_File> provides an interface to all
431three of the database types currently supported by Berkeley DB.
3b35bae3 432
433The file types are:
434
435=over 5
436
88108326 437=item B<DB_HASH>
3b35bae3 438
88108326 439This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 440files. This is equivalent to the functionality provided by other
441hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
442the files created using DB_HASH are not compatible with any of the
443other packages mentioned.
3b35bae3 444
8e07c86e 445A default hashing algorithm, which will be adequate for most
446applications, is built into Berkeley DB. If you do need to use your own
447hashing algorithm it is possible to write your own in Perl and have
448B<DB_File> use it instead.
3b35bae3 449
88108326 450=item B<DB_BTREE>
451
452The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 453sorted, balanced binary tree.
3b35bae3 454
8e07c86e 455As with the DB_HASH format, it is possible to provide a user defined
456Perl routine to perform the comparison of keys. By default, though, the
457keys are stored in lexical order.
3b35bae3 458
88108326 459=item B<DB_RECNO>
3b35bae3 460
8e07c86e 461DB_RECNO allows both fixed-length and variable-length flat text files
462to be manipulated using the same key/value pair interface as in DB_HASH
463and DB_BTREE. In this case the key will consist of a record (line)
464number.
3b35bae3 465
466=back
467
1f70e1ea 468=head2 Using DB_File with Berkeley DB version 2
469
470Although B<DB_File> is intended to be used with Berkeley DB version 1,
471it can also be used with version 2. In this case the interface is
472limited to the functionality provided by Berkeley DB 1.x. Anywhere the
473version 2 interface differs, B<DB_File> arranges for it to work like
474version 1. This feature allows B<DB_File> scripts that were built with
475version 1 to be migrated to version 2 without any changes.
476
477If you want to make use of the new features available in Berkeley DB
4782.x, use the Perl module B<BerkeleyDB> instead.
479
480At the time of writing this document the B<BerkeleyDB> module is still
481alpha quality (the version number is < 1.0), and so unsuitable for use
482in any serious development work. Once its version number is >= 1.0, it
483is considered stable enough for real work.
484
485B<Note:> The database file format has changed in Berkeley DB version 2.
486If you cannot recreate your databases, you must dump any existing
487databases with the C<db_dump185> utility that comes with Berkeley DB.
6ca2e664 488Once you have rebuilt DB_File to use Berkeley DB version 2, your
1f70e1ea 489databases can be recreated using C<db_load>. Refer to the Berkeley DB
490documentation for further details.
491
6ca2e664 492Please read L<"COPYRIGHT"> before using version 2.x of Berkeley DB with
1f70e1ea 493DB_File.
494
68dc0745 495=head2 Interface to Berkeley DB
3b35bae3 496
497B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 498in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
499allows B<DB_File> to access Berkeley DB files using either an
500associative array (for DB_HASH & DB_BTREE file types) or an ordinary
501array (for the DB_RECNO file type).
3b35bae3 502
88108326 503In addition to the tie() interface, it is also possible to access most
504of the functions provided in the Berkeley DB API directly.
f6b705ef 505See L<THE API INTERFACE>.
3b35bae3 506
88108326 507=head2 Opening a Berkeley DB Database File
3b35bae3 508
8e07c86e 509Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 510Here is the C prototype for dbopen():
3b35bae3 511
512 DB*
513 dbopen (const char * file, int flags, int mode,
514 DBTYPE type, const void * openinfo)
515
516The parameter C<type> is an enumeration which specifies which of the 3
517interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
518Depending on which of these is actually chosen, the final parameter,
519I<openinfo> points to a data structure which allows tailoring of the
520specific interface method.
521
8e07c86e 522This interface is handled slightly differently in B<DB_File>. Here is
88108326 523an equivalent call using B<DB_File>:
3b35bae3 524
88108326 525 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 526
8e07c86e 527The C<filename>, C<flags> and C<mode> parameters are the direct
528equivalent of their dbopen() counterparts. The final parameter $DB_HASH
529performs the function of both the C<type> and C<openinfo> parameters in
530dbopen().
3b35bae3 531
88108326 532In the example above $DB_HASH is actually a pre-defined reference to a
533hash object. B<DB_File> has three of these pre-defined references.
534Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 535
8e07c86e 536The keys allowed in each of these pre-defined references is limited to
537the names used in the equivalent C structure. So, for example, the
538$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 539C<ffactor>, C<hash>, C<lorder> and C<nelem>.
540
541To change one of these elements, just assign to it like this:
542
543 $DB_HASH->{'cachesize'} = 10000 ;
544
545The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
546usually adequate for most applications. If you do need to create extra
547instances of these objects, constructors are available for each file
548type.
549
550Here are examples of the constructors and the valid options available
551for DB_HASH, DB_BTREE and DB_RECNO respectively.
552
553 $a = new DB_File::HASHINFO ;
554 $a->{'bsize'} ;
555 $a->{'cachesize'} ;
556 $a->{'ffactor'};
557 $a->{'hash'} ;
558 $a->{'lorder'} ;
559 $a->{'nelem'} ;
560
561 $b = new DB_File::BTREEINFO ;
562 $b->{'flags'} ;
563 $b->{'cachesize'} ;
564 $b->{'maxkeypage'} ;
565 $b->{'minkeypage'} ;
566 $b->{'psize'} ;
567 $b->{'compare'} ;
568 $b->{'prefix'} ;
569 $b->{'lorder'} ;
570
571 $c = new DB_File::RECNOINFO ;
572 $c->{'bval'} ;
573 $c->{'cachesize'} ;
574 $c->{'psize'} ;
575 $c->{'flags'} ;
576 $c->{'lorder'} ;
577 $c->{'reclen'} ;
578 $c->{'bfname'} ;
579
580The values stored in the hashes above are mostly the direct equivalent
581of their C counterpart. Like their C counterparts, all are set to a
f6b705ef 582default values - that means you don't have to set I<all> of the
88108326 583values when you only want to change one. Here is an example:
584
585 $a = new DB_File::HASHINFO ;
586 $a->{'cachesize'} = 12345 ;
587 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
588
36477c24 589A few of the options need extra discussion here. When used, the C
88108326 590equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
591to C functions. In B<DB_File> these keys are used to store references
592to Perl subs. Below are templates for each of the subs:
593
594 sub hash
595 {
596 my ($data) = @_ ;
597 ...
598 # return the hash value for $data
599 return $hash ;
600 }
3b35bae3 601
88108326 602 sub compare
603 {
604 my ($key, $key2) = @_ ;
605 ...
606 # return 0 if $key1 eq $key2
607 # -1 if $key1 lt $key2
608 # 1 if $key1 gt $key2
609 return (-1 , 0 or 1) ;
610 }
3b35bae3 611
88108326 612 sub prefix
613 {
614 my ($key, $key2) = @_ ;
615 ...
616 # return number of bytes of $key2 which are
617 # necessary to determine that it is greater than $key1
618 return $bytes ;
619 }
3b35bae3 620
f6b705ef 621See L<Changing the BTREE sort order> for an example of using the
622C<compare> template.
88108326 623
36477c24 624If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 625C<bval>, you should check out L<The 'bval' Option>.
36477c24 626
88108326 627=head2 Default Parameters
628
629It is possible to omit some or all of the final 4 parameters in the
630call to C<tie> and let them take default values. As DB_HASH is the most
631common file format used, the call:
632
633 tie %A, "DB_File", "filename" ;
634
635is equivalent to:
636
18d2dc8c 637 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 638
639It is also possible to omit the filename parameter as well, so the
640call:
641
642 tie %A, "DB_File" ;
643
644is equivalent to:
645
18d2dc8c 646 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 647
f6b705ef 648See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 649in place of a filename.
650
f6b705ef 651=head2 In Memory Databases
652
653Berkeley DB allows the creation of in-memory databases by using NULL
654(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
655uses C<undef> instead of NULL to provide this functionality.
656
657=head1 DB_HASH
658
659The DB_HASH file format is probably the most commonly used of the three
660file formats that B<DB_File> supports. It is also very straightforward
661to use.
662
68dc0745 663=head2 A Simple Example
f6b705ef 664
665This example shows how to create a database, add key/value pairs to the
666database, delete keys/value pairs and finally how to enumerate the
667contents of the database.
668
610ab055 669 use strict ;
f6b705ef 670 use DB_File ;
610ab055 671 use vars qw( %h $k $v ) ;
f6b705ef 672
2c2d71f5 673 unlink "fruit" ;
f6b705ef 674 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
675 or die "Cannot open file 'fruit': $!\n";
676
677 # Add a few key/value pairs to the file
678 $h{"apple"} = "red" ;
679 $h{"orange"} = "orange" ;
680 $h{"banana"} = "yellow" ;
681 $h{"tomato"} = "red" ;
682
683 # Check for existence of a key
684 print "Banana Exists\n\n" if $h{"banana"} ;
685
686 # Delete a key/value pair.
687 delete $h{"apple"} ;
688
689 # print the contents of the file
690 while (($k, $v) = each %h)
691 { print "$k -> $v\n" }
692
693 untie %h ;
694
695here is the output:
696
697 Banana Exists
698
699 orange -> orange
700 tomato -> red
701 banana -> yellow
702
703Note that the like ordinary associative arrays, the order of the keys
704retrieved is in an apparently random order.
705
706=head1 DB_BTREE
707
708The DB_BTREE format is useful when you want to store data in a given
709order. By default the keys will be stored in lexical order, but as you
710will see from the example shown in the next section, it is very easy to
711define your own sorting function.
712
713=head2 Changing the BTREE sort order
714
715This script shows how to override the default sorting algorithm that
716BTREE uses. Instead of using the normal lexical ordering, a case
717insensitive compare function will be used.
88108326 718
610ab055 719 use strict ;
f6b705ef 720 use DB_File ;
610ab055 721
722 my %h ;
f6b705ef 723
724 sub Compare
725 {
726 my ($key1, $key2) = @_ ;
727 "\L$key1" cmp "\L$key2" ;
728 }
729
730 # specify the Perl sub that will do the comparison
731 $DB_BTREE->{'compare'} = \&Compare ;
732
2c2d71f5 733 unlink "tree" ;
f6b705ef 734 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
735 or die "Cannot open file 'tree': $!\n" ;
736
737 # Add a key/value pair to the file
738 $h{'Wall'} = 'Larry' ;
739 $h{'Smith'} = 'John' ;
740 $h{'mouse'} = 'mickey' ;
741 $h{'duck'} = 'donald' ;
742
743 # Delete
744 delete $h{"duck"} ;
745
746 # Cycle through the keys printing them in order.
747 # Note it is not necessary to sort the keys as
748 # the btree will have kept them in order automatically.
749 foreach (keys %h)
750 { print "$_\n" }
751
752 untie %h ;
753
754Here is the output from the code above.
755
756 mouse
757 Smith
758 Wall
759
760There are a few point to bear in mind if you want to change the
761ordering in a BTREE database:
762
763=over 5
764
765=item 1.
766
767The new compare function must be specified when you create the database.
768
769=item 2.
770
771You cannot change the ordering once the database has been created. Thus
772you must use the same compare function every time you access the
88108326 773database.
774
f6b705ef 775=back
776
68dc0745 777=head2 Handling Duplicate Keys
f6b705ef 778
779The BTREE file type optionally allows a single key to be associated
780with an arbitrary number of values. This option is enabled by setting
781the flags element of C<$DB_BTREE> to R_DUP when creating the database.
782
88108326 783There are some difficulties in using the tied hash interface if you
784want to manipulate a BTREE database with duplicate keys. Consider this
785code:
786
610ab055 787 use strict ;
88108326 788 use DB_File ;
610ab055 789
790 use vars qw($filename %h ) ;
791
88108326 792 $filename = "tree" ;
793 unlink $filename ;
794
795 # Enable duplicate records
796 $DB_BTREE->{'flags'} = R_DUP ;
797
798 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
799 or die "Cannot open $filename: $!\n";
800
801 # Add some key/value pairs to the file
802 $h{'Wall'} = 'Larry' ;
803 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 804 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 805 $h{'Smith'} = 'John' ;
806 $h{'mouse'} = 'mickey' ;
807
808 # iterate through the associative array
809 # and print each key/value pair.
2c2d71f5 810 foreach (sort keys %h)
88108326 811 { print "$_ -> $h{$_}\n" }
812
f6b705ef 813 untie %h ;
814
88108326 815Here is the output:
816
817 Smith -> John
818 Wall -> Larry
819 Wall -> Larry
f6b705ef 820 Wall -> Larry
88108326 821 mouse -> mickey
822
f6b705ef 823As you can see 3 records have been successfully created with key C<Wall>
88108326 824- the only thing is, when they are retrieved from the database they
f6b705ef 825I<seem> to have the same value, namely C<Larry>. The problem is caused
826by the way that the associative array interface works. Basically, when
827the associative array interface is used to fetch the value associated
828with a given key, it will only ever retrieve the first value.
88108326 829
830Although it may not be immediately obvious from the code above, the
831associative array interface can be used to write values with duplicate
832keys, but it cannot be used to read them back from the database.
833
834The way to get around this problem is to use the Berkeley DB API method
835called C<seq>. This method allows sequential access to key/value
f6b705ef 836pairs. See L<THE API INTERFACE> for details of both the C<seq> method
837and the API in general.
88108326 838
839Here is the script above rewritten using the C<seq> API method.
840
610ab055 841 use strict ;
88108326 842 use DB_File ;
88108326 843
610ab055 844 use vars qw($filename $x %h $status $key $value) ;
845
88108326 846 $filename = "tree" ;
847 unlink $filename ;
848
849 # Enable duplicate records
850 $DB_BTREE->{'flags'} = R_DUP ;
851
852 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
853 or die "Cannot open $filename: $!\n";
854
855 # Add some key/value pairs to the file
856 $h{'Wall'} = 'Larry' ;
857 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 858 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 859 $h{'Smith'} = 'John' ;
860 $h{'mouse'} = 'mickey' ;
861
f6b705ef 862 # iterate through the btree using seq
88108326 863 # and print each key/value pair.
610ab055 864 $key = $value = 0 ;
f6b705ef 865 for ($status = $x->seq($key, $value, R_FIRST) ;
866 $status == 0 ;
867 $status = $x->seq($key, $value, R_NEXT) )
88108326 868 { print "$key -> $value\n" }
869
870 undef $x ;
871 untie %h ;
872
873that prints:
874
875 Smith -> John
876 Wall -> Brick
f6b705ef 877 Wall -> Brick
88108326 878 Wall -> Larry
879 mouse -> mickey
880
f6b705ef 881This time we have got all the key/value pairs, including the multiple
88108326 882values associated with the key C<Wall>.
883
6ca2e664 884To make life easier when dealing with duplicate keys, B<DB_File> comes with
885a few utility methods.
886
68dc0745 887=head2 The get_dup() Method
f6b705ef 888
6ca2e664 889The C<get_dup> method assists in
88108326 890reading duplicate values from BTREE databases. The method can take the
891following forms:
892
893 $count = $x->get_dup($key) ;
894 @list = $x->get_dup($key) ;
895 %list = $x->get_dup($key, 1) ;
896
897In a scalar context the method returns the number of values associated
898with the key, C<$key>.
899
900In list context, it returns all the values which match C<$key>. Note
f6b705ef 901that the values will be returned in an apparently random order.
88108326 902
7a2e2cd6 903In list context, if the second parameter is present and evaluates
904TRUE, the method returns an associative array. The keys of the
905associative array correspond to the values that matched in the BTREE
906and the values of the array are a count of the number of times that
907particular value occurred in the BTREE.
88108326 908
f6b705ef 909So assuming the database created above, we can use C<get_dup> like
88108326 910this:
911
2c2d71f5 912 use strict ;
913 use DB_File ;
914
915 use vars qw($filename $x %h ) ;
916
917 $filename = "tree" ;
918
919 # Enable duplicate records
920 $DB_BTREE->{'flags'} = R_DUP ;
921
922 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
923 or die "Cannot open $filename: $!\n";
924
610ab055 925 my $cnt = $x->get_dup("Wall") ;
88108326 926 print "Wall occurred $cnt times\n" ;
927
610ab055 928 my %hash = $x->get_dup("Wall", 1) ;
88108326 929 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 930 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 931
2c2d71f5 932 my @list = sort $x->get_dup("Wall") ;
88108326 933 print "Wall => [@list]\n" ;
934
f6b705ef 935 @list = $x->get_dup("Smith") ;
88108326 936 print "Smith => [@list]\n" ;
937
f6b705ef 938 @list = $x->get_dup("Dog") ;
88108326 939 print "Dog => [@list]\n" ;
940
941
942and it will print:
943
f6b705ef 944 Wall occurred 3 times
88108326 945 Larry is there
f6b705ef 946 There are 2 Brick Walls
947 Wall => [Brick Brick Larry]
88108326 948 Smith => [John]
949 Dog => []
3b35bae3 950
6ca2e664 951=head2 The find_dup() Method
952
953 $status = $X->find_dup($key, $value) ;
954
955This method checks for the existance of a specific key/value pair. If the
956pair exists, the cursor is left pointing to the pair and the method
957returns 0. Otherwise the method returns a non-zero value.
958
959Assuming the database from the previous example:
960
961 use strict ;
962 use DB_File ;
963
964 use vars qw($filename $x %h $found) ;
965
966 my $filename = "tree" ;
967
968 # Enable duplicate records
969 $DB_BTREE->{'flags'} = R_DUP ;
970
971 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
972 or die "Cannot open $filename: $!\n";
973
974 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
975 print "Larry Wall is $found there\n" ;
976
977 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
978 print "Harry Wall is $found there\n" ;
979
980 undef $x ;
981 untie %h ;
982
983prints this
984
2c2d71f5 985 Larry Wall is there
6ca2e664 986 Harry Wall is not there
987
988
989=head2 The del_dup() Method
990
991 $status = $X->del_dup($key, $value) ;
992
993This method deletes a specific key/value pair. It returns
9940 if they exist and have been deleted successfully.
995Otherwise the method returns a non-zero value.
996
997Again assuming the existance of the C<tree> database
998
999 use strict ;
1000 use DB_File ;
1001
1002 use vars qw($filename $x %h $found) ;
1003
1004 my $filename = "tree" ;
1005
1006 # Enable duplicate records
1007 $DB_BTREE->{'flags'} = R_DUP ;
1008
1009 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1010 or die "Cannot open $filename: $!\n";
1011
1012 $x->del_dup("Wall", "Larry") ;
1013
1014 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1015 print "Larry Wall is $found there\n" ;
1016
1017 undef $x ;
1018 untie %h ;
1019
1020prints this
1021
1022 Larry Wall is not there
1023
f6b705ef 1024=head2 Matching Partial Keys
1025
1026The BTREE interface has a feature which allows partial keys to be
1027matched. This functionality is I<only> available when the C<seq> method
1028is used along with the R_CURSOR flag.
1029
1030 $x->seq($key, $value, R_CURSOR) ;
1031
1032Here is the relevant quote from the dbopen man page where it defines
1033the use of the R_CURSOR flag with seq:
1034
f6b705ef 1035 Note, for the DB_BTREE access method, the returned key is not
1036 necessarily an exact match for the specified key. The returned key
1037 is the smallest key greater than or equal to the specified key,
1038 permitting partial key matches and range searches.
1039
f6b705ef 1040In the example script below, the C<match> sub uses this feature to find
1041and print the first matching key/value pair given a partial key.
1042
610ab055 1043 use strict ;
f6b705ef 1044 use DB_File ;
1045 use Fcntl ;
610ab055 1046
1047 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 1048
1049 sub match
1050 {
1051 my $key = shift ;
610ab055 1052 my $value = 0;
f6b705ef 1053 my $orig_key = $key ;
1054 $x->seq($key, $value, R_CURSOR) ;
1055 print "$orig_key\t-> $key\t-> $value\n" ;
1056 }
1057
1058 $filename = "tree" ;
1059 unlink $filename ;
1060
1061 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1062 or die "Cannot open $filename: $!\n";
1063
1064 # Add some key/value pairs to the file
1065 $h{'mouse'} = 'mickey' ;
1066 $h{'Wall'} = 'Larry' ;
1067 $h{'Walls'} = 'Brick' ;
1068 $h{'Smith'} = 'John' ;
1069
1070
610ab055 1071 $key = $value = 0 ;
f6b705ef 1072 print "IN ORDER\n" ;
1073 for ($st = $x->seq($key, $value, R_FIRST) ;
1074 $st == 0 ;
1075 $st = $x->seq($key, $value, R_NEXT) )
1076
2c2d71f5 1077 { print "$key -> $value\n" }
f6b705ef 1078
1079 print "\nPARTIAL MATCH\n" ;
1080
1081 match "Wa" ;
1082 match "A" ;
1083 match "a" ;
1084
1085 undef $x ;
1086 untie %h ;
1087
1088Here is the output:
1089
1090 IN ORDER
1091 Smith -> John
1092 Wall -> Larry
1093 Walls -> Brick
1094 mouse -> mickey
1095
1096 PARTIAL MATCH
1097 Wa -> Wall -> Larry
1098 A -> Smith -> John
1099 a -> mouse -> mickey
1100
1101=head1 DB_RECNO
1102
1103DB_RECNO provides an interface to flat text files. Both variable and
1104fixed length records are supported.
3b35bae3 1105
6ca2e664 1106In order to make RECNO more compatible with Perl, the array offset for
88108326 1107all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1108
88108326 1109As with normal Perl arrays, a RECNO array can be accessed using
1110negative indexes. The index -1 refers to the last element of the array,
1111-2 the second last, and so on. Attempting to access an element before
1112the start of the array will raise a fatal run-time error.
3b35bae3 1113
68dc0745 1114=head2 The 'bval' Option
36477c24 1115
1116The operation of the bval option warrants some discussion. Here is the
1117definition of bval from the Berkeley DB 1.85 recno manual page:
1118
1119 The delimiting byte to be used to mark the end of a
1120 record for variable-length records, and the pad charac-
1121 ter for fixed-length records. If no value is speci-
1122 fied, newlines (``\n'') are used to mark the end of
1123 variable-length records and fixed-length records are
1124 padded with spaces.
1125
1126The second sentence is wrong. In actual fact bval will only default to
1127C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1128openinfo parameter is used at all, the value that happens to be in bval
1129will be used. That means you always have to specify bval when making
1130use of any of the options in the openinfo parameter. This documentation
1131error will be fixed in the next release of Berkeley DB.
1132
1133That clarifies the situation with regards Berkeley DB itself. What
1134about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1135quite useful, so B<DB_File> conforms to it.
36477c24 1136
1137That means that you can specify other options (e.g. cachesize) and
1138still have bval default to C<"\n"> for variable length records, and
1139space for fixed length records.
1140
f6b705ef 1141=head2 A Simple Example
3b35bae3 1142
6ca2e664 1143Here is a simple example that uses RECNO (if you are using a version
1144of Perl earlier than 5.004_57 this example won't work -- see
1145L<Extra RECNO Methods> for a workaround).
f6b705ef 1146
610ab055 1147 use strict ;
f6b705ef 1148 use DB_File ;
f6b705ef 1149
2c2d71f5 1150 my $filename = "text" ;
1151 unlink $filename ;
1152
610ab055 1153 my @h ;
2c2d71f5 1154 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_RECNO
f6b705ef 1155 or die "Cannot open file 'text': $!\n" ;
1156
1157 # Add a few key/value pairs to the file
1158 $h[0] = "orange" ;
1159 $h[1] = "blue" ;
1160 $h[2] = "yellow" ;
1161
6ca2e664 1162 push @h, "green", "black" ;
1163
1164 my $elements = scalar @h ;
1165 print "The array contains $elements entries\n" ;
1166
1167 my $last = pop @h ;
1168 print "popped $last\n" ;
1169
1170 unshift @h, "white" ;
1171 my $first = shift @h ;
1172 print "shifted $first\n" ;
1173
f6b705ef 1174 # Check for existence of a key
1175 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1176
1177 # use a negative index
1178 print "The last element is $h[-1]\n" ;
1179 print "The 2nd last element is $h[-2]\n" ;
1180
1181 untie @h ;
3b35bae3 1182
f6b705ef 1183Here is the output from the script:
1184
6ca2e664 1185 The array contains 5 entries
1186 popped black
2c2d71f5 1187 shifted white
f6b705ef 1188 Element 1 Exists with value blue
6ca2e664 1189 The last element is green
1190 The 2nd last element is yellow
f6b705ef 1191
6ca2e664 1192=head2 Extra RECNO Methods
f6b705ef 1193
045291aa 1194If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664 1195array interface is quite limited. In the example script above
1196C<push>, C<pop>, C<shift>, C<unshift>
1197or determining the array length will not work with a tied array.
045291aa 1198
1199To make the interface more useful for older versions of Perl, a number
1200of methods are supplied with B<DB_File> to simulate the missing array
1201operations. All these methods are accessed via the object returned from
1202the tie call.
f6b705ef 1203
1204Here are the methods:
1205
1206=over 5
3b35bae3 1207
f6b705ef 1208=item B<$X-E<gt>push(list) ;>
1209
1210Pushes the elements of C<list> to the end of the array.
1211
1212=item B<$value = $X-E<gt>pop ;>
1213
1214Removes and returns the last element of the array.
1215
1216=item B<$X-E<gt>shift>
1217
1218Removes and returns the first element of the array.
1219
1220=item B<$X-E<gt>unshift(list) ;>
1221
1222Pushes the elements of C<list> to the start of the array.
1223
1224=item B<$X-E<gt>length>
1225
1226Returns the number of elements in the array.
1227
1228=back
1229
1230=head2 Another Example
1231
1232Here is a more complete example that makes use of some of the methods
1233described above. It also makes use of the API interface directly (see
1234L<THE API INTERFACE>).
1235
1236 use strict ;
1237 use vars qw(@h $H $file $i) ;
1238 use DB_File ;
1239 use Fcntl ;
1240
1241 $file = "text" ;
1242
1243 unlink $file ;
1244
1245 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1246 or die "Cannot open file $file: $!\n" ;
1247
1248 # first create a text file to play with
1249 $h[0] = "zero" ;
1250 $h[1] = "one" ;
1251 $h[2] = "two" ;
1252 $h[3] = "three" ;
1253 $h[4] = "four" ;
1254
1255
1256 # Print the records in order.
1257 #
1258 # The length method is needed here because evaluating a tied
1259 # array in a scalar context does not return the number of
1260 # elements in the array.
1261
1262 print "\nORIGINAL\n" ;
1263 foreach $i (0 .. $H->length - 1) {
1264 print "$i: $h[$i]\n" ;
1265 }
1266
1267 # use the push & pop methods
1268 $a = $H->pop ;
1269 $H->push("last") ;
1270 print "\nThe last record was [$a]\n" ;
1271
1272 # and the shift & unshift methods
1273 $a = $H->shift ;
1274 $H->unshift("first") ;
1275 print "The first record was [$a]\n" ;
1276
1277 # Use the API to add a new record after record 2.
1278 $i = 2 ;
1279 $H->put($i, "Newbie", R_IAFTER) ;
1280
1281 # and a new record before record 1.
1282 $i = 1 ;
1283 $H->put($i, "New One", R_IBEFORE) ;
1284
1285 # delete record 3
1286 $H->del(3) ;
1287
1288 # now print the records in reverse order
1289 print "\nREVERSE\n" ;
1290 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1291 { print "$i: $h[$i]\n" }
1292
1293 # same again, but use the API functions instead
1294 print "\nREVERSE again\n" ;
610ab055 1295 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1296 for ($s = $H->seq($k, $v, R_LAST) ;
1297 $s == 0 ;
1298 $s = $H->seq($k, $v, R_PREV))
1299 { print "$k: $v\n" }
1300
1301 undef $H ;
1302 untie @h ;
1303
1304and this is what it outputs:
1305
1306 ORIGINAL
1307 0: zero
1308 1: one
1309 2: two
1310 3: three
1311 4: four
1312
1313 The last record was [four]
1314 The first record was [zero]
1315
1316 REVERSE
1317 5: last
1318 4: three
1319 3: Newbie
1320 2: one
1321 1: New One
1322 0: first
1323
1324 REVERSE again
1325 5: last
1326 4: three
1327 3: Newbie
1328 2: one
1329 1: New One
1330 0: first
1331
1332Notes:
1333
1334=over 5
1335
1336=item 1.
1337
1338Rather than iterating through the array, C<@h> like this:
1339
1340 foreach $i (@h)
1341
1342it is necessary to use either this:
1343
1344 foreach $i (0 .. $H->length - 1)
1345
1346or this:
1347
1348 for ($a = $H->get($k, $v, R_FIRST) ;
1349 $a == 0 ;
1350 $a = $H->get($k, $v, R_NEXT) )
1351
1352=item 2.
1353
1354Notice that both times the C<put> method was used the record index was
1355specified using a variable, C<$i>, rather than the literal value
1356itself. This is because C<put> will return the record number of the
1357inserted line via that parameter.
1358
1359=back
1360
1361=head1 THE API INTERFACE
3b35bae3 1362
1363As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1364possible to make direct use of most of the API functions defined in the
8e07c86e 1365Berkeley DB documentation.
3b35bae3 1366
88108326 1367To do this you need to store a copy of the object returned from the tie.
3b35bae3 1368
88108326 1369 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1370
8e07c86e 1371Once you have done that, you can access the Berkeley DB API functions
88108326 1372as B<DB_File> methods directly like this:
3b35bae3 1373
1374 $db->put($key, $value, R_NOOVERWRITE) ;
1375
88108326 1376B<Important:> If you have saved a copy of the object returned from
1377C<tie>, the underlying database file will I<not> be closed until both
1378the tied variable is untied and all copies of the saved object are
610ab055 1379destroyed.
88108326 1380
1381 use DB_File ;
1382 $db = tie %hash, "DB_File", "filename"
1383 or die "Cannot tie filename: $!" ;
1384 ...
1385 undef $db ;
1386 untie %hash ;
1387
9a2c4ce3 1388See L<The untie() Gotcha> for more details.
778183f3 1389
88108326 1390All the functions defined in L<dbopen> are available except for
1391close() and dbopen() itself. The B<DB_File> method interface to the
1392supported functions have been implemented to mirror the way Berkeley DB
1393works whenever possible. In particular note that:
1394
1395=over 5
1396
1397=item *
1398
1399The methods return a status value. All return 0 on success.
1400All return -1 to signify an error and set C<$!> to the exact
1401error code. The return code 1 generally (but not always) means that the
1402key specified did not exist in the database.
1403
1404Other return codes are defined. See below and in the Berkeley DB
1405documentation for details. The Berkeley DB documentation should be used
1406as the definitive source.
1407
1408=item *
3b35bae3 1409
88108326 1410Whenever a Berkeley DB function returns data via one of its parameters,
1411the equivalent B<DB_File> method does exactly the same.
3b35bae3 1412
88108326 1413=item *
1414
1415If you are careful, it is possible to mix API calls with the tied
1416hash/array interface in the same piece of code. Although only a few of
1417the methods used to implement the tied interface currently make use of
1418the cursor, you should always assume that the cursor has been changed
1419any time the tied hash/array interface is used. As an example, this
1420code will probably not do what you expect:
1421
1422 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1423 or die "Cannot tie $filename: $!" ;
1424
1425 # Get the first key/value pair and set the cursor
1426 $X->seq($key, $value, R_FIRST) ;
1427
1428 # this line will modify the cursor
1429 $count = scalar keys %x ;
1430
1431 # Get the second key/value pair.
1432 # oops, it didn't, it got the last key/value pair!
1433 $X->seq($key, $value, R_NEXT) ;
1434
1435The code above can be rearranged to get around the problem, like this:
1436
1437 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1438 or die "Cannot tie $filename: $!" ;
1439
1440 # this line will modify the cursor
1441 $count = scalar keys %x ;
1442
1443 # Get the first key/value pair and set the cursor
1444 $X->seq($key, $value, R_FIRST) ;
1445
1446 # Get the second key/value pair.
1447 # worked this time.
1448 $X->seq($key, $value, R_NEXT) ;
1449
1450=back
1451
1452All the constants defined in L<dbopen> for use in the flags parameters
1453in the methods defined below are also available. Refer to the Berkeley
1454DB documentation for the precise meaning of the flags values.
1455
1456Below is a list of the methods available.
3b35bae3 1457
1458=over 5
1459
f6b705ef 1460=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1461
1462Given a key (C<$key>) this method reads the value associated with it
1463from the database. The value read from the database is returned in the
1464C<$value> parameter.
3b35bae3 1465
88108326 1466If the key does not exist the method returns 1.
3b35bae3 1467
88108326 1468No flags are currently defined for this method.
3b35bae3 1469
f6b705ef 1470=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1471
88108326 1472Stores the key/value pair in the database.
1473
1474If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1475will have the record number of the inserted key/value pair set.
3b35bae3 1476
88108326 1477Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1478R_SETCURSOR.
1479
f6b705ef 1480=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1481
88108326 1482Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1483
88108326 1484A return code of 1 means that the requested key was not in the
1485database.
3b35bae3 1486
88108326 1487R_CURSOR is the only valid flag at present.
3b35bae3 1488
f6b705ef 1489=item B<$status = $X-E<gt>fd ;>
3b35bae3 1490
88108326 1491Returns the file descriptor for the underlying database.
3b35bae3 1492
f6b705ef 1493See L<Locking Databases> for an example of how to make use of the
88108326 1494C<fd> method to lock your database.
3b35bae3 1495
f6b705ef 1496=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1497
88108326 1498This interface allows sequential retrieval from the database. See
1499L<dbopen> for full details.
1500
1501Both the C<$key> and C<$value> parameters will be set to the key/value
1502pair read from the database.
1503
1504The flags parameter is mandatory. The valid flag values are R_CURSOR,
1505R_FIRST, R_LAST, R_NEXT and R_PREV.
1506
f6b705ef 1507=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1508
1509Flushes any cached buffers to disk.
1510
1511R_RECNOSYNC is the only valid flag at present.
3b35bae3 1512
1513=back
1514
cad2e5aa 1515=head1 DBM FILTERS
1516
1517A DBM Filter is a piece of code that is be used when you I<always>
1518want to make the same transformation to all keys and/or values in a
1519DBM database.
1520
1521There are four methods associated with DBM Filters. All work identically,
1522and each is used to install (or uninstall) a single DBM Filter. Each
1523expects a single parameter, namely a reference to a sub. The only
1524difference between them is the place that the filter is installed.
1525
1526To summarise:
1527
1528=over 5
1529
1530=item B<filter_store_key>
1531
1532If a filter has been installed with this method, it will be invoked
1533every time you write a key to a DBM database.
1534
1535=item B<filter_store_value>
1536
1537If a filter has been installed with this method, it will be invoked
1538every time you write a value to a DBM database.
1539
1540
1541=item B<filter_fetch_key>
1542
1543If a filter has been installed with this method, it will be invoked
1544every time you read a key from a DBM database.
1545
1546=item B<filter_fetch_value>
1547
1548If a filter has been installed with this method, it will be invoked
1549every time you read a value from a DBM database.
1550
1551=back
1552
1553You can use any combination of the methods, from none, to all four.
1554
1555All filter methods return the existing filter, if present, or C<undef>
1556in not.
1557
1558To delete a filter pass C<undef> to it.
1559
1560=head2 The Filter
1561
1562When each filter is called by Perl, a local copy of C<$_> will contain
1563the key or value to be filtered. Filtering is achieved by modifying
1564the contents of C<$_>. The return code from the filter is ignored.
1565
1566=head2 An Example -- the NULL termination problem.
1567
1568Consider the following scenario. You have a DBM database
1569that you need to share with a third-party C application. The C application
1570assumes that I<all> keys and values are NULL terminated. Unfortunately
1571when Perl writes to DBM databases it doesn't use NULL termination, so
1572your Perl application will have to manage NULL termination itself. When
1573you write to the database you will have to use something like this:
1574
1575 $hash{"$key\0"} = "$value\0" ;
1576
1577Similarly the NULL needs to be taken into account when you are considering
1578the length of existing keys/values.
1579
1580It would be much better if you could ignore the NULL terminations issue
1581in the main application code and have a mechanism that automatically
1582added the terminating NULL to all keys and values whenever you write to
1583the database and have them removed when you read from the database. As I'm
1584sure you have already guessed, this is a problem that DBM Filters can
1585fix very easily.
1586
1587 use strict ;
1588 use DB_File ;
1589
1590 my %hash ;
1591 my $filename = "/tmp/filt" ;
1592 unlink $filename ;
1593
1594 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1595 or die "Cannot open $filename: $!\n" ;
1596
1597 # Install DBM Filters
1598 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1599 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1600 $db->filter_fetch_value( sub { s/\0$// } ) ;
1601 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1602
1603 $hash{"abc"} = "def" ;
1604 my $a = $hash{"ABC"} ;
1605 # ...
1606 undef $db ;
1607 untie %hash ;
1608
1609Hopefully the contents of each of the filters should be
1610self-explanatory. Both "fetch" filters remove the terminating NULL,
1611and both "store" filters add a terminating NULL.
1612
1613
1614=head2 Another Example -- Key is a C int.
1615
1616Here is another real-life example. By default, whenever Perl writes to
1617a DBM database it always writes the key and value as strings. So when
1618you use this:
1619
1620 $hash{12345} = "soemthing" ;
1621
1622the key 12345 will get stored in the DBM database as the 5 byte string
1623"12345". If you actually want the key to be stored in the DBM database
1624as a C int, you will have to use C<pack> when writing, and C<unpack>
1625when reading.
1626
1627Here is a DBM Filter that does it:
1628
1629 use strict ;
1630 use DB_File ;
1631 my %hash ;
1632 my $filename = "/tmp/filt" ;
1633 unlink $filename ;
1634
1635
1636 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1637 or die "Cannot open $filename: $!\n" ;
1638
1639 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1640 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1641 $hash{123} = "def" ;
1642 # ...
1643 undef $db ;
1644 untie %hash ;
1645
1646This time only two filters have been used -- we only need to manipulate
1647the contents of the key, so it wasn't necessary to install any value
1648filters.
1649
f6b705ef 1650=head1 HINTS AND TIPS
3b35bae3 1651
3b35bae3 1652
cb1a09d0 1653=head2 Locking Databases
3b35bae3 1654
cb1a09d0 1655Concurrent access of a read-write database by several parties requires
1656them all to use some kind of locking. Here's an example of Tom's that
1657uses the I<fd> method to get the file descriptor, and then a careful
1658open() to give something Perl will flock() for you. Run this repeatedly
1659in the background to watch the locks granted in proper order.
3b35bae3 1660
cb1a09d0 1661 use DB_File;
1662
1663 use strict;
1664
1665 sub LOCK_SH { 1 }
1666 sub LOCK_EX { 2 }
1667 sub LOCK_NB { 4 }
1668 sub LOCK_UN { 8 }
1669
1670 my($oldval, $fd, $db, %db, $value, $key);
1671
1672 $key = shift || 'default';
1673 $value = shift || 'magic';
1674
1675 $value .= " $$";
1676
1677 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1678 || die "dbcreat /tmp/foo.db $!";
1679 $fd = $db->fd;
1680 print "$$: db fd is $fd\n";
1681 open(DB_FH, "+<&=$fd") || die "dup $!";
1682
1683
1684 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1685 print "$$: CONTENTION; can't read during write update!
1686 Waiting for read lock ($!) ....";
1687 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1688 }
1689 print "$$: Read lock granted\n";
1690
1691 $oldval = $db{$key};
1692 print "$$: Old value was $oldval\n";
1693 flock(DB_FH, LOCK_UN);
1694
1695 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1696 print "$$: CONTENTION; must have exclusive lock!
1697 Waiting for write lock ($!) ....";
1698 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1699 }
1700
1701 print "$$: Write lock granted\n";
1702 $db{$key} = $value;
610ab055 1703 $db->sync; # to flush
cb1a09d0 1704 sleep 10;
1705
1706 flock(DB_FH, LOCK_UN);
88108326 1707 undef $db;
cb1a09d0 1708 untie %db;
1709 close(DB_FH);
1710 print "$$: Updated db to $key=$value\n";
1711
68dc0745 1712=head2 Sharing Databases With C Applications
f6b705ef 1713
1714There is no technical reason why a Berkeley DB database cannot be
1715shared by both a Perl and a C application.
1716
1717The vast majority of problems that are reported in this area boil down
1718to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 1719not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 1720
1721Here is a real example. Netscape 2.0 keeps a record of the locations you
1722visit along with the time you last visited them in a DB_HASH database.
1723This is usually stored in the file F<~/.netscape/history.db>. The key
1724field in the database is the location string and the value field is the
1725time the location was last visited stored as a 4 byte binary value.
1726
1727If you haven't already guessed, the location string is stored with a
1728terminating NULL. This means you need to be careful when accessing the
1729database.
1730
1731Here is a snippet of code that is loosely based on Tom Christiansen's
1732I<ggh> script (available from your nearest CPAN archive in
1733F<authors/id/TOMC/scripts/nshist.gz>).
1734
610ab055 1735 use strict ;
f6b705ef 1736 use DB_File ;
1737 use Fcntl ;
f6b705ef 1738
610ab055 1739 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1740 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1741
1742 $HISTORY = "$dotdir/.netscape/history.db";
1743
1744 tie %hist_db, 'DB_File', $HISTORY
1745 or die "Cannot open $HISTORY: $!\n" ;;
1746
1747 # Dump the complete database
1748 while ( ($href, $binary_time) = each %hist_db ) {
1749
1750 # remove the terminating NULL
1751 $href =~ s/\x00$// ;
1752
1753 # convert the binary time into a user friendly string
1754 $date = localtime unpack("V", $binary_time);
1755 print "$date $href\n" ;
1756 }
1757
1758 # check for the existence of a specific key
1759 # remember to add the NULL
1760 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1761 $date = localtime unpack("V", $binary_time) ;
1762 print "Last visited mox.perl.com on $date\n" ;
1763 }
1764 else {
1765 print "Never visited mox.perl.com\n"
1766 }
1767
1768 untie %hist_db ;
1769
68dc0745 1770=head2 The untie() Gotcha
778183f3 1771
7a2e2cd6 1772If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1773recommended that you read L<perltie/The untie Gotcha>.
778183f3 1774
1775Even if you don't currently make use of the API interface, it is still
1776worth reading it.
1777
1778Here is an example which illustrates the problem from a B<DB_File>
1779perspective:
1780
1781 use DB_File ;
1782 use Fcntl ;
1783
1784 my %x ;
1785 my $X ;
1786
1787 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1788 or die "Cannot tie first time: $!" ;
1789
1790 $x{123} = 456 ;
1791
1792 untie %x ;
1793
1794 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1795 or die "Cannot tie second time: $!" ;
1796
1797 untie %x ;
1798
1799When run, the script will produce this error message:
1800
1801 Cannot tie second time: Invalid argument at bad.file line 14.
1802
1803Although the error message above refers to the second tie() statement
1804in the script, the source of the problem is really with the untie()
1805statement that precedes it.
1806
1807Having read L<perltie> you will probably have already guessed that the
1808error is caused by the extra copy of the tied object stored in C<$X>.
1809If you haven't, then the problem boils down to the fact that the
1810B<DB_File> destructor, DESTROY, will not be called until I<all>
1811references to the tied object are destroyed. Both the tied variable,
1812C<%x>, and C<$X> above hold a reference to the object. The call to
1813untie() will destroy the first, but C<$X> still holds a valid
1814reference, so the destructor will not get called and the database file
1815F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1816attempt to open a database that is alreday open via the catch-all
1817"Invalid argument" doesn't help.
1818
1819If you run the script with the C<-w> flag the error message becomes:
1820
1821 untie attempted while 1 inner references still exist at bad.file line 12.
1822 Cannot tie second time: Invalid argument at bad.file line 14.
1823
1824which pinpoints the real problem. Finally the script can now be
1825modified to fix the original problem by destroying the API object
1826before the untie:
1827
1828 ...
1829 $x{123} = 456 ;
1830
1831 undef $X ;
1832 untie %x ;
1833
1834 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1835 ...
1836
f6b705ef 1837
1838=head1 COMMON QUESTIONS
1839
1840=head2 Why is there Perl source in my database?
1841
1842If you look at the contents of a database file created by DB_File,
1843there can sometimes be part of a Perl script included in it.
1844
1845This happens because Berkeley DB uses dynamic memory to allocate
1846buffers which will subsequently be written to the database file. Being
1847dynamic, the memory could have been used for anything before DB
1848malloced it. As Berkeley DB doesn't clear the memory once it has been
1849allocated, the unused portions will contain random junk. In the case
1850where a Perl script gets written to the database, the random junk will
1851correspond to an area of dynamic memory that happened to be used during
1852the compilation of the script.
1853
1854Unless you don't like the possibility of there being part of your Perl
1855scripts embedded in a database file, this is nothing to worry about.
1856
1857=head2 How do I store complex data structures with DB_File?
1858
1859Although B<DB_File> cannot do this directly, there is a module which
1860can layer transparently over B<DB_File> to accomplish this feat.
1861
1862Check out the MLDBM module, available on CPAN in the directory
1863F<modules/by-module/MLDBM>.
1864
1865=head2 What does "Invalid Argument" mean?
1866
1867You will get this error message when one of the parameters in the
1868C<tie> call is wrong. Unfortunately there are quite a few parameters to
1869get wrong, so it can be difficult to figure out which one it is.
1870
1871Here are a couple of possibilities:
1872
1873=over 5
1874
1875=item 1.
1876
610ab055 1877Attempting to reopen a database without closing it.
f6b705ef 1878
1879=item 2.
1880
1881Using the O_WRONLY flag.
1882
1883=back
1884
1885=head2 What does "Bareword 'DB_File' not allowed" mean?
1886
1887You will encounter this particular error message when you have the
1888C<strict 'subs'> pragma (or the full strict pragma) in your script.
1889Consider this script:
1890
1891 use strict ;
1892 use DB_File ;
1893 use vars qw(%x) ;
1894 tie %x, DB_File, "filename" ;
1895
1896Running it produces the error in question:
1897
1898 Bareword "DB_File" not allowed while "strict subs" in use
1899
1900To get around the error, place the word C<DB_File> in either single or
1901double quotes, like this:
1902
1903 tie %x, "DB_File", "filename" ;
1904
1905Although it might seem like a real pain, it is really worth the effort
1906of having a C<use strict> in all your scripts.
1907
cad2e5aa 1908=head1 REFERENCES
1909
1910Articles that are either about B<DB_File> or make use of it.
1911
1912=over 5
1913
1914=item 1.
1915
1916I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
1917Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
1918
1919=back
1920
cb1a09d0 1921=head1 HISTORY
1922
1f70e1ea 1923Moved to the Changes file.
610ab055 1924
1f70e1ea 1925=head1 BUGS
05475680 1926
1f70e1ea 1927Some older versions of Berkeley DB had problems with fixed length
1928records using the RECNO file format. This problem has been fixed since
1929version 1.85 of Berkeley DB.
e858de61 1930
1f70e1ea 1931I am sure there are bugs in the code. If you do find any, or can
1932suggest any enhancements, I would welcome your comments.
a6ed719b 1933
1f70e1ea 1934=head1 AVAILABILITY
a6ed719b 1935
1f70e1ea 1936B<DB_File> comes with the standard Perl source distribution. Look in
1937the directory F<ext/DB_File>. Given the amount of time between releases
1938of Perl the version that ships with Perl is quite likely to be out of
1939date, so the most recent version can always be found on CPAN (see
1940L<perlmod/CPAN> for details), in the directory
1941F<modules/by-module/DB_File>.
a6ed719b 1942
1f70e1ea 1943This version of B<DB_File> will work with either version 1.x or 2.x of
1944Berkeley DB, but is limited to the functionality provided by version 1.
a6ed719b 1945
cad2e5aa 1946The official web site for Berkeley DB is F<http://www.sleepycat.com>.
1947Both versions 1 and 2 of Berkeley DB are available there.
93af7a87 1948
1f70e1ea 1949Alternatively, Berkeley DB version 1 is available at your nearest CPAN
1950archive in F<src/misc/db.1.85.tar.gz>.
e858de61 1951
1f70e1ea 1952If you are running IRIX, then get Berkeley DB version 1 from
1953F<http://reality.sgi.com/ariel>. It has the patches necessary to
1954compile properly on IRIX 5.3.
610ab055 1955
1f70e1ea 1956=head1 COPYRIGHT
3b35bae3 1957
cad2e5aa 1958Copyright (c) 1995-1999 Paul Marquess. All rights reserved. This program
a9fd575d 1959is free software; you can redistribute it and/or modify it under the
1960same terms as Perl itself.
3b35bae3 1961
1f70e1ea 1962Although B<DB_File> is covered by the Perl license, the library it
1963makes use of, namely Berkeley DB, is not. Berkeley DB has its own
1964copyright and its own license. Please take the time to read it.
3b35bae3 1965
a9fd575d 1966Here are are few words taken from the Berkeley DB FAQ (at
1967http://www.sleepycat.com) regarding the license:
68dc0745 1968
a9fd575d 1969 Do I have to license DB to use it in Perl scripts?
3b35bae3 1970
a9fd575d 1971 No. The Berkeley DB license requires that software that uses
1972 Berkeley DB be freely redistributable. In the case of Perl, that
1973 software is Perl, and not your scripts. Any Perl scripts that you
1974 write are your property, including scripts that make use of
1975 Berkeley DB. Neither the Perl license nor the Berkeley DB license
1976 place any restriction on what you may do with them.
88108326 1977
1f70e1ea 1978If you are in any doubt about the license situation, contact either the
1979Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 1980
1981
3b35bae3 1982=head1 SEE ALSO
1983
9fe6733a 1984L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
1985L<dbmfilter>
3b35bae3 1986
3b35bae3 1987=head1 AUTHOR
1988
8e07c86e 1989The DB_File interface was written by Paul Marquess
6ca2e664 1990E<lt>Paul.Marquess@btinternet.comE<gt>.
d3ef3b8a 1991Questions about the DB system itself may be addressed to
1992E<lt>db@sleepycat.com<gt>.
3b35bae3 1993
1994=cut