DB_File 1.67
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6ca2e664 3# written by Paul Marquess (Paul.Marquess@btinternet.com)
c8e4dba7 4# last modified 6th June 1999
5# version 1.67
36477c24 6#
c8e4dba7 7# Copyright (c) 1995-1999 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
package DB_File::HASHINFO ;

# Tied-hash class that holds the settings passed as the final argument
# of a DB_HASH tie.  Only a fixed list of element names may be used;
# fetching, storing or deleting any other element croaks.
# DB_File::RECNOINFO and DB_File::BTREEINFO inherit everything here
# except TIEHASH, which each overrides with its own list of names.

require 5.003 ;

use strict;
use Carp;
require Tie::Hash;
@DB_File::HASHINFO::ISA = qw(Tie::Hash);

# new -- constructor.
# Returns a reference to a hash tied to $pkg; the reference itself is
# also blessed into $pkg, so $obj->{name} is routed through the
# FETCH/STORE/DELETE/EXISTS methods below.
sub new
{
    my $pkg = shift ;
    my %x ;
    tie %x, $pkg ;
    return bless \%x, $pkg ;
}


# TIEHASH -- build the object behind the tie.
#   VALID  the set of element names this class accepts
#   GOT    the values that have actually been stored so far
sub TIEHASH
{
    my $pkg = shift ;

    my %valid = map { $_ => 1 }
                qw( bsize ffactor nelem cachesize hash lorder) ;

    return bless { VALID => \%valid, GOT => {} }, $pkg ;
}


# FETCH -- return the stored value for $key (undef when the name is
# valid but nothing has been stored); croak on an unknown name.
sub FETCH
{
    my ($self, $key) = @_ ;

    return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;

    my $pkg = ref $self ;
    croak "${pkg}::FETCH - Unknown element '$key'" ;
}


# STORE -- remember $value under $key; croak on an unknown name.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    if ( exists $self->{VALID}{$key} )
    {
        $self->{GOT}{$key} = $value ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::STORE - Unknown element '$key'" ;
}

# DELETE -- forget any value stored under $key; croak on an unknown
# name.  The message names the object's own class, exactly as FETCH and
# STORE do, so the RECNOINFO/BTREEINFO subclasses report themselves
# rather than DB_File::HASHINFO.
sub DELETE
{
    my ($self, $key) = @_ ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}

# EXISTS -- true when $key is one of the accepted element names.  Note
# that this tests VALID, not GOT: it does not say whether a value has
# been stored.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{VALID}{$key} ;
}

# NotHere -- croak on behalf of a tie method this class does not
# support.  $method is the name of that method.
sub NotHere
{
    my ($self, $method) = @_ ;

    croak ref($self) . " does not define the method ${method}" ;
}

# Iterating over, or clearing, an info object is not supported.
sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 104
package DB_File::RECNOINFO ;

# Settings object for DB_RECNO ties.  Behaves like DB_File::HASHINFO but
# accepts the RECNO element names listed in TIEHASH below.

use strict ;

@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;

# TIEHASH -- build the object behind the tie.
#   VALID  the set of element names this class accepts
#   GOT    the values that have actually been stored so far
sub TIEHASH
{
    my $pkg = shift ;

    my %valid = map { $_ => 1 }
                qw( bval cachesize psize flags lorder reclen bfname ) ;

    return bless { VALID => \%valid, GOT => {} }, $pkg ;
}
121
package DB_File::BTREEINFO ;

# Settings object for DB_BTREE ties.  Behaves like DB_File::HASHINFO but
# accepts the BTREE element names listed in TIEHASH below.

use strict ;

@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;

# TIEHASH -- build the object behind the tie.
#   VALID  the set of element names this class accepts
#   GOT    the values that have actually been stored so far
sub TIEHASH
{
    my $pkg = shift ;

    my %valid = map { $_ => 1 }
                qw( flags cachesize maxkeypage minkeypage psize
                    compare prefix lorder ) ;

    return bless { VALID => \%valid, GOT => {} }, $pkg ;
}
139
140
package DB_File ;

use strict;
use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_version) ;
use Carp;


$VERSION = "1.67" ;

# The three pre-defined settings objects, one per file type.  They are
# exported below and are what callers pass as the last argument to tie.
#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;

require Tie::Hash;
require Exporter;
use AutoLoader;
require DynaLoader;
@ISA = qw(Tie::Hash Exporter DynaLoader);

# Everything here is exported by default.  The upper-case names are
# constants; they are resolved on first use by AUTOLOAD.
@EXPORT = qw(
        $DB_BTREE $DB_HASH $DB_RECNO

        BTREEMAGIC
        BTREEVERSION
        DB_LOCK
        DB_SHMEM
        DB_TXN
        HASHMAGIC
        HASHVERSION
        MAX_PAGE_NUMBER
        MAX_PAGE_OFFSET
        MAX_REC_NUMBER
        RET_ERROR
        RET_SPECIAL
        RET_SUCCESS
        R_CURSOR
        R_DUP
        R_FIRST
        R_FIXEDLEN
        R_IAFTER
        R_IBEFORE
        R_LAST
        R_NEXT
        R_NOKEY
        R_NOOVERWRITE
        R_PREV
        R_RECNOSYNC
        R_SETCURSOR
        R_SNAPSHOT
        __R_UNUSED

);
8e07c86e 193
# AUTOLOAD -- resolve a constant on first use.
#
# The unqualified name of the sub being called is handed to constant(),
# which reports failure through $!.  When $! mentions "Invalid" the call
# is passed on to AutoLoader instead; any other error croaks.  On
# success a real sub returning the value is defined under the requested
# name, so later calls no longer come through here.
sub AUTOLOAD {
    my($constname);
    ($constname = $AUTOLOAD) =~ s/.*:://;
    my $val = constant($constname, @_ ? $_[0] : 0);
    if ($! != 0) {
        if ($! =~ /Invalid/) {
            $AutoLoader::AUTOLOAD = $AUTOLOAD;
            goto &AutoLoader::AUTOLOAD;
        }
        else {
            my($file,$line) = (caller)[1,2];
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.\n";
        }
    }
    eval "sub $AUTOLOAD { $val }";
    # Stop here if the generated sub did not compile, rather than
    # jumping to a sub that was never defined.
    croak "Cannot define constant sub $AUTOLOAD: $@" if $@;
    goto &$AUTOLOAD;
}
212
f6b705ef 213
# Best effort: the result of this eval is never inspected, so if Fcntl
# cannot be loaded the O_XXX names are simply not re-exported.
eval {
    # Make all Fcntl O_XXX constants available for importing
    require Fcntl;
    my @O = grep /^O_/, @Fcntl::EXPORT;
    Fcntl->import(@O);  # first we import what we want to export
    push(@EXPORT, @O);
};
f6b705ef 221
## import borrowed from IO::File
##   exports Fcntl constants if available.
#sub import {
#    my $pkg = shift;
#    my $callpkg = caller;
#    Exporter::export $pkg, $callpkg, @_;
#    eval {
#        require Fcntl;
#        Exporter::export 'Fcntl', $callpkg, '/^O_/';
#    };
#}

# Load the compiled half of the module (bootstrap is inherited from
# DynaLoader via @ISA).
DB_File->bootstrap($VERSION);

# Preloaded methods go here.  Autoload methods go after __END__, and are
# processed by the autosplit program.
238
# tie_hash_or_array -- common back end for TIEHASH and TIEARRAY.
#
# Receives the tie arguments unchanged: class, filename, flags, mode and
# the settings object ($DB_HASH, $DB_BTREE, $DB_RECNO or one created
# with the ...INFO constructors).  Returns whatever DoTie_ returns, or
# undef when the placeholder file for a RECNO database cannot be
# created.
#
# It must be called as an ordinary sub directly from TIEHASH/TIEARRAY:
# the name of the calling sub is how the kind of tie is detected.
sub tie_hash_or_array
{
    my (@arg) = @_ ;
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    # A settings object is a blessed reference to a tied hash; hand the
    # object behind the tie to DoTie_ rather than the outer reference.
    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    # make recno in Berkeley DB version 2 work like recno in version 1.
    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
        $arg[1] and ! -e $arg[1]) {
        # Create the file empty.  Three-argument open (perl 5.6 or
        # later) keeps the caller's filename out of the mode string, so
        # names with leading/trailing blanks or a leading '&' are
        # treated literally; the lexical handle avoids a global FH.
        open(my $fh, '>', $arg[1]) or return undef ;
        close $fh ;
        chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
    }

    return DoTie_($tieHASH, @arg) ;
}
257
# TIEHASH -- entry point for "tie %hash, 'DB_File', ...".
# Must stay a plain call: tie_hash_or_array looks at the name of its
# caller to tell a hash tie from an array tie.
sub TIEHASH
{
    return tie_hash_or_array(@_) ;
}
262
# TIEARRAY -- entry point for "tie @array, 'DB_File', ...".
# Must stay a plain call: tie_hash_or_array looks at the name of its
# caller to tell a hash tie from an array tie.
sub TIEARRAY
{
    return tie_hash_or_array(@_) ;
}
88108326 267
# CLEAR -- remove every record from the database.
#
# The keys are collected in a first pass with seq() and only deleted
# afterwards, so the sequential scan is never run over a database that
# is being modified.  Deletion goes from the last key to the first --
# presumably so that, for RECNO, removing a record does not renumber
# the ones still waiting to be removed (TODO confirm).
sub CLEAR
{
    my $self = shift;
    my $key = "" ;
    my $value = "" ;
    my @keys;

    for (my $status = $self->seq($key, $value, R_FIRST()) ;
         $status == 0 ;
         $status = $self->seq($key, $value, R_NEXT()) ) {
        push @keys, $key;
    }

    foreach my $doomed (reverse @keys) {
        $self->del($doomed);
    }
}
284
# EXTEND -- tied-array hook; deliberately does nothing.
sub EXTEND { }

# STORESIZE -- tied-array hook: make the array exactly $length long.
# Shrinking deletes records one at a time, working back from the last
# one; growing writes an empty string into the new last slot.
sub STORESIZE
{
    my $self = shift;
    my $length = shift ;
    my $current_length = $self->length() ;

    if ($length < $current_length) {
        for (my $key = $current_length - 1 ; $key >= $length ; -- $key)
          { $self->del($key) }
    }
    elsif ($length > $current_length) {
        $self->put($length-1, "") ;
    }
}
302
# find_dup -- look for one specific key/value pair.
#
# Usage: $status = $db->find_dup($key, $value)
#
# Starts at the position R_CURSOR selects for $key and steps forward
# with R_NEXT until the pair is seen or seq() stops returning 0.
# Returns 0 as soon as the pair is found, leaving the cursor on it;
# otherwise returns the non-zero status that ended the scan.  Croaks
# when not called with exactly a key and a value.
sub find_dup
{
    croak "Usage: \$db->find_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $origkey, $value_wanted) = @_ ;

    # seq() overwrites its first two arguments, so work on copies.
    my ($key, $value) = ($origkey, 0) ;

    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0) {
        return 0 if $key eq $origkey and $value eq $value_wanted ;
        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $status ;
}
322
# del_dup -- delete one specific key/value pair.
#
# Usage: $status = $db->del_dup($key, $value)
#
# Uses find_dup to locate the pair, then deletes with the R_CURSOR flag.
# Returns find_dup's non-zero status when the pair is not there,
# otherwise the status of the delete.  Croaks when not called with
# exactly a key and a value.
sub del_dup
{
    croak "Usage: \$db->del_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $key, $value) = @_ ;

    my $status = $db->find_dup($key, $value) ;
    return $status if $status != 0 ;

    return $db->del($key, R_CURSOR() ) ;
}
336
# get_dup -- fetch the values stored under a duplicated key.
#
# Usage: $count = $db->get_dup($key)
#        @list  = $db->get_dup($key)
#        %hash  = $db->get_dup($key, 1)
#
# In scalar context returns how many records have the key.  In list
# context returns their values, or, when the flag is true, a value =>
# number-of-occurrences list suitable for assigning to a hash.  Croaks
# when called with anything other than a key and an optional flag.
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;
    my $origkey   = $key ;      # seq() overwrites $key as it goes
    my $value     = 0 ;
    my $wantarray = wantarray ;
    my %values    = () ;
    my @values    = () ;
    my $counter   = 0 ;
    my $status    = 0 ;

    # iterate through the database until either EOF ($status != 0)
    # or a different key is encountered ($key ne $origkey).
    for ($status = $db->seq($key, $value, R_CURSOR()) ;
         $status == 0 and $key eq $origkey ;
         $status = $db->seq($key, $value, R_NEXT()) ) {

        # save the value or count number of matches
        if ($wantarray) {
            if ($flag)
                { ++ $values{$value} }
            else
                { push (@values, $value) }
        }
        else
            { ++ $counter }

    }

    return ($wantarray ? ($flag ? %values : @values) : $counter) ;
}
373
374
8e07c86e 3751;
376__END__
377
3b35bae3 378=head1 NAME
379
1f70e1ea 380DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 381
382=head1 SYNOPSIS
383
384 use DB_File ;
88108326 385
386 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
387 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
388 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 389
3b35bae3 390 $status = $X->del($key [, $flags]) ;
391 $status = $X->put($key, $value [, $flags]) ;
392 $status = $X->get($key, $value [, $flags]) ;
760ac839 393 $status = $X->seq($key, $value, $flags) ;
3b35bae3 394 $status = $X->sync([$flags]) ;
395 $status = $X->fd ;
760ac839 396
f6b705ef 397 # BTREE only
88108326 398 $count = $X->get_dup($key) ;
399 @list = $X->get_dup($key) ;
400 %list = $X->get_dup($key, 1) ;
6ca2e664 401 $status = $X->find_dup($key, $value) ;
402 $status = $X->del_dup($key, $value) ;
88108326 403
f6b705ef 404 # RECNO only
405 $a = $X->length;
406 $a = $X->pop ;
407 $X->push(list);
408 $a = $X->shift;
409 $X->unshift(list);
410
c8e4dba7 411 # DBM Filters
412 $old_filter = $db->filter_store_key ( sub { ... } ) ;
413 $old_filter = $db->filter_store_value( sub { ... } ) ;
414 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
415 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
416
3b35bae3 417 untie %hash ;
418 untie @array ;
419
420=head1 DESCRIPTION
421
8e07c86e 422B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 423facilities provided by Berkeley DB version 1.x (if you have a newer
424version of DB, see L<Using DB_File with Berkeley DB version 2>). It is
425assumed that you have a copy of the Berkeley DB manual pages at hand
426when reading this documentation. The interface defined here mirrors the
427Berkeley DB interface closely.
68dc0745 428
8e07c86e 429Berkeley DB is a C library which provides a consistent interface to a
430number of database formats. B<DB_File> provides an interface to all
431three of the database types currently supported by Berkeley DB.
3b35bae3 432
433The file types are:
434
435=over 5
436
88108326 437=item B<DB_HASH>
3b35bae3 438
88108326 439This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 440files. This is equivalent to the functionality provided by other
441hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
442the files created using DB_HASH are not compatible with any of the
443other packages mentioned.
3b35bae3 444
8e07c86e 445A default hashing algorithm, which will be adequate for most
446applications, is built into Berkeley DB. If you do need to use your own
447hashing algorithm it is possible to write your own in Perl and have
448B<DB_File> use it instead.
3b35bae3 449
88108326 450=item B<DB_BTREE>
451
452The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 453sorted, balanced binary tree.
3b35bae3 454
8e07c86e 455As with the DB_HASH format, it is possible to provide a user defined
456Perl routine to perform the comparison of keys. By default, though, the
457keys are stored in lexical order.
3b35bae3 458
88108326 459=item B<DB_RECNO>
3b35bae3 460
8e07c86e 461DB_RECNO allows both fixed-length and variable-length flat text files
462to be manipulated using the same key/value pair interface as in DB_HASH
463and DB_BTREE. In this case the key will consist of a record (line)
464number.
3b35bae3 465
466=back
467
1f70e1ea 468=head2 Using DB_File with Berkeley DB version 2
469
470Although B<DB_File> is intended to be used with Berkeley DB version 1,
471it can also be used with version 2. In this case the interface is
472limited to the functionality provided by Berkeley DB 1.x. Anywhere the
473version 2 interface differs, B<DB_File> arranges for it to work like
474version 1. This feature allows B<DB_File> scripts that were built with
475version 1 to be migrated to version 2 without any changes.
476
477If you want to make use of the new features available in Berkeley DB
4782.x, use the Perl module B<BerkeleyDB> instead.
479
480At the time of writing this document the B<BerkeleyDB> module is still
481alpha quality (the version number is < 1.0), and so unsuitable for use
482in any serious development work. Once its version number is >= 1.0, it
483is considered stable enough for real work.
484
485B<Note:> The database file format has changed in Berkeley DB version 2.
486If you cannot recreate your databases, you must dump any existing
487databases with the C<db_dump185> utility that comes with Berkeley DB.
6ca2e664 488Once you have rebuilt DB_File to use Berkeley DB version 2, your
1f70e1ea 489databases can be recreated using C<db_load>. Refer to the Berkeley DB
490documentation for further details.
491
6ca2e664 492Please read L<"COPYRIGHT"> before using version 2.x of Berkeley DB with
1f70e1ea 493DB_File.
494
68dc0745 495=head2 Interface to Berkeley DB
3b35bae3 496
497B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 498in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
499allows B<DB_File> to access Berkeley DB files using either an
500associative array (for DB_HASH & DB_BTREE file types) or an ordinary
501array (for the DB_RECNO file type).
3b35bae3 502
88108326 503In addition to the tie() interface, it is also possible to access most
504of the functions provided in the Berkeley DB API directly.
f6b705ef 505See L<THE API INTERFACE>.
3b35bae3 506
88108326 507=head2 Opening a Berkeley DB Database File
3b35bae3 508
8e07c86e 509Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 510Here is the C prototype for dbopen():
3b35bae3 511
512 DB*
513 dbopen (const char * file, int flags, int mode,
514 DBTYPE type, const void * openinfo)
515
516The parameter C<type> is an enumeration which specifies which of the 3
517interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
518Depending on which of these is actually chosen, the final parameter,
519I<openinfo> points to a data structure which allows tailoring of the
520specific interface method.
521
8e07c86e 522This interface is handled slightly differently in B<DB_File>. Here is
88108326 523an equivalent call using B<DB_File>:
3b35bae3 524
88108326 525 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 526
8e07c86e 527The C<filename>, C<flags> and C<mode> parameters are the direct
528equivalent of their dbopen() counterparts. The final parameter $DB_HASH
529performs the function of both the C<type> and C<openinfo> parameters in
530dbopen().
3b35bae3 531
88108326 532In the example above $DB_HASH is actually a pre-defined reference to a
533hash object. B<DB_File> has three of these pre-defined references.
534Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 535
The keys allowed in each of these pre-defined references are limited to
537the names used in the equivalent C structure. So, for example, the
538$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 539C<ffactor>, C<hash>, C<lorder> and C<nelem>.
540
541To change one of these elements, just assign to it like this:
542
543 $DB_HASH->{'cachesize'} = 10000 ;
544
545The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
546usually adequate for most applications. If you do need to create extra
547instances of these objects, constructors are available for each file
548type.
549
550Here are examples of the constructors and the valid options available
551for DB_HASH, DB_BTREE and DB_RECNO respectively.
552
553 $a = new DB_File::HASHINFO ;
554 $a->{'bsize'} ;
555 $a->{'cachesize'} ;
556 $a->{'ffactor'};
557 $a->{'hash'} ;
558 $a->{'lorder'} ;
559 $a->{'nelem'} ;
560
561 $b = new DB_File::BTREEINFO ;
562 $b->{'flags'} ;
563 $b->{'cachesize'} ;
564 $b->{'maxkeypage'} ;
565 $b->{'minkeypage'} ;
566 $b->{'psize'} ;
567 $b->{'compare'} ;
568 $b->{'prefix'} ;
569 $b->{'lorder'} ;
570
571 $c = new DB_File::RECNOINFO ;
572 $c->{'bval'} ;
573 $c->{'cachesize'} ;
574 $c->{'psize'} ;
575 $c->{'flags'} ;
576 $c->{'lorder'} ;
577 $c->{'reclen'} ;
578 $c->{'bfname'} ;
579
The values stored in the hashes above are mostly the direct equivalent
of their C counterparts. Like their C counterparts, all are set to
default values - that means you don't have to set I<all> of the
values when you only want to change one. Here is an example:
584
585 $a = new DB_File::HASHINFO ;
586 $a->{'cachesize'} = 12345 ;
587 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
588
36477c24 589A few of the options need extra discussion here. When used, the C
88108326 590equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
591to C functions. In B<DB_File> these keys are used to store references
592to Perl subs. Below are templates for each of the subs:
593
594 sub hash
595 {
596 my ($data) = @_ ;
597 ...
598 # return the hash value for $data
599 return $hash ;
600 }
3b35bae3 601
    sub compare
    {
        my ($key1, $key2) = @_ ;
        ...
        # return  0 if $key1 eq $key2
        #        -1 if $key1 lt $key2
        #         1 if $key1 gt $key2
        return (-1 , 0 or 1) ;
    }
3b35bae3 611
    sub prefix
    {
        my ($key1, $key2) = @_ ;
        ...
        # return number of bytes of $key2 which are
        # necessary to determine that it is greater than $key1
        return $bytes ;
    }
3b35bae3 620
f6b705ef 621See L<Changing the BTREE sort order> for an example of using the
622C<compare> template.
88108326 623
36477c24 624If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 625C<bval>, you should check out L<The 'bval' Option>.
36477c24 626
88108326 627=head2 Default Parameters
628
629It is possible to omit some or all of the final 4 parameters in the
630call to C<tie> and let them take default values. As DB_HASH is the most
631common file format used, the call:
632
633 tie %A, "DB_File", "filename" ;
634
635is equivalent to:
636
18d2dc8c 637 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 638
639It is also possible to omit the filename parameter as well, so the
640call:
641
642 tie %A, "DB_File" ;
643
644is equivalent to:
645
18d2dc8c 646 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 647
f6b705ef 648See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 649in place of a filename.
650
f6b705ef 651=head2 In Memory Databases
652
653Berkeley DB allows the creation of in-memory databases by using NULL
654(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
655uses C<undef> instead of NULL to provide this functionality.
656
657=head1 DB_HASH
658
659The DB_HASH file format is probably the most commonly used of the three
660file formats that B<DB_File> supports. It is also very straightforward
661to use.
662
68dc0745 663=head2 A Simple Example
f6b705ef 664
665This example shows how to create a database, add key/value pairs to the
666database, delete keys/value pairs and finally how to enumerate the
667contents of the database.
668
610ab055 669 use strict ;
f6b705ef 670 use DB_File ;
610ab055 671 use vars qw( %h $k $v ) ;
f6b705ef 672
673 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
674 or die "Cannot open file 'fruit': $!\n";
675
676 # Add a few key/value pairs to the file
677 $h{"apple"} = "red" ;
678 $h{"orange"} = "orange" ;
679 $h{"banana"} = "yellow" ;
680 $h{"tomato"} = "red" ;
681
682 # Check for existence of a key
683 print "Banana Exists\n\n" if $h{"banana"} ;
684
685 # Delete a key/value pair.
686 delete $h{"apple"} ;
687
688 # print the contents of the file
689 while (($k, $v) = each %h)
690 { print "$k -> $v\n" }
691
692 untie %h ;
693
Here is the output:
695
696 Banana Exists
697
698 orange -> orange
699 tomato -> red
700 banana -> yellow
701
Note that, as with ordinary associative arrays, the keys are retrieved
in an apparently random order.
704
705=head1 DB_BTREE
706
707The DB_BTREE format is useful when you want to store data in a given
708order. By default the keys will be stored in lexical order, but as you
709will see from the example shown in the next section, it is very easy to
710define your own sorting function.
711
712=head2 Changing the BTREE sort order
713
714This script shows how to override the default sorting algorithm that
715BTREE uses. Instead of using the normal lexical ordering, a case
716insensitive compare function will be used.
88108326 717
610ab055 718 use strict ;
f6b705ef 719 use DB_File ;
610ab055 720
721 my %h ;
f6b705ef 722
723 sub Compare
724 {
725 my ($key1, $key2) = @_ ;
726 "\L$key1" cmp "\L$key2" ;
727 }
728
729 # specify the Perl sub that will do the comparison
730 $DB_BTREE->{'compare'} = \&Compare ;
731
732 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
733 or die "Cannot open file 'tree': $!\n" ;
734
735 # Add a key/value pair to the file
736 $h{'Wall'} = 'Larry' ;
737 $h{'Smith'} = 'John' ;
738 $h{'mouse'} = 'mickey' ;
739 $h{'duck'} = 'donald' ;
740
741 # Delete
742 delete $h{"duck"} ;
743
744 # Cycle through the keys printing them in order.
745 # Note it is not necessary to sort the keys as
746 # the btree will have kept them in order automatically.
747 foreach (keys %h)
748 { print "$_\n" }
749
750 untie %h ;
751
752Here is the output from the code above.
753
754 mouse
755 Smith
756 Wall
757
There are a few points to bear in mind if you want to change the
ordering in a BTREE database:
760
761=over 5
762
763=item 1.
764
765The new compare function must be specified when you create the database.
766
767=item 2.
768
769You cannot change the ordering once the database has been created. Thus
770you must use the same compare function every time you access the
88108326 771database.
772
f6b705ef 773=back
774
68dc0745 775=head2 Handling Duplicate Keys
f6b705ef 776
777The BTREE file type optionally allows a single key to be associated
778with an arbitrary number of values. This option is enabled by setting
779the flags element of C<$DB_BTREE> to R_DUP when creating the database.
780
88108326 781There are some difficulties in using the tied hash interface if you
782want to manipulate a BTREE database with duplicate keys. Consider this
783code:
784
610ab055 785 use strict ;
88108326 786 use DB_File ;
610ab055 787
788 use vars qw($filename %h ) ;
789
88108326 790 $filename = "tree" ;
791 unlink $filename ;
792
793 # Enable duplicate records
794 $DB_BTREE->{'flags'} = R_DUP ;
795
796 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
797 or die "Cannot open $filename: $!\n";
798
799 # Add some key/value pairs to the file
800 $h{'Wall'} = 'Larry' ;
801 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 802 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 803 $h{'Smith'} = 'John' ;
804 $h{'mouse'} = 'mickey' ;
805
806 # iterate through the associative array
807 # and print each key/value pair.
808 foreach (keys %h)
809 { print "$_ -> $h{$_}\n" }
810
f6b705ef 811 untie %h ;
812
88108326 813Here is the output:
814
815 Smith -> John
816 Wall -> Larry
817 Wall -> Larry
f6b705ef 818 Wall -> Larry
88108326 819 mouse -> mickey
820
f6b705ef 821As you can see 3 records have been successfully created with key C<Wall>
88108326 822- the only thing is, when they are retrieved from the database they
f6b705ef 823I<seem> to have the same value, namely C<Larry>. The problem is caused
824by the way that the associative array interface works. Basically, when
825the associative array interface is used to fetch the value associated
826with a given key, it will only ever retrieve the first value.
88108326 827
828Although it may not be immediately obvious from the code above, the
829associative array interface can be used to write values with duplicate
830keys, but it cannot be used to read them back from the database.
831
832The way to get around this problem is to use the Berkeley DB API method
833called C<seq>. This method allows sequential access to key/value
f6b705ef 834pairs. See L<THE API INTERFACE> for details of both the C<seq> method
835and the API in general.
88108326 836
837Here is the script above rewritten using the C<seq> API method.
838
610ab055 839 use strict ;
88108326 840 use DB_File ;
88108326 841
610ab055 842 use vars qw($filename $x %h $status $key $value) ;
843
88108326 844 $filename = "tree" ;
845 unlink $filename ;
846
847 # Enable duplicate records
848 $DB_BTREE->{'flags'} = R_DUP ;
849
850 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
851 or die "Cannot open $filename: $!\n";
852
853 # Add some key/value pairs to the file
854 $h{'Wall'} = 'Larry' ;
855 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 856 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 857 $h{'Smith'} = 'John' ;
858 $h{'mouse'} = 'mickey' ;
859
f6b705ef 860 # iterate through the btree using seq
88108326 861 # and print each key/value pair.
610ab055 862 $key = $value = 0 ;
f6b705ef 863 for ($status = $x->seq($key, $value, R_FIRST) ;
864 $status == 0 ;
865 $status = $x->seq($key, $value, R_NEXT) )
88108326 866 { print "$key -> $value\n" }
867
868 undef $x ;
869 untie %h ;
870
871that prints:
872
873 Smith -> John
874 Wall -> Brick
f6b705ef 875 Wall -> Brick
88108326 876 Wall -> Larry
877 mouse -> mickey
878
f6b705ef 879This time we have got all the key/value pairs, including the multiple
88108326 880values associated with the key C<Wall>.
881
6ca2e664 882To make life easier when dealing with duplicate keys, B<DB_File> comes with
883a few utility methods.
884
68dc0745 885=head2 The get_dup() Method
f6b705ef 886
6ca2e664 887The C<get_dup> method assists in
88108326 888reading duplicate values from BTREE databases. The method can take the
889following forms:
890
891 $count = $x->get_dup($key) ;
892 @list = $x->get_dup($key) ;
893 %list = $x->get_dup($key, 1) ;
894
895In a scalar context the method returns the number of values associated
896with the key, C<$key>.
897
898In list context, it returns all the values which match C<$key>. Note
f6b705ef 899that the values will be returned in an apparently random order.
88108326 900
7a2e2cd6 901In list context, if the second parameter is present and evaluates
902TRUE, the method returns an associative array. The keys of the
903associative array correspond to the values that matched in the BTREE
904and the values of the array are a count of the number of times that
905particular value occurred in the BTREE.
88108326 906
f6b705ef 907So assuming the database created above, we can use C<get_dup> like
88108326 908this:
909
610ab055 910 my $cnt = $x->get_dup("Wall") ;
88108326 911 print "Wall occurred $cnt times\n" ;
912
610ab055 913 my %hash = $x->get_dup("Wall", 1) ;
88108326 914 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 915 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 916
610ab055 917 my @list = $x->get_dup("Wall") ;
88108326 918 print "Wall => [@list]\n" ;
919
f6b705ef 920 @list = $x->get_dup("Smith") ;
88108326 921 print "Smith => [@list]\n" ;
922
f6b705ef 923 @list = $x->get_dup("Dog") ;
88108326 924 print "Dog => [@list]\n" ;
925
926
927and it will print:
928
f6b705ef 929 Wall occurred 3 times
88108326 930 Larry is there
f6b705ef 931 There are 2 Brick Walls
932 Wall => [Brick Brick Larry]
88108326 933 Smith => [John]
934 Dog => []
3b35bae3 935
6ca2e664 936=head2 The find_dup() Method
937
938 $status = $X->find_dup($key, $value) ;
939
This method checks for the existence of a specific key/value pair. If the
941pair exists, the cursor is left pointing to the pair and the method
942returns 0. Otherwise the method returns a non-zero value.
943
944Assuming the database from the previous example:
945
946 use strict ;
947 use DB_File ;
948
949 use vars qw($filename $x %h $found) ;
950
951 my $filename = "tree" ;
952
953 # Enable duplicate records
954 $DB_BTREE->{'flags'} = R_DUP ;
955
956 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
957 or die "Cannot open $filename: $!\n";
958
959 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
960 print "Larry Wall is $found there\n" ;
961
962 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
963 print "Harry Wall is $found there\n" ;
964
965 undef $x ;
966 untie %h ;
967
968prints this
969
970 Larry Wall is there
971 Harry Wall is not there
972
973
974=head2 The del_dup() Method
975
976 $status = $X->del_dup($key, $value) ;
977
978This method deletes a specific key/value pair. It returns
9790 if they exist and have been deleted successfully.
980Otherwise the method returns a non-zero value.
981
Again assuming the existence of the C<tree> database:
983
984 use strict ;
985 use DB_File ;
986
987 use vars qw($filename $x %h $found) ;
988
989 my $filename = "tree" ;
990
991 # Enable duplicate records
992 $DB_BTREE->{'flags'} = R_DUP ;
993
994 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
995 or die "Cannot open $filename: $!\n";
996
997 $x->del_dup("Wall", "Larry") ;
998
999 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1000 print "Larry Wall is $found there\n" ;
1001
1002 undef $x ;
1003 untie %h ;
1004
1005prints this
1006
1007 Larry Wall is not there
1008
f6b705ef 1009=head2 Matching Partial Keys
1010
1011The BTREE interface has a feature which allows partial keys to be
1012matched. This functionality is I<only> available when the C<seq> method
1013is used along with the R_CURSOR flag.
1014
1015 $x->seq($key, $value, R_CURSOR) ;
1016
1017Here is the relevant quote from the dbopen man page where it defines
1018the use of the R_CURSOR flag with seq:
1019
f6b705ef 1020 Note, for the DB_BTREE access method, the returned key is not
1021 necessarily an exact match for the specified key. The returned key
1022 is the smallest key greater than or equal to the specified key,
1023 permitting partial key matches and range searches.
1024
f6b705ef 1025In the example script below, the C<match> sub uses this feature to find
1026and print the first matching key/value pair given a partial key.
1027
610ab055 1028 use strict ;
f6b705ef 1029 use DB_File ;
1030 use Fcntl ;
610ab055 1031
1032 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 1033
1034 sub match
1035 {
1036 my $key = shift ;
610ab055 1037 my $value = 0;
f6b705ef 1038 my $orig_key = $key ;
1039 $x->seq($key, $value, R_CURSOR) ;
1040 print "$orig_key\t-> $key\t-> $value\n" ;
1041 }
1042
1043 $filename = "tree" ;
1044 unlink $filename ;
1045
1046 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1047 or die "Cannot open $filename: $!\n";
1048
1049 # Add some key/value pairs to the file
1050 $h{'mouse'} = 'mickey' ;
1051 $h{'Wall'} = 'Larry' ;
1052 $h{'Walls'} = 'Brick' ;
1053 $h{'Smith'} = 'John' ;
1054
1055
610ab055 1056 $key = $value = 0 ;
f6b705ef 1057 print "IN ORDER\n" ;
1058 for ($st = $x->seq($key, $value, R_FIRST) ;
1059 $st == 0 ;
1060 $st = $x->seq($key, $value, R_NEXT) )
1061
1062 { print "$key -> $value\n" }
1063
1064 print "\nPARTIAL MATCH\n" ;
1065
1066 match "Wa" ;
1067 match "A" ;
1068 match "a" ;
1069
1070 undef $x ;
1071 untie %h ;
1072
1073Here is the output:
1074
1075 IN ORDER
1076 Smith -> John
1077 Wall -> Larry
1078 Walls -> Brick
1079 mouse -> mickey
1080
1081 PARTIAL MATCH
1082 Wa -> Wall -> Larry
1083 A -> Smith -> John
1084 a -> mouse -> mickey
1085
1086=head1 DB_RECNO
1087
1088DB_RECNO provides an interface to flat text files. Both variable and
1089fixed length records are supported.
3b35bae3 1090
6ca2e664 1091In order to make RECNO more compatible with Perl, the array offset for
88108326 1092all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1093
88108326 1094As with normal Perl arrays, a RECNO array can be accessed using
1095negative indexes. The index -1 refers to the last element of the array,
1096-2 the second last, and so on. Attempting to access an element before
1097the start of the array will raise a fatal run-time error.
3b35bae3 1098
68dc0745 1099=head2 The 'bval' Option
36477c24 1100
1101The operation of the bval option warrants some discussion. Here is the
1102definition of bval from the Berkeley DB 1.85 recno manual page:
1103
1104 The delimiting byte to be used to mark the end of a
1105 record for variable-length records, and the pad charac-
1106 ter for fixed-length records. If no value is speci-
1107 fied, newlines (``\n'') are used to mark the end of
1108 variable-length records and fixed-length records are
1109 padded with spaces.
1110
1111The second sentence is wrong. In actual fact bval will only default to
1112C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1113openinfo parameter is used at all, the value that happens to be in bval
1114will be used. That means you always have to specify bval when making
1115use of any of the options in the openinfo parameter. This documentation
1116error will be fixed in the next release of Berkeley DB.
1117
1118That clarifies the situation with regard to Berkeley DB itself. What
1119about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1120quite useful, so B<DB_File> conforms to it.
36477c24 1121
1122That means that you can specify other options (e.g. cachesize) and
1123still have bval default to C<"\n"> for variable length records, and
1124space for fixed length records.
1125
f6b705ef 1126=head2 A Simple Example
3b35bae3 1127
6ca2e664 1128Here is a simple example that uses RECNO (if you are using a version
1129of Perl earlier than 5.004_57 this example won't work -- see
1130L<Extra RECNO Methods> for a workaround).
f6b705ef 1131
610ab055 1132 use strict ;
f6b705ef 1133 use DB_File ;
f6b705ef 1134
610ab055 1135 my @h ;
f6b705ef 1136 tie @h, "DB_File", "text", O_RDWR|O_CREAT, 0640, $DB_RECNO
1137 or die "Cannot open file 'text': $!\n" ;
1138
1139 # Add a few key/value pairs to the file
1140 $h[0] = "orange" ;
1141 $h[1] = "blue" ;
1142 $h[2] = "yellow" ;
1143
6ca2e664 1144 push @h, "green", "black" ;
1145
1146 my $elements = scalar @h ;
1147 print "The array contains $elements entries\n" ;
1148
1149 my $last = pop @h ;
1150 print "popped $last\n" ;
1151
1152 unshift @h, "white" ;
1153 my $first = shift @h ;
1154 print "shifted $first\n" ;
1155
f6b705ef 1156 # Check for existence of a key
1157 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1158
1159 # use a negative index
1160 print "The last element is $h[-1]\n" ;
1161 print "The 2nd last element is $h[-2]\n" ;
1162
1163 untie @h ;
3b35bae3 1164
f6b705ef 1165Here is the output from the script:
1166
6ca2e664 1167 The array contains 5 entries
1168 popped black
1169 unshifted white
f6b705ef 1170 Element 1 Exists with value blue
6ca2e664 1171 The last element is green
1172 The 2nd last element is yellow
f6b705ef 1173
6ca2e664 1174=head2 Extra RECNO Methods
f6b705ef 1175
045291aa 1176If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664 1177array interface is quite limited. In the example script above
1178C<push>, C<pop>, C<shift>, C<unshift>
1179or determining the array length will not work with a tied array.
045291aa 1180
1181To make the interface more useful for older versions of Perl, a number
1182of methods are supplied with B<DB_File> to simulate the missing array
1183operations. All these methods are accessed via the object returned from
1184the tie call.
f6b705ef 1185
1186Here are the methods:
1187
1188=over 5
3b35bae3 1189
f6b705ef 1190=item B<$X-E<gt>push(list) ;>
1191
1192Pushes the elements of C<list> to the end of the array.
1193
1194=item B<$value = $X-E<gt>pop ;>
1195
1196Removes and returns the last element of the array.
1197
1198=item B<$X-E<gt>shift>
1199
1200Removes and returns the first element of the array.
1201
1202=item B<$X-E<gt>unshift(list) ;>
1203
1204Pushes the elements of C<list> to the start of the array.
1205
1206=item B<$X-E<gt>length>
1207
1208Returns the number of elements in the array.
1209
1210=back
1211
1212=head2 Another Example
1213
1214Here is a more complete example that makes use of some of the methods
1215described above. It also makes use of the API interface directly (see
1216L<THE API INTERFACE>).
1217
1218 use strict ;
1219 use vars qw(@h $H $file $i) ;
1220 use DB_File ;
1221 use Fcntl ;
1222
1223 $file = "text" ;
1224
1225 unlink $file ;
1226
1227 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1228 or die "Cannot open file $file: $!\n" ;
1229
1230 # first create a text file to play with
1231 $h[0] = "zero" ;
1232 $h[1] = "one" ;
1233 $h[2] = "two" ;
1234 $h[3] = "three" ;
1235 $h[4] = "four" ;
1236
1237
1238 # Print the records in order.
1239 #
1240 # The length method is needed here because evaluating a tied
1241 # array in a scalar context does not return the number of
1242 # elements in the array.
1243
1244 print "\nORIGINAL\n" ;
1245 foreach $i (0 .. $H->length - 1) {
1246 print "$i: $h[$i]\n" ;
1247 }
1248
1249 # use the push & pop methods
1250 $a = $H->pop ;
1251 $H->push("last") ;
1252 print "\nThe last record was [$a]\n" ;
1253
1254 # and the shift & unshift methods
1255 $a = $H->shift ;
1256 $H->unshift("first") ;
1257 print "The first record was [$a]\n" ;
1258
1259 # Use the API to add a new record after record 2.
1260 $i = 2 ;
1261 $H->put($i, "Newbie", R_IAFTER) ;
1262
1263 # and a new record before record 1.
1264 $i = 1 ;
1265 $H->put($i, "New One", R_IBEFORE) ;
1266
1267 # delete record 3
1268 $H->del(3) ;
1269
1270 # now print the records in reverse order
1271 print "\nREVERSE\n" ;
1272 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1273 { print "$i: $h[$i]\n" }
1274
1275 # same again, but use the API functions instead
1276 print "\nREVERSE again\n" ;
610ab055 1277 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1278 for ($s = $H->seq($k, $v, R_LAST) ;
1279 $s == 0 ;
1280 $s = $H->seq($k, $v, R_PREV))
1281 { print "$k: $v\n" }
1282
1283 undef $H ;
1284 untie @h ;
1285
1286and this is what it outputs:
1287
1288 ORIGINAL
1289 0: zero
1290 1: one
1291 2: two
1292 3: three
1293 4: four
1294
1295 The last record was [four]
1296 The first record was [zero]
1297
1298 REVERSE
1299 5: last
1300 4: three
1301 3: Newbie
1302 2: one
1303 1: New One
1304 0: first
1305
1306 REVERSE again
1307 5: last
1308 4: three
1309 3: Newbie
1310 2: one
1311 1: New One
1312 0: first
1313
1314Notes:
1315
1316=over 5
1317
1318=item 1.
1319
1320Rather than iterating through the array, C<@h> like this:
1321
1322 foreach $i (@h)
1323
1324it is necessary to use either this:
1325
1326 foreach $i (0 .. $H->length - 1)
1327
1328or this:
1329
1330 for ($a = $H->seq($k, $v, R_FIRST) ;
1331 $a == 0 ;
1332 $a = $H->seq($k, $v, R_NEXT) )
1333
1334=item 2.
1335
1336Notice that both times the C<put> method was used the record index was
1337specified using a variable, C<$i>, rather than the literal value
1338itself. This is because C<put> will return the record number of the
1339inserted line via that parameter.
1340
1341=back
1342
1343=head1 THE API INTERFACE
3b35bae3 1344
1345As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1346possible to make direct use of most of the API functions defined in the
8e07c86e 1347Berkeley DB documentation.
3b35bae3 1348
88108326 1349To do this you need to store a copy of the object returned from the tie.
3b35bae3 1350
88108326 1351 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1352
8e07c86e 1353Once you have done that, you can access the Berkeley DB API functions
88108326 1354as B<DB_File> methods directly like this:
3b35bae3 1355
1356 $db->put($key, $value, R_NOOVERWRITE) ;
1357
88108326 1358B<Important:> If you have saved a copy of the object returned from
1359C<tie>, the underlying database file will I<not> be closed until both
1360the tied variable is untied and all copies of the saved object are
610ab055 1361destroyed.
88108326 1362
1363 use DB_File ;
1364 $db = tie %hash, "DB_File", "filename"
1365 or die "Cannot tie filename: $!" ;
1366 ...
1367 undef $db ;
1368 untie %hash ;
1369
9a2c4ce3 1370See L<The untie() Gotcha> for more details.
778183f3 1371
88108326 1372All the functions defined in L<dbopen> are available except for
1373close() and dbopen() itself. The B<DB_File> method interface to the
1374supported functions has been implemented to mirror the way Berkeley DB
1375works whenever possible. In particular note that:
1376
1377=over 5
1378
1379=item *
1380
1381The methods return a status value. All return 0 on success.
1382All return -1 to signify an error and set C<$!> to the exact
1383error code. The return code 1 generally (but not always) means that the
1384key specified did not exist in the database.
1385
1386Other return codes are defined. See below and in the Berkeley DB
1387documentation for details. The Berkeley DB documentation should be used
1388as the definitive source.
1389
1390=item *
3b35bae3 1391
88108326 1392Whenever a Berkeley DB function returns data via one of its parameters,
1393the equivalent B<DB_File> method does exactly the same.
3b35bae3 1394
88108326 1395=item *
1396
1397If you are careful, it is possible to mix API calls with the tied
1398hash/array interface in the same piece of code. Although only a few of
1399the methods used to implement the tied interface currently make use of
1400the cursor, you should always assume that the cursor has been changed
1401any time the tied hash/array interface is used. As an example, this
1402code will probably not do what you expect:
1403
1404 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1405 or die "Cannot tie $filename: $!" ;
1406
1407 # Get the first key/value pair and set the cursor
1408 $X->seq($key, $value, R_FIRST) ;
1409
1410 # this line will modify the cursor
1411 $count = scalar keys %x ;
1412
1413 # Get the second key/value pair.
1414 # oops, it didn't, it got the last key/value pair!
1415 $X->seq($key, $value, R_NEXT) ;
1416
1417The code above can be rearranged to get around the problem, like this:
1418
1419 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1420 or die "Cannot tie $filename: $!" ;
1421
1422 # this line will modify the cursor
1423 $count = scalar keys %x ;
1424
1425 # Get the first key/value pair and set the cursor
1426 $X->seq($key, $value, R_FIRST) ;
1427
1428 # Get the second key/value pair.
1429 # worked this time.
1430 $X->seq($key, $value, R_NEXT) ;
1431
1432=back
1433
1434All the constants defined in L<dbopen> for use in the flags parameters
1435in the methods defined below are also available. Refer to the Berkeley
1436DB documentation for the precise meaning of the flags values.
1437
1438Below is a list of the methods available.
3b35bae3 1439
1440=over 5
1441
f6b705ef 1442=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1443
1444Given a key (C<$key>) this method reads the value associated with it
1445from the database. The value read from the database is returned in the
1446C<$value> parameter.
3b35bae3 1447
88108326 1448If the key does not exist the method returns 1.
3b35bae3 1449
88108326 1450No flags are currently defined for this method.
3b35bae3 1451
f6b705ef 1452=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1453
88108326 1454Stores the key/value pair in the database.
1455
1456If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1457will have the record number of the inserted key/value pair set.
3b35bae3 1458
88108326 1459Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1460R_SETCURSOR.
1461
f6b705ef 1462=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1463
88108326 1464Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1465
88108326 1466A return code of 1 means that the requested key was not in the
1467database.
3b35bae3 1468
88108326 1469R_CURSOR is the only valid flag at present.
3b35bae3 1470
f6b705ef 1471=item B<$status = $X-E<gt>fd ;>
3b35bae3 1472
88108326 1473Returns the file descriptor for the underlying database.
3b35bae3 1474
f6b705ef 1475See L<Locking Databases> for an example of how to make use of the
88108326 1476C<fd> method to lock your database.
3b35bae3 1477
f6b705ef 1478=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1479
88108326 1480This interface allows sequential retrieval from the database. See
1481L<dbopen> for full details.
1482
1483Both the C<$key> and C<$value> parameters will be set to the key/value
1484pair read from the database.
1485
1486The flags parameter is mandatory. The valid flag values are R_CURSOR,
1487R_FIRST, R_LAST, R_NEXT and R_PREV.
1488
f6b705ef 1489=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1490
1491Flushes any cached buffers to disk.
1492
1493R_RECNOSYNC is the only valid flag at present.
3b35bae3 1494
1495=back
1496
c8e4dba7 1497=head1 DBM FILTERS
1498
1499A DBM Filter is a piece of code that is used when you I<always>
1500want to make the same transformation to all keys and/or values in a
1501DBM database.
1502
1503There are four methods associated with DBM Filters. All work identically,
1504and each is used to install (or uninstall) a single DBM Filter. Each
1505expects a single parameter, namely a reference to a sub. The only
1506difference between them is the place that the filter is installed.
1507
1508To summarise:
1509
1510=over 5
1511
1512=item B<filter_store_key>
1513
1514If a filter has been installed with this method, it will be invoked
1515every time you write a key to a DBM database.
1516
1517=item B<filter_store_value>
1518
1519If a filter has been installed with this method, it will be invoked
1520every time you write a value to a DBM database.
1521
1522
1523=item B<filter_fetch_key>
1524
1525If a filter has been installed with this method, it will be invoked
1526every time you read a key from a DBM database.
1527
1528=item B<filter_fetch_value>
1529
1530If a filter has been installed with this method, it will be invoked
1531every time you read a value from a DBM database.
1532
1533=back
1534
1535You can use any combination of the methods, from none, to all four.
1536
1537All filter methods return the existing filter, if present, or C<undef>
1538if not.
1539
1540To delete a filter pass C<undef> to it.
1541
1542=head2 The Filter
1543
1544When each filter is called by Perl, a local copy of C<$_> will contain
1545the key or value to be filtered. Filtering is achieved by modifying
1546the contents of C<$_>. The return code from the filter is ignored.
1547
1548=head2 An Example -- the NULL termination problem.
1549
1550Consider the following scenario. You have a DBM database
1551that you need to share with a third-party C application. The C application
1552assumes that I<all> keys and values are NULL terminated. Unfortunately
1553when Perl writes to DBM databases it doesn't use NULL termination, so
1554your Perl application will have to manage NULL termination itself. When
1555you write to the database you will have to use something like this:
1556
1557 $hash{"$key\0"} = "$value\0" ;
1558
1559Similarly the NULL needs to be taken into account when you are considering
1560the length of existing keys/values.
1561
1562It would be much better if you could ignore the NULL terminations issue
1563in the main application code and have a mechanism that automatically
1564added the terminating NULL to all keys and values whenever you write to
1565the database and have them removed when you read from the database. As I'm
1566sure you have already guessed, this is a problem that DBM Filters can
1567fix very easily.
1568
1569 use strict ;
1570 use DB_File ;
1571
1572 my %hash ;
1573 my $filename = "/tmp/filt" ;
1574 unlink $filename ;
1575
1576 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1577 or die "Cannot open $filename: $!\n" ;
1578
1579 # Install DBM Filters
1580 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1581 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1582 $db->filter_fetch_value( sub { s/\0$// } ) ;
1583 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1584
1585 $hash{"abc"} = "def" ;
1586 my $a = $hash{"ABC"} ;
1587 # ...
1588 undef $db ;
1589 untie %hash ;
1590
1591Hopefully the contents of each of the filters should be
1592self-explanatory. Both "fetch" filters remove the terminating NULL,
1593and both "store" filters add a terminating NULL.
1594
1595
1596=head2 Another Example -- Key is a C int.
1597
1598Here is another real-life example. By default, whenever Perl writes to
1599a DBM database it always writes the key and value as strings. So when
1600you use this:
1601
1602 $hash{12345} = "something" ;
1603
1604the key 12345 will get stored in the DBM database as the 5 byte string
1605"12345". If you actually want the key to be stored in the DBM database
1606as a C int, you will have to use C<pack> when writing, and C<unpack>
1607when reading.
1608
1609Here is a DBM Filter that does it:
1610
1611 use strict ;
1612 use DB_File ;
1613 my %hash ;
1614 my $filename = "/tmp/filt" ;
1615 unlink $filename ;
1616
1617
1618 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1619 or die "Cannot open $filename: $!\n" ;
1620
1621 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1622 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1623 $hash{123} = "def" ;
1624 # ...
1625 undef $db ;
1626 untie %hash ;
1627
1628This time only two filters have been used -- we only need to manipulate
1629the contents of the key, so it wasn't necessary to install any value
1630filters.
1631
f6b705ef 1632=head1 HINTS AND TIPS
3b35bae3 1633
3b35bae3 1634
cb1a09d0 1635=head2 Locking Databases
3b35bae3 1636
cb1a09d0 1637Concurrent access of a read-write database by several parties requires
1638them all to use some kind of locking. Here's an example of Tom's that
1639uses the I<fd> method to get the file descriptor, and then a careful
1640open() to give something Perl will flock() for you. Run this repeatedly
1641in the background to watch the locks granted in proper order.
3b35bae3 1642
cb1a09d0 1643 use DB_File;
1644
1645 use strict;
1646
1647 sub LOCK_SH { 1 }
1648 sub LOCK_EX { 2 }
1649 sub LOCK_NB { 4 }
1650 sub LOCK_UN { 8 }
1651
1652 my($oldval, $fd, $db, %db, $value, $key);
1653
1654 $key = shift || 'default';
1655 $value = shift || 'magic';
1656
1657 $value .= " $$";
1658
1659 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1660 || die "dbcreat /tmp/foo.db $!";
1661 $fd = $db->fd;
1662 print "$$: db fd is $fd\n";
1663 open(DB_FH, "+<&=$fd") || die "dup $!";
1664
1665
1666 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1667 print "$$: CONTENTION; can't read during write update!
1668 Waiting for read lock ($!) ....";
1669 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1670 }
1671 print "$$: Read lock granted\n";
1672
1673 $oldval = $db{$key};
1674 print "$$: Old value was $oldval\n";
1675 flock(DB_FH, LOCK_UN);
1676
1677 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1678 print "$$: CONTENTION; must have exclusive lock!
1679 Waiting for write lock ($!) ....";
1680 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1681 }
1682
1683 print "$$: Write lock granted\n";
1684 $db{$key} = $value;
610ab055 1685 $db->sync; # to flush
cb1a09d0 1686 sleep 10;
1687
1688 flock(DB_FH, LOCK_UN);
88108326 1689 undef $db;
cb1a09d0 1690 untie %db;
1691 close(DB_FH);
1692 print "$$: Updated db to $key=$value\n";
1693
68dc0745 1694=head2 Sharing Databases With C Applications
f6b705ef 1695
1696There is no technical reason why a Berkeley DB database cannot be
1697shared by both a Perl and a C application.
1698
1699The vast majority of problems that are reported in this area boil down
1700to the fact that C strings are NULL terminated, whilst Perl strings are
c8e4dba7 1701not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 1702
1703Here is a real example. Netscape 2.0 keeps a record of the locations you
1704visit along with the time you last visited them in a DB_HASH database.
1705This is usually stored in the file F<~/.netscape/history.db>. The key
1706field in the database is the location string and the value field is the
1707time the location was last visited stored as a 4 byte binary value.
1708
1709If you haven't already guessed, the location string is stored with a
1710terminating NULL. This means you need to be careful when accessing the
1711database.
1712
1713Here is a snippet of code that is loosely based on Tom Christiansen's
1714I<ggh> script (available from your nearest CPAN archive in
1715F<authors/id/TOMC/scripts/nshist.gz>).
1716
610ab055 1717 use strict ;
f6b705ef 1718 use DB_File ;
1719 use Fcntl ;
f6b705ef 1720
610ab055 1721 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1722 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1723
1724 $HISTORY = "$dotdir/.netscape/history.db";
1725
1726 tie %hist_db, 'DB_File', $HISTORY
1727 or die "Cannot open $HISTORY: $!\n" ;;
1728
1729 # Dump the complete database
1730 while ( ($href, $binary_time) = each %hist_db ) {
1731
1732 # remove the terminating NULL
1733 $href =~ s/\x00$// ;
1734
1735 # convert the binary time into a user friendly string
1736 $date = localtime unpack("V", $binary_time);
1737 print "$date $href\n" ;
1738 }
1739
1740 # check for the existence of a specific key
1741 # remember to add the NULL
1742 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1743 $date = localtime unpack("V", $binary_time) ;
1744 print "Last visited mox.perl.com on $date\n" ;
1745 }
1746 else {
1747 print "Never visited mox.perl.com\n"
1748 }
1749
1750 untie %hist_db ;
1751
68dc0745 1752=head2 The untie() Gotcha
778183f3 1753
7a2e2cd6 1754If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1755recommended that you read L<perltie/The untie Gotcha>.
778183f3 1756
1757Even if you don't currently make use of the API interface, it is still
1758worth reading it.
1759
1760Here is an example which illustrates the problem from a B<DB_File>
1761perspective:
1762
1763 use DB_File ;
1764 use Fcntl ;
1765
1766 my %x ;
1767 my $X ;
1768
1769 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1770 or die "Cannot tie first time: $!" ;
1771
1772 $x{123} = 456 ;
1773
1774 untie %x ;
1775
1776 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1777 or die "Cannot tie second time: $!" ;
1778
1779 untie %x ;
1780
1781When run, the script will produce this error message:
1782
1783 Cannot tie second time: Invalid argument at bad.file line 14.
1784
1785Although the error message above refers to the second tie() statement
1786in the script, the source of the problem is really with the untie()
1787statement that precedes it.
1788
1789Having read L<perltie> you will probably have already guessed that the
1790error is caused by the extra copy of the tied object stored in C<$X>.
1791If you haven't, then the problem boils down to the fact that the
1792B<DB_File> destructor, DESTROY, will not be called until I<all>
1793references to the tied object are destroyed. Both the tied variable,
1794C<%x>, and C<$X> above hold a reference to the object. The call to
1795untie() will destroy the first, but C<$X> still holds a valid
1796reference, so the destructor will not get called and the database file
1797F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1798attempt to open a database that is already open via the catch-all
1799"Invalid argument" doesn't help.
1800
1801If you run the script with the C<-w> flag the error message becomes:
1802
1803 untie attempted while 1 inner references still exist at bad.file line 12.
1804 Cannot tie second time: Invalid argument at bad.file line 14.
1805
1806which pinpoints the real problem. Finally the script can now be
1807modified to fix the original problem by destroying the API object
1808before the untie:
1809
1810 ...
1811 $x{123} = 456 ;
1812
1813 undef $X ;
1814 untie %x ;
1815
1816 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1817 ...
1818
f6b705ef 1819
1820=head1 COMMON QUESTIONS
1821
1822=head2 Why is there Perl source in my database?
1823
1824If you look at the contents of a database file created by DB_File,
1825there can sometimes be part of a Perl script included in it.
1826
1827This happens because Berkeley DB uses dynamic memory to allocate
1828buffers which will subsequently be written to the database file. Being
1829dynamic, the memory could have been used for anything before DB
1830malloced it. As Berkeley DB doesn't clear the memory once it has been
1831allocated, the unused portions will contain random junk. In the case
1832where a Perl script gets written to the database, the random junk will
1833correspond to an area of dynamic memory that happened to be used during
1834the compilation of the script.
1835
1836Unless you don't like the possibility of there being part of your Perl
1837scripts embedded in a database file, this is nothing to worry about.
1838
1839=head2 How do I store complex data structures with DB_File?
1840
1841Although B<DB_File> cannot do this directly, there is a module which
1842can layer transparently over B<DB_File> to accomplish this feat.
1843
1844Check out the MLDBM module, available on CPAN in the directory
1845F<modules/by-module/MLDBM>.
1846
1847=head2 What does "Invalid Argument" mean?
1848
1849You will get this error message when one of the parameters in the
1850C<tie> call is wrong. Unfortunately there are quite a few parameters to
1851get wrong, so it can be difficult to figure out which one it is.
1852
1853Here are a couple of possibilities:
1854
1855=over 5
1856
1857=item 1.
1858
610ab055 1859Attempting to reopen a database without closing it.
f6b705ef 1860
1861=item 2.
1862
1863Using the O_WRONLY flag.
1864
1865=back
1866
1867=head2 What does "Bareword 'DB_File' not allowed" mean?
1868
1869You will encounter this particular error message when you have the
1870C<strict 'subs'> pragma (or the full strict pragma) in your script.
1871Consider this script:
1872
1873 use strict ;
1874 use DB_File ;
1875 use vars qw(%x) ;
1876 tie %x, DB_File, "filename" ;
1877
1878Running it produces the error in question:
1879
1880 Bareword "DB_File" not allowed while "strict subs" in use
1881
1882To get around the error, place the word C<DB_File> in either single or
1883double quotes, like this:
1884
1885 tie %x, "DB_File", "filename" ;
1886
1887Although it might seem like a real pain, it is really worth the effort
1888of having a C<use strict> in all your scripts.
1889
c8e4dba7 1890=head1 REFERENCES
1891
1892Articles that are either about B<DB_File> or make use of it.
1893
1894=over 5
1895
1896=item 1.
1897
1898I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
1899Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
1900
1901=back
1902
cb1a09d0 1903=head1 HISTORY
1904
1f70e1ea 1905Moved to the Changes file.
610ab055 1906
1f70e1ea 1907=head1 BUGS
05475680 1908
1f70e1ea 1909Some older versions of Berkeley DB had problems with fixed length
1910records using the RECNO file format. This problem has been fixed since
1911version 1.85 of Berkeley DB.
e858de61 1912
1f70e1ea 1913I am sure there are bugs in the code. If you do find any, or can
1914suggest any enhancements, I would welcome your comments.
a6ed719b 1915
1f70e1ea 1916=head1 AVAILABILITY
a6ed719b 1917
1f70e1ea 1918B<DB_File> comes with the standard Perl source distribution. Look in
1919the directory F<ext/DB_File>. Given the amount of time between releases
1920of Perl the version that ships with Perl is quite likely to be out of
1921date, so the most recent version can always be found on CPAN (see
1922L<perlmod/CPAN> for details), in the directory
1923F<modules/by-module/DB_File>.
a6ed719b 1924
1f70e1ea 1925This version of B<DB_File> will work with either version 1.x or 2.x of
1926Berkeley DB, but is limited to the functionality provided by version 1.
a6ed719b 1927
c8e4dba7 1928The official web site for Berkeley DB is F<http://www.sleepycat.com>.
1929Both versions 1 and 2 of Berkeley DB are available there.
93af7a87 1930
1f70e1ea 1931Alternatively, Berkeley DB version 1 is available at your nearest CPAN
1932archive in F<src/misc/db.1.85.tar.gz>.
e858de61 1933
1f70e1ea 1934If you are running IRIX, then get Berkeley DB version 1 from
1935F<http://reality.sgi.com/ariel>. It has the patches necessary to
1936compile properly on IRIX 5.3.
610ab055 1937
1f70e1ea 1938=head1 COPYRIGHT
3b35bae3 1939
c8e4dba7 1940Copyright (c) 1995-1999 Paul Marquess. All rights reserved. This program
a9fd575d 1941is free software; you can redistribute it and/or modify it under the
1942same terms as Perl itself.
3b35bae3 1943
1f70e1ea 1944Although B<DB_File> is covered by the Perl license, the library it
1945makes use of, namely Berkeley DB, is not. Berkeley DB has its own
1946copyright and its own license. Please take the time to read it.
3b35bae3 1947
a9fd575d 1948Here are a few words taken from the Berkeley DB FAQ (at
1949http://www.sleepycat.com) regarding the license:
68dc0745 1950
a9fd575d 1951 Do I have to license DB to use it in Perl scripts?
3b35bae3 1952
a9fd575d 1953 No. The Berkeley DB license requires that software that uses
1954 Berkeley DB be freely redistributable. In the case of Perl, that
1955 software is Perl, and not your scripts. Any Perl scripts that you
1956 write are your property, including scripts that make use of
1957 Berkeley DB. Neither the Perl license nor the Berkeley DB license
1958 place any restriction on what you may do with them.
88108326 1959
1f70e1ea 1960If you are in any doubt about the license situation, contact either the
1961Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 1962
1963
3b35bae3 1964=head1 SEE ALSO
1965
9fe6733a 1966L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
1967L<dbmfilter>
3b35bae3 1968
3b35bae3 1969=head1 AUTHOR
1970
8e07c86e 1971The DB_File interface was written by Paul Marquess
6ca2e664 1972E<lt>Paul.Marquess@btinternet.comE<gt>.
d3ef3b8a 1973Questions about the DB system itself may be addressed to
1974E<lt>db@sleepycat.comE<gt>.
3b35bae3 1975
1976=cut