ext/DB_File/DB_File.pm

   1 # DB_File.pm -- Perl 5 interface to Berkeley DB
   2 #
   3 # written by Paul Marquess (Paul.Marquess@btinternet.com)
   4 # last modified 16th January 2000
   5 # version 1.72
   6 #
   7 #     Copyright (c) 1995-2000 Paul Marquess. All rights reserved.
   8 #     This program is free software; you can redistribute it and/or
   9 #     modify it under the same terms as Perl itself.
  10
  11
  12 package DB_File::HASHINFO ;
  13
  14 require 5.003 ;
  15
  16 use strict;
  17 use Carp;
  18 require Tie::Hash;
  19 @DB_File::HASHINFO::ISA = qw(Tie::Hash);
  20
  21 sub new
  22 {
  23     my $pkg = shift ;
  24     my %x ;
  25     tie %x, $pkg ;
  26     bless \%x, $pkg ;
  27 }
  28
  29
  30 sub TIEHASH
  31 {
  32     my $pkg = shift ;
  33
  34     bless { VALID => { map {$_, 1}
  35                        qw( bsize ffactor nelem cachesize hash lorder)
  36                      },
  37             GOT   => {}
  38           }, $pkg ;
  39 }
  40
  41
  42 sub FETCH
  43 {
  44     my $self  = shift ;
  45     my $key   = shift ;
  46
  47     return $self->{GOT}{$key} if exists $self->{VALID}{$key}  ;
  48
  49     my $pkg = ref $self ;
  50     croak "${pkg}::FETCH - Unknown element '$key'" ;
  51 }
  52
  53
  54 sub STORE
  55 {
  56     my $self  = shift ;
  57     my $key   = shift ;
  58     my $value = shift ;
  59
  60     if ( exists $self->{VALID}{$key} )
  61     {
  62         $self->{GOT}{$key} = $value ;
  63         return ;
  64     }
  65
  66     my $pkg = ref $self ;
  67     croak "${pkg}::STORE - Unknown element '$key'" ;
  68 }
  69
  70 sub DELETE
  71 {
  72     my $self = shift ;
  73     my $key  = shift ;
  74
  75     if ( exists $self->{VALID}{$key} )
  76     {
  77         delete $self->{GOT}{$key} ;
  78         return ;
  79     }
  80
  81     my $pkg = ref $self ;
  82     croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
  83 }
  84
  85 sub EXISTS
  86 {
  87     my $self = shift ;
  88     my $key  = shift ;
  89
  90     exists $self->{VALID}{$key} ;
  91 }
  92
  93 sub NotHere
  94 {
  95     my $self = shift ;
  96     my $method = shift ;
  97
  98     croak ref($self) . " does not define the method ${method}" ;
  99 }
 100
 101 sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
 102 sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
 103 sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }
 104
 105 package DB_File::RECNOINFO ;
 106
 107 use strict ;
 108
 109 @DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
 110
 111 sub TIEHASH
 112 {
 113     my $pkg = shift ;
 114
 115     bless { VALID => { map {$_, 1}
 116                        qw( bval cachesize psize flags lorder reclen bfname )
 117                      },
 118             GOT   => {},
 119           }, $pkg ;
 120 }
 121
 122 package DB_File::BTREEINFO ;
 123
 124 use strict ;
 125
 126 @DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
 127
 128 sub TIEHASH
 129 {
 130     my $pkg = shift ;
 131
 132     bless { VALID => { map {$_, 1}
 133                        qw( flags cachesize maxkeypage minkeypage psize
 134                            compare prefix lorder )
 135                      },
 136             GOT   => {},
 137           }, $pkg ;
 138 }
 139
 140
 141 package DB_File ;
 142
 143 use strict;
 144 use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO
 145             $db_version $use_XSLoader
 146            ) ;
 147 use Carp;
 148
 149
 150 $VERSION = "1.72" ;
 151
 152 #typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
 153 $DB_BTREE = new DB_File::BTREEINFO ;
 154 $DB_HASH  = new DB_File::HASHINFO ;
 155 $DB_RECNO = new DB_File::RECNOINFO ;
 156
 157 require Tie::Hash;
 158 require Exporter;
 159 use AutoLoader;
 160 BEGIN {
 161     $use_XSLoader = 1 ;
 162     eval { require XSLoader } ;
 163
 164     if ($@) {
 165         $use_XSLoader = 0 ;
 166         require DynaLoader;
 167         @ISA = qw(DynaLoader);
 168     }
 169 }
 170
 171 push @ISA, qw(Tie::Hash Exporter);
 172 @EXPORT = qw(
 173         $DB_BTREE $DB_HASH $DB_RECNO
 174
 175         BTREEMAGIC
 176         BTREEVERSION
 177         DB_LOCK
 178         DB_SHMEM
 179         DB_TXN
 180         HASHMAGIC
 181         HASHVERSION
 182         MAX_PAGE_NUMBER
 183         MAX_PAGE_OFFSET
 184         MAX_REC_NUMBER
 185         RET_ERROR
 186         RET_SPECIAL
 187         RET_SUCCESS
 188         R_CURSOR
 189         R_DUP
 190         R_FIRST
 191         R_FIXEDLEN
 192         R_IAFTER
 193         R_IBEFORE
 194         R_LAST
 195         R_NEXT
 196         R_NOKEY
 197         R_NOOVERWRITE
 198         R_PREV
 199         R_RECNOSYNC
 200         R_SETCURSOR
 201         R_SNAPSHOT
 202         __R_UNUSED
 203
 204 );
 205
 206 sub AUTOLOAD {
 207     my($constname);
 208     ($constname = $AUTOLOAD) =~ s/.*:://;
 209     my $val = constant($constname, @_ ? $_[0] : 0);
 210     if ($! != 0) {
 211         if ($! =~ /Invalid/ || $!{EINVAL}) {
 212             $AutoLoader::AUTOLOAD = $AUTOLOAD;
 213             goto &AutoLoader::AUTOLOAD;
 214         }
 215         else {
 216             my($pack,$file,$line) = caller;
 217             croak "Your vendor has not defined DB macro $constname, used at $file line $line.
 218 ";
 219         }
 220     }
 221     eval "sub $AUTOLOAD { $val }";
 222     goto &$AUTOLOAD;
 223 }
 224
 225
 226 eval {
 227     # Make all Fcntl O_XXX constants available for importing
 228     require Fcntl;
 229     my @O = grep /^O_/, @Fcntl::EXPORT;
 230     Fcntl->import(@O);  # first we import what we want to export
 231     push(@EXPORT, @O);
 232 };
 233
 234 if ($use_XSLoader)
 235   { XSLoader::load("DB_File", $VERSION)}
 236 else
 237   { bootstrap DB_File $VERSION }
 238
 239 # Preloaded methods go here.  Autoload methods go after __END__, and are
 240 # processed by the autosplit program.
 241
 242 sub tie_hash_or_array
 243 {
 244     my (@arg) = @_ ;
 245     my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
 246
 247     $arg[4] = tied %{ $arg[4] }
 248         if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
 249
 250     # make recno in Berkeley DB version 2 work like recno in version 1.
 251     if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
 252         $arg[1] and ! -e $arg[1]) {
 253         open(FH, ">$arg[1]") or return undef ;
 254         close FH ;
 255         chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
 256     }
 257
 258     DoTie_($tieHASH, @arg) ;
 259 }
 260
 261 sub TIEHASH
 262 {
 263     tie_hash_or_array(@_) ;
 264 }
 265
 266 sub TIEARRAY
 267 {
 268     tie_hash_or_array(@_) ;
 269 }
 270
 271 sub CLEAR
 272 {
 273     my $self = shift;
 274     my $key = "" ;
 275     my $value = "" ;
 276     my $status = $self->seq($key, $value, R_FIRST());
 277     my @keys;
 278
 279     while ($status == 0) {
 280         push @keys, $key;
 281         $status = $self->seq($key, $value, R_NEXT());
 282     }
 283     foreach $key (reverse @keys) {
 284         my $s = $self->del($key);
 285     }
 286 }
 287
 288 sub EXTEND { }
 289
 290 sub STORESIZE
 291 {
 292     my $self = shift;
 293     my $length = shift ;
 294     my $current_length = $self->length() ;
 295
 296     if ($length < $current_length) {
 297         my $key ;
 298         for ($key = $current_length - 1 ; $key >= $length ; -- $key)
 299           { $self->del($key) }
 300     }
 301     elsif ($length > $current_length) {
 302         $self->put($length-1, "") ;
 303     }
 304 }
 305
 306 sub find_dup
 307 {
 308     croak "Usage: \$db->find_dup(key,value)\n"
 309         unless @_ == 3 ;
 310
 311     my $db        = shift ;
 312     my ($origkey, $value_wanted) = @_ ;
 313     my ($key, $value) = ($origkey, 0);
 314     my ($status) = 0 ;
 315
 316     for ($status = $db->seq($key, $value, R_CURSOR() ) ;
 317          $status == 0 ;
 318          $status = $db->seq($key, $value, R_NEXT() ) ) {
 319
 320         return 0 if $key eq $origkey and $value eq $value_wanted ;
 321     }
 322
 323     return $status ;
 324 }
 325
 326 sub del_dup
 327 {
 328     croak "Usage: \$db->del_dup(key,value)\n"
 329         unless @_ == 3 ;
 330
 331     my $db        = shift ;
 332     my ($key, $value) = @_ ;
 333     my ($status) = $db->find_dup($key, $value) ;
 334     return $status if $status != 0 ;
 335
 336     $status = $db->del($key, R_CURSOR() ) ;
 337     return $status ;
 338 }
 339
 340 sub get_dup
 341 {
 342     croak "Usage: \$db->get_dup(key [,flag])\n"
 343         unless @_ == 2 or @_ == 3 ;
 344
 345     my $db        = shift ;
 346     my $key       = shift ;
 347     my $flag      = shift ;
 348     my $value     = 0 ;
 349     my $origkey   = $key ;
 350     my $wantarray = wantarray ;
 351     my %values    = () ;
 352     my @values    = () ;
 353     my $counter   = 0 ;
 354     my $status    = 0 ;
 355
 356     # iterate through the database until either EOF ($status == 0)
 357     # or a different key is encountered ($key ne $origkey).
 358     for ($status = $db->seq($key, $value, R_CURSOR()) ;
 359          $status == 0 and $key eq $origkey ;
 360          $status = $db->seq($key, $value, R_NEXT()) ) {
 361
 362         # save the value or count number of matches
 363         if ($wantarray) {
 364             if ($flag)
 365                 { ++ $values{$value} }
 366             else
 367                 { push (@values, $value) }
 368         }
 369         else
 370             { ++ $counter }
 371
 372     }
 373
 374     return ($wantarray ? ($flag ? %values : @values) : $counter) ;
 375 }
 376
 377
 378 1;
 379 __END__
 380
 381 =head1 NAME
 382
 383 DB_File - Perl5 access to Berkeley DB version 1.x
 384
 385 =head1 SYNOPSIS
 386
 387  use DB_File ;
 388
 389  [$X =] tie %hash,  'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
 390  [$X =] tie %hash,  'DB_File', $filename, $flags, $mode, $DB_BTREE ;
 391  [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
 392
 393  $status = $X->del($key [, $flags]) ;
 394  $status = $X->put($key, $value [, $flags]) ;
 395  $status = $X->get($key, $value [, $flags]) ;
 396  $status = $X->seq($key, $value, $flags) ;
 397  $status = $X->sync([$flags]) ;
 398  $status = $X->fd ;
 399
 400  # BTREE only
 401  $count = $X->get_dup($key) ;
 402  @list  = $X->get_dup($key) ;
 403  %list  = $X->get_dup($key, 1) ;
 404  $status = $X->find_dup($key, $value) ;
 405  $status = $X->del_dup($key, $value) ;
 406
 407  # RECNO only
 408  $a = $X->length;
 409  $a = $X->pop ;
 410  $X->push(list);
 411  $a = $X->shift;
 412  $X->unshift(list);
 413
 414  # DBM Filters
 415  $old_filter = $db->filter_store_key  ( sub { ... } ) ;
 416  $old_filter = $db->filter_store_value( sub { ... } ) ;
 417  $old_filter = $db->filter_fetch_key  ( sub { ... } ) ;
 418  $old_filter = $db->filter_fetch_value( sub { ... } ) ;
 419
 420  untie %hash ;
 421  untie @array ;
 422
 423 =head1 DESCRIPTION
 424
 425 B<DB_File> is a module which allows Perl programs to make use of the
 426 facilities provided by Berkeley DB version 1.x (if you have a newer
 427 version of DB, see L<Using DB_File with Berkeley DB version 2 or 3>).
 428 It is assumed that you have a copy of the Berkeley DB manual pages at
 429 hand when reading this documentation. The interface defined here
 430 mirrors the Berkeley DB interface closely.
 431
 432 Berkeley DB is a C library which provides a consistent interface to a
 433 number of database formats.  B<DB_File> provides an interface to all
 434 three of the database types currently supported by Berkeley DB.
 435
 436 The file types are:
 437
 438 =over 5
 439
 440 =item B<DB_HASH>
 441
 442 This database type allows arbitrary key/value pairs to be stored in data
 443 files. This is equivalent to the functionality provided by other
 444 hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
 445 the files created using DB_HASH are not compatible with any of the
 446 other packages mentioned.
 447
 448 A default hashing algorithm, which will be adequate for most
 449 applications, is built into Berkeley DB. If you do need to use your own
 450 hashing algorithm it is possible to write your own in Perl and have
 451 B<DB_File> use it instead.
 452
 453 =item B<DB_BTREE>
 454
 455 The btree format allows arbitrary key/value pairs to be stored in a
 456 sorted, balanced binary tree.
 457
 458 As with the DB_HASH format, it is possible to provide a user defined
 459 Perl routine to perform the comparison of keys. By default, though, the
 460 keys are stored in lexical order.
 461
 462 =item B<DB_RECNO>
 463
 464 DB_RECNO allows both fixed-length and variable-length flat text files
 465 to be manipulated using the same key/value pair interface as in DB_HASH
 466 and DB_BTREE.  In this case the key will consist of a record (line)
 467 number.
 468
 469 =back
 470
 471 =head2 Using DB_File with Berkeley DB version 2 or 3
 472
 473 Although B<DB_File> is intended to be used with Berkeley DB version 1,
 474 it can also be used with version 2.or 3 In this case the interface is
 475 limited to the functionality provided by Berkeley DB 1.x. Anywhere the
 476 version 2 or 3 interface differs, B<DB_File> arranges for it to work
 477 like version 1. This feature allows B<DB_File> scripts that were built
 478 with version 1 to be migrated to version 2 or 3 without any changes.
 479
 480 If you want to make use of the new features available in Berkeley DB
 481 2.x or greater, use the Perl module B<BerkeleyDB> instead.
 482
 483 B<Note:> The database file format has changed in both Berkeley DB
 484 version 2 and 3. If you cannot recreate your databases, you must dump
 485 any existing databases with the C<db_dump185> utility that comes with
 486 Berkeley DB.
 487 Once you have rebuilt DB_File to use Berkeley DB version 2 or 3, your
 488 databases can be recreated using C<db_load>. Refer to the Berkeley DB
 489 documentation for further details.
 490
 491 Please read L<"COPYRIGHT"> before using version 2.x or 3.x of Berkeley
 492 DB with DB_File.
 493
 494 =head2 Interface to Berkeley DB
 495
 496 B<DB_File> allows access to Berkeley DB files using the tie() mechanism
 497 in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
 498 allows B<DB_File> to access Berkeley DB files using either an
 499 associative array (for DB_HASH & DB_BTREE file types) or an ordinary
 500 array (for the DB_RECNO file type).
 501
 502 In addition to the tie() interface, it is also possible to access most
 503 of the functions provided in the Berkeley DB API directly.
 504 See L<THE API INTERFACE>.
 505
 506 =head2 Opening a Berkeley DB Database File
 507
 508 Berkeley DB uses the function dbopen() to open or create a database.
 509 Here is the C prototype for dbopen():
 510
 511       DB*
 512       dbopen (const char * file, int flags, int mode,
 513               DBTYPE type, const void * openinfo)
 514
 515 The parameter C<type> is an enumeration which specifies which of the 3
 516 interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
 517 Depending on which of these is actually chosen, the final parameter,
 518 I<openinfo> points to a data structure which allows tailoring of the
 519 specific interface method.
 520
 521 This interface is handled slightly differently in B<DB_File>. Here is
 522 an equivalent call using B<DB_File>:
 523
 524         tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
 525
 526 The C<filename>, C<flags> and C<mode> parameters are the direct
 527 equivalent of their dbopen() counterparts. The final parameter $DB_HASH
 528 performs the function of both the C<type> and C<openinfo> parameters in
 529 dbopen().
 530
 531 In the example above $DB_HASH is actually a pre-defined reference to a
 532 hash object. B<DB_File> has three of these pre-defined references.
 533 Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
 534
 535 The keys allowed in each of these pre-defined references is limited to
 536 the names used in the equivalent C structure. So, for example, the
 537 $DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
 538 C<ffactor>, C<hash>, C<lorder> and C<nelem>.
 539
 540 To change one of these elements, just assign to it like this:
 541
 542         $DB_HASH->{'cachesize'} = 10000 ;
 543
 544 The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
 545 usually adequate for most applications.  If you do need to create extra
 546 instances of these objects, constructors are available for each file
 547 type.
 548
 549 Here are examples of the constructors and the valid options available
 550 for DB_HASH, DB_BTREE and DB_RECNO respectively.
 551
 552      $a = new DB_File::HASHINFO ;
 553      $a->{'bsize'} ;
 554      $a->{'cachesize'} ;
 555      $a->{'ffactor'};
 556      $a->{'hash'} ;
 557      $a->{'lorder'} ;
 558      $a->{'nelem'} ;
 559
 560      $b = new DB_File::BTREEINFO ;
 561      $b->{'flags'} ;
 562      $b->{'cachesize'} ;
 563      $b->{'maxkeypage'} ;
 564      $b->{'minkeypage'} ;
 565      $b->{'psize'} ;
 566      $b->{'compare'} ;
 567      $b->{'prefix'} ;
 568      $b->{'lorder'} ;
 569
 570      $c = new DB_File::RECNOINFO ;
 571      $c->{'bval'} ;
 572      $c->{'cachesize'} ;
 573      $c->{'psize'} ;
 574      $c->{'flags'} ;
 575      $c->{'lorder'} ;
 576      $c->{'reclen'} ;
 577      $c->{'bfname'} ;
 578
 579 The values stored in the hashes above are mostly the direct equivalent
 580 of their C counterpart. Like their C counterparts, all are set to a
 581 default values - that means you don't have to set I<all> of the
 582 values when you only want to change one. Here is an example:
 583
 584      $a = new DB_File::HASHINFO ;
 585      $a->{'cachesize'} =  12345 ;
 586      tie %y, 'DB_File', "filename", $flags, 0777, $a ;
 587
 588 A few of the options need extra discussion here. When used, the C
 589 equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
 590 to C functions. In B<DB_File> these keys are used to store references
 591 to Perl subs. Below are templates for each of the subs:
 592
 593     sub hash
 594     {
 595         my ($data) = @_ ;
 596         ...
 597         # return the hash value for $data
 598         return $hash ;
 599     }
 600
 601     sub compare
 602     {
 603         my ($key, $key2) = @_ ;
 604         ...
 605         # return  0 if $key1 eq $key2
 606         #        -1 if $key1 lt $key2
 607         #         1 if $key1 gt $key2
 608         return (-1 , 0 or 1) ;
 609     }
 610
 611     sub prefix
 612     {
 613         my ($key, $key2) = @_ ;
 614         ...
 615         # return number of bytes of $key2 which are
 616         # necessary to determine that it is greater than $key1
 617         return $bytes ;
 618     }
 619
 620 See L<Changing the BTREE sort order> for an example of using the
 621 C<compare> template.
 622
 623 If you are using the DB_RECNO interface and you intend making use of
 624 C<bval>, you should check out L<The 'bval' Option>.
 625
 626 =head2 Default Parameters
 627
 628 It is possible to omit some or all of the final 4 parameters in the
 629 call to C<tie> and let them take default values. As DB_HASH is the most
 630 common file format used, the call:
 631
 632     tie %A, "DB_File", "filename" ;
 633
 634 is equivalent to:
 635
 636     tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
 637
 638 It is also possible to omit the filename parameter as well, so the
 639 call:
 640
 641     tie %A, "DB_File" ;
 642
 643 is equivalent to:
 644
 645     tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
 646
 647 See L<In Memory Databases> for a discussion on the use of C<undef>
 648 in place of a filename.
 649
 650 =head2 In Memory Databases
 651
 652 Berkeley DB allows the creation of in-memory databases by using NULL
 653 (that is, a C<(char *)0> in C) in place of the filename.  B<DB_File>
 654 uses C<undef> instead of NULL to provide this functionality.
 655
 656 =head1 DB_HASH
 657
 658 The DB_HASH file format is probably the most commonly used of the three
 659 file formats that B<DB_File> supports. It is also very straightforward
 660 to use.
 661
 662 =head2 A Simple Example
 663
 664 This example shows how to create a database, add key/value pairs to the
 665 database, delete keys/value pairs and finally how to enumerate the
 666 contents of the database.
 667
 668     use strict ;
 669     use DB_File ;
 670     use vars qw( %h $k $v ) ;
 671
 672     unlink "fruit" ;
 673     tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
 674         or die "Cannot open file 'fruit': $!\n";
 675
 676     # Add a few key/value pairs to the file
 677     $h{"apple"} = "red" ;
 678     $h{"orange"} = "orange" ;
 679     $h{"banana"} = "yellow" ;
 680     $h{"tomato"} = "red" ;
 681
 682     # Check for existence of a key
 683     print "Banana Exists\n\n" if $h{"banana"} ;
 684
 685     # Delete a key/value pair.
 686     delete $h{"apple"} ;
 687
 688     # print the contents of the file
 689     while (($k, $v) = each %h)
 690       { print "$k -> $v\n" }
 691
 692     untie %h ;
 693
 694 here is the output:
 695
 696     Banana Exists
 697
 698     orange -> orange
 699     tomato -> red
 700     banana -> yellow
 701
 702 Note that the like ordinary associative arrays, the order of the keys
 703 retrieved is in an apparently random order.
 704
 705 =head1 DB_BTREE
 706
 707 The DB_BTREE format is useful when you want to store data in a given
 708 order. By default the keys will be stored in lexical order, but as you
 709 will see from the example shown in the next section, it is very easy to
 710 define your own sorting function.
 711
 712 =head2 Changing the BTREE sort order
 713
 714 This script shows how to override the default sorting algorithm that
 715 BTREE uses. Instead of using the normal lexical ordering, a case
 716 insensitive compare function will be used.
 717
 718     use strict ;
 719     use DB_File ;
 720
 721     my %h ;
 722
 723     sub Compare
 724     {
 725         my ($key1, $key2) = @_ ;
 726         "\L$key1" cmp "\L$key2" ;
 727     }
 728
 729     # specify the Perl sub that will do the comparison
 730     $DB_BTREE->{'compare'} = \&Compare ;
 731
 732     unlink "tree" ;
 733     tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
 734         or die "Cannot open file 'tree': $!\n" ;
 735
 736     # Add a key/value pair to the file
 737     $h{'Wall'} = 'Larry' ;
 738     $h{'Smith'} = 'John' ;
 739     $h{'mouse'} = 'mickey' ;
 740     $h{'duck'}  = 'donald' ;
 741
 742     # Delete
 743     delete $h{"duck"} ;
 744
 745     # Cycle through the keys printing them in order.
 746     # Note it is not necessary to sort the keys as
 747     # the btree will have kept them in order automatically.
 748     foreach (keys %h)
 749       { print "$_\n" }
 750
 751     untie %h ;
 752
 753 Here is the output from the code above.
 754
 755     mouse
 756     Smith
 757     Wall
 758
 759 There are a few point to bear in mind if you want to change the
 760 ordering in a BTREE database:
 761
 762 =over 5
 763
 764 =item 1.
 765
 766 The new compare function must be specified when you create the database.
 767
 768 =item 2.
 769
 770 You cannot change the ordering once the database has been created. Thus
 771 you must use the same compare function every time you access the
 772 database.
 773
 774 =back
 775
 776 =head2 Handling Duplicate Keys
 777
 778 The BTREE file type optionally allows a single key to be associated
 779 with an arbitrary number of values. This option is enabled by setting
 780 the flags element of C<$DB_BTREE> to R_DUP when creating the database.
 781
 782 There are some difficulties in using the tied hash interface if you
 783 want to manipulate a BTREE database with duplicate keys. Consider this
 784 code:
 785
 786     use strict ;
 787     use DB_File ;
 788
 789     use vars qw($filename %h ) ;
 790
 791     $filename = "tree" ;
 792     unlink $filename ;
 793
 794     # Enable duplicate records
 795     $DB_BTREE->{'flags'} = R_DUP ;
 796
 797     tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
 798         or die "Cannot open $filename: $!\n";
 799
 800     # Add some key/value pairs to the file
 801     $h{'Wall'} = 'Larry' ;
 802     $h{'Wall'} = 'Brick' ; # Note the duplicate key
 803     $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
 804     $h{'Smith'} = 'John' ;
 805     $h{'mouse'} = 'mickey' ;
 806
 807     # iterate through the associative array
 808     # and print each key/value pair.
 809     foreach (sort keys %h)
 810       { print "$_  -> $h{$_}\n" }
 811
 812     untie %h ;
 813
 814 Here is the output:
 815
 816     Smith   -> John
 817     Wall    -> Larry
 818     Wall    -> Larry
 819     Wall    -> Larry
 820     mouse   -> mickey
 821
 822 As you can see 3 records have been successfully created with key C<Wall>
 823 - the only thing is, when they are retrieved from the database they
 824 I<seem> to have the same value, namely C<Larry>. The problem is caused
 825 by the way that the associative array interface works. Basically, when
 826 the associative array interface is used to fetch the value associated
 827 with a given key, it will only ever retrieve the first value.
 828
 829 Although it may not be immediately obvious from the code above, the
 830 associative array interface can be used to write values with duplicate
 831 keys, but it cannot be used to read them back from the database.
 832
 833 The way to get around this problem is to use the Berkeley DB API method
 834 called C<seq>.  This method allows sequential access to key/value
 835 pairs. See L<THE API INTERFACE> for details of both the C<seq> method
 836 and the API in general.
 837
 838 Here is the script above rewritten using the C<seq> API method.
 839
 840     use strict ;
 841     use DB_File ;
 842
 843     use vars qw($filename $x %h $status $key $value) ;
 844
 845     $filename = "tree" ;
 846     unlink $filename ;
 847
 848     # Enable duplicate records
 849     $DB_BTREE->{'flags'} = R_DUP ;
 850
 851     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
 852         or die "Cannot open $filename: $!\n";
 853
 854     # Add some key/value pairs to the file
 855     $h{'Wall'} = 'Larry' ;
 856     $h{'Wall'} = 'Brick' ; # Note the duplicate key
 857     $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
 858     $h{'Smith'} = 'John' ;
 859     $h{'mouse'} = 'mickey' ;
 860
 861     # iterate through the btree using seq
 862     # and print each key/value pair.
 863     $key = $value = 0 ;
 864     for ($status = $x->seq($key, $value, R_FIRST) ;
 865          $status == 0 ;
 866          $status = $x->seq($key, $value, R_NEXT) )
 867       {  print "$key -> $value\n" }
 868
 869     undef $x ;
 870     untie %h ;
 871
 872 that prints:
 873
 874     Smith   -> John
 875     Wall    -> Brick
 876     Wall    -> Brick
 877     Wall    -> Larry
 878     mouse   -> mickey
 879
 880 This time we have got all the key/value pairs, including the multiple
 881 values associated with the key C<Wall>.
 882
 883 To make life easier when dealing with duplicate keys, B<DB_File> comes with
 884 a few utility methods.
 885
 886 =head2 The get_dup() Method
 887
 888 The C<get_dup> method assists in
 889 reading duplicate values from BTREE databases. The method can take the
 890 following forms:
 891
 892     $count = $x->get_dup($key) ;
 893     @list  = $x->get_dup($key) ;
 894     %list  = $x->get_dup($key, 1) ;
 895
 896 In a scalar context the method returns the number of values associated
 897 with the key, C<$key>.
 898
 899 In list context, it returns all the values which match C<$key>. Note
 900 that the values will be returned in an apparently random order.
 901
 902 In list context, if the second parameter is present and evaluates
 903 TRUE, the method returns an associative array. The keys of the
 904 associative array correspond to the values that matched in the BTREE
 905 and the values of the array are a count of the number of times that
 906 particular value occurred in the BTREE.
 907
 908 So assuming the database created above, we can use C<get_dup> like
 909 this:
 910
 911     use strict ;
 912     use DB_File ;
 913
 914     use vars qw($filename $x %h ) ;
 915
 916     $filename = "tree" ;
 917
 918     # Enable duplicate records
 919     $DB_BTREE->{'flags'} = R_DUP ;
 920
 921     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
 922         or die "Cannot open $filename: $!\n";
 923
 924     my $cnt  = $x->get_dup("Wall") ;
 925     print "Wall occurred $cnt times\n" ;
 926
 927     my %hash = $x->get_dup("Wall", 1) ;
 928     print "Larry is there\n" if $hash{'Larry'} ;
 929     print "There are $hash{'Brick'} Brick Walls\n" ;
 930
 931     my @list = sort $x->get_dup("Wall") ;
 932     print "Wall =>      [@list]\n" ;
 933
 934     @list = $x->get_dup("Smith") ;
 935     print "Smith =>     [@list]\n" ;
 936
 937     @list = $x->get_dup("Dog") ;
 938     print "Dog =>       [@list]\n" ;
 939
 940
 941 and it will print:
 942
 943     Wall occurred 3 times
 944     Larry is there
 945     There are 2 Brick Walls
 946     Wall =>     [Brick Brick Larry]
 947     Smith =>    [John]
 948     Dog =>      []
 949
 950 =head2 The find_dup() Method
 951
 952     $status = $X->find_dup($key, $value) ;
 953
 954 This method checks for the existence of a specific key/value pair. If the
 955 pair exists, the cursor is left pointing to the pair and the method
 956 returns 0. Otherwise the method returns a non-zero value.
 957
 958 Assuming the database from the previous example:
 959
 960     use strict ;
 961     use DB_File ;
 962
 963     use vars qw($filename $x %h $found) ;
 964
 965     my $filename = "tree" ;
 966
 967     # Enable duplicate records
 968     $DB_BTREE->{'flags'} = R_DUP ;
 969
 970     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
 971         or die "Cannot open $filename: $!\n";
 972
 973     $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
 974     print "Larry Wall is $found there\n" ;
 975
 976     $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
 977     print "Harry Wall is $found there\n" ;
 978
 979     undef $x ;
 980     untie %h ;
 981
 982 prints this
 983
 984     Larry Wall is  there
 985     Harry Wall is not there
 986
 987
 988 =head2 The del_dup() Method
 989
 990     $status = $X->del_dup($key, $value) ;
 991
 992 This method deletes a specific key/value pair. It returns
 993 0 if they exist and have been deleted successfully.
 994 Otherwise the method returns a non-zero value.
 995
 996 Again assuming the existence of the C<tree> database
 997
 998     use strict ;
 999     use DB_File ;
1000
1001     use vars qw($filename $x %h $found) ;
1002
1003     my $filename = "tree" ;
1004
1005     # Enable duplicate records
1006     $DB_BTREE->{'flags'} = R_DUP ;
1007
1008     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1009         or die "Cannot open $filename: $!\n";
1010
1011     $x->del_dup("Wall", "Larry") ;
1012
1013     $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1014     print "Larry Wall is $found there\n" ;
1015
1016     undef $x ;
1017     untie %h ;
1018
1019 prints this
1020
1021     Larry Wall is not there
1022
1023 =head2 Matching Partial Keys
1024
1025 The BTREE interface has a feature which allows partial keys to be
1026 matched. This functionality is I<only> available when the C<seq> method
1027 is used along with the R_CURSOR flag.
1028
1029     $x->seq($key, $value, R_CURSOR) ;
1030
1031 Here is the relevant quote from the dbopen man page where it defines
1032 the use of the R_CURSOR flag with seq:
1033
1034     Note, for the DB_BTREE access method, the returned key is not
1035     necessarily an exact match for the specified key. The returned key
1036     is the smallest key greater than or equal to the specified key,
1037     permitting partial key matches and range searches.
1038
1039 In the example script below, the C<match> sub uses this feature to find
1040 and print the first matching key/value pair given a partial key.
1041
1042     use strict ;
1043     use DB_File ;
1044     use Fcntl ;
1045
1046     use vars qw($filename $x %h $st $key $value) ;
1047
1048     sub match
1049     {
1050         my $key = shift ;
1051         my $value = 0;
1052         my $orig_key = $key ;
1053         $x->seq($key, $value, R_CURSOR) ;
1054         print "$orig_key\t-> $key\t-> $value\n" ;
1055     }
1056
1057     $filename = "tree" ;
1058     unlink $filename ;
1059
1060     $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1061         or die "Cannot open $filename: $!\n";
1062
1063     # Add some key/value pairs to the file
1064     $h{'mouse'} = 'mickey' ;
1065     $h{'Wall'} = 'Larry' ;
1066     $h{'Walls'} = 'Brick' ;
1067     $h{'Smith'} = 'John' ;
1068
1069
1070     $key = $value = 0 ;
1071     print "IN ORDER\n" ;
1072     for ($st = $x->seq($key, $value, R_FIRST) ;
1073          $st == 0 ;
1074          $st = $x->seq($key, $value, R_NEXT) )
1075
1076       {  print "$key    -> $value\n" }
1077
1078     print "\nPARTIAL MATCH\n" ;
1079
1080     match "Wa" ;
1081     match "A" ;
1082     match "a" ;
1083
1084     undef $x ;
1085     untie %h ;
1086
1087 Here is the output:
1088
1089     IN ORDER
1090     Smith -> John
1091     Wall  -> Larry
1092     Walls -> Brick
1093     mouse -> mickey
1094
1095     PARTIAL MATCH
1096     Wa -> Wall  -> Larry
1097     A  -> Smith -> John
1098     a  -> mouse -> mickey
1099
1100 =head1 DB_RECNO
1101
1102 DB_RECNO provides an interface to flat text files. Both variable and
1103 fixed length records are supported.
1104
1105 In order to make RECNO more compatible with Perl, the array offset for
1106 all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
1107
1108 As with normal Perl arrays, a RECNO array can be accessed using
1109 negative indexes. The index -1 refers to the last element of the array,
1110 -2 the second last, and so on. Attempting to access an element before
1111 the start of the array will raise a fatal run-time error.
1112
1113 =head2 The 'bval' Option
1114
1115 The operation of the bval option warrants some discussion. Here is the
1116 definition of bval from the Berkeley DB 1.85 recno manual page:
1117
1118     The delimiting byte to be used to mark  the  end  of  a
1119     record for variable-length records, and the pad charac-
1120     ter for fixed-length records.  If no  value  is  speci-
1121     fied,  newlines  (``\n'')  are  used to mark the end of
1122     variable-length records and  fixed-length  records  are
1123     padded with spaces.
1124
1125 The second sentence is wrong. In actual fact bval will only default to
1126 C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1127 openinfo parameter is used at all, the value that happens to be in bval
1128 will be used. That means you always have to specify bval when making
1129 use of any of the options in the openinfo parameter. This documentation
1130 error will be fixed in the next release of Berkeley DB.
1131
1132 That clarifies the situation with regards Berkeley DB itself. What
1133 about B<DB_File>? Well, the behavior defined in the quote above is
1134 quite useful, so B<DB_File> conforms to it.
1135
1136 That means that you can specify other options (e.g. cachesize) and
1137 still have bval default to C<"\n"> for variable length records, and
1138 space for fixed length records.
1139
1140 =head2 A Simple Example
1141
1142 Here is a simple example that uses RECNO (if you are using a version
1143 of Perl earlier than 5.004_57 this example won't work -- see
1144 L<Extra RECNO Methods> for a workaround).
1145
1146     use strict ;
1147     use DB_File ;
1148
1149     my $filename = "text" ;
1150     unlink $filename ;
1151
1152     my @h ;
1153     tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_RECNO
1154         or die "Cannot open file 'text': $!\n" ;
1155
1156     # Add a few key/value pairs to the file
1157     $h[0] = "orange" ;
1158     $h[1] = "blue" ;
1159     $h[2] = "yellow" ;
1160
1161     push @h, "green", "black" ;
1162
1163     my $elements = scalar @h ;
1164     print "The array contains $elements entries\n" ;
1165
1166     my $last = pop @h ;
1167     print "popped $last\n" ;
1168
1169     unshift @h, "white" ;
1170     my $first = shift @h ;
1171     print "shifted $first\n" ;
1172
1173     # Check for existence of a key
1174     print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1175
1176     # use a negative index
1177     print "The last element is $h[-1]\n" ;
1178     print "The 2nd last element is $h[-2]\n" ;
1179
1180     untie @h ;
1181
1182 Here is the output from the script:
1183
1184     The array contains 5 entries
1185     popped black
1186     shifted white
1187     Element 1 Exists with value blue
1188     The last element is green
1189     The 2nd last element is yellow
1190
1191 =head2 Extra RECNO Methods
1192
1193 If you are using a version of Perl earlier than 5.004_57, the tied
1194 array interface is quite limited. In the example script above
1195 C<push>, C<pop>, C<shift>, C<unshift>
1196 or determining the array length will not work with a tied array.
1197
1198 To make the interface more useful for older versions of Perl, a number
1199 of methods are supplied with B<DB_File> to simulate the missing array
1200 operations. All these methods are accessed via the object returned from
1201 the tie call.
1202
1203 Here are the methods:
1204
1205 =over 5
1206
1207 =item B<$X-E<gt>push(list) ;>
1208
1209 Pushes the elements of C<list> to the end of the array.
1210
1211 =item B<$value = $X-E<gt>pop ;>
1212
1213 Removes and returns the last element of the array.
1214
1215 =item B<$X-E<gt>shift>
1216
1217 Removes and returns the first element of the array.
1218
1219 =item B<$X-E<gt>unshift(list) ;>
1220
1221 Pushes the elements of C<list> to the start of the array.
1222
1223 =item B<$X-E<gt>length>
1224
1225 Returns the number of elements in the array.
1226
1227 =back
1228
1229 =head2 Another Example
1230
1231 Here is a more complete example that makes use of some of the methods
1232 described above. It also makes use of the API interface directly (see
1233 L<THE API INTERFACE>).
1234
1235     use strict ;
1236     use vars qw(@h $H $file $i) ;
1237     use DB_File ;
1238     use Fcntl ;
1239
1240     $file = "text" ;
1241
1242     unlink $file ;
1243
1244     $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1245         or die "Cannot open file $file: $!\n" ;
1246
1247     # first create a text file to play with
1248     $h[0] = "zero" ;
1249     $h[1] = "one" ;
1250     $h[2] = "two" ;
1251     $h[3] = "three" ;
1252     $h[4] = "four" ;
1253
1254
1255     # Print the records in order.
1256     #
1257     # The length method is needed here because evaluating a tied
1258     # array in a scalar context does not return the number of
1259     # elements in the array.
1260
1261     print "\nORIGINAL\n" ;
1262     foreach $i (0 .. $H->length - 1) {
1263         print "$i: $h[$i]\n" ;
1264     }
1265
1266     # use the push & pop methods
1267     $a = $H->pop ;
1268     $H->push("last") ;
1269     print "\nThe last record was [$a]\n" ;
1270
1271     # and the shift & unshift methods
1272     $a = $H->shift ;
1273     $H->unshift("first") ;
1274     print "The first record was [$a]\n" ;
1275
1276     # Use the API to add a new record after record 2.
1277     $i = 2 ;
1278     $H->put($i, "Newbie", R_IAFTER) ;
1279
1280     # and a new record before record 1.
1281     $i = 1 ;
1282     $H->put($i, "New One", R_IBEFORE) ;
1283
1284     # delete record 3
1285     $H->del(3) ;
1286
1287     # now print the records in reverse order
1288     print "\nREVERSE\n" ;
1289     for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1290       { print "$i: $h[$i]\n" }
1291
1292     # same again, but use the API functions instead
1293     print "\nREVERSE again\n" ;
1294     my ($s, $k, $v)  = (0, 0, 0) ;
1295     for ($s = $H->seq($k, $v, R_LAST) ;
1296              $s == 0 ;
1297              $s = $H->seq($k, $v, R_PREV))
1298       { print "$k: $v\n" }
1299
1300     undef $H ;
1301     untie @h ;
1302
1303 and this is what it outputs:
1304
1305     ORIGINAL
1306     0: zero
1307     1: one
1308     2: two
1309     3: three
1310     4: four
1311
1312     The last record was [four]
1313     The first record was [zero]
1314
1315     REVERSE
1316     5: last
1317     4: three
1318     3: Newbie
1319     2: one
1320     1: New One
1321     0: first
1322
1323     REVERSE again
1324     5: last
1325     4: three
1326     3: Newbie
1327     2: one
1328     1: New One
1329     0: first
1330
1331 Notes:
1332
1333 =over 5
1334
1335 =item 1.
1336
1337 Rather than iterating through the array, C<@h> like this:
1338
1339     foreach $i (@h)
1340
1341 it is necessary to use either this:
1342
1343     foreach $i (0 .. $H->length - 1)
1344
1345 or this:
1346
1347     for ($a = $H->get($k, $v, R_FIRST) ;
1348          $a == 0 ;
1349          $a = $H->get($k, $v, R_NEXT) )
1350
1351 =item 2.
1352
1353 Notice that both times the C<put> method was used the record index was
1354 specified using a variable, C<$i>, rather than the literal value
1355 itself. This is because C<put> will return the record number of the
1356 inserted line via that parameter.
1357
1358 =back
1359
1360 =head1 THE API INTERFACE
1361
1362 As well as accessing Berkeley DB using a tied hash or array, it is also
1363 possible to make direct use of most of the API functions defined in the
1364 Berkeley DB documentation.
1365
1366 To do this you need to store a copy of the object returned from the tie.
1367
1368         $db = tie %hash, "DB_File", "filename" ;
1369
1370 Once you have done that, you can access the Berkeley DB API functions
1371 as B<DB_File> methods directly like this:
1372
1373         $db->put($key, $value, R_NOOVERWRITE) ;
1374
1375 B<Important:> If you have saved a copy of the object returned from
1376 C<tie>, the underlying database file will I<not> be closed until both
1377 the tied variable is untied and all copies of the saved object are
1378 destroyed.
1379
1380     use DB_File ;
1381     $db = tie %hash, "DB_File", "filename"
1382         or die "Cannot tie filename: $!" ;
1383     ...
1384     undef $db ;
1385     untie %hash ;
1386
1387 See L<The untie() Gotcha> for more details.
1388
1389 All the functions defined in L<dbopen> are available except for
1390 close() and dbopen() itself. The B<DB_File> method interface to the
1391 supported functions have been implemented to mirror the way Berkeley DB
1392 works whenever possible. In particular note that:
1393
1394 =over 5
1395
1396 =item *
1397
1398 The methods return a status value. All return 0 on success.
1399 All return -1 to signify an error and set C<$!> to the exact
1400 error code. The return code 1 generally (but not always) means that the
1401 key specified did not exist in the database.
1402
1403 Other return codes are defined. See below and in the Berkeley DB
1404 documentation for details. The Berkeley DB documentation should be used
1405 as the definitive source.
1406
1407 =item *
1408
1409 Whenever a Berkeley DB function returns data via one of its parameters,
1410 the equivalent B<DB_File> method does exactly the same.
1411
1412 =item *
1413
1414 If you are careful, it is possible to mix API calls with the tied
1415 hash/array interface in the same piece of code. Although only a few of
1416 the methods used to implement the tied interface currently make use of
1417 the cursor, you should always assume that the cursor has been changed
1418 any time the tied hash/array interface is used. As an example, this
1419 code will probably not do what you expect:
1420
1421     $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1422         or die "Cannot tie $filename: $!" ;
1423
1424     # Get the first key/value pair and set  the cursor
1425     $X->seq($key, $value, R_FIRST) ;
1426
1427     # this line will modify the cursor
1428     $count = scalar keys %x ;
1429
1430     # Get the second key/value pair.
1431     # oops, it didn't, it got the last key/value pair!
1432     $X->seq($key, $value, R_NEXT) ;
1433
1434 The code above can be rearranged to get around the problem, like this:
1435
1436     $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1437         or die "Cannot tie $filename: $!" ;
1438
1439     # this line will modify the cursor
1440     $count = scalar keys %x ;
1441
1442     # Get the first key/value pair and set  the cursor
1443     $X->seq($key, $value, R_FIRST) ;
1444
1445     # Get the second key/value pair.
1446     # worked this time.
1447     $X->seq($key, $value, R_NEXT) ;
1448
1449 =back
1450
1451 All the constants defined in L<dbopen> for use in the flags parameters
1452 in the methods defined below are also available. Refer to the Berkeley
1453 DB documentation for the precise meaning of the flags values.
1454
1455 Below is a list of the methods available.
1456
1457 =over 5
1458
1459 =item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
1460
1461 Given a key (C<$key>) this method reads the value associated with it
1462 from the database. The value read from the database is returned in the
1463 C<$value> parameter.
1464
1465 If the key does not exist the method returns 1.
1466
1467 No flags are currently defined for this method.
1468
1469 =item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
1470
1471 Stores the key/value pair in the database.
1472
1473 If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
1474 will have the record number of the inserted key/value pair set.
1475
1476 Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1477 R_SETCURSOR.
1478
1479 =item B<$status = $X-E<gt>del($key [, $flags]) ;>
1480
1481 Removes all key/value pairs with key C<$key> from the database.
1482
1483 A return code of 1 means that the requested key was not in the
1484 database.
1485
1486 R_CURSOR is the only valid flag at present.
1487
1488 =item B<$status = $X-E<gt>fd ;>
1489
1490 Returns the file descriptor for the underlying database.
1491
1492 See L<Locking: The Trouble with fd> for an explanation for why you should
1493 not use C<fd> to lock your database.
1494
1495 =item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
1496
1497 This interface allows sequential retrieval from the database. See
1498 L<dbopen> for full details.
1499
1500 Both the C<$key> and C<$value> parameters will be set to the key/value
1501 pair read from the database.
1502
1503 The flags parameter is mandatory. The valid flag values are R_CURSOR,
1504 R_FIRST, R_LAST, R_NEXT and R_PREV.
1505
1506 =item B<$status = $X-E<gt>sync([$flags]) ;>
1507
1508 Flushes any cached buffers to disk.
1509
1510 R_RECNOSYNC is the only valid flag at present.
1511
1512 =back
1513
1514 =head1 DBM FILTERS
1515
1516 A DBM Filter is a piece of code that is be used when you I<always>
1517 want to make the same transformation to all keys and/or values in a
1518 DBM database.
1519
1520 There are four methods associated with DBM Filters. All work identically,
1521 and each is used to install (or uninstall) a single DBM Filter. Each
1522 expects a single parameter, namely a reference to a sub. The only
1523 difference between them is the place that the filter is installed.
1524
1525 To summarise:
1526
1527 =over 5
1528
1529 =item B<filter_store_key>
1530
1531 If a filter has been installed with this method, it will be invoked
1532 every time you write a key to a DBM database.
1533
1534 =item B<filter_store_value>
1535
1536 If a filter has been installed with this method, it will be invoked
1537 every time you write a value to a DBM database.
1538
1539
1540 =item B<filter_fetch_key>
1541
1542 If a filter has been installed with this method, it will be invoked
1543 every time you read a key from a DBM database.
1544
1545 =item B<filter_fetch_value>
1546
1547 If a filter has been installed with this method, it will be invoked
1548 every time you read a value from a DBM database.
1549
1550 =back
1551
1552 You can use any combination of the methods, from none, to all four.
1553
1554 All filter methods return the existing filter, if present, or C<undef>
1555 in not.
1556
1557 To delete a filter pass C<undef> to it.
1558
1559 =head2 The Filter
1560
1561 When each filter is called by Perl, a local copy of C<$_> will contain
1562 the key or value to be filtered. Filtering is achieved by modifying
1563 the contents of C<$_>. The return code from the filter is ignored.
1564
1565 =head2 An Example -- the NULL termination problem.
1566
1567 Consider the following scenario. You have a DBM database
1568 that you need to share with a third-party C application. The C application
1569 assumes that I<all> keys and values are NULL terminated. Unfortunately
1570 when Perl writes to DBM databases it doesn't use NULL termination, so
1571 your Perl application will have to manage NULL termination itself. When
1572 you write to the database you will have to use something like this:
1573
1574     $hash{"$key\0"} = "$value\0" ;
1575
1576 Similarly the NULL needs to be taken into account when you are considering
1577 the length of existing keys/values.
1578
1579 It would be much better if you could ignore the NULL terminations issue
1580 in the main application code and have a mechanism that automatically
1581 added the terminating NULL to all keys and values whenever you write to
1582 the database and have them removed when you read from the database. As I'm
1583 sure you have already guessed, this is a problem that DBM Filters can
1584 fix very easily.
1585
1586     use strict ;
1587     use DB_File ;
1588
1589     my %hash ;
1590     my $filename = "/tmp/filt" ;
1591     unlink $filename ;
1592
1593     my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1594       or die "Cannot open $filename: $!\n" ;
1595
1596     # Install DBM Filters
1597     $db->filter_fetch_key  ( sub { s/\0$//    } ) ;
1598     $db->filter_store_key  ( sub { $_ .= "\0" } ) ;
1599     $db->filter_fetch_value( sub { s/\0$//    } ) ;
1600     $db->filter_store_value( sub { $_ .= "\0" } ) ;
1601
1602     $hash{"abc"} = "def" ;
1603     my $a = $hash{"ABC"} ;
1604     # ...
1605     undef $db ;
1606     untie %hash ;
1607
1608 Hopefully the contents of each of the filters should be
1609 self-explanatory. Both "fetch" filters remove the terminating NULL,
1610 and both "store" filters add a terminating NULL.
1611
1612
1613 =head2 Another Example -- Key is a C int.
1614
1615 Here is another real-life example. By default, whenever Perl writes to
1616 a DBM database it always writes the key and value as strings. So when
1617 you use this:
1618
1619     $hash{12345} = "soemthing" ;
1620
1621 the key 12345 will get stored in the DBM database as the 5 byte string
1622 "12345". If you actually want the key to be stored in the DBM database
1623 as a C int, you will have to use C<pack> when writing, and C<unpack>
1624 when reading.
1625
1626 Here is a DBM Filter that does it:
1627
1628     use strict ;
1629     use DB_File ;
1630     my %hash ;
1631     my $filename = "/tmp/filt" ;
1632     unlink $filename ;
1633
1634
1635     my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1636       or die "Cannot open $filename: $!\n" ;
1637
1638     $db->filter_fetch_key  ( sub { $_ = unpack("i", $_) } ) ;
1639     $db->filter_store_key  ( sub { $_ = pack ("i", $_) } ) ;
1640     $hash{123} = "def" ;
1641     # ...
1642     undef $db ;
1643     untie %hash ;
1644
1645 This time only two filters have been used -- we only need to manipulate
1646 the contents of the key, so it wasn't necessary to install any value
1647 filters.
1648
1649 =head1 HINTS AND TIPS
1650
1651
1652 =head2 Locking: The Trouble with fd
1653
1654 Until version 1.72 of this module, the recommended technique for locking
1655 B<DB_File> databases was to flock the filehandle returned from the "fd"
1656 function. Unfortunately this technique has been shown to be fundamentally
1657 flawed (Kudos to David Harris for tracking this down). Use it at your own
1658 peril!
1659
1660 The locking technique went like this.
1661
1662     $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1663         || die "dbcreat /tmp/foo.db $!";
1664     $fd = $db->fd;
1665     open(DB_FH, "+<&=$fd") || die "dup $!";
1666     flock (DB_FH, LOCK_EX) || die "flock: $!";
1667     ...
1668     $db{"Tom"} = "Jerry" ;
1669     ...
1670     flock(DB_FH, LOCK_UN);
1671     undef $db;
1672     untie %db;
1673     close(DB_FH);
1674
1675 In simple terms, this is what happens:
1676
1677 =over 5
1678
1679 =item 1.
1680
1681 Use "tie" to open the database.
1682
1683 =item 2.
1684
1685 Lock the database with fd & flock.
1686
1687 =item 3.
1688
1689 Read & Write to the database.
1690
1691 =item 4.
1692
1693 Unlock and close the database.
1694
1695 =back
1696
1697 Here is the crux of the problem. A side-effect of opening the B<DB_File>
1698 database in step 2 is that an initial block from the database will get
1699 read from disk and cached in memory.
1700
1701 To see why this is a problem, consider what can happen when two processes,
1702 say "A" and "B", both want to update the same B<DB_File> database
1703 using the locking steps outlined above. Assume process "A" has already
1704 opened the database and has a write lock, but it hasn't actually updated
1705 the database yet (it has finished step 2, but not started step 3 yet). Now
1706 process "B" tries to open the same database - step 1 will succeed,
1707 but it will block on step 2 until process "A" releases the lock. The
1708 important thing to notice here is that at this point in time both
1709 processes will have cached identical initial blocks from the database.
1710
1711 Now process "A" updates the database and happens to change some of the
1712 data held in the initial buffer. Process "A" terminates, flushing
1713 all cached data to disk and releasing the database lock. At this point
1714 the database on disk will correctly reflect the changes made by process
1715 "A".
1716
1717 With the lock released, process "B" can now continue. It also updates the
1718 database and unfortunately it too modifies the data that was in its
1719 initial buffer. Once that data gets flushed to disk it will overwrite
1720 some/all of the changes process "A" made to the database.
1721
1722 The result of this scenario is at best a database that doesn't contain
1723 what you expect. At worst the database will corrupt.
1724
1725 The above won't happen every time competing process update the same
1726 B<DB_File> database, but it does illustrate why the technique should
1727 not be used.
1728
1729 =head2 Safe ways to lock a database
1730
1731 Starting with version 2.x, Berkeley DB  has internal support for locking.
1732 The companion module to this one, B<BerkeleyDB>, provides an interface
1733 to this locking functionality. If you are serious about locking
1734 Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1735
1736 If using B<BerkeleyDB> isn't an option, there are a number of modules
1737 available on CPAN that can be used to implement locking. Each one
1738 implements locking differently and has different goals in mind. It is
1739 therefore worth knowing the difference, so that you can pick the right
1740 one for your application. Here are the three locking wrappers:
1741
1742 =over 5
1743
1744 =item B<Tie::DB_Lock>
1745
1746 A B<DB_File> wrapper which creates copies of the database file for
1747 read access, so that you have a kind of a multiversioning concurrent read
1748 system. However, updates are still serial. Use for databases where reads
1749 may be lengthy and consistency problems may occur.
1750
1751 =item B<Tie::DB_LockFile>
1752
1753 A B<DB_File> wrapper that has the ability to lock and unlock the database
1754 while it is being used. Avoids the tie-before-flock problem by simply
1755 re-tie-ing the database when you get or drop a lock.  Because of the
1756 flexibility in dropping and re-acquiring the lock in the middle of a
1757 session, this can be massaged into a system that will work with long
1758 updates and/or reads if the application follows the hints in the POD
1759 documentation.
1760
1761 =item B<DB_File::Lock>
1762
1763 An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
1764 before tie-ing the database and drops the lock after the untie. Allows
1765 one to use the same lockfile for multiple databases to avoid deadlock
1766 problems, if desired. Use for databases where updates are reads are
1767 quick and simple flock locking semantics are enough.
1768
1769 =back
1770
1771 =head2 Sharing Databases With C Applications
1772
1773 There is no technical reason why a Berkeley DB database cannot be
1774 shared by both a Perl and a C application.
1775
1776 The vast majority of problems that are reported in this area boil down
1777 to the fact that C strings are NULL terminated, whilst Perl strings are
1778 not. See L<DBM FILTERS> for a generic way to work around this problem.
1779
1780 Here is a real example. Netscape 2.0 keeps a record of the locations you
1781 visit along with the time you last visited them in a DB_HASH database.
1782 This is usually stored in the file F<~/.netscape/history.db>. The key
1783 field in the database is the location string and the value field is the
1784 time the location was last visited stored as a 4 byte binary value.
1785
1786 If you haven't already guessed, the location string is stored with a
1787 terminating NULL. This means you need to be careful when accessing the
1788 database.
1789
1790 Here is a snippet of code that is loosely based on Tom Christiansen's
1791 I<ggh> script (available from your nearest CPAN archive in
1792 F<authors/id/TOMC/scripts/nshist.gz>).
1793
1794     use strict ;
1795     use DB_File ;
1796     use Fcntl ;
1797
1798     use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
1799     $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1800
1801     $HISTORY = "$dotdir/.netscape/history.db";
1802
1803     tie %hist_db, 'DB_File', $HISTORY
1804         or die "Cannot open $HISTORY: $!\n" ;;
1805
1806     # Dump the complete database
1807     while ( ($href, $binary_time) = each %hist_db ) {
1808
1809         # remove the terminating NULL
1810         $href =~ s/\x00$// ;
1811
1812         # convert the binary time into a user friendly string
1813         $date = localtime unpack("V", $binary_time);
1814         print "$date $href\n" ;
1815     }
1816
1817     # check for the existence of a specific key
1818     # remember to add the NULL
1819     if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1820         $date = localtime unpack("V", $binary_time) ;
1821         print "Last visited mox.perl.com on $date\n" ;
1822     }
1823     else {
1824         print "Never visited mox.perl.com\n"
1825     }
1826
1827     untie %hist_db ;
1828
1829 =head2 The untie() Gotcha
1830
1831 If you make use of the Berkeley DB API, it is I<very> strongly
1832 recommended that you read L<perltie/The untie Gotcha>.
1833
1834 Even if you don't currently make use of the API interface, it is still
1835 worth reading it.
1836
1837 Here is an example which illustrates the problem from a B<DB_File>
1838 perspective:
1839
1840     use DB_File ;
1841     use Fcntl ;
1842
1843     my %x ;
1844     my $X ;
1845
1846     $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1847         or die "Cannot tie first time: $!" ;
1848
1849     $x{123} = 456 ;
1850
1851     untie %x ;
1852
1853     tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1854         or die "Cannot tie second time: $!" ;
1855
1856     untie %x ;
1857
1858 When run, the script will produce this error message:
1859
1860     Cannot tie second time: Invalid argument at bad.file line 14.
1861
1862 Although the error message above refers to the second tie() statement
1863 in the script, the source of the problem is really with the untie()
1864 statement that precedes it.
1865
1866 Having read L<perltie> you will probably have already guessed that the
1867 error is caused by the extra copy of the tied object stored in C<$X>.
1868 If you haven't, then the problem boils down to the fact that the
1869 B<DB_File> destructor, DESTROY, will not be called until I<all>
1870 references to the tied object are destroyed. Both the tied variable,
1871 C<%x>, and C<$X> above hold a reference to the object. The call to
1872 untie() will destroy the first, but C<$X> still holds a valid
1873 reference, so the destructor will not get called and the database file
1874 F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1875 attempt to open a database that is already open via the catch-all
1876 "Invalid argument" doesn't help.
1877
1878 If you run the script with the C<-w> flag the error message becomes:
1879
1880     untie attempted while 1 inner references still exist at bad.file line 12.
1881     Cannot tie second time: Invalid argument at bad.file line 14.
1882
1883 which pinpoints the real problem. Finally the script can now be
1884 modified to fix the original problem by destroying the API object
1885 before the untie:
1886
1887     ...
1888     $x{123} = 456 ;
1889
1890     undef $X ;
1891     untie %x ;
1892
1893     $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1894     ...
1895
1896
1897 =head1 COMMON QUESTIONS
1898
1899 =head2 Why is there Perl source in my database?
1900
1901 If you look at the contents of a database file created by DB_File,
1902 there can sometimes be part of a Perl script included in it.
1903
1904 This happens because Berkeley DB uses dynamic memory to allocate
1905 buffers which will subsequently be written to the database file. Being
1906 dynamic, the memory could have been used for anything before DB
1907 malloced it. As Berkeley DB doesn't clear the memory once it has been
1908 allocated, the unused portions will contain random junk. In the case
1909 where a Perl script gets written to the database, the random junk will
1910 correspond to an area of dynamic memory that happened to be used during
1911 the compilation of the script.
1912
1913 Unless you don't like the possibility of there being part of your Perl
1914 scripts embedded in a database file, this is nothing to worry about.
1915
1916 =head2 How do I store complex data structures with DB_File?
1917
1918 Although B<DB_File> cannot do this directly, there is a module which
1919 can layer transparently over B<DB_File> to accomplish this feat.
1920
1921 Check out the MLDBM module, available on CPAN in the directory
1922 F<modules/by-module/MLDBM>.
1923
1924 =head2 What does "Invalid Argument" mean?
1925
1926 You will get this error message when one of the parameters in the
1927 C<tie> call is wrong. Unfortunately there are quite a few parameters to
1928 get wrong, so it can be difficult to figure out which one it is.
1929
1930 Here are a couple of possibilities:
1931
1932 =over 5
1933
1934 =item 1.
1935
1936 Attempting to reopen a database without closing it.
1937
1938 =item 2.
1939
1940 Using the O_WRONLY flag.
1941
1942 =back
1943
1944 =head2 What does "Bareword 'DB_File' not allowed" mean?
1945
1946 You will encounter this particular error message when you have the
1947 C<strict 'subs'> pragma (or the full strict pragma) in your script.
1948 Consider this script:
1949
1950     use strict ;
1951     use DB_File ;
1952     use vars qw(%x) ;
1953     tie %x, DB_File, "filename" ;
1954
1955 Running it produces the error in question:
1956
1957     Bareword "DB_File" not allowed while "strict subs" in use
1958
1959 To get around the error, place the word C<DB_File> in either single or
1960 double quotes, like this:
1961
1962     tie %x, "DB_File", "filename" ;
1963
1964 Although it might seem like a real pain, it is really worth the effort
1965 of having a C<use strict> in all your scripts.
1966
1967 =head1 REFERENCES
1968
1969 Articles that are either about B<DB_File> or make use of it.
1970
1971 =over 5
1972
1973 =item 1.
1974
1975 I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
1976 Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
1977
1978 =back
1979
1980 =head1 HISTORY
1981
1982 Moved to the Changes file.
1983
1984 =head1 BUGS
1985
1986 Some older versions of Berkeley DB had problems with fixed length
1987 records using the RECNO file format. This problem has been fixed since
1988 version 1.85 of Berkeley DB.
1989
1990 I am sure there are bugs in the code. If you do find any, or can
1991 suggest any enhancements, I would welcome your comments.
1992
1993 =head1 AVAILABILITY
1994
1995 B<DB_File> comes with the standard Perl source distribution. Look in
1996 the directory F<ext/DB_File>. Given the amount of time between releases
1997 of Perl the version that ships with Perl is quite likely to be out of
1998 date, so the most recent version can always be found on CPAN (see
1999 L<perlmod/CPAN> for details), in the directory
2000 F<modules/by-module/DB_File>.
2001
2002 This version of B<DB_File> will work with either version 1.x, 2.x or
2003 3.x of Berkeley DB, but is limited to the functionality provided by
2004 version 1.
2005
2006 The official web site for Berkeley DB is F<http://www.sleepycat.com>.
2007 All versions of Berkeley DB are available there.
2008
2009 Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2010 archive in F<src/misc/db.1.85.tar.gz>.
2011
2012 If you are running IRIX, then get Berkeley DB version 1 from
2013 F<http://reality.sgi.com/ariel>. It has the patches necessary to
2014 compile properly on IRIX 5.3.
2015
2016 =head1 COPYRIGHT
2017
2018 Copyright (c) 1995-1999 Paul Marquess. All rights reserved. This program
2019 is free software; you can redistribute it and/or modify it under the
2020 same terms as Perl itself.
2021
2022 Although B<DB_File> is covered by the Perl license, the library it
2023 makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2024 copyright and its own license. Please take the time to read it.
2025
2026 Here are are few words taken from the Berkeley DB FAQ (at
2027 F<http://www.sleepycat.com>) regarding the license:
2028
2029     Do I have to license DB to use it in Perl scripts?
2030
2031     No. The Berkeley DB license requires that software that uses
2032     Berkeley DB be freely redistributable. In the case of Perl, that
2033     software is Perl, and not your scripts. Any Perl scripts that you
2034     write are your property, including scripts that make use of
2035     Berkeley DB. Neither the Perl license nor the Berkeley DB license
2036     place any restriction on what you may do with them.
2037
2038 If you are in any doubt about the license situation, contact either the
2039 Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
2040
2041
2042 =head1 SEE ALSO
2043
2044 L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2045 L<dbmfilter>
2046
2047 =head1 AUTHOR
2048
2049 The DB_File interface was written by Paul Marquess
2050 E<lt>Paul.Marquess@btinternet.comE<gt>.
2051 Questions about the DB system itself may be addressed to
2052 E<lt>db@sleepycat.com<gt>.
2053
2054 =cut