# DB_File.pm -- Perl 5 interface to Berkeley DB
#
# written by Paul Marquess (pmarquess@bfsec.bt.co.uk)
-# last modified 27th Nov 1996
-# version 1.06
+# last modified 19th November 1998
+# version 1.61
+#
+# Copyright (c) 1995-8 Paul Marquess. All rights reserved.
+# This program is free software; you can redistribute it and/or
+# modify it under the same terms as Perl itself.
+
package DB_File::HASHINFO ;
{
my $pkg = shift ;
- bless { 'bsize' => 0,
- 'ffactor' => 0,
- 'nelem' => 0,
- 'cachesize' => 0,
- 'hash' => undef,
- 'lorder' => 0,
- }, $pkg ;
+ bless { VALID => { map {$_, 1}
+ qw( bsize ffactor nelem cachesize hash lorder)
+ },
+ GOT => {}
+ }, $pkg ;
}
my $self = shift ;
my $key = shift ;
- return $self->{$key} if exists $self->{$key} ;
+ return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
my $pkg = ref $self ;
croak "${pkg}::FETCH - Unknown element '$key'" ;
my $key = shift ;
my $value = shift ;
- if ( exists $self->{$key} )
+ if ( exists $self->{VALID}{$key} )
{
- $self->{$key} = $value ;
+ $self->{GOT}{$key} = $value ;
return ;
}
my $self = shift ;
my $key = shift ;
- if ( exists $self->{$key} )
+ if ( exists $self->{VALID}{$key} )
{
- delete $self->{$key} ;
+ delete $self->{GOT}{$key} ;
return ;
}
my $self = shift ;
my $key = shift ;
- exists $self->{$key} ;
+ exists $self->{VALID}{$key} ;
}
sub NotHere
{
- my $pkg = shift ;
+ my $self = shift ;
my $method = shift ;
- croak "${pkg} does not define the method ${method}" ;
+ croak ref($self) . " does not define the method ${method}" ;
}
-sub DESTROY { undef %{$_[0]} }
-sub FIRSTKEY { my $self = shift ; $self->NotHere(ref $self, "FIRSTKEY") }
-sub NEXTKEY { my $self = shift ; $self->NotHere(ref $self, "NEXTKEY") }
-sub CLEAR { my $self = shift ; $self->NotHere(ref $self, "CLEAR") }
+sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
+sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
+sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
package DB_File::RECNOINFO ;
{
my $pkg = shift ;
- bless { 'bval' => 0,
- 'cachesize' => 0,
- 'psize' => 0,
- 'flags' => 0,
- 'lorder' => 0,
- 'reclen' => 0,
- 'bfname' => "",
- }, $pkg ;
+ bless { VALID => { map {$_, 1}
+ qw( bval cachesize psize flags lorder reclen bfname )
+ },
+ GOT => {},
+ }, $pkg ;
}
package DB_File::BTREEINFO ;
{
my $pkg = shift ;
- bless { 'flags' => 0,
- 'cachesize' => 0,
- 'maxkeypage' => 0,
- 'minkeypage' => 0,
- 'psize' => 0,
- 'compare' => undef,
- 'prefix' => undef,
- 'lorder' => 0,
- }, $pkg ;
+ bless { VALID => { map {$_, 1}
+ qw( flags cachesize maxkeypage minkeypage psize
+ compare prefix lorder )
+ },
+ GOT => {},
+ }, $pkg ;
}
package DB_File ;
use strict;
-use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO) ;
+use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_version) ;
use Carp;
-$VERSION = "1.06" ;
+$VERSION = "1.61" ;
#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = new DB_File::BTREEINFO ;
}
-# import borrowed from IO::File
-# exports Fcntl constants if available.
-sub import {
- my $pkg = shift;
- my $callpkg = caller;
- Exporter::export $pkg, $callpkg;
- eval {
- require Fcntl;
- Exporter::export 'Fcntl', $callpkg;
- };
-}
+eval {
+ # Make all Fcntl O_XXX constants available for importing
+ require Fcntl;
+ my @O = grep /^O_/, @Fcntl::EXPORT;
+ Fcntl->import(@O); # first we import what we want to export
+ push(@EXPORT, @O);
+};
+
+## import borrowed from IO::File
+## exports Fcntl constants if available.
+#sub import {
+# my $pkg = shift;
+# my $callpkg = caller;
+# Exporter::export $pkg, $callpkg, @_;
+# eval {
+# require Fcntl;
+# Exporter::export 'Fcntl', $callpkg, '/^O_/';
+# };
+#}
bootstrap DB_File $VERSION;
# Preloaded methods go here. Autoload methods go after __END__, and are
# processed by the autosplit program.
-sub TIEHASH
+sub tie_hash_or_array
{
my (@arg) = @_ ;
+ my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
$arg[4] = tied %{ $arg[4] }
if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
- DoTie_(@arg) ;
+ # make recno in Berkeley DB version 2 work like recno in version 1.
+ if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
+ $arg[1] and ! -e $arg[1]) {
+ open(FH, ">$arg[1]") or return undef ;
+ close FH ;
+ chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
+ }
+
+ DoTie_($tieHASH, @arg) ;
}
-*TIEARRAY = \&TIEHASH ;
+sub TIEHASH
+{
+ tie_hash_or_array(@_) ;
+}
+sub TIEARRAY
+{
+ tie_hash_or_array(@_) ;
+}
+
+sub CLEAR
+{
+ my $self = shift;
+ my $key = "" ;
+ my $value = "" ;
+ my $status = $self->seq($key, $value, R_FIRST());
+ my @keys;
+
+ while ($status == 0) {
+ push @keys, $key;
+ $status = $self->seq($key, $value, R_NEXT());
+ }
+ foreach $key (reverse @keys) {
+ my $s = $self->del($key);
+ }
+}
+
+sub EXTEND { }
+
+sub STORESIZE
+{
+ my $self = shift;
+ my $length = shift ;
+ my $current_length = $self->length() ;
+
+ if ($length < $current_length) {
+ my $key ;
+ for ($key = $current_length - 1 ; $key >= $length ; -- $key)
+ { $self->del($key) }
+ }
+ elsif ($length > $current_length) {
+ $self->put($length-1, "") ;
+ }
+}
+
sub get_dup
{
croak "Usage: \$db->get_dup(key [,flag])\n"
1;
__END__
-=cut
-
=head1 NAME
-DB_File - Perl5 access to Berkeley DB
+DB_File - Perl5 access to Berkeley DB version 1.x
=head1 SYNOPSIS
=head1 DESCRIPTION
B<DB_File> is a module which allows Perl programs to make use of the
-facilities provided by Berkeley DB. If you intend to use this
-module you should really have a copy of the Berkeley DB manual pages at
-hand. The interface defined here mirrors the Berkeley DB interface
-closely.
+facilities provided by Berkeley DB version 1.x (if you have a newer
+version of DB, see L<Using DB_File with Berkeley DB version 2>). It is
+assumed that you have a copy of the Berkeley DB manual pages at hand
+when reading this documentation. The interface defined here mirrors the
+Berkeley DB interface closely.
Berkeley DB is a C library which provides a consistent interface to a
number of database formats. B<DB_File> provides an interface to all
=back
-=head2 How does DB_File interface to Berkeley DB?
+=head2 Using DB_File with Berkeley DB version 2
+
+Although B<DB_File> is intended to be used with Berkeley DB version 1,
+it can also be used with version 2. In this case the interface is
+limited to the functionality provided by Berkeley DB 1.x. Anywhere the
+version 2 interface differs, B<DB_File> arranges for it to work like
+version 1. This feature allows B<DB_File> scripts that were built with
+version 1 to be migrated to version 2 without any changes.
+
+If you want to make use of the new features available in Berkeley DB
+2.x, use the Perl module B<BerkeleyDB> instead.
+
+At the time of writing this document the B<BerkeleyDB> module is still
+alpha quality (the version number is < 1.0), and so unsuitable for use
+in any serious development work. Once its version number is >= 1.0, it
+is considered stable enough for real work.
+
+B<Note:> The database file format has changed in Berkeley DB version 2.
+If you cannot recreate your databases, you must dump any existing
+databases with the C<db_dump185> utility that comes with Berkeley DB.
+Once you have upgraded DB_File to use Berkeley DB version 2, your
+databases can be recreated using C<db_load>. Refer to the Berkeley DB
+documentation for further details.
+
+Please read L<COPYRIGHT> before using version 2.x of Berkeley DB with
+DB_File.
+
+=head2 Interface to Berkeley DB
B<DB_File> allows access to Berkeley DB files using the tie() mechanism
in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
$a->{'cachesize'} = 12345 ;
tie %y, 'DB_File', "filename", $flags, 0777, $a ;
-A few of the values need extra discussion here. When used, the C
+A few of the options need extra discussion here. When used, the C
equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
to C functions. In B<DB_File> these keys are used to store references
to Perl subs. Below are templates for each of the subs:
See L<Changing the BTREE sort order> for an example of using the
C<compare> template.
+If you are using the DB_RECNO interface and you intend making use of
+C<bval>, you should check out L<The 'bval' Option>.
+
=head2 Default Parameters
It is possible to omit some or all of the final 4 parameters in the
is equivalent to:
- tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0640, $DB_HASH ;
+ tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
It is also possible to omit the filename parameter as well, so the
call:
is equivalent to:
- tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0640, $DB_HASH ;
+ tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
See L<In Memory Databases> for a discussion on the use of C<undef>
in place of a filename.
file formats that B<DB_File> supports. It is also very straightforward
to use.
-=head2 A Simple Example.
+=head2 A Simple Example
This example shows how to create a database, add key/value pairs to the
database, delete keys/value pairs and finally how to enumerate the
=back
-=head2 Handling duplicate keys
+=head2 Handling Duplicate Keys
The BTREE file type optionally allows a single key to be associated
with an arbitrary number of values. This option is enabled by setting
This time we have got all the key/value pairs, including the multiple
values associated with the key C<Wall>.
-=head2 The get_dup method.
+=head2 The get_dup() Method
B<DB_File> comes with a utility method, called C<get_dup>, to assist in
reading duplicate values from BTREE databases. The method can take the
In list context, it returns all the values which match C<$key>. Note
that the values will be returned in an apparently random order.
-In list context, if the second parameter is present and evaluates TRUE,
-the method returns an associative array. The keys of the associative
-array correspond to the the values that matched in the BTREE and the
-values of the array are a count of the number of times that particular
-value occurred in the BTREE.
+In list context, if the second parameter is present and evaluates
+TRUE, the method returns an associative array. The keys of the
+associative array correspond to the values that matched in the BTREE
+and the values of the array are a count of the number of times that
+particular value occurred in the BTREE.
So assuming the database created above, we can use C<get_dup> like
this:
-2 the second last, and so on. Attempting to access an element before
the start of the array will raise a fatal run-time error.
+=head2 The 'bval' Option
+
+The operation of the bval option warrants some discussion. Here is the
+definition of bval from the Berkeley DB 1.85 recno manual page:
+
+ The delimiting byte to be used to mark the end of a
+ record for variable-length records, and the pad charac-
+ ter for fixed-length records. If no value is speci-
+ fied, newlines (``\n'') are used to mark the end of
+ variable-length records and fixed-length records are
+ padded with spaces.
+
+The second sentence is wrong. In actual fact bval will only default to
+C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
+openinfo parameter is used at all, the value that happens to be in bval
+will be used. That means you always have to specify bval when making
+use of any of the options in the openinfo parameter. This documentation
+error will be fixed in the next release of Berkeley DB.
+
+That clarifies the situation with regard to Berkeley DB itself. What
+about B<DB_File>? Well, the behavior defined in the quote above is
+quite useful, so B<DB_File> conforms to it.
+
+That means that you can specify other options (e.g. cachesize) and
+still have bval default to C<"\n"> for variable length records, and
+space for fixed length records.
+
=head2 A Simple Example
Here is a simple example that uses RECNO.
=head2 Extra Methods
-As you can see from the example above, the tied array interface is
-quite limited. To make the interface more useful, a number of methods
-are supplied with B<DB_File> to simulate the standard array operations
-that are not currently implemented in Perl's tied array interface. All
-these methods are accessed via the object returned from the tie call.
+If you are using a version of Perl earlier than 5.004_57, the tied
+array interface is quite limited. The example script above will work,
+but you won't be able to use C<push>, C<pop>, C<shift>, C<unshift>
+etc. with the tied array.
+
+To make the interface more useful for older versions of Perl, a number
+of methods are supplied with B<DB_File> to simulate the missing array
+operations. All these methods are accessed via the object returned from
+the tie call.
Here are the methods:
undef $db ;
untie %hash ;
+See L<The untie() Gotcha> for more details.
+
All the functions defined in L<dbopen> are available except for
close() and dbopen() itself. The B<DB_File> method interface to the
supported functions have been implemented to mirror the way Berkeley DB
close(DB_FH);
print "$$: Updated db to $key=$value\n";
-=head2 Sharing databases with C applications
+=head2 Sharing Databases With C Applications
There is no technical reason why a Berkeley DB database cannot be
shared by both a Perl and a C application.
untie %hist_db ;
+=head2 The untie() Gotcha
+
+If you make use of the Berkeley DB API, it is I<very> strongly
+recommended that you read L<perltie/The untie Gotcha>.
+
+Even if you don't currently make use of the API interface, it is still
+worth reading it.
+
+Here is an example which illustrates the problem from a B<DB_File>
+perspective:
+
+ use DB_File ;
+ use Fcntl ;
+
+ my %x ;
+ my $X ;
+
+ $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
+ or die "Cannot tie first time: $!" ;
+
+ $x{123} = 456 ;
+
+ untie %x ;
+
+ tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
+ or die "Cannot tie second time: $!" ;
+
+ untie %x ;
+
+When run, the script will produce this error message:
+
+ Cannot tie second time: Invalid argument at bad.file line 14.
+
+Although the error message above refers to the second tie() statement
+in the script, the source of the problem is really with the untie()
+statement that precedes it.
+
+Having read L<perltie> you will probably have already guessed that the
+error is caused by the extra copy of the tied object stored in C<$X>.
+If you haven't, then the problem boils down to the fact that the
+B<DB_File> destructor, DESTROY, will not be called until I<all>
+references to the tied object are destroyed. Both the tied variable,
+C<%x>, and C<$X> above hold a reference to the object. The call to
+untie() will destroy the first, but C<$X> still holds a valid
+reference, so the destructor will not get called and the database file
+F<tst.fil> will remain open. The fact that Berkeley DB then reports the
+attempt to open a database that is already open via the catch-all
+"Invalid argument" doesn't help.
+
+If you run the script with the C<-w> flag the error message becomes:
+
+ untie attempted while 1 inner references still exist at bad.file line 12.
+ Cannot tie second time: Invalid argument at bad.file line 14.
+
+which pinpoints the real problem. Finally the script can now be
+modified to fix the original problem by destroying the API object
+before the untie:
+
+ ...
+ $x{123} = 456 ;
+
+ undef $X ;
+ untie %x ;
+
+ $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
+ ...
+
=head1 COMMON QUESTIONS
=head1 HISTORY
-=over
-
-=item 0.1
-
-First Release.
-
-=item 0.2
-
-When B<DB_File> is opening a database file it no longer terminates the
-process if I<dbopen> returned an error. This allows file protection
-errors to be caught at run time. Thanks to Judith Grass
-E<lt>grass@cybercash.comE<gt> for spotting the bug.
-
-=item 0.3
-
-Added prototype support for multiple btree compare callbacks.
-
-=item 1.0
-
-B<DB_File> has been in use for over a year. To reflect that, the
-version number has been incremented to 1.0.
-
-Added complete support for multiple concurrent callbacks.
-
-Using the I<push> method on an empty list didn't work properly. This
-has been fixed.
-
-=item 1.01
-
-Fixed a core dump problem with SunOS.
-
-The return value from TIEHASH wasn't set to NULL when dbopen returned
-an error.
-
-=item 1.02
-
-Merged OS/2 specific code into DB_File.xs
-
-Removed some redundant code in DB_File.xs.
+Moved to the Changes file.
-Documentation update.
-
-Allow negative subscripts with RECNO interface.
-
-Changed the default flags from O_RDWR to O_CREAT|O_RDWR.
-
-The example code which showed how to lock a database needed a call to
-C<sync> added. Without it the resultant database file was empty.
-
-Added get_dup method.
-
-=item 1.03
-
-Documentation update.
-
-B<DB_File> now imports the constants (O_RDWR, O_CREAT etc.) from Fcntl
-automatically.
-
-The standard hash function C<exists> is now supported.
-
-Modified the behavior of get_dup. When it returns an associative
-array, the value is the count of the number of matching BTREE values.
-
-=item 1.04
-
-Minor documentation changes.
+=head1 BUGS
-Fixed a bug in hash_cb. Patches supplied by Dave Hammen,
-E<lt>hammen@gothamcity.jsc.nasa.govE<gt>.
+Some older versions of Berkeley DB had problems with fixed length
+records using the RECNO file format. This problem has been fixed since
+version 1.85 of Berkeley DB.
-Fixed a bug with the constructors for DB_File::HASHINFO,
-DB_File::BTREEINFO and DB_File::RECNOINFO. Also tidied up the
-constructors to make them C<-w> clean.
+I am sure there are bugs in the code. If you do find any, or can
+suggest any enhancements, I would welcome your comments.
-Reworked part of the test harness to be more locale friendly.
+=head1 AVAILABILITY
-=item 1.05
+B<DB_File> comes with the standard Perl source distribution. Look in
+the directory F<ext/DB_File>. Given the amount of time between releases
+of Perl, the version that ships with Perl is quite likely to be out of
+date. The most recent version can always be found on CPAN (see
+L<perlmod/CPAN> for details), in the directory
+F<modules/by-module/DB_File>.
-Made all scripts in the documentation C<strict> and C<-w> clean.
+This version of B<DB_File> will work with either version 1.x or 2.x of
+Berkeley DB, but is limited to the functionality provided by version 1.
-Added logic to F<DB_File.xs> to allow the module to be built after Perl
-is installed.
+The official web site for Berkeley DB is
+F<http://www.sleepycat.com/db>. The ftp equivalent is
+F<ftp.sleepycat.com:/pub>. Both versions 1 and 2 of Berkeley DB are
+available there.
-=item 1.06
+Alternatively, Berkeley DB version 1 is available at your nearest CPAN
+archive in F<src/misc/db.1.85.tar.gz>.
-Minor namespace cleanup: Localized C<PrintBtree>.
+If you are running IRIX, then get Berkeley DB version 1 from
+F<http://reality.sgi.com/ariel>. It has the patches necessary to
+compile properly on IRIX 5.3.
-=back
+=head1 COPYRIGHT
-=head1 BUGS
+Copyright (c) 1995-8 Paul Marquess. All rights reserved. This program
+is free software; you can redistribute it and/or modify it under the
+same terms as Perl itself.
-Some older versions of Berkeley DB had problems with fixed length
-records using the RECNO file format. The newest version at the time of
-writing was 1.85 - this seems to have fixed the problems with RECNO.
+Although B<DB_File> is covered by the Perl license, the library it
+makes use of, namely Berkeley DB, is not. Berkeley DB has its own
+copyright and its own license. Please take the time to read it.
-I am sure there are bugs in the code. If you do find any, or can
-suggest any enhancements, I would welcome your comments.
+Here are a few words taken from the Berkeley DB FAQ (at
+http://www.sleepycat.com) regarding the license:
-=head1 AVAILABILITY
+ Do I have to license DB to use it in Perl scripts?
-B<DB_File> comes with the standard Perl source distribution. Look in
-the directory F<ext/DB_File>.
+ No. The Berkeley DB license requires that software that uses
+ Berkeley DB be freely redistributable. In the case of Perl, that
+ software is Perl, and not your scripts. Any Perl scripts that you
+ write are your property, including scripts that make use of
+ Berkeley DB. Neither the Perl license nor the Berkeley DB license
+ place any restriction on what you may do with them.
-Berkeley DB is available at your nearest CPAN archive (see
-L<perlmod/"CPAN"> for a list) in F<src/misc/db.1.85.tar.gz>, or via the
-host F<ftp.cs.berkeley.edu> in F</ucb/4bsd/db.tar.gz>. Alternatively,
-check out the Berkeley DB home page at F<http://www.bostic.com/db>. It
-is I<not> under the GPL.
+If you are in any doubt about the license situation, contact either the
+Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
-If you are running IRIX, then get Berkeley DB from
-F<http://reality.sgi.com/ariel>. It has the patches necessary to
-compile properly on IRIX 5.3.
=head1 SEE ALSO
The DB_File interface was written by Paul Marquess
E<lt>pmarquess@bfsec.bt.co.ukE<gt>.
-Questions about the DB system itself may be addressed to Keith Bostic
-E<lt>bostic@cs.berkeley.eduE<gt>.
+Questions about the DB system itself may be addressed to
+E<lt>db@sleepycat.comE<gt>.
=cut