update Changes, patchlevel etc.
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6ca2e664 3# written by Paul Marquess (Paul.Marquess@btinternet.com)
039d031f 4# last modified 4th September 1999
5# version 1.71
36477c24 6#
cad2e5aa 7# Copyright (c) 1995-1999 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
12package DB_File::HASHINFO ;
785da04d 13
610ab055 14require 5.003 ;
15
785da04d 16use strict;
8e07c86e 17use Carp;
88108326 18require Tie::Hash;
19@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 20
88108326 21sub new
8e07c86e 22{
88108326 23 my $pkg = shift ;
24 my %x ;
25 tie %x, $pkg ;
26 bless \%x, $pkg ;
8e07c86e 27}
28
610ab055 29
88108326 30sub TIEHASH
31{
32 my $pkg = shift ;
33
36477c24 34 bless { VALID => { map {$_, 1}
35 qw( bsize ffactor nelem cachesize hash lorder)
36 },
37 GOT => {}
38 }, $pkg ;
88108326 39}
8e07c86e 40
610ab055 41
8e07c86e 42sub FETCH
43{
88108326 44 my $self = shift ;
45 my $key = shift ;
8e07c86e 46
36477c24 47 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
88108326 48
49 my $pkg = ref $self ;
50 croak "${pkg}::FETCH - Unknown element '$key'" ;
8e07c86e 51}
52
53
54sub STORE
55{
88108326 56 my $self = shift ;
57 my $key = shift ;
58 my $value = shift ;
59
36477c24 60 if ( exists $self->{VALID}{$key} )
8e07c86e 61 {
36477c24 62 $self->{GOT}{$key} = $value ;
8e07c86e 63 return ;
64 }
65
88108326 66 my $pkg = ref $self ;
67 croak "${pkg}::STORE - Unknown element '$key'" ;
8e07c86e 68}
69
70sub DELETE
71{
88108326 72 my $self = shift ;
73 my $key = shift ;
74
36477c24 75 if ( exists $self->{VALID}{$key} )
8e07c86e 76 {
36477c24 77 delete $self->{GOT}{$key} ;
8e07c86e 78 return ;
79 }
80
88108326 81 my $pkg = ref $self ;
82 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
8e07c86e 83}
84
88108326 85sub EXISTS
8e07c86e 86{
88108326 87 my $self = shift ;
88 my $key = shift ;
8e07c86e 89
36477c24 90 exists $self->{VALID}{$key} ;
8e07c86e 91}
92
88108326 93sub NotHere
8e07c86e 94{
18d2dc8c 95 my $self = shift ;
88108326 96 my $method = shift ;
8e07c86e 97
18d2dc8c 98 croak ref($self) . " does not define the method ${method}" ;
8e07c86e 99}
100
18d2dc8c 101sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
102sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
103sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 104
105package DB_File::RECNOINFO ;
785da04d 106
88108326 107use strict ;
108
045291aa 109@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 110
111sub TIEHASH
112{
88108326 113 my $pkg = shift ;
114
36477c24 115 bless { VALID => { map {$_, 1}
116 qw( bval cachesize psize flags lorder reclen bfname )
117 },
118 GOT => {},
119 }, $pkg ;
8e07c86e 120}
121
88108326 122package DB_File::BTREEINFO ;
8e07c86e 123
88108326 124use strict ;
8e07c86e 125
88108326 126@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 127
88108326 128sub TIEHASH
8e07c86e 129{
88108326 130 my $pkg = shift ;
131
36477c24 132 bless { VALID => { map {$_, 1}
133 qw( flags cachesize maxkeypage minkeypage psize
134 compare prefix lorder )
135 },
136 GOT => {},
137 }, $pkg ;
8e07c86e 138}
139
140
8e07c86e 141package DB_File ;
785da04d 142
143use strict;
1f70e1ea 144use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO $db_version) ;
8e07c86e 145use Carp;
146
785da04d 147
039d031f 148$VERSION = "1.71" ;
8e07c86e 149
150#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 151$DB_BTREE = new DB_File::BTREEINFO ;
152$DB_HASH = new DB_File::HASHINFO ;
153$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 154
785da04d 155require Tie::Hash;
8e07c86e 156require Exporter;
157use AutoLoader;
158require DynaLoader;
785da04d 159@ISA = qw(Tie::Hash Exporter DynaLoader);
8e07c86e 160@EXPORT = qw(
161 $DB_BTREE $DB_HASH $DB_RECNO
88108326 162
8e07c86e 163 BTREEMAGIC
164 BTREEVERSION
165 DB_LOCK
166 DB_SHMEM
167 DB_TXN
168 HASHMAGIC
169 HASHVERSION
170 MAX_PAGE_NUMBER
171 MAX_PAGE_OFFSET
172 MAX_REC_NUMBER
173 RET_ERROR
174 RET_SPECIAL
175 RET_SUCCESS
176 R_CURSOR
177 R_DUP
178 R_FIRST
179 R_FIXEDLEN
180 R_IAFTER
181 R_IBEFORE
182 R_LAST
183 R_NEXT
184 R_NOKEY
185 R_NOOVERWRITE
186 R_PREV
187 R_RECNOSYNC
188 R_SETCURSOR
189 R_SNAPSHOT
190 __R_UNUSED
88108326 191
045291aa 192);
8e07c86e 193
194sub AUTOLOAD {
785da04d 195 my($constname);
8e07c86e 196 ($constname = $AUTOLOAD) =~ s/.*:://;
785da04d 197 my $val = constant($constname, @_ ? $_[0] : 0);
8e07c86e 198 if ($! != 0) {
265f5c4a 199 if ($! =~ /Invalid/ || $!{EINVAL}) {
8e07c86e 200 $AutoLoader::AUTOLOAD = $AUTOLOAD;
201 goto &AutoLoader::AUTOLOAD;
202 }
203 else {
785da04d 204 my($pack,$file,$line) = caller;
8e07c86e 205 croak "Your vendor has not defined DB macro $constname, used at $file line $line.
206";
207 }
208 }
209 eval "sub $AUTOLOAD { $val }";
210 goto &$AUTOLOAD;
211}
212
f6b705ef 213
a6ed719b 214eval {
1f70e1ea 215 # Make all Fcntl O_XXX constants available for importing
216 require Fcntl;
217 my @O = grep /^O_/, @Fcntl::EXPORT;
218 Fcntl->import(@O); # first we import what we want to export
219 push(@EXPORT, @O);
a6ed719b 220};
f6b705ef 221
1f70e1ea 222## import borrowed from IO::File
223## exports Fcntl constants if available.
224#sub import {
225# my $pkg = shift;
226# my $callpkg = caller;
227# Exporter::export $pkg, $callpkg, @_;
228# eval {
229# require Fcntl;
230# Exporter::export 'Fcntl', $callpkg, '/^O_/';
231# };
232#}
233
785da04d 234bootstrap DB_File $VERSION;
8e07c86e 235
236# Preloaded methods go here. Autoload methods go after __END__, and are
237# processed by the autosplit program.
238
05475680 239sub tie_hash_or_array
610ab055 240{
241 my (@arg) = @_ ;
05475680 242 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
610ab055 243
244 $arg[4] = tied %{ $arg[4] }
245 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
246
1f70e1ea 247 # make recno in Berkeley DB version 2 work like recno in version 1.
248 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
249 $arg[1] and ! -e $arg[1]) {
250 open(FH, ">$arg[1]") or return undef ;
251 close FH ;
252 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
253 }
254
05475680 255 DoTie_($tieHASH, @arg) ;
610ab055 256}
257
05475680 258sub TIEHASH
259{
260 tie_hash_or_array(@_) ;
261}
262
263sub TIEARRAY
264{
265 tie_hash_or_array(@_) ;
266}
88108326 267
045291aa 268sub CLEAR
269{
1f70e1ea 270 my $self = shift;
271 my $key = "" ;
272 my $value = "" ;
273 my $status = $self->seq($key, $value, R_FIRST());
274 my @keys;
275
276 while ($status == 0) {
277 push @keys, $key;
278 $status = $self->seq($key, $value, R_NEXT());
279 }
280 foreach $key (reverse @keys) {
281 my $s = $self->del($key);
282 }
283}
284
045291aa 285sub EXTEND { }
286
287sub STORESIZE
288{
289 my $self = shift;
290 my $length = shift ;
291 my $current_length = $self->length() ;
292
293 if ($length < $current_length) {
294 my $key ;
295 for ($key = $current_length - 1 ; $key >= $length ; -- $key)
296 { $self->del($key) }
297 }
a9fd575d 298 elsif ($length > $current_length) {
299 $self->put($length-1, "") ;
300 }
045291aa 301}
302
6ca2e664 303sub find_dup
304{
305 croak "Usage: \$db->find_dup(key,value)\n"
306 unless @_ == 3 ;
307
308 my $db = shift ;
309 my ($origkey, $value_wanted) = @_ ;
310 my ($key, $value) = ($origkey, 0);
311 my ($status) = 0 ;
312
313 for ($status = $db->seq($key, $value, R_CURSOR() ) ;
314 $status == 0 ;
315 $status = $db->seq($key, $value, R_NEXT() ) ) {
316
317 return 0 if $key eq $origkey and $value eq $value_wanted ;
318 }
319
320 return $status ;
321}
322
323sub del_dup
324{
325 croak "Usage: \$db->del_dup(key,value)\n"
326 unless @_ == 3 ;
327
328 my $db = shift ;
329 my ($key, $value) = @_ ;
330 my ($status) = $db->find_dup($key, $value) ;
331 return $status if $status != 0 ;
332
333 $status = $db->del($key, R_CURSOR() ) ;
334 return $status ;
335}
336
88108326 337sub get_dup
338{
339 croak "Usage: \$db->get_dup(key [,flag])\n"
340 unless @_ == 2 or @_ == 3 ;
341
342 my $db = shift ;
343 my $key = shift ;
344 my $flag = shift ;
f6b705ef 345 my $value = 0 ;
88108326 346 my $origkey = $key ;
347 my $wantarray = wantarray ;
f6b705ef 348 my %values = () ;
88108326 349 my @values = () ;
350 my $counter = 0 ;
f6b705ef 351 my $status = 0 ;
88108326 352
f6b705ef 353 # iterate through the database until either EOF ($status == 0)
354 # or a different key is encountered ($key ne $origkey).
355 for ($status = $db->seq($key, $value, R_CURSOR()) ;
356 $status == 0 and $key eq $origkey ;
357 $status = $db->seq($key, $value, R_NEXT()) ) {
88108326 358
f6b705ef 359 # save the value or count number of matches
360 if ($wantarray) {
361 if ($flag)
362 { ++ $values{$value} }
363 else
364 { push (@values, $value) }
365 }
366 else
367 { ++ $counter }
88108326 368
88108326 369 }
370
f6b705ef 371 return ($wantarray ? ($flag ? %values : @values) : $counter) ;
88108326 372}
373
374
8e07c86e 3751;
376__END__
377
3b35bae3 378=head1 NAME
379
1f70e1ea 380DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 381
382=head1 SYNOPSIS
383
384 use DB_File ;
88108326 385
386 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
387 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
388 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 389
3b35bae3 390 $status = $X->del($key [, $flags]) ;
391 $status = $X->put($key, $value [, $flags]) ;
392 $status = $X->get($key, $value [, $flags]) ;
760ac839 393 $status = $X->seq($key, $value, $flags) ;
3b35bae3 394 $status = $X->sync([$flags]) ;
395 $status = $X->fd ;
760ac839 396
f6b705ef 397 # BTREE only
88108326 398 $count = $X->get_dup($key) ;
399 @list = $X->get_dup($key) ;
400 %list = $X->get_dup($key, 1) ;
6ca2e664 401 $status = $X->find_dup($key, $value) ;
402 $status = $X->del_dup($key, $value) ;
88108326 403
f6b705ef 404 # RECNO only
405 $a = $X->length;
406 $a = $X->pop ;
407 $X->push(list);
408 $a = $X->shift;
409 $X->unshift(list);
410
cad2e5aa 411 # DBM Filters
412 $old_filter = $db->filter_store_key ( sub { ... } ) ;
413 $old_filter = $db->filter_store_value( sub { ... } ) ;
414 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
415 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
416
3b35bae3 417 untie %hash ;
418 untie @array ;
419
420=head1 DESCRIPTION
421
8e07c86e 422B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 423facilities provided by Berkeley DB version 1.x (if you have a newer
039d031f 424version of DB, see L<Using DB_File with Berkeley DB version 2 or 3>).
425It is assumed that you have a copy of the Berkeley DB manual pages at
426hand when reading this documentation. The interface defined here
427mirrors the Berkeley DB interface closely.
68dc0745 428
8e07c86e 429Berkeley DB is a C library which provides a consistent interface to a
430number of database formats. B<DB_File> provides an interface to all
431three of the database types currently supported by Berkeley DB.
3b35bae3 432
433The file types are:
434
435=over 5
436
88108326 437=item B<DB_HASH>
3b35bae3 438
88108326 439This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 440files. This is equivalent to the functionality provided by other
441hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
442the files created using DB_HASH are not compatible with any of the
443other packages mentioned.
3b35bae3 444
8e07c86e 445A default hashing algorithm, which will be adequate for most
446applications, is built into Berkeley DB. If you do need to use your own
447hashing algorithm it is possible to write your own in Perl and have
448B<DB_File> use it instead.
3b35bae3 449
88108326 450=item B<DB_BTREE>
451
452The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 453sorted, balanced binary tree.
3b35bae3 454
8e07c86e 455As with the DB_HASH format, it is possible to provide a user defined
456Perl routine to perform the comparison of keys. By default, though, the
457keys are stored in lexical order.
3b35bae3 458
88108326 459=item B<DB_RECNO>
3b35bae3 460
8e07c86e 461DB_RECNO allows both fixed-length and variable-length flat text files
462to be manipulated using the same key/value pair interface as in DB_HASH
463and DB_BTREE. In this case the key will consist of a record (line)
464number.
3b35bae3 465
466=back
467
039d031f 468=head2 Using DB_File with Berkeley DB version 2 or 3
1f70e1ea 469
470Although B<DB_File> is intended to be used with Berkeley DB version 1,
039d031f 471it can also be used with version 2.or 3 In this case the interface is
1f70e1ea 472limited to the functionality provided by Berkeley DB 1.x. Anywhere the
039d031f 473version 2 or 3 interface differs, B<DB_File> arranges for it to work
474like version 1. This feature allows B<DB_File> scripts that were built
475with version 1 to be migrated to version 2 or 3 without any changes.
1f70e1ea 476
477If you want to make use of the new features available in Berkeley DB
039d031f 4782.x or 3.x, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 479
480At the time of writing this document the B<BerkeleyDB> module is still
481alpha quality (the version number is < 1.0), and so unsuitable for use
482in any serious development work. Once its version number is >= 1.0, it
483is considered stable enough for real work.
484
039d031f 485B<Note:> The database file format has changed in both Berkeley DB
486version 2 and 3. If you cannot recreate your databases, you must dump
487any existing databases with the C<db_dump185> utility that comes with
488Berkeley DB.
489Once you have rebuilt DB_File to use Berkeley DB version 2 or 3, your
1f70e1ea 490databases can be recreated using C<db_load>. Refer to the Berkeley DB
491documentation for further details.
492
039d031f 493Please read L<"COPYRIGHT"> before using version 2.x or 3.x of Berkeley
494DB with DB_File.
1f70e1ea 495
68dc0745 496=head2 Interface to Berkeley DB
3b35bae3 497
498B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 499in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
500allows B<DB_File> to access Berkeley DB files using either an
501associative array (for DB_HASH & DB_BTREE file types) or an ordinary
502array (for the DB_RECNO file type).
3b35bae3 503
88108326 504In addition to the tie() interface, it is also possible to access most
505of the functions provided in the Berkeley DB API directly.
f6b705ef 506See L<THE API INTERFACE>.
3b35bae3 507
88108326 508=head2 Opening a Berkeley DB Database File
3b35bae3 509
8e07c86e 510Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 511Here is the C prototype for dbopen():
3b35bae3 512
513 DB*
514 dbopen (const char * file, int flags, int mode,
515 DBTYPE type, const void * openinfo)
516
517The parameter C<type> is an enumeration which specifies which of the 3
518interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
519Depending on which of these is actually chosen, the final parameter,
520I<openinfo> points to a data structure which allows tailoring of the
521specific interface method.
522
8e07c86e 523This interface is handled slightly differently in B<DB_File>. Here is
88108326 524an equivalent call using B<DB_File>:
3b35bae3 525
88108326 526 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 527
8e07c86e 528The C<filename>, C<flags> and C<mode> parameters are the direct
529equivalent of their dbopen() counterparts. The final parameter $DB_HASH
530performs the function of both the C<type> and C<openinfo> parameters in
531dbopen().
3b35bae3 532
88108326 533In the example above $DB_HASH is actually a pre-defined reference to a
534hash object. B<DB_File> has three of these pre-defined references.
535Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 536
8e07c86e 537The keys allowed in each of these pre-defined references is limited to
538the names used in the equivalent C structure. So, for example, the
539$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 540C<ffactor>, C<hash>, C<lorder> and C<nelem>.
541
542To change one of these elements, just assign to it like this:
543
544 $DB_HASH->{'cachesize'} = 10000 ;
545
546The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
547usually adequate for most applications. If you do need to create extra
548instances of these objects, constructors are available for each file
549type.
550
551Here are examples of the constructors and the valid options available
552for DB_HASH, DB_BTREE and DB_RECNO respectively.
553
554 $a = new DB_File::HASHINFO ;
555 $a->{'bsize'} ;
556 $a->{'cachesize'} ;
557 $a->{'ffactor'};
558 $a->{'hash'} ;
559 $a->{'lorder'} ;
560 $a->{'nelem'} ;
561
562 $b = new DB_File::BTREEINFO ;
563 $b->{'flags'} ;
564 $b->{'cachesize'} ;
565 $b->{'maxkeypage'} ;
566 $b->{'minkeypage'} ;
567 $b->{'psize'} ;
568 $b->{'compare'} ;
569 $b->{'prefix'} ;
570 $b->{'lorder'} ;
571
572 $c = new DB_File::RECNOINFO ;
573 $c->{'bval'} ;
574 $c->{'cachesize'} ;
575 $c->{'psize'} ;
576 $c->{'flags'} ;
577 $c->{'lorder'} ;
578 $c->{'reclen'} ;
579 $c->{'bfname'} ;
580
581The values stored in the hashes above are mostly the direct equivalent
582of their C counterpart. Like their C counterparts, all are set to a
f6b705ef 583default values - that means you don't have to set I<all> of the
88108326 584values when you only want to change one. Here is an example:
585
586 $a = new DB_File::HASHINFO ;
587 $a->{'cachesize'} = 12345 ;
588 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
589
36477c24 590A few of the options need extra discussion here. When used, the C
88108326 591equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
592to C functions. In B<DB_File> these keys are used to store references
593to Perl subs. Below are templates for each of the subs:
594
595 sub hash
596 {
597 my ($data) = @_ ;
598 ...
599 # return the hash value for $data
600 return $hash ;
601 }
3b35bae3 602
88108326 603 sub compare
604 {
605 my ($key, $key2) = @_ ;
606 ...
607 # return 0 if $key1 eq $key2
608 # -1 if $key1 lt $key2
609 # 1 if $key1 gt $key2
610 return (-1 , 0 or 1) ;
611 }
3b35bae3 612
88108326 613 sub prefix
614 {
615 my ($key, $key2) = @_ ;
616 ...
617 # return number of bytes of $key2 which are
618 # necessary to determine that it is greater than $key1
619 return $bytes ;
620 }
3b35bae3 621
f6b705ef 622See L<Changing the BTREE sort order> for an example of using the
623C<compare> template.
88108326 624
36477c24 625If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 626C<bval>, you should check out L<The 'bval' Option>.
36477c24 627
88108326 628=head2 Default Parameters
629
630It is possible to omit some or all of the final 4 parameters in the
631call to C<tie> and let them take default values. As DB_HASH is the most
632common file format used, the call:
633
634 tie %A, "DB_File", "filename" ;
635
636is equivalent to:
637
18d2dc8c 638 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 639
640It is also possible to omit the filename parameter as well, so the
641call:
642
643 tie %A, "DB_File" ;
644
645is equivalent to:
646
18d2dc8c 647 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 648
f6b705ef 649See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 650in place of a filename.
651
f6b705ef 652=head2 In Memory Databases
653
654Berkeley DB allows the creation of in-memory databases by using NULL
655(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
656uses C<undef> instead of NULL to provide this functionality.
657
658=head1 DB_HASH
659
660The DB_HASH file format is probably the most commonly used of the three
661file formats that B<DB_File> supports. It is also very straightforward
662to use.
663
68dc0745 664=head2 A Simple Example
f6b705ef 665
666This example shows how to create a database, add key/value pairs to the
667database, delete keys/value pairs and finally how to enumerate the
668contents of the database.
669
610ab055 670 use strict ;
f6b705ef 671 use DB_File ;
610ab055 672 use vars qw( %h $k $v ) ;
f6b705ef 673
2c2d71f5 674 unlink "fruit" ;
f6b705ef 675 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
676 or die "Cannot open file 'fruit': $!\n";
677
678 # Add a few key/value pairs to the file
679 $h{"apple"} = "red" ;
680 $h{"orange"} = "orange" ;
681 $h{"banana"} = "yellow" ;
682 $h{"tomato"} = "red" ;
683
684 # Check for existence of a key
685 print "Banana Exists\n\n" if $h{"banana"} ;
686
687 # Delete a key/value pair.
688 delete $h{"apple"} ;
689
690 # print the contents of the file
691 while (($k, $v) = each %h)
692 { print "$k -> $v\n" }
693
694 untie %h ;
695
696here is the output:
697
698 Banana Exists
699
700 orange -> orange
701 tomato -> red
702 banana -> yellow
703
704Note that the like ordinary associative arrays, the order of the keys
705retrieved is in an apparently random order.
706
707=head1 DB_BTREE
708
709The DB_BTREE format is useful when you want to store data in a given
710order. By default the keys will be stored in lexical order, but as you
711will see from the example shown in the next section, it is very easy to
712define your own sorting function.
713
714=head2 Changing the BTREE sort order
715
716This script shows how to override the default sorting algorithm that
717BTREE uses. Instead of using the normal lexical ordering, a case
718insensitive compare function will be used.
88108326 719
610ab055 720 use strict ;
f6b705ef 721 use DB_File ;
610ab055 722
723 my %h ;
f6b705ef 724
725 sub Compare
726 {
727 my ($key1, $key2) = @_ ;
728 "\L$key1" cmp "\L$key2" ;
729 }
730
731 # specify the Perl sub that will do the comparison
732 $DB_BTREE->{'compare'} = \&Compare ;
733
2c2d71f5 734 unlink "tree" ;
f6b705ef 735 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
736 or die "Cannot open file 'tree': $!\n" ;
737
738 # Add a key/value pair to the file
739 $h{'Wall'} = 'Larry' ;
740 $h{'Smith'} = 'John' ;
741 $h{'mouse'} = 'mickey' ;
742 $h{'duck'} = 'donald' ;
743
744 # Delete
745 delete $h{"duck"} ;
746
747 # Cycle through the keys printing them in order.
748 # Note it is not necessary to sort the keys as
749 # the btree will have kept them in order automatically.
750 foreach (keys %h)
751 { print "$_\n" }
752
753 untie %h ;
754
755Here is the output from the code above.
756
757 mouse
758 Smith
759 Wall
760
761There are a few point to bear in mind if you want to change the
762ordering in a BTREE database:
763
764=over 5
765
766=item 1.
767
768The new compare function must be specified when you create the database.
769
770=item 2.
771
772You cannot change the ordering once the database has been created. Thus
773you must use the same compare function every time you access the
88108326 774database.
775
f6b705ef 776=back
777
68dc0745 778=head2 Handling Duplicate Keys
f6b705ef 779
780The BTREE file type optionally allows a single key to be associated
781with an arbitrary number of values. This option is enabled by setting
782the flags element of C<$DB_BTREE> to R_DUP when creating the database.
783
88108326 784There are some difficulties in using the tied hash interface if you
785want to manipulate a BTREE database with duplicate keys. Consider this
786code:
787
610ab055 788 use strict ;
88108326 789 use DB_File ;
610ab055 790
791 use vars qw($filename %h ) ;
792
88108326 793 $filename = "tree" ;
794 unlink $filename ;
795
796 # Enable duplicate records
797 $DB_BTREE->{'flags'} = R_DUP ;
798
799 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
800 or die "Cannot open $filename: $!\n";
801
802 # Add some key/value pairs to the file
803 $h{'Wall'} = 'Larry' ;
804 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 805 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 806 $h{'Smith'} = 'John' ;
807 $h{'mouse'} = 'mickey' ;
808
809 # iterate through the associative array
810 # and print each key/value pair.
2c2d71f5 811 foreach (sort keys %h)
88108326 812 { print "$_ -> $h{$_}\n" }
813
f6b705ef 814 untie %h ;
815
88108326 816Here is the output:
817
818 Smith -> John
819 Wall -> Larry
820 Wall -> Larry
f6b705ef 821 Wall -> Larry
88108326 822 mouse -> mickey
823
f6b705ef 824As you can see 3 records have been successfully created with key C<Wall>
88108326 825- the only thing is, when they are retrieved from the database they
f6b705ef 826I<seem> to have the same value, namely C<Larry>. The problem is caused
827by the way that the associative array interface works. Basically, when
828the associative array interface is used to fetch the value associated
829with a given key, it will only ever retrieve the first value.
88108326 830
831Although it may not be immediately obvious from the code above, the
832associative array interface can be used to write values with duplicate
833keys, but it cannot be used to read them back from the database.
834
835The way to get around this problem is to use the Berkeley DB API method
836called C<seq>. This method allows sequential access to key/value
f6b705ef 837pairs. See L<THE API INTERFACE> for details of both the C<seq> method
838and the API in general.
88108326 839
840Here is the script above rewritten using the C<seq> API method.
841
610ab055 842 use strict ;
88108326 843 use DB_File ;
88108326 844
610ab055 845 use vars qw($filename $x %h $status $key $value) ;
846
88108326 847 $filename = "tree" ;
848 unlink $filename ;
849
850 # Enable duplicate records
851 $DB_BTREE->{'flags'} = R_DUP ;
852
853 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
854 or die "Cannot open $filename: $!\n";
855
856 # Add some key/value pairs to the file
857 $h{'Wall'} = 'Larry' ;
858 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 859 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 860 $h{'Smith'} = 'John' ;
861 $h{'mouse'} = 'mickey' ;
862
f6b705ef 863 # iterate through the btree using seq
88108326 864 # and print each key/value pair.
610ab055 865 $key = $value = 0 ;
f6b705ef 866 for ($status = $x->seq($key, $value, R_FIRST) ;
867 $status == 0 ;
868 $status = $x->seq($key, $value, R_NEXT) )
88108326 869 { print "$key -> $value\n" }
870
871 undef $x ;
872 untie %h ;
873
874that prints:
875
876 Smith -> John
877 Wall -> Brick
f6b705ef 878 Wall -> Brick
88108326 879 Wall -> Larry
880 mouse -> mickey
881
f6b705ef 882This time we have got all the key/value pairs, including the multiple
88108326 883values associated with the key C<Wall>.
884
6ca2e664 885To make life easier when dealing with duplicate keys, B<DB_File> comes with
886a few utility methods.
887
68dc0745 888=head2 The get_dup() Method
f6b705ef 889
6ca2e664 890The C<get_dup> method assists in
88108326 891reading duplicate values from BTREE databases. The method can take the
892following forms:
893
894 $count = $x->get_dup($key) ;
895 @list = $x->get_dup($key) ;
896 %list = $x->get_dup($key, 1) ;
897
898In a scalar context the method returns the number of values associated
899with the key, C<$key>.
900
901In list context, it returns all the values which match C<$key>. Note
f6b705ef 902that the values will be returned in an apparently random order.
88108326 903
7a2e2cd6 904In list context, if the second parameter is present and evaluates
905TRUE, the method returns an associative array. The keys of the
906associative array correspond to the values that matched in the BTREE
907and the values of the array are a count of the number of times that
908particular value occurred in the BTREE.
88108326 909
f6b705ef 910So assuming the database created above, we can use C<get_dup> like
88108326 911this:
912
2c2d71f5 913 use strict ;
914 use DB_File ;
915
916 use vars qw($filename $x %h ) ;
917
918 $filename = "tree" ;
919
920 # Enable duplicate records
921 $DB_BTREE->{'flags'} = R_DUP ;
922
923 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
924 or die "Cannot open $filename: $!\n";
925
610ab055 926 my $cnt = $x->get_dup("Wall") ;
88108326 927 print "Wall occurred $cnt times\n" ;
928
610ab055 929 my %hash = $x->get_dup("Wall", 1) ;
88108326 930 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 931 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 932
2c2d71f5 933 my @list = sort $x->get_dup("Wall") ;
88108326 934 print "Wall => [@list]\n" ;
935
f6b705ef 936 @list = $x->get_dup("Smith") ;
88108326 937 print "Smith => [@list]\n" ;
938
f6b705ef 939 @list = $x->get_dup("Dog") ;
88108326 940 print "Dog => [@list]\n" ;
941
942
943and it will print:
944
f6b705ef 945 Wall occurred 3 times
88108326 946 Larry is there
f6b705ef 947 There are 2 Brick Walls
948 Wall => [Brick Brick Larry]
88108326 949 Smith => [John]
950 Dog => []
3b35bae3 951
6ca2e664 952=head2 The find_dup() Method
953
954 $status = $X->find_dup($key, $value) ;
955
956This method checks for the existance of a specific key/value pair. If the
957pair exists, the cursor is left pointing to the pair and the method
958returns 0. Otherwise the method returns a non-zero value.
959
960Assuming the database from the previous example:
961
962 use strict ;
963 use DB_File ;
964
965 use vars qw($filename $x %h $found) ;
966
967 my $filename = "tree" ;
968
969 # Enable duplicate records
970 $DB_BTREE->{'flags'} = R_DUP ;
971
972 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
973 or die "Cannot open $filename: $!\n";
974
975 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
976 print "Larry Wall is $found there\n" ;
977
978 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
979 print "Harry Wall is $found there\n" ;
980
981 undef $x ;
982 untie %h ;
983
984prints this
985
2c2d71f5 986 Larry Wall is there
6ca2e664 987 Harry Wall is not there
988
989
990=head2 The del_dup() Method
991
992 $status = $X->del_dup($key, $value) ;
993
994This method deletes a specific key/value pair. It returns
9950 if they exist and have been deleted successfully.
996Otherwise the method returns a non-zero value.
997
998Again assuming the existance of the C<tree> database
999
1000 use strict ;
1001 use DB_File ;
1002
1003 use vars qw($filename $x %h $found) ;
1004
1005 my $filename = "tree" ;
1006
1007 # Enable duplicate records
1008 $DB_BTREE->{'flags'} = R_DUP ;
1009
1010 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1011 or die "Cannot open $filename: $!\n";
1012
1013 $x->del_dup("Wall", "Larry") ;
1014
1015 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1016 print "Larry Wall is $found there\n" ;
1017
1018 undef $x ;
1019 untie %h ;
1020
1021prints this
1022
1023 Larry Wall is not there
1024
f6b705ef 1025=head2 Matching Partial Keys
1026
1027The BTREE interface has a feature which allows partial keys to be
1028matched. This functionality is I<only> available when the C<seq> method
1029is used along with the R_CURSOR flag.
1030
1031 $x->seq($key, $value, R_CURSOR) ;
1032
1033Here is the relevant quote from the dbopen man page where it defines
1034the use of the R_CURSOR flag with seq:
1035
f6b705ef 1036 Note, for the DB_BTREE access method, the returned key is not
1037 necessarily an exact match for the specified key. The returned key
1038 is the smallest key greater than or equal to the specified key,
1039 permitting partial key matches and range searches.
1040
f6b705ef 1041In the example script below, the C<match> sub uses this feature to find
1042and print the first matching key/value pair given a partial key.
1043
610ab055 1044 use strict ;
f6b705ef 1045 use DB_File ;
1046 use Fcntl ;
610ab055 1047
1048 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 1049
1050 sub match
1051 {
1052 my $key = shift ;
610ab055 1053 my $value = 0;
f6b705ef 1054 my $orig_key = $key ;
1055 $x->seq($key, $value, R_CURSOR) ;
1056 print "$orig_key\t-> $key\t-> $value\n" ;
1057 }
1058
1059 $filename = "tree" ;
1060 unlink $filename ;
1061
1062 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1063 or die "Cannot open $filename: $!\n";
1064
1065 # Add some key/value pairs to the file
1066 $h{'mouse'} = 'mickey' ;
1067 $h{'Wall'} = 'Larry' ;
1068 $h{'Walls'} = 'Brick' ;
1069 $h{'Smith'} = 'John' ;
1070
1071
610ab055 1072 $key = $value = 0 ;
f6b705ef 1073 print "IN ORDER\n" ;
1074 for ($st = $x->seq($key, $value, R_FIRST) ;
1075 $st == 0 ;
1076 $st = $x->seq($key, $value, R_NEXT) )
1077
2c2d71f5 1078 { print "$key -> $value\n" }
f6b705ef 1079
1080 print "\nPARTIAL MATCH\n" ;
1081
1082 match "Wa" ;
1083 match "A" ;
1084 match "a" ;
1085
1086 undef $x ;
1087 untie %h ;
1088
1089Here is the output:
1090
1091 IN ORDER
1092 Smith -> John
1093 Wall -> Larry
1094 Walls -> Brick
1095 mouse -> mickey
1096
1097 PARTIAL MATCH
1098 Wa -> Wall -> Larry
1099 A -> Smith -> John
1100 a -> mouse -> mickey
1101
1102=head1 DB_RECNO
1103
1104DB_RECNO provides an interface to flat text files. Both variable and
1105fixed length records are supported.
3b35bae3 1106
6ca2e664 1107In order to make RECNO more compatible with Perl, the array offset for
88108326 1108all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1109
88108326 1110As with normal Perl arrays, a RECNO array can be accessed using
1111negative indexes. The index -1 refers to the last element of the array,
1112-2 the second last, and so on. Attempting to access an element before
1113the start of the array will raise a fatal run-time error.
3b35bae3 1114
68dc0745 1115=head2 The 'bval' Option
36477c24 1116
1117The operation of the bval option warrants some discussion. Here is the
1118definition of bval from the Berkeley DB 1.85 recno manual page:
1119
1120 The delimiting byte to be used to mark the end of a
1121 record for variable-length records, and the pad charac-
1122 ter for fixed-length records. If no value is speci-
1123 fied, newlines (``\n'') are used to mark the end of
1124 variable-length records and fixed-length records are
1125 padded with spaces.
1126
1127The second sentence is wrong. In actual fact bval will only default to
1128C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1129openinfo parameter is used at all, the value that happens to be in bval
1130will be used. That means you always have to specify bval when making
1131use of any of the options in the openinfo parameter. This documentation
1132error will be fixed in the next release of Berkeley DB.
1133
1134That clarifies the situation with regards Berkeley DB itself. What
1135about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1136quite useful, so B<DB_File> conforms to it.
36477c24 1137
1138That means that you can specify other options (e.g. cachesize) and
1139still have bval default to C<"\n"> for variable length records, and
1140space for fixed length records.
1141
f6b705ef 1142=head2 A Simple Example
3b35bae3 1143
6ca2e664 1144Here is a simple example that uses RECNO (if you are using a version
1145of Perl earlier than 5.004_57 this example won't work -- see
1146L<Extra RECNO Methods> for a workaround).
f6b705ef 1147
610ab055 1148 use strict ;
f6b705ef 1149 use DB_File ;
f6b705ef 1150
2c2d71f5 1151 my $filename = "text" ;
1152 unlink $filename ;
1153
610ab055 1154 my @h ;
2c2d71f5 1155 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_RECNO
f6b705ef 1156 or die "Cannot open file 'text': $!\n" ;
1157
1158 # Add a few key/value pairs to the file
1159 $h[0] = "orange" ;
1160 $h[1] = "blue" ;
1161 $h[2] = "yellow" ;
1162
6ca2e664 1163 push @h, "green", "black" ;
1164
1165 my $elements = scalar @h ;
1166 print "The array contains $elements entries\n" ;
1167
1168 my $last = pop @h ;
1169 print "popped $last\n" ;
1170
1171 unshift @h, "white" ;
1172 my $first = shift @h ;
1173 print "shifted $first\n" ;
1174
f6b705ef 1175 # Check for existence of a key
1176 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1177
1178 # use a negative index
1179 print "The last element is $h[-1]\n" ;
1180 print "The 2nd last element is $h[-2]\n" ;
1181
1182 untie @h ;
3b35bae3 1183
f6b705ef 1184Here is the output from the script:
1185
6ca2e664 1186 The array contains 5 entries
1187 popped black
2c2d71f5 1188 shifted white
f6b705ef 1189 Element 1 Exists with value blue
6ca2e664 1190 The last element is green
1191 The 2nd last element is yellow
f6b705ef 1192
6ca2e664 1193=head2 Extra RECNO Methods
f6b705ef 1194
045291aa 1195If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664 1196array interface is quite limited. In the example script above
1197C<push>, C<pop>, C<shift>, C<unshift>
1198or determining the array length will not work with a tied array.
045291aa 1199
1200To make the interface more useful for older versions of Perl, a number
1201of methods are supplied with B<DB_File> to simulate the missing array
1202operations. All these methods are accessed via the object returned from
1203the tie call.
f6b705ef 1204
1205Here are the methods:
1206
1207=over 5
3b35bae3 1208
f6b705ef 1209=item B<$X-E<gt>push(list) ;>
1210
1211Pushes the elements of C<list> to the end of the array.
1212
1213=item B<$value = $X-E<gt>pop ;>
1214
1215Removes and returns the last element of the array.
1216
1217=item B<$X-E<gt>shift>
1218
1219Removes and returns the first element of the array.
1220
1221=item B<$X-E<gt>unshift(list) ;>
1222
1223Pushes the elements of C<list> to the start of the array.
1224
1225=item B<$X-E<gt>length>
1226
1227Returns the number of elements in the array.
1228
1229=back
1230
1231=head2 Another Example
1232
1233Here is a more complete example that makes use of some of the methods
1234described above. It also makes use of the API interface directly (see
1235L<THE API INTERFACE>).
1236
1237 use strict ;
1238 use vars qw(@h $H $file $i) ;
1239 use DB_File ;
1240 use Fcntl ;
1241
1242 $file = "text" ;
1243
1244 unlink $file ;
1245
1246 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1247 or die "Cannot open file $file: $!\n" ;
1248
1249 # first create a text file to play with
1250 $h[0] = "zero" ;
1251 $h[1] = "one" ;
1252 $h[2] = "two" ;
1253 $h[3] = "three" ;
1254 $h[4] = "four" ;
1255
1256
1257 # Print the records in order.
1258 #
1259 # The length method is needed here because evaluating a tied
1260 # array in a scalar context does not return the number of
1261 # elements in the array.
1262
1263 print "\nORIGINAL\n" ;
1264 foreach $i (0 .. $H->length - 1) {
1265 print "$i: $h[$i]\n" ;
1266 }
1267
1268 # use the push & pop methods
1269 $a = $H->pop ;
1270 $H->push("last") ;
1271 print "\nThe last record was [$a]\n" ;
1272
1273 # and the shift & unshift methods
1274 $a = $H->shift ;
1275 $H->unshift("first") ;
1276 print "The first record was [$a]\n" ;
1277
1278 # Use the API to add a new record after record 2.
1279 $i = 2 ;
1280 $H->put($i, "Newbie", R_IAFTER) ;
1281
1282 # and a new record before record 1.
1283 $i = 1 ;
1284 $H->put($i, "New One", R_IBEFORE) ;
1285
1286 # delete record 3
1287 $H->del(3) ;
1288
1289 # now print the records in reverse order
1290 print "\nREVERSE\n" ;
1291 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1292 { print "$i: $h[$i]\n" }
1293
1294 # same again, but use the API functions instead
1295 print "\nREVERSE again\n" ;
610ab055 1296 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1297 for ($s = $H->seq($k, $v, R_LAST) ;
1298 $s == 0 ;
1299 $s = $H->seq($k, $v, R_PREV))
1300 { print "$k: $v\n" }
1301
1302 undef $H ;
1303 untie @h ;
1304
1305and this is what it outputs:
1306
1307 ORIGINAL
1308 0: zero
1309 1: one
1310 2: two
1311 3: three
1312 4: four
1313
1314 The last record was [four]
1315 The first record was [zero]
1316
1317 REVERSE
1318 5: last
1319 4: three
1320 3: Newbie
1321 2: one
1322 1: New One
1323 0: first
1324
1325 REVERSE again
1326 5: last
1327 4: three
1328 3: Newbie
1329 2: one
1330 1: New One
1331 0: first
1332
1333Notes:
1334
1335=over 5
1336
1337=item 1.
1338
1339Rather than iterating through the array, C<@h> like this:
1340
1341 foreach $i (@h)
1342
1343it is necessary to use either this:
1344
1345 foreach $i (0 .. $H->length - 1)
1346
1347or this:
1348
1349 for ($a = $H->get($k, $v, R_FIRST) ;
1350 $a == 0 ;
1351 $a = $H->get($k, $v, R_NEXT) )
1352
1353=item 2.
1354
1355Notice that both times the C<put> method was used the record index was
1356specified using a variable, C<$i>, rather than the literal value
1357itself. This is because C<put> will return the record number of the
1358inserted line via that parameter.
1359
1360=back
1361
1362=head1 THE API INTERFACE
3b35bae3 1363
1364As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1365possible to make direct use of most of the API functions defined in the
8e07c86e 1366Berkeley DB documentation.
3b35bae3 1367
88108326 1368To do this you need to store a copy of the object returned from the tie.
3b35bae3 1369
88108326 1370 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1371
8e07c86e 1372Once you have done that, you can access the Berkeley DB API functions
88108326 1373as B<DB_File> methods directly like this:
3b35bae3 1374
1375 $db->put($key, $value, R_NOOVERWRITE) ;
1376
88108326 1377B<Important:> If you have saved a copy of the object returned from
1378C<tie>, the underlying database file will I<not> be closed until both
1379the tied variable is untied and all copies of the saved object are
610ab055 1380destroyed.
88108326 1381
1382 use DB_File ;
1383 $db = tie %hash, "DB_File", "filename"
1384 or die "Cannot tie filename: $!" ;
1385 ...
1386 undef $db ;
1387 untie %hash ;
1388
9a2c4ce3 1389See L<The untie() Gotcha> for more details.
778183f3 1390
88108326 1391All the functions defined in L<dbopen> are available except for
1392close() and dbopen() itself. The B<DB_File> method interface to the
1393supported functions have been implemented to mirror the way Berkeley DB
1394works whenever possible. In particular note that:
1395
1396=over 5
1397
1398=item *
1399
1400The methods return a status value. All return 0 on success.
1401All return -1 to signify an error and set C<$!> to the exact
1402error code. The return code 1 generally (but not always) means that the
1403key specified did not exist in the database.
1404
1405Other return codes are defined. See below and in the Berkeley DB
1406documentation for details. The Berkeley DB documentation should be used
1407as the definitive source.
1408
1409=item *
3b35bae3 1410
88108326 1411Whenever a Berkeley DB function returns data via one of its parameters,
1412the equivalent B<DB_File> method does exactly the same.
3b35bae3 1413
88108326 1414=item *
1415
1416If you are careful, it is possible to mix API calls with the tied
1417hash/array interface in the same piece of code. Although only a few of
1418the methods used to implement the tied interface currently make use of
1419the cursor, you should always assume that the cursor has been changed
1420any time the tied hash/array interface is used. As an example, this
1421code will probably not do what you expect:
1422
1423 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1424 or die "Cannot tie $filename: $!" ;
1425
1426 # Get the first key/value pair and set the cursor
1427 $X->seq($key, $value, R_FIRST) ;
1428
1429 # this line will modify the cursor
1430 $count = scalar keys %x ;
1431
1432 # Get the second key/value pair.
1433 # oops, it didn't, it got the last key/value pair!
1434 $X->seq($key, $value, R_NEXT) ;
1435
1436The code above can be rearranged to get around the problem, like this:
1437
1438 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1439 or die "Cannot tie $filename: $!" ;
1440
1441 # this line will modify the cursor
1442 $count = scalar keys %x ;
1443
1444 # Get the first key/value pair and set the cursor
1445 $X->seq($key, $value, R_FIRST) ;
1446
1447 # Get the second key/value pair.
1448 # worked this time.
1449 $X->seq($key, $value, R_NEXT) ;
1450
1451=back
1452
1453All the constants defined in L<dbopen> for use in the flags parameters
1454in the methods defined below are also available. Refer to the Berkeley
1455DB documentation for the precise meaning of the flags values.
1456
1457Below is a list of the methods available.
3b35bae3 1458
1459=over 5
1460
f6b705ef 1461=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1462
1463Given a key (C<$key>) this method reads the value associated with it
1464from the database. The value read from the database is returned in the
1465C<$value> parameter.
3b35bae3 1466
88108326 1467If the key does not exist the method returns 1.
3b35bae3 1468
88108326 1469No flags are currently defined for this method.
3b35bae3 1470
f6b705ef 1471=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1472
88108326 1473Stores the key/value pair in the database.
1474
1475If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1476will have the record number of the inserted key/value pair set.
3b35bae3 1477
88108326 1478Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1479R_SETCURSOR.
1480
f6b705ef 1481=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1482
88108326 1483Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1484
88108326 1485A return code of 1 means that the requested key was not in the
1486database.
3b35bae3 1487
88108326 1488R_CURSOR is the only valid flag at present.
3b35bae3 1489
f6b705ef 1490=item B<$status = $X-E<gt>fd ;>
3b35bae3 1491
88108326 1492Returns the file descriptor for the underlying database.
3b35bae3 1493
f6b705ef 1494See L<Locking Databases> for an example of how to make use of the
88108326 1495C<fd> method to lock your database.
3b35bae3 1496
f6b705ef 1497=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1498
88108326 1499This interface allows sequential retrieval from the database. See
1500L<dbopen> for full details.
1501
1502Both the C<$key> and C<$value> parameters will be set to the key/value
1503pair read from the database.
1504
1505The flags parameter is mandatory. The valid flag values are R_CURSOR,
1506R_FIRST, R_LAST, R_NEXT and R_PREV.
1507
f6b705ef 1508=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1509
1510Flushes any cached buffers to disk.
1511
1512R_RECNOSYNC is the only valid flag at present.
3b35bae3 1513
1514=back
1515
cad2e5aa 1516=head1 DBM FILTERS
1517
1518A DBM Filter is a piece of code that is be used when you I<always>
1519want to make the same transformation to all keys and/or values in a
1520DBM database.
1521
1522There are four methods associated with DBM Filters. All work identically,
1523and each is used to install (or uninstall) a single DBM Filter. Each
1524expects a single parameter, namely a reference to a sub. The only
1525difference between them is the place that the filter is installed.
1526
1527To summarise:
1528
1529=over 5
1530
1531=item B<filter_store_key>
1532
1533If a filter has been installed with this method, it will be invoked
1534every time you write a key to a DBM database.
1535
1536=item B<filter_store_value>
1537
1538If a filter has been installed with this method, it will be invoked
1539every time you write a value to a DBM database.
1540
1541
1542=item B<filter_fetch_key>
1543
1544If a filter has been installed with this method, it will be invoked
1545every time you read a key from a DBM database.
1546
1547=item B<filter_fetch_value>
1548
1549If a filter has been installed with this method, it will be invoked
1550every time you read a value from a DBM database.
1551
1552=back
1553
1554You can use any combination of the methods, from none, to all four.
1555
1556All filter methods return the existing filter, if present, or C<undef>
1557in not.
1558
1559To delete a filter pass C<undef> to it.
1560
1561=head2 The Filter
1562
1563When each filter is called by Perl, a local copy of C<$_> will contain
1564the key or value to be filtered. Filtering is achieved by modifying
1565the contents of C<$_>. The return code from the filter is ignored.
1566
1567=head2 An Example -- the NULL termination problem.
1568
1569Consider the following scenario. You have a DBM database
1570that you need to share with a third-party C application. The C application
1571assumes that I<all> keys and values are NULL terminated. Unfortunately
1572when Perl writes to DBM databases it doesn't use NULL termination, so
1573your Perl application will have to manage NULL termination itself. When
1574you write to the database you will have to use something like this:
1575
1576 $hash{"$key\0"} = "$value\0" ;
1577
1578Similarly the NULL needs to be taken into account when you are considering
1579the length of existing keys/values.
1580
1581It would be much better if you could ignore the NULL terminations issue
1582in the main application code and have a mechanism that automatically
1583added the terminating NULL to all keys and values whenever you write to
1584the database and have them removed when you read from the database. As I'm
1585sure you have already guessed, this is a problem that DBM Filters can
1586fix very easily.
1587
1588 use strict ;
1589 use DB_File ;
1590
1591 my %hash ;
1592 my $filename = "/tmp/filt" ;
1593 unlink $filename ;
1594
1595 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1596 or die "Cannot open $filename: $!\n" ;
1597
1598 # Install DBM Filters
1599 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1600 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1601 $db->filter_fetch_value( sub { s/\0$// } ) ;
1602 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1603
1604 $hash{"abc"} = "def" ;
1605 my $a = $hash{"ABC"} ;
1606 # ...
1607 undef $db ;
1608 untie %hash ;
1609
1610Hopefully the contents of each of the filters should be
1611self-explanatory. Both "fetch" filters remove the terminating NULL,
1612and both "store" filters add a terminating NULL.
1613
1614
1615=head2 Another Example -- Key is a C int.
1616
1617Here is another real-life example. By default, whenever Perl writes to
1618a DBM database it always writes the key and value as strings. So when
1619you use this:
1620
1621 $hash{12345} = "soemthing" ;
1622
1623the key 12345 will get stored in the DBM database as the 5 byte string
1624"12345". If you actually want the key to be stored in the DBM database
1625as a C int, you will have to use C<pack> when writing, and C<unpack>
1626when reading.
1627
1628Here is a DBM Filter that does it:
1629
1630 use strict ;
1631 use DB_File ;
1632 my %hash ;
1633 my $filename = "/tmp/filt" ;
1634 unlink $filename ;
1635
1636
1637 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1638 or die "Cannot open $filename: $!\n" ;
1639
1640 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1641 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1642 $hash{123} = "def" ;
1643 # ...
1644 undef $db ;
1645 untie %hash ;
1646
1647This time only two filters have been used -- we only need to manipulate
1648the contents of the key, so it wasn't necessary to install any value
1649filters.
1650
f6b705ef 1651=head1 HINTS AND TIPS
3b35bae3 1652
3b35bae3 1653
cb1a09d0 1654=head2 Locking Databases
3b35bae3 1655
cb1a09d0 1656Concurrent access of a read-write database by several parties requires
1657them all to use some kind of locking. Here's an example of Tom's that
1658uses the I<fd> method to get the file descriptor, and then a careful
1659open() to give something Perl will flock() for you. Run this repeatedly
1660in the background to watch the locks granted in proper order.
3b35bae3 1661
cb1a09d0 1662 use DB_File;
1663
1664 use strict;
1665
1666 sub LOCK_SH { 1 }
1667 sub LOCK_EX { 2 }
1668 sub LOCK_NB { 4 }
1669 sub LOCK_UN { 8 }
1670
1671 my($oldval, $fd, $db, %db, $value, $key);
1672
1673 $key = shift || 'default';
1674 $value = shift || 'magic';
1675
1676 $value .= " $$";
1677
1678 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1679 || die "dbcreat /tmp/foo.db $!";
1680 $fd = $db->fd;
1681 print "$$: db fd is $fd\n";
1682 open(DB_FH, "+<&=$fd") || die "dup $!";
1683
1684
1685 unless (flock (DB_FH, LOCK_SH | LOCK_NB)) {
1686 print "$$: CONTENTION; can't read during write update!
1687 Waiting for read lock ($!) ....";
1688 unless (flock (DB_FH, LOCK_SH)) { die "flock: $!" }
1689 }
1690 print "$$: Read lock granted\n";
1691
1692 $oldval = $db{$key};
1693 print "$$: Old value was $oldval\n";
1694 flock(DB_FH, LOCK_UN);
1695
1696 unless (flock (DB_FH, LOCK_EX | LOCK_NB)) {
1697 print "$$: CONTENTION; must have exclusive lock!
1698 Waiting for write lock ($!) ....";
1699 unless (flock (DB_FH, LOCK_EX)) { die "flock: $!" }
1700 }
1701
1702 print "$$: Write lock granted\n";
1703 $db{$key} = $value;
610ab055 1704 $db->sync; # to flush
cb1a09d0 1705 sleep 10;
1706
1707 flock(DB_FH, LOCK_UN);
88108326 1708 undef $db;
cb1a09d0 1709 untie %db;
1710 close(DB_FH);
1711 print "$$: Updated db to $key=$value\n";
1712
68dc0745 1713=head2 Sharing Databases With C Applications
f6b705ef 1714
1715There is no technical reason why a Berkeley DB database cannot be
1716shared by both a Perl and a C application.
1717
1718The vast majority of problems that are reported in this area boil down
1719to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 1720not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 1721
1722Here is a real example. Netscape 2.0 keeps a record of the locations you
1723visit along with the time you last visited them in a DB_HASH database.
1724This is usually stored in the file F<~/.netscape/history.db>. The key
1725field in the database is the location string and the value field is the
1726time the location was last visited stored as a 4 byte binary value.
1727
1728If you haven't already guessed, the location string is stored with a
1729terminating NULL. This means you need to be careful when accessing the
1730database.
1731
1732Here is a snippet of code that is loosely based on Tom Christiansen's
1733I<ggh> script (available from your nearest CPAN archive in
1734F<authors/id/TOMC/scripts/nshist.gz>).
1735
610ab055 1736 use strict ;
f6b705ef 1737 use DB_File ;
1738 use Fcntl ;
f6b705ef 1739
610ab055 1740 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1741 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1742
1743 $HISTORY = "$dotdir/.netscape/history.db";
1744
1745 tie %hist_db, 'DB_File', $HISTORY
1746 or die "Cannot open $HISTORY: $!\n" ;;
1747
1748 # Dump the complete database
1749 while ( ($href, $binary_time) = each %hist_db ) {
1750
1751 # remove the terminating NULL
1752 $href =~ s/\x00$// ;
1753
1754 # convert the binary time into a user friendly string
1755 $date = localtime unpack("V", $binary_time);
1756 print "$date $href\n" ;
1757 }
1758
1759 # check for the existence of a specific key
1760 # remember to add the NULL
1761 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
1762 $date = localtime unpack("V", $binary_time) ;
1763 print "Last visited mox.perl.com on $date\n" ;
1764 }
1765 else {
1766 print "Never visited mox.perl.com\n"
1767 }
1768
1769 untie %hist_db ;
1770
68dc0745 1771=head2 The untie() Gotcha
778183f3 1772
7a2e2cd6 1773If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 1774recommended that you read L<perltie/The untie Gotcha>.
778183f3 1775
1776Even if you don't currently make use of the API interface, it is still
1777worth reading it.
1778
1779Here is an example which illustrates the problem from a B<DB_File>
1780perspective:
1781
1782 use DB_File ;
1783 use Fcntl ;
1784
1785 my %x ;
1786 my $X ;
1787
1788 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
1789 or die "Cannot tie first time: $!" ;
1790
1791 $x{123} = 456 ;
1792
1793 untie %x ;
1794
1795 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1796 or die "Cannot tie second time: $!" ;
1797
1798 untie %x ;
1799
1800When run, the script will produce this error message:
1801
1802 Cannot tie second time: Invalid argument at bad.file line 14.
1803
1804Although the error message above refers to the second tie() statement
1805in the script, the source of the problem is really with the untie()
1806statement that precedes it.
1807
1808Having read L<perltie> you will probably have already guessed that the
1809error is caused by the extra copy of the tied object stored in C<$X>.
1810If you haven't, then the problem boils down to the fact that the
1811B<DB_File> destructor, DESTROY, will not be called until I<all>
1812references to the tied object are destroyed. Both the tied variable,
1813C<%x>, and C<$X> above hold a reference to the object. The call to
1814untie() will destroy the first, but C<$X> still holds a valid
1815reference, so the destructor will not get called and the database file
1816F<tst.fil> will remain open. The fact that Berkeley DB then reports the
1817attempt to open a database that is alreday open via the catch-all
1818"Invalid argument" doesn't help.
1819
1820If you run the script with the C<-w> flag the error message becomes:
1821
1822 untie attempted while 1 inner references still exist at bad.file line 12.
1823 Cannot tie second time: Invalid argument at bad.file line 14.
1824
1825which pinpoints the real problem. Finally the script can now be
1826modified to fix the original problem by destroying the API object
1827before the untie:
1828
1829 ...
1830 $x{123} = 456 ;
1831
1832 undef $X ;
1833 untie %x ;
1834
1835 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
1836 ...
1837
f6b705ef 1838
1839=head1 COMMON QUESTIONS
1840
1841=head2 Why is there Perl source in my database?
1842
1843If you look at the contents of a database file created by DB_File,
1844there can sometimes be part of a Perl script included in it.
1845
1846This happens because Berkeley DB uses dynamic memory to allocate
1847buffers which will subsequently be written to the database file. Being
1848dynamic, the memory could have been used for anything before DB
1849malloced it. As Berkeley DB doesn't clear the memory once it has been
1850allocated, the unused portions will contain random junk. In the case
1851where a Perl script gets written to the database, the random junk will
1852correspond to an area of dynamic memory that happened to be used during
1853the compilation of the script.
1854
1855Unless you don't like the possibility of there being part of your Perl
1856scripts embedded in a database file, this is nothing to worry about.
1857
1858=head2 How do I store complex data structures with DB_File?
1859
1860Although B<DB_File> cannot do this directly, there is a module which
1861can layer transparently over B<DB_File> to accomplish this feat.
1862
1863Check out the MLDBM module, available on CPAN in the directory
1864F<modules/by-module/MLDBM>.
1865
1866=head2 What does "Invalid Argument" mean?
1867
1868You will get this error message when one of the parameters in the
1869C<tie> call is wrong. Unfortunately there are quite a few parameters to
1870get wrong, so it can be difficult to figure out which one it is.
1871
1872Here are a couple of possibilities:
1873
1874=over 5
1875
1876=item 1.
1877
610ab055 1878Attempting to reopen a database without closing it.
f6b705ef 1879
1880=item 2.
1881
1882Using the O_WRONLY flag.
1883
1884=back
1885
1886=head2 What does "Bareword 'DB_File' not allowed" mean?
1887
1888You will encounter this particular error message when you have the
1889C<strict 'subs'> pragma (or the full strict pragma) in your script.
1890Consider this script:
1891
1892 use strict ;
1893 use DB_File ;
1894 use vars qw(%x) ;
1895 tie %x, DB_File, "filename" ;
1896
1897Running it produces the error in question:
1898
1899 Bareword "DB_File" not allowed while "strict subs" in use
1900
1901To get around the error, place the word C<DB_File> in either single or
1902double quotes, like this:
1903
1904 tie %x, "DB_File", "filename" ;
1905
1906Although it might seem like a real pain, it is really worth the effort
1907of having a C<use strict> in all your scripts.
1908
cad2e5aa 1909=head1 REFERENCES
1910
1911Articles that are either about B<DB_File> or make use of it.
1912
1913=over 5
1914
1915=item 1.
1916
1917I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
1918Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
1919
1920=back
1921
cb1a09d0 1922=head1 HISTORY
1923
1f70e1ea 1924Moved to the Changes file.
610ab055 1925
1f70e1ea 1926=head1 BUGS
05475680 1927
1f70e1ea 1928Some older versions of Berkeley DB had problems with fixed length
1929records using the RECNO file format. This problem has been fixed since
1930version 1.85 of Berkeley DB.
e858de61 1931
1f70e1ea 1932I am sure there are bugs in the code. If you do find any, or can
1933suggest any enhancements, I would welcome your comments.
a6ed719b 1934
1f70e1ea 1935=head1 AVAILABILITY
a6ed719b 1936
1f70e1ea 1937B<DB_File> comes with the standard Perl source distribution. Look in
1938the directory F<ext/DB_File>. Given the amount of time between releases
1939of Perl the version that ships with Perl is quite likely to be out of
1940date, so the most recent version can always be found on CPAN (see
1941L<perlmod/CPAN> for details), in the directory
1942F<modules/by-module/DB_File>.
a6ed719b 1943
039d031f 1944This version of B<DB_File> will work with either version 1.x, 2.x or
19453.x of Berkeley DB, but is limited to the functionality provided by
1946version 1.
a6ed719b 1947
cad2e5aa 1948The official web site for Berkeley DB is F<http://www.sleepycat.com>.
039d031f 1949All versions of Berkeley DB are available there.
93af7a87 1950
1f70e1ea 1951Alternatively, Berkeley DB version 1 is available at your nearest CPAN
1952archive in F<src/misc/db.1.85.tar.gz>.
e858de61 1953
1f70e1ea 1954If you are running IRIX, then get Berkeley DB version 1 from
1955F<http://reality.sgi.com/ariel>. It has the patches necessary to
1956compile properly on IRIX 5.3.
610ab055 1957
1f70e1ea 1958=head1 COPYRIGHT
3b35bae3 1959
cad2e5aa 1960Copyright (c) 1995-1999 Paul Marquess. All rights reserved. This program
a9fd575d 1961is free software; you can redistribute it and/or modify it under the
1962same terms as Perl itself.
3b35bae3 1963
1f70e1ea 1964Although B<DB_File> is covered by the Perl license, the library it
1965makes use of, namely Berkeley DB, is not. Berkeley DB has its own
1966copyright and its own license. Please take the time to read it.
3b35bae3 1967
a9fd575d 1968Here are are few words taken from the Berkeley DB FAQ (at
1969http://www.sleepycat.com) regarding the license:
68dc0745 1970
a9fd575d 1971 Do I have to license DB to use it in Perl scripts?
3b35bae3 1972
a9fd575d 1973 No. The Berkeley DB license requires that software that uses
1974 Berkeley DB be freely redistributable. In the case of Perl, that
1975 software is Perl, and not your scripts. Any Perl scripts that you
1976 write are your property, including scripts that make use of
1977 Berkeley DB. Neither the Perl license nor the Berkeley DB license
1978 place any restriction on what you may do with them.
88108326 1979
1f70e1ea 1980If you are in any doubt about the license situation, contact either the
1981Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 1982
1983
3b35bae3 1984=head1 SEE ALSO
1985
9fe6733a 1986L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
1987L<dbmfilter>
3b35bae3 1988
3b35bae3 1989=head1 AUTHOR
1990
8e07c86e 1991The DB_File interface was written by Paul Marquess
6ca2e664 1992E<lt>Paul.Marquess@btinternet.comE<gt>.
d3ef3b8a 1993Questions about the DB system itself may be addressed to
1994E<lt>db@sleepycat.com<gt>.
3b35bae3 1995
1996=cut