DB_File 1.803
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6ca2e664 3# written by Paul Marquess (Paul.Marquess@btinternet.com)
962cee9f 4# last modified 1st March 2002
5# version 1.803
36477c24 6#
d63909e4 7# Copyright (c) 1995-2002 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
package DB_File::HASHINFO ;

require 5.00404;

use warnings;
use strict;
use Carp;
require Tie::Hash;
@DB_File::HASHINFO::ISA = qw(Tie::Hash);

# DB_File::HASHINFO -- a tied hash that only accepts a fixed set of element
# names.
#
# The tie object holds two hashes:
#   VALID - the set of element names that may be used
#   GOT   - the values that have been stored so far
#
# Reading, writing or deleting any name outside VALID croaks.  Subclasses
# change the permitted names by overriding TIEHASH.

# Constructor.  Ties a fresh hash to $pkg, so that every element access goes
# through the TIEHASH/FETCH/STORE/DELETE/EXISTS methods below, and returns a
# reference to that tied hash blessed into $pkg.
sub new
{
    my $pkg = shift ;
    my %x ;
    tie %x, $pkg ;
    bless \%x, $pkg ;
}


# Build the tie object: the names permitted for this class plus an empty
# store for their values.
sub TIEHASH
{
    my $pkg = shift ;

    bless { VALID => { map {$_, 1}
                       qw( bsize ffactor nelem cachesize hash lorder)
                     },
            GOT   => {}
          }, $pkg ;
}


# Return the value stored for $key (undef if it is a permitted name that has
# not been set).  Croaks for a name that is not in VALID.
sub FETCH
{
    my $self = shift ;
    my $key  = shift ;

    return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;

    my $pkg = ref $self ;
    croak "${pkg}::FETCH - Unknown element '$key'" ;
}


# Store $value under $key.  Croaks for a name that is not in VALID.
sub STORE
{
    my $self  = shift ;
    my $key   = shift ;
    my $value = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        $self->{GOT}{$key} = $value ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::STORE - Unknown element '$key'" ;
}

# Forget any value stored under $key.  Croaks for a name that is not in
# VALID.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    # Report the class of the tie object, exactly as FETCH and STORE do, so
    # that a subclass is named correctly in the message.
    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}

# True when $key is a permitted name.  Note that this tests VALID, not GOT:
# it does not say whether a value has actually been stored.
sub EXISTS
{
    my $self = shift ;
    my $key  = shift ;

    exists $self->{VALID}{$key} ;
}

# Croak on behalf of a tie method that this class deliberately does not
# support.  $method is the name reported in the message.
sub NotHere
{
    my $self   = shift ;
    my $method = shift ;

    croak ref($self) . " does not define the method ${method}" ;
}

# Iterating over, or clearing, an info hash is not supported.
sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
sub NEXTKEY  { my $self = shift ; $self->NotHere("NEXTKEY") }
sub CLEAR    { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 105
package DB_File::RECNOINFO ;

use warnings;
use strict ;

@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;

# Tie object for the RECNO info hash.  Everything except the list of
# permitted element names is inherited from DB_File::HASHINFO.
#
# VALID maps each permitted name to 1; GOT starts out empty and receives the
# values as they are stored.
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid ;
    $valid{$_} = 1
        for qw( bval cachesize psize flags lorder reclen bfname ) ;

    my $self = { VALID => \%valid, GOT => {} } ;
    return bless $self, $class ;
}
123
package DB_File::BTREEINFO ;

use warnings;
use strict ;

@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;

# Tie object for the BTREE info hash.  Only the list of permitted element
# names differs from the DB_File::HASHINFO base class.
#
# VALID maps each permitted name to 1; GOT is the (initially empty) store for
# the values assigned to those names.
sub TIEHASH
{
    my ($class) = @_ ;

    my @names = qw( flags cachesize maxkeypage minkeypage psize
                    compare prefix lorder ) ;

    my %valid ;
    @valid{@names} = (1) x @names ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
142
143
package DB_File ;

use warnings;
use strict;
our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
our ($db_version, $use_XSLoader);
use Carp;


$VERSION = "1.803" ;

# The three pre-defined info objects exported to users; each corresponds to
# one member of the C enumeration
#   typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;

require Tie::Hash;
require Exporter;
use AutoLoader;

# Decide at compile time how the compiled part of the module will be loaded:
# XSLoader when it can be required, DynaLoader (which then has to be a base
# class) otherwise.
BEGIN {
    $use_XSLoader = 1 ;
    {
        # keep any user-installed __DIE__ handler out of the probe
        local $SIG{__DIE__} ;
        eval { require XSLoader } ;
    }

    if ($@) {
        $use_XSLoader = 0 ;
        require DynaLoader;
        @ISA = qw(DynaLoader);
    }
}

push @ISA, qw(Tie::Hash Exporter);

# Names exported by default: the info objects followed by the constants.
@EXPORT = qw(
        $DB_BTREE $DB_HASH $DB_RECNO

        BTREEMAGIC
        BTREEVERSION
        DB_LOCK
        DB_SHMEM
        DB_TXN
        HASHMAGIC
        HASHVERSION
        MAX_PAGE_NUMBER
        MAX_PAGE_OFFSET
        MAX_REC_NUMBER
        RET_ERROR
        RET_SPECIAL
        RET_SUCCESS
        R_CURSOR
        R_DUP
        R_FIRST
        R_FIXEDLEN
        R_IAFTER
        R_IBEFORE
        R_LAST
        R_NEXT
        R_NOKEY
        R_NOOVERWRITE
        R_PREV
        R_RECNOSYNC
        R_SETCURSOR
        R_SNAPSHOT
        __R_UNUSED

);
8e07c86e 208
# Resolve a not-yet-defined constant on first use.
#
# The unqualified name of the sub being called is handed to constant(), which
# returns an ($error, $value) pair.  On error we croak; otherwise a sub
# returning the value is installed under the requested name, so later calls
# bypass AUTOLOAD, and control is transferred to it.
# NOTE(review): constant() is not defined in this part of the file --
# presumably it is supplied by the compiled half of the module; confirm.
sub AUTOLOAD {
    my $constname = $AUTOLOAD ;
    $constname =~ s/.*:://;

    my ($error, $val) = constant($constname);
    if ($error) {
        Carp::croak($error) ;
    }

    no strict 'refs';
    *{$AUTOLOAD} = sub { $val };
    goto &{$AUTOLOAD};
}
8e07c86e 218
f6b705ef 219
# Make all Fcntl O_XXX constants available for importing.  This is a
# best-effort step: if Fcntl cannot be loaded the error is swallowed and the
# export list is simply left without the O_ names.
eval {
    require Fcntl;
    my @open_flags = grep { /^O_/ } @Fcntl::EXPORT;

    # first we import what we want to export
    Fcntl->import(@open_flags);
    push @EXPORT, @open_flags;
};

# Load the compiled part of the module with whichever loader the BEGIN block
# selected.
if ($use_XSLoader) {
    XSLoader::load("DB_File", $VERSION) ;
}
else {
    DB_File->bootstrap($VERSION) ;
}
8e07c86e 232
233# Preloaded methods go here. Autoload methods go after __END__, and are
234# processed by the autosplit program.
235
# Common implementation behind TIEHASH and TIEARRAY.
#
# Arguments are those given to tie():
#   ($class, $filename, $flags, $mode, $info)
# Returns whatever DoTie_ returns, or undef if the placeholder file needed for
# a RECNO database could not be created.
#
# This must be called directly from TIEHASH or TIEARRAY: the name of the
# calling sub is what tells us whether a hash or an array is being tied.
sub tie_hash_or_array
{
    my (@arg) = @_ ;
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    # If the info argument is a reference to a tied hash (as returned by the
    # *INFO constructors), pass on the underlying tie object instead.
    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    # make recno in Berkeley DB version 2 work like recno in version 1.
    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
        $arg[1] and ! -e $arg[1]) {
        # Create an empty file.  Three-argument open on a lexical handle is
        # used so that the filename is never interpreted as part of the open
        # mode, and so that no global bareword handle is clobbered.
        open(my $fh, '>', $arg[1]) or return undef ;
        close $fh ;
        chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
    }

    DoTie_($tieHASH, @arg) ;
}
254
# tie() entry point for hashes.  All of the work is done by
# tie_hash_or_array, which looks at the name of its caller to tell hash and
# array ties apart -- so this has to remain an ordinary sub call.
sub TIEHASH
{
    return tie_hash_or_array(@_) ;
}
259
# tie() entry point for arrays.  All of the work is done by
# tie_hash_or_array, which looks at the name of its caller to tell hash and
# array ties apart -- so this has to remain an ordinary sub call.
sub TIEARRAY
{
    return tie_hash_or_array(@_) ;
}
88108326 264
# Remove every record from the database (the tie CLEAR method).
#
# Works in two passes: first walk the database with seq(), from R_FIRST
# through successive R_NEXT calls, collecting each key until seq() stops
# returning 0; then delete the collected keys in reverse order.
# NOTE(review): deleting last-to-first presumably keeps the remaining record
# numbers stable in a RECNO database -- confirm.
sub CLEAR
{
    my ($self) = @_ ;
    my ($key, $value) = (0, "") ;
    my @found ;

    for (my $status = $self->seq($key, $value, R_FIRST()) ;
         $status == 0 ;
         $status = $self->seq($key, $value, R_NEXT()) ) {
        push @found, $key ;
    }

    foreach my $doomed (reverse @found) {
        $self->del($doomed) ;
    }
}
281
# Tied-array EXTEND hook: deliberately does nothing and returns nothing.
sub EXTEND { return }
283
# Set the number of records to $length (the tie STORESIZE method).
#
# Shrinking deletes the surplus records one at a time, highest index first.
# Growing stores an empty string at index $length - 1.  Nothing is done when
# the size is already $length.
sub STORESIZE
{
    my ($self, $length) = @_ ;
    my $current_length = $self->length() ;

    if ($length < $current_length) {
        my $index = $current_length - 1 ;
        while ($index >= $length) {
            $self->del($index) ;
            -- $index ;
        }
    }
    elsif ($length > $current_length) {
        $self->put($length - 1, "") ;
    }
}
299
c5da4faf 300
# Tied-array implementation of splice().
#
#   $self->SPLICE(OFFSET [, LENGTH [, LIST]])
#
# Removes LENGTH records starting at OFFSET and inserts the elements of LIST
# in their place, following the rules quoted below from Perl's own splice()
# documentation.
#
# Returns the removed elements in list context, the last removed element
# (stringified), or undef if none, in scalar context, and nothing in void
# context.
#
# Dies if OFFSET points before the start of the array or if any of the
# underlying get/del/put calls reports failure.
sub SPLICE
{
    my $self = shift;
    my $offset = shift;
    if (not defined $offset) {
        carp 'Use of uninitialized value in splice';
        $offset = 0;
    }

    # An omitted LENGTH and an explicitly undefined LENGTH mean different
    # things, so remember which of the two we were given before unpacking it.
    my $length_given = @_ ? 1 : 0;
    my $length = shift;
    # Carping about definedness comes _after_ the OFFSET sanity check.
    # This is so we get the same error messages as Perl's splice().
    #

    my @list = @_;

    my $size = $self->FETCHSIZE();

    # 'If OFFSET is negative then it start that far from the end of
    # the array.'
    #
    if ($offset < 0) {
        my $new_offset = $size + $offset;
        if ($new_offset < 0) {
            die "Modification of non-creatable array value attempted, "
              . "subscript $offset";
        }
        $offset = $new_offset;
    }

    if ($offset > $size) {
        $offset = $size;
    }

    if (not $length_given) {
        # 'If LENGTH is omitted, removes everything from OFFSET onward.'
        $length = $size - $offset;
    }
    elsif (not defined $length) {
        carp 'Use of uninitialized value in splice';
        $length = 0;
    }

    # 'If LENGTH is negative, leave that many elements off the end of
    # the array.'
    #
    if ($length < 0) {
        $length = $size - $offset + $length;

        if ($length < 0) {
            # The user must have specified a length bigger than the
            # length of the array passed in.  But perl's splice()
            # doesn't catch this, it just behaves as for length=0.
            #
            $length = 0;
        }
    }

    if ($length > $size - $offset) {
        $length = $size - $offset;
    }

    # Finish off an error message for a failed database call and die with it.
    # $sep is the punctuation that introduces the status (':' for get and
    # del, ',' for put).
    my $fail = sub {
        my ($msg, $status, $sep) = @_;
        if ($status == 1) {
            $msg .= ' (no such element?)';
        }
        else {
            $msg .= "$sep error status $status";
            if (defined $! and $! ne '') {
                $msg .= ", message $!";
            }
        }
        die $msg;
    };

    # $num_elems holds the current number of elements in the database.
    my $num_elems = $size;

    # 'Removes the elements designated by OFFSET and LENGTH from an
    # array,'...
    #
    # The same index is read and deleted $length times: the code relies on
    # each deletion moving the following records down by one.
    my @removed = ();
    foreach (0 .. $length - 1) {
        my $old;
        my $status = $self->get($offset, $old);
        $fail->("error from Berkeley DB on get($offset, \$old)", $status, ':')
            if $status != 0;
        push @removed, $old;

        $status = $self->del($offset);
        $fail->("error from Berkeley DB on del($offset)", $status, ':')
            if $status != 0;

        -- $num_elems;
    }

    # ...'and replaces them with the elements of LIST, if any.'
    #
    # NOTE(review): this loop stops at the first undefined element of LIST,
    # so anything after an undef is not inserted.  Left as it was.
    my $pos = $offset;
    while (defined (my $elem = shift @list)) {
        my $old_pos = $pos;
        my $status;
        if ($pos >= $num_elems) {
            # at or past the end: a plain store appends
            $status = $self->put($pos, $elem);
        }
        else {
            # in the middle: insert in front of the record now at $pos
            $status = $self->put($pos, $elem, $self->R_IBEFORE);
        }

        $fail->("error from Berkeley DB on put($pos, $elem, ...)", $status, ',')
            if $status != 0;

        die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
            if $old_pos != $pos;

        ++ $pos;
        ++ $num_elems;
    }

    if (wantarray) {
        # 'In list context, returns the elements removed from the
        # array.'
        #
        return @removed;
    }
    elsif (defined wantarray) {
        # 'In scalar context, returns the last element removed, or
        # undef if no elements are removed.'
        #
        if (@removed) {
            my $last = pop @removed;
            return "$last";
        }
        else {
            return undef;
        }
    }

    # Void context: nothing to return.
    return;
}
# Lower-case alias; &SPLICE passes the caller's @_ straight through.
sub ::DB_File::splice { &SPLICE }
464
# $status = $db->find_dup($key, $value)
#
# Look for a record whose key is $key and whose value is $value.
#
# The search starts with a seq() call using R_CURSOR and then steps through
# the database with R_NEXT.  Returns 0 as soon as a record matching both the
# key and the value is seen; otherwise returns the non-zero status of the
# seq() call that ended the scan.  Croaks unless called with exactly two
# arguments after the object.
sub find_dup
{
    croak "Usage: \$db->find_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $origkey, $value_wanted) = @_ ;
    my $key   = $origkey ;
    my $value = 0 ;

    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0) {
        return 0 if $key eq $origkey and $value eq $value_wanted ;
        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $status ;
}
484
# $status = $db->del_dup($key, $value)
#
# Delete one particular key/value pair.  find_dup() is used to locate the
# pair; if it reports anything other than 0 that status is returned and
# nothing is deleted.  Otherwise the record is removed with a del() call
# using R_CURSOR, and the status of that call is returned.  Croaks unless
# called with exactly two arguments after the object.
sub del_dup
{
    croak "Usage: \$db->del_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $key, $value) = @_ ;

    my ($found) = $db->find_dup($key, $value) ;
    if ($found != 0) {
        return $found ;
    }

    my $outcome = $db->del($key, R_CURSOR()) ;
    return $outcome ;
}
498
# $count = $db->get_dup($key)
# @list  = $db->get_dup($key)
# %list  = $db->get_dup($key, 1)
#
# Collect the values stored under $key.
#
# In scalar (or void) context the number of matching records is returned.
# In list context the matching values are returned -- or, when $flag is
# true, a hash mapping each distinct value to the number of times it
# occurred.  Croaks unless called with one or two arguments after the
# object.
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;

    my $want_list = wantarray ;
    my $origkey   = $key ;
    my $value     = 0 ;
    my $count     = 0 ;
    my (%tally, @matches) ;

    # Start at the cursor position for $key and keep stepping until seq()
    # reports a non-zero status (end of data or an error) or hands back a
    # different key.
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $origkey) {

        # save the value or count number of matches
        if    (not $want_list) { ++ $count }
        elsif ($flag)          { ++ $tally{$value} }
        else                   { push @matches, $value }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $count unless $want_list ;
    return %tally if $flag ;
    return @matches ;
}
535
536
8e07c86e 5371;
538__END__
539
3b35bae3 540=head1 NAME
541
1f70e1ea 542DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 543
544=head1 SYNOPSIS
545
bbc7dcd2 546 use DB_File;
547
88108326 548 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
549 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
550 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 551
3b35bae3 552 $status = $X->del($key [, $flags]) ;
553 $status = $X->put($key, $value [, $flags]) ;
554 $status = $X->get($key, $value [, $flags]) ;
760ac839 555 $status = $X->seq($key, $value, $flags) ;
3b35bae3 556 $status = $X->sync([$flags]) ;
557 $status = $X->fd ;
760ac839 558
f6b705ef 559 # BTREE only
88108326 560 $count = $X->get_dup($key) ;
561 @list = $X->get_dup($key) ;
562 %list = $X->get_dup($key, 1) ;
6ca2e664 563 $status = $X->find_dup($key, $value) ;
564 $status = $X->del_dup($key, $value) ;
88108326 565
f6b705ef 566 # RECNO only
567 $a = $X->length;
568 $a = $X->pop ;
569 $X->push(list);
570 $a = $X->shift;
571 $X->unshift(list);
c5da4faf 572 @r = $X->splice(offset, length, elements);
f6b705ef 573
cad2e5aa 574 # DBM Filters
575 $old_filter = $db->filter_store_key ( sub { ... } ) ;
576 $old_filter = $db->filter_store_value( sub { ... } ) ;
577 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
578 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
579
3b35bae3 580 untie %hash ;
581 untie @array ;
582
583=head1 DESCRIPTION
584
8e07c86e 585B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 586facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 587version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f 588It is assumed that you have a copy of the Berkeley DB manual pages at
589hand when reading this documentation. The interface defined here
590mirrors the Berkeley DB interface closely.
68dc0745 591
8e07c86e 592Berkeley DB is a C library which provides a consistent interface to a
593number of database formats. B<DB_File> provides an interface to all
594three of the database types currently supported by Berkeley DB.
3b35bae3 595
596The file types are:
597
598=over 5
599
88108326 600=item B<DB_HASH>
3b35bae3 601
88108326 602This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 603files. This is equivalent to the functionality provided by other
604hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
605the files created using DB_HASH are not compatible with any of the
606other packages mentioned.
3b35bae3 607
8e07c86e 608A default hashing algorithm, which will be adequate for most
609applications, is built into Berkeley DB. If you do need to use your own
610hashing algorithm it is possible to write your own in Perl and have
611B<DB_File> use it instead.
3b35bae3 612
88108326 613=item B<DB_BTREE>
614
615The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 616sorted, balanced binary tree.
3b35bae3 617
8e07c86e 618As with the DB_HASH format, it is possible to provide a user defined
619Perl routine to perform the comparison of keys. By default, though, the
620keys are stored in lexical order.
3b35bae3 621
88108326 622=item B<DB_RECNO>
3b35bae3 623
8e07c86e 624DB_RECNO allows both fixed-length and variable-length flat text files
625to be manipulated using the same key/value pair interface as in DB_HASH
626and DB_BTREE. In this case the key will consist of a record (line)
627number.
3b35bae3 628
629=back
630
e5021521 631=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea 632
633Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 634it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 635limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 636version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 637like version 1. This feature allows B<DB_File> scripts that were built
e5021521 638with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea 639
640If you want to make use of the new features available in Berkeley DB
b90e71be 6412.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 642
e5021521 643B<Note:> The database file format has changed multiple times in Berkeley
644DB version 2, 3 and 4. If you cannot recreate your databases, you
645must dump any existing databases with either the C<db_dump> or the
646C<db_dump185> utility that comes with Berkeley DB.
647Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
648your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea 649documentation for further details.
650
e5021521 651Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 652DB with DB_File.
1f70e1ea 653
68dc0745 654=head2 Interface to Berkeley DB
3b35bae3 655
656B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 657in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
658allows B<DB_File> to access Berkeley DB files using either an
659associative array (for DB_HASH & DB_BTREE file types) or an ordinary
660array (for the DB_RECNO file type).
3b35bae3 661
88108326 662In addition to the tie() interface, it is also possible to access most
663of the functions provided in the Berkeley DB API directly.
f6b705ef 664See L<THE API INTERFACE>.
3b35bae3 665
88108326 666=head2 Opening a Berkeley DB Database File
3b35bae3 667
8e07c86e 668Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 669Here is the C prototype for dbopen():
3b35bae3 670
671 DB*
672 dbopen (const char * file, int flags, int mode,
673 DBTYPE type, const void * openinfo)
674
675The parameter C<type> is an enumeration which specifies which of the 3
676interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
677Depending on which of these is actually chosen, the final parameter,
678I<openinfo> points to a data structure which allows tailoring of the
679specific interface method.
680
8e07c86e 681This interface is handled slightly differently in B<DB_File>. Here is
88108326 682an equivalent call using B<DB_File>:
3b35bae3 683
88108326 684 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 685
8e07c86e 686The C<filename>, C<flags> and C<mode> parameters are the direct
687equivalent of their dbopen() counterparts. The final parameter $DB_HASH
688performs the function of both the C<type> and C<openinfo> parameters in
689dbopen().
3b35bae3 690
88108326 691In the example above $DB_HASH is actually a pre-defined reference to a
692hash object. B<DB_File> has three of these pre-defined references.
693Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 694
The keys allowed in each of these pre-defined references are limited to
696the names used in the equivalent C structure. So, for example, the
697$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 698C<ffactor>, C<hash>, C<lorder> and C<nelem>.
699
700To change one of these elements, just assign to it like this:
701
702 $DB_HASH->{'cachesize'} = 10000 ;
703
704The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
705usually adequate for most applications. If you do need to create extra
706instances of these objects, constructors are available for each file
707type.
708
709Here are examples of the constructors and the valid options available
710for DB_HASH, DB_BTREE and DB_RECNO respectively.
711
712 $a = new DB_File::HASHINFO ;
713 $a->{'bsize'} ;
714 $a->{'cachesize'} ;
715 $a->{'ffactor'};
716 $a->{'hash'} ;
717 $a->{'lorder'} ;
718 $a->{'nelem'} ;
719
720 $b = new DB_File::BTREEINFO ;
721 $b->{'flags'} ;
722 $b->{'cachesize'} ;
723 $b->{'maxkeypage'} ;
724 $b->{'minkeypage'} ;
725 $b->{'psize'} ;
726 $b->{'compare'} ;
727 $b->{'prefix'} ;
728 $b->{'lorder'} ;
729
730 $c = new DB_File::RECNOINFO ;
731 $c->{'bval'} ;
732 $c->{'cachesize'} ;
733 $c->{'psize'} ;
734 $c->{'flags'} ;
735 $c->{'lorder'} ;
736 $c->{'reclen'} ;
737 $c->{'bfname'} ;
738
739The values stored in the hashes above are mostly the direct equivalent
of their C counterpart. Like their C counterparts, all are set to
default values - that means you don't have to set I<all> of the
88108326 742values when you only want to change one. Here is an example:
743
744 $a = new DB_File::HASHINFO ;
745 $a->{'cachesize'} = 12345 ;
746 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
747
36477c24 748A few of the options need extra discussion here. When used, the C
88108326 749equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
750to C functions. In B<DB_File> these keys are used to store references
751to Perl subs. Below are templates for each of the subs:
752
753 sub hash
754 {
755 my ($data) = @_ ;
756 ...
757 # return the hash value for $data
758 return $hash ;
759 }
3b35bae3 760
88108326 761 sub compare
762 {
        my ($key1, $key2) = @_ ;
764 ...
765 # return 0 if $key1 eq $key2
766 # -1 if $key1 lt $key2
767 # 1 if $key1 gt $key2
768 return (-1 , 0 or 1) ;
769 }
3b35bae3 770
88108326 771 sub prefix
772 {
        my ($key1, $key2) = @_ ;
774 ...
775 # return number of bytes of $key2 which are
776 # necessary to determine that it is greater than $key1
777 return $bytes ;
778 }
3b35bae3 779
f6b705ef 780See L<Changing the BTREE sort order> for an example of using the
781C<compare> template.
88108326 782
36477c24 783If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 784C<bval>, you should check out L<The 'bval' Option>.
36477c24 785
88108326 786=head2 Default Parameters
787
788It is possible to omit some or all of the final 4 parameters in the
789call to C<tie> and let them take default values. As DB_HASH is the most
790common file format used, the call:
791
792 tie %A, "DB_File", "filename" ;
793
794is equivalent to:
795
18d2dc8c 796 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 797
798It is also possible to omit the filename parameter as well, so the
799call:
800
801 tie %A, "DB_File" ;
802
803is equivalent to:
804
18d2dc8c 805 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 806
f6b705ef 807See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 808in place of a filename.
809
f6b705ef 810=head2 In Memory Databases
811
812Berkeley DB allows the creation of in-memory databases by using NULL
813(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
814uses C<undef> instead of NULL to provide this functionality.
815
816=head1 DB_HASH
817
818The DB_HASH file format is probably the most commonly used of the three
819file formats that B<DB_File> supports. It is also very straightforward
820to use.
821
68dc0745 822=head2 A Simple Example
f6b705ef 823
824This example shows how to create a database, add key/value pairs to the
825database, delete keys/value pairs and finally how to enumerate the
826contents of the database.
827
3245f058 828 use warnings ;
610ab055 829 use strict ;
f6b705ef 830 use DB_File ;
07200f1b 831 our (%h, $k, $v) ;
f6b705ef 832
2c2d71f5 833 unlink "fruit" ;
45a340cb 834 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef 835 or die "Cannot open file 'fruit': $!\n";
836
837 # Add a few key/value pairs to the file
838 $h{"apple"} = "red" ;
839 $h{"orange"} = "orange" ;
840 $h{"banana"} = "yellow" ;
841 $h{"tomato"} = "red" ;
842
843 # Check for existence of a key
844 print "Banana Exists\n\n" if $h{"banana"} ;
845
846 # Delete a key/value pair.
847 delete $h{"apple"} ;
848
849 # print the contents of the file
850 while (($k, $v) = each %h)
851 { print "$k -> $v\n" }
852
853 untie %h ;
854
Here is the output:
856
857 Banana Exists
bbc7dcd2 858
f6b705ef 859 orange -> orange
860 tomato -> red
861 banana -> yellow
862
Note that, as with ordinary associative arrays, the keys are retrieved
in an apparently random order.
865
866=head1 DB_BTREE
867
868The DB_BTREE format is useful when you want to store data in a given
869order. By default the keys will be stored in lexical order, but as you
870will see from the example shown in the next section, it is very easy to
871define your own sorting function.
872
873=head2 Changing the BTREE sort order
874
875This script shows how to override the default sorting algorithm that
876BTREE uses. Instead of using the normal lexical ordering, a case
877insensitive compare function will be used.
88108326 878
3245f058 879 use warnings ;
610ab055 880 use strict ;
f6b705ef 881 use DB_File ;
610ab055 882
883 my %h ;
f6b705ef 884
885 sub Compare
886 {
887 my ($key1, $key2) = @_ ;
888 "\L$key1" cmp "\L$key2" ;
889 }
890
891 # specify the Perl sub that will do the comparison
892 $DB_BTREE->{'compare'} = \&Compare ;
893
2c2d71f5 894 unlink "tree" ;
45a340cb 895 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 896 or die "Cannot open file 'tree': $!\n" ;
897
898 # Add a key/value pair to the file
899 $h{'Wall'} = 'Larry' ;
900 $h{'Smith'} = 'John' ;
901 $h{'mouse'} = 'mickey' ;
902 $h{'duck'} = 'donald' ;
903
904 # Delete
905 delete $h{"duck"} ;
906
907 # Cycle through the keys printing them in order.
908 # Note it is not necessary to sort the keys as
909 # the btree will have kept them in order automatically.
910 foreach (keys %h)
911 { print "$_\n" }
912
913 untie %h ;
914
915Here is the output from the code above.
916
917 mouse
918 Smith
919 Wall
920
There are a few points to bear in mind if you want to change the
922ordering in a BTREE database:
923
924=over 5
925
926=item 1.
927
928The new compare function must be specified when you create the database.
929
930=item 2.
931
932You cannot change the ordering once the database has been created. Thus
933you must use the same compare function every time you access the
88108326 934database.
935
=item 3.
937
938Duplicate keys are entirely defined by the comparison function.
939In the case-insensitive example above, the keys: 'KEY' and 'key'
940would be considered duplicates, and assigning to the second one
52ffee89 941would overwrite the first. If duplicates are allowed for (with the
R_DUP flag discussed below), only a single copy of duplicate keys
943is stored in the database --- so (again with example above) assigning
944three values to the keys: 'KEY', 'Key', and 'key' would leave just
945the first key: 'KEY' in the database with three values. For some
946situations this results in information loss, so care should be taken
947to provide fully qualified comparison functions when necessary.
948For example, the above comparison routine could be modified to
949additionally compare case-sensitively if two keys are equal in the
950case insensitive comparison:
951
952 sub compare {
953 my($key1, $key2) = @_;
954 lc $key1 cmp lc $key2 ||
955 $key1 cmp $key2;
956 }
957
958And now you will only have duplicates when the keys themselves
959are truly the same. (note: in versions of the db library prior to
960about November 1996, such duplicate keys were retained so it was
961possible to recover the original keys in sets of keys that
962compared as equal).
963
964
f6b705ef 965=back
966
68dc0745 967=head2 Handling Duplicate Keys
f6b705ef 968
969The BTREE file type optionally allows a single key to be associated
970with an arbitrary number of values. This option is enabled by setting
971the flags element of C<$DB_BTREE> to R_DUP when creating the database.
972
88108326 973There are some difficulties in using the tied hash interface if you
974want to manipulate a BTREE database with duplicate keys. Consider this
975code:
976
3245f058 977 use warnings ;
610ab055 978 use strict ;
88108326 979 use DB_File ;
610ab055 980
962cee9f 981 my ($filename, %h) ;
610ab055 982
88108326 983 $filename = "tree" ;
984 unlink $filename ;
bbc7dcd2 985
88108326 986 # Enable duplicate records
987 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 988
45a340cb 989 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 990 or die "Cannot open $filename: $!\n";
bbc7dcd2 991
88108326 992 # Add some key/value pairs to the file
993 $h{'Wall'} = 'Larry' ;
994 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 995 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 996 $h{'Smith'} = 'John' ;
997 $h{'mouse'} = 'mickey' ;
998
999 # iterate through the associative array
1000 # and print each key/value pair.
2c2d71f5 1001 foreach (sort keys %h)
88108326 1002 { print "$_ -> $h{$_}\n" }
1003
f6b705ef 1004 untie %h ;
1005
88108326 1006Here is the output:
1007
1008 Smith -> John
1009 Wall -> Larry
1010 Wall -> Larry
f6b705ef 1011 Wall -> Larry
88108326 1012 mouse -> mickey
1013
f6b705ef 1014As you can see 3 records have been successfully created with key C<Wall>
88108326 1015- the only thing is, when they are retrieved from the database they
f6b705ef 1016I<seem> to have the same value, namely C<Larry>. The problem is caused
1017by the way that the associative array interface works. Basically, when
1018the associative array interface is used to fetch the value associated
1019with a given key, it will only ever retrieve the first value.
88108326 1020
1021Although it may not be immediately obvious from the code above, the
1022associative array interface can be used to write values with duplicate
1023keys, but it cannot be used to read them back from the database.
1024
1025The way to get around this problem is to use the Berkeley DB API method
1026called C<seq>. This method allows sequential access to key/value
f6b705ef 1027pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1028and the API in general.
88108326 1029
1030Here is the script above rewritten using the C<seq> API method.
1031
3245f058 1032 use warnings ;
610ab055 1033 use strict ;
88108326 1034 use DB_File ;
bbc7dcd2 1035
962cee9f 1036 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1037
88108326 1038 $filename = "tree" ;
1039 unlink $filename ;
bbc7dcd2 1040
88108326 1041 # Enable duplicate records
1042 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1043
45a340cb 1044 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1045 or die "Cannot open $filename: $!\n";
bbc7dcd2 1046
88108326 1047 # Add some key/value pairs to the file
1048 $h{'Wall'} = 'Larry' ;
1049 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1050 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1051 $h{'Smith'} = 'John' ;
1052 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1053
f6b705ef 1054 # iterate through the btree using seq
88108326 1055 # and print each key/value pair.
610ab055 1056 $key = $value = 0 ;
f6b705ef 1057 for ($status = $x->seq($key, $value, R_FIRST) ;
1058 $status == 0 ;
1059 $status = $x->seq($key, $value, R_NEXT) )
88108326 1060 { print "$key -> $value\n" }
bbc7dcd2 1061
88108326 1062 undef $x ;
1063 untie %h ;
1064
1065that prints:
1066
1067 Smith -> John
1068 Wall -> Brick
f6b705ef 1069 Wall -> Brick
88108326 1070 Wall -> Larry
1071 mouse -> mickey
1072
f6b705ef 1073This time we have got all the key/value pairs, including the multiple
88108326 1074values associated with the key C<Wall>.
1075
6ca2e664 1076To make life easier when dealing with duplicate keys, B<DB_File> comes with
1077a few utility methods.
1078
68dc0745 1079=head2 The get_dup() Method
f6b705ef 1080
6ca2e664 1081The C<get_dup> method assists in
88108326 1082reading duplicate values from BTREE databases. The method can take the
1083following forms:
1084
1085 $count = $x->get_dup($key) ;
1086 @list = $x->get_dup($key) ;
1087 %list = $x->get_dup($key, 1) ;
1088
1089In a scalar context the method returns the number of values associated
1090with the key, C<$key>.
1091
1092In list context, it returns all the values which match C<$key>. Note
f6b705ef 1093that the values will be returned in an apparently random order.
88108326 1094
7a2e2cd6 1095In list context, if the second parameter is present and evaluates
1096TRUE, the method returns an associative array. The keys of the
1097associative array correspond to the values that matched in the BTREE
1098and the values of the array are a count of the number of times that
1099particular value occurred in the BTREE.
88108326 1100
f6b705ef 1101So assuming the database created above, we can use C<get_dup> like
88108326 1102this:
1103
3245f058 1104 use warnings ;
2c2d71f5 1105 use strict ;
1106 use DB_File ;
bbc7dcd2 1107
962cee9f 1108 my ($filename, $x, %h) ;
2c2d71f5 1109
1110 $filename = "tree" ;
bbc7dcd2 1111
2c2d71f5 1112 # Enable duplicate records
1113 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1114
45a340cb 1115 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
2c2d71f5 1116 or die "Cannot open $filename: $!\n";
1117
610ab055 1118 my $cnt = $x->get_dup("Wall") ;
88108326 1119 print "Wall occurred $cnt times\n" ;
1120
610ab055 1121 my %hash = $x->get_dup("Wall", 1) ;
88108326 1122 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1123 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1124
2c2d71f5 1125 my @list = sort $x->get_dup("Wall") ;
88108326 1126 print "Wall => [@list]\n" ;
1127
f6b705ef 1128 @list = $x->get_dup("Smith") ;
88108326 1129 print "Smith => [@list]\n" ;
bbc7dcd2 1130
f6b705ef 1131 @list = $x->get_dup("Dog") ;
88108326 1132 print "Dog => [@list]\n" ;
1133
1134
1135and it will print:
1136
f6b705ef 1137 Wall occurred 3 times
88108326 1138 Larry is there
f6b705ef 1139 There are 2 Brick Walls
1140 Wall => [Brick Brick Larry]
88108326 1141 Smith => [John]
1142 Dog => []
3b35bae3 1143
6ca2e664 1144=head2 The find_dup() Method
1145
1146 $status = $X->find_dup($key, $value) ;
1147
b90e71be 1148This method checks for the existence of a specific key/value pair. If the
6ca2e664 1149pair exists, the cursor is left pointing to the pair and the method
1150returns 0. Otherwise the method returns a non-zero value.
1151
1152Assuming the database from the previous example:
1153
3245f058 1154 use warnings ;
6ca2e664 1155 use strict ;
1156 use DB_File ;
bbc7dcd2 1157
962cee9f 1158 my ($filename, $x, %h, $found) ;
6ca2e664 1159
07200f1b 1160 $filename = "tree" ;
bbc7dcd2 1161
6ca2e664 1162 # Enable duplicate records
1163 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1164
45a340cb 1165 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664 1166 or die "Cannot open $filename: $!\n";
1167
1168 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1169 print "Larry Wall is $found there\n" ;
bbc7dcd2 1170
6ca2e664 1171 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1172 print "Harry Wall is $found there\n" ;
bbc7dcd2 1173
6ca2e664 1174 undef $x ;
1175 untie %h ;
1176
1177prints this
1178
2c2d71f5 1179 Larry Wall is there
6ca2e664 1180 Harry Wall is not there
1181
1182
1183=head2 The del_dup() Method
1184
1185 $status = $X->del_dup($key, $value) ;
1186
1187This method deletes a specific key/value pair. It returns
11880 if they exist and have been deleted successfully.
1189Otherwise the method returns a non-zero value.
1190
b90e71be 1191Again assuming the existence of the C<tree> database:
6ca2e664 1192
3245f058 1193 use warnings ;
6ca2e664 1194 use strict ;
1195 use DB_File ;
bbc7dcd2 1196
962cee9f 1197 my ($filename, $x, %h, $found) ;
6ca2e664 1198
07200f1b 1199 $filename = "tree" ;
bbc7dcd2 1200
6ca2e664 1201 # Enable duplicate records
1202 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1203
45a340cb 1204 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664 1205 or die "Cannot open $filename: $!\n";
1206
1207 $x->del_dup("Wall", "Larry") ;
1208
1209 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1210 print "Larry Wall is $found there\n" ;
bbc7dcd2 1211
6ca2e664 1212 undef $x ;
1213 untie %h ;
1214
1215prints this
1216
1217 Larry Wall is not there
1218
f6b705ef 1219=head2 Matching Partial Keys
1220
1221The BTREE interface has a feature which allows partial keys to be
1222matched. This functionality is I<only> available when the C<seq> method
1223is used along with the R_CURSOR flag.
1224
1225 $x->seq($key, $value, R_CURSOR) ;
1226
1227Here is the relevant quote from the dbopen man page where it defines
1228the use of the R_CURSOR flag with seq:
1229
f6b705ef 1230 Note, for the DB_BTREE access method, the returned key is not
1231 necessarily an exact match for the specified key. The returned key
1232 is the smallest key greater than or equal to the specified key,
1233 permitting partial key matches and range searches.
1234
f6b705ef 1235In the example script below, the C<match> sub uses this feature to find
1236and print the first matching key/value pair given a partial key.
1237
3245f058 1238 use warnings ;
610ab055 1239 use strict ;
f6b705ef 1240 use DB_File ;
1241 use Fcntl ;
610ab055 1242
962cee9f 1243 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef 1244
1245 sub match
1246 {
1247 my $key = shift ;
610ab055 1248 my $value = 0;
f6b705ef 1249 my $orig_key = $key ;
1250 $x->seq($key, $value, R_CURSOR) ;
1251 print "$orig_key\t-> $key\t-> $value\n" ;
1252 }
1253
1254 $filename = "tree" ;
1255 unlink $filename ;
1256
45a340cb 1257 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1258 or die "Cannot open $filename: $!\n";
bbc7dcd2 1259
f6b705ef 1260 # Add some key/value pairs to the file
1261 $h{'mouse'} = 'mickey' ;
1262 $h{'Wall'} = 'Larry' ;
1263 $h{'Walls'} = 'Brick' ;
1264 $h{'Smith'} = 'John' ;
bbc7dcd2 1265
f6b705ef 1266
610ab055 1267 $key = $value = 0 ;
f6b705ef 1268 print "IN ORDER\n" ;
1269 for ($st = $x->seq($key, $value, R_FIRST) ;
1270 $st == 0 ;
1271 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1272
2c2d71f5 1273 { print "$key -> $value\n" }
bbc7dcd2 1274
f6b705ef 1275 print "\nPARTIAL MATCH\n" ;
1276
1277 match "Wa" ;
1278 match "A" ;
1279 match "a" ;
1280
1281 undef $x ;
1282 untie %h ;
1283
1284Here is the output:
1285
1286 IN ORDER
1287 Smith -> John
1288 Wall -> Larry
1289 Walls -> Brick
1290 mouse -> mickey
1291
1292 PARTIAL MATCH
1293 Wa -> Wall -> Larry
1294 A -> Smith -> John
1295 a -> mouse -> mickey
1296
1297=head1 DB_RECNO
1298
1299DB_RECNO provides an interface to flat text files. Both variable and
1300fixed length records are supported.
3b35bae3 1301
6ca2e664 1302In order to make RECNO more compatible with Perl, the array offset for
88108326 1303all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1304
88108326 1305As with normal Perl arrays, a RECNO array can be accessed using
1306negative indexes. The index -1 refers to the last element of the array,
1307-2 the second last, and so on. Attempting to access an element before
1308the start of the array will raise a fatal run-time error.
3b35bae3 1309
68dc0745 1310=head2 The 'bval' Option
36477c24 1311
1312The operation of the bval option warrants some discussion. Here is the
1313definition of bval from the Berkeley DB 1.85 recno manual page:
1314
1315 The delimiting byte to be used to mark the end of a
1316 record for variable-length records, and the pad charac-
1317 ter for fixed-length records. If no value is speci-
1318 fied, newlines (``\n'') are used to mark the end of
1319 variable-length records and fixed-length records are
1320 padded with spaces.
1321
1322The second sentence is wrong. In actual fact bval will only default to
1323C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1324openinfo parameter is used at all, the value that happens to be in bval
1325will be used. That means you always have to specify bval when making
1326use of any of the options in the openinfo parameter. This documentation
1327error will be fixed in the next release of Berkeley DB.
1328
1329That clarifies the situation with regard to Berkeley DB itself. What
1330about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1331quite useful, so B<DB_File> conforms to it.
36477c24 1332
1333That means that you can specify other options (e.g. cachesize) and
1334still have bval default to C<"\n"> for variable length records, and
1335space for fixed length records.
1336
c5da4faf 1337Also note that the bval option only allows you to specify a single byte
1338as a delimiter.
1339
f6b705ef 1340=head2 A Simple Example
3b35bae3 1341
6ca2e664 1342Here is a simple example that uses RECNO (if you are using a version
1343of Perl earlier than 5.004_57 this example won't work -- see
1344L<Extra RECNO Methods> for a workaround).
f6b705ef 1345
3245f058 1346 use warnings ;
610ab055 1347 use strict ;
f6b705ef 1348 use DB_File ;
f6b705ef 1349
2c2d71f5 1350 my $filename = "text" ;
1351 unlink $filename ;
1352
610ab055 1353 my @h ;
45a340cb 1354 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1355 or die "Cannot open file 'text': $!\n" ;
1356
1357 # Add a few key/value pairs to the file
1358 $h[0] = "orange" ;
1359 $h[1] = "blue" ;
1360 $h[2] = "yellow" ;
1361
6ca2e664 1362 push @h, "green", "black" ;
1363
1364 my $elements = scalar @h ;
1365 print "The array contains $elements entries\n" ;
1366
1367 my $last = pop @h ;
1368 print "popped $last\n" ;
1369
1370 unshift @h, "white" ;
1371 my $first = shift @h ;
1372 print "shifted $first\n" ;
1373
f6b705ef 1374 # Check for existence of a key
1375 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1376
1377 # use a negative index
1378 print "The last element is $h[-1]\n" ;
1379 print "The 2nd last element is $h[-2]\n" ;
1380
1381 untie @h ;
3b35bae3 1382
f6b705ef 1383Here is the output from the script:
1384
6ca2e664 1385 The array contains 5 entries
1386 popped black
2c2d71f5 1387 shifted white
f6b705ef 1388 Element 1 Exists with value blue
6ca2e664 1389 The last element is green
1390 The 2nd last element is yellow
f6b705ef 1391
6ca2e664 1392=head2 Extra RECNO Methods
f6b705ef 1393
045291aa 1394If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664 1395array interface is quite limited. In the example script above
1396C<push>, C<pop>, C<shift>, C<unshift>
1397or determining the array length will not work with a tied array.
045291aa 1398
1399To make the interface more useful for older versions of Perl, a number
1400of methods are supplied with B<DB_File> to simulate the missing array
1401operations. All these methods are accessed via the object returned from
1402the tie call.
f6b705ef 1403
1404Here are the methods:
1405
1406=over 5
3b35bae3 1407
f6b705ef 1408=item B<$X-E<gt>push(list) ;>
1409
1410Pushes the elements of C<list> to the end of the array.
1411
1412=item B<$value = $X-E<gt>pop ;>
1413
1414Removes and returns the last element of the array.
1415
1416=item B<$X-E<gt>shift>
1417
1418Removes and returns the first element of the array.
1419
1420=item B<$X-E<gt>unshift(list) ;>
1421
1422Pushes the elements of C<list> to the start of the array.
1423
1424=item B<$X-E<gt>length>
1425
1426Returns the number of elements in the array.
1427
c5da4faf 1428=item B<$X-E<gt>splice(offset, length, elements);>
1429
1430Returns a splice of the array.
1431
f6b705ef 1432=back
1433
1434=head2 Another Example
1435
1436Here is a more complete example that makes use of some of the methods
1437described above. It also makes use of the API interface directly (see
1438L<THE API INTERFACE>).
1439
3245f058 1440 use warnings ;
f6b705ef 1441 use strict ;
962cee9f 1442 my (@h, $H, $file, $i) ;
f6b705ef 1443 use DB_File ;
1444 use Fcntl ;
bbc7dcd2 1445
f6b705ef 1446 $file = "text" ;
1447
1448 unlink $file ;
1449
45a340cb 1450 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1451 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1452
f6b705ef 1453 # first create a text file to play with
1454 $h[0] = "zero" ;
1455 $h[1] = "one" ;
1456 $h[2] = "two" ;
1457 $h[3] = "three" ;
1458 $h[4] = "four" ;
1459
bbc7dcd2 1460
f6b705ef 1461 # Print the records in order.
1462 #
1463 # The length method is needed here because evaluating a tied
1464 # array in a scalar context does not return the number of
1465 # elements in the array.
1466
1467 print "\nORIGINAL\n" ;
1468 foreach $i (0 .. $H->length - 1) {
1469 print "$i: $h[$i]\n" ;
1470 }
1471
1472 # use the push & pop methods
1473 $a = $H->pop ;
1474 $H->push("last") ;
1475 print "\nThe last record was [$a]\n" ;
1476
1477 # and the shift & unshift methods
1478 $a = $H->shift ;
1479 $H->unshift("first") ;
1480 print "The first record was [$a]\n" ;
1481
1482 # Use the API to add a new record after record 2.
1483 $i = 2 ;
1484 $H->put($i, "Newbie", R_IAFTER) ;
1485
1486 # and a new record before record 1.
1487 $i = 1 ;
1488 $H->put($i, "New One", R_IBEFORE) ;
1489
1490 # delete record 3
1491 $H->del(3) ;
1492
1493 # now print the records in reverse order
1494 print "\nREVERSE\n" ;
1495 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1496 { print "$i: $h[$i]\n" }
1497
1498 # same again, but use the API functions instead
1499 print "\nREVERSE again\n" ;
610ab055 1500 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1501 for ($s = $H->seq($k, $v, R_LAST) ;
1502 $s == 0 ;
1503 $s = $H->seq($k, $v, R_PREV))
1504 { print "$k: $v\n" }
1505
1506 undef $H ;
1507 untie @h ;
1508
1509and this is what it outputs:
1510
1511 ORIGINAL
1512 0: zero
1513 1: one
1514 2: two
1515 3: three
1516 4: four
1517
1518 The last record was [four]
1519 The first record was [zero]
1520
1521 REVERSE
1522 5: last
1523 4: three
1524 3: Newbie
1525 2: one
1526 1: New One
1527 0: first
1528
1529 REVERSE again
1530 5: last
1531 4: three
1532 3: Newbie
1533 2: one
1534 1: New One
1535 0: first
1536
1537Notes:
1538
1539=over 5
1540
1541=item 1.
1542
1543Rather than iterating through the array C<@h> like this:
1544
1545 foreach $i (@h)
1546
1547it is necessary to use either this:
1548
1549 foreach $i (0 .. $H->length - 1)
1550
1551or this:
1552
1553 for ($a = $H->seq($k, $v, R_FIRST) ;
1554 $a == 0 ;
1555 $a = $H->seq($k, $v, R_NEXT) )
1556
1557=item 2.
1558
1559Notice that both times the C<put> method was used the record index was
1560specified using a variable, C<$i>, rather than the literal value
1561itself. This is because C<put> will return the record number of the
1562inserted line via that parameter.
1563
1564=back
1565
1566=head1 THE API INTERFACE
3b35bae3 1567
1568As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1569possible to make direct use of most of the API functions defined in the
8e07c86e 1570Berkeley DB documentation.
3b35bae3 1571
88108326 1572To do this you need to store a copy of the object returned from the tie.
3b35bae3 1573
88108326 1574 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1575
8e07c86e 1576Once you have done that, you can access the Berkeley DB API functions
88108326 1577as B<DB_File> methods directly like this:
3b35bae3 1578
1579 $db->put($key, $value, R_NOOVERWRITE) ;
1580
88108326 1581B<Important:> If you have saved a copy of the object returned from
1582C<tie>, the underlying database file will I<not> be closed until both
1583the tied variable is untied and all copies of the saved object are
610ab055 1584destroyed.
88108326 1585
1586 use DB_File ;
1587 $db = tie %hash, "DB_File", "filename"
1588 or die "Cannot tie filename: $!" ;
1589 ...
1590 undef $db ;
1591 untie %hash ;
1592
9a2c4ce3 1593See L<The untie() Gotcha> for more details.
778183f3 1594
88108326 1595All the functions defined in L<dbopen> are available except for
1596close() and dbopen() itself. The B<DB_File> method interface to the
1597supported functions has been implemented to mirror the way Berkeley DB
1598works whenever possible. In particular note that:
1599
1600=over 5
1601
1602=item *
1603
1604The methods return a status value. All return 0 on success.
1605All return -1 to signify an error and set C<$!> to the exact
1606error code. The return code 1 generally (but not always) means that the
1607key specified did not exist in the database.
1608
1609Other return codes are defined. See below and in the Berkeley DB
1610documentation for details. The Berkeley DB documentation should be used
1611as the definitive source.
1612
1613=item *
3b35bae3 1614
88108326 1615Whenever a Berkeley DB function returns data via one of its parameters,
1616the equivalent B<DB_File> method does exactly the same.
3b35bae3 1617
88108326 1618=item *
1619
1620If you are careful, it is possible to mix API calls with the tied
1621hash/array interface in the same piece of code. Although only a few of
1622the methods used to implement the tied interface currently make use of
1623the cursor, you should always assume that the cursor has been changed
1624any time the tied hash/array interface is used. As an example, this
1625code will probably not do what you expect:
1626
1627 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1628 or die "Cannot tie $filename: $!" ;
1629
1630 # Get the first key/value pair and set the cursor
1631 $X->seq($key, $value, R_FIRST) ;
1632
1633 # this line will modify the cursor
1634 $count = scalar keys %x ;
1635
1636 # Get the second key/value pair.
1637 # oops, it didn't, it got the last key/value pair!
1638 $X->seq($key, $value, R_NEXT) ;
1639
1640The code above can be rearranged to get around the problem, like this:
1641
1642 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1643 or die "Cannot tie $filename: $!" ;
1644
1645 # this line will modify the cursor
1646 $count = scalar keys %x ;
1647
1648 # Get the first key/value pair and set the cursor
1649 $X->seq($key, $value, R_FIRST) ;
1650
1651 # Get the second key/value pair.
1652 # worked this time.
1653 $X->seq($key, $value, R_NEXT) ;
1654
1655=back
1656
1657All the constants defined in L<dbopen> for use in the flags parameters
1658in the methods defined below are also available. Refer to the Berkeley
1659DB documentation for the precise meaning of the flags values.
1660
1661Below is a list of the methods available.
3b35bae3 1662
1663=over 5
1664
f6b705ef 1665=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1666
1667Given a key (C<$key>) this method reads the value associated with it
1668from the database. The value read from the database is returned in the
1669C<$value> parameter.
3b35bae3 1670
88108326 1671If the key does not exist the method returns 1.
3b35bae3 1672
88108326 1673No flags are currently defined for this method.
3b35bae3 1674
f6b705ef 1675=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1676
88108326 1677Stores the key/value pair in the database.
1678
1679If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1680will have the record number of the inserted key/value pair set.
3b35bae3 1681
88108326 1682Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1683R_SETCURSOR.
1684
f6b705ef 1685=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1686
88108326 1687Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1688
88108326 1689A return code of 1 means that the requested key was not in the
1690database.
3b35bae3 1691
88108326 1692R_CURSOR is the only valid flag at present.
3b35bae3 1693
f6b705ef 1694=item B<$status = $X-E<gt>fd ;>
3b35bae3 1695
88108326 1696Returns the file descriptor for the underlying database.
3b35bae3 1697
b90e71be 1698See L<Locking: The Trouble with fd> for an explanation of why you should
1699not use C<fd> to lock your database.
3b35bae3 1700
f6b705ef 1701=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1702
88108326 1703This interface allows sequential retrieval from the database. See
1704L<dbopen> for full details.
1705
1706Both the C<$key> and C<$value> parameters will be set to the key/value
1707pair read from the database.
1708
1709The flags parameter is mandatory. The valid flag values are R_CURSOR,
1710R_FIRST, R_LAST, R_NEXT and R_PREV.
1711
f6b705ef 1712=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1713
1714Flushes any cached buffers to disk.
1715
1716R_RECNOSYNC is the only valid flag at present.
3b35bae3 1717
1718=back
1719
cad2e5aa 1720=head1 DBM FILTERS
1721
1722A DBM Filter is a piece of code that is used when you I<always>
1723want to make the same transformation to all keys and/or values in a
1724DBM database.
1725
1726There are four methods associated with DBM Filters. All work identically,
1727and each is used to install (or uninstall) a single DBM Filter. Each
1728expects a single parameter, namely a reference to a sub. The only
1729difference between them is the place that the filter is installed.
1730
1731To summarise:
1732
1733=over 5
1734
1735=item B<filter_store_key>
1736
1737If a filter has been installed with this method, it will be invoked
1738every time you write a key to a DBM database.
1739
1740=item B<filter_store_value>
1741
1742If a filter has been installed with this method, it will be invoked
1743every time you write a value to a DBM database.
1744
1745
1746=item B<filter_fetch_key>
1747
1748If a filter has been installed with this method, it will be invoked
1749every time you read a key from a DBM database.
1750
1751=item B<filter_fetch_value>
1752
1753If a filter has been installed with this method, it will be invoked
1754every time you read a value from a DBM database.
1755
1756=back
1757
1758You can use any combination of the methods, from none, to all four.
1759
1760All filter methods return the existing filter, if present, or C<undef>
1761if not.
1762
1763To delete a filter pass C<undef> to it.
1764
1765=head2 The Filter
1766
1767When each filter is called by Perl, a local copy of C<$_> will contain
1768the key or value to be filtered. Filtering is achieved by modifying
1769the contents of C<$_>. The return code from the filter is ignored.
1770
1771=head2 An Example -- the NULL termination problem.
1772
1773Consider the following scenario. You have a DBM database
1774that you need to share with a third-party C application. The C application
1775assumes that I<all> keys and values are NULL terminated. Unfortunately
1776when Perl writes to DBM databases it doesn't use NULL termination, so
1777your Perl application will have to manage NULL termination itself. When
1778you write to the database you will have to use something like this:
1779
1780 $hash{"$key\0"} = "$value\0" ;
1781
1782Similarly the NULL needs to be taken into account when you are considering
1783the length of existing keys/values.
1784
1785It would be much better if you could ignore the NULL termination issue
1786in the main application code and have a mechanism that automatically
1787added the terminating NULL to all keys and values whenever you write to
1788the database and have them removed when you read from the database. As I'm
1789sure you have already guessed, this is a problem that DBM Filters can
1790fix very easily.
1791
3245f058 1792 use warnings ;
cad2e5aa 1793 use strict ;
1794 use DB_File ;
1795
1796 my %hash ;
1797 my $filename = "/tmp/filt" ;
1798 unlink $filename ;
1799
1800 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1801 or die "Cannot open $filename: $!\n" ;
1802
1803 # Install DBM Filters
1804 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1805 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1806 $db->filter_fetch_value( sub { s/\0$// } ) ;
1807 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1808
1809 $hash{"abc"} = "def" ;
1810 my $a = $hash{"ABC"} ;
1811 # ...
1812 undef $db ;
1813 untie %hash ;
1814
1815Hopefully the contents of each of the filters should be
1816self-explanatory. Both "fetch" filters remove the terminating NULL,
1817and both "store" filters add a terminating NULL.
1818
1819
1820=head2 Another Example -- Key is a C int.
1821
1822Here is another real-life example. By default, whenever Perl writes to
1823a DBM database it always writes the key and value as strings. So when
1824you use this:
1825
1826 $hash{12345} = "something" ;
1827
1828the key 12345 will get stored in the DBM database as the 5 byte string
1829"12345". If you actually want the key to be stored in the DBM database
1830as a C int, you will have to use C<pack> when writing, and C<unpack>
1831when reading.
1832
1833Here is a DBM Filter that does it:
1834
3245f058 1835 use warnings ;
cad2e5aa 1836 use strict ;
1837 use DB_File ;
1838 my %hash ;
1839 my $filename = "/tmp/filt" ;
1840 unlink $filename ;
1841
1842
1843 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1844 or die "Cannot open $filename: $!\n" ;
1845
1846 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1847 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1848 $hash{123} = "def" ;
1849 # ...
1850 undef $db ;
1851 untie %hash ;
1852
1853This time only two filters have been used -- we only need to manipulate
1854the contents of the key, so it wasn't necessary to install any value
1855filters.
1856
f6b705ef 1857=head1 HINTS AND TIPS
3b35bae3 1858
3b35bae3 1859
b90e71be 1860=head2 Locking: The Trouble with fd
3b35bae3 1861
b90e71be 1862Until version 1.72 of this module, the recommended technique for locking
1863B<DB_File> databases was to flock the filehandle returned from the "fd"
1864function. Unfortunately this technique has been shown to be fundamentally
1865flawed (Kudos to David Harris for tracking this down). Use it at your own
1866peril!
3b35bae3 1867
b90e71be 1868The locking technique went like this.
cb1a09d0 1869
45a340cb 1870 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0666)
b90e71be 1871 || die "dbcreat /tmp/foo.db $!";
1872 $fd = $db->fd;
1873 open(DB_FH, "+<&=$fd") || die "dup $!";
1874 flock (DB_FH, LOCK_EX) || die "flock: $!";
1875 ...
1876 $db{"Tom"} = "Jerry" ;
1877 ...
1878 flock(DB_FH, LOCK_UN);
1879 undef $db;
1880 untie %db;
1881 close(DB_FH);
cb1a09d0 1882
b90e71be 1883In simple terms, this is what happens:
cb1a09d0 1884
b90e71be 1885=over 5
cb1a09d0 1886
b90e71be 1887=item 1.
cb1a09d0 1888
b90e71be 1889Use "tie" to open the database.
cb1a09d0 1890
b90e71be 1891=item 2.
cb1a09d0 1892
b90e71be 1893Lock the database with fd & flock.
cb1a09d0 1894
b90e71be 1895=item 3.
cb1a09d0 1896
b90e71be 1897Read & Write to the database.
cb1a09d0 1898
b90e71be 1899=item 4.
cb1a09d0 1900
b90e71be 1901Unlock and close the database.
cb1a09d0 1902
b90e71be 1903=back
1904
1905Here is the crux of the problem. A side-effect of opening the B<DB_File>
1906database in step 1 is that an initial block from the database will get
1907read from disk and cached in memory.
1908
1909To see why this is a problem, consider what can happen when two processes,
1910say "A" and "B", both want to update the same B<DB_File> database
1911using the locking steps outlined above. Assume process "A" has already
1912opened the database and has a write lock, but it hasn't actually updated
1913the database yet (it has finished step 2, but not started step 3 yet). Now
1914process "B" tries to open the same database - step 1 will succeed,
1915but it will block on step 2 until process "A" releases the lock. The
1916important thing to notice here is that at this point in time both
1917processes will have cached identical initial blocks from the database.
1918
1919Now process "A" updates the database and happens to change some of the
1920data held in the initial buffer. Process "A" terminates, flushing
1921all cached data to disk and releasing the database lock. At this point
1922the database on disk will correctly reflect the changes made by process
1923"A".
1924
1925With the lock released, process "B" can now continue. It also updates the
1926database and unfortunately it too modifies the data that was in its
1927initial buffer. Once that data gets flushed to disk it will overwrite
1928some/all of the changes process "A" made to the database.
1929
1930The result of this scenario is at best a database that doesn't contain
1931what you expect. At worst the database will corrupt.
1932
1933The above won't happen every time competing processes update the same
1934B<DB_File> database, but it does illustrate why the technique should
1935not be used.
1936
1937=head2 Safe ways to lock a database
1938
1939Starting with version 2.x, Berkeley DB has internal support for locking.
1940The companion module to this one, B<BerkeleyDB>, provides an interface
1941to this locking functionality. If you are serious about locking
1942Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1943
1944If using B<BerkeleyDB> isn't an option, there are a number of modules
1945available on CPAN that can be used to implement locking. Each one
1946implements locking differently and has different goals in mind. It is
1947therefore worth knowing the difference, so that you can pick the right
1948one for your application. Here are the three locking wrappers:
1949
1950=over 5
1951
1952=item B<Tie::DB_Lock>
1953
1954A B<DB_File> wrapper which creates copies of the database file for
1955read access, so that you have a kind of a multiversioning concurrent read
1956system. However, updates are still serial. Use for databases where reads
1957may be lengthy and consistency problems may occur.
1958
1959=item B<Tie::DB_LockFile>
1960
1961A B<DB_File> wrapper that has the ability to lock and unlock the database
1962while it is being used. Avoids the tie-before-flock problem by simply
1963re-tie-ing the database when you get or drop a lock. Because of the
1964flexibility in dropping and re-acquiring the lock in the middle of a
1965session, this can be massaged into a system that will work with long
1966updates and/or reads if the application follows the hints in the POD
1967documentation.
1968
1969=item B<DB_File::Lock>
1970
1971An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
1972before tie-ing the database and drops the lock after the untie. Allows
1973one to use the same lockfile for multiple databases to avoid deadlock
1974problems, if desired. Use for databases where updates and reads are
1975quick and simple flock locking semantics are enough.
1976
1977=back
cb1a09d0 1978
68dc0745 1979=head2 Sharing Databases With C Applications
f6b705ef 1980
1981There is no technical reason why a Berkeley DB database cannot be
1982shared by both a Perl and a C application.
1983
1984The vast majority of problems that are reported in this area boil down
1985to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 1986not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 1987
1988Here is a real example. Netscape 2.0 keeps a record of the locations you
1989visit along with the time you last visited them in a DB_HASH database.
1990This is usually stored in the file F<~/.netscape/history.db>. The key
1991field in the database is the location string and the value field is the
1992time the location was last visited stored as a 4 byte binary value.
1993
1994If you haven't already guessed, the location string is stored with a
1995terminating NULL. This means you need to be careful when accessing the
1996database.
1997
1998Here is a snippet of code that is loosely based on Tom Christiansen's
1999I<ggh> script (available from your nearest CPAN archive in
2000F<authors/id/TOMC/scripts/nshist.gz>).
2001
3245f058 2002 use warnings ;
610ab055 2003 use strict ;
f6b705ef 2004 use DB_File ;
2005 use Fcntl ;
f6b705ef 2006
962cee9f 2007 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef 2008 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2009
2010 $HISTORY = "$dotdir/.netscape/history.db";
2011
2012 tie %hist_db, 'DB_File', $HISTORY
2013 or die "Cannot open $HISTORY: $!\n" ;;
2014
2015 # Dump the complete database
2016 while ( ($href, $binary_time) = each %hist_db ) {
2017
2018 # remove the terminating NULL
2019 $href =~ s/\x00$// ;
2020
2021 # convert the binary time into a user friendly string
2022 $date = localtime unpack("V", $binary_time);
2023 print "$date $href\n" ;
2024 }
2025
2026 # check for the existence of a specific key
2027 # remember to add the NULL
2028 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2029 $date = localtime unpack("V", $binary_time) ;
2030 print "Last visited mox.perl.com on $date\n" ;
2031 }
2032 else {
2033 print "Never visited mox.perl.com\n"
2034 }
2035
2036 untie %hist_db ;
2037
68dc0745 2038=head2 The untie() Gotcha
778183f3 2039
7a2e2cd6 2040If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2041recommended that you read L<perltie/The untie Gotcha>.
778183f3 2042
2043Even if you don't currently make use of the API interface, it is still
2044worth reading it.
2045
2046Here is an example which illustrates the problem from a B<DB_File>
2047perspective:
2048
2049 use DB_File ;
2050 use Fcntl ;
2051
2052 my %x ;
2053 my $X ;
2054
2055 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2056 or die "Cannot tie first time: $!" ;
2057
2058 $x{123} = 456 ;
2059
2060 untie %x ;
2061
2062 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2063 or die "Cannot tie second time: $!" ;
2064
2065 untie %x ;
2066
2067When run, the script will produce this error message:
2068
2069 Cannot tie second time: Invalid argument at bad.file line 14.
2070
2071Although the error message above refers to the second tie() statement
2072in the script, the source of the problem is really with the untie()
2073statement that precedes it.
2074
2075Having read L<perltie> you will probably have already guessed that the
2076error is caused by the extra copy of the tied object stored in C<$X>.
2077If you haven't, then the problem boils down to the fact that the
2078B<DB_File> destructor, DESTROY, will not be called until I<all>
2079references to the tied object are destroyed. Both the tied variable,
2080C<%x>, and C<$X> above hold a reference to the object. The call to
2081untie() will destroy the first, but C<$X> still holds a valid
2082reference, so the destructor will not get called and the database file
2083F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2084attempt to open a database that is already open via the catch-all
778183f3 2085"Invalid argument" doesn't help.
2086
2087If you run the script with the C<-w> flag the error message becomes:
2088
2089 untie attempted while 1 inner references still exist at bad.file line 12.
2090 Cannot tie second time: Invalid argument at bad.file line 14.
2091
2092which pinpoints the real problem. Finally the script can now be
2093modified to fix the original problem by destroying the API object
2094before the untie:
2095
2096 ...
2097 $x{123} = 456 ;
2098
2099 undef $X ;
2100 untie %x ;
2101
2102 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2103 ...
2104
f6b705ef 2105
2106=head1 COMMON QUESTIONS
2107
2108=head2 Why is there Perl source in my database?
2109
2110If you look at the contents of a database file created by DB_File,
2111there can sometimes be part of a Perl script included in it.
2112
2113This happens because Berkeley DB uses dynamic memory to allocate
2114buffers which will subsequently be written to the database file. Being
2115dynamic, the memory could have been used for anything before DB
2116malloced it. As Berkeley DB doesn't clear the memory once it has been
2117allocated, the unused portions will contain random junk. In the case
2118where a Perl script gets written to the database, the random junk will
2119correspond to an area of dynamic memory that happened to be used during
2120the compilation of the script.
2121
2122Unless you don't like the possibility of there being part of your Perl
2123scripts embedded in a database file, this is nothing to worry about.
2124
2125=head2 How do I store complex data structures with DB_File?
2126
2127Although B<DB_File> cannot do this directly, there is a module which
2128can layer transparently over B<DB_File> to accomplish this feat.
2129
2130Check out the MLDBM module, available on CPAN in the directory
2131F<modules/by-module/MLDBM>.
2132
2133=head2 What does "Invalid Argument" mean?
2134
2135You will get this error message when one of the parameters in the
2136C<tie> call is wrong. Unfortunately there are quite a few parameters to
2137get wrong, so it can be difficult to figure out which one it is.
2138
2139Here are a couple of possibilities:
2140
2141=over 5
2142
2143=item 1.
2144
610ab055 2145Attempting to reopen a database without closing it.
f6b705ef 2146
2147=item 2.
2148
2149Using the O_WRONLY flag.
2150
2151=back
2152
2153=head2 What does "Bareword 'DB_File' not allowed" mean?
2154
2155You will encounter this particular error message when you have the
2156C<strict 'subs'> pragma (or the full strict pragma) in your script.
2157Consider this script:
2158
3245f058 2159 use warnings ;
f6b705ef 2160 use strict ;
2161 use DB_File ;
07200f1b 2162 my %x ;
f6b705ef 2163 tie %x, DB_File, "filename" ;
2164
2165Running it produces the error in question:
2166
2167 Bareword "DB_File" not allowed while "strict subs" in use
2168
2169To get around the error, place the word C<DB_File> in either single or
2170double quotes, like this:
2171
2172 tie %x, "DB_File", "filename" ;
2173
2174Although it might seem like a real pain, it is really worth the effort
2175of having a C<use strict> in all your scripts.
2176
cad2e5aa 2177=head1 REFERENCES
2178
2179Articles that are either about B<DB_File> or make use of it.
2180
2181=over 5
2182
2183=item 1.
2184
2185I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2186Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2187
2188=back
2189
cb1a09d0 2190=head1 HISTORY
2191
1f70e1ea 2192Moved to the Changes file.
610ab055 2193
1f70e1ea 2194=head1 BUGS
05475680 2195
1f70e1ea 2196Some older versions of Berkeley DB had problems with fixed length
2197records using the RECNO file format. This problem has been fixed since
2198version 1.85 of Berkeley DB.
e858de61 2199
1f70e1ea 2200I am sure there are bugs in the code. If you do find any, or can
2201suggest any enhancements, I would welcome your comments.
a6ed719b 2202
1f70e1ea 2203=head1 AVAILABILITY
a6ed719b 2204
1f70e1ea 2205B<DB_File> comes with the standard Perl source distribution. Look in
2206the directory F<ext/DB_File>. Given the amount of time between releases
2207of Perl the version that ships with Perl is quite likely to be out of
2208date; the most recent version can always be found on CPAN (see
2209L<perlmod/CPAN> for details), in the directory
2210F<modules/by-module/DB_File>.
a6ed719b 2211
039d031f 2212This version of B<DB_File> will work with either version 1.x, 2.x or
22133.x of Berkeley DB, but is limited to the functionality provided by
2214version 1.
a6ed719b 2215
cad2e5aa 2216The official web site for Berkeley DB is F<http://www.sleepycat.com>.
039d031f 2217All versions of Berkeley DB are available there.
93af7a87 2218
1f70e1ea 2219Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2220archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2221
1f70e1ea 2222If you are running IRIX, then get Berkeley DB version 1 from
2223F<http://reality.sgi.com/ariel>. It has the patches necessary to
2224compile properly on IRIX 5.3.
610ab055 2225
1f70e1ea 2226=head1 COPYRIGHT
3b35bae3 2227
d63909e4 2228Copyright (c) 1995-2002 Paul Marquess. All rights reserved. This program
a9fd575d 2229is free software; you can redistribute it and/or modify it under the
2230same terms as Perl itself.
3b35bae3 2231
1f70e1ea 2232Although B<DB_File> is covered by the Perl license, the library it
2233makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2234copyright and its own license. Please take the time to read it.
3b35bae3 2235
a9fd575d 2236Here are a few words taken from the Berkeley DB FAQ (at
b90e71be 2237F<http://www.sleepycat.com>) regarding the license:
68dc0745 2238
a9fd575d 2239 Do I have to license DB to use it in Perl scripts?
3b35bae3 2240
a9fd575d 2241 No. The Berkeley DB license requires that software that uses
2242 Berkeley DB be freely redistributable. In the case of Perl, that
2243 software is Perl, and not your scripts. Any Perl scripts that you
2244 write are your property, including scripts that make use of
2245 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2246 place any restriction on what you may do with them.
88108326 2247
1f70e1ea 2248If you are in any doubt about the license situation, contact either the
2249Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 2250
2251
3b35bae3 2252=head1 SEE ALSO
2253
9fe6733a 2254L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2255L<dbmfilter>
3b35bae3 2256
3b35bae3 2257=head1 AUTHOR
2258
8e07c86e 2259The DB_File interface was written by Paul Marquess
6ca2e664 2260E<lt>Paul.Marquess@btinternet.comE<gt>.
d3ef3b8a 2261Questions about the DB system itself may be addressed to
2262E<lt>db@sleepycat.comE<gt>.
3b35bae3 2263
2264=cut