Many of the feared z/OS failures turned out to be false alarms.
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6ca2e664 3# written by Paul Marquess (Paul.Marquess@btinternet.com)
962cee9f 4# last modified 1st March 2002
d85a743d 5# version 1.804
36477c24 6#
d63909e4 7# Copyright (c) 1995-2002 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
12package DB_File::HASHINFO ;
785da04d 13
e5021521 14require 5.00404;
610ab055 15
3245f058 16use warnings;
785da04d 17use strict;
8e07c86e 18use Carp;
88108326 19require Tie::Hash;
20@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 21
# Constructor.  Builds a hash, ties it to the invoking package (so every
# element access is routed through that package's TIEHASH/FETCH/STORE/...
# methods) and returns a reference to it blessed into the same package.
sub new
{
    my ($class) = @_ ;
    my %info ;

    tie %info, $class ;
    return bless \%info, $class ;
}
29
610ab055 30
# Tie constructor for HASH info objects.
# The returned object holds two hashes:
#   VALID - the set of element names that may be used as keys
#   GOT   - the values that have actually been stored so far (initially empty)
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid ;
    $valid{$_} = 1
        for qw( bsize ffactor nelem cachesize hash lorder) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
8e07c86e 41
610ab055 42
# Tied-hash read.  Returns the value previously stored for $key (undef if
# nothing has been stored yet).  Croaks when $key is not one of the element
# names listed in VALID.
sub FETCH
{
    my ($self, $key) = @_ ;

    unless (exists $self->{VALID}{$key}) {
        my $pkg = ref $self ;
        croak "${pkg}::FETCH - Unknown element '$key'" ;
    }

    return $self->{GOT}{$key} ;
}
53
54
# Tied-hash write.  Records $value under $key in GOT and returns nothing.
# Croaks when $key is not one of the element names listed in VALID.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    if (not exists $self->{VALID}{$key}) {
        my $pkg = ref $self ;
        croak "${pkg}::STORE - Unknown element '$key'" ;
    }

    $self->{GOT}{$key} = $value ;
    return ;
}
70
# Tied-hash delete.  Removes any value stored for $key from GOT and returns
# nothing.  Croaks when $key is not one of the element names listed in VALID.
#
# The error message is prefixed with the object's actual class, exactly as
# FETCH and STORE do, so that objects of the RECNOINFO/BTREEINFO subclasses
# (which inherit this method) report their own package name.
sub DELETE
{
    my $self = shift ;
    my $key  = shift ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}
85
# Tied-hash exists().  Reports whether $key is a permitted element name
# (i.e. is present in VALID); it does not look at whether a value has
# actually been stored.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{VALID}{$key} ;
}
93
# Helper used by the unsupported tie methods: croaks with a message naming
# the object's class and the method that is not provided.
sub NotHere
{
    my ($self, $method) = @_ ;
    my $class = ref($self) ;

    croak $class . " does not define the method ${method}" ;
}
101
# Iteration and wholesale clearing are not supported on info objects; each of
# these tie methods simply reports that via NotHere.
sub FIRSTKEY { $_[0]->NotHere("FIRSTKEY") }
sub NEXTKEY  { $_[0]->NotHere("NEXTKEY") }
sub CLEAR    { $_[0]->NotHere("CLEAR") }
8e07c86e 105
106package DB_File::RECNOINFO ;
785da04d 107
3245f058 108use warnings;
88108326 109use strict ;
110
045291aa 111@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 112
# Tie constructor for RECNO info objects.  Same layout as the HASH variant
# (VALID = permitted element names, GOT = values stored so far) but with the
# RECNO set of element names.
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid ;
    $valid{$_} = 1
        for qw( bval cachesize psize flags lorder reclen bfname ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
123
88108326 124package DB_File::BTREEINFO ;
8e07c86e 125
3245f058 126use warnings;
88108326 127use strict ;
8e07c86e 128
88108326 129@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 130
# Tie constructor for BTREE info objects.  Same layout as the HASH variant
# (VALID = permitted element names, GOT = values stored so far) but with the
# BTREE set of element names.
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid ;
    $valid{$_} = 1
        for qw( flags cachesize maxkeypage minkeypage psize
                compare prefix lorder ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
142
143
8e07c86e 144package DB_File ;
785da04d 145
3245f058 146use warnings;
785da04d 147use strict;
07200f1b 148our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
d85a743d 149our ($db_version, $use_XSLoader, $splice_end_array);
8e07c86e 150use Carp;
151
785da04d 152
d85a743d 153$VERSION = "1.804" ;
154
{
    # Probe the running perl: splice() a one-element array at an offset past
    # its end while capturing any warning, then record in $splice_end_array
    # whether the "offset past end of array" warning was produced.  SPLICE
    # consults this flag before emitting the same warning itself.
    my $captured ;
    local $SIG{__WARN__} = sub { $captured = "@_" } ;

    my @probe = (1) ;
    splice(@probe, 3) ;

    $splice_end_array =
        ($captured =~ /^splice\(\) offset past end of array at /) ;
}
8e07c86e 161
162#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
# The three pre-defined info objects exported to users of the module.
$DB_BTREE = DB_File::BTREEINFO->new ;
$DB_HASH  = DB_File::HASHINFO->new ;
$DB_RECNO = DB_File::RECNOINFO->new ;
8e07c86e 166
785da04d 167require Tie::Hash;
8e07c86e 168require Exporter;
169use AutoLoader;
BEGIN {
    # Try to load XSLoader (with any __DIE__ handler disabled for the
    # attempt).  $use_XSLoader records the outcome; when XSLoader is not
    # available, fall back to DynaLoader and inherit from it.
    my $have_xsloader = do {
        local $SIG{__DIE__} ;
        eval { require XSLoader ; 1 } ;
    } ;

    if ($have_xsloader) {
        $use_XSLoader = 1 ;
    }
    else {
        $use_XSLoader = 0 ;
        require DynaLoader;
        @ISA = qw(DynaLoader);
    }
}
180
181push @ISA, qw(Tie::Hash Exporter);
8e07c86e 182@EXPORT = qw(
183 $DB_BTREE $DB_HASH $DB_RECNO
88108326 184
8e07c86e 185 BTREEMAGIC
186 BTREEVERSION
187 DB_LOCK
188 DB_SHMEM
189 DB_TXN
190 HASHMAGIC
191 HASHVERSION
192 MAX_PAGE_NUMBER
193 MAX_PAGE_OFFSET
194 MAX_REC_NUMBER
195 RET_ERROR
196 RET_SPECIAL
197 RET_SUCCESS
198 R_CURSOR
199 R_DUP
200 R_FIRST
201 R_FIXEDLEN
202 R_IAFTER
203 R_IBEFORE
204 R_LAST
205 R_NEXT
206 R_NOKEY
207 R_NOOVERWRITE
208 R_PREV
209 R_RECNOSYNC
210 R_SETCURSOR
211 R_SNAPSHOT
212 __R_UNUSED
88108326 213
045291aa 214);
8e07c86e 215
# Resolves calls to not-yet-defined constant names.  The unqualified name is
# looked up with constant(); on failure the returned error is croaked, on
# success a sub returning the value is installed under the requested name and
# control is transferred to it, so later calls bypass AUTOLOAD.
sub AUTOLOAD {
    my $constname = $AUTOLOAD;
    $constname =~ s/.*:://;

    my ($error, $val) = constant($constname);
    Carp::croak($error) if $error;

    no strict 'refs';
    *{$AUTOLOAD} = sub { $val };
    goto &{$AUTOLOAD};
}
8e07c86e 225
f6b705ef 226
eval {
    # Make all Fcntl O_XXX constants available for importing: pull them into
    # this package first, then add their names to our own export list.
    # Failure (e.g. Fcntl unavailable) is deliberately ignored.
    require Fcntl;
    my @open_flags = grep { /^O_/ } @Fcntl::EXPORT;
    Fcntl->import(@open_flags);
    push @EXPORT, @open_flags;
};
f6b705ef 234
# Load the compiled part of the module with whichever loader the BEGIN block
# selected.
if (not $use_XSLoader)
  { DB_File->bootstrap($VERSION) }
else
  { XSLoader::load("DB_File", $VERSION) }
8e07c86e 239
240# Preloaded methods go here. Autoload methods go after __END__, and are
241# processed by the autosplit program.
242
# Common back end for TIEHASH and TIEARRAY.
#
# Arguments are exactly those passed to the tie method: class, filename,
# flags, mode and (optionally) an info object.  Which kind of tie is being
# made is worked out from the name of the calling sub, so this routine must be
# called directly from TIEHASH/TIEARRAY.  Returns whatever DoTie_ returns, or
# undef when the placeholder file described below cannot be created.
sub tie_hash_or_array
{
    my (@arg) = @_ ;
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    # If the info argument is a reference to a tied hash (as produced by the
    # *INFO constructors), replace it with the underlying tied object.
    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    # make recno in Berkeley DB version 2 work like recno in version 1.
    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
        $arg[1] and ! -e $arg[1]) {
        # Create the (empty) file up front.  Three-argument open with a
        # lexical handle is used so that the caller-supplied filename is
        # taken literally (no mode characters or whitespace trimming) and no
        # global bareword handle is clobbered.
        open(my $fh, '>', $arg[1]) or return undef ;
        close $fh ;
        chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
    }

    DoTie_($tieHASH, @arg) ;
}
261
# Entry points used by tie() for hashes and arrays.  Both hand their argument
# list straight to tie_hash_or_array, which inspects the name of its caller to
# tell the two apart -- so these must remain ordinary (non-goto) calls.
sub TIEHASH
{
    return tie_hash_or_array(@_) ;
}

sub TIEARRAY
{
    return tie_hash_or_array(@_) ;
}
88108326 271
# Tied CLEAR: empties the database.  All keys are first collected by walking
# the database with seq() from R_FIRST via R_NEXT until a non-zero status is
# returned; they are then deleted one by one, last key first.
sub CLEAR
{
    my ($self) = @_ ;
    my ($key, $value) = (0, "") ;
    my @seen ;

    for (my $status = $self->seq($key, $value, R_FIRST()) ;
         $status == 0 ;
         $status = $self->seq($key, $value, R_NEXT()))
    {
        push @seen, $key ;
    }

    foreach my $doomed (reverse @seen) {
        my $rc = $self->del($doomed) ;
    }
}
288
# Tied-array EXTEND hook: intentionally does nothing.
sub EXTEND { return }
290
# Tied-array STORESIZE: make the array $length elements long.
# When shrinking, elements are deleted from the highest index down to
# $length.  When growing, an empty string is stored at index $length-1.
# Nothing is done when the size is already $length.
sub STORESIZE
{
    my ($self, $length) = @_ ;
    my $current_length = $self->length() ;

    if ($length < $current_length) {
        my $key = $current_length - 1 ;
        while ($key >= $length) {
            $self->del($key) ;
            -- $key ;
        }
    }
    elsif ($length > $current_length) {
        $self->put($length-1, "") ;
    }
}
306
c5da4faf 307
# Tied-array SPLICE, written to follow the behaviour of Perl's built-in
# splice() (the quoted comments below are taken from its documentation).
#
# Arguments (after the object):
#   OFFSET - index of the first element to remove; negative counts from the
#            end; undef is treated as 0 with an 'uninitialized' warning.
#   LENGTH - optional number of elements to remove.  When it is omitted
#            altogether, everything from OFFSET onward is removed.  An
#            explicit undef is treated as 0 with an 'uninitialized' warning.
#            A negative value leaves that many elements at the end.
#   LIST   - optional replacement elements, inserted at OFFSET.
#
# Returns the removed elements in list context, the last removed element
# (stringified) or undef in scalar context, and nothing in void context.
# Dies if a get/del/put on the underlying database reports failure, or if a
# negative OFFSET reaches before the start of the array.
sub SPLICE
{
    my $self = shift;
    my $offset = shift;
    if (not defined $offset) {
        warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
        $offset = 0;
    }

    # Remember whether a LENGTH argument was supplied at all: "omitted" and
    # "explicitly undef" are different cases and must not be conflated.
    my $has_length = @_;
    my $length = $has_length ? shift : 0;
    # Carping about definedness comes _after_ the OFFSET sanity check.
    # This is so we get the same error messages as Perl's splice().
    #

    my @list = @_;

    my $size = $self->FETCHSIZE();

    # 'If OFFSET is negative then it starts that far from the end of
    # the array.'
    #
    if ($offset < 0) {
        my $new_offset = $size + $offset;
        if ($new_offset < 0) {
            die "Modification of non-creatable array value attempted, "
              . "subscript $offset";
        }
        $offset = $new_offset;
    }

    # Only reachable for an explicit undef LENGTH (an omitted LENGTH was
    # given the placeholder 0 above).
    if (not defined $length) {
        warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
        $length = 0;
    }

    if ($offset > $size) {
        $offset = $size;
        warnings::warnif('misc', 'splice() offset past end of array')
            if $splice_end_array;
    }

    # 'If LENGTH is omitted, removes everything from OFFSET onward.'
    if (not $has_length) {
        $length = $size - $offset;
    }

    # 'If LENGTH is negative, leave that many elements off the end of
    # the array.'
    #
    if ($length < 0) {
        $length = $size - $offset + $length;

        if ($length < 0) {
            # The user must have specified a length bigger than the
            # length of the array passed in.  But perl's splice()
            # doesn't catch this, it just behaves as for length=0.
            #
            $length = 0;
        }
    }

    # Never try to remove more elements than exist after OFFSET.
    if ($length > $size - $offset) {
        $length = $size - $offset;
    }

    # $num_elems holds the current number of elements in the database.
    my $num_elems = $size;

    # 'Removes the elements designated by OFFSET and LENGTH from an
    # array,'...
    #
    # Each pass fetches and then deletes the element at $offset; the same
    # index is used every time.
    my @removed = ();
    foreach (0 .. $length - 1) {
        my $old;
        my $status = $self->get($offset, $old);
        if ($status != 0) {
            my $msg = "error from Berkeley DB on get($offset, \$old)";
            if ($status == 1) {
                $msg .= ' (no such element?)';
            }
            else {
                $msg .= ": error status $status";
                if (defined $! and $! ne '') {
                    $msg .= ", message $!";
                }
            }
            die $msg;
        }
        push @removed, $old;

        $status = $self->del($offset);
        if ($status != 0) {
            my $msg = "error from Berkeley DB on del($offset)";
            if ($status == 1) {
                $msg .= ' (no such element?)';
            }
            else {
                $msg .= ": error status $status";
                if (defined $! and $! ne '') {
                    $msg .= ", message $!";
                }
            }
            die $msg;
        }

        -- $num_elems;
    }

    # ...'and replaces them with the elements of LIST, if any.'
    # NOTE(review): the loop stops at the first undefined element of LIST.
    my $pos = $offset;
    while (defined (my $elem = shift @list)) {
        my $old_pos = $pos;
        my $status;
        if ($pos >= $num_elems) {
            # At or beyond the current end: a plain put.
            $status = $self->put($pos, $elem);
        }
        else {
            # Inside the array: put with the R_IBEFORE flag.
            $status = $self->put($pos, $elem, $self->R_IBEFORE);
        }

        if ($status != 0) {
            my $msg = "error from Berkeley DB on put($pos, $elem, ...)";
            if ($status == 1) {
                $msg .= ' (no such element?)';
            }
            else {
                $msg .= ", error status $status";
                if (defined $! and $! ne '') {
                    $msg .= ", message $!";
                }
            }
            die $msg;
        }

        # put() is handed $pos itself; make sure it came back unchanged.
        die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
            if $old_pos != $pos;

        ++ $pos;
        ++ $num_elems;
    }

    if (wantarray) {
        # 'In list context, returns the elements removed from the
        # array.'
        #
        return @removed;
    }
    elsif (defined wantarray) {
        # 'In scalar context, returns the last element removed, or
        # undef if no elements are removed.'
        #
        if (@removed) {
            my $last = pop @removed;
            return "$last";
        }
        else {
            return undef;
        }
    }

    # Void context: nothing to return.
    return;
}
# Lower-case method alias: forwards its arguments, in the caller's context,
# to SPLICE.
sub ::DB_File::splice { return SPLICE(@_) }
473
# $status = $db->find_dup($key, $value)
#
# Scans forward with seq(), starting with R_CURSOR at $key and continuing
# with R_NEXT, looking for a record whose key and value both match.
# Returns 0 as soon as such a record is found; otherwise returns the non-zero
# status from seq() that ended the scan.
sub find_dup
{
    croak "Usage: \$db->find_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $origkey, $value_wanted) = @_ ;
    my ($key, $value) = ($origkey, 0) ;

    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0) {
        return 0 if $key eq $origkey and $value eq $value_wanted ;
        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $status ;
}
493
# $status = $db->del_dup($key, $value)
#
# Locates the key/value pair with find_dup and, when it is found, deletes it
# with del() using the R_CURSOR flag.  Returns find_dup's non-zero status when
# the pair is not found, otherwise the status returned by del().
sub del_dup
{
    croak "Usage: \$db->del_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $key, $value) = @_ ;

    my ($status) = $db->find_dup($key, $value) ;
    if ($status == 0) {
        $status = $db->del($key, R_CURSOR() ) ;
    }

    return $status ;
}
507
# $count = $db->get_dup($key)
# @list  = $db->get_dup($key)
# %list  = $db->get_dup($key, 1)
#
# Walks the records starting at $key (seq with R_CURSOR, then R_NEXT) for as
# long as seq() succeeds and the key returned is still equal to $key.
#   scalar context        - returns the number of records visited
#   list context          - returns the values visited, in visiting order
#   list context + $flag  - returns value => occurrence-count pairs
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;
    my $origkey   = $key ;
    my $wantarray = wantarray ;
    my $value     = 0 ;
    my $counter   = 0 ;
    my (%values, @values) ;

    # Keep going until seq() reports failure/end of data ($status != 0) or a
    # different key is encountered ($key ne $origkey).
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $origkey) {

        # save the value or count number of matches
        if (not $wantarray) { ++ $counter }
        elsif ($flag)       { ++ $values{$value} }
        else                { push (@values, $value) }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return ($wantarray ? ($flag ? %values : @values) : $counter) ;
}
544
545
8e07c86e 5461;
547__END__
548
3b35bae3 549=head1 NAME
550
1f70e1ea 551DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 552
553=head1 SYNOPSIS
554
bbc7dcd2 555 use DB_File;
556
88108326 557 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
558 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
559 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 560
3b35bae3 561 $status = $X->del($key [, $flags]) ;
562 $status = $X->put($key, $value [, $flags]) ;
563 $status = $X->get($key, $value [, $flags]) ;
760ac839 564 $status = $X->seq($key, $value, $flags) ;
3b35bae3 565 $status = $X->sync([$flags]) ;
566 $status = $X->fd ;
760ac839 567
f6b705ef 568 # BTREE only
88108326 569 $count = $X->get_dup($key) ;
570 @list = $X->get_dup($key) ;
571 %list = $X->get_dup($key, 1) ;
6ca2e664 572 $status = $X->find_dup($key, $value) ;
573 $status = $X->del_dup($key, $value) ;
88108326 574
f6b705ef 575 # RECNO only
576 $a = $X->length;
577 $a = $X->pop ;
578 $X->push(list);
579 $a = $X->shift;
580 $X->unshift(list);
c5da4faf 581 @r = $X->splice(offset, length, elements);
f6b705ef 582
cad2e5aa 583 # DBM Filters
584 $old_filter = $db->filter_store_key ( sub { ... } ) ;
585 $old_filter = $db->filter_store_value( sub { ... } ) ;
586 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
587 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
588
3b35bae3 589 untie %hash ;
590 untie @array ;
591
592=head1 DESCRIPTION
593
8e07c86e 594B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 595facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 596version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f 597It is assumed that you have a copy of the Berkeley DB manual pages at
598hand when reading this documentation. The interface defined here
599mirrors the Berkeley DB interface closely.
68dc0745 600
8e07c86e 601Berkeley DB is a C library which provides a consistent interface to a
602number of database formats. B<DB_File> provides an interface to all
603three of the database types currently supported by Berkeley DB.
3b35bae3 604
605The file types are:
606
607=over 5
608
88108326 609=item B<DB_HASH>
3b35bae3 610
88108326 611This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 612files. This is equivalent to the functionality provided by other
613hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
614the files created using DB_HASH are not compatible with any of the
615other packages mentioned.
3b35bae3 616
8e07c86e 617A default hashing algorithm, which will be adequate for most
618applications, is built into Berkeley DB. If you do need to use your own
619hashing algorithm it is possible to write your own in Perl and have
620B<DB_File> use it instead.
3b35bae3 621
88108326 622=item B<DB_BTREE>
623
624The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 625sorted, balanced binary tree.
3b35bae3 626
8e07c86e 627As with the DB_HASH format, it is possible to provide a user defined
628Perl routine to perform the comparison of keys. By default, though, the
629keys are stored in lexical order.
3b35bae3 630
88108326 631=item B<DB_RECNO>
3b35bae3 632
8e07c86e 633DB_RECNO allows both fixed-length and variable-length flat text files
634to be manipulated using the same key/value pair interface as in DB_HASH
635and DB_BTREE. In this case the key will consist of a record (line)
636number.
3b35bae3 637
638=back
639
e5021521 640=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea 641
642Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 643it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 644limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 645version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 646like version 1. This feature allows B<DB_File> scripts that were built
e5021521 647with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea 648
649If you want to make use of the new features available in Berkeley DB
b90e71be 6502.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 651
e5021521 652B<Note:> The database file format has changed multiple times in Berkeley
653DB version 2, 3 and 4. If you cannot recreate your databases, you
654must dump any existing databases with either the C<db_dump> or the
655C<db_dump185> utility that comes with Berkeley DB.
656Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
657your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea 658documentation for further details.
659
e5021521 660Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 661DB with DB_File.
1f70e1ea 662
68dc0745 663=head2 Interface to Berkeley DB
3b35bae3 664
665B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 666in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
667allows B<DB_File> to access Berkeley DB files using either an
668associative array (for DB_HASH & DB_BTREE file types) or an ordinary
669array (for the DB_RECNO file type).
3b35bae3 670
88108326 671In addition to the tie() interface, it is also possible to access most
672of the functions provided in the Berkeley DB API directly.
f6b705ef 673See L<THE API INTERFACE>.
3b35bae3 674
88108326 675=head2 Opening a Berkeley DB Database File
3b35bae3 676
8e07c86e 677Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 678Here is the C prototype for dbopen():
3b35bae3 679
680 DB*
681 dbopen (const char * file, int flags, int mode,
682 DBTYPE type, const void * openinfo)
683
684The parameter C<type> is an enumeration which specifies which of the 3
685interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
686Depending on which of these is actually chosen, the final parameter,
687I<openinfo> points to a data structure which allows tailoring of the
688specific interface method.
689
8e07c86e 690This interface is handled slightly differently in B<DB_File>. Here is
88108326 691an equivalent call using B<DB_File>:
3b35bae3 692
88108326 693 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 694
8e07c86e 695The C<filename>, C<flags> and C<mode> parameters are the direct
696equivalent of their dbopen() counterparts. The final parameter $DB_HASH
697performs the function of both the C<type> and C<openinfo> parameters in
698dbopen().
3b35bae3 699
88108326 700In the example above $DB_HASH is actually a pre-defined reference to a
701hash object. B<DB_File> has three of these pre-defined references.
702Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 703
8e07c86e 704The keys allowed in each of these pre-defined references is limited to
705the names used in the equivalent C structure. So, for example, the
706$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 707C<ffactor>, C<hash>, C<lorder> and C<nelem>.
708
709To change one of these elements, just assign to it like this:
710
711 $DB_HASH->{'cachesize'} = 10000 ;
712
713The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
714usually adequate for most applications. If you do need to create extra
715instances of these objects, constructors are available for each file
716type.
717
718Here are examples of the constructors and the valid options available
719for DB_HASH, DB_BTREE and DB_RECNO respectively.
720
721 $a = new DB_File::HASHINFO ;
722 $a->{'bsize'} ;
723 $a->{'cachesize'} ;
724 $a->{'ffactor'};
725 $a->{'hash'} ;
726 $a->{'lorder'} ;
727 $a->{'nelem'} ;
728
729 $b = new DB_File::BTREEINFO ;
730 $b->{'flags'} ;
731 $b->{'cachesize'} ;
732 $b->{'maxkeypage'} ;
733 $b->{'minkeypage'} ;
734 $b->{'psize'} ;
735 $b->{'compare'} ;
736 $b->{'prefix'} ;
737 $b->{'lorder'} ;
738
739 $c = new DB_File::RECNOINFO ;
740 $c->{'bval'} ;
741 $c->{'cachesize'} ;
742 $c->{'psize'} ;
743 $c->{'flags'} ;
744 $c->{'lorder'} ;
745 $c->{'reclen'} ;
746 $c->{'bfname'} ;
747
The values stored in the hashes above are mostly the direct equivalent
of their C counterparts. Like their C counterparts, all are set to
default values - that means you don't have to set I<all> of the
values when you only want to change one. Here is an example:
752
753 $a = new DB_File::HASHINFO ;
754 $a->{'cachesize'} = 12345 ;
755 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
756
36477c24 757A few of the options need extra discussion here. When used, the C
88108326 758equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
759to C functions. In B<DB_File> these keys are used to store references
760to Perl subs. Below are templates for each of the subs:
761
762 sub hash
763 {
764 my ($data) = @_ ;
765 ...
766 # return the hash value for $data
767 return $hash ;
768 }
3b35bae3 769
    sub compare
    {
        my ($key1, $key2) = @_ ;
        ...
        # return  0 if $key1 eq $key2
        #        -1 if $key1 lt $key2
        #         1 if $key1 gt $key2
        return (-1 , 0 or 1) ;
    }
3b35bae3 779
    sub prefix
    {
        my ($key1, $key2) = @_ ;
        ...
        # return number of bytes of $key2 which are
        # necessary to determine that it is greater than $key1
        return $bytes ;
    }
3b35bae3 788
f6b705ef 789See L<Changing the BTREE sort order> for an example of using the
790C<compare> template.
88108326 791
36477c24 792If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 793C<bval>, you should check out L<The 'bval' Option>.
36477c24 794
88108326 795=head2 Default Parameters
796
797It is possible to omit some or all of the final 4 parameters in the
798call to C<tie> and let them take default values. As DB_HASH is the most
799common file format used, the call:
800
801 tie %A, "DB_File", "filename" ;
802
803is equivalent to:
804
18d2dc8c 805 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 806
807It is also possible to omit the filename parameter as well, so the
808call:
809
810 tie %A, "DB_File" ;
811
812is equivalent to:
813
18d2dc8c 814 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 815
f6b705ef 816See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 817in place of a filename.
818
f6b705ef 819=head2 In Memory Databases
820
821Berkeley DB allows the creation of in-memory databases by using NULL
822(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
823uses C<undef> instead of NULL to provide this functionality.
824
825=head1 DB_HASH
826
827The DB_HASH file format is probably the most commonly used of the three
828file formats that B<DB_File> supports. It is also very straightforward
829to use.
830
68dc0745 831=head2 A Simple Example
f6b705ef 832
833This example shows how to create a database, add key/value pairs to the
834database, delete keys/value pairs and finally how to enumerate the
835contents of the database.
836
3245f058 837 use warnings ;
610ab055 838 use strict ;
f6b705ef 839 use DB_File ;
07200f1b 840 our (%h, $k, $v) ;
f6b705ef 841
2c2d71f5 842 unlink "fruit" ;
45a340cb 843 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef 844 or die "Cannot open file 'fruit': $!\n";
845
846 # Add a few key/value pairs to the file
847 $h{"apple"} = "red" ;
848 $h{"orange"} = "orange" ;
849 $h{"banana"} = "yellow" ;
850 $h{"tomato"} = "red" ;
851
852 # Check for existence of a key
853 print "Banana Exists\n\n" if $h{"banana"} ;
854
855 # Delete a key/value pair.
856 delete $h{"apple"} ;
857
858 # print the contents of the file
859 while (($k, $v) = each %h)
860 { print "$k -> $v\n" }
861
862 untie %h ;
863
864here is the output:
865
866 Banana Exists
bbc7dcd2 867
f6b705ef 868 orange -> orange
869 tomato -> red
870 banana -> yellow
871
Note that, like ordinary associative arrays, the keys are retrieved in
an apparently random order.
874
875=head1 DB_BTREE
876
877The DB_BTREE format is useful when you want to store data in a given
878order. By default the keys will be stored in lexical order, but as you
879will see from the example shown in the next section, it is very easy to
880define your own sorting function.
881
882=head2 Changing the BTREE sort order
883
884This script shows how to override the default sorting algorithm that
885BTREE uses. Instead of using the normal lexical ordering, a case
886insensitive compare function will be used.
88108326 887
3245f058 888 use warnings ;
610ab055 889 use strict ;
f6b705ef 890 use DB_File ;
610ab055 891
892 my %h ;
f6b705ef 893
894 sub Compare
895 {
896 my ($key1, $key2) = @_ ;
897 "\L$key1" cmp "\L$key2" ;
898 }
899
900 # specify the Perl sub that will do the comparison
901 $DB_BTREE->{'compare'} = \&Compare ;
902
2c2d71f5 903 unlink "tree" ;
45a340cb 904 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 905 or die "Cannot open file 'tree': $!\n" ;
906
907 # Add a key/value pair to the file
908 $h{'Wall'} = 'Larry' ;
909 $h{'Smith'} = 'John' ;
910 $h{'mouse'} = 'mickey' ;
911 $h{'duck'} = 'donald' ;
912
913 # Delete
914 delete $h{"duck"} ;
915
916 # Cycle through the keys printing them in order.
917 # Note it is not necessary to sort the keys as
918 # the btree will have kept them in order automatically.
919 foreach (keys %h)
920 { print "$_\n" }
921
922 untie %h ;
923
924Here is the output from the code above.
925
926 mouse
927 Smith
928 Wall
929
There are a few points to bear in mind if you want to change the
ordering in a BTREE database:
932
933=over 5
934
935=item 1.
936
937The new compare function must be specified when you create the database.
938
939=item 2.
940
941You cannot change the ordering once the database has been created. Thus
942you must use the same compare function every time you access the
88108326 943database.
944
=item 3.
946
947Duplicate keys are entirely defined by the comparison function.
948In the case-insensitive example above, the keys: 'KEY' and 'key'
949would be considered duplicates, and assigning to the second one
would overwrite the first. If duplicates are allowed for (with the
R_DUP flag discussed below), only a single copy of duplicate keys
952is stored in the database --- so (again with example above) assigning
953three values to the keys: 'KEY', 'Key', and 'key' would leave just
954the first key: 'KEY' in the database with three values. For some
955situations this results in information loss, so care should be taken
956to provide fully qualified comparison functions when necessary.
957For example, the above comparison routine could be modified to
958additionally compare case-sensitively if two keys are equal in the
959case insensitive comparison:
960
961 sub compare {
962 my($key1, $key2) = @_;
963 lc $key1 cmp lc $key2 ||
964 $key1 cmp $key2;
965 }
966
967And now you will only have duplicates when the keys themselves
968are truly the same. (note: in versions of the db library prior to
969about November 1996, such duplicate keys were retained so it was
970possible to recover the original keys in sets of keys that
971compared as equal).
972
973
f6b705ef 974=back
975
68dc0745 976=head2 Handling Duplicate Keys
f6b705ef 977
978The BTREE file type optionally allows a single key to be associated
979with an arbitrary number of values. This option is enabled by setting
980the flags element of C<$DB_BTREE> to R_DUP when creating the database.
981
88108326 982There are some difficulties in using the tied hash interface if you
983want to manipulate a BTREE database with duplicate keys. Consider this
984code:
985
3245f058 986 use warnings ;
610ab055 987 use strict ;
88108326 988 use DB_File ;
610ab055 989
962cee9f 990 my ($filename, %h) ;
610ab055 991
88108326 992 $filename = "tree" ;
993 unlink $filename ;
bbc7dcd2 994
88108326 995 # Enable duplicate records
996 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 997
45a340cb 998 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 999 or die "Cannot open $filename: $!\n";
bbc7dcd2 1000
88108326 1001 # Add some key/value pairs to the file
1002 $h{'Wall'} = 'Larry' ;
1003 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1004 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1005 $h{'Smith'} = 'John' ;
1006 $h{'mouse'} = 'mickey' ;
1007
1008 # iterate through the associative array
1009 # and print each key/value pair.
2c2d71f5 1010 foreach (sort keys %h)
88108326 1011 { print "$_ -> $h{$_}\n" }
1012
f6b705ef 1013 untie %h ;
1014
88108326 1015Here is the output:
1016
1017 Smith -> John
1018 Wall -> Larry
1019 Wall -> Larry
f6b705ef 1020 Wall -> Larry
88108326 1021 mouse -> mickey
1022
f6b705ef 1023As you can see 3 records have been successfully created with key C<Wall>
88108326 1024- the only thing is, when they are retrieved from the database they
f6b705ef 1025I<seem> to have the same value, namely C<Larry>. The problem is caused
1026by the way that the associative array interface works. Basically, when
1027the associative array interface is used to fetch the value associated
1028with a given key, it will only ever retrieve the first value.
88108326 1029
1030Although it may not be immediately obvious from the code above, the
1031associative array interface can be used to write values with duplicate
1032keys, but it cannot be used to read them back from the database.
1033
1034The way to get around this problem is to use the Berkeley DB API method
1035called C<seq>. This method allows sequential access to key/value
f6b705ef 1036pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1037and the API in general.
88108326 1038
1039Here is the script above rewritten using the C<seq> API method.
1040
3245f058 1041 use warnings ;
610ab055 1042 use strict ;
88108326 1043 use DB_File ;
bbc7dcd2 1044
962cee9f 1045 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1046
88108326 1047 $filename = "tree" ;
1048 unlink $filename ;
bbc7dcd2 1049
88108326 1050 # Enable duplicate records
1051 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1052
45a340cb 1053 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1054 or die "Cannot open $filename: $!\n";
bbc7dcd2 1055
88108326 1056 # Add some key/value pairs to the file
1057 $h{'Wall'} = 'Larry' ;
1058 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1059 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1060 $h{'Smith'} = 'John' ;
1061 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1062
f6b705ef 1063 # iterate through the btree using seq
88108326 1064 # and print each key/value pair.
610ab055 1065 $key = $value = 0 ;
f6b705ef 1066 for ($status = $x->seq($key, $value, R_FIRST) ;
1067 $status == 0 ;
1068 $status = $x->seq($key, $value, R_NEXT) )
88108326 1069 { print "$key -> $value\n" }
bbc7dcd2 1070
88108326 1071 undef $x ;
1072 untie %h ;
1073
1074that prints:
1075
1076 Smith -> John
1077 Wall -> Brick
f6b705ef 1078 Wall -> Brick
88108326 1079 Wall -> Larry
1080 mouse -> mickey
1081
f6b705ef 1082This time we have got all the key/value pairs, including the multiple
88108326 1083values associated with the key C<Wall>.
1084
6ca2e664 1085To make life easier when dealing with duplicate keys, B<DB_File> comes with
1086a few utility methods.
1087
68dc0745 1088=head2 The get_dup() Method
f6b705ef 1089
6ca2e664 1090The C<get_dup> method assists in
88108326 1091reading duplicate values from BTREE databases. The method can take the
1092following forms:
1093
1094 $count = $x->get_dup($key) ;
1095 @list = $x->get_dup($key) ;
1096 %list = $x->get_dup($key, 1) ;
1097
1098In a scalar context the method returns the number of values associated
1099with the key, C<$key>.
1100
1101In list context, it returns all the values which match C<$key>. Note
f6b705ef 1102that the values will be returned in an apparently random order.
88108326 1103
7a2e2cd6 1104In list context, if the second parameter is present and evaluates
1105TRUE, the method returns an associative array. The keys of the
1106associative array correspond to the values that matched in the BTREE
1107and the values of the array are a count of the number of times that
1108particular value occurred in the BTREE.
88108326 1109
f6b705ef 1110So assuming the database created above, we can use C<get_dup> like
88108326 1111this:
1112
3245f058 1113 use warnings ;
2c2d71f5 1114 use strict ;
1115 use DB_File ;
bbc7dcd2 1116
962cee9f 1117 my ($filename, $x, %h) ;
2c2d71f5 1118
1119 $filename = "tree" ;
bbc7dcd2 1120
2c2d71f5 1121 # Enable duplicate records
1122 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1123
45a340cb 1124 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
2c2d71f5 1125 or die "Cannot open $filename: $!\n";
1126
610ab055 1127 my $cnt = $x->get_dup("Wall") ;
88108326 1128 print "Wall occurred $cnt times\n" ;
1129
610ab055 1130 my %hash = $x->get_dup("Wall", 1) ;
88108326 1131 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1132 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1133
2c2d71f5 1134 my @list = sort $x->get_dup("Wall") ;
88108326 1135 print "Wall => [@list]\n" ;
1136
f6b705ef 1137 @list = $x->get_dup("Smith") ;
88108326 1138 print "Smith => [@list]\n" ;
bbc7dcd2 1139
f6b705ef 1140 @list = $x->get_dup("Dog") ;
88108326 1141 print "Dog => [@list]\n" ;
1142
1143
1144and it will print:
1145
f6b705ef 1146 Wall occurred 3 times
88108326 1147 Larry is there
f6b705ef 1148 There are 2 Brick Walls
1149 Wall => [Brick Brick Larry]
88108326 1150 Smith => [John]
1151 Dog => []
3b35bae3 1152
6ca2e664 1153=head2 The find_dup() Method
1154
1155 $status = $X->find_dup($key, $value) ;
1156
b90e71be 1157This method checks for the existence of a specific key/value pair. If the
6ca2e664 1158pair exists, the cursor is left pointing to the pair and the method
1159returns 0. Otherwise the method returns a non-zero value.
1160
1161Assuming the database from the previous example:
1162
3245f058 1163 use warnings ;
6ca2e664 1164 use strict ;
1165 use DB_File ;
bbc7dcd2 1166
962cee9f 1167 my ($filename, $x, %h, $found) ;
6ca2e664 1168
07200f1b 1169 $filename = "tree" ;
bbc7dcd2 1170
6ca2e664 1171 # Enable duplicate records
1172 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1173
45a340cb 1174 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664 1175 or die "Cannot open $filename: $!\n";
1176
1177 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1178 print "Larry Wall is $found there\n" ;
bbc7dcd2 1179
6ca2e664 1180 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1181 print "Harry Wall is $found there\n" ;
bbc7dcd2 1182
6ca2e664 1183 undef $x ;
1184 untie %h ;
1185
1186prints this
1187
2c2d71f5 1188 Larry Wall is there
6ca2e664 1189 Harry Wall is not there
1190
1191
1192=head2 The del_dup() Method
1193
1194 $status = $X->del_dup($key, $value) ;
1195
1196This method deletes a specific key/value pair. It returns
11970 if they exist and have been deleted successfully.
1198Otherwise the method returns a non-zero value.
1199
b90e71be 1200Again assuming the existence of the C<tree> database
6ca2e664 1201
3245f058 1202 use warnings ;
6ca2e664 1203 use strict ;
1204 use DB_File ;
bbc7dcd2 1205
962cee9f 1206 my ($filename, $x, %h, $found) ;
6ca2e664 1207
07200f1b 1208 $filename = "tree" ;
bbc7dcd2 1209
6ca2e664 1210 # Enable duplicate records
1211 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1212
45a340cb 1213 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664 1214 or die "Cannot open $filename: $!\n";
1215
1216 $x->del_dup("Wall", "Larry") ;
1217
1218 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1219 print "Larry Wall is $found there\n" ;
bbc7dcd2 1220
6ca2e664 1221 undef $x ;
1222 untie %h ;
1223
1224prints this
1225
1226 Larry Wall is not there
1227
f6b705ef 1228=head2 Matching Partial Keys
1229
1230The BTREE interface has a feature which allows partial keys to be
1231matched. This functionality is I<only> available when the C<seq> method
1232is used along with the R_CURSOR flag.
1233
1234 $x->seq($key, $value, R_CURSOR) ;
1235
1236Here is the relevant quote from the dbopen man page where it defines
1237the use of the R_CURSOR flag with seq:
1238
f6b705ef 1239 Note, for the DB_BTREE access method, the returned key is not
1240 necessarily an exact match for the specified key. The returned key
1241 is the smallest key greater than or equal to the specified key,
1242 permitting partial key matches and range searches.
1243
f6b705ef 1244In the example script below, the C<match> sub uses this feature to find
1245and print the first matching key/value pair given a partial key.
1246
3245f058 1247 use warnings ;
610ab055 1248 use strict ;
f6b705ef 1249 use DB_File ;
1250 use Fcntl ;
610ab055 1251
962cee9f 1252 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef 1253
1254 sub match
1255 {
1256 my $key = shift ;
610ab055 1257 my $value = 0;
f6b705ef 1258 my $orig_key = $key ;
1259 $x->seq($key, $value, R_CURSOR) ;
1260 print "$orig_key\t-> $key\t-> $value\n" ;
1261 }
1262
1263 $filename = "tree" ;
1264 unlink $filename ;
1265
45a340cb 1266 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1267 or die "Cannot open $filename: $!\n";
bbc7dcd2 1268
f6b705ef 1269 # Add some key/value pairs to the file
1270 $h{'mouse'} = 'mickey' ;
1271 $h{'Wall'} = 'Larry' ;
1272 $h{'Walls'} = 'Brick' ;
1273 $h{'Smith'} = 'John' ;
bbc7dcd2 1274
f6b705ef 1275
610ab055 1276 $key = $value = 0 ;
f6b705ef 1277 print "IN ORDER\n" ;
1278 for ($st = $x->seq($key, $value, R_FIRST) ;
1279 $st == 0 ;
1280 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1281
2c2d71f5 1282 { print "$key -> $value\n" }
bbc7dcd2 1283
f6b705ef 1284 print "\nPARTIAL MATCH\n" ;
1285
1286 match "Wa" ;
1287 match "A" ;
1288 match "a" ;
1289
1290 undef $x ;
1291 untie %h ;
1292
1293Here is the output:
1294
1295 IN ORDER
1296 Smith -> John
1297 Wall -> Larry
1298 Walls -> Brick
1299 mouse -> mickey
1300
1301 PARTIAL MATCH
1302 Wa -> Wall -> Larry
1303 A -> Smith -> John
1304 a -> mouse -> mickey
1305
1306=head1 DB_RECNO
1307
1308DB_RECNO provides an interface to flat text files. Both variable and
1309fixed length records are supported.
3b35bae3 1310
6ca2e664 1311In order to make RECNO more compatible with Perl, the array offset for
88108326 1312all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1313
88108326 1314As with normal Perl arrays, a RECNO array can be accessed using
1315negative indexes. The index -1 refers to the last element of the array,
1316-2 the second last, and so on. Attempting to access an element before
1317the start of the array will raise a fatal run-time error.
3b35bae3 1318
68dc0745 1319=head2 The 'bval' Option
36477c24 1320
1321The operation of the bval option warrants some discussion. Here is the
1322definition of bval from the Berkeley DB 1.85 recno manual page:
1323
1324 The delimiting byte to be used to mark the end of a
1325 record for variable-length records, and the pad charac-
1326 ter for fixed-length records. If no value is speci-
1327 fied, newlines (``\n'') are used to mark the end of
1328 variable-length records and fixed-length records are
1329 padded with spaces.
1330
1331The second sentence is wrong. In actual fact bval will only default to
1332C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1333openinfo parameter is used at all, the value that happens to be in bval
1334will be used. That means you always have to specify bval when making
1335use of any of the options in the openinfo parameter. This documentation
1336error will be fixed in the next release of Berkeley DB.
1337
1338That clarifies the situation with regard to Berkeley DB itself. What
1339about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1340quite useful, so B<DB_File> conforms to it.
36477c24 1341
1342That means that you can specify other options (e.g. cachesize) and
1343still have bval default to C<"\n"> for variable length records, and
1344space for fixed length records.
1345
c5da4faf 1346Also note that the bval option only allows you to specify a single byte
1347as a delimiter.
1348
f6b705ef 1349=head2 A Simple Example
3b35bae3 1350
6ca2e664 1351Here is a simple example that uses RECNO (if you are using a version
1352of Perl earlier than 5.004_57 this example won't work -- see
1353L<Extra RECNO Methods> for a workaround).
f6b705ef 1354
3245f058 1355 use warnings ;
610ab055 1356 use strict ;
f6b705ef 1357 use DB_File ;
f6b705ef 1358
2c2d71f5 1359 my $filename = "text" ;
1360 unlink $filename ;
1361
610ab055 1362 my @h ;
45a340cb 1363 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1364 or die "Cannot open file 'text': $!\n" ;
1365
1366 # Add a few key/value pairs to the file
1367 $h[0] = "orange" ;
1368 $h[1] = "blue" ;
1369 $h[2] = "yellow" ;
1370
6ca2e664 1371 push @h, "green", "black" ;
1372
1373 my $elements = scalar @h ;
1374 print "The array contains $elements entries\n" ;
1375
1376 my $last = pop @h ;
1377 print "popped $last\n" ;
1378
1379 unshift @h, "white" ;
1380 my $first = shift @h ;
1381 print "shifted $first\n" ;
1382
f6b705ef 1383 # Check for existence of a key
1384 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1385
1386 # use a negative index
1387 print "The last element is $h[-1]\n" ;
1388 print "The 2nd last element is $h[-2]\n" ;
1389
1390 untie @h ;
3b35bae3 1391
f6b705ef 1392Here is the output from the script:
1393
6ca2e664 1394 The array contains 5 entries
1395 popped black
2c2d71f5 1396 shifted white
f6b705ef 1397 Element 1 Exists with value blue
6ca2e664 1398 The last element is green
1399 The 2nd last element is yellow
f6b705ef 1400
6ca2e664 1401=head2 Extra RECNO Methods
f6b705ef 1402
045291aa 1403If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664 1404array interface is quite limited. In the example script above
1405C<push>, C<pop>, C<shift>, C<unshift>
1406or determining the array length will not work with a tied array.
045291aa 1407
1408To make the interface more useful for older versions of Perl, a number
1409of methods are supplied with B<DB_File> to simulate the missing array
1410operations. All these methods are accessed via the object returned from
1411the tie call.
f6b705ef 1412
1413Here are the methods:
1414
1415=over 5
3b35bae3 1416
f6b705ef 1417=item B<$X-E<gt>push(list) ;>
1418
1419Pushes the elements of C<list> to the end of the array.
1420
1421=item B<$value = $X-E<gt>pop ;>
1422
1423Removes and returns the last element of the array.
1424
1425=item B<$X-E<gt>shift>
1426
1427Removes and returns the first element of the array.
1428
1429=item B<$X-E<gt>unshift(list) ;>
1430
1431Pushes the elements of C<list> to the start of the array.
1432
1433=item B<$X-E<gt>length>
1434
1435Returns the number of elements in the array.
1436
c5da4faf 1437=item B<$X-E<gt>splice(offset, length, elements);>
1438
1439Returns a splice of the array.
1440
f6b705ef 1441=back
1442
1443=head2 Another Example
1444
1445Here is a more complete example that makes use of some of the methods
1446described above. It also makes use of the API interface directly (see
1447L<THE API INTERFACE>).
1448
3245f058 1449 use warnings ;
f6b705ef 1450 use strict ;
962cee9f 1451 my (@h, $H, $file, $i) ;
f6b705ef 1452 use DB_File ;
1453 use Fcntl ;
bbc7dcd2 1454
f6b705ef 1455 $file = "text" ;
1456
1457 unlink $file ;
1458
45a340cb 1459 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1460 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1461
f6b705ef 1462 # first create a text file to play with
1463 $h[0] = "zero" ;
1464 $h[1] = "one" ;
1465 $h[2] = "two" ;
1466 $h[3] = "three" ;
1467 $h[4] = "four" ;
1468
bbc7dcd2 1469
f6b705ef 1470 # Print the records in order.
1471 #
1472 # The length method is needed here because evaluating a tied
1473 # array in a scalar context does not return the number of
1474 # elements in the array.
1475
1476 print "\nORIGINAL\n" ;
1477 foreach $i (0 .. $H->length - 1) {
1478 print "$i: $h[$i]\n" ;
1479 }
1480
1481 # use the push & pop methods
1482 $a = $H->pop ;
1483 $H->push("last") ;
1484 print "\nThe last record was [$a]\n" ;
1485
1486 # and the shift & unshift methods
1487 $a = $H->shift ;
1488 $H->unshift("first") ;
1489 print "The first record was [$a]\n" ;
1490
1491 # Use the API to add a new record after record 2.
1492 $i = 2 ;
1493 $H->put($i, "Newbie", R_IAFTER) ;
1494
1495 # and a new record before record 1.
1496 $i = 1 ;
1497 $H->put($i, "New One", R_IBEFORE) ;
1498
1499 # delete record 3
1500 $H->del(3) ;
1501
1502 # now print the records in reverse order
1503 print "\nREVERSE\n" ;
1504 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1505 { print "$i: $h[$i]\n" }
1506
1507 # same again, but use the API functions instead
1508 print "\nREVERSE again\n" ;
610ab055 1509 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1510 for ($s = $H->seq($k, $v, R_LAST) ;
1511 $s == 0 ;
1512 $s = $H->seq($k, $v, R_PREV))
1513 { print "$k: $v\n" }
1514
1515 undef $H ;
1516 untie @h ;
1517
1518and this is what it outputs:
1519
1520 ORIGINAL
1521 0: zero
1522 1: one
1523 2: two
1524 3: three
1525 4: four
1526
1527 The last record was [four]
1528 The first record was [zero]
1529
1530 REVERSE
1531 5: last
1532 4: three
1533 3: Newbie
1534 2: one
1535 1: New One
1536 0: first
1537
1538 REVERSE again
1539 5: last
1540 4: three
1541 3: Newbie
1542 2: one
1543 1: New One
1544 0: first
1545
1546Notes:
1547
1548=over 5
1549
1550=item 1.
1551
1552Rather than iterating through the array, C<@h> like this:
1553
1554 foreach $i (@h)
1555
1556it is necessary to use either this:
1557
1558 foreach $i (0 .. $H->length - 1)
1559
1560or this:
1561
1562 for ($a = $H->seq($k, $v, R_FIRST) ;
1563 $a == 0 ;
1564 $a = $H->seq($k, $v, R_NEXT) )
1565
1566=item 2.
1567
1568Notice that both times the C<put> method was used the record index was
1569specified using a variable, C<$i>, rather than the literal value
1570itself. This is because C<put> will return the record number of the
1571inserted line via that parameter.
1572
1573=back
1574
1575=head1 THE API INTERFACE
3b35bae3 1576
1577As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1578possible to make direct use of most of the API functions defined in the
8e07c86e 1579Berkeley DB documentation.
3b35bae3 1580
88108326 1581To do this you need to store a copy of the object returned from the tie.
3b35bae3 1582
88108326 1583 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1584
8e07c86e 1585Once you have done that, you can access the Berkeley DB API functions
88108326 1586as B<DB_File> methods directly like this:
3b35bae3 1587
1588 $db->put($key, $value, R_NOOVERWRITE) ;
1589
88108326 1590B<Important:> If you have saved a copy of the object returned from
1591C<tie>, the underlying database file will I<not> be closed until both
1592the tied variable is untied and all copies of the saved object are
610ab055 1593destroyed.
88108326 1594
1595 use DB_File ;
1596 $db = tie %hash, "DB_File", "filename"
1597 or die "Cannot tie filename: $!" ;
1598 ...
1599 undef $db ;
1600 untie %hash ;
1601
9a2c4ce3 1602See L<The untie() Gotcha> for more details.
778183f3 1603
88108326 1604All the functions defined in L<dbopen> are available except for
1605close() and dbopen() itself. The B<DB_File> method interface to the
1606supported functions has been implemented to mirror the way Berkeley DB
1607works whenever possible. In particular note that:
1608
1609=over 5
1610
1611=item *
1612
1613The methods return a status value. All return 0 on success.
1614All return -1 to signify an error and set C<$!> to the exact
1615error code. The return code 1 generally (but not always) means that the
1616key specified did not exist in the database.
1617
1618Other return codes are defined. See below and in the Berkeley DB
1619documentation for details. The Berkeley DB documentation should be used
1620as the definitive source.
1621
1622=item *
3b35bae3 1623
88108326 1624Whenever a Berkeley DB function returns data via one of its parameters,
1625the equivalent B<DB_File> method does exactly the same.
3b35bae3 1626
88108326 1627=item *
1628
1629If you are careful, it is possible to mix API calls with the tied
1630hash/array interface in the same piece of code. Although only a few of
1631the methods used to implement the tied interface currently make use of
1632the cursor, you should always assume that the cursor has been changed
1633any time the tied hash/array interface is used. As an example, this
1634code will probably not do what you expect:
1635
1636 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1637 or die "Cannot tie $filename: $!" ;
1638
1639 # Get the first key/value pair and set the cursor
1640 $X->seq($key, $value, R_FIRST) ;
1641
1642 # this line will modify the cursor
1643 $count = scalar keys %x ;
1644
1645 # Get the second key/value pair.
1646 # oops, it didn't, it got the last key/value pair!
1647 $X->seq($key, $value, R_NEXT) ;
1648
1649The code above can be rearranged to get around the problem, like this:
1650
1651 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1652 or die "Cannot tie $filename: $!" ;
1653
1654 # this line will modify the cursor
1655 $count = scalar keys %x ;
1656
1657 # Get the first key/value pair and set the cursor
1658 $X->seq($key, $value, R_FIRST) ;
1659
1660 # Get the second key/value pair.
1661 # worked this time.
1662 $X->seq($key, $value, R_NEXT) ;
1663
1664=back
1665
1666All the constants defined in L<dbopen> for use in the flags parameters
1667in the methods defined below are also available. Refer to the Berkeley
1668DB documentation for the precise meaning of the flags values.
1669
1670Below is a list of the methods available.
3b35bae3 1671
1672=over 5
1673
f6b705ef 1674=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1675
1676Given a key (C<$key>) this method reads the value associated with it
1677from the database. The value read from the database is returned in the
1678C<$value> parameter.
3b35bae3 1679
88108326 1680If the key does not exist the method returns 1.
3b35bae3 1681
88108326 1682No flags are currently defined for this method.
3b35bae3 1683
f6b705ef 1684=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1685
88108326 1686Stores the key/value pair in the database.
1687
1688If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1689will have the record number of the inserted key/value pair set.
3b35bae3 1690
88108326 1691Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1692R_SETCURSOR.
1693
f6b705ef 1694=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1695
88108326 1696Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1697
88108326 1698A return code of 1 means that the requested key was not in the
1699database.
3b35bae3 1700
88108326 1701R_CURSOR is the only valid flag at present.
3b35bae3 1702
f6b705ef 1703=item B<$status = $X-E<gt>fd ;>
3b35bae3 1704
88108326 1705Returns the file descriptor for the underlying database.
3b35bae3 1706
b90e71be 1707See L<Locking: The Trouble with fd> for an explanation for why you should
1708not use C<fd> to lock your database.
3b35bae3 1709
f6b705ef 1710=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1711
88108326 1712This interface allows sequential retrieval from the database. See
1713L<dbopen> for full details.
1714
1715Both the C<$key> and C<$value> parameters will be set to the key/value
1716pair read from the database.
1717
1718The flags parameter is mandatory. The valid flag values are R_CURSOR,
1719R_FIRST, R_LAST, R_NEXT and R_PREV.
1720
f6b705ef 1721=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1722
1723Flushes any cached buffers to disk.
1724
1725R_RECNOSYNC is the only valid flag at present.
3b35bae3 1726
1727=back
1728
cad2e5aa 1729=head1 DBM FILTERS
1730
1731A DBM Filter is a piece of code that is used when you I<always>
1732want to make the same transformation to all keys and/or values in a
1733DBM database.
1734
1735There are four methods associated with DBM Filters. All work identically,
1736and each is used to install (or uninstall) a single DBM Filter. Each
1737expects a single parameter, namely a reference to a sub. The only
1738difference between them is the place that the filter is installed.
1739
1740To summarise:
1741
1742=over 5
1743
1744=item B<filter_store_key>
1745
1746If a filter has been installed with this method, it will be invoked
1747every time you write a key to a DBM database.
1748
1749=item B<filter_store_value>
1750
1751If a filter has been installed with this method, it will be invoked
1752every time you write a value to a DBM database.
1753
1754
1755=item B<filter_fetch_key>
1756
1757If a filter has been installed with this method, it will be invoked
1758every time you read a key from a DBM database.
1759
1760=item B<filter_fetch_value>
1761
1762If a filter has been installed with this method, it will be invoked
1763every time you read a value from a DBM database.
1764
1765=back
1766
1767You can use any combination of the methods, from none, to all four.
1768
1769All filter methods return the existing filter, if present, or C<undef>
1770if not.
1771
1772To delete a filter pass C<undef> to it.
1773
1774=head2 The Filter
1775
1776When each filter is called by Perl, a local copy of C<$_> will contain
1777the key or value to be filtered. Filtering is achieved by modifying
1778the contents of C<$_>. The return code from the filter is ignored.
1779
1780=head2 An Example -- the NULL termination problem.
1781
1782Consider the following scenario. You have a DBM database
1783that you need to share with a third-party C application. The C application
1784assumes that I<all> keys and values are NULL terminated. Unfortunately
1785when Perl writes to DBM databases it doesn't use NULL termination, so
1786your Perl application will have to manage NULL termination itself. When
1787you write to the database you will have to use something like this:
1788
1789 $hash{"$key\0"} = "$value\0" ;
1790
1791Similarly the NULL needs to be taken into account when you are considering
1792the length of existing keys/values.
1793
1794It would be much better if you could ignore the NULL termination issue
1795in the main application code and have a mechanism that automatically
1796added the terminating NULL to all keys and values whenever you write to
1797the database and have them removed when you read from the database. As I'm
1798sure you have already guessed, this is a problem that DBM Filters can
1799fix very easily.
1800
3245f058 1801 use warnings ;
cad2e5aa 1802 use strict ;
1803 use DB_File ;
1804
1805 my %hash ;
1806 my $filename = "/tmp/filt" ;
1807 unlink $filename ;
1808
1809 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1810 or die "Cannot open $filename: $!\n" ;
1811
1812 # Install DBM Filters
1813 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1814 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1815 $db->filter_fetch_value( sub { s/\0$// } ) ;
1816 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1817
1818 $hash{"abc"} = "def" ;
1819 my $a = $hash{"ABC"} ;
1820 # ...
1821 undef $db ;
1822 untie %hash ;
1823
1824Hopefully the contents of each of the filters should be
1825self-explanatory. Both "fetch" filters remove the terminating NULL,
1826and both "store" filters add a terminating NULL.
1827
1828
1829=head2 Another Example -- Key is a C int.
1830
1831Here is another real-life example. By default, whenever Perl writes to
1832a DBM database it always writes the key and value as strings. So when
1833you use this:
1834
1835 $hash{12345} = "something" ;
1836
1837the key 12345 will get stored in the DBM database as the 5 byte string
1838"12345". If you actually want the key to be stored in the DBM database
1839as a C int, you will have to use C<pack> when writing, and C<unpack>
1840when reading.
1841
1842Here is a DBM Filter that does it:
1843
3245f058 1844 use warnings ;
cad2e5aa 1845 use strict ;
1846 use DB_File ;
1847 my %hash ;
1848 my $filename = "/tmp/filt" ;
1849 unlink $filename ;
1850
1851
1852 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1853 or die "Cannot open $filename: $!\n" ;
1854
1855 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1856 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1857 $hash{123} = "def" ;
1858 # ...
1859 undef $db ;
1860 untie %hash ;
1861
1862This time only two filters have been used -- we only need to manipulate
1863the contents of the key, so it wasn't necessary to install any value
1864filters.
1865
f6b705ef 1866=head1 HINTS AND TIPS
3b35bae3 1867
3b35bae3 1868
b90e71be 1869=head2 Locking: The Trouble with fd
3b35bae3 1870
b90e71be 1871Until version 1.72 of this module, the recommended technique for locking
1872B<DB_File> databases was to flock the filehandle returned from the "fd"
1873function. Unfortunately this technique has been shown to be fundamentally
1874flawed (Kudos to David Harris for tracking this down). Use it at your own
1875peril!
3b35bae3 1876
b90e71be 1877The locking technique went like this.
cb1a09d0 1878
45a340cb 1879 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0666)
b90e71be 1880 || die "dbcreat /tmp/foo.db $!";
1881 $fd = $db->fd;
1882 open(DB_FH, "+<&=$fd") || die "dup $!";
1883 flock (DB_FH, LOCK_EX) || die "flock: $!";
1884 ...
1885 $db{"Tom"} = "Jerry" ;
1886 ...
1887 flock(DB_FH, LOCK_UN);
1888 undef $db;
1889 untie %db;
1890 close(DB_FH);
cb1a09d0 1891
b90e71be 1892In simple terms, this is what happens:
cb1a09d0 1893
b90e71be 1894=over 5
cb1a09d0 1895
b90e71be 1896=item 1.
cb1a09d0 1897
b90e71be 1898Use "tie" to open the database.
cb1a09d0 1899
b90e71be 1900=item 2.
cb1a09d0 1901
b90e71be 1902Lock the database with fd & flock.
cb1a09d0 1903
b90e71be 1904=item 3.
cb1a09d0 1905
b90e71be 1906Read & Write to the database.
cb1a09d0 1907
b90e71be 1908=item 4.
cb1a09d0 1909
b90e71be 1910Unlock and close the database.
cb1a09d0 1911
b90e71be 1912=back
1913
1914Here is the crux of the problem. A side-effect of opening the B<DB_File>
1915database in step 1 is that an initial block from the database will get
1916read from disk and cached in memory.
1917
1918To see why this is a problem, consider what can happen when two processes,
1919say "A" and "B", both want to update the same B<DB_File> database
1920using the locking steps outlined above. Assume process "A" has already
1921opened the database and has a write lock, but it hasn't actually updated
1922the database yet (it has finished step 2, but not started step 3 yet). Now
1923process "B" tries to open the same database - step 1 will succeed,
1924but it will block on step 2 until process "A" releases the lock. The
1925important thing to notice here is that at this point in time both
1926processes will have cached identical initial blocks from the database.
1927
1928Now process "A" updates the database and happens to change some of the
1929data held in the initial buffer. Process "A" terminates, flushing
1930all cached data to disk and releasing the database lock. At this point
1931the database on disk will correctly reflect the changes made by process
1932"A".
1933
1934With the lock released, process "B" can now continue. It also updates the
1935database and unfortunately it too modifies the data that was in its
1936initial buffer. Once that data gets flushed to disk it will overwrite
1937some/all of the changes process "A" made to the database.
1938
1939The result of this scenario is at best a database that doesn't contain
1940what you expect. At worst the database will be corrupted.
1941
1942The above won't happen every time competing processes update the same
1943B<DB_File> database, but it does illustrate why the technique should
1944not be used.
1945
1946=head2 Safe ways to lock a database
1947
1948Starting with version 2.x, Berkeley DB has internal support for locking.
1949The companion module to this one, B<BerkeleyDB>, provides an interface
1950to this locking functionality. If you are serious about locking
1951Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1952
1953If using B<BerkeleyDB> isn't an option, there are a number of modules
1954available on CPAN that can be used to implement locking. Each one
1955implements locking differently and has different goals in mind. It is
1956therefore worth knowing the difference, so that you can pick the right
1957one for your application. Here are the three locking wrappers:
1958
1959=over 5
1960
1961=item B<Tie::DB_Lock>
1962
1963A B<DB_File> wrapper which creates copies of the database file for
1964read access, so that you have a kind of a multiversioning concurrent read
1965system. However, updates are still serial. Use for databases where reads
1966may be lengthy and consistency problems may occur.
1967
1968=item B<Tie::DB_LockFile>
1969
1970A B<DB_File> wrapper that has the ability to lock and unlock the database
1971while it is being used. Avoids the tie-before-flock problem by simply
1972re-tie-ing the database when you get or drop a lock. Because of the
1973flexibility in dropping and re-acquiring the lock in the middle of a
1974session, this can be massaged into a system that will work with long
1975updates and/or reads if the application follows the hints in the POD
1976documentation.
1977
1978=item B<DB_File::Lock>
1979
1980An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
1981before tie-ing the database and drops the lock after the untie. Allows
1982one to use the same lockfile for multiple databases to avoid deadlock
1983problems, if desired. Use for databases where updates and reads are
1984quick and simple flock locking semantics are enough.
1985
1986=back
cb1a09d0 1987
68dc0745 1988=head2 Sharing Databases With C Applications
f6b705ef 1989
1990There is no technical reason why a Berkeley DB database cannot be
1991shared by both a Perl and a C application.
1992
1993The vast majority of problems that are reported in this area boil down
1994to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 1995not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 1996
1997Here is a real example. Netscape 2.0 keeps a record of the locations you
1998visit along with the time you last visited them in a DB_HASH database.
1999This is usually stored in the file F<~/.netscape/history.db>. The key
2000field in the database is the location string and the value field is the
2001time the location was last visited stored as a 4 byte binary value.
2002
2003If you haven't already guessed, the location string is stored with a
2004terminating NULL. This means you need to be careful when accessing the
2005database.
2006
2007Here is a snippet of code that is loosely based on Tom Christiansen's
2008I<ggh> script (available from your nearest CPAN archive in
2009F<authors/id/TOMC/scripts/nshist.gz>).
2010
3245f058 2011 use warnings ;
610ab055 2012 use strict ;
f6b705ef 2013 use DB_File ;
2014 use Fcntl ;
f6b705ef 2015
962cee9f 2016 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef 2017 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2018
2019 $HISTORY = "$dotdir/.netscape/history.db";
2020
2021 tie %hist_db, 'DB_File', $HISTORY
2022 or die "Cannot open $HISTORY: $!\n" ;;
2023
2024 # Dump the complete database
2025 while ( ($href, $binary_time) = each %hist_db ) {
2026
2027 # remove the terminating NULL
2028 $href =~ s/\x00$// ;
2029
2030 # convert the binary time into a user friendly string
2031 $date = localtime unpack("V", $binary_time);
2032 print "$date $href\n" ;
2033 }
2034
2035 # check for the existence of a specific key
2036 # remember to add the NULL
2037 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2038 $date = localtime unpack("V", $binary_time) ;
2039 print "Last visited mox.perl.com on $date\n" ;
2040 }
2041 else {
2042 print "Never visited mox.perl.com\n"
2043 }
2044
2045 untie %hist_db ;
2046
68dc0745 2047=head2 The untie() Gotcha
778183f3 2048
7a2e2cd6 2049If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2050recommended that you read L<perltie/The untie Gotcha>.
778183f3 2051
2052Even if you don't currently make use of the API interface, it is still
2053worth reading it.
2054
2055Here is an example which illustrates the problem from a B<DB_File>
2056perspective:
2057
2058 use DB_File ;
2059 use Fcntl ;
2060
2061 my %x ;
2062 my $X ;
2063
2064 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2065 or die "Cannot tie first time: $!" ;
2066
2067 $x{123} = 456 ;
2068
2069 untie %x ;
2070
2071 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2072 or die "Cannot tie second time: $!" ;
2073
2074 untie %x ;
2075
2076When run, the script will produce this error message:
2077
2078 Cannot tie second time: Invalid argument at bad.file line 14.
2079
2080Although the error message above refers to the second tie() statement
2081in the script, the source of the problem is really with the untie()
2082statement that precedes it.
2083
2084Having read L<perltie> you will probably have already guessed that the
2085error is caused by the extra copy of the tied object stored in C<$X>.
2086If you haven't, then the problem boils down to the fact that the
2087B<DB_File> destructor, DESTROY, will not be called until I<all>
2088references to the tied object are destroyed. Both the tied variable,
2089C<%x>, and C<$X> above hold a reference to the object. The call to
2090untie() will destroy the first, but C<$X> still holds a valid
2091reference, so the destructor will not get called and the database file
2092F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2093attempt to open a database that is already open via the catch-all
778183f3 2094"Invalid argument" doesn't help.
2095
2096If you run the script with the C<-w> flag the error message becomes:
2097
2098 untie attempted while 1 inner references still exist at bad.file line 12.
2099 Cannot tie second time: Invalid argument at bad.file line 14.
2100
2101which pinpoints the real problem. Finally the script can now be
2102modified to fix the original problem by destroying the API object
2103before the untie:
2104
2105 ...
2106 $x{123} = 456 ;
2107
2108 undef $X ;
2109 untie %x ;
2110
2111 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2112 ...
2113
f6b705ef 2114
2115=head1 COMMON QUESTIONS
2116
2117=head2 Why is there Perl source in my database?
2118
2119If you look at the contents of a database file created by DB_File,
2120there can sometimes be part of a Perl script included in it.
2121
2122This happens because Berkeley DB uses dynamic memory to allocate
2123buffers which will subsequently be written to the database file. Being
2124dynamic, the memory could have been used for anything before DB
2125malloced it. As Berkeley DB doesn't clear the memory once it has been
2126allocated, the unused portions will contain random junk. In the case
2127where a Perl script gets written to the database, the random junk will
2128correspond to an area of dynamic memory that happened to be used during
2129the compilation of the script.
2130
2131Unless you don't like the possibility of there being part of your Perl
2132scripts embedded in a database file, this is nothing to worry about.
2133
2134=head2 How do I store complex data structures with DB_File?
2135
2136Although B<DB_File> cannot do this directly, there is a module which
2137can layer transparently over B<DB_File> to accomplish this feat.
2138
2139Check out the MLDBM module, available on CPAN in the directory
2140F<modules/by-module/MLDBM>.
2141
2142=head2 What does "Invalid Argument" mean?
2143
2144You will get this error message when one of the parameters in the
2145C<tie> call is wrong. Unfortunately there are quite a few parameters to
2146get wrong, so it can be difficult to figure out which one it is.
2147
2148Here are a couple of possibilities:
2149
2150=over 5
2151
2152=item 1.
2153
610ab055 2154Attempting to reopen a database without closing it.
f6b705ef 2155
2156=item 2.
2157
2158Using the O_WRONLY flag.
2159
2160=back
2161
2162=head2 What does "Bareword 'DB_File' not allowed" mean?
2163
2164You will encounter this particular error message when you have the
2165C<strict 'subs'> pragma (or the full strict pragma) in your script.
2166Consider this script:
2167
3245f058 2168 use warnings ;
f6b705ef 2169 use strict ;
2170 use DB_File ;
07200f1b 2171 my %x ;
f6b705ef 2172 tie %x, DB_File, "filename" ;
2173
2174Running it produces the error in question:
2175
2176 Bareword "DB_File" not allowed while "strict subs" in use
2177
2178To get around the error, place the word C<DB_File> in either single or
2179double quotes, like this:
2180
2181 tie %x, "DB_File", "filename" ;
2182
2183Although it might seem like a real pain, it is really worth the effort
2184of having a C<use strict> in all your scripts.
2185
cad2e5aa 2186=head1 REFERENCES
2187
2188Articles that are either about B<DB_File> or make use of it.
2189
2190=over 5
2191
2192=item 1.
2193
2194I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2195Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2196
2197=back
2198
cb1a09d0 2199=head1 HISTORY
2200
1f70e1ea 2201Moved to the Changes file.
610ab055 2202
1f70e1ea 2203=head1 BUGS
05475680 2204
1f70e1ea 2205Some older versions of Berkeley DB had problems with fixed length
2206records using the RECNO file format. This problem has been fixed since
2207version 1.85 of Berkeley DB.
e858de61 2208
1f70e1ea 2209I am sure there are bugs in the code. If you do find any, or can
2210suggest any enhancements, I would welcome your comments.
a6ed719b 2211
1f70e1ea 2212=head1 AVAILABILITY
a6ed719b 2213
1f70e1ea 2214B<DB_File> comes with the standard Perl source distribution. Look in
2215the directory F<ext/DB_File>. Given the amount of time between releases
2216of Perl the version that ships with Perl is quite likely to be out of
2217date, so the most recent version can always be found on CPAN (see
2218L<perlmod/CPAN> for details), in the directory
2219F<modules/by-module/DB_File>.
a6ed719b 2220
039d031f 2221This version of B<DB_File> will work with either version 1.x, 2.x or
22223.x of Berkeley DB, but is limited to the functionality provided by
2223version 1.
a6ed719b 2224
cad2e5aa 2225The official web site for Berkeley DB is F<http://www.sleepycat.com>.
039d031f 2226All versions of Berkeley DB are available there.
93af7a87 2227
1f70e1ea 2228Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2229archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2230
1f70e1ea 2231If you are running IRIX, then get Berkeley DB version 1 from
2232F<http://reality.sgi.com/ariel>. It has the patches necessary to
2233compile properly on IRIX 5.3.
610ab055 2234
1f70e1ea 2235=head1 COPYRIGHT
3b35bae3 2236
d63909e4 2237Copyright (c) 1995-2002 Paul Marquess. All rights reserved. This program
a9fd575d 2238is free software; you can redistribute it and/or modify it under the
2239same terms as Perl itself.
3b35bae3 2240
1f70e1ea 2241Although B<DB_File> is covered by the Perl license, the library it
2242makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2243copyright and its own license. Please take the time to read it.
3b35bae3 2244
a9fd575d 2245Here are a few words taken from the Berkeley DB FAQ (at
b90e71be 2246F<http://www.sleepycat.com>) regarding the license:
68dc0745 2247
a9fd575d 2248 Do I have to license DB to use it in Perl scripts?
3b35bae3 2249
a9fd575d 2250 No. The Berkeley DB license requires that software that uses
2251 Berkeley DB be freely redistributable. In the case of Perl, that
2252 software is Perl, and not your scripts. Any Perl scripts that you
2253 write are your property, including scripts that make use of
2254 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2255 place any restriction on what you may do with them.
88108326 2256
1f70e1ea 2257If you are in any doubt about the license situation, contact either the
2258Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 2259
2260
3b35bae3 2261=head1 SEE ALSO
2262
9fe6733a 2263L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2264L<dbmfilter>
3b35bae3 2265
3b35bae3 2266=head1 AUTHOR
2267
8e07c86e 2268The DB_File interface was written by Paul Marquess
6ca2e664 2269E<lt>Paul.Marquess@btinternet.comE<gt>.
d3ef3b8a 2270Questions about the DB system itself may be addressed to
2271E<lt>db@sleepycat.comE<gt>.
3b35bae3 2272
2273=cut