ANSI-C headers in test snippets to please g++ (and the rest)
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6d02d21f 3# written by Paul Marquess (pmqs@cpan.org)
c36891e2 4# last modified 11th November 2005
5# version 1.814
36477c24 6#
dcdb9d1f 7# Copyright (c) 1995-2005 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
12package DB_File::HASHINFO ;
785da04d 13
e5021521 14require 5.00404;
610ab055 15
3245f058 16use warnings;
785da04d 17use strict;
8e07c86e 18use Carp;
88108326 19require Tie::Hash;
20@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 21
88108326 22sub new
8e07c86e 23{
88108326 24 my $pkg = shift ;
25 my %x ;
26 tie %x, $pkg ;
27 bless \%x, $pkg ;
8e07c86e 28}
29
610ab055 30
88108326 31sub TIEHASH
32{
33 my $pkg = shift ;
34
efc79c7d 35 bless { VALID => {
36 bsize => 1,
37 ffactor => 1,
38 nelem => 1,
39 cachesize => 1,
40 hash => 2,
41 lorder => 1,
36477c24 42 },
43 GOT => {}
44 }, $pkg ;
88108326 45}
8e07c86e 46
610ab055 47
8e07c86e 48sub FETCH
49{
88108326 50 my $self = shift ;
51 my $key = shift ;
8e07c86e 52
36477c24 53 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
88108326 54
55 my $pkg = ref $self ;
56 croak "${pkg}::FETCH - Unknown element '$key'" ;
8e07c86e 57}
58
59
60sub STORE
61{
88108326 62 my $self = shift ;
63 my $key = shift ;
64 my $value = shift ;
65
efc79c7d 66 my $type = $self->{VALID}{$key};
67
68 if ( $type )
8e07c86e 69 {
efc79c7d 70 croak "Key '$key' not associated with a code reference"
71 if $type == 2 && !ref $value && ref $value ne 'CODE';
36477c24 72 $self->{GOT}{$key} = $value ;
8e07c86e 73 return ;
74 }
75
88108326 76 my $pkg = ref $self ;
77 croak "${pkg}::STORE - Unknown element '$key'" ;
8e07c86e 78}
79
80sub DELETE
81{
88108326 82 my $self = shift ;
83 my $key = shift ;
84
36477c24 85 if ( exists $self->{VALID}{$key} )
8e07c86e 86 {
36477c24 87 delete $self->{GOT}{$key} ;
8e07c86e 88 return ;
89 }
90
88108326 91 my $pkg = ref $self ;
92 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
8e07c86e 93}
94
88108326 95sub EXISTS
8e07c86e 96{
88108326 97 my $self = shift ;
98 my $key = shift ;
8e07c86e 99
36477c24 100 exists $self->{VALID}{$key} ;
8e07c86e 101}
102
88108326 103sub NotHere
8e07c86e 104{
18d2dc8c 105 my $self = shift ;
88108326 106 my $method = shift ;
8e07c86e 107
18d2dc8c 108 croak ref($self) . " does not define the method ${method}" ;
8e07c86e 109}
110
18d2dc8c 111sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
112sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
113sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 114
115package DB_File::RECNOINFO ;
785da04d 116
3245f058 117use warnings;
88108326 118use strict ;
119
045291aa 120@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 121
122sub TIEHASH
123{
88108326 124 my $pkg = shift ;
125
36477c24 126 bless { VALID => { map {$_, 1}
127 qw( bval cachesize psize flags lorder reclen bfname )
128 },
129 GOT => {},
130 }, $pkg ;
8e07c86e 131}
132
88108326 133package DB_File::BTREEINFO ;
8e07c86e 134
3245f058 135use warnings;
88108326 136use strict ;
8e07c86e 137
88108326 138@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 139
88108326 140sub TIEHASH
8e07c86e 141{
88108326 142 my $pkg = shift ;
143
efc79c7d 144 bless { VALID => {
145 flags => 1,
146 cachesize => 1,
147 maxkeypage => 1,
148 minkeypage => 1,
149 psize => 1,
150 compare => 2,
151 prefix => 2,
152 lorder => 1,
36477c24 153 },
154 GOT => {},
155 }, $pkg ;
8e07c86e 156}
157
158
8e07c86e 159package DB_File ;
785da04d 160
3245f058 161use warnings;
785da04d 162use strict;
07200f1b 163our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
c36891e2 164our ($db_version, $use_XSLoader, $splice_end_array, $Error);
8e07c86e 165use Carp;
166
785da04d 167
c36891e2 168$VERSION = "1.814" ;
d85a743d 169
170{
171 local $SIG{__WARN__} = sub {$splice_end_array = "@_";};
172 my @a =(1); splice(@a, 3);
173 $splice_end_array =
174 ($splice_end_array =~ /^splice\(\) offset past end of array at /);
175}
8e07c86e 176
177#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 178$DB_BTREE = new DB_File::BTREEINFO ;
179$DB_HASH = new DB_File::HASHINFO ;
180$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 181
785da04d 182require Tie::Hash;
8e07c86e 183require Exporter;
184use AutoLoader;
b90e71be 185BEGIN {
186 $use_XSLoader = 1 ;
e5021521 187 { local $SIG{__DIE__} ; eval { require XSLoader } ; }
b90e71be 188
189 if ($@) {
190 $use_XSLoader = 0 ;
191 require DynaLoader;
192 @ISA = qw(DynaLoader);
193 }
194}
195
196push @ISA, qw(Tie::Hash Exporter);
8e07c86e 197@EXPORT = qw(
198 $DB_BTREE $DB_HASH $DB_RECNO
88108326 199
8e07c86e 200 BTREEMAGIC
201 BTREEVERSION
202 DB_LOCK
203 DB_SHMEM
204 DB_TXN
205 HASHMAGIC
206 HASHVERSION
207 MAX_PAGE_NUMBER
208 MAX_PAGE_OFFSET
209 MAX_REC_NUMBER
210 RET_ERROR
211 RET_SPECIAL
212 RET_SUCCESS
213 R_CURSOR
214 R_DUP
215 R_FIRST
216 R_FIXEDLEN
217 R_IAFTER
218 R_IBEFORE
219 R_LAST
220 R_NEXT
221 R_NOKEY
222 R_NOOVERWRITE
223 R_PREV
224 R_RECNOSYNC
225 R_SETCURSOR
226 R_SNAPSHOT
227 __R_UNUSED
88108326 228
045291aa 229);
8e07c86e 230
231sub AUTOLOAD {
785da04d 232 my($constname);
8e07c86e 233 ($constname = $AUTOLOAD) =~ s/.*:://;
07200f1b 234 my ($error, $val) = constant($constname);
235 Carp::croak $error if $error;
57c77851 236 no strict 'refs';
237 *{$AUTOLOAD} = sub { $val };
238 goto &{$AUTOLOAD};
07200f1b 239}
8e07c86e 240
f6b705ef 241
a6ed719b 242eval {
1f70e1ea 243 # Make all Fcntl O_XXX constants available for importing
244 require Fcntl;
245 my @O = grep /^O_/, @Fcntl::EXPORT;
246 Fcntl->import(@O); # first we import what we want to export
247 push(@EXPORT, @O);
a6ed719b 248};
f6b705ef 249
b90e71be 250if ($use_XSLoader)
251 { XSLoader::load("DB_File", $VERSION)}
252else
253 { bootstrap DB_File $VERSION }
8e07c86e 254
255# Preloaded methods go here. Autoload methods go after __END__, and are
256# processed by the autosplit program.
257
05475680 258sub tie_hash_or_array
610ab055 259{
260 my (@arg) = @_ ;
05475680 261 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
610ab055 262
263 $arg[4] = tied %{ $arg[4] }
264 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
265
efc79c7d 266 $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2];
267 $arg[3] = 0666 if @arg >=4 && ! defined $arg[3];
268
9c095db2 269 # make recno in Berkeley DB version 2 (or better) work like
270 # recno in version 1.
d6067fe3 271 if ($db_version >= 4 and ! $tieHASH) {
272 $arg[2] |= O_CREAT();
273 }
274
1f70e1ea 275 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
276 $arg[1] and ! -e $arg[1]) {
277 open(FH, ">$arg[1]") or return undef ;
278 close FH ;
279 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
280 }
281
05475680 282 DoTie_($tieHASH, @arg) ;
610ab055 283}
284
05475680 285sub TIEHASH
286{
287 tie_hash_or_array(@_) ;
288}
289
290sub TIEARRAY
291{
292 tie_hash_or_array(@_) ;
293}
88108326 294
045291aa 295sub CLEAR
296{
1f70e1ea 297 my $self = shift;
3245f058 298 my $key = 0 ;
1f70e1ea 299 my $value = "" ;
300 my $status = $self->seq($key, $value, R_FIRST());
301 my @keys;
302
303 while ($status == 0) {
304 push @keys, $key;
305 $status = $self->seq($key, $value, R_NEXT());
306 }
307 foreach $key (reverse @keys) {
308 my $s = $self->del($key);
309 }
310}
311
045291aa 312sub EXTEND { }
313
314sub STORESIZE
315{
316 my $self = shift;
317 my $length = shift ;
318 my $current_length = $self->length() ;
319
320 if ($length < $current_length) {
321 my $key ;
322 for ($key = $current_length - 1 ; $key >= $length ; -- $key)
323 { $self->del($key) }
324 }
a9fd575d 325 elsif ($length > $current_length) {
326 $self->put($length-1, "") ;
327 }
045291aa 328}
329
c5da4faf 330
331sub SPLICE
332{
333 my $self = shift;
334 my $offset = shift;
335 if (not defined $offset) {
d85a743d 336 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf 337 $offset = 0;
338 }
339
340 my $length = @_ ? shift : 0;
341 # Carping about definedness comes _after_ the OFFSET sanity check.
342 # This is so we get the same error messages as Perl's splice().
343 #
344
345 my @list = @_;
346
347 my $size = $self->FETCHSIZE();
348
349 # 'If OFFSET is negative then it start that far from the end of
350 # the array.'
351 #
352 if ($offset < 0) {
353 my $new_offset = $size + $offset;
354 if ($new_offset < 0) {
355 die "Modification of non-creatable array value attempted, "
356 . "subscript $offset";
357 }
358 $offset = $new_offset;
359 }
360
c5da4faf 361 if (not defined $length) {
d85a743d 362 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf 363 $length = 0;
364 }
365
d85a743d 366 if ($offset > $size) {
367 $offset = $size;
368 warnings::warnif('misc', 'splice() offset past end of array')
369 if $splice_end_array;
370 }
371
c5da4faf 372 # 'If LENGTH is omitted, removes everything from OFFSET onward.'
373 if (not defined $length) {
374 $length = $size - $offset;
375 }
376
377 # 'If LENGTH is negative, leave that many elements off the end of
378 # the array.'
379 #
380 if ($length < 0) {
381 $length = $size - $offset + $length;
382
383 if ($length < 0) {
384 # The user must have specified a length bigger than the
385 # length of the array passed in. But perl's splice()
386 # doesn't catch this, it just behaves as for length=0.
387 #
388 $length = 0;
389 }
390 }
391
392 if ($length > $size - $offset) {
393 $length = $size - $offset;
394 }
395
396 # $num_elems holds the current number of elements in the database.
397 my $num_elems = $size;
398
399 # 'Removes the elements designated by OFFSET and LENGTH from an
400 # array,'...
401 #
402 my @removed = ();
403 foreach (0 .. $length - 1) {
404 my $old;
405 my $status = $self->get($offset, $old);
406 if ($status != 0) {
407 my $msg = "error from Berkeley DB on get($offset, \$old)";
408 if ($status == 1) {
409 $msg .= ' (no such element?)';
410 }
411 else {
412 $msg .= ": error status $status";
413 if (defined $! and $! ne '') {
414 $msg .= ", message $!";
415 }
416 }
417 die $msg;
418 }
419 push @removed, $old;
420
421 $status = $self->del($offset);
422 if ($status != 0) {
423 my $msg = "error from Berkeley DB on del($offset)";
424 if ($status == 1) {
425 $msg .= ' (no such element?)';
426 }
427 else {
428 $msg .= ": error status $status";
429 if (defined $! and $! ne '') {
430 $msg .= ", message $!";
431 }
432 }
433 die $msg;
434 }
435
436 -- $num_elems;
437 }
438
439 # ...'and replaces them with the elements of LIST, if any.'
440 my $pos = $offset;
441 while (defined (my $elem = shift @list)) {
442 my $old_pos = $pos;
443 my $status;
444 if ($pos >= $num_elems) {
445 $status = $self->put($pos, $elem);
446 }
447 else {
448 $status = $self->put($pos, $elem, $self->R_IBEFORE);
449 }
450
451 if ($status != 0) {
452 my $msg = "error from Berkeley DB on put($pos, $elem, ...)";
453 if ($status == 1) {
454 $msg .= ' (no such element?)';
455 }
456 else {
457 $msg .= ", error status $status";
458 if (defined $! and $! ne '') {
459 $msg .= ", message $!";
460 }
461 }
462 die $msg;
463 }
464
465 die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
466 if $old_pos != $pos;
467
468 ++ $pos;
469 ++ $num_elems;
470 }
471
472 if (wantarray) {
473 # 'In list context, returns the elements removed from the
474 # array.'
475 #
476 return @removed;
477 }
478 elsif (defined wantarray and not wantarray) {
479 # 'In scalar context, returns the last element removed, or
480 # undef if no elements are removed.'
481 #
482 if (@removed) {
483 my $last = pop @removed;
484 return "$last";
485 }
486 else {
487 return undef;
488 }
489 }
490 elsif (not defined wantarray) {
491 # Void context
492 }
493 else { die }
494}
495sub ::DB_File::splice { &SPLICE }
496
6ca2e664 497sub find_dup
498{
499 croak "Usage: \$db->find_dup(key,value)\n"
500 unless @_ == 3 ;
501
502 my $db = shift ;
503 my ($origkey, $value_wanted) = @_ ;
504 my ($key, $value) = ($origkey, 0);
505 my ($status) = 0 ;
506
507 for ($status = $db->seq($key, $value, R_CURSOR() ) ;
508 $status == 0 ;
509 $status = $db->seq($key, $value, R_NEXT() ) ) {
510
511 return 0 if $key eq $origkey and $value eq $value_wanted ;
512 }
513
514 return $status ;
515}
516
517sub del_dup
518{
519 croak "Usage: \$db->del_dup(key,value)\n"
520 unless @_ == 3 ;
521
522 my $db = shift ;
523 my ($key, $value) = @_ ;
524 my ($status) = $db->find_dup($key, $value) ;
525 return $status if $status != 0 ;
526
527 $status = $db->del($key, R_CURSOR() ) ;
528 return $status ;
529}
530
88108326 531sub get_dup
532{
533 croak "Usage: \$db->get_dup(key [,flag])\n"
534 unless @_ == 2 or @_ == 3 ;
535
536 my $db = shift ;
537 my $key = shift ;
538 my $flag = shift ;
f6b705ef 539 my $value = 0 ;
88108326 540 my $origkey = $key ;
541 my $wantarray = wantarray ;
f6b705ef 542 my %values = () ;
88108326 543 my @values = () ;
544 my $counter = 0 ;
f6b705ef 545 my $status = 0 ;
88108326 546
f6b705ef 547 # iterate through the database until either EOF ($status == 0)
548 # or a different key is encountered ($key ne $origkey).
549 for ($status = $db->seq($key, $value, R_CURSOR()) ;
550 $status == 0 and $key eq $origkey ;
551 $status = $db->seq($key, $value, R_NEXT()) ) {
88108326 552
f6b705ef 553 # save the value or count number of matches
554 if ($wantarray) {
555 if ($flag)
556 { ++ $values{$value} }
557 else
558 { push (@values, $value) }
559 }
560 else
561 { ++ $counter }
88108326 562
88108326 563 }
564
f6b705ef 565 return ($wantarray ? ($flag ? %values : @values) : $counter) ;
88108326 566}
567
568
8e07c86e 5691;
570__END__
571
3b35bae3 572=head1 NAME
573
1f70e1ea 574DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 575
576=head1 SYNOPSIS
577
bbc7dcd2 578 use DB_File;
579
88108326 580 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
581 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
582 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 583
3b35bae3 584 $status = $X->del($key [, $flags]) ;
585 $status = $X->put($key, $value [, $flags]) ;
586 $status = $X->get($key, $value [, $flags]) ;
760ac839 587 $status = $X->seq($key, $value, $flags) ;
3b35bae3 588 $status = $X->sync([$flags]) ;
589 $status = $X->fd ;
760ac839 590
f6b705ef 591 # BTREE only
88108326 592 $count = $X->get_dup($key) ;
593 @list = $X->get_dup($key) ;
594 %list = $X->get_dup($key, 1) ;
6ca2e664 595 $status = $X->find_dup($key, $value) ;
596 $status = $X->del_dup($key, $value) ;
88108326 597
f6b705ef 598 # RECNO only
599 $a = $X->length;
600 $a = $X->pop ;
601 $X->push(list);
602 $a = $X->shift;
603 $X->unshift(list);
c5da4faf 604 @r = $X->splice(offset, length, elements);
f6b705ef 605
cad2e5aa 606 # DBM Filters
607 $old_filter = $db->filter_store_key ( sub { ... } ) ;
608 $old_filter = $db->filter_store_value( sub { ... } ) ;
609 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
610 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
611
3b35bae3 612 untie %hash ;
613 untie @array ;
614
615=head1 DESCRIPTION
616
8e07c86e 617B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 618facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 619version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f 620It is assumed that you have a copy of the Berkeley DB manual pages at
621hand when reading this documentation. The interface defined here
622mirrors the Berkeley DB interface closely.
68dc0745 623
8e07c86e 624Berkeley DB is a C library which provides a consistent interface to a
625number of database formats. B<DB_File> provides an interface to all
626three of the database types currently supported by Berkeley DB.
3b35bae3 627
628The file types are:
629
630=over 5
631
88108326 632=item B<DB_HASH>
3b35bae3 633
88108326 634This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 635files. This is equivalent to the functionality provided by other
636hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
637the files created using DB_HASH are not compatible with any of the
638other packages mentioned.
3b35bae3 639
8e07c86e 640A default hashing algorithm, which will be adequate for most
641applications, is built into Berkeley DB. If you do need to use your own
642hashing algorithm it is possible to write your own in Perl and have
643B<DB_File> use it instead.
3b35bae3 644
88108326 645=item B<DB_BTREE>
646
647The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 648sorted, balanced binary tree.
3b35bae3 649
8e07c86e 650As with the DB_HASH format, it is possible to provide a user defined
651Perl routine to perform the comparison of keys. By default, though, the
652keys are stored in lexical order.
3b35bae3 653
88108326 654=item B<DB_RECNO>
3b35bae3 655
8e07c86e 656DB_RECNO allows both fixed-length and variable-length flat text files
657to be manipulated using the same key/value pair interface as in DB_HASH
658and DB_BTREE. In this case the key will consist of a record (line)
659number.
3b35bae3 660
661=back
662
e5021521 663=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea 664
665Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 666it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 667limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 668version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 669like version 1. This feature allows B<DB_File> scripts that were built
e5021521 670with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea 671
672If you want to make use of the new features available in Berkeley DB
b90e71be 6732.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 674
e5021521 675B<Note:> The database file format has changed multiple times in Berkeley
676DB version 2, 3 and 4. If you cannot recreate your databases, you
677must dump any existing databases with either the C<db_dump> or the
678C<db_dump185> utility that comes with Berkeley DB.
679Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
680your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea 681documentation for further details.
682
e5021521 683Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 684DB with DB_File.
1f70e1ea 685
68dc0745 686=head2 Interface to Berkeley DB
3b35bae3 687
688B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 689in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
690allows B<DB_File> to access Berkeley DB files using either an
691associative array (for DB_HASH & DB_BTREE file types) or an ordinary
692array (for the DB_RECNO file type).
3b35bae3 693
88108326 694In addition to the tie() interface, it is also possible to access most
695of the functions provided in the Berkeley DB API directly.
f6b705ef 696See L<THE API INTERFACE>.
3b35bae3 697
88108326 698=head2 Opening a Berkeley DB Database File
3b35bae3 699
8e07c86e 700Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 701Here is the C prototype for dbopen():
3b35bae3 702
703 DB*
704 dbopen (const char * file, int flags, int mode,
705 DBTYPE type, const void * openinfo)
706
707The parameter C<type> is an enumeration which specifies which of the 3
708interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
709Depending on which of these is actually chosen, the final parameter,
710I<openinfo> points to a data structure which allows tailoring of the
711specific interface method.
712
8e07c86e 713This interface is handled slightly differently in B<DB_File>. Here is
88108326 714an equivalent call using B<DB_File>:
3b35bae3 715
88108326 716 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 717
8e07c86e 718The C<filename>, C<flags> and C<mode> parameters are the direct
719equivalent of their dbopen() counterparts. The final parameter $DB_HASH
720performs the function of both the C<type> and C<openinfo> parameters in
721dbopen().
3b35bae3 722
88108326 723In the example above $DB_HASH is actually a pre-defined reference to a
724hash object. B<DB_File> has three of these pre-defined references.
725Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 726
8e07c86e 727The keys allowed in each of these pre-defined references is limited to
728the names used in the equivalent C structure. So, for example, the
729$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 730C<ffactor>, C<hash>, C<lorder> and C<nelem>.
731
732To change one of these elements, just assign to it like this:
733
734 $DB_HASH->{'cachesize'} = 10000 ;
735
736The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
737usually adequate for most applications. If you do need to create extra
738instances of these objects, constructors are available for each file
739type.
740
741Here are examples of the constructors and the valid options available
742for DB_HASH, DB_BTREE and DB_RECNO respectively.
743
744 $a = new DB_File::HASHINFO ;
745 $a->{'bsize'} ;
746 $a->{'cachesize'} ;
747 $a->{'ffactor'};
748 $a->{'hash'} ;
749 $a->{'lorder'} ;
750 $a->{'nelem'} ;
751
752 $b = new DB_File::BTREEINFO ;
753 $b->{'flags'} ;
754 $b->{'cachesize'} ;
755 $b->{'maxkeypage'} ;
756 $b->{'minkeypage'} ;
757 $b->{'psize'} ;
758 $b->{'compare'} ;
759 $b->{'prefix'} ;
760 $b->{'lorder'} ;
761
762 $c = new DB_File::RECNOINFO ;
763 $c->{'bval'} ;
764 $c->{'cachesize'} ;
765 $c->{'psize'} ;
766 $c->{'flags'} ;
767 $c->{'lorder'} ;
768 $c->{'reclen'} ;
769 $c->{'bfname'} ;
770
771The values stored in the hashes above are mostly the direct equivalent
772of their C counterpart. Like their C counterparts, all are set to a
f6b705ef 773default values - that means you don't have to set I<all> of the
88108326 774values when you only want to change one. Here is an example:
775
776 $a = new DB_File::HASHINFO ;
777 $a->{'cachesize'} = 12345 ;
778 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
779
36477c24 780A few of the options need extra discussion here. When used, the C
88108326 781equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
782to C functions. In B<DB_File> these keys are used to store references
783to Perl subs. Below are templates for each of the subs:
784
785 sub hash
786 {
787 my ($data) = @_ ;
788 ...
789 # return the hash value for $data
790 return $hash ;
791 }
3b35bae3 792
88108326 793 sub compare
794 {
795 my ($key, $key2) = @_ ;
796 ...
797 # return 0 if $key1 eq $key2
798 # -1 if $key1 lt $key2
799 # 1 if $key1 gt $key2
800 return (-1 , 0 or 1) ;
801 }
3b35bae3 802
88108326 803 sub prefix
804 {
805 my ($key, $key2) = @_ ;
806 ...
807 # return number of bytes of $key2 which are
808 # necessary to determine that it is greater than $key1
809 return $bytes ;
810 }
3b35bae3 811
f6b705ef 812See L<Changing the BTREE sort order> for an example of using the
813C<compare> template.
88108326 814
36477c24 815If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 816C<bval>, you should check out L<The 'bval' Option>.
36477c24 817
88108326 818=head2 Default Parameters
819
820It is possible to omit some or all of the final 4 parameters in the
821call to C<tie> and let them take default values. As DB_HASH is the most
822common file format used, the call:
823
824 tie %A, "DB_File", "filename" ;
825
826is equivalent to:
827
18d2dc8c 828 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 829
830It is also possible to omit the filename parameter as well, so the
831call:
832
833 tie %A, "DB_File" ;
834
835is equivalent to:
836
18d2dc8c 837 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 838
f6b705ef 839See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 840in place of a filename.
841
f6b705ef 842=head2 In Memory Databases
843
844Berkeley DB allows the creation of in-memory databases by using NULL
845(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
846uses C<undef> instead of NULL to provide this functionality.
847
848=head1 DB_HASH
849
850The DB_HASH file format is probably the most commonly used of the three
851file formats that B<DB_File> supports. It is also very straightforward
852to use.
853
68dc0745 854=head2 A Simple Example
f6b705ef 855
856This example shows how to create a database, add key/value pairs to the
857database, delete keys/value pairs and finally how to enumerate the
858contents of the database.
859
3245f058 860 use warnings ;
610ab055 861 use strict ;
f6b705ef 862 use DB_File ;
07200f1b 863 our (%h, $k, $v) ;
f6b705ef 864
2c2d71f5 865 unlink "fruit" ;
45a340cb 866 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef 867 or die "Cannot open file 'fruit': $!\n";
868
869 # Add a few key/value pairs to the file
870 $h{"apple"} = "red" ;
871 $h{"orange"} = "orange" ;
872 $h{"banana"} = "yellow" ;
873 $h{"tomato"} = "red" ;
874
875 # Check for existence of a key
876 print "Banana Exists\n\n" if $h{"banana"} ;
877
878 # Delete a key/value pair.
879 delete $h{"apple"} ;
880
881 # print the contents of the file
882 while (($k, $v) = each %h)
883 { print "$k -> $v\n" }
884
885 untie %h ;
886
887here is the output:
888
889 Banana Exists
bbc7dcd2 890
f6b705ef 891 orange -> orange
892 tomato -> red
893 banana -> yellow
894
895Note that the like ordinary associative arrays, the order of the keys
896retrieved is in an apparently random order.
897
898=head1 DB_BTREE
899
900The DB_BTREE format is useful when you want to store data in a given
901order. By default the keys will be stored in lexical order, but as you
902will see from the example shown in the next section, it is very easy to
903define your own sorting function.
904
905=head2 Changing the BTREE sort order
906
907This script shows how to override the default sorting algorithm that
908BTREE uses. Instead of using the normal lexical ordering, a case
909insensitive compare function will be used.
88108326 910
3245f058 911 use warnings ;
610ab055 912 use strict ;
f6b705ef 913 use DB_File ;
610ab055 914
915 my %h ;
f6b705ef 916
917 sub Compare
918 {
919 my ($key1, $key2) = @_ ;
920 "\L$key1" cmp "\L$key2" ;
921 }
922
923 # specify the Perl sub that will do the comparison
924 $DB_BTREE->{'compare'} = \&Compare ;
925
2c2d71f5 926 unlink "tree" ;
45a340cb 927 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 928 or die "Cannot open file 'tree': $!\n" ;
929
930 # Add a key/value pair to the file
931 $h{'Wall'} = 'Larry' ;
932 $h{'Smith'} = 'John' ;
933 $h{'mouse'} = 'mickey' ;
934 $h{'duck'} = 'donald' ;
935
936 # Delete
937 delete $h{"duck"} ;
938
939 # Cycle through the keys printing them in order.
940 # Note it is not necessary to sort the keys as
941 # the btree will have kept them in order automatically.
942 foreach (keys %h)
943 { print "$_\n" }
944
945 untie %h ;
946
947Here is the output from the code above.
948
949 mouse
950 Smith
951 Wall
952
953There are a few point to bear in mind if you want to change the
954ordering in a BTREE database:
955
956=over 5
957
958=item 1.
959
960The new compare function must be specified when you create the database.
961
962=item 2.
963
964You cannot change the ordering once the database has been created. Thus
965you must use the same compare function every time you access the
88108326 966database.
967
39793c41 968=item 3
969
970Duplicate keys are entirely defined by the comparison function.
971In the case-insensitive example above, the keys: 'KEY' and 'key'
972would be considered duplicates, and assigning to the second one
52ffee89 973would overwrite the first. If duplicates are allowed for (with the
59e51af5 974R_DUP flag discussed below), only a single copy of duplicate keys
39793c41 975is stored in the database --- so (again with example above) assigning
976three values to the keys: 'KEY', 'Key', and 'key' would leave just
977the first key: 'KEY' in the database with three values. For some
978situations this results in information loss, so care should be taken
979to provide fully qualified comparison functions when necessary.
980For example, the above comparison routine could be modified to
981additionally compare case-sensitively if two keys are equal in the
982case insensitive comparison:
983
984 sub compare {
985 my($key1, $key2) = @_;
986 lc $key1 cmp lc $key2 ||
987 $key1 cmp $key2;
988 }
989
990And now you will only have duplicates when the keys themselves
991are truly the same. (note: in versions of the db library prior to
992about November 1996, such duplicate keys were retained so it was
993possible to recover the original keys in sets of keys that
994compared as equal).
995
996
f6b705ef 997=back
998
68dc0745 999=head2 Handling Duplicate Keys
f6b705ef 1000
1001The BTREE file type optionally allows a single key to be associated
1002with an arbitrary number of values. This option is enabled by setting
1003the flags element of C<$DB_BTREE> to R_DUP when creating the database.
1004
88108326 1005There are some difficulties in using the tied hash interface if you
1006want to manipulate a BTREE database with duplicate keys. Consider this
1007code:
1008
3245f058 1009 use warnings ;
610ab055 1010 use strict ;
88108326 1011 use DB_File ;
610ab055 1012
962cee9f 1013 my ($filename, %h) ;
610ab055 1014
88108326 1015 $filename = "tree" ;
1016 unlink $filename ;
bbc7dcd2 1017
88108326 1018 # Enable duplicate records
1019 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1020
45a340cb 1021 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1022 or die "Cannot open $filename: $!\n";
bbc7dcd2 1023
88108326 1024 # Add some key/value pairs to the file
1025 $h{'Wall'} = 'Larry' ;
1026 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1027 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1028 $h{'Smith'} = 'John' ;
1029 $h{'mouse'} = 'mickey' ;
1030
1031 # iterate through the associative array
1032 # and print each key/value pair.
2c2d71f5 1033 foreach (sort keys %h)
88108326 1034 { print "$_ -> $h{$_}\n" }
1035
f6b705ef 1036 untie %h ;
1037
88108326 1038Here is the output:
1039
1040 Smith -> John
1041 Wall -> Larry
1042 Wall -> Larry
f6b705ef 1043 Wall -> Larry
88108326 1044 mouse -> mickey
1045
f6b705ef 1046As you can see 3 records have been successfully created with key C<Wall>
88108326 1047- the only thing is, when they are retrieved from the database they
f6b705ef 1048I<seem> to have the same value, namely C<Larry>. The problem is caused
1049by the way that the associative array interface works. Basically, when
1050the associative array interface is used to fetch the value associated
1051with a given key, it will only ever retrieve the first value.
88108326 1052
1053Although it may not be immediately obvious from the code above, the
1054associative array interface can be used to write values with duplicate
1055keys, but it cannot be used to read them back from the database.
1056
1057The way to get around this problem is to use the Berkeley DB API method
1058called C<seq>. This method allows sequential access to key/value
f6b705ef 1059pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1060and the API in general.
88108326 1061
1062Here is the script above rewritten using the C<seq> API method.
1063
3245f058 1064 use warnings ;
610ab055 1065 use strict ;
88108326 1066 use DB_File ;
bbc7dcd2 1067
962cee9f 1068 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1069
88108326 1070 $filename = "tree" ;
1071 unlink $filename ;
bbc7dcd2 1072
88108326 1073 # Enable duplicate records
1074 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1075
45a340cb 1076 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1077 or die "Cannot open $filename: $!\n";
bbc7dcd2 1078
88108326 1079 # Add some key/value pairs to the file
1080 $h{'Wall'} = 'Larry' ;
1081 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1082 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1083 $h{'Smith'} = 'John' ;
1084 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1085
f6b705ef 1086 # iterate through the btree using seq
88108326 1087 # and print each key/value pair.
610ab055 1088 $key = $value = 0 ;
f6b705ef 1089 for ($status = $x->seq($key, $value, R_FIRST) ;
1090 $status == 0 ;
1091 $status = $x->seq($key, $value, R_NEXT) )
88108326 1092 { print "$key -> $value\n" }
bbc7dcd2 1093
88108326 1094 undef $x ;
1095 untie %h ;
1096
1097that prints:
1098
1099 Smith -> John
1100 Wall -> Brick
f6b705ef 1101 Wall -> Brick
88108326 1102 Wall -> Larry
1103 mouse -> mickey
1104
f6b705ef 1105This time we have got all the key/value pairs, including the multiple
88108326 1106values associated with the key C<Wall>.
1107
6ca2e664 1108To make life easier when dealing with duplicate keys, B<DB_File> comes with
1109a few utility methods.
1110
68dc0745 1111=head2 The get_dup() Method
f6b705ef 1112
6ca2e664 1113The C<get_dup> method assists in
88108326 1114reading duplicate values from BTREE databases. The method can take the
1115following forms:
1116
1117 $count = $x->get_dup($key) ;
1118 @list = $x->get_dup($key) ;
1119 %list = $x->get_dup($key, 1) ;
1120
1121In a scalar context the method returns the number of values associated
1122with the key, C<$key>.
1123
1124In list context, it returns all the values which match C<$key>. Note
f6b705ef 1125that the values will be returned in an apparently random order.
88108326 1126
7a2e2cd6 1127In list context, if the second parameter is present and evaluates
1128TRUE, the method returns an associative array. The keys of the
1129associative array correspond to the values that matched in the BTREE
1130and the values of the array are a count of the number of times that
1131particular value occurred in the BTREE.
88108326 1132
f6b705ef 1133So assuming the database created above, we can use C<get_dup> like
88108326 1134this:
1135
3245f058 1136 use warnings ;
2c2d71f5 1137 use strict ;
1138 use DB_File ;
bbc7dcd2 1139
962cee9f 1140 my ($filename, $x, %h) ;
2c2d71f5 1141
1142 $filename = "tree" ;
bbc7dcd2 1143
2c2d71f5 1144 # Enable duplicate records
1145 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1146
45a340cb 1147 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
2c2d71f5 1148 or die "Cannot open $filename: $!\n";
1149
610ab055 1150 my $cnt = $x->get_dup("Wall") ;
88108326 1151 print "Wall occurred $cnt times\n" ;
1152
610ab055 1153 my %hash = $x->get_dup("Wall", 1) ;
88108326 1154 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1155 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1156
2c2d71f5 1157 my @list = sort $x->get_dup("Wall") ;
88108326 1158 print "Wall => [@list]\n" ;
1159
f6b705ef 1160 @list = $x->get_dup("Smith") ;
88108326 1161 print "Smith => [@list]\n" ;
bbc7dcd2 1162
f6b705ef 1163 @list = $x->get_dup("Dog") ;
88108326 1164 print "Dog => [@list]\n" ;
1165
1166
1167and it will print:
1168
f6b705ef 1169 Wall occurred 3 times
88108326 1170 Larry is there
f6b705ef 1171 There are 2 Brick Walls
1172 Wall => [Brick Brick Larry]
88108326 1173 Smith => [John]
1174 Dog => []
3b35bae3 1175
6ca2e664 1176=head2 The find_dup() Method
1177
1178 $status = $X->find_dup($key, $value) ;
1179
b90e71be 1180This method checks for the existence of a specific key/value pair. If the
6ca2e664 1181pair exists, the cursor is left pointing to the pair and the method
1182returns 0. Otherwise the method returns a non-zero value.
1183
1184Assuming the database from the previous example:
1185
3245f058 1186 use warnings ;
6ca2e664 1187 use strict ;
1188 use DB_File ;
bbc7dcd2 1189
962cee9f 1190 my ($filename, $x, %h, $found) ;
6ca2e664 1191
07200f1b 1192 $filename = "tree" ;
bbc7dcd2 1193
6ca2e664 1194 # Enable duplicate records
1195 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1196
45a340cb 1197 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664 1198 or die "Cannot open $filename: $!\n";
1199
1200 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1201 print "Larry Wall is $found there\n" ;
bbc7dcd2 1202
6ca2e664 1203 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1204 print "Harry Wall is $found there\n" ;
bbc7dcd2 1205
6ca2e664 1206 undef $x ;
1207 untie %h ;
1208
1209prints this
1210
2c2d71f5 1211 Larry Wall is there
6ca2e664 1212 Harry Wall is not there
1213
1214
1215=head2 The del_dup() Method
1216
1217 $status = $X->del_dup($key, $value) ;
1218
1219This method deletes a specific key/value pair. It returns
12200 if they exist and have been deleted successfully.
1221Otherwise the method returns a non-zero value.
1222
b90e71be 1223Again assuming the existence of the C<tree> database
6ca2e664 1224
3245f058 1225 use warnings ;
6ca2e664 1226 use strict ;
1227 use DB_File ;
bbc7dcd2 1228
962cee9f 1229 my ($filename, $x, %h, $found) ;
6ca2e664 1230
07200f1b 1231 $filename = "tree" ;
bbc7dcd2 1232
6ca2e664 1233 # Enable duplicate records
1234 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1235
45a340cb 1236 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664 1237 or die "Cannot open $filename: $!\n";
1238
1239 $x->del_dup("Wall", "Larry") ;
1240
1241 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1242 print "Larry Wall is $found there\n" ;
bbc7dcd2 1243
6ca2e664 1244 undef $x ;
1245 untie %h ;
1246
1247prints this
1248
1249 Larry Wall is not there
1250
f6b705ef 1251=head2 Matching Partial Keys
1252
1253The BTREE interface has a feature which allows partial keys to be
1254matched. This functionality is I<only> available when the C<seq> method
1255is used along with the R_CURSOR flag.
1256
1257 $x->seq($key, $value, R_CURSOR) ;
1258
1259Here is the relevant quote from the dbopen man page where it defines
1260the use of the R_CURSOR flag with seq:
1261
f6b705ef 1262 Note, for the DB_BTREE access method, the returned key is not
1263 necessarily an exact match for the specified key. The returned key
1264 is the smallest key greater than or equal to the specified key,
1265 permitting partial key matches and range searches.
1266
f6b705ef 1267In the example script below, the C<match> sub uses this feature to find
1268and print the first matching key/value pair given a partial key.
1269
3245f058 1270 use warnings ;
610ab055 1271 use strict ;
f6b705ef 1272 use DB_File ;
1273 use Fcntl ;
610ab055 1274
962cee9f 1275 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef 1276
1277 sub match
1278 {
1279 my $key = shift ;
610ab055 1280 my $value = 0;
f6b705ef 1281 my $orig_key = $key ;
1282 $x->seq($key, $value, R_CURSOR) ;
1283 print "$orig_key\t-> $key\t-> $value\n" ;
1284 }
1285
1286 $filename = "tree" ;
1287 unlink $filename ;
1288
45a340cb 1289 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1290 or die "Cannot open $filename: $!\n";
bbc7dcd2 1291
f6b705ef 1292 # Add some key/value pairs to the file
1293 $h{'mouse'} = 'mickey' ;
1294 $h{'Wall'} = 'Larry' ;
1295 $h{'Walls'} = 'Brick' ;
1296 $h{'Smith'} = 'John' ;
bbc7dcd2 1297
f6b705ef 1298
610ab055 1299 $key = $value = 0 ;
f6b705ef 1300 print "IN ORDER\n" ;
1301 for ($st = $x->seq($key, $value, R_FIRST) ;
1302 $st == 0 ;
1303 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1304
2c2d71f5 1305 { print "$key -> $value\n" }
bbc7dcd2 1306
f6b705ef 1307 print "\nPARTIAL MATCH\n" ;
1308
1309 match "Wa" ;
1310 match "A" ;
1311 match "a" ;
1312
1313 undef $x ;
1314 untie %h ;
1315
1316Here is the output:
1317
1318 IN ORDER
1319 Smith -> John
1320 Wall -> Larry
1321 Walls -> Brick
1322 mouse -> mickey
1323
1324 PARTIAL MATCH
1325 Wa -> Wall -> Larry
1326 A -> Smith -> John
1327 a -> mouse -> mickey
1328
1329=head1 DB_RECNO
1330
1331DB_RECNO provides an interface to flat text files. Both variable and
1332fixed length records are supported.
3b35bae3 1333
6ca2e664 1334In order to make RECNO more compatible with Perl, the array offset for
88108326 1335all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1336
88108326 1337As with normal Perl arrays, a RECNO array can be accessed using
1338negative indexes. The index -1 refers to the last element of the array,
1339-2 the second last, and so on. Attempting to access an element before
1340the start of the array will raise a fatal run-time error.
3b35bae3 1341
68dc0745 1342=head2 The 'bval' Option
36477c24 1343
1344The operation of the bval option warrants some discussion. Here is the
1345definition of bval from the Berkeley DB 1.85 recno manual page:
1346
1347 The delimiting byte to be used to mark the end of a
1348 record for variable-length records, and the pad charac-
1349 ter for fixed-length records. If no value is speci-
1350 fied, newlines (``\n'') are used to mark the end of
1351 variable-length records and fixed-length records are
1352 padded with spaces.
1353
1354The second sentence is wrong. In actual fact bval will only default to
1355C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1356openinfo parameter is used at all, the value that happens to be in bval
1357will be used. That means you always have to specify bval when making
1358use of any of the options in the openinfo parameter. This documentation
1359error will be fixed in the next release of Berkeley DB.
1360
1361That clarifies the situation with regards Berkeley DB itself. What
1362about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1363quite useful, so B<DB_File> conforms to it.
36477c24 1364
1365That means that you can specify other options (e.g. cachesize) and
1366still have bval default to C<"\n"> for variable length records, and
1367space for fixed length records.
1368
c5da4faf 1369Also note that the bval option only allows you to specify a single byte
a6d6498e 1370as a delimiter.
c5da4faf 1371
f6b705ef 1372=head2 A Simple Example
3b35bae3 1373
6ca2e664 1374Here is a simple example that uses RECNO (if you are using a version
1375of Perl earlier than 5.004_57 this example won't work -- see
1376L<Extra RECNO Methods> for a workaround).
f6b705ef 1377
3245f058 1378 use warnings ;
610ab055 1379 use strict ;
f6b705ef 1380 use DB_File ;
f6b705ef 1381
2c2d71f5 1382 my $filename = "text" ;
1383 unlink $filename ;
1384
610ab055 1385 my @h ;
45a340cb 1386 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1387 or die "Cannot open file 'text': $!\n" ;
1388
1389 # Add a few key/value pairs to the file
1390 $h[0] = "orange" ;
1391 $h[1] = "blue" ;
1392 $h[2] = "yellow" ;
1393
6ca2e664 1394 push @h, "green", "black" ;
1395
1396 my $elements = scalar @h ;
1397 print "The array contains $elements entries\n" ;
1398
1399 my $last = pop @h ;
1400 print "popped $last\n" ;
1401
1402 unshift @h, "white" ;
1403 my $first = shift @h ;
1404 print "shifted $first\n" ;
1405
f6b705ef 1406 # Check for existence of a key
1407 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1408
1409 # use a negative index
1410 print "The last element is $h[-1]\n" ;
1411 print "The 2nd last element is $h[-2]\n" ;
1412
1413 untie @h ;
3b35bae3 1414
f6b705ef 1415Here is the output from the script:
1416
6ca2e664 1417 The array contains 5 entries
1418 popped black
2c2d71f5 1419 shifted white
f6b705ef 1420 Element 1 Exists with value blue
6ca2e664 1421 The last element is green
1422 The 2nd last element is yellow
f6b705ef 1423
6ca2e664 1424=head2 Extra RECNO Methods
f6b705ef 1425
045291aa 1426If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664 1427array interface is quite limited. In the example script above
1428C<push>, C<pop>, C<shift>, C<unshift>
1429or determining the array length will not work with a tied array.
045291aa 1430
1431To make the interface more useful for older versions of Perl, a number
1432of methods are supplied with B<DB_File> to simulate the missing array
1433operations. All these methods are accessed via the object returned from
1434the tie call.
f6b705ef 1435
1436Here are the methods:
1437
1438=over 5
3b35bae3 1439
f6b705ef 1440=item B<$X-E<gt>push(list) ;>
1441
1442Pushes the elements of C<list> to the end of the array.
1443
1444=item B<$value = $X-E<gt>pop ;>
1445
1446Removes and returns the last element of the array.
1447
1448=item B<$X-E<gt>shift>
1449
1450Removes and returns the first element of the array.
1451
1452=item B<$X-E<gt>unshift(list) ;>
1453
1454Pushes the elements of C<list> to the start of the array.
1455
1456=item B<$X-E<gt>length>
1457
1458Returns the number of elements in the array.
1459
c5da4faf 1460=item B<$X-E<gt>splice(offset, length, elements);>
1461
a6d05634 1462Returns a splice of the array.
c5da4faf 1463
f6b705ef 1464=back
1465
1466=head2 Another Example
1467
1468Here is a more complete example that makes use of some of the methods
1469described above. It also makes use of the API interface directly (see
1470L<THE API INTERFACE>).
1471
3245f058 1472 use warnings ;
f6b705ef 1473 use strict ;
962cee9f 1474 my (@h, $H, $file, $i) ;
f6b705ef 1475 use DB_File ;
1476 use Fcntl ;
bbc7dcd2 1477
f6b705ef 1478 $file = "text" ;
1479
1480 unlink $file ;
1481
45a340cb 1482 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1483 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1484
f6b705ef 1485 # first create a text file to play with
1486 $h[0] = "zero" ;
1487 $h[1] = "one" ;
1488 $h[2] = "two" ;
1489 $h[3] = "three" ;
1490 $h[4] = "four" ;
1491
bbc7dcd2 1492
f6b705ef 1493 # Print the records in order.
1494 #
1495 # The length method is needed here because evaluating a tied
1496 # array in a scalar context does not return the number of
1497 # elements in the array.
1498
1499 print "\nORIGINAL\n" ;
1500 foreach $i (0 .. $H->length - 1) {
1501 print "$i: $h[$i]\n" ;
1502 }
1503
1504 # use the push & pop methods
1505 $a = $H->pop ;
1506 $H->push("last") ;
1507 print "\nThe last record was [$a]\n" ;
1508
1509 # and the shift & unshift methods
1510 $a = $H->shift ;
1511 $H->unshift("first") ;
1512 print "The first record was [$a]\n" ;
1513
1514 # Use the API to add a new record after record 2.
1515 $i = 2 ;
1516 $H->put($i, "Newbie", R_IAFTER) ;
1517
1518 # and a new record before record 1.
1519 $i = 1 ;
1520 $H->put($i, "New One", R_IBEFORE) ;
1521
1522 # delete record 3
1523 $H->del(3) ;
1524
1525 # now print the records in reverse order
1526 print "\nREVERSE\n" ;
1527 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1528 { print "$i: $h[$i]\n" }
1529
1530 # same again, but use the API functions instead
1531 print "\nREVERSE again\n" ;
610ab055 1532 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1533 for ($s = $H->seq($k, $v, R_LAST) ;
1534 $s == 0 ;
1535 $s = $H->seq($k, $v, R_PREV))
1536 { print "$k: $v\n" }
1537
1538 undef $H ;
1539 untie @h ;
1540
1541and this is what it outputs:
1542
1543 ORIGINAL
1544 0: zero
1545 1: one
1546 2: two
1547 3: three
1548 4: four
1549
1550 The last record was [four]
1551 The first record was [zero]
1552
1553 REVERSE
1554 5: last
1555 4: three
1556 3: Newbie
1557 2: one
1558 1: New One
1559 0: first
1560
1561 REVERSE again
1562 5: last
1563 4: three
1564 3: Newbie
1565 2: one
1566 1: New One
1567 0: first
1568
1569Notes:
1570
1571=over 5
1572
1573=item 1.
1574
1575Rather than iterating through the array, C<@h> like this:
1576
1577 foreach $i (@h)
1578
1579it is necessary to use either this:
1580
1581 foreach $i (0 .. $H->length - 1)
1582
1583or this:
1584
1585 for ($a = $H->get($k, $v, R_FIRST) ;
1586 $a == 0 ;
1587 $a = $H->get($k, $v, R_NEXT) )
1588
1589=item 2.
1590
1591Notice that both times the C<put> method was used the record index was
1592specified using a variable, C<$i>, rather than the literal value
1593itself. This is because C<put> will return the record number of the
1594inserted line via that parameter.
1595
1596=back
1597
1598=head1 THE API INTERFACE
3b35bae3 1599
1600As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1601possible to make direct use of most of the API functions defined in the
8e07c86e 1602Berkeley DB documentation.
3b35bae3 1603
88108326 1604To do this you need to store a copy of the object returned from the tie.
3b35bae3 1605
88108326 1606 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1607
8e07c86e 1608Once you have done that, you can access the Berkeley DB API functions
88108326 1609as B<DB_File> methods directly like this:
3b35bae3 1610
1611 $db->put($key, $value, R_NOOVERWRITE) ;
1612
88108326 1613B<Important:> If you have saved a copy of the object returned from
1614C<tie>, the underlying database file will I<not> be closed until both
1615the tied variable is untied and all copies of the saved object are
610ab055 1616destroyed.
88108326 1617
1618 use DB_File ;
1619 $db = tie %hash, "DB_File", "filename"
1620 or die "Cannot tie filename: $!" ;
1621 ...
1622 undef $db ;
1623 untie %hash ;
1624
9a2c4ce3 1625See L<The untie() Gotcha> for more details.
778183f3 1626
88108326 1627All the functions defined in L<dbopen> are available except for
1628close() and dbopen() itself. The B<DB_File> method interface to the
1629supported functions have been implemented to mirror the way Berkeley DB
1630works whenever possible. In particular note that:
1631
1632=over 5
1633
1634=item *
1635
1636The methods return a status value. All return 0 on success.
1637All return -1 to signify an error and set C<$!> to the exact
1638error code. The return code 1 generally (but not always) means that the
1639key specified did not exist in the database.
1640
1641Other return codes are defined. See below and in the Berkeley DB
1642documentation for details. The Berkeley DB documentation should be used
1643as the definitive source.
1644
1645=item *
3b35bae3 1646
88108326 1647Whenever a Berkeley DB function returns data via one of its parameters,
1648the equivalent B<DB_File> method does exactly the same.
3b35bae3 1649
88108326 1650=item *
1651
1652If you are careful, it is possible to mix API calls with the tied
1653hash/array interface in the same piece of code. Although only a few of
1654the methods used to implement the tied interface currently make use of
1655the cursor, you should always assume that the cursor has been changed
1656any time the tied hash/array interface is used. As an example, this
1657code will probably not do what you expect:
1658
1659 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1660 or die "Cannot tie $filename: $!" ;
1661
1662 # Get the first key/value pair and set the cursor
1663 $X->seq($key, $value, R_FIRST) ;
1664
1665 # this line will modify the cursor
1666 $count = scalar keys %x ;
1667
1668 # Get the second key/value pair.
1669 # oops, it didn't, it got the last key/value pair!
1670 $X->seq($key, $value, R_NEXT) ;
1671
1672The code above can be rearranged to get around the problem, like this:
1673
1674 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1675 or die "Cannot tie $filename: $!" ;
1676
1677 # this line will modify the cursor
1678 $count = scalar keys %x ;
1679
1680 # Get the first key/value pair and set the cursor
1681 $X->seq($key, $value, R_FIRST) ;
1682
1683 # Get the second key/value pair.
1684 # worked this time.
1685 $X->seq($key, $value, R_NEXT) ;
1686
1687=back
1688
1689All the constants defined in L<dbopen> for use in the flags parameters
1690in the methods defined below are also available. Refer to the Berkeley
1691DB documentation for the precise meaning of the flags values.
1692
1693Below is a list of the methods available.
3b35bae3 1694
1695=over 5
1696
f6b705ef 1697=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1698
1699Given a key (C<$key>) this method reads the value associated with it
1700from the database. The value read from the database is returned in the
1701C<$value> parameter.
3b35bae3 1702
88108326 1703If the key does not exist the method returns 1.
3b35bae3 1704
88108326 1705No flags are currently defined for this method.
3b35bae3 1706
f6b705ef 1707=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1708
88108326 1709Stores the key/value pair in the database.
1710
1711If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1712will have the record number of the inserted key/value pair set.
3b35bae3 1713
88108326 1714Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1715R_SETCURSOR.
1716
f6b705ef 1717=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1718
88108326 1719Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1720
88108326 1721A return code of 1 means that the requested key was not in the
1722database.
3b35bae3 1723
88108326 1724R_CURSOR is the only valid flag at present.
3b35bae3 1725
f6b705ef 1726=item B<$status = $X-E<gt>fd ;>
3b35bae3 1727
88108326 1728Returns the file descriptor for the underlying database.
3b35bae3 1729
b90e71be 1730See L<Locking: The Trouble with fd> for an explanation for why you should
1731not use C<fd> to lock your database.
3b35bae3 1732
f6b705ef 1733=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1734
88108326 1735This interface allows sequential retrieval from the database. See
1736L<dbopen> for full details.
1737
1738Both the C<$key> and C<$value> parameters will be set to the key/value
1739pair read from the database.
1740
1741The flags parameter is mandatory. The valid flag values are R_CURSOR,
1742R_FIRST, R_LAST, R_NEXT and R_PREV.
1743
f6b705ef 1744=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1745
1746Flushes any cached buffers to disk.
1747
1748R_RECNOSYNC is the only valid flag at present.
3b35bae3 1749
1750=back
1751
cad2e5aa 1752=head1 DBM FILTERS
1753
1754A DBM Filter is a piece of code that is be used when you I<always>
1755want to make the same transformation to all keys and/or values in a
1756DBM database.
1757
1758There are four methods associated with DBM Filters. All work identically,
1759and each is used to install (or uninstall) a single DBM Filter. Each
1760expects a single parameter, namely a reference to a sub. The only
1761difference between them is the place that the filter is installed.
1762
1763To summarise:
1764
1765=over 5
1766
1767=item B<filter_store_key>
1768
1769If a filter has been installed with this method, it will be invoked
1770every time you write a key to a DBM database.
1771
1772=item B<filter_store_value>
1773
1774If a filter has been installed with this method, it will be invoked
1775every time you write a value to a DBM database.
1776
1777
1778=item B<filter_fetch_key>
1779
1780If a filter has been installed with this method, it will be invoked
1781every time you read a key from a DBM database.
1782
1783=item B<filter_fetch_value>
1784
1785If a filter has been installed with this method, it will be invoked
1786every time you read a value from a DBM database.
1787
1788=back
1789
1790You can use any combination of the methods, from none, to all four.
1791
1792All filter methods return the existing filter, if present, or C<undef>
1793in not.
1794
1795To delete a filter pass C<undef> to it.
1796
1797=head2 The Filter
1798
1799When each filter is called by Perl, a local copy of C<$_> will contain
1800the key or value to be filtered. Filtering is achieved by modifying
1801the contents of C<$_>. The return code from the filter is ignored.
1802
1803=head2 An Example -- the NULL termination problem.
1804
1805Consider the following scenario. You have a DBM database
1806that you need to share with a third-party C application. The C application
1807assumes that I<all> keys and values are NULL terminated. Unfortunately
1808when Perl writes to DBM databases it doesn't use NULL termination, so
1809your Perl application will have to manage NULL termination itself. When
1810you write to the database you will have to use something like this:
1811
1812 $hash{"$key\0"} = "$value\0" ;
1813
1814Similarly the NULL needs to be taken into account when you are considering
1815the length of existing keys/values.
1816
1817It would be much better if you could ignore the NULL terminations issue
1818in the main application code and have a mechanism that automatically
1819added the terminating NULL to all keys and values whenever you write to
1820the database and have them removed when you read from the database. As I'm
1821sure you have already guessed, this is a problem that DBM Filters can
1822fix very easily.
1823
3245f058 1824 use warnings ;
cad2e5aa 1825 use strict ;
1826 use DB_File ;
1827
1828 my %hash ;
2359510d 1829 my $filename = "filt" ;
cad2e5aa 1830 unlink $filename ;
1831
1832 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1833 or die "Cannot open $filename: $!\n" ;
1834
1835 # Install DBM Filters
1836 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1837 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1838 $db->filter_fetch_value( sub { s/\0$// } ) ;
1839 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1840
1841 $hash{"abc"} = "def" ;
1842 my $a = $hash{"ABC"} ;
1843 # ...
1844 undef $db ;
1845 untie %hash ;
1846
1847Hopefully the contents of each of the filters should be
1848self-explanatory. Both "fetch" filters remove the terminating NULL,
1849and both "store" filters add a terminating NULL.
1850
1851
1852=head2 Another Example -- Key is a C int.
1853
1854Here is another real-life example. By default, whenever Perl writes to
1855a DBM database it always writes the key and value as strings. So when
1856you use this:
1857
3c4b39be 1858 $hash{12345} = "something" ;
cad2e5aa 1859
1860the key 12345 will get stored in the DBM database as the 5 byte string
1861"12345". If you actually want the key to be stored in the DBM database
1862as a C int, you will have to use C<pack> when writing, and C<unpack>
1863when reading.
1864
1865Here is a DBM Filter that does it:
1866
3245f058 1867 use warnings ;
cad2e5aa 1868 use strict ;
1869 use DB_File ;
1870 my %hash ;
2359510d 1871 my $filename = "filt" ;
cad2e5aa 1872 unlink $filename ;
1873
1874
1875 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1876 or die "Cannot open $filename: $!\n" ;
1877
1878 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1879 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1880 $hash{123} = "def" ;
1881 # ...
1882 undef $db ;
1883 untie %hash ;
1884
1885This time only two filters have been used -- we only need to manipulate
1886the contents of the key, so it wasn't necessary to install any value
1887filters.
1888
f6b705ef 1889=head1 HINTS AND TIPS
3b35bae3 1890
3b35bae3 1891
b90e71be 1892=head2 Locking: The Trouble with fd
3b35bae3 1893
b90e71be 1894Until version 1.72 of this module, the recommended technique for locking
1895B<DB_File> databases was to flock the filehandle returned from the "fd"
1896function. Unfortunately this technique has been shown to be fundamentally
1897flawed (Kudos to David Harris for tracking this down). Use it at your own
1898peril!
3b35bae3 1899
b90e71be 1900The locking technique went like this.
cb1a09d0 1901
2359510d 1902 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644)
1903 || die "dbcreat foo.db $!";
b90e71be 1904 $fd = $db->fd;
1905 open(DB_FH, "+<&=$fd") || die "dup $!";
1906 flock (DB_FH, LOCK_EX) || die "flock: $!";
1907 ...
1908 $db{"Tom"} = "Jerry" ;
1909 ...
1910 flock(DB_FH, LOCK_UN);
1911 undef $db;
1912 untie %db;
1913 close(DB_FH);
cb1a09d0 1914
b90e71be 1915In simple terms, this is what happens:
cb1a09d0 1916
b90e71be 1917=over 5
cb1a09d0 1918
b90e71be 1919=item 1.
cb1a09d0 1920
b90e71be 1921Use "tie" to open the database.
cb1a09d0 1922
b90e71be 1923=item 2.
cb1a09d0 1924
b90e71be 1925Lock the database with fd & flock.
cb1a09d0 1926
b90e71be 1927=item 3.
cb1a09d0 1928
b90e71be 1929Read & Write to the database.
cb1a09d0 1930
b90e71be 1931=item 4.
cb1a09d0 1932
b90e71be 1933Unlock and close the database.
cb1a09d0 1934
b90e71be 1935=back
1936
1937Here is the crux of the problem. A side-effect of opening the B<DB_File>
1938database in step 2 is that an initial block from the database will get
1939read from disk and cached in memory.
1940
1941To see why this is a problem, consider what can happen when two processes,
1942say "A" and "B", both want to update the same B<DB_File> database
1943using the locking steps outlined above. Assume process "A" has already
1944opened the database and has a write lock, but it hasn't actually updated
1945the database yet (it has finished step 2, but not started step 3 yet). Now
1946process "B" tries to open the same database - step 1 will succeed,
1947but it will block on step 2 until process "A" releases the lock. The
1948important thing to notice here is that at this point in time both
1949processes will have cached identical initial blocks from the database.
1950
1951Now process "A" updates the database and happens to change some of the
1952data held in the initial buffer. Process "A" terminates, flushing
1953all cached data to disk and releasing the database lock. At this point
1954the database on disk will correctly reflect the changes made by process
1955"A".
1956
1957With the lock released, process "B" can now continue. It also updates the
1958database and unfortunately it too modifies the data that was in its
1959initial buffer. Once that data gets flushed to disk it will overwrite
1960some/all of the changes process "A" made to the database.
1961
1962The result of this scenario is at best a database that doesn't contain
1963what you expect. At worst the database will corrupt.
1964
1965The above won't happen every time competing process update the same
1966B<DB_File> database, but it does illustrate why the technique should
1967not be used.
1968
1969=head2 Safe ways to lock a database
1970
1971Starting with version 2.x, Berkeley DB has internal support for locking.
1972The companion module to this one, B<BerkeleyDB>, provides an interface
1973to this locking functionality. If you are serious about locking
1974Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1975
1976If using B<BerkeleyDB> isn't an option, there are a number of modules
1977available on CPAN that can be used to implement locking. Each one
1978implements locking differently and has different goals in mind. It is
1979therefore worth knowing the difference, so that you can pick the right
1980one for your application. Here are the three locking wrappers:
1981
1982=over 5
1983
1984=item B<Tie::DB_Lock>
1985
1986A B<DB_File> wrapper which creates copies of the database file for
1987read access, so that you have a kind of a multiversioning concurrent read
1988system. However, updates are still serial. Use for databases where reads
1989may be lengthy and consistency problems may occur.
1990
1991=item B<Tie::DB_LockFile>
1992
1993A B<DB_File> wrapper that has the ability to lock and unlock the database
1994while it is being used. Avoids the tie-before-flock problem by simply
1995re-tie-ing the database when you get or drop a lock. Because of the
1996flexibility in dropping and re-acquiring the lock in the middle of a
1997session, this can be massaged into a system that will work with long
1998updates and/or reads if the application follows the hints in the POD
1999documentation.
2000
2001=item B<DB_File::Lock>
2002
2003An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
2004before tie-ing the database and drops the lock after the untie. Allows
2005one to use the same lockfile for multiple databases to avoid deadlock
2006problems, if desired. Use for databases where updates are reads are
2007quick and simple flock locking semantics are enough.
2008
2009=back
cb1a09d0 2010
68dc0745 2011=head2 Sharing Databases With C Applications
f6b705ef 2012
2013There is no technical reason why a Berkeley DB database cannot be
2014shared by both a Perl and a C application.
2015
2016The vast majority of problems that are reported in this area boil down
2017to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 2018not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 2019
2020Here is a real example. Netscape 2.0 keeps a record of the locations you
2021visit along with the time you last visited them in a DB_HASH database.
2022This is usually stored in the file F<~/.netscape/history.db>. The key
2023field in the database is the location string and the value field is the
2024time the location was last visited stored as a 4 byte binary value.
2025
2026If you haven't already guessed, the location string is stored with a
2027terminating NULL. This means you need to be careful when accessing the
2028database.
2029
2030Here is a snippet of code that is loosely based on Tom Christiansen's
2031I<ggh> script (available from your nearest CPAN archive in
2032F<authors/id/TOMC/scripts/nshist.gz>).
2033
3245f058 2034 use warnings ;
610ab055 2035 use strict ;
f6b705ef 2036 use DB_File ;
2037 use Fcntl ;
f6b705ef 2038
962cee9f 2039 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef 2040 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2041
2042 $HISTORY = "$dotdir/.netscape/history.db";
2043
2044 tie %hist_db, 'DB_File', $HISTORY
2045 or die "Cannot open $HISTORY: $!\n" ;;
2046
2047 # Dump the complete database
2048 while ( ($href, $binary_time) = each %hist_db ) {
2049
2050 # remove the terminating NULL
2051 $href =~ s/\x00$// ;
2052
2053 # convert the binary time into a user friendly string
2054 $date = localtime unpack("V", $binary_time);
2055 print "$date $href\n" ;
2056 }
2057
2058 # check for the existence of a specific key
2059 # remember to add the NULL
2060 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2061 $date = localtime unpack("V", $binary_time) ;
2062 print "Last visited mox.perl.com on $date\n" ;
2063 }
2064 else {
2065 print "Never visited mox.perl.com\n"
2066 }
2067
2068 untie %hist_db ;
2069
68dc0745 2070=head2 The untie() Gotcha
778183f3 2071
7a2e2cd6 2072If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2073recommended that you read L<perltie/The untie Gotcha>.
778183f3 2074
2075Even if you don't currently make use of the API interface, it is still
2076worth reading it.
2077
2078Here is an example which illustrates the problem from a B<DB_File>
2079perspective:
2080
2081 use DB_File ;
2082 use Fcntl ;
2083
2084 my %x ;
2085 my $X ;
2086
2087 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2088 or die "Cannot tie first time: $!" ;
2089
2090 $x{123} = 456 ;
2091
2092 untie %x ;
2093
2094 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2095 or die "Cannot tie second time: $!" ;
2096
2097 untie %x ;
2098
2099When run, the script will produce this error message:
2100
2101 Cannot tie second time: Invalid argument at bad.file line 14.
2102
2103Although the error message above refers to the second tie() statement
2104in the script, the source of the problem is really with the untie()
2105statement that precedes it.
2106
2107Having read L<perltie> you will probably have already guessed that the
2108error is caused by the extra copy of the tied object stored in C<$X>.
2109If you haven't, then the problem boils down to the fact that the
2110B<DB_File> destructor, DESTROY, will not be called until I<all>
2111references to the tied object are destroyed. Both the tied variable,
2112C<%x>, and C<$X> above hold a reference to the object. The call to
2113untie() will destroy the first, but C<$X> still holds a valid
2114reference, so the destructor will not get called and the database file
2115F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2116attempt to open a database that is already open via the catch-all
778183f3 2117"Invalid argument" doesn't help.
2118
2119If you run the script with the C<-w> flag the error message becomes:
2120
2121 untie attempted while 1 inner references still exist at bad.file line 12.
2122 Cannot tie second time: Invalid argument at bad.file line 14.
2123
2124which pinpoints the real problem. Finally the script can now be
2125modified to fix the original problem by destroying the API object
2126before the untie:
2127
2128 ...
2129 $x{123} = 456 ;
2130
2131 undef $X ;
2132 untie %x ;
2133
2134 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2135 ...
2136
f6b705ef 2137
2138=head1 COMMON QUESTIONS
2139
2140=head2 Why is there Perl source in my database?
2141
2142If you look at the contents of a database file created by DB_File,
2143there can sometimes be part of a Perl script included in it.
2144
2145This happens because Berkeley DB uses dynamic memory to allocate
2146buffers which will subsequently be written to the database file. Being
2147dynamic, the memory could have been used for anything before DB
2148malloced it. As Berkeley DB doesn't clear the memory once it has been
2149allocated, the unused portions will contain random junk. In the case
2150where a Perl script gets written to the database, the random junk will
2151correspond to an area of dynamic memory that happened to be used during
2152the compilation of the script.
2153
2154Unless you don't like the possibility of there being part of your Perl
2155scripts embedded in a database file, this is nothing to worry about.
2156
2157=head2 How do I store complex data structures with DB_File?
2158
2159Although B<DB_File> cannot do this directly, there is a module which
2160can layer transparently over B<DB_File> to accomplish this feat.
2161
2162Check out the MLDBM module, available on CPAN in the directory
2163F<modules/by-module/MLDBM>.
2164
2165=head2 What does "Invalid Argument" mean?
2166
2167You will get this error message when one of the parameters in the
2168C<tie> call is wrong. Unfortunately there are quite a few parameters to
2169get wrong, so it can be difficult to figure out which one it is.
2170
2171Here are a couple of possibilities:
2172
2173=over 5
2174
2175=item 1.
2176
610ab055 2177Attempting to reopen a database without closing it.
f6b705ef 2178
2179=item 2.
2180
2181Using the O_WRONLY flag.
2182
2183=back
2184
2185=head2 What does "Bareword 'DB_File' not allowed" mean?
2186
2187You will encounter this particular error message when you have the
2188C<strict 'subs'> pragma (or the full strict pragma) in your script.
2189Consider this script:
2190
3245f058 2191 use warnings ;
f6b705ef 2192 use strict ;
2193 use DB_File ;
07200f1b 2194 my %x ;
f6b705ef 2195 tie %x, DB_File, "filename" ;
2196
2197Running it produces the error in question:
2198
2199 Bareword "DB_File" not allowed while "strict subs" in use
2200
2201To get around the error, place the word C<DB_File> in either single or
2202double quotes, like this:
2203
2204 tie %x, "DB_File", "filename" ;
2205
2206Although it might seem like a real pain, it is really worth the effort
2207of having a C<use strict> in all your scripts.
2208
cad2e5aa 2209=head1 REFERENCES
2210
2211Articles that are either about B<DB_File> or make use of it.
2212
2213=over 5
2214
2215=item 1.
2216
2217I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2218Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2219
2220=back
2221
cb1a09d0 2222=head1 HISTORY
2223
1f70e1ea 2224Moved to the Changes file.
610ab055 2225
1f70e1ea 2226=head1 BUGS
05475680 2227
1f70e1ea 2228Some older versions of Berkeley DB had problems with fixed length
2229records using the RECNO file format. This problem has been fixed since
2230version 1.85 of Berkeley DB.
e858de61 2231
1f70e1ea 2232I am sure there are bugs in the code. If you do find any, or can
2233suggest any enhancements, I would welcome your comments.
a6ed719b 2234
1f70e1ea 2235=head1 AVAILABILITY
a6ed719b 2236
1f70e1ea 2237B<DB_File> comes with the standard Perl source distribution. Look in
2238the directory F<ext/DB_File>. Given the amount of time between releases
2239of Perl the version that ships with Perl is quite likely to be out of
2240date, so the most recent version can always be found on CPAN (see
5bbd4290 2241L<perlmodlib/CPAN> for details), in the directory
1f70e1ea 2242F<modules/by-module/DB_File>.
a6ed719b 2243
039d031f 2244This version of B<DB_File> will work with either version 1.x, 2.x or
22453.x of Berkeley DB, but is limited to the functionality provided by
2246version 1.
a6ed719b 2247
cad2e5aa 2248The official web site for Berkeley DB is F<http://www.sleepycat.com>.
039d031f 2249All versions of Berkeley DB are available there.
93af7a87 2250
1f70e1ea 2251Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2252archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2253
1f70e1ea 2254If you are running IRIX, then get Berkeley DB version 1 from
2255F<http://reality.sgi.com/ariel>. It has the patches necessary to
2256compile properly on IRIX 5.3.
610ab055 2257
1f70e1ea 2258=head1 COPYRIGHT
3b35bae3 2259
dcdb9d1f 2260Copyright (c) 1995-2005 Paul Marquess. All rights reserved. This program
a9fd575d 2261is free software; you can redistribute it and/or modify it under the
2262same terms as Perl itself.
3b35bae3 2263
1f70e1ea 2264Although B<DB_File> is covered by the Perl license, the library it
2265makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2266copyright and its own license. Please take the time to read it.
3b35bae3 2267
a9fd575d 2268Here are are few words taken from the Berkeley DB FAQ (at
b90e71be 2269F<http://www.sleepycat.com>) regarding the license:
68dc0745 2270
a9fd575d 2271 Do I have to license DB to use it in Perl scripts?
3b35bae3 2272
a9fd575d 2273 No. The Berkeley DB license requires that software that uses
2274 Berkeley DB be freely redistributable. In the case of Perl, that
2275 software is Perl, and not your scripts. Any Perl scripts that you
2276 write are your property, including scripts that make use of
2277 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2278 place any restriction on what you may do with them.
88108326 2279
1f70e1ea 2280If you are in any doubt about the license situation, contact either the
2281Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 2282
2283
3b35bae3 2284=head1 SEE ALSO
2285
5bbd4290 2286L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2287L<perldbmfilter>
3b35bae3 2288
3b35bae3 2289=head1 AUTHOR
2290
8e07c86e 2291The DB_File interface was written by Paul Marquess
5bbd4290 2292E<lt>pmqs@cpan.orgE<gt>.
d3ef3b8a 2293Questions about the DB system itself may be addressed to
5bbd4290 2294E<lt>db@sleepycat.comE<gt>.
3b35bae3 2295
2296=cut