[PERLIO] In line-buffered mode, flush on the *last* newline,
[p5sagit/p5-mst-13.2.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e 1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6d02d21f 3# written by Paul Marquess (pmqs@cpan.org)
5bbd4290 4# last modified 22nd December 2003
5# version 1.808
36477c24 6#
6d02d21f 7# Copyright (c) 1995-2003 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e 11
12package DB_File::HASHINFO ;
785da04d 13
e5021521 14require 5.00404;
610ab055 15
3245f058 16use warnings;
785da04d 17use strict;
8e07c86e 18use Carp;
88108326 19require Tie::Hash;
20@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 21
88108326 22sub new
8e07c86e 23{
88108326 24 my $pkg = shift ;
25 my %x ;
26 tie %x, $pkg ;
27 bless \%x, $pkg ;
8e07c86e 28}
29
610ab055 30
88108326 31sub TIEHASH
32{
33 my $pkg = shift ;
34
efc79c7d 35 bless { VALID => {
36 bsize => 1,
37 ffactor => 1,
38 nelem => 1,
39 cachesize => 1,
40 hash => 2,
41 lorder => 1,
36477c24 42 },
43 GOT => {}
44 }, $pkg ;
88108326 45}
8e07c86e 46
610ab055 47
8e07c86e 48sub FETCH
49{
88108326 50 my $self = shift ;
51 my $key = shift ;
8e07c86e 52
36477c24 53 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
88108326 54
55 my $pkg = ref $self ;
56 croak "${pkg}::FETCH - Unknown element '$key'" ;
8e07c86e 57}
58
59
60sub STORE
61{
88108326 62 my $self = shift ;
63 my $key = shift ;
64 my $value = shift ;
65
efc79c7d 66 my $type = $self->{VALID}{$key};
67
68 if ( $type )
8e07c86e 69 {
efc79c7d 70 croak "Key '$key' not associated with a code reference"
71 if $type == 2 && !ref $value && ref $value ne 'CODE';
36477c24 72 $self->{GOT}{$key} = $value ;
8e07c86e 73 return ;
74 }
75
88108326 76 my $pkg = ref $self ;
77 croak "${pkg}::STORE - Unknown element '$key'" ;
8e07c86e 78}
79
80sub DELETE
81{
88108326 82 my $self = shift ;
83 my $key = shift ;
84
36477c24 85 if ( exists $self->{VALID}{$key} )
8e07c86e 86 {
36477c24 87 delete $self->{GOT}{$key} ;
8e07c86e 88 return ;
89 }
90
88108326 91 my $pkg = ref $self ;
92 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
8e07c86e 93}
94
88108326 95sub EXISTS
8e07c86e 96{
88108326 97 my $self = shift ;
98 my $key = shift ;
8e07c86e 99
36477c24 100 exists $self->{VALID}{$key} ;
8e07c86e 101}
102
88108326 103sub NotHere
8e07c86e 104{
18d2dc8c 105 my $self = shift ;
88108326 106 my $method = shift ;
8e07c86e 107
18d2dc8c 108 croak ref($self) . " does not define the method ${method}" ;
8e07c86e 109}
110
18d2dc8c 111sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
112sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
113sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e 114
115package DB_File::RECNOINFO ;
785da04d 116
3245f058 117use warnings;
88108326 118use strict ;
119
045291aa 120@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 121
122sub TIEHASH
123{
88108326 124 my $pkg = shift ;
125
36477c24 126 bless { VALID => { map {$_, 1}
127 qw( bval cachesize psize flags lorder reclen bfname )
128 },
129 GOT => {},
130 }, $pkg ;
8e07c86e 131}
132
88108326 133package DB_File::BTREEINFO ;
8e07c86e 134
3245f058 135use warnings;
88108326 136use strict ;
8e07c86e 137
88108326 138@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 139
88108326 140sub TIEHASH
8e07c86e 141{
88108326 142 my $pkg = shift ;
143
efc79c7d 144 bless { VALID => {
145 flags => 1,
146 cachesize => 1,
147 maxkeypage => 1,
148 minkeypage => 1,
149 psize => 1,
150 compare => 2,
151 prefix => 2,
152 lorder => 1,
36477c24 153 },
154 GOT => {},
155 }, $pkg ;
8e07c86e 156}
157
158
8e07c86e 159package DB_File ;
785da04d 160
3245f058 161use warnings;
785da04d 162use strict;
07200f1b 163our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
d85a743d 164our ($db_version, $use_XSLoader, $splice_end_array);
8e07c86e 165use Carp;
166
785da04d 167
97b11a47 168$VERSION = "1.808_01" ;
d85a743d 169
170{
171 local $SIG{__WARN__} = sub {$splice_end_array = "@_";};
172 my @a =(1); splice(@a, 3);
173 $splice_end_array =
174 ($splice_end_array =~ /^splice\(\) offset past end of array at /);
175}
8e07c86e 176
177#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 178$DB_BTREE = new DB_File::BTREEINFO ;
179$DB_HASH = new DB_File::HASHINFO ;
180$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 181
785da04d 182require Tie::Hash;
8e07c86e 183require Exporter;
184use AutoLoader;
b90e71be 185BEGIN {
186 $use_XSLoader = 1 ;
e5021521 187 { local $SIG{__DIE__} ; eval { require XSLoader } ; }
b90e71be 188
189 if ($@) {
190 $use_XSLoader = 0 ;
191 require DynaLoader;
192 @ISA = qw(DynaLoader);
193 }
194}
195
196push @ISA, qw(Tie::Hash Exporter);
8e07c86e 197@EXPORT = qw(
198 $DB_BTREE $DB_HASH $DB_RECNO
88108326 199
8e07c86e 200 BTREEMAGIC
201 BTREEVERSION
202 DB_LOCK
203 DB_SHMEM
204 DB_TXN
205 HASHMAGIC
206 HASHVERSION
207 MAX_PAGE_NUMBER
208 MAX_PAGE_OFFSET
209 MAX_REC_NUMBER
210 RET_ERROR
211 RET_SPECIAL
212 RET_SUCCESS
213 R_CURSOR
214 R_DUP
215 R_FIRST
216 R_FIXEDLEN
217 R_IAFTER
218 R_IBEFORE
219 R_LAST
220 R_NEXT
221 R_NOKEY
222 R_NOOVERWRITE
223 R_PREV
224 R_RECNOSYNC
225 R_SETCURSOR
226 R_SNAPSHOT
227 __R_UNUSED
88108326 228
045291aa 229);
8e07c86e 230
231sub AUTOLOAD {
785da04d 232 my($constname);
8e07c86e 233 ($constname = $AUTOLOAD) =~ s/.*:://;
07200f1b 234 my ($error, $val) = constant($constname);
235 Carp::croak $error if $error;
57c77851 236 no strict 'refs';
237 *{$AUTOLOAD} = sub { $val };
238 goto &{$AUTOLOAD};
07200f1b 239}
8e07c86e 240
f6b705ef 241
a6ed719b 242eval {
1f70e1ea 243 # Make all Fcntl O_XXX constants available for importing
244 require Fcntl;
245 my @O = grep /^O_/, @Fcntl::EXPORT;
246 Fcntl->import(@O); # first we import what we want to export
247 push(@EXPORT, @O);
a6ed719b 248};
f6b705ef 249
b90e71be 250if ($use_XSLoader)
251 { XSLoader::load("DB_File", $VERSION)}
252else
253 { bootstrap DB_File $VERSION }
8e07c86e 254
255# Preloaded methods go here. Autoload methods go after __END__, and are
256# processed by the autosplit program.
257
05475680 258sub tie_hash_or_array
610ab055 259{
260 my (@arg) = @_ ;
05475680 261 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
610ab055 262
263 $arg[4] = tied %{ $arg[4] }
264 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
265
efc79c7d 266 $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2];
267 $arg[3] = 0666 if @arg >=4 && ! defined $arg[3];
268
1f70e1ea 269 # make recno in Berkeley DB version 2 work like recno in version 1.
270 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
271 $arg[1] and ! -e $arg[1]) {
272 open(FH, ">$arg[1]") or return undef ;
273 close FH ;
274 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
275 }
276
05475680 277 DoTie_($tieHASH, @arg) ;
610ab055 278}
279
05475680 280sub TIEHASH
281{
282 tie_hash_or_array(@_) ;
283}
284
285sub TIEARRAY
286{
287 tie_hash_or_array(@_) ;
288}
88108326 289
045291aa 290sub CLEAR
291{
1f70e1ea 292 my $self = shift;
3245f058 293 my $key = 0 ;
1f70e1ea 294 my $value = "" ;
295 my $status = $self->seq($key, $value, R_FIRST());
296 my @keys;
297
298 while ($status == 0) {
299 push @keys, $key;
300 $status = $self->seq($key, $value, R_NEXT());
301 }
302 foreach $key (reverse @keys) {
303 my $s = $self->del($key);
304 }
305}
306
045291aa 307sub EXTEND { }
308
309sub STORESIZE
310{
311 my $self = shift;
312 my $length = shift ;
313 my $current_length = $self->length() ;
314
315 if ($length < $current_length) {
316 my $key ;
317 for ($key = $current_length - 1 ; $key >= $length ; -- $key)
318 { $self->del($key) }
319 }
a9fd575d 320 elsif ($length > $current_length) {
321 $self->put($length-1, "") ;
322 }
045291aa 323}
324
c5da4faf 325
326sub SPLICE
327{
328 my $self = shift;
329 my $offset = shift;
330 if (not defined $offset) {
d85a743d 331 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf 332 $offset = 0;
333 }
334
335 my $length = @_ ? shift : 0;
336 # Carping about definedness comes _after_ the OFFSET sanity check.
337 # This is so we get the same error messages as Perl's splice().
338 #
339
340 my @list = @_;
341
342 my $size = $self->FETCHSIZE();
343
344 # 'If OFFSET is negative then it start that far from the end of
345 # the array.'
346 #
347 if ($offset < 0) {
348 my $new_offset = $size + $offset;
349 if ($new_offset < 0) {
350 die "Modification of non-creatable array value attempted, "
351 . "subscript $offset";
352 }
353 $offset = $new_offset;
354 }
355
c5da4faf 356 if (not defined $length) {
d85a743d 357 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf 358 $length = 0;
359 }
360
d85a743d 361 if ($offset > $size) {
362 $offset = $size;
363 warnings::warnif('misc', 'splice() offset past end of array')
364 if $splice_end_array;
365 }
366
c5da4faf 367 # 'If LENGTH is omitted, removes everything from OFFSET onward.'
368 if (not defined $length) {
369 $length = $size - $offset;
370 }
371
372 # 'If LENGTH is negative, leave that many elements off the end of
373 # the array.'
374 #
375 if ($length < 0) {
376 $length = $size - $offset + $length;
377
378 if ($length < 0) {
379 # The user must have specified a length bigger than the
380 # length of the array passed in. But perl's splice()
381 # doesn't catch this, it just behaves as for length=0.
382 #
383 $length = 0;
384 }
385 }
386
387 if ($length > $size - $offset) {
388 $length = $size - $offset;
389 }
390
391 # $num_elems holds the current number of elements in the database.
392 my $num_elems = $size;
393
394 # 'Removes the elements designated by OFFSET and LENGTH from an
395 # array,'...
396 #
397 my @removed = ();
398 foreach (0 .. $length - 1) {
399 my $old;
400 my $status = $self->get($offset, $old);
401 if ($status != 0) {
402 my $msg = "error from Berkeley DB on get($offset, \$old)";
403 if ($status == 1) {
404 $msg .= ' (no such element?)';
405 }
406 else {
407 $msg .= ": error status $status";
408 if (defined $! and $! ne '') {
409 $msg .= ", message $!";
410 }
411 }
412 die $msg;
413 }
414 push @removed, $old;
415
416 $status = $self->del($offset);
417 if ($status != 0) {
418 my $msg = "error from Berkeley DB on del($offset)";
419 if ($status == 1) {
420 $msg .= ' (no such element?)';
421 }
422 else {
423 $msg .= ": error status $status";
424 if (defined $! and $! ne '') {
425 $msg .= ", message $!";
426 }
427 }
428 die $msg;
429 }
430
431 -- $num_elems;
432 }
433
434 # ...'and replaces them with the elements of LIST, if any.'
435 my $pos = $offset;
436 while (defined (my $elem = shift @list)) {
437 my $old_pos = $pos;
438 my $status;
439 if ($pos >= $num_elems) {
440 $status = $self->put($pos, $elem);
441 }
442 else {
443 $status = $self->put($pos, $elem, $self->R_IBEFORE);
444 }
445
446 if ($status != 0) {
447 my $msg = "error from Berkeley DB on put($pos, $elem, ...)";
448 if ($status == 1) {
449 $msg .= ' (no such element?)';
450 }
451 else {
452 $msg .= ", error status $status";
453 if (defined $! and $! ne '') {
454 $msg .= ", message $!";
455 }
456 }
457 die $msg;
458 }
459
460 die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
461 if $old_pos != $pos;
462
463 ++ $pos;
464 ++ $num_elems;
465 }
466
467 if (wantarray) {
468 # 'In list context, returns the elements removed from the
469 # array.'
470 #
471 return @removed;
472 }
473 elsif (defined wantarray and not wantarray) {
474 # 'In scalar context, returns the last element removed, or
475 # undef if no elements are removed.'
476 #
477 if (@removed) {
478 my $last = pop @removed;
479 return "$last";
480 }
481 else {
482 return undef;
483 }
484 }
485 elsif (not defined wantarray) {
486 # Void context
487 }
488 else { die }
489}
490sub ::DB_File::splice { &SPLICE }
491
6ca2e664 492sub find_dup
493{
494 croak "Usage: \$db->find_dup(key,value)\n"
495 unless @_ == 3 ;
496
497 my $db = shift ;
498 my ($origkey, $value_wanted) = @_ ;
499 my ($key, $value) = ($origkey, 0);
500 my ($status) = 0 ;
501
502 for ($status = $db->seq($key, $value, R_CURSOR() ) ;
503 $status == 0 ;
504 $status = $db->seq($key, $value, R_NEXT() ) ) {
505
506 return 0 if $key eq $origkey and $value eq $value_wanted ;
507 }
508
509 return $status ;
510}
511
512sub del_dup
513{
514 croak "Usage: \$db->del_dup(key,value)\n"
515 unless @_ == 3 ;
516
517 my $db = shift ;
518 my ($key, $value) = @_ ;
519 my ($status) = $db->find_dup($key, $value) ;
520 return $status if $status != 0 ;
521
522 $status = $db->del($key, R_CURSOR() ) ;
523 return $status ;
524}
525
88108326 526sub get_dup
527{
528 croak "Usage: \$db->get_dup(key [,flag])\n"
529 unless @_ == 2 or @_ == 3 ;
530
531 my $db = shift ;
532 my $key = shift ;
533 my $flag = shift ;
f6b705ef 534 my $value = 0 ;
88108326 535 my $origkey = $key ;
536 my $wantarray = wantarray ;
f6b705ef 537 my %values = () ;
88108326 538 my @values = () ;
539 my $counter = 0 ;
f6b705ef 540 my $status = 0 ;
88108326 541
f6b705ef 542 # iterate through the database until either EOF ($status == 0)
543 # or a different key is encountered ($key ne $origkey).
544 for ($status = $db->seq($key, $value, R_CURSOR()) ;
545 $status == 0 and $key eq $origkey ;
546 $status = $db->seq($key, $value, R_NEXT()) ) {
88108326 547
f6b705ef 548 # save the value or count number of matches
549 if ($wantarray) {
550 if ($flag)
551 { ++ $values{$value} }
552 else
553 { push (@values, $value) }
554 }
555 else
556 { ++ $counter }
88108326 557
88108326 558 }
559
f6b705ef 560 return ($wantarray ? ($flag ? %values : @values) : $counter) ;
88108326 561}
562
563
8e07c86e 5641;
565__END__
566
3b35bae3 567=head1 NAME
568
1f70e1ea 569DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3 570
571=head1 SYNOPSIS
572
bbc7dcd2 573 use DB_File;
574
88108326 575 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
576 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
577 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 578
3b35bae3 579 $status = $X->del($key [, $flags]) ;
580 $status = $X->put($key, $value [, $flags]) ;
581 $status = $X->get($key, $value [, $flags]) ;
760ac839 582 $status = $X->seq($key, $value, $flags) ;
3b35bae3 583 $status = $X->sync([$flags]) ;
584 $status = $X->fd ;
760ac839 585
f6b705ef 586 # BTREE only
88108326 587 $count = $X->get_dup($key) ;
588 @list = $X->get_dup($key) ;
589 %list = $X->get_dup($key, 1) ;
6ca2e664 590 $status = $X->find_dup($key, $value) ;
591 $status = $X->del_dup($key, $value) ;
88108326 592
f6b705ef 593 # RECNO only
594 $a = $X->length;
595 $a = $X->pop ;
596 $X->push(list);
597 $a = $X->shift;
598 $X->unshift(list);
c5da4faf 599 @r = $X->splice(offset, length, elements);
f6b705ef 600
cad2e5aa 601 # DBM Filters
602 $old_filter = $db->filter_store_key ( sub { ... } ) ;
603 $old_filter = $db->filter_store_value( sub { ... } ) ;
604 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
605 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
606
3b35bae3 607 untie %hash ;
608 untie @array ;
609
610=head1 DESCRIPTION
611
8e07c86e 612B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 613facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 614version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f 615It is assumed that you have a copy of the Berkeley DB manual pages at
616hand when reading this documentation. The interface defined here
617mirrors the Berkeley DB interface closely.
68dc0745 618
8e07c86e 619Berkeley DB is a C library which provides a consistent interface to a
620number of database formats. B<DB_File> provides an interface to all
621three of the database types currently supported by Berkeley DB.
3b35bae3 622
623The file types are:
624
625=over 5
626
88108326 627=item B<DB_HASH>
3b35bae3 628
88108326 629This database type allows arbitrary key/value pairs to be stored in data
8e07c86e 630files. This is equivalent to the functionality provided by other
631hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
632the files created using DB_HASH are not compatible with any of the
633other packages mentioned.
3b35bae3 634
8e07c86e 635A default hashing algorithm, which will be adequate for most
636applications, is built into Berkeley DB. If you do need to use your own
637hashing algorithm it is possible to write your own in Perl and have
638B<DB_File> use it instead.
3b35bae3 639
88108326 640=item B<DB_BTREE>
641
642The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 643sorted, balanced binary tree.
3b35bae3 644
8e07c86e 645As with the DB_HASH format, it is possible to provide a user defined
646Perl routine to perform the comparison of keys. By default, though, the
647keys are stored in lexical order.
3b35bae3 648
88108326 649=item B<DB_RECNO>
3b35bae3 650
8e07c86e 651DB_RECNO allows both fixed-length and variable-length flat text files
652to be manipulated using the same key/value pair interface as in DB_HASH
653and DB_BTREE. In this case the key will consist of a record (line)
654number.
3b35bae3 655
656=back
657
e5021521 658=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea 659
660Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 661it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 662limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 663version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 664like version 1. This feature allows B<DB_File> scripts that were built
e5021521 665with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea 666
667If you want to make use of the new features available in Berkeley DB
b90e71be 6682.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 669
e5021521 670B<Note:> The database file format has changed multiple times in Berkeley
671DB version 2, 3 and 4. If you cannot recreate your databases, you
672must dump any existing databases with either the C<db_dump> or the
673C<db_dump185> utility that comes with Berkeley DB.
674Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
675your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea 676documentation for further details.
677
e5021521 678Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 679DB with DB_File.
1f70e1ea 680
68dc0745 681=head2 Interface to Berkeley DB
3b35bae3 682
683B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e 684in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
685allows B<DB_File> to access Berkeley DB files using either an
686associative array (for DB_HASH & DB_BTREE file types) or an ordinary
687array (for the DB_RECNO file type).
3b35bae3 688
88108326 689In addition to the tie() interface, it is also possible to access most
690of the functions provided in the Berkeley DB API directly.
f6b705ef 691See L<THE API INTERFACE>.
3b35bae3 692
88108326 693=head2 Opening a Berkeley DB Database File
3b35bae3 694
8e07c86e 695Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 696Here is the C prototype for dbopen():
3b35bae3 697
698 DB*
699 dbopen (const char * file, int flags, int mode,
700 DBTYPE type, const void * openinfo)
701
702The parameter C<type> is an enumeration which specifies which of the 3
703interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
704Depending on which of these is actually chosen, the final parameter,
705I<openinfo> points to a data structure which allows tailoring of the
706specific interface method.
707
8e07c86e 708This interface is handled slightly differently in B<DB_File>. Here is
88108326 709an equivalent call using B<DB_File>:
3b35bae3 710
88108326 711 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 712
8e07c86e 713The C<filename>, C<flags> and C<mode> parameters are the direct
714equivalent of their dbopen() counterparts. The final parameter $DB_HASH
715performs the function of both the C<type> and C<openinfo> parameters in
716dbopen().
3b35bae3 717
88108326 718In the example above $DB_HASH is actually a pre-defined reference to a
719hash object. B<DB_File> has three of these pre-defined references.
720Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 721
8e07c86e 722The keys allowed in each of these pre-defined references is limited to
723the names used in the equivalent C structure. So, for example, the
724$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 725C<ffactor>, C<hash>, C<lorder> and C<nelem>.
726
727To change one of these elements, just assign to it like this:
728
729 $DB_HASH->{'cachesize'} = 10000 ;
730
731The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
732usually adequate for most applications. If you do need to create extra
733instances of these objects, constructors are available for each file
734type.
735
736Here are examples of the constructors and the valid options available
737for DB_HASH, DB_BTREE and DB_RECNO respectively.
738
739 $a = new DB_File::HASHINFO ;
740 $a->{'bsize'} ;
741 $a->{'cachesize'} ;
742 $a->{'ffactor'};
743 $a->{'hash'} ;
744 $a->{'lorder'} ;
745 $a->{'nelem'} ;
746
747 $b = new DB_File::BTREEINFO ;
748 $b->{'flags'} ;
749 $b->{'cachesize'} ;
750 $b->{'maxkeypage'} ;
751 $b->{'minkeypage'} ;
752 $b->{'psize'} ;
753 $b->{'compare'} ;
754 $b->{'prefix'} ;
755 $b->{'lorder'} ;
756
757 $c = new DB_File::RECNOINFO ;
758 $c->{'bval'} ;
759 $c->{'cachesize'} ;
760 $c->{'psize'} ;
761 $c->{'flags'} ;
762 $c->{'lorder'} ;
763 $c->{'reclen'} ;
764 $c->{'bfname'} ;
765
766The values stored in the hashes above are mostly the direct equivalent
767of their C counterpart. Like their C counterparts, all are set to a
f6b705ef 768default values - that means you don't have to set I<all> of the
88108326 769values when you only want to change one. Here is an example:
770
771 $a = new DB_File::HASHINFO ;
772 $a->{'cachesize'} = 12345 ;
773 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
774
36477c24 775A few of the options need extra discussion here. When used, the C
88108326 776equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
777to C functions. In B<DB_File> these keys are used to store references
778to Perl subs. Below are templates for each of the subs:
779
780 sub hash
781 {
782 my ($data) = @_ ;
783 ...
784 # return the hash value for $data
785 return $hash ;
786 }
3b35bae3 787
88108326 788 sub compare
789 {
790 my ($key, $key2) = @_ ;
791 ...
792 # return 0 if $key1 eq $key2
793 # -1 if $key1 lt $key2
794 # 1 if $key1 gt $key2
795 return (-1 , 0 or 1) ;
796 }
3b35bae3 797
88108326 798 sub prefix
799 {
800 my ($key, $key2) = @_ ;
801 ...
802 # return number of bytes of $key2 which are
803 # necessary to determine that it is greater than $key1
804 return $bytes ;
805 }
3b35bae3 806
f6b705ef 807See L<Changing the BTREE sort order> for an example of using the
808C<compare> template.
88108326 809
36477c24 810If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 811C<bval>, you should check out L<The 'bval' Option>.
36477c24 812
88108326 813=head2 Default Parameters
814
815It is possible to omit some or all of the final 4 parameters in the
816call to C<tie> and let them take default values. As DB_HASH is the most
817common file format used, the call:
818
819 tie %A, "DB_File", "filename" ;
820
821is equivalent to:
822
18d2dc8c 823 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 824
825It is also possible to omit the filename parameter as well, so the
826call:
827
828 tie %A, "DB_File" ;
829
830is equivalent to:
831
18d2dc8c 832 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 833
f6b705ef 834See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 835in place of a filename.
836
f6b705ef 837=head2 In Memory Databases
838
839Berkeley DB allows the creation of in-memory databases by using NULL
840(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
841uses C<undef> instead of NULL to provide this functionality.
842
843=head1 DB_HASH
844
845The DB_HASH file format is probably the most commonly used of the three
846file formats that B<DB_File> supports. It is also very straightforward
847to use.
848
68dc0745 849=head2 A Simple Example
f6b705ef 850
851This example shows how to create a database, add key/value pairs to the
852database, delete keys/value pairs and finally how to enumerate the
853contents of the database.
854
3245f058 855 use warnings ;
610ab055 856 use strict ;
f6b705ef 857 use DB_File ;
07200f1b 858 our (%h, $k, $v) ;
f6b705ef 859
2c2d71f5 860 unlink "fruit" ;
45a340cb 861 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef 862 or die "Cannot open file 'fruit': $!\n";
863
864 # Add a few key/value pairs to the file
865 $h{"apple"} = "red" ;
866 $h{"orange"} = "orange" ;
867 $h{"banana"} = "yellow" ;
868 $h{"tomato"} = "red" ;
869
870 # Check for existence of a key
871 print "Banana Exists\n\n" if $h{"banana"} ;
872
873 # Delete a key/value pair.
874 delete $h{"apple"} ;
875
876 # print the contents of the file
877 while (($k, $v) = each %h)
878 { print "$k -> $v\n" }
879
880 untie %h ;
881
882here is the output:
883
884 Banana Exists
bbc7dcd2 885
f6b705ef 886 orange -> orange
887 tomato -> red
888 banana -> yellow
889
890Note that the like ordinary associative arrays, the order of the keys
891retrieved is in an apparently random order.
892
893=head1 DB_BTREE
894
895The DB_BTREE format is useful when you want to store data in a given
896order. By default the keys will be stored in lexical order, but as you
897will see from the example shown in the next section, it is very easy to
898define your own sorting function.
899
900=head2 Changing the BTREE sort order
901
902This script shows how to override the default sorting algorithm that
903BTREE uses. Instead of using the normal lexical ordering, a case
904insensitive compare function will be used.
88108326 905
3245f058 906 use warnings ;
610ab055 907 use strict ;
f6b705ef 908 use DB_File ;
610ab055 909
910 my %h ;
f6b705ef 911
912 sub Compare
913 {
914 my ($key1, $key2) = @_ ;
915 "\L$key1" cmp "\L$key2" ;
916 }
917
918 # specify the Perl sub that will do the comparison
919 $DB_BTREE->{'compare'} = \&Compare ;
920
2c2d71f5 921 unlink "tree" ;
45a340cb 922 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 923 or die "Cannot open file 'tree': $!\n" ;
924
925 # Add a key/value pair to the file
926 $h{'Wall'} = 'Larry' ;
927 $h{'Smith'} = 'John' ;
928 $h{'mouse'} = 'mickey' ;
929 $h{'duck'} = 'donald' ;
930
931 # Delete
932 delete $h{"duck"} ;
933
934 # Cycle through the keys printing them in order.
935 # Note it is not necessary to sort the keys as
936 # the btree will have kept them in order automatically.
937 foreach (keys %h)
938 { print "$_\n" }
939
940 untie %h ;
941
942Here is the output from the code above.
943
944 mouse
945 Smith
946 Wall
947
948There are a few point to bear in mind if you want to change the
949ordering in a BTREE database:
950
951=over 5
952
953=item 1.
954
955The new compare function must be specified when you create the database.
956
957=item 2.
958
959You cannot change the ordering once the database has been created. Thus
960you must use the same compare function every time you access the
88108326 961database.
962
39793c41 963=item 3
964
965Duplicate keys are entirely defined by the comparison function.
966In the case-insensitive example above, the keys: 'KEY' and 'key'
967would be considered duplicates, and assigning to the second one
52ffee89 968would overwrite the first. If duplicates are allowed for (with the
59e51af5 969R_DUP flag discussed below), only a single copy of duplicate keys
39793c41 970is stored in the database --- so (again with example above) assigning
971three values to the keys: 'KEY', 'Key', and 'key' would leave just
972the first key: 'KEY' in the database with three values. For some
973situations this results in information loss, so care should be taken
974to provide fully qualified comparison functions when necessary.
975For example, the above comparison routine could be modified to
976additionally compare case-sensitively if two keys are equal in the
977case insensitive comparison:
978
979 sub compare {
980 my($key1, $key2) = @_;
981 lc $key1 cmp lc $key2 ||
982 $key1 cmp $key2;
983 }
984
985And now you will only have duplicates when the keys themselves
986are truly the same. (note: in versions of the db library prior to
987about November 1996, such duplicate keys were retained so it was
988possible to recover the original keys in sets of keys that
989compared as equal).
990
991
f6b705ef 992=back
993
68dc0745 994=head2 Handling Duplicate Keys
f6b705ef 995
996The BTREE file type optionally allows a single key to be associated
997with an arbitrary number of values. This option is enabled by setting
998the flags element of C<$DB_BTREE> to R_DUP when creating the database.
999
88108326 1000There are some difficulties in using the tied hash interface if you
1001want to manipulate a BTREE database with duplicate keys. Consider this
1002code:
1003
3245f058 1004 use warnings ;
610ab055 1005 use strict ;
88108326 1006 use DB_File ;
610ab055 1007
962cee9f 1008 my ($filename, %h) ;
610ab055 1009
88108326 1010 $filename = "tree" ;
1011 unlink $filename ;
bbc7dcd2 1012
88108326 1013 # Enable duplicate records
1014 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1015
45a340cb 1016 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1017 or die "Cannot open $filename: $!\n";
bbc7dcd2 1018
88108326 1019 # Add some key/value pairs to the file
1020 $h{'Wall'} = 'Larry' ;
1021 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1022 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1023 $h{'Smith'} = 'John' ;
1024 $h{'mouse'} = 'mickey' ;
1025
1026 # iterate through the associative array
1027 # and print each key/value pair.
2c2d71f5 1028 foreach (sort keys %h)
88108326 1029 { print "$_ -> $h{$_}\n" }
1030
f6b705ef 1031 untie %h ;
1032
88108326 1033Here is the output:
1034
1035 Smith -> John
1036 Wall -> Larry
1037 Wall -> Larry
f6b705ef 1038 Wall -> Larry
88108326 1039 mouse -> mickey
1040
f6b705ef 1041As you can see 3 records have been successfully created with key C<Wall>
88108326 1042- the only thing is, when they are retrieved from the database they
f6b705ef 1043I<seem> to have the same value, namely C<Larry>. The problem is caused
1044by the way that the associative array interface works. Basically, when
1045the associative array interface is used to fetch the value associated
1046with a given key, it will only ever retrieve the first value.
88108326 1047
1048Although it may not be immediately obvious from the code above, the
1049associative array interface can be used to write values with duplicate
1050keys, but it cannot be used to read them back from the database.
1051
1052The way to get around this problem is to use the Berkeley DB API method
1053called C<seq>. This method allows sequential access to key/value
f6b705ef 1054pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1055and the API in general.
88108326 1056
1057Here is the script above rewritten using the C<seq> API method.
1058
3245f058 1059 use warnings ;
610ab055 1060 use strict ;
88108326 1061 use DB_File ;
bbc7dcd2 1062
962cee9f 1063 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1064
88108326 1065 $filename = "tree" ;
1066 unlink $filename ;
bbc7dcd2 1067
88108326 1068 # Enable duplicate records
1069 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1070
45a340cb 1071 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1072 or die "Cannot open $filename: $!\n";
bbc7dcd2 1073
88108326 1074 # Add some key/value pairs to the file
1075 $h{'Wall'} = 'Larry' ;
1076 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1077 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1078 $h{'Smith'} = 'John' ;
1079 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1080
f6b705ef 1081 # iterate through the btree using seq
88108326 1082 # and print each key/value pair.
610ab055 1083 $key = $value = 0 ;
f6b705ef 1084 for ($status = $x->seq($key, $value, R_FIRST) ;
1085 $status == 0 ;
1086 $status = $x->seq($key, $value, R_NEXT) )
88108326 1087 { print "$key -> $value\n" }
bbc7dcd2 1088
88108326 1089 undef $x ;
1090 untie %h ;
1091
1092that prints:
1093
1094 Smith -> John
1095 Wall -> Brick
f6b705ef 1096 Wall -> Brick
88108326 1097 Wall -> Larry
1098 mouse -> mickey
1099
f6b705ef 1100This time we have got all the key/value pairs, including the multiple
88108326 1101values associated with the key C<Wall>.
1102
6ca2e664 1103To make life easier when dealing with duplicate keys, B<DB_File> comes with
1104a few utility methods.
1105
68dc0745 1106=head2 The get_dup() Method
f6b705ef 1107
6ca2e664 1108The C<get_dup> method assists in
88108326 1109reading duplicate values from BTREE databases. The method can take the
1110following forms:
1111
1112 $count = $x->get_dup($key) ;
1113 @list = $x->get_dup($key) ;
1114 %list = $x->get_dup($key, 1) ;
1115
1116In a scalar context the method returns the number of values associated
1117with the key, C<$key>.
1118
1119In list context, it returns all the values which match C<$key>. Note
f6b705ef 1120that the values will be returned in an apparently random order.
88108326 1121
7a2e2cd6 1122In list context, if the second parameter is present and evaluates
1123TRUE, the method returns an associative array. The keys of the
1124associative array correspond to the values that matched in the BTREE
1125and the values of the array are a count of the number of times that
1126particular value occurred in the BTREE.
88108326 1127
f6b705ef 1128So assuming the database created above, we can use C<get_dup> like
88108326 1129this:
1130
3245f058 1131 use warnings ;
2c2d71f5 1132 use strict ;
1133 use DB_File ;
bbc7dcd2 1134
962cee9f 1135 my ($filename, $x, %h) ;
2c2d71f5 1136
1137 $filename = "tree" ;
bbc7dcd2 1138
2c2d71f5 1139 # Enable duplicate records
1140 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1141
45a340cb 1142 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
2c2d71f5 1143 or die "Cannot open $filename: $!\n";
1144
610ab055 1145 my $cnt = $x->get_dup("Wall") ;
88108326 1146 print "Wall occurred $cnt times\n" ;
1147
610ab055 1148 my %hash = $x->get_dup("Wall", 1) ;
88108326 1149 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1150 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1151
2c2d71f5 1152 my @list = sort $x->get_dup("Wall") ;
88108326 1153 print "Wall => [@list]\n" ;
1154
f6b705ef 1155 @list = $x->get_dup("Smith") ;
88108326 1156 print "Smith => [@list]\n" ;
bbc7dcd2 1157
f6b705ef 1158 @list = $x->get_dup("Dog") ;
88108326 1159 print "Dog => [@list]\n" ;
1160
1161
1162and it will print:
1163
f6b705ef 1164 Wall occurred 3 times
88108326 1165 Larry is there
f6b705ef 1166 There are 2 Brick Walls
1167 Wall => [Brick Brick Larry]
88108326 1168 Smith => [John]
1169 Dog => []
3b35bae3 1170
6ca2e664 1171=head2 The find_dup() Method
1172
1173 $status = $X->find_dup($key, $value) ;
1174
b90e71be 1175This method checks for the existence of a specific key/value pair. If the
6ca2e664 1176pair exists, the cursor is left pointing to the pair and the method
1177returns 0. Otherwise the method returns a non-zero value.
1178
1179Assuming the database from the previous example:
1180
3245f058 1181 use warnings ;
6ca2e664 1182 use strict ;
1183 use DB_File ;
bbc7dcd2 1184
962cee9f 1185 my ($filename, $x, %h, $found) ;
6ca2e664 1186
07200f1b 1187 $filename = "tree" ;
bbc7dcd2 1188
6ca2e664 1189 # Enable duplicate records
1190 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1191
45a340cb 1192 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664 1193 or die "Cannot open $filename: $!\n";
1194
1195 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1196 print "Larry Wall is $found there\n" ;
bbc7dcd2 1197
6ca2e664 1198 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1199 print "Harry Wall is $found there\n" ;
bbc7dcd2 1200
6ca2e664 1201 undef $x ;
1202 untie %h ;
1203
1204prints this
1205
2c2d71f5 1206 Larry Wall is there
6ca2e664 1207 Harry Wall is not there
1208
1209
1210=head2 The del_dup() Method
1211
1212 $status = $X->del_dup($key, $value) ;
1213
1214This method deletes a specific key/value pair. It returns
12150 if they exist and have been deleted successfully.
1216Otherwise the method returns a non-zero value.
1217
b90e71be 1218Again assuming the existence of the C<tree> database
6ca2e664 1219
3245f058 1220 use warnings ;
6ca2e664 1221 use strict ;
1222 use DB_File ;
bbc7dcd2 1223
962cee9f 1224 my ($filename, $x, %h, $found) ;
6ca2e664 1225
07200f1b 1226 $filename = "tree" ;
bbc7dcd2 1227
6ca2e664 1228 # Enable duplicate records
1229 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1230
45a340cb 1231 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664 1232 or die "Cannot open $filename: $!\n";
1233
1234 $x->del_dup("Wall", "Larry") ;
1235
1236 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1237 print "Larry Wall is $found there\n" ;
bbc7dcd2 1238
6ca2e664 1239 undef $x ;
1240 untie %h ;
1241
1242prints this
1243
1244 Larry Wall is not there
1245
f6b705ef 1246=head2 Matching Partial Keys
1247
1248The BTREE interface has a feature which allows partial keys to be
1249matched. This functionality is I<only> available when the C<seq> method
1250is used along with the R_CURSOR flag.
1251
1252 $x->seq($key, $value, R_CURSOR) ;
1253
1254Here is the relevant quote from the dbopen man page where it defines
1255the use of the R_CURSOR flag with seq:
1256
f6b705ef 1257 Note, for the DB_BTREE access method, the returned key is not
1258 necessarily an exact match for the specified key. The returned key
1259 is the smallest key greater than or equal to the specified key,
1260 permitting partial key matches and range searches.
1261
f6b705ef 1262In the example script below, the C<match> sub uses this feature to find
1263and print the first matching key/value pair given a partial key.
1264
3245f058 1265 use warnings ;
610ab055 1266 use strict ;
f6b705ef 1267 use DB_File ;
1268 use Fcntl ;
610ab055 1269
962cee9f 1270 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef 1271
1272 sub match
1273 {
1274 my $key = shift ;
610ab055 1275 my $value = 0;
f6b705ef 1276 my $orig_key = $key ;
1277 $x->seq($key, $value, R_CURSOR) ;
1278 print "$orig_key\t-> $key\t-> $value\n" ;
1279 }
1280
1281 $filename = "tree" ;
1282 unlink $filename ;
1283
45a340cb 1284 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1285 or die "Cannot open $filename: $!\n";
bbc7dcd2 1286
f6b705ef 1287 # Add some key/value pairs to the file
1288 $h{'mouse'} = 'mickey' ;
1289 $h{'Wall'} = 'Larry' ;
1290 $h{'Walls'} = 'Brick' ;
1291 $h{'Smith'} = 'John' ;
bbc7dcd2 1292
f6b705ef 1293
610ab055 1294 $key = $value = 0 ;
f6b705ef 1295 print "IN ORDER\n" ;
1296 for ($st = $x->seq($key, $value, R_FIRST) ;
1297 $st == 0 ;
1298 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1299
2c2d71f5 1300 { print "$key -> $value\n" }
bbc7dcd2 1301
f6b705ef 1302 print "\nPARTIAL MATCH\n" ;
1303
1304 match "Wa" ;
1305 match "A" ;
1306 match "a" ;
1307
1308 undef $x ;
1309 untie %h ;
1310
1311Here is the output:
1312
1313 IN ORDER
1314 Smith -> John
1315 Wall -> Larry
1316 Walls -> Brick
1317 mouse -> mickey
1318
1319 PARTIAL MATCH
1320 Wa -> Wall -> Larry
1321 A -> Smith -> John
1322 a -> mouse -> mickey
1323
1324=head1 DB_RECNO
1325
1326DB_RECNO provides an interface to flat text files. Both variable and
1327fixed length records are supported.
3b35bae3 1328
6ca2e664 1329In order to make RECNO more compatible with Perl, the array offset for
88108326 1330all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1331
88108326 1332As with normal Perl arrays, a RECNO array can be accessed using
1333negative indexes. The index -1 refers to the last element of the array,
1334-2 the second last, and so on. Attempting to access an element before
1335the start of the array will raise a fatal run-time error.
3b35bae3 1336
68dc0745 1337=head2 The 'bval' Option
36477c24 1338
1339The operation of the bval option warrants some discussion. Here is the
1340definition of bval from the Berkeley DB 1.85 recno manual page:
1341
1342 The delimiting byte to be used to mark the end of a
1343 record for variable-length records, and the pad charac-
1344 ter for fixed-length records. If no value is speci-
1345 fied, newlines (``\n'') are used to mark the end of
1346 variable-length records and fixed-length records are
1347 padded with spaces.
1348
1349The second sentence is wrong. In actual fact bval will only default to
1350C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1351openinfo parameter is used at all, the value that happens to be in bval
1352will be used. That means you always have to specify bval when making
1353use of any of the options in the openinfo parameter. This documentation
1354error will be fixed in the next release of Berkeley DB.
1355
1356That clarifies the situation with regards Berkeley DB itself. What
1357about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1358quite useful, so B<DB_File> conforms to it.
36477c24 1359
1360That means that you can specify other options (e.g. cachesize) and
1361still have bval default to C<"\n"> for variable length records, and
1362space for fixed length records.
1363
c5da4faf 1364Also note that the bval option only allows you to specify a single byte
a6d6498e 1365as a delimiter.
c5da4faf 1366
f6b705ef 1367=head2 A Simple Example
3b35bae3 1368
6ca2e664 1369Here is a simple example that uses RECNO (if you are using a version
1370of Perl earlier than 5.004_57 this example won't work -- see
1371L<Extra RECNO Methods> for a workaround).
f6b705ef 1372
3245f058 1373 use warnings ;
610ab055 1374 use strict ;
f6b705ef 1375 use DB_File ;
f6b705ef 1376
2c2d71f5 1377 my $filename = "text" ;
1378 unlink $filename ;
1379
610ab055 1380 my @h ;
45a340cb 1381 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1382 or die "Cannot open file 'text': $!\n" ;
1383
1384 # Add a few key/value pairs to the file
1385 $h[0] = "orange" ;
1386 $h[1] = "blue" ;
1387 $h[2] = "yellow" ;
1388
6ca2e664 1389 push @h, "green", "black" ;
1390
1391 my $elements = scalar @h ;
1392 print "The array contains $elements entries\n" ;
1393
1394 my $last = pop @h ;
1395 print "popped $last\n" ;
1396
1397 unshift @h, "white" ;
1398 my $first = shift @h ;
1399 print "shifted $first\n" ;
1400
f6b705ef 1401 # Check for existence of a key
1402 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1403
1404 # use a negative index
1405 print "The last element is $h[-1]\n" ;
1406 print "The 2nd last element is $h[-2]\n" ;
1407
1408 untie @h ;
3b35bae3 1409
f6b705ef 1410Here is the output from the script:
1411
6ca2e664 1412 The array contains 5 entries
1413 popped black
2c2d71f5 1414 shifted white
f6b705ef 1415 Element 1 Exists with value blue
6ca2e664 1416 The last element is green
1417 The 2nd last element is yellow
f6b705ef 1418
6ca2e664 1419=head2 Extra RECNO Methods
f6b705ef 1420
045291aa 1421If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664 1422array interface is quite limited. In the example script above
1423C<push>, C<pop>, C<shift>, C<unshift>
1424or determining the array length will not work with a tied array.
045291aa 1425
1426To make the interface more useful for older versions of Perl, a number
1427of methods are supplied with B<DB_File> to simulate the missing array
1428operations. All these methods are accessed via the object returned from
1429the tie call.
f6b705ef 1430
1431Here are the methods:
1432
1433=over 5
3b35bae3 1434
f6b705ef 1435=item B<$X-E<gt>push(list) ;>
1436
1437Pushes the elements of C<list> to the end of the array.
1438
1439=item B<$value = $X-E<gt>pop ;>
1440
1441Removes and returns the last element of the array.
1442
1443=item B<$X-E<gt>shift>
1444
1445Removes and returns the first element of the array.
1446
1447=item B<$X-E<gt>unshift(list) ;>
1448
1449Pushes the elements of C<list> to the start of the array.
1450
1451=item B<$X-E<gt>length>
1452
1453Returns the number of elements in the array.
1454
c5da4faf 1455=item B<$X-E<gt>splice(offset, length, elements);>
1456
a6d05634 1457Returns a splice of the array.
c5da4faf 1458
f6b705ef 1459=back
1460
1461=head2 Another Example
1462
1463Here is a more complete example that makes use of some of the methods
1464described above. It also makes use of the API interface directly (see
1465L<THE API INTERFACE>).
1466
3245f058 1467 use warnings ;
f6b705ef 1468 use strict ;
962cee9f 1469 my (@h, $H, $file, $i) ;
f6b705ef 1470 use DB_File ;
1471 use Fcntl ;
bbc7dcd2 1472
f6b705ef 1473 $file = "text" ;
1474
1475 unlink $file ;
1476
45a340cb 1477 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1478 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1479
f6b705ef 1480 # first create a text file to play with
1481 $h[0] = "zero" ;
1482 $h[1] = "one" ;
1483 $h[2] = "two" ;
1484 $h[3] = "three" ;
1485 $h[4] = "four" ;
1486
bbc7dcd2 1487
f6b705ef 1488 # Print the records in order.
1489 #
1490 # The length method is needed here because evaluating a tied
1491 # array in a scalar context does not return the number of
1492 # elements in the array.
1493
1494 print "\nORIGINAL\n" ;
1495 foreach $i (0 .. $H->length - 1) {
1496 print "$i: $h[$i]\n" ;
1497 }
1498
1499 # use the push & pop methods
1500 $a = $H->pop ;
1501 $H->push("last") ;
1502 print "\nThe last record was [$a]\n" ;
1503
1504 # and the shift & unshift methods
1505 $a = $H->shift ;
1506 $H->unshift("first") ;
1507 print "The first record was [$a]\n" ;
1508
1509 # Use the API to add a new record after record 2.
1510 $i = 2 ;
1511 $H->put($i, "Newbie", R_IAFTER) ;
1512
1513 # and a new record before record 1.
1514 $i = 1 ;
1515 $H->put($i, "New One", R_IBEFORE) ;
1516
1517 # delete record 3
1518 $H->del(3) ;
1519
1520 # now print the records in reverse order
1521 print "\nREVERSE\n" ;
1522 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1523 { print "$i: $h[$i]\n" }
1524
1525 # same again, but use the API functions instead
1526 print "\nREVERSE again\n" ;
610ab055 1527 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1528 for ($s = $H->seq($k, $v, R_LAST) ;
1529 $s == 0 ;
1530 $s = $H->seq($k, $v, R_PREV))
1531 { print "$k: $v\n" }
1532
1533 undef $H ;
1534 untie @h ;
1535
1536and this is what it outputs:
1537
1538 ORIGINAL
1539 0: zero
1540 1: one
1541 2: two
1542 3: three
1543 4: four
1544
1545 The last record was [four]
1546 The first record was [zero]
1547
1548 REVERSE
1549 5: last
1550 4: three
1551 3: Newbie
1552 2: one
1553 1: New One
1554 0: first
1555
1556 REVERSE again
1557 5: last
1558 4: three
1559 3: Newbie
1560 2: one
1561 1: New One
1562 0: first
1563
1564Notes:
1565
1566=over 5
1567
1568=item 1.
1569
1570Rather than iterating through the array, C<@h> like this:
1571
1572 foreach $i (@h)
1573
1574it is necessary to use either this:
1575
1576 foreach $i (0 .. $H->length - 1)
1577
1578or this:
1579
1580 for ($a = $H->get($k, $v, R_FIRST) ;
1581 $a == 0 ;
1582 $a = $H->get($k, $v, R_NEXT) )
1583
1584=item 2.
1585
1586Notice that both times the C<put> method was used the record index was
1587specified using a variable, C<$i>, rather than the literal value
1588itself. This is because C<put> will return the record number of the
1589inserted line via that parameter.
1590
1591=back
1592
1593=head1 THE API INTERFACE
3b35bae3 1594
1595As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1596possible to make direct use of most of the API functions defined in the
8e07c86e 1597Berkeley DB documentation.
3b35bae3 1598
88108326 1599To do this you need to store a copy of the object returned from the tie.
3b35bae3 1600
88108326 1601 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1602
8e07c86e 1603Once you have done that, you can access the Berkeley DB API functions
88108326 1604as B<DB_File> methods directly like this:
3b35bae3 1605
1606 $db->put($key, $value, R_NOOVERWRITE) ;
1607
88108326 1608B<Important:> If you have saved a copy of the object returned from
1609C<tie>, the underlying database file will I<not> be closed until both
1610the tied variable is untied and all copies of the saved object are
610ab055 1611destroyed.
88108326 1612
1613 use DB_File ;
1614 $db = tie %hash, "DB_File", "filename"
1615 or die "Cannot tie filename: $!" ;
1616 ...
1617 undef $db ;
1618 untie %hash ;
1619
9a2c4ce3 1620See L<The untie() Gotcha> for more details.
778183f3 1621
88108326 1622All the functions defined in L<dbopen> are available except for
1623close() and dbopen() itself. The B<DB_File> method interface to the
1624supported functions have been implemented to mirror the way Berkeley DB
1625works whenever possible. In particular note that:
1626
1627=over 5
1628
1629=item *
1630
1631The methods return a status value. All return 0 on success.
1632All return -1 to signify an error and set C<$!> to the exact
1633error code. The return code 1 generally (but not always) means that the
1634key specified did not exist in the database.
1635
1636Other return codes are defined. See below and in the Berkeley DB
1637documentation for details. The Berkeley DB documentation should be used
1638as the definitive source.
1639
1640=item *
3b35bae3 1641
88108326 1642Whenever a Berkeley DB function returns data via one of its parameters,
1643the equivalent B<DB_File> method does exactly the same.
3b35bae3 1644
88108326 1645=item *
1646
1647If you are careful, it is possible to mix API calls with the tied
1648hash/array interface in the same piece of code. Although only a few of
1649the methods used to implement the tied interface currently make use of
1650the cursor, you should always assume that the cursor has been changed
1651any time the tied hash/array interface is used. As an example, this
1652code will probably not do what you expect:
1653
1654 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1655 or die "Cannot tie $filename: $!" ;
1656
1657 # Get the first key/value pair and set the cursor
1658 $X->seq($key, $value, R_FIRST) ;
1659
1660 # this line will modify the cursor
1661 $count = scalar keys %x ;
1662
1663 # Get the second key/value pair.
1664 # oops, it didn't, it got the last key/value pair!
1665 $X->seq($key, $value, R_NEXT) ;
1666
1667The code above can be rearranged to get around the problem, like this:
1668
1669 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1670 or die "Cannot tie $filename: $!" ;
1671
1672 # this line will modify the cursor
1673 $count = scalar keys %x ;
1674
1675 # Get the first key/value pair and set the cursor
1676 $X->seq($key, $value, R_FIRST) ;
1677
1678 # Get the second key/value pair.
1679 # worked this time.
1680 $X->seq($key, $value, R_NEXT) ;
1681
1682=back
1683
1684All the constants defined in L<dbopen> for use in the flags parameters
1685in the methods defined below are also available. Refer to the Berkeley
1686DB documentation for the precise meaning of the flags values.
1687
1688Below is a list of the methods available.
3b35bae3 1689
1690=over 5
1691
f6b705ef 1692=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1693
1694Given a key (C<$key>) this method reads the value associated with it
1695from the database. The value read from the database is returned in the
1696C<$value> parameter.
3b35bae3 1697
88108326 1698If the key does not exist the method returns 1.
3b35bae3 1699
88108326 1700No flags are currently defined for this method.
3b35bae3 1701
f6b705ef 1702=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1703
88108326 1704Stores the key/value pair in the database.
1705
1706If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1707will have the record number of the inserted key/value pair set.
3b35bae3 1708
88108326 1709Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1710R_SETCURSOR.
1711
f6b705ef 1712=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1713
88108326 1714Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1715
88108326 1716A return code of 1 means that the requested key was not in the
1717database.
3b35bae3 1718
88108326 1719R_CURSOR is the only valid flag at present.
3b35bae3 1720
f6b705ef 1721=item B<$status = $X-E<gt>fd ;>
3b35bae3 1722
88108326 1723Returns the file descriptor for the underlying database.
3b35bae3 1724
b90e71be 1725See L<Locking: The Trouble with fd> for an explanation for why you should
1726not use C<fd> to lock your database.
3b35bae3 1727
f6b705ef 1728=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1729
88108326 1730This interface allows sequential retrieval from the database. See
1731L<dbopen> for full details.
1732
1733Both the C<$key> and C<$value> parameters will be set to the key/value
1734pair read from the database.
1735
1736The flags parameter is mandatory. The valid flag values are R_CURSOR,
1737R_FIRST, R_LAST, R_NEXT and R_PREV.
1738
f6b705ef 1739=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1740
1741Flushes any cached buffers to disk.
1742
1743R_RECNOSYNC is the only valid flag at present.
3b35bae3 1744
1745=back
1746
cad2e5aa 1747=head1 DBM FILTERS
1748
1749A DBM Filter is a piece of code that is be used when you I<always>
1750want to make the same transformation to all keys and/or values in a
1751DBM database.
1752
1753There are four methods associated with DBM Filters. All work identically,
1754and each is used to install (or uninstall) a single DBM Filter. Each
1755expects a single parameter, namely a reference to a sub. The only
1756difference between them is the place that the filter is installed.
1757
1758To summarise:
1759
1760=over 5
1761
1762=item B<filter_store_key>
1763
1764If a filter has been installed with this method, it will be invoked
1765every time you write a key to a DBM database.
1766
1767=item B<filter_store_value>
1768
1769If a filter has been installed with this method, it will be invoked
1770every time you write a value to a DBM database.
1771
1772
1773=item B<filter_fetch_key>
1774
1775If a filter has been installed with this method, it will be invoked
1776every time you read a key from a DBM database.
1777
1778=item B<filter_fetch_value>
1779
1780If a filter has been installed with this method, it will be invoked
1781every time you read a value from a DBM database.
1782
1783=back
1784
1785You can use any combination of the methods, from none, to all four.
1786
1787All filter methods return the existing filter, if present, or C<undef>
1788in not.
1789
1790To delete a filter pass C<undef> to it.
1791
1792=head2 The Filter
1793
1794When each filter is called by Perl, a local copy of C<$_> will contain
1795the key or value to be filtered. Filtering is achieved by modifying
1796the contents of C<$_>. The return code from the filter is ignored.
1797
1798=head2 An Example -- the NULL termination problem.
1799
1800Consider the following scenario. You have a DBM database
1801that you need to share with a third-party C application. The C application
1802assumes that I<all> keys and values are NULL terminated. Unfortunately
1803when Perl writes to DBM databases it doesn't use NULL termination, so
1804your Perl application will have to manage NULL termination itself. When
1805you write to the database you will have to use something like this:
1806
1807 $hash{"$key\0"} = "$value\0" ;
1808
1809Similarly the NULL needs to be taken into account when you are considering
1810the length of existing keys/values.
1811
1812It would be much better if you could ignore the NULL terminations issue
1813in the main application code and have a mechanism that automatically
1814added the terminating NULL to all keys and values whenever you write to
1815the database and have them removed when you read from the database. As I'm
1816sure you have already guessed, this is a problem that DBM Filters can
1817fix very easily.
1818
3245f058 1819 use warnings ;
cad2e5aa 1820 use strict ;
1821 use DB_File ;
1822
1823 my %hash ;
2359510d 1824 my $filename = "filt" ;
cad2e5aa 1825 unlink $filename ;
1826
1827 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1828 or die "Cannot open $filename: $!\n" ;
1829
1830 # Install DBM Filters
1831 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1832 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1833 $db->filter_fetch_value( sub { s/\0$// } ) ;
1834 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1835
1836 $hash{"abc"} = "def" ;
1837 my $a = $hash{"ABC"} ;
1838 # ...
1839 undef $db ;
1840 untie %hash ;
1841
1842Hopefully the contents of each of the filters should be
1843self-explanatory. Both "fetch" filters remove the terminating NULL,
1844and both "store" filters add a terminating NULL.
1845
1846
1847=head2 Another Example -- Key is a C int.
1848
1849Here is another real-life example. By default, whenever Perl writes to
1850a DBM database it always writes the key and value as strings. So when
1851you use this:
1852
1853 $hash{12345} = "soemthing" ;
1854
1855the key 12345 will get stored in the DBM database as the 5 byte string
1856"12345". If you actually want the key to be stored in the DBM database
1857as a C int, you will have to use C<pack> when writing, and C<unpack>
1858when reading.
1859
1860Here is a DBM Filter that does it:
1861
3245f058 1862 use warnings ;
cad2e5aa 1863 use strict ;
1864 use DB_File ;
1865 my %hash ;
2359510d 1866 my $filename = "filt" ;
cad2e5aa 1867 unlink $filename ;
1868
1869
1870 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1871 or die "Cannot open $filename: $!\n" ;
1872
1873 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1874 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1875 $hash{123} = "def" ;
1876 # ...
1877 undef $db ;
1878 untie %hash ;
1879
1880This time only two filters have been used -- we only need to manipulate
1881the contents of the key, so it wasn't necessary to install any value
1882filters.
1883
f6b705ef 1884=head1 HINTS AND TIPS
3b35bae3 1885
3b35bae3 1886
b90e71be 1887=head2 Locking: The Trouble with fd
3b35bae3 1888
b90e71be 1889Until version 1.72 of this module, the recommended technique for locking
1890B<DB_File> databases was to flock the filehandle returned from the "fd"
1891function. Unfortunately this technique has been shown to be fundamentally
1892flawed (Kudos to David Harris for tracking this down). Use it at your own
1893peril!
3b35bae3 1894
b90e71be 1895The locking technique went like this.
cb1a09d0 1896
2359510d 1897 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644)
1898 || die "dbcreat foo.db $!";
b90e71be 1899 $fd = $db->fd;
1900 open(DB_FH, "+<&=$fd") || die "dup $!";
1901 flock (DB_FH, LOCK_EX) || die "flock: $!";
1902 ...
1903 $db{"Tom"} = "Jerry" ;
1904 ...
1905 flock(DB_FH, LOCK_UN);
1906 undef $db;
1907 untie %db;
1908 close(DB_FH);
cb1a09d0 1909
b90e71be 1910In simple terms, this is what happens:
cb1a09d0 1911
b90e71be 1912=over 5
cb1a09d0 1913
b90e71be 1914=item 1.
cb1a09d0 1915
b90e71be 1916Use "tie" to open the database.
cb1a09d0 1917
b90e71be 1918=item 2.
cb1a09d0 1919
b90e71be 1920Lock the database with fd & flock.
cb1a09d0 1921
b90e71be 1922=item 3.
cb1a09d0 1923
b90e71be 1924Read & Write to the database.
cb1a09d0 1925
b90e71be 1926=item 4.
cb1a09d0 1927
b90e71be 1928Unlock and close the database.
cb1a09d0 1929
b90e71be 1930=back
1931
1932Here is the crux of the problem. A side-effect of opening the B<DB_File>
1933database in step 2 is that an initial block from the database will get
1934read from disk and cached in memory.
1935
1936To see why this is a problem, consider what can happen when two processes,
1937say "A" and "B", both want to update the same B<DB_File> database
1938using the locking steps outlined above. Assume process "A" has already
1939opened the database and has a write lock, but it hasn't actually updated
1940the database yet (it has finished step 2, but not started step 3 yet). Now
1941process "B" tries to open the same database - step 1 will succeed,
1942but it will block on step 2 until process "A" releases the lock. The
1943important thing to notice here is that at this point in time both
1944processes will have cached identical initial blocks from the database.
1945
1946Now process "A" updates the database and happens to change some of the
1947data held in the initial buffer. Process "A" terminates, flushing
1948all cached data to disk and releasing the database lock. At this point
1949the database on disk will correctly reflect the changes made by process
1950"A".
1951
1952With the lock released, process "B" can now continue. It also updates the
1953database and unfortunately it too modifies the data that was in its
1954initial buffer. Once that data gets flushed to disk it will overwrite
1955some/all of the changes process "A" made to the database.
1956
1957The result of this scenario is at best a database that doesn't contain
1958what you expect. At worst the database will corrupt.
1959
1960The above won't happen every time competing process update the same
1961B<DB_File> database, but it does illustrate why the technique should
1962not be used.
1963
1964=head2 Safe ways to lock a database
1965
1966Starting with version 2.x, Berkeley DB has internal support for locking.
1967The companion module to this one, B<BerkeleyDB>, provides an interface
1968to this locking functionality. If you are serious about locking
1969Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1970
1971If using B<BerkeleyDB> isn't an option, there are a number of modules
1972available on CPAN that can be used to implement locking. Each one
1973implements locking differently and has different goals in mind. It is
1974therefore worth knowing the difference, so that you can pick the right
1975one for your application. Here are the three locking wrappers:
1976
1977=over 5
1978
1979=item B<Tie::DB_Lock>
1980
1981A B<DB_File> wrapper which creates copies of the database file for
1982read access, so that you have a kind of a multiversioning concurrent read
1983system. However, updates are still serial. Use for databases where reads
1984may be lengthy and consistency problems may occur.
1985
1986=item B<Tie::DB_LockFile>
1987
1988A B<DB_File> wrapper that has the ability to lock and unlock the database
1989while it is being used. Avoids the tie-before-flock problem by simply
1990re-tie-ing the database when you get or drop a lock. Because of the
1991flexibility in dropping and re-acquiring the lock in the middle of a
1992session, this can be massaged into a system that will work with long
1993updates and/or reads if the application follows the hints in the POD
1994documentation.
1995
1996=item B<DB_File::Lock>
1997
1998An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
1999before tie-ing the database and drops the lock after the untie. Allows
2000one to use the same lockfile for multiple databases to avoid deadlock
2001problems, if desired. Use for databases where updates are reads are
2002quick and simple flock locking semantics are enough.
2003
2004=back
cb1a09d0 2005
68dc0745 2006=head2 Sharing Databases With C Applications
f6b705ef 2007
2008There is no technical reason why a Berkeley DB database cannot be
2009shared by both a Perl and a C application.
2010
2011The vast majority of problems that are reported in this area boil down
2012to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 2013not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 2014
2015Here is a real example. Netscape 2.0 keeps a record of the locations you
2016visit along with the time you last visited them in a DB_HASH database.
2017This is usually stored in the file F<~/.netscape/history.db>. The key
2018field in the database is the location string and the value field is the
2019time the location was last visited stored as a 4 byte binary value.
2020
2021If you haven't already guessed, the location string is stored with a
2022terminating NULL. This means you need to be careful when accessing the
2023database.
2024
2025Here is a snippet of code that is loosely based on Tom Christiansen's
2026I<ggh> script (available from your nearest CPAN archive in
2027F<authors/id/TOMC/scripts/nshist.gz>).
2028
3245f058 2029 use warnings ;
610ab055 2030 use strict ;
f6b705ef 2031 use DB_File ;
2032 use Fcntl ;
f6b705ef 2033
962cee9f 2034 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef 2035 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2036
2037 $HISTORY = "$dotdir/.netscape/history.db";
2038
2039 tie %hist_db, 'DB_File', $HISTORY
2040 or die "Cannot open $HISTORY: $!\n" ;;
2041
2042 # Dump the complete database
2043 while ( ($href, $binary_time) = each %hist_db ) {
2044
2045 # remove the terminating NULL
2046 $href =~ s/\x00$// ;
2047
2048 # convert the binary time into a user friendly string
2049 $date = localtime unpack("V", $binary_time);
2050 print "$date $href\n" ;
2051 }
2052
2053 # check for the existence of a specific key
2054 # remember to add the NULL
2055 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2056 $date = localtime unpack("V", $binary_time) ;
2057 print "Last visited mox.perl.com on $date\n" ;
2058 }
2059 else {
2060 print "Never visited mox.perl.com\n"
2061 }
2062
2063 untie %hist_db ;
2064
68dc0745 2065=head2 The untie() Gotcha
778183f3 2066
7a2e2cd6 2067If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2068recommended that you read L<perltie/The untie Gotcha>.
778183f3 2069
2070Even if you don't currently make use of the API interface, it is still
2071worth reading it.
2072
2073Here is an example which illustrates the problem from a B<DB_File>
2074perspective:
2075
2076 use DB_File ;
2077 use Fcntl ;
2078
2079 my %x ;
2080 my $X ;
2081
2082 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2083 or die "Cannot tie first time: $!" ;
2084
2085 $x{123} = 456 ;
2086
2087 untie %x ;
2088
2089 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2090 or die "Cannot tie second time: $!" ;
2091
2092 untie %x ;
2093
2094When run, the script will produce this error message:
2095
2096 Cannot tie second time: Invalid argument at bad.file line 14.
2097
2098Although the error message above refers to the second tie() statement
2099in the script, the source of the problem is really with the untie()
2100statement that precedes it.
2101
2102Having read L<perltie> you will probably have already guessed that the
2103error is caused by the extra copy of the tied object stored in C<$X>.
2104If you haven't, then the problem boils down to the fact that the
2105B<DB_File> destructor, DESTROY, will not be called until I<all>
2106references to the tied object are destroyed. Both the tied variable,
2107C<%x>, and C<$X> above hold a reference to the object. The call to
2108untie() will destroy the first, but C<$X> still holds a valid
2109reference, so the destructor will not get called and the database file
2110F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2111attempt to open a database that is already open via the catch-all
778183f3 2112"Invalid argument" doesn't help.
2113
2114If you run the script with the C<-w> flag the error message becomes:
2115
2116 untie attempted while 1 inner references still exist at bad.file line 12.
2117 Cannot tie second time: Invalid argument at bad.file line 14.
2118
2119which pinpoints the real problem. Finally the script can now be
2120modified to fix the original problem by destroying the API object
2121before the untie:
2122
2123 ...
2124 $x{123} = 456 ;
2125
2126 undef $X ;
2127 untie %x ;
2128
2129 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2130 ...
2131
f6b705ef 2132
2133=head1 COMMON QUESTIONS
2134
2135=head2 Why is there Perl source in my database?
2136
2137If you look at the contents of a database file created by DB_File,
2138there can sometimes be part of a Perl script included in it.
2139
2140This happens because Berkeley DB uses dynamic memory to allocate
2141buffers which will subsequently be written to the database file. Being
2142dynamic, the memory could have been used for anything before DB
2143malloced it. As Berkeley DB doesn't clear the memory once it has been
2144allocated, the unused portions will contain random junk. In the case
2145where a Perl script gets written to the database, the random junk will
2146correspond to an area of dynamic memory that happened to be used during
2147the compilation of the script.
2148
2149Unless you don't like the possibility of there being part of your Perl
2150scripts embedded in a database file, this is nothing to worry about.
2151
2152=head2 How do I store complex data structures with DB_File?
2153
2154Although B<DB_File> cannot do this directly, there is a module which
2155can layer transparently over B<DB_File> to accomplish this feat.
2156
2157Check out the MLDBM module, available on CPAN in the directory
2158F<modules/by-module/MLDBM>.
2159
2160=head2 What does "Invalid Argument" mean?
2161
2162You will get this error message when one of the parameters in the
2163C<tie> call is wrong. Unfortunately there are quite a few parameters to
2164get wrong, so it can be difficult to figure out which one it is.
2165
2166Here are a couple of possibilities:
2167
2168=over 5
2169
2170=item 1.
2171
610ab055 2172Attempting to reopen a database without closing it.
f6b705ef 2173
2174=item 2.
2175
2176Using the O_WRONLY flag.
2177
2178=back
2179
2180=head2 What does "Bareword 'DB_File' not allowed" mean?
2181
2182You will encounter this particular error message when you have the
2183C<strict 'subs'> pragma (or the full strict pragma) in your script.
2184Consider this script:
2185
3245f058 2186 use warnings ;
f6b705ef 2187 use strict ;
2188 use DB_File ;
07200f1b 2189 my %x ;
f6b705ef 2190 tie %x, DB_File, "filename" ;
2191
2192Running it produces the error in question:
2193
2194 Bareword "DB_File" not allowed while "strict subs" in use
2195
2196To get around the error, place the word C<DB_File> in either single or
2197double quotes, like this:
2198
2199 tie %x, "DB_File", "filename" ;
2200
2201Although it might seem like a real pain, it is really worth the effort
2202of having a C<use strict> in all your scripts.
2203
cad2e5aa 2204=head1 REFERENCES
2205
2206Articles that are either about B<DB_File> or make use of it.
2207
2208=over 5
2209
2210=item 1.
2211
2212I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2213Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2214
2215=back
2216
cb1a09d0 2217=head1 HISTORY
2218
1f70e1ea 2219Moved to the Changes file.
610ab055 2220
1f70e1ea 2221=head1 BUGS
05475680 2222
1f70e1ea 2223Some older versions of Berkeley DB had problems with fixed length
2224records using the RECNO file format. This problem has been fixed since
2225version 1.85 of Berkeley DB.
e858de61 2226
1f70e1ea 2227I am sure there are bugs in the code. If you do find any, or can
2228suggest any enhancements, I would welcome your comments.
a6ed719b 2229
1f70e1ea 2230=head1 AVAILABILITY
a6ed719b 2231
1f70e1ea 2232B<DB_File> comes with the standard Perl source distribution. Look in
2233the directory F<ext/DB_File>. Given the amount of time between releases
2234of Perl the version that ships with Perl is quite likely to be out of
2235date, so the most recent version can always be found on CPAN (see
5bbd4290 2236L<perlmodlib/CPAN> for details), in the directory
1f70e1ea 2237F<modules/by-module/DB_File>.
a6ed719b 2238
039d031f 2239This version of B<DB_File> will work with either version 1.x, 2.x or
22403.x of Berkeley DB, but is limited to the functionality provided by
2241version 1.
a6ed719b 2242
cad2e5aa 2243The official web site for Berkeley DB is F<http://www.sleepycat.com>.
039d031f 2244All versions of Berkeley DB are available there.
93af7a87 2245
1f70e1ea 2246Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2247archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2248
1f70e1ea 2249If you are running IRIX, then get Berkeley DB version 1 from
2250F<http://reality.sgi.com/ariel>. It has the patches necessary to
2251compile properly on IRIX 5.3.
610ab055 2252
1f70e1ea 2253=head1 COPYRIGHT
3b35bae3 2254
6d02d21f 2255Copyright (c) 1995-2003 Paul Marquess. All rights reserved. This program
a9fd575d 2256is free software; you can redistribute it and/or modify it under the
2257same terms as Perl itself.
3b35bae3 2258
1f70e1ea 2259Although B<DB_File> is covered by the Perl license, the library it
2260makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2261copyright and its own license. Please take the time to read it.
3b35bae3 2262
a9fd575d 2263Here are are few words taken from the Berkeley DB FAQ (at
b90e71be 2264F<http://www.sleepycat.com>) regarding the license:
68dc0745 2265
a9fd575d 2266 Do I have to license DB to use it in Perl scripts?
3b35bae3 2267
a9fd575d 2268 No. The Berkeley DB license requires that software that uses
2269 Berkeley DB be freely redistributable. In the case of Perl, that
2270 software is Perl, and not your scripts. Any Perl scripts that you
2271 write are your property, including scripts that make use of
2272 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2273 place any restriction on what you may do with them.
88108326 2274
1f70e1ea 2275If you are in any doubt about the license situation, contact either the
2276Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1 2277
2278
3b35bae3 2279=head1 SEE ALSO
2280
5bbd4290 2281L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2282L<perldbmfilter>
3b35bae3 2283
3b35bae3 2284=head1 AUTHOR
2285
8e07c86e 2286The DB_File interface was written by Paul Marquess
5bbd4290 2287E<lt>pmqs@cpan.orgE<gt>.
d3ef3b8a 2288Questions about the DB system itself may be addressed to
5bbd4290 2289E<lt>db@sleepycat.comE<gt>.
3b35bae3 2290
2291=cut