lib/Tie/File.pm

   1
   2 package Tie::File;
   3 require 5.005;
   4 use Carp;
   5 use POSIX 'SEEK_SET';
   6 use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'O_WRONLY', 'O_RDONLY';
   7 sub O_ACCMODE () { O_RDONLY | O_RDWR | O_WRONLY }
   8
   9 $VERSION = "0.91";
  10 my $DEFAULT_MEMORY_SIZE = 1<<21;    # 2 megabytes
  11 my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records
  12 my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful
  13
  14 my %good_opt = map {$_ => 1, "-$_" => 1}
  15                qw(memory dw_size mode recsep discipline autodefer autochomp);
  16
  17 sub TIEARRAY {
  18   if (@_ % 2 != 0) {
  19     croak "usage: tie \@array, $_[0], filename, [option => value]...";
  20   }
  21   my ($pack, $file, %opts) = @_;
  22
  23   # transform '-foo' keys into 'foo' keys
  24   for my $key (keys %opts) {
  25     unless ($good_opt{$key}) {
  26       croak("$pack: Unrecognized option '$key'\n");
  27     }
  28     my $okey = $key;
  29     if ($key =~ s/^-+//) {
  30       $opts{$key} = delete $opts{$okey};
  31     }
  32   }
  33
  34   unless (defined $opts{memory}) {
  35     # default is the larger of the default cache size and the
  36     # deferred-write buffer size (if specified)
  37     $opts{memory} = $DEFAULT_MEMORY_SIZE;
  38     $opts{memory} = $opts{dw_size}
  39       if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
  40     # Dora Winifred Read
  41   }
  42   $opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
  43   if ($opts{dw_size} > $opts{memory}) {
  44       croak("$pack: dw_size may not be larger than total memory allocation\n");
  45   }
  46   # are we in deferred-write mode?
  47   $opts{defer} = 0 unless defined $opts{defer};
  48   $opts{deferred} = {};         # no records are presently deferred
  49   $opts{deferred_s} = 0;        # count of total bytes in ->{deferred}
  50   $opts{deferred_max} = -1;     # empty
  51
  52   # the cache is a hash instead of an array because it is likely to be
  53   # sparsely populated
  54   $opts{cache} = Tie::File::Cache->new($opts{memory});
  55
  56   # autodeferment is enabled by default
  57   $opts{autodefer} = 1 unless defined $opts{autodefer};
  58   $opts{autodeferring} = 0;     # but is not initially active
  59   $opts{ad_history} = [];
  60   $opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD
  61     unless defined $opts{autodefer_threshhold};
  62   $opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD
  63     unless defined $opts{autodefer_filelen_threshhold};
  64
  65   $opts{offsets} = [0];
  66   $opts{filename} = $file;
  67   unless (defined $opts{recsep}) {
  68     $opts{recsep} = _default_recsep();
  69   }
  70   $opts{recseplen} = length($opts{recsep});
  71   if ($opts{recseplen} == 0) {
  72     croak "Empty record separator not supported by $pack";
  73   }
  74
  75   $opts{autochomp} = 1 unless defined $opts{autochomp};
  76
  77   $opts{mode} = O_CREAT|O_RDWR unless defined $opts{mode};
  78   $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
  79
  80   my $fh;
  81
  82   if (UNIVERSAL::isa($file, 'GLOB')) {
  83     # We use 1 here on the theory that some systems
  84     # may not indicate failure if we use 0.
  85     # MSWin32 does not indicate failure with 0, but I don't know if
  86     # it will indicate failure with 1 or not.
  87     unless (seek $file, 1, SEEK_SET) {
  88       croak "$pack: your filehandle does not appear to be seekable";
  89     }
  90     seek $file, 0, SEEK_SET     # put it back
  91     $fh = $file;                # setting binmode is the user's problem
  92   } elsif (ref $file) {
  93     croak "usage: tie \@array, $pack, filename, [option => value]...";
  94   } else {
  95     $fh = \do { local *FH };   # only works in 5.005 and later
  96     sysopen $fh, $file, $opts{mode}, 0666 or return;
  97     binmode $fh;
  98   }
  99   { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
 100   if (defined $opts{discipline} && $] >= 5.006) {
 101     # This avoids a compile-time warning under 5.005
 102     eval 'binmode($fh, $opts{discipline})';
 103     croak $@ if $@ =~ /unknown discipline/i;
 104     die if $@;
 105   }
 106   $opts{fh} = $fh;
 107
 108   bless \%opts => $pack;
 109 }
 110
 111 sub FETCH {
 112   my ($self, $n) = @_;
 113   my $rec;
 114
 115   # check the defer buffer
 116   if ($self->_is_deferring && exists $self->{deferred}{$n}) {
 117     $rec = $self->{deferred}{$n};
 118   } else {
 119     $rec = $self->_fetch($n);
 120   }
 121
 122   $self->_chomp1($rec);
 123 }
 124
 125 # Chomp many records in-place; return nothing useful
 126 sub _chomp {
 127   my $self = shift;
 128   return unless $self->{autochomp};
 129   if ($self->{autochomp}) {
 130     for (@_) {
 131       next unless defined;
 132       substr($_, - $self->{recseplen}) = "";
 133     }
 134   }
 135 }
 136
 137 # Chomp one record in-place; return modified record
 138 sub _chomp1 {
 139   my ($self, $rec) = @_;
 140   return $rec unless $self->{autochomp};
 141   return unless defined $rec;
 142   substr($rec, - $self->{recseplen}) = "";
 143   $rec;
 144 }
 145
 146 sub _fetch {
 147   my ($self, $n) = @_;
 148
 149   # check the record cache
 150   { my $cached = $self->{cache}->lookup($n);
 151     return $cached if defined $cached;
 152   }
 153
 154   if ($#{$self->{offsets}} < $n) {
 155     return if $self->{eof};
 156     my $o = $self->_fill_offsets_to($n);
 157     # If it's still undefined, there is no such record, so return 'undef'
 158     return unless defined $o;
 159   }
 160
 161   my $fh = $self->{FH};
 162   $self->_seek($n);             # we can do this now that offsets is populated
 163   my $rec = $self->_read_record;
 164
 165 # If we happen to have just read the first record, check to see if
 166 # the length of the record matches what 'tell' says.  If not, Tie::File
 167 # won't work, and should drop dead.
 168 #
 169 #  if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
 170 #    if (defined $self->{discipline}) {
 171 #      croak "I/O discipline $self->{discipline} not supported";
 172 #    } else {
 173 #      croak "File encoding not supported";
 174 #    }
 175 #  }
 176
 177   $self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing};
 178   $rec;
 179 }
 180
 181 sub STORE {
 182   my ($self, $n, $rec) = @_;
 183   die "STORE called from _check_integrity!" if $DIAGNOSTIC;
 184
 185   $self->_fixrecs($rec);
 186
 187   if ($self->{autodefer}) {
 188     $self->_annotate_ad_history($n);
 189   }
 190
 191   return $self->_store_deferred($n, $rec) if $self->_is_deferring;
 192
 193
 194   # We need this to decide whether the new record will fit
 195   # It incidentally populates the offsets table
 196   # Note we have to do this before we alter the cache
 197   # 20020324 Wait, but this DOES alter the cache.  TODO BUG?
 198   my $oldrec = $self->_fetch($n);
 199
 200   if (defined($self->{cache}->lookup($n))) {
 201     $self->{cache}->update($n, $rec);
 202   }
 203
 204   if (not defined $oldrec) {
 205     # We're storing a record beyond the end of the file
 206     $self->_extend_file_to($n+1);
 207     $oldrec = $self->{recsep};
 208   }
 209   my $len_diff = length($rec) - length($oldrec);
 210
 211   # length($oldrec) here is not consistent with text mode  TODO XXX BUG
 212   $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));
 213
 214   # now update the offsets
 215   # array slice goes from element $n+1 (the first one to move)
 216   # to the end
 217   for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
 218     $_ += $len_diff;
 219   }
 220 }
 221
 222 sub _store_deferred {
 223   my ($self, $n, $rec) = @_;
 224   $self->{cache}->remove($n);
 225   my $old_deferred = $self->{deferred}{$n};
 226
 227   if (defined $self->{deferred_max} && $n > $self->{deferred_max}) {
 228     $self->{deferred_max} = $n;
 229   }
 230   $self->{deferred}{$n} = $rec;
 231
 232   my $len_diff = length($rec);
 233   $len_diff -= length($old_deferred) if defined $old_deferred;
 234   $self->{deferred_s} += $len_diff;
 235   $self->{cache}->adj_limit(-$len_diff);
 236   if ($self->{deferred_s} > $self->{dw_size}) {
 237     $self->_flush;
 238   } elsif ($self->_cache_too_full) {
 239     $self->_cache_flush;
 240   }
 241 }
 242
 243 # Remove a single record from the deferred-write buffer without writing it
 244 # The record need not be present
 245 sub _delete_deferred {
 246   my ($self, $n) = @_;
 247   my $rec = delete $self->{deferred}{$n};
 248   return unless defined $rec;
 249
 250   if (defined $self->{deferred_max}
 251       && $n == $self->{deferred_max}) {
 252     undef $self->{deferred_max};
 253   }
 254
 255   $self->{deferred_s} -= length $rec;
 256   $self->{cache}->adj_limit(length $rec);
 257 }
 258
 259 sub FETCHSIZE {
 260   my $self = shift;
 261   my $n = $#{$self->{offsets}};
 262   # 20020317 Change this to binary search
 263   unless ($self->{eof}) {
 264     while (defined ($self->_fill_offsets_to($n+1))) {
 265       ++$n;
 266     }
 267   }
 268   my $top_deferred = $self->_defer_max;
 269   $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1;
 270   $n;
 271 }
 272
 273 sub STORESIZE {
 274   my ($self, $len) = @_;
 275
 276   if ($self->{autodefer}) {
 277     $self->_annotate_ad_history('STORESIZE');
 278   }
 279
 280   my $olen = $self->FETCHSIZE;
 281   return if $len == $olen;      # Woo-hoo!
 282
 283   # file gets longer
 284   if ($len > $olen) {
 285     if ($self->_is_deferring) {
 286       for ($olen .. $len-1) {
 287         $self->_store_deferred($_, $self->{recsep});
 288       }
 289     } else {
 290       $self->_extend_file_to($len);
 291     }
 292     return;
 293   }
 294
 295   # file gets shorter
 296   if ($self->_is_deferring) {
 297     # TODO maybe replace this with map-plus-assignment?
 298     for (grep $_ >= $len, keys %{$self->{deferred}}) {
 299       $self->_delete_deferred($_);
 300     }
 301     $self->{deferred_max} = $len-1;
 302   }
 303
 304   $self->_seek($len);
 305   $self->_chop_file;
 306   $#{$self->{offsets}} = $len;
 307 #  $self->{offsets}[0] = 0;      # in case we just chopped this
 308
 309   $self->{cache}->remove(grep $_ >= $len, $self->{cache}->keys);
 310 }
 311
 312 sub PUSH {
 313   my $self = shift;
 314   $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
 315 #  $self->FETCHSIZE;  # av.c takes care of this for me
 316 }
 317
 318 sub POP {
 319   my $self = shift;
 320   my $size = $self->FETCHSIZE;
 321   return if $size == 0;
 322 #  print STDERR "# POPPITY POP POP POP\n";
 323   scalar $self->SPLICE($size-1, 1);
 324 }
 325
 326 sub SHIFT {
 327   my $self = shift;
 328   scalar $self->SPLICE(0, 1);
 329 }
 330
 331 sub UNSHIFT {
 332   my $self = shift;
 333   $self->SPLICE(0, 0, @_);
 334   # $self->FETCHSIZE; # av.c takes care of this for me
 335 }
 336
 337 sub CLEAR {
 338   my $self = shift;
 339
 340   if ($self->{autodefer}) {
 341     $self->_annotate_ad_history('CLEAR');
 342   }
 343
 344   $self->_seekb(0);
 345   $self->_chop_file;
 346     $self->{cache}->set_limit($self->{memory});
 347     $self->{cache}->empty;
 348   @{$self->{offsets}} = (0);
 349   %{$self->{deferred}}= ();
 350     $self->{deferred_s} = 0;
 351     $self->{deferred_max} = -1;
 352 }
 353
 354 sub EXTEND {
 355   my ($self, $n) = @_;
 356
 357   # No need to pre-extend anything in this case
 358   return if $self->_is_deferring;
 359
 360   $self->_fill_offsets_to($n);
 361   $self->_extend_file_to($n);
 362 }
 363
 364 sub DELETE {
 365   my ($self, $n) = @_;
 366
 367   if ($self->{autodefer}) {
 368     $self->_annotate_ad_history('DELETE');
 369   }
 370
 371   my $lastrec = $self->FETCHSIZE-1;
 372   my $rec = $self->FETCH($n);
 373   $self->_delete_deferred($n) if $self->_is_deferring;
 374   if ($n == $lastrec) {
 375     $self->_seek($n);
 376     $self->_chop_file;
 377     $#{$self->{offsets}}--;
 378     $self->{cache}->remove($n);
 379     # perhaps in this case I should also remove trailing null records?
 380     # 20020316
 381     # Note that delete @a[-3..-1] deletes the records in the wrong order,
 382     # so we only chop the very last one out of the file.  We could repair this
 383     # by tracking deleted records inside the object.
 384   } elsif ($n < $lastrec) {
 385     $self->STORE($n, "");
 386   }
 387   $rec;
 388 }
 389
 390 sub EXISTS {
 391   my ($self, $n) = @_;
 392   return 1 if exists $self->{deferred}{$n};
 393   $self->_fill_offsets_to($n);  # I think this is unnecessary
 394   $n < $self->FETCHSIZE;
 395 }
 396
 397 sub SPLICE {
 398   my $self = shift;
 399
 400   if ($self->{autodefer}) {
 401     $self->_annotate_ad_history('SPLICE');
 402   }
 403
 404   $self->_flush if $self->_is_deferring; # move this up?
 405   if (wantarray) {
 406     $self->_chomp(my @a = $self->_splice(@_));
 407     @a;
 408   } else {
 409     $self->_chomp1(scalar $self->_splice(@_));
 410   }
 411 }
 412
 413 sub DESTROY {
 414   my $self = shift;
 415   $self->flush if $self->_is_deferring;
 416   $self->{cache}->delink if defined $self->{cache}; # break circular link
 417 }
 418
 419 sub _splice {
 420   my ($self, $pos, $nrecs, @data) = @_;
 421   my @result;
 422
 423   $pos = 0 unless defined $pos;
 424
 425   # Deal with negative and other out-of-range positions
 426   # Also set default for $nrecs
 427   {
 428     my $oldsize = $self->FETCHSIZE;
 429     $nrecs = $oldsize unless defined $nrecs;
 430     my $oldpos = $pos;
 431
 432     if ($pos < 0) {
 433       $pos += $oldsize;
 434       if ($pos < 0) {
 435         croak "Modification of non-creatable array value attempted, subscript $oldpos";
 436       }
 437     }
 438
 439     if ($pos > $oldsize) {
 440       return unless @data;
 441       $pos = $oldsize;          # This is what perl does for normal arrays
 442     }
 443   }
 444
 445   $self->_fixrecs(@data);
 446   my $data = join '', @data;
 447   my $datalen = length $data;
 448   my $oldlen = 0;
 449
 450   # compute length of data being removed
 451   for ($pos .. $pos+$nrecs-1) {
 452     last unless defined $self->_fill_offsets_to($_);
 453     my $rec = $self->_fetch($_);
 454     last unless defined $rec;
 455     push @result, $rec;
 456
 457     # Why don't we just use length($rec) here?
 458     # Because that record might have come from the cache.  _splice
 459     # might have been called to flush out the deferred-write records,
 460     # and in this case length($rec) is the length of the record to be
 461     # *written*, not the length of the actual record in the file.  But
 462     # the offsets are still true. 20020322
 463     $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_]
 464       if defined $self->{offsets}[$_+1];
 465   }
 466
 467   # Modify the file
 468   $self->_twrite($data, $self->{offsets}[$pos], $oldlen);
 469
 470   # update the offsets table part 1
 471   # compute the offsets of the new records:
 472   my @new_offsets;
 473   if (@data) {
 474     push @new_offsets, $self->{offsets}[$pos];
 475     for (0 .. $#data-1) {
 476       push @new_offsets, $new_offsets[-1] + length($data[$_]);
 477     }
 478   }
 479
 480   # If we're about to splice out the end of the offsets table...
 481   if ($pos + $nrecs >= @{$self->{offsets}}) {
 482     $self->{eof} = 0;           # ... the table is no longer complete
 483   }
 484   splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);
 485
 486   # update the offsets table part 2
 487   # adjust the offsets of the following old records
 488   for ($pos+@data .. $#{$self->{offsets}}) {
 489     $self->{offsets}[$_] += $datalen - $oldlen;
 490   }
 491   # If we scrubbed out all known offsets, regenerate the trivial table
 492   # that knows that the file does indeed start at 0.
 493   $self->{offsets}[0] = 0 unless @{$self->{offsets}};
 494   # If the file got longer, the offsets table is no longer complete
 495   $self->{eof} = 0 if @data > $nrecs;
 496
 497
 498   # Perhaps the following cache foolery could be factored out
 499   # into a bunch of mor opaque cache functions.  For example,
 500   # it's odd to delete a record from the cache and then remove
 501   # it from the LRU queue later on; there should be a function to
 502   # do both at once.
 503
 504   # update the read cache, part 1
 505   # modified records
 506   for ($pos .. $pos+$nrecs-1) {
 507     my $new = $data[$_-$pos];
 508     if (defined $new) {
 509       $self->{cache}->update($_, $new);
 510     } else {
 511       $self->{cache}->remove($_);
 512     }
 513   }
 514
 515   # update the read cache, part 2
 516   # moved records - records past the site of the change
 517   # need to be renumbered
 518   # Maybe merge this with the previous block?
 519   {
 520     my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->keys;
 521     my @newkeys = map $_-$nrecs+@data, @oldkeys;
 522     $self->{cache}->rekey(\@oldkeys, \@newkeys);
 523   }
 524
 525   # Now there might be too much data in the cache, if we spliced out
 526   # some short records and spliced in some long ones.  If so, flush
 527   # the cache.
 528   $self->_cache_flush;
 529
 530   # Yes, the return value of 'splice' *is* actually this complicated
 531   wantarray ? @result : @result ? $result[-1] : undef;
 532 }
 533
 534 # write data into the file
 535 # $data is the data to be written.
 536 # it should be written at position $pos, and should overwrite
 537 # exactly $len of the following bytes.
 538 # Note that if length($data) > $len, the subsequent bytes will have to
 539 # be moved up, and if length($data) < $len, they will have to
 540 # be moved down
 541 sub _twrite {
 542   my ($self, $data, $pos, $len) = @_;
 543
 544   unless (defined $pos) {
 545     die "\$pos was undefined in _twrite";
 546   }
 547
 548   my $len_diff = length($data) - $len;
 549
 550   if ($len_diff == 0) {          # Woo-hoo!
 551     my $fh = $self->{fh};
 552     $self->_seekb($pos);
 553     $self->_write_record($data);
 554     return;                     # well, that was easy.
 555   }
 556
 557   # the two records are of different lengths
 558   # our strategy here: rewrite the tail of the file,
 559   # reading ahead one buffer at a time
 560   # $bufsize is required to be at least as large as the data we're overwriting
 561   my $bufsize = _bufsize($len_diff);
 562   my ($writepos, $readpos) = ($pos, $pos+$len);
 563   my $next_block;
 564   my $more_data;
 565
 566   # Seems like there ought to be a way to avoid the repeated code
 567   # and the special case here.  The read(1) is also a little weird.
 568   # Think about this.
 569   do {
 570     $self->_seekb($readpos);
 571     my $br = read $self->{fh}, $next_block, $bufsize;
 572     $more_data = read $self->{fh}, my($dummy), 1;
 573     $self->_seekb($writepos);
 574     $self->_write_record($data);
 575     $readpos += $br;
 576     $writepos += length $data;
 577     $data = $next_block;
 578   } while $more_data;           # BUG XXX TODO how could this have worked?
 579   $self->_seekb($writepos);
 580   $self->_write_record($next_block);
 581
 582   # There might be leftover data at the end of the file
 583   $self->_chop_file if $len_diff < 0;
 584 }
 585
 586 # If a record does not already end with the appropriate terminator
 587 # string, append one.
 588 sub _fixrecs {
 589   my $self = shift;
 590   for (@_) {
 591     $_ = "" unless defined $_;
 592     $_ .= $self->{recsep}
 593       unless substr($_, - $self->{recseplen}) eq $self->{recsep};
 594   }
 595 }
 596
 597
 598 ################################################################
 599 #
 600 # Basic read, write, and seek
 601 #
 602
 603 # seek to the beginning of record #$n
 604 # Assumes that the offsets table is already correctly populated
 605 #
 606 # Note that $n=-1 has a special meaning here: It means the start of
 607 # the last known record; this may or may not be the very last record
 608 # in the file, depending on whether the offsets table is fully populated.
 609 #
 610 sub _seek {
 611   my ($self, $n) = @_;
 612   my $o = $self->{offsets}[$n];
 613   defined($o)
 614     or confess("logic error: undefined offset for record $n");
 615   seek $self->{fh}, $o, SEEK_SET
 616     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 617 }
 618
 619 sub _seekb {
 620   my ($self, $b) = @_;
 621   seek $self->{fh}, $b, SEEK_SET
 622     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 623 }
 624
 625 # populate the offsets table up to the beginning of record $n
 626 # return the offset of record $n
 627 sub _fill_offsets_to {
 628   my ($self, $n) = @_;
 629
 630   return $self->{offsets}[$n] if $self->{eof};
 631
 632   my $fh = $self->{fh};
 633   local *OFF = $self->{offsets};
 634   my $rec;
 635
 636   until ($#OFF >= $n) {
 637     my $o = $OFF[-1];
 638     $self->_seek(-1);           # tricky -- see comment at _seek
 639     $rec = $self->_read_record;
 640     if (defined $rec) {
 641       push @OFF, tell $fh;
 642     } else {
 643       $self->{eof} = 1;
 644       return;                   # It turns out there is no such record
 645     }
 646   }
 647
 648   # we have now read all the records up to record n-1,
 649   # so we can return the offset of record n
 650   return $OFF[$n];
 651 }
 652
 653 # assumes that $rec is already suitably terminated
 654 sub _write_record {
 655   my ($self, $rec) = @_;
 656   my $fh = $self->{fh};
 657   print $fh $rec
 658     or die "Couldn't write record: $!";  # "Should never happen."
 659 #  $self->{_written} += length($rec);
 660 }
 661
 662 sub _read_record {
 663   my $self = shift;
 664   my $rec;
 665   { local $/ = $self->{recsep};
 666     my $fh = $self->{fh};
 667     $rec = <$fh>;
 668   }
 669   return unless defined $rec;
 670   if (substr($rec, -$self->{recseplen}) ne $self->{recsep}) {
 671     # improperly terminated final record --- quietly fix it.
 672 #    my $ac = substr($rec, -$self->{recseplen});
 673 #    $ac =~ s/\n/\\n/g;
 674     unless ($self->{rdonly}) {
 675       my $fh = $self->{fh};
 676       print $fh $self->{recsep};
 677     }
 678     $rec .= $self->{recsep};
 679   }
 680 #  $self->{_read} += length($rec) if defined $rec;
 681   $rec;
 682 }
 683
 684 sub _rw_stats {
 685   my $self = shift;
 686   @{$self}{'_read', '_written'};
 687 }
 688
 689 ################################################################
 690 #
 691 # Read cache management
 692
 693 sub _cache_flush {
 694   my ($self) = @_;
 695   $self->{cache}->reduce_size_to($self->{memory} - $self->{deferred_s});
 696 }
 697
 698 sub _cache_too_full {
 699   my $self = shift;
 700   $self->{cache}->bytes + $self->{deferred_s} >= $self->{memory};
 701 }
 702
 703 ################################################################
 704 #
 705 # File custodial services
 706 #
 707
 708
 709 # We have read to the end of the file and have the offsets table
 710 # entirely populated.  Now we need to write a new record beyond
 711 # the end of the file.  We prepare for this by writing
 712 # empty records into the file up to the position we want
 713 #
 714 # assumes that the offsets table already contains the offset of record $n,
 715 # if it exists, and extends to the end of the file if not.
 716 sub _extend_file_to {
 717   my ($self, $n) = @_;
 718   $self->_seek(-1);             # position after the end of the last record
 719   my $pos = $self->{offsets}[-1];
 720
 721   # the offsets table has one entry more than the total number of records
 722   my $extras = $n - $#{$self->{offsets}};
 723
 724   # Todo : just use $self->{recsep} x $extras here?
 725   while ($extras-- > 0) {
 726     $self->_write_record($self->{recsep});
 727     push @{$self->{offsets}}, tell $self->{fh};
 728   }
 729 }
 730
 731 # Truncate the file at the current position
 732 sub _chop_file {
 733   my $self = shift;
 734   truncate $self->{fh}, tell($self->{fh});
 735 }
 736
 737
 738 # compute the size of a buffer suitable for moving
 739 # all the data in a file forward $n bytes
 740 # ($n may be negative)
 741 # The result should be at least $n.
 742 sub _bufsize {
 743   my $n = shift;
 744   return 8192 if $n < 0;
 745   my $b = $n & ~8191;
 746   $b += 8192 if $n & 8191;
 747   $b;
 748 }
 749
 750 ################################################################
 751 #
 752 # Miscellaneous public methods
 753 #
 754
 755 # Lock the file
 756 sub flock {
 757   my ($self, $op) = @_;
 758   unless (@_ <= 3) {
 759     my $pack = ref $self;
 760     croak "Usage: $pack\->flock([OPERATION])";
 761   }
 762   my $fh = $self->{fh};
 763   $op = LOCK_EX unless defined $op;
 764   flock $fh, $op;
 765 }
 766
 767 # Get/set autochomp option
 768 sub autochomp {
 769   my $self = shift;
 770   if (@_) {
 771     my $old = $self->{autochomp};
 772     $self->{autochomp} = shift;
 773     $old;
 774   } else {
 775     $self->{autochomp};
 776   }
 777 }
 778
 779 ################################################################
 780 #
 781 # Matters related to deferred writing
 782 #
 783
 784 # Defer writes
 785 sub defer {
 786   my $self = shift;
 787   $self->_stop_autodeferring;
 788   @{$self->{ad_history}} = ();
 789   $self->{defer} = 1;
 790 }
 791
 792 # Flush deferred writes
 793 #
 794 # This could be better optimized to write the file in one pass, instead
 795 # of one pass per block of records.  But that will require modifications
 796 # to _twrite, so I should have a good _twite test suite first.
 797 sub flush {
 798   my $self = shift;
 799
 800   $self->_flush;
 801   $self->{defer} = 0;
 802 }
 803
 804 sub _flush {
 805   my $self = shift;
 806   my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
 807
 808   while (@writable) {
 809     # gather all consecutive records from the front of @writable
 810     my $first_rec = shift @writable;
 811     my $last_rec = $first_rec+1;
 812     ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
 813     --$last_rec;
 814     $self->_fill_offsets_to($last_rec);
 815     $self->_extend_file_to($last_rec);
 816     $self->_splice($first_rec, $last_rec-$first_rec+1,
 817                    @{$self->{deferred}}{$first_rec .. $last_rec});
 818   }
 819
 820   $self->_discard;               # clear out defered-write-cache
 821 }
 822
 823 # Discard deferred writes and disable future deferred writes
 824 sub discard {
 825   my $self = shift;
 826   $self->_discard;
 827   $self->{defer} = 0;
 828 }
 829
 830 # Discard deferred writes, but retain old deferred writing mode
 831 sub _discard {
 832   my $self = shift;
 833   %{$self->{deferred}} = ();
 834   $self->{deferred_s}  = 0;
 835   $self->{deferred_max}  = -1;
 836   $self->{cache}->set_limit($self->{memory});
 837 }
 838
 839 # Deferred writing is enabled, either explicitly ($self->{defer})
 840 # or automatically ($self->{autodeferring})
 841 sub _is_deferring {
 842   my $self = shift;
 843   $self->{defer} || $self->{autodeferring};
 844 }
 845
 846 # The largest record number of any deferred record
 847 sub _defer_max {
 848   my $self = shift;
 849   return $self->{deferred_max} if defined $self->{deferred_max};
 850   my $max = -1;
 851   for my $key (keys %{$self->{deferred}}) {
 852     $max = $key if $key > $max;
 853   }
 854   $self->{deferred_max} = $max;
 855   $max;
 856 }
 857
 858 ################################################################
 859 #
 860 # Matters related to autodeferment
 861 #
 862
 863 # Get/set autodefer option
 864 sub autodefer {
 865   my $self = shift;
 866   if (@_) {
 867     my $old = $self->{autodefer};
 868     $self->{autodefer} = shift;
 869     if ($old) {
 870       $self->_stop_autodeferring;
 871       @{$self->{ad_history}} = ();
 872     }
 873     $old;
 874   } else {
 875     $self->{autodefer};
 876   }
 877 }
 878
 879 # The user is trying to store record #$n Record that in the history,
 880 # and then enable (or disable) autodeferment if that seems useful.
 881 # Note that it's OK for $n to be a non-number, as long as the function
 882 # is prepared to deal with that.  Nobody else looks at the ad_history.
 883 #
 884 # Now, what does the ad_history mean, and what is this function doing?
 885 # Essentially, the idea is to enable autodeferring when we see that the
 886 # user has made three consecutive STORE calls to three consecutive records.
 887 # ("Three" is actually ->{autodefer_threshhold}.)
 888 # A STORE call for record #$n inserts $n into the autodefer history,
 889 # and if the history contains three consecutive records, we enable
 890 # autodeferment.  An ad_history of [X, Y] means that the most recent
 891 # STOREs were for records X, X+1, ..., Y, in that order.
 892 #
 893 # Inserting a nonconsecutive number erases the history and starts over.
 894 #
 895 # Performing a special operation like SPLICE erases the history.
 896 #
 897 # There's one special case: CLEAR means that CLEAR was just called.
 898 # In this case, we prime the history with [-2, -1] so that if the next
 899 # write is for record 0, autodeferring goes on immediately.  This is for
 900 # the common special case of "@a = (...)".
 901 #
 902 sub _annotate_ad_history {
 903   my ($self, $n) = @_;
 904   return unless $self->{autodefer}; # feature is disabled
 905   return if $self->{defer};     # already in explicit defer mode
 906   return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold};
 907
 908   local *H = $self->{ad_history};
 909   if ($n eq 'CLEAR') {
 910     @H = (-2, -1);              # prime the history with fake records
 911     $self->_stop_autodeferring;
 912   } elsif ($n =~ /^\d+$/) {
 913     if (@H == 0) {
 914       @H =  ($n, $n);
 915     } else {                    # @H == 2
 916       if ($H[1] == $n-1) {      # another consecutive record
 917         $H[1]++;
 918         if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) {
 919           $self->{autodeferring} = 1;
 920         }
 921       } else {                  # nonconsecutive- erase and start over
 922         @H = ($n, $n);
 923         $self->_stop_autodeferring;
 924       }
 925     }
 926   } else {                      # SPLICE or STORESIZE or some such
 927     @H = ();
 928     $self->_stop_autodeferring;
 929   }
 930 }
 931
 932 # If autodferring was enabled, cut it out and discard the history
 933 sub _stop_autodeferring {
 934   my $self = shift;
 935   if ($self->{autodeferring}) {
 936     $self->_flush;
 937   }
 938   $self->{autodeferring} = 0;
 939 }
 940
 941 ################################################################
 942
 943
 944 # This is NOT a method.  It is here for two reasons:
 945 #  1. To factor a fairly complicated block out of the constructor
 946 #  2. To provide access for the test suite, which need to be sure
 947 #     files are being written properly.
 948 sub _default_recsep {
 949   my $recsep = $/;
 950   if ($^O eq 'MSWin32') {       # Dos too?
 951     # Windows users expect files to be terminated with \r\n
 952     # But $/ is set to \n instead
 953     # Note that this also transforms \n\n into \r\n\r\n.
 954     # That is a feature.
 955     $recsep =~ s/\n/\r\n/g;
 956   }
 957   $recsep;
 958 }
 959
 960 # Utility function for _check_integrity
 961 sub _ci_warn {
 962   my $msg = shift;
 963   $msg =~ s/\n/\\n/g;
 964   $msg =~ s/\r/\\r/g;
 965   print "# $msg\n";
 966 }
 967
 968 # Given a file, make sure the cache is consistent with the
 969 # file contents and the internal data structures are consistent with
 970 # each other.  Returns true if everything checks out, false if not
 971 #
 972 # The $file argument is no longer used.  It is retained for compatibility
 973 # with the existing test suite.
 974 sub _check_integrity {
 975   my ($self, $file, $warn) = @_;
 976   my $rsl = $self->{recseplen};
 977   my $rs  = $self->{recsep};
 978   my $good = 1;
 979   local *_;                     # local $_ does not work here
 980   local $DIAGNOSTIC = 1;
 981
 982   if (not defined $rs) {
 983     _ci_warn("recsep is undef!");
 984     $good = 0;
 985   } elsif ($rs eq "") {
 986     _ci_warn("recsep is empty!");
 987     $good = 0;
 988   } elsif ($rsl != length $rs) {
 989     my $ln = length $rs;
 990     _ci_warn("recsep <$rs> has length $ln, should be $rsl");
 991     $good = 0;
 992   }
 993
 994   if (not defined $self->{offsets}[0]) {
 995     _ci_warn("offset 0 is missing!");
 996     $good = 0;
 997   } elsif ($self->{offsets}[0] != 0) {
 998     _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
 999     $good = 0;
1000   }
1001
1002   my $cached = 0;
1003   {
1004     local *F = $self->{fh};
1005     seek F, 0, SEEK_SET;
1006     local $. = 0;
1007     local $/ = $rs;
1008
1009     while (<F>) {
1010       my $n = $. - 1;
1011       my $cached = $self->{cache}->_produce($n);
1012       my $offset = $self->{offsets}[$.];
1013       my $ao = tell F;
1014       if (defined $offset && $offset != $ao) {
1015         _ci_warn("rec $n: offset <$offset> actual <$ao>");
1016         $good = 0;
1017       }
1018       if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) {
1019         $good = 0;
1020         _ci_warn("rec $n: cached <$cached> actual <$_>");
1021       }
1022       if (defined $cached && substr($cached, -$rsl) ne $rs) {
1023         $good = 0;
1024         _ci_warn("rec $n in the cache is missing the record separator");
1025       }
1026       if (! defined $offset && $self->{eof}) {
1027         $good = 0;
1028         _ci_warn("The offset table was marked complete, but it is missing element $.");
1029       }
1030     }
1031     if (@{$self->{offsets}} > $.+1) {
1032         $good = 0;
1033         my $n = @{$self->{offsets}};
1034         _ci_warn("The offset table has $n items, but the file has only $.");
1035     }
1036
1037     my $deferring = $self->_is_deferring;
1038     for my $n ($self->{cache}->keys) {
1039       my $r = $self->{cache}->_produce($n);
1040       $cached += length($r);
1041       next if $n+1 <= $.;         # checked this already
1042       _ci_warn("spurious caching of record $n");
1043       $good = 0;
1044     }
1045     my $b = $self->{cache}->bytes;
1046     if ($cached != $b) {
1047       _ci_warn("cache size is $b, should be $cached");
1048       $good = 0;
1049     }
1050   }
1051
1052   $good = 0 unless $self->{cache}->_check_integrity;
1053
1054   # Now let's check the deferbuffer
1055   # Unless deferred writing is enabled, it should be empty
1056   if (! $self->_is_deferring && %{$self->{deferred}}) {
1057     _ci_warn("deferred writing disabled, but deferbuffer nonempty");
1058     $good = 0;
1059   }
1060
1061   # Any record in the deferbuffer should *not* be present in the readcache
1062   my $deferred_s = 0;
1063   while (my ($n, $r) = each %{$self->{deferred}}) {
1064     $deferred_s += length($r);
1065     if (defined $self->{cache}->_produce($n)) {
1066       _ci_warn("record $n is in the deferbuffer *and* the readcache");
1067       $good = 0;
1068     }
1069     if (substr($r, -$rsl) ne $rs) {
1070       _ci_warn("rec $n in the deferbuffer is missing the record separator");
1071       $good = 0;
1072     }
1073   }
1074
1075   # Total size of deferbuffer should match internal total
1076   if ($deferred_s != $self->{deferred_s}) {
1077     _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
1078     $good = 0;
1079   }
1080
1081   # Total size of deferbuffer should not exceed the specified limit
1082   if ($deferred_s > $self->{dw_size}) {
1083     _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit of $self->{dw_size}");
1084     $good = 0;
1085   }
1086
1087   # Total size of cached data should not exceed the specified limit
1088   if ($deferred_s + $cached > $self->{memory}) {
1089     my $total = $deferred_s + $cached;
1090     _ci_warn("total stored data size is $total which exceeds the limit of $self->{memory}");
1091     $good = 0;
1092   }
1093
1094   # Stuff related to autodeferment
1095   if (!$self->{autodefer} && @{$self->{ad_history}}) {
1096     _ci_warn("autodefer is disabled, but ad_history is nonempty");
1097     $good = 0;
1098   }
1099   if ($self->{autodeferring} && $self->{defer}) {
1100     _ci_warn("both autodeferring and explicit deferring are active");
1101     $good = 0;
1102   }
1103   if (@{$self->{ad_history}} == 0) {
1104     # That's OK, no additional tests required
1105   } elsif (@{$self->{ad_history}} == 2) {
1106     my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}};
1107     if (@non_number) {
1108       my $msg;
1109       { local $" = ')(';
1110         $msg = "ad_history contains non-numbers (@{$self->{ad_history}})";
1111       }
1112       _ci_warn($msg);
1113       $good = 0;
1114     } elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) {
1115       _ci_warn("ad_history has nonsensical values @{$self->{ad_history}}");
1116       $good = 0;
1117     }
1118   } else {
1119     _ci_warn("ad_history has bad length <@{$self->{ad_history}}>");
1120     $good = 0;
1121   }
1122
1123   $good;
1124 }
1125
1126 ################################################################
1127 #
1128 # Tie::File::Cache
1129 #
1130 # Read cache
1131
1132 package Tie::File::Cache;
1133 $Tie::File::Cache::VERSION = $Tie::File::VERSION;
1134 use Carp ':DEFAULT', 'confess';
1135
1136 sub HEAP () { 0 }
1137 sub HASH () { 1 }
1138 sub MAX  () { 2 }
1139 sub BYTES() { 3 }
1140 use strict 'vars';
1141
1142 sub new {
1143   my ($pack, $max) = @_;
1144   local *_;
1145   croak "missing argument to ->new" unless defined $max;
1146   my $self = [];
1147   bless $self => $pack;
1148   @$self = (Tie::File::Heap->new($self), {}, $max, 0);
1149   $self;
1150 }
1151
1152 sub adj_limit {
1153   my ($self, $n) = @_;
1154   $self->[MAX] += $n;
1155 }
1156
1157 sub set_limit {
1158   my ($self, $n) = @_;
1159   $self->[MAX] = $n;
1160 }
1161
1162 # For internal use only
1163 # Will be called by the heap structure to notify us that a certain
1164 # piece of data has moved from one heap element to another.
1165 # $k is the hash key of the item
1166 # $n is the new index into the heap at which it is stored
1167 # If $n is undefined, the item has been removed from the heap.
1168 sub _heap_move {
1169   my ($self, $k, $n) = @_;
1170   if (defined $n) {
1171     $self->[HASH]{$k} = $n;
1172   } else {
1173     delete $self->[HASH]{$k};
1174   }
1175 }
1176
1177 sub insert {
1178   my ($self, $key, $val) = @_;
1179   local *_;
1180   croak "missing argument to ->insert" unless defined $key;
1181   unless (defined $self->[MAX]) {
1182     confess "undefined max" ;
1183   }
1184   confess "undefined val" unless defined $val;
1185   return if length($val) > $self->[MAX];
1186   my $oldnode = $self->[HASH]{$key};
1187   if (defined $oldnode) {
1188     my $oldval = $self->[HEAP]->set_val($oldnode, $val);
1189     $self->[BYTES] -= length($oldval);
1190   } else {
1191     $self->[HEAP]->insert($key, $val);
1192   }
1193   $self->[BYTES] += length($val);
1194   $self->flush;
1195 }
1196
1197 sub expire {
1198   my $self = shift;
1199   my $old_data = $self->[HEAP]->popheap;
1200   return unless defined $old_data;
1201   $self->[BYTES] -= length $old_data;
1202   $old_data;
1203 }
1204
1205 sub remove {
1206   my ($self, @keys) = @_;
1207   my @result;
1208   for my $key (@keys) {
1209     next unless exists $self->[HASH]{$key};
1210     my $old_data = $self->[HEAP]->remove($self->[HASH]{$key});
1211     $self->[BYTES] -= length $old_data;
1212     push @result, $old_data;
1213   }
1214   @result;
1215 }
1216
1217 sub lookup {
1218   my ($self, $key) = @_;
1219   local *_;
1220   croak "missing argument to ->lookup" unless defined $key;
1221   if (exists $self->[HASH]{$key}) {
1222     $self->[HEAP]->lookup($self->[HASH]{$key});
1223   } else {
1224     return;
1225   }
1226 }
1227
1228 # For internal use only
1229 sub _produce {
1230   my ($self, $key) = @_;
1231   my $loc = $self->[HASH]{$key};
1232   return unless defined $loc;
1233   $self->[HEAP][$loc][2];
1234 }
1235
1236 # For internal use only
1237 sub _promote {
1238   my ($self, $key) = @_;
1239   $self->[HEAP]->promote($self->[HASH]{$key});
1240 }
1241
1242 sub empty {
1243   my ($self) = @_;
1244   %{$self->[HASH]} = ();
1245     $self->[BYTES] = 0;
1246     $self->[HEAP]->empty;
1247 }
1248
1249 sub is_empty {
1250   my ($self) = @_;
1251   keys %{$self->[HASH]} == 0;
1252 }
1253
1254 sub update {
1255   my ($self, $key, $val) = @_;
1256   local *_;
1257   croak "missing argument to ->update" unless defined $key;
1258   if (length($val) > $self->[MAX]) {
1259     my $oldval = $self->remove($key);
1260     $self->[BYTES] -= length($oldval) if defined $oldval;
1261   } elsif (exists $self->[HASH]{$key}) {
1262     my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val);
1263     $self->[BYTES] += length($val);
1264     $self->[BYTES] -= length($oldval) if defined $oldval;
1265   } else {
1266     $self->[HEAP]->insert($key, $val);
1267     $self->[BYTES] += length($val);
1268   }
1269   $self->flush;
1270 }
1271
1272 sub rekey {
1273   my ($self, $okeys, $nkeys) = @_;
1274   local *_;
1275   my %map;
1276   @map{@$okeys} = @$nkeys;
1277   croak "missing argument to ->rekey" unless defined $nkeys;
1278   croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys;
1279   my %adjusted;                 # map new keys to heap indices
1280   # You should be able to cut this to one loop TODO XXX
1281   for (0 .. $#$okeys) {
1282     $adjusted{$nkeys->[$_]} = delete $self->[HASH]{$okeys->[$_]};
1283   }
1284   while (my ($nk, $ix) = each %adjusted) {
1285     # @{$self->[HASH]}{keys %adjusted} = values %adjusted;
1286     $self->[HEAP]->rekey($ix, $nk);
1287     $self->[HASH]{$nk} = $ix;
1288   }
1289 }
1290
1291 sub keys {
1292   my $self = shift;
1293   my @a = keys %{$self->[HASH]};
1294   @a;
1295 }
1296
1297 sub bytes {
1298   my $self = shift;
1299   $self->[BYTES];
1300 }
1301
1302 sub reduce_size_to {
1303   my ($self, $max) = @_;
1304   until ($self->is_empty || $self->[BYTES] <= $max) {
1305     $self->expire;
1306   }
1307 }
1308
1309 sub flush {
1310   my $self = shift;
1311   until ($self->is_empty || $self->[BYTES] <= $self->[MAX]) {
1312     $self->expire;
1313   }
1314 }
1315
1316 # For internal use only
1317 sub _produce_lru {
1318   my $self = shift;
1319   $self->[HEAP]->expire_order;
1320 }
1321
1322 sub _check_integrity {
1323   my $self = shift;
1324   $self->[HEAP]->_check_integrity;
1325 }
1326
1327 sub delink {
1328   my $self = shift;
1329   $self->[HEAP] = undef;        # Bye bye heap
1330 }
1331
1332 ################################################################
1333 #
1334 # Tie::File::Heap
1335 #
1336 # Heap data structure for use by cache LRU routines
1337
1338 package Tie::File::Heap;
1339 use Carp ':DEFAULT', 'confess';
1340 $Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION;
1341 sub SEQ () { 0 };
1342 sub KEY () { 1 };
1343 sub DAT () { 2 };
1344
1345 sub new {
1346   my ($pack, $cache) = @_;
1347   die "$pack: Parent cache object $cache does not support _heap_move method"
1348     unless eval { $cache->can('_heap_move') };
1349   my $self = [[0,$cache,0]];
1350   bless $self => $pack;
1351 }
1352
1353 # Allocate a new sequence number, larger than all previously allocated numbers
1354 sub _nseq {
1355   my $self = shift;
1356   $self->[0][0]++;
1357 }
1358
1359 sub _cache {
1360   my $self = shift;
1361   $self->[0][1];
1362 }
1363
1364 sub _nelts {
1365   my $self = shift;
1366   $self->[0][2];
1367 }
1368
1369 sub _nelts_inc {
1370   my $self = shift;
1371   ++$self->[0][2];
1372 }
1373
1374 sub _nelts_dec {
1375   my $self = shift;
1376   --$self->[0][2];
1377 }
1378
1379 sub is_empty {
1380   my $self = shift;
1381   $self->_nelts == 0;
1382 }
1383
1384 sub empty {
1385   my $self = shift;
1386   $#$self = 0;
1387   $self->[0][2] = 0;
1388   $self->[0][0] = 0;            # might as well reset the sequence numbers
1389 }
1390
1391 # notify the parent cache object that we moved something
1392 sub _heap_move {
1393   my $self = shift;
1394   $self->_cache->_heap_move(@_);
1395 }
1396
1397 # Insert a piece of data into the heap with the indicated sequence number.
1398 # The item with the smallest sequence number is always at the top.
1399 # If no sequence number is specified, allocate a new one and insert the
1400 # item at the bottom.
1401 sub insert {
1402   my ($self, $key, $data, $seq) = @_;
1403   $seq = $self->_nseq unless defined $seq;
1404   $self->_insert_new([$seq, $key, $data]);
1405 }
1406
1407 # Insert a new, fresh item at the bottom of the heap
1408 sub _insert_new {
1409   my ($self, $item) = @_;
1410   my $i = @$self;
1411   $i = int($i/2) until defined $self->[$i/2];
1412   $self->[$i] = $item;
1413   $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1414   $self->_nelts_inc;
1415 }
1416
1417 # Insert [$data, $seq] pair at or below item $i in the heap.
1418 # If $i is omitted, default to 1 (the top element.)
1419 sub _insert {
1420   my ($self, $item, $i) = @_;
1421   $self->_check_loc($i) if defined $i;
1422   $i = 1 unless defined $i;
1423   until (! defined $self->[$i]) {
1424     if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older
1425       ($self->[$i], $item) = ($item, $self->[$i]);
1426       $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1427     }
1428     # If either is undefined, go that way.  Otherwise, choose at random
1429     my $dir;
1430     $dir = 0 if !defined $self->[2*$i];
1431     $dir = 1 if !defined $self->[2*$i+1];
1432     $dir = int(rand(2)) unless defined $dir;
1433     $i = 2*$i + $dir;
1434   }
1435   $self->[$i] = $item;
1436   $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1437   $self->_nelts_inc;
1438 }
1439
1440 # Remove the item at node $i from the heap, moving child items upwards.
1441 # The item with the smallest sequence number is always at the top.
1442 # Moving items upwards maintains this condition.
1443 # Return the removed item.
1444 sub remove {
1445   my ($self, $i) = @_;
1446   $i = 1 unless defined $i;
1447   my $top = $self->[$i];
1448   return unless defined $top;
1449   while (1) {
1450     my $ii;
1451     my ($L, $R) = (2*$i, 2*$i+1);
1452
1453     # If either is undefined, go the other way.
1454     # Otherwise, go towards the smallest.
1455     last unless defined $self->[$L] || defined $self->[$R];
1456     $ii = $R if not defined $self->[$L];
1457     $ii = $L if not defined $self->[$R];
1458     unless (defined $ii) {
1459       $ii = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1460     }
1461
1462     $self->[$i] = $self->[$ii]; # Promote child to fill vacated spot
1463     $self->[0][1]->_heap_move($self->[$i][KEY], $i);
1464     $i = $ii; # Fill new vacated spot
1465   }
1466   $self->[0][1]->_heap_move($top->[KEY], undef);
1467   undef $self->[$i];
1468   $self->_nelts_dec;
1469   return $top->[DAT];
1470 }
1471
1472 sub popheap {
1473   my $self = shift;
1474   $self->remove(1);
1475 }
1476
1477 # set the sequence number of the indicated item to a higher number
1478 # than any other item in the heap, and bubble the item down to the
1479 # bottom.
1480 sub promote {
1481   my ($self, $n) = @_;
1482   $self->_check_loc($n);
1483   $self->[$n][SEQ] = $self->_nseq;
1484   my $i = $n;
1485   while (1) {
1486     my ($L, $R) = (2*$i, 2*$i+1);
1487     my $dir;
1488     last unless defined $self->[$L] || defined $self->[$R];
1489     $dir = $R unless defined $self->[$L];
1490     $dir = $L unless defined $self->[$R];
1491     unless (defined $dir) {
1492       $dir = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1493     }
1494     @{$self}[$i, $dir] = @{$self}[$dir, $i];
1495     for ($i, $dir) {
1496       $self->[0][1]->_heap_move($self->[$_][KEY], $_) if defined $self->[$_];
1497     }
1498     $i = $dir;
1499   }
1500 }
1501
1502 # Return item $n from the heap, promoting its LRU status
1503 sub lookup {
1504   my ($self, $n) = @_;
1505   $self->_check_loc($n);
1506   my $val = $self->[$n];
1507   $self->promote($n);
1508   $val->[DAT];
1509 }
1510
1511
1512 # Assign a new value for node $n, promoting it to the bottom of the heap
1513 sub set_val {
1514   my ($self, $n, $val) = @_;
1515   $self->_check_loc($n);
1516   my $oval = $self->[$n][DAT];
1517   $self->[$n][DAT] = $val;
1518   $self->promote($n);
1519   return $oval;
1520 }
1521
1522 # The hask key has changed for an item;
1523 # alter the heap's record of the hash key
1524 sub rekey {
1525   my ($self, $n, $new_key) = @_;
1526   $self->_check_loc($n);
1527   $self->[$n][KEY] = $new_key;
1528 }
1529
1530 sub _check_loc {
1531   my ($self, $n) = @_;
1532   unless (defined $self->[$n]) {
1533     confess "_check_loc($n) failed";
1534   }
1535 }
1536
1537 sub _check_integrity {
1538   my $self = shift;
1539   my $good = 1;
1540   unless (eval {$self->[0][1]->isa("Tie::File::Cache")}) {
1541     print "# Element 0 of heap corrupt\n";
1542     $good = 0;
1543   }
1544   $good = 0 unless $self->_satisfies_heap_condition(1);
1545   for my $i (2 .. $#{$self}) {
1546     my $p = int($i/2);          # index of parent node
1547     if (defined $self->[$i] && ! defined $self->[$p]) {
1548       print "# Element $i of heap defined, but parent $p isn't\n";
1549       $good = 0;
1550     }
1551   }
1552   return $good;
1553 }
1554
1555 sub _satisfies_heap_condition {
1556   my $self = shift;
1557   my $n = shift || 1;
1558   my $good = 1;
1559   for (0, 1) {
1560     my $c = $n*2 + $_;
1561     next unless defined $self->[$c];
1562     if ($self->[$n][SEQ] >= $self->[$c]) {
1563       print "# Node $n of heap does not predate node $c\n";
1564       $good = 0 ;
1565     }
1566     $good = 0 unless $self->_satisfies_heap_condition($c);
1567   }
1568   return $good;
1569 }
1570
1571 # Return a list of all the values, sorted by expiration order
1572 sub expire_order {
1573   my $self = shift;
1574   my @nodes = sort {$a->[SEQ] <=> $b->[SEQ]} $self->_nodes;
1575   map { $_->[KEY] } @nodes;
1576 }
1577
1578 sub _nodes {
1579   my $self = shift;
1580   my $i = shift || 1;
1581   return unless defined $self->[$i];
1582   ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1));
1583 }
1584
1585 "Cogito, ergo sum.";  # don't forget to return a true value from the file
1586
1587 =head1 NAME
1588
1589 Tie::File - Access the lines of a disk file via a Perl array
1590
1591 =head1 SYNOPSIS
1592
1593         # This file documents Tie::File version 0.90
1594
1595         tie @array, 'Tie::File', filename or die ...;
1596
1597         $array[13] = 'blah';     # line 13 of the file is now 'blah'
1598         print $array[42];        # display line 42 of the file
1599
1600         $n_recs = @array;        # how many records are in the file?
1601         $#array -= 2;            # chop two records off the end
1602
1603
1604         for (@array) {
1605           s/PERL/Perl/g;         # Replace PERL with Perl everywhere in the file
1606         }
1607
1608         # These are just like regular push, pop, unshift, shift, and splice
1609         # Except that they modify the file in the way you would expect
1610
1611         push @array, new recs...;
1612         my $r1 = pop @array;
1613         unshift @array, new recs...;
1614         my $r1 = shift @array;
1615         @old_recs = splice @array, 3, 7, new recs...;
1616
1617         untie @array;            # all finished
1618
1619
1620 =head1 DESCRIPTION
1621
1622 C<Tie::File> represents a regular text file as a Perl array.  Each
1623 element in the array corresponds to a record in the file.  The first
1624 line of the file is element 0 of the array; the second line is element
1625 1, and so on.
1626
1627 The file is I<not> loaded into memory, so this will work even for
1628 gigantic files.
1629
1630 Changes to the array are reflected in the file immediately.
1631
1632 Lazy people and beginners may now stop reading the manual.
1633
1634 =head2 C<recsep>
1635
1636 What is a 'record'?  By default, the meaning is the same as for the
1637 C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
1638 probably C<"\n">.  (Minor exception: on dos and Win32 systems, a
1639 'record' is a string terminated by C<"\r\n">.)  You may change the
1640 definition of "record" by supplying the C<recsep> option in the C<tie>
1641 call:
1642
1643         tie @array, 'Tie::File', $file, recsep => 'es';
1644
1645 This says that records are delimited by the string C<es>.  If the file
1646 contained the following data:
1647
1648         Curse these pesky flies!\n
1649
1650 then the C<@array> would appear to have four elements:
1651
1652         "Curse th"
1653         "e p"
1654         "ky fli"
1655         "!\n"
1656
1657 An undefined value is not permitted as a record separator.  Perl's
1658 special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
1659 emulated.
1660
1661 Records read from the tied array do not have the record separator
1662 string on the end; this is to allow
1663
1664         $array[17] .= "extra";
1665
1666 to work as expected.
1667
1668 (See L<"autochomp">, below.)  Records stored into the array will have
1669 the record separator string appended before they are written to the
1670 file, if they don't have one already.  For example, if the record
1671 separator string is C<"\n">, then the following two lines do exactly
1672 the same thing:
1673
1674         $array[17] = "Cherry pie";
1675         $array[17] = "Cherry pie\n";
1676
1677 The result is that the contents of line 17 of the file will be
1678 replaced with "Cherry pie"; a newline character will separate line 17
1679 from line 18.  This means that this code will do nothing:
1680
1681         chomp $array[17];
1682
1683 Because the C<chomp>ed value will have the separator reattached when
1684 it is written back to the file.  There is no way to create a file
1685 whose trailing record separator string is missing.
1686
1687 Inserting records that I<contain> the record separator string is not
1688 supported by this module.  It will probably produce a reasonable
1689 result, but what this result will be may change in a future version.
1690 Use 'splice' to insert records or to replace one record with several.
1691
1692 =head2 C<autochomp>
1693
1694 Normally, array elements have the record separator removed, so that if
1695 the file contains the text
1696
1697         Gold
1698         Frankincense
1699         Myrrh
1700
1701 the tied array will appear to contain C<("Gold", "Frankincense",
1702 "Myrrh")>.  If you set C<autochomp> to a false value, the record
1703 separator will not be removed.  If the file above was tied with
1704
1705         tie @gifts, "Tie::File", $gifts, autochomp => 0;
1706
1707 then the array C<@gifts> would appear to contain C<("Gold\n",
1708 "Frankincense\n", "Myrrh\n")>, or (on Win32 systems) C<("Gold\r\n",
1709 "Frankincense\r\n", "Myrrh\r\n")>.
1710
1711 =head2 C<mode>
1712
1713 Normally, the specified file will be opened for read and write access,
1714 and will be created if it does not exist.  (That is, the flags
1715 C<O_RDWR | O_CREAT> are supplied in the C<open> call.)  If you want to
1716 change this, you may supply alternative flags in the C<mode> option.
1717 See L<Fcntl> for a listing of available flags.
1718 For example:
1719
1720         # open the file if it exists, but fail if it does not exist
1721         use Fcntl 'O_RDWR';
1722         tie @array, 'Tie::File', $file, mode => O_RDWR;
1723
1724         # create the file if it does not exist
1725         use Fcntl 'O_RDWR', 'O_CREAT';
1726         tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
1727
1728         # open an existing file in read-only mode
1729         use Fcntl 'O_RDONLY';
1730         tie @array, 'Tie::File', $file, mode => O_RDONLY;
1731
1732 Opening the data file in write-only or append mode is not supported.
1733
1734 =head2 C<memory>
1735
1736 This is an upper limit on the amount of memory that C<Tie::File> will
1737 consume at any time while managing the file.  This is used for two
1738 things: managing the I<read cache> and managing the I<deferred write
1739 buffer>.
1740
1741 Records read in from the file are cached, to avoid having to re-read
1742 them repeatedly.  If you read the same record twice, the first time it
1743 will be stored in memory, and the second time it will be fetched from
1744 the I<read cache>.  The amount of data in the read cache will not
1745 exceed the value you specified for C<memory>.  If C<Tie::File> wants
1746 to cache a new record, but the read cache is full, it will make room
1747 by expiring the least-recently visited records from the read cache.
1748
1749 The default memory limit is 2Mib.  You can adjust the maximum read
1750 cache size by supplying the C<memory> option.  The argument is the
1751 desired cache size, in bytes.
1752
1753         # I have a lot of memory, so use a large cache to speed up access
1754         tie @array, 'Tie::File', $file, memory => 20_000_000;
1755
1756 Setting the memory limit to 0 will inhibit caching; records will be
1757 fetched from disk every time you examine them.
1758
1759 The C<memory> value is not an absolute or exact limit on the memory
1760 used.  C<Tie::File> objects contains some structures besides the read
1761 cache and the deferred write buffer, whose sizes are not charged
1762 against C<memory>.
1763
1764 =head2 C<dw_size>
1765
1766 (This is an advanced feature.  Skip this section on first reading.)
1767
1768 If you use deferred writing (See L<"Deferred Writing">, below) then
1769 data you write into the array will not be written directly to the
1770 file; instead, it will be saved in the I<deferred write buffer> to be
1771 written out later.  Data in the deferred write buffer is also charged
1772 against the memory limit you set with the C<memory> option.
1773
1774 You may set the C<dw_size> option to limit the amount of data that can
1775 be saved in the deferred write buffer.  This limit may not exceed the
1776 total memory limit.  For example, if you set C<dw_size> to 1000 and
1777 C<memory> to 2500, that means that no more than 1000 bytes of deferred
1778 writes will be saved up.  The space available for the read cache will
1779 vary, but it will always be at least 1500 bytes (if the deferred write
1780 buffer is full) and it could grow as large as 2500 bytes (if the
1781 deferred write buffer is empty.)
1782
1783 If you don't specify a C<dw_size>, it defaults to the entire memory
1784 limit.
1785
1786 =head2 Option Format
1787
1788 C<-mode> is a synonym for C<mode>.  C<-recsep> is a synonym for
1789 C<recsep>.  C<-memory> is a synonym for C<memory>.  You get the
1790 idea.
1791
1792 =head1 Public Methods
1793
1794 The C<tie> call returns an object, say C<$o>.  You may call
1795
1796         $rec = $o->FETCH($n);
1797         $o->STORE($n, $rec);
1798
1799 to fetch or store the record at line C<$n>, respectively; similarly
1800 the other tied array methods.  (See L<perltie> for details.)  You may
1801 also call the following methods on this object:
1802
1803 =head2 C<flock>
1804
1805         $o->flock(MODE)
1806
1807 will lock the tied file.  C<MODE> has the same meaning as the second
1808 argument to the Perl built-in C<flock> function; for example
1809 C<LOCK_SH> or C<LOCK_EX | LOCK_NB>.  (These constants are provided by
1810 the C<use Fcntl ':flock'> declaration.)
1811
1812 C<MODE> is optional; the default is C<LOCK_EX>.
1813
1814 C<Tie::File> promises that the following sequence of operations will
1815 be safe:
1816
1817         my $o = tie @array, "Tie::File", $filename;
1818         $o->flock;
1819
1820 In particular, C<Tie::File> will I<not> read or write the file during
1821 the C<tie> call.  (Exception: Using C<mode =E<gt> O_TRUNC> will, of
1822 course, erase the file during the C<tie> call.  If you want to do this
1823 safely, then open the file without C<O_TRUNC>, lock the file, and use
1824 C<@array = ()>.)
1825
1826 The best way to unlock a file is to discard the object and untie the
1827 array.  It is probably unsafe to unlock the file without also untying
1828 it, because if you do, changes may remain unwritten inside the object.
1829 That is why there is no shortcut for unlocking.  If you really want to
1830 unlock the file prematurely, you know what to do; if you don't know
1831 what to do, then don't do it.
1832
1833 All the usual warnings about file locking apply here.  In particular,
1834 note that file locking in Perl is B<advisory>, which means that
1835 holding a lock will not prevent anyone else from reading, writing, or
1836 erasing the file; it only prevents them from getting another lock at
1837 the same time.  Locks are analogous to green traffic lights: If you
1838 have a green light, that does not prevent the idiot coming the other
1839 way from plowing into you sideways; it merely guarantees to you that
1840 the idiot does not also have a green light at the same time.
1841
1842 =head2 C<autochomp>
1843
1844         my $old_value = $o->autochomp(0);    # disable autochomp option
1845         my $old_value = $o->autochomp(1);    #  enable autochomp option
1846
1847         my $ac = $o->autochomp();   # recover current value
1848
1849 See L<"autochomp">, above.
1850
1851 =head2 C<defer>, C<flush>, C<discard>, and C<autodefer>
1852
1853 See L<"Deferred Writing">, below.
1854
1855 =head1 Tying to an already-opened filehandle
1856
1857 If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
1858 of the other C<IO> modules, you may use:
1859
1860         tie @array, 'Tie::File', $fh, ...;
1861
1862 Similarly if you opened that handle C<FH> with regular C<open> or
1863 C<sysopen>, you may use:
1864
1865         tie @array, 'Tie::File', \*FH, ...;
1866
1867 Handles that were opened write-only won't work.  Handles that were
1868 opened read-only will work as long as you don't try to modify the
1869 array.  Handles must be attached to seekable sources of data---that
1870 means no pipes or sockets.  If C<Tie::File> can detect that you
1871 supplied a non-seekable handle, the C<tie> call will throw an
1872 exception.  (On Unix systems, it can detect this.)
1873
1874 =head1 Deferred Writing
1875
1876 (This is an advanced feature.  Skip this section on first reading.)
1877
1878 Normally, modifying a C<Tie::File> array writes to the underlying file
1879 immediately.  Every assignment like C<$a[3] = ...> rewrites as much of
1880 the file as is necessary; typically, everything from line 3 through
1881 the end will need to be rewritten.  This is the simplest and most
1882 transparent behavior.  Performance even for large files is reasonably
1883 good.
1884
1885 However, under some circumstances, this behavior may be excessively
1886 slow.  For example, suppose you have a million-record file, and you
1887 want to do:
1888
1889         for (@FILE) {
1890           $_ = "> $_";
1891         }
1892
1893 The first time through the loop, you will rewrite the entire file,
1894 from line 0 through the end.  The second time through the loop, you
1895 will rewrite the entire file from line 1 through the end.  The third
1896 time through the loop, you will rewrite the entire file from line 2 to
1897 the end.  And so on.
1898
1899 If the performance in such cases is unacceptable, you may defer the
1900 actual writing, and then have it done all at once.  The following loop
1901 will perform much better for large files:
1902
1903         (tied @a)->defer;
1904         for (@a) {
1905           $_ = "> $_";
1906         }
1907         (tied @a)->flush;
1908
1909 If C<Tie::File>'s memory limit is large enough, all the writing will
1910 done in memory.  Then, when you call C<-E<gt>flush>, the entire file
1911 will be rewritten in a single pass.
1912
1913 (Actually, the preceding discussion is something of a fib.  You don't
1914 need to enable deferred writing to get good performance for this
1915 common case, because C<Tie::File> will do it for you automatically
1916 unless you specifically tell it not to.  See L<"autodeferring">,
1917 below.)
1918
1919 Calling C<-E<gt>flush> returns the array to immediate-write mode.  If
1920 you wish to discard the deferred writes, you may call C<-E<gt>discard>
1921 instead of C<-E<gt>flush>.  Note that in some cases, some of the data
1922 will have been written already, and it will be too late for
1923 C<-E<gt>discard> to discard all the changes.  Support for
1924 C<-E<gt>discard> may be withdrawn in a future version of C<Tie::File>.
1925
1926 Deferred writes are cached in memory up to the limit specified by the
1927 C<dw_size> option (see above).  If the deferred-write buffer is full
1928 and you try to write still more deferred data, the buffer will be
1929 flushed.  All buffered data will be written immediately, the buffer
1930 will be emptied, and the now-empty space will be used for future
1931 deferred writes.
1932
1933 If the deferred-write buffer isn't yet full, but the total size of the
1934 buffer and the read cache would exceed the C<memory> limit, the oldest
1935 records will be expired from the read cache until the total size is
1936 under the limit.
1937
1938 C<push>, C<pop>, C<shift>, C<unshift>, and C<splice> cannot be
1939 deferred.  When you perform one of these operations, any deferred data
1940 is written to the file and the operation is performed immediately.
1941 This may change in a future version.
1942
1943 If you resize the array with deferred writing enabled, the file will
1944 be resized immediately, but deferred records will not be written.
1945 This has a surprising consequence: C<@a = (...)> erases the file
1946 immediately, but the writing of the actual data is deferred.  This
1947 might be a bug.  If it is a bug, it will be fixed in a future version.
1948
1949 =head2 Autodeferring
1950
1951 C<Tie::File> tries to guess when deferred writing might be helpful,
1952 and to turn it on and off automatically.
1953
1954         for (@a) {
1955           $_ = "> $_";
1956         }
1957
1958 In this example, only the first two assignments will be done
1959 immediately; after this, all the changes to the file will be deferred
1960 up to the user-specified memory limit.
1961
1962 You should usually be able to ignore this and just use the module
1963 without thinking about deferring.  However, special applications may
1964 require fine control over which writes are deferred, or may require
1965 that all writes be immediate.  To disable the autodeferment feature,
1966 use
1967
1968         (tied @o)->autodefer(0);
1969
1970 or
1971
1972         tie @array, 'Tie::File', $file, autodefer => 0;
1973
1974
1975 Similarly, C<-E<gt>autodefer(1)> re-enables autodeferment, and
1976 C<-E<gt>autodefer()> recovers the current value of the autodefer setting.
1977
1978 =head1 CAVEATS
1979
1980 (That's Latin for 'warnings'.)
1981
1982 =over 4
1983
1984 =item *
1985
1986 This is BETA RELEASE SOFTWARE.  It may have bugs.  See the discussion
1987 below about the (lack of any) warranty.
1988
1989 In particular, this means that the interface may change in
1990 incompatible ways from one version to the next, without warning.  That
1991 has happened at least once already.  The interface will freeze before
1992 Perl 5.8 is released, probably sometime in April 2002.
1993
1994 =item *
1995
1996 Reasonable effort was made to make this module efficient.  Nevertheless,
1997 changing the size of a record in the middle of a large file will
1998 always be fairly slow, because everything after the new record must be
1999 moved.
2000
2001 =item *
2002
2003 The behavior of tied arrays is not precisely the same as for regular
2004 arrays.  For example:
2005
2006         # This DOES print "How unusual!"
2007         undef $a[10];  print "How unusual!\n" if defined $a[10];
2008
2009 C<undef>-ing a C<Tie::File> array element just blanks out the
2010 corresponding record in the file.  When you read it back again, you'll
2011 get the empty string, so the supposedly-C<undef>'ed value will be
2012 defined.  Similarly, if you have C<autochomp> disabled, then
2013
2014         # This DOES print "How unusual!" if 'autochomp' is disabled
2015         undef $a[10];
2016         print "How unusual!\n" if $a[10];
2017
2018 Because when C<autochomp> is disabled, C<$a[10]> will read back as
2019 C<"\n"> (or whatever the record separator string is.)
2020
2021 There are other minor differences, particularly regarding C<exists>
2022 and C<delete>, but in general, the correspondence is extremely close.
2023
2024 =item *
2025
2026 Not quite every effort was made to make this module as efficient as
2027 possible.  C<FETCHSIZE> should use binary search instead of linear
2028 search.
2029
2030 The performance of the C<flush> method could be improved.  At present,
2031 it still rewrites the tail of the file once for each block of
2032 contiguous lines to be changed.  In the typical case, this will result
2033 in only one rewrite, but in peculiar cases it might be bad.  It should
2034 be possible to perform I<all> deferred writing with a single rewrite.
2035
2036 Profiling suggests that these defects are probably minor; in any
2037 event, they will be fixed in a future version of the module.
2038
2039 =item *
2040
2041 I have supposed that since this module is concerned with file I/O,
2042 almost all normal use of it will be heavily I/O bound.  This means
2043 that the time to maintain complicated data structures inside the
2044 module will be dominated by the time to actually perform the I/O.
2045 When there was an opportunity to spend CPU time to avoid doing I/O, I
2046 tried to take it.
2047
2048 =item *
2049
2050 You might be tempted to think that deferred writing is like
2051 transactions, with C<flush> as C<commit> and C<discard> as
2052 C<rollback>, but it isn't, so don't.
2053
2054 =back
2055
2056 =head1 SUBCLASSING
2057
2058 This version promises absolutely nothing about the internals, which
2059 may change without notice.  A future version of the module will have a
2060 well-defined and stable subclassing API.
2061
2062 =head1 WHAT ABOUT C<DB_File>?
2063
2064 People sometimes point out that L<DB_File> will do something similar,
2065 and ask why C<Tie::File> module is necessary.
2066
2067 There are a number of reasons that you might prefer C<Tie::File>.
2068 A list is available at C<http://perl.plover.com/TieFile/why-not-DB_File>.
2069
2070 =head1 AUTHOR
2071
2072 Mark Jason Dominus
2073
2074 To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
2075
2076 To receive an announcement whenever a new version of this module is
2077 released, send a blank email message to
2078 C<mjd-perl-tiefile-subscribe@plover.com>.
2079
2080 The most recent version of this module, including documentation and
2081 any news of importance, will be available at
2082
2083         http://perl.plover.com/TieFile/
2084
2085
2086 =head1 LICENSE
2087
2088 C<Tie::File> version 0.90 is copyright (C) 2002 Mark Jason Dominus.
2089
2090 This library is free software; you may redistribute it and/or modify
2091 it under the same terms as Perl itself.
2092
2093 These terms are your choice of any of (1) the Perl Artistic Licence,
2094 or (2) version 2 of the GNU General Public License as published by the
2095 Free Software Foundation, or (3) any later version of the GNU General
2096 Public License.
2097
2098 This library is distributed in the hope that it will be useful,
2099 but WITHOUT ANY WARRANTY; without even the implied warranty of
2100 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2101 GNU General Public License for more details.
2102
2103 You should have received a copy of the GNU General Public License
2104 along with this library program; it should be in the file C<COPYING>.
2105 If not, write to the Free Software Foundation, Inc., 59 Temple Place,
2106 Suite 330, Boston, MA 02111 USA
2107
2108 For licensing inquiries, contact the author at:
2109
2110         Mark Jason Dominus
2111         255 S. Warnock St.
2112         Philadelphia, PA 19107
2113
2114 =head1 WARRANTY
2115
2116 C<Tie::File> version 0.90 comes with ABSOLUTELY NO WARRANTY.
2117 For details, see the license.
2118
2119 =head1 THANKS
2120
2121 Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
2122 core when I hadn't written it yet, and for generally being helpful,
2123 supportive, and competent.  (Usually the rule is "choose any one.")
2124 Also big thanks to Abhijit Menon-Sen for all of the same things.
2125
2126 Special thanks to Craig Berry and Peter Prymmer (for VMS portability
2127 help), Randy Kobes (for Win32 portability help), Clinton Pierce and
2128 Autrijus Tang (for heroic eleventh-hour Win32 testing above and beyond
2129 the call of duty), Michael G Schwern (for testing advice), and the
2130 rest of the CPAN testers (for testing generally).
2131
2132 Additional thanks to:
2133 Edward Avis /
2134 Gerrit Haase /
2135 Nikola Knezevic /
2136 Nick Ing-Simmons /
2137 Tassilo von Parseval /
2138 H. Dieter Pearcey /
2139 Slaven Rezic /
2140 Peter Scott /
2141 Peter Somu /
2142 Autrijus Tang (again) /
2143 Tels /
2144 Juerd Wallboer
2145
2146 =head1 TODO
2147
2148 More tests.  (The cache and heap modules need more unit tests.)
2149
2150 Improve SPLICE algorithm to use deferred writing machinery.
2151
2152 Cleverer strategy for flushing deferred writes.
2153
2154 More tests.  (Stuff I didn't think of yet.)
2155
2156 Paragraph mode?
2157
2158 Fixed-length mode.  Leave-blanks mode.
2159
2160 Maybe an autolocking mode?
2161
2162 Record locking with fcntl()?  Then the module might support an undo
2163 log and get real transactions.  What a tour de force that would be.
2164
2165 More tests.
2166
2167 =cut
2168