lib/Tie/File.pm

   1
   2 package Tie::File;
   3 require 5.005;
   4 use Carp;
   5 use POSIX 'SEEK_SET';
   6 use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
   7
   8 $VERSION = "0.90";
   9 my $DEFAULT_MEMORY_SIZE = 1<<21;    # 2 megabytes
  10 my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records
  11 my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful
  12
  13 my %good_opt = map {$_ => 1, "-$_" => 1}
  14                qw(memory dw_size mode recsep discipline autodefer autochomp);
  15
  16 sub TIEARRAY {
  17   if (@_ % 2 != 0) {
  18     croak "usage: tie \@array, $_[0], filename, [option => value]...";
  19   }
  20   my ($pack, $file, %opts) = @_;
  21
  22   # transform '-foo' keys into 'foo' keys
  23   for my $key (keys %opts) {
  24     unless ($good_opt{$key}) {
  25       croak("$pack: Unrecognized option '$key'\n");
  26     }
  27     my $okey = $key;
  28     if ($key =~ s/^-+//) {
  29       $opts{$key} = delete $opts{$okey};
  30     }
  31   }
  32
  33   unless (defined $opts{memory}) {
  34     # default is the larger of the default cache size and the
  35     # deferred-write buffer size (if specified)
  36     $opts{memory} = $DEFAULT_MEMORY_SIZE;
  37     $opts{memory} = $opts{dw_size}
  38       if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
  39     # Dora Winifred Read
  40   }
  41   $opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
  42   if ($opts{dw_size} > $opts{memory}) {
  43       croak("$pack: dw_size may not be larger than total memory allocation\n");
  44   }
  45   # are we in deferred-write mode?
  46   $opts{defer} = 0 unless defined $opts{defer};
  47   $opts{deferred} = {};         # no records are presently deferred
  48   $opts{deferred_s} = 0;        # count of total bytes in ->{deferred}
  49   $opts{deferred_max} = -1;     # empty
  50
  51   # the cache is a hash instead of an array because it is likely to be
  52   # sparsely populated
  53   $opts{cache} = Tie::File::Cache->new($opts{memory});
  54
  55   # autodeferment is enabled by default
  56   $opts{autodefer} = 1 unless defined $opts{autodefer};
  57   $opts{autodeferring} = 0;     # but is not initially active
  58   $opts{ad_history} = [];
  59   $opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD
  60     unless defined $opts{autodefer_threshhold};
  61   $opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD
  62     unless defined $opts{autodefer_filelen_threshhold};
  63
  64   $opts{offsets} = [0];
  65   $opts{filename} = $file;
  66   unless (defined $opts{recsep}) {
  67     $opts{recsep} = _default_recsep();
  68   }
  69   $opts{recseplen} = length($opts{recsep});
  70   if ($opts{recseplen} == 0) {
  71     croak "Empty record separator not supported by $pack";
  72   }
  73
  74   $opts{autochomp} = 1 unless defined $opts{autochomp};
  75
  76   my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;
  77   my $fh;
  78
  79   if (UNIVERSAL::isa($file, 'GLOB')) {
  80     # We use 1 here on the theory that some systems
  81     # may not indicate failure if we use 0.
  82     # MSWin32 does not indicate failure with 0, but I don't know if
  83     # it will indicate failure with 1 or not.
  84     unless (seek $file, 1, SEEK_SET) {
  85       croak "$pack: your filehandle does not appear to be seekable";
  86     }
  87     seek $file, 0, SEEK_SET     # put it back
  88     $fh = $file;                # setting binmode is the user's problem
  89   } elsif (ref $file) {
  90     croak "usage: tie \@array, $pack, filename, [option => value]...";
  91   } else {
  92     $fh = \do { local *FH };   # only works in 5.005 and later
  93     sysopen $fh, $file, $mode, 0666 or return;
  94     binmode $fh;
  95   }
  96   { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
  97   if (defined $opts{discipline} && $] >= 5.006) {
  98     # This avoids a compile-time warning under 5.005
  99     eval 'binmode($fh, $opts{discipline})';
 100     croak $@ if $@ =~ /unknown discipline/i;
 101     die if $@;
 102   }
 103   $opts{fh} = $fh;
 104
 105   bless \%opts => $pack;
 106 }
 107
 108 sub FETCH {
 109   my ($self, $n) = @_;
 110   my $rec;
 111
 112   # check the defer buffer
 113   if ($self->_is_deferring && exists $self->{deferred}{$n}) {
 114     $rec = $self->{deferred}{$n};
 115   } else {
 116     $rec = $self->_fetch($n);
 117   }
 118
 119   $self->_chomp1($rec);
 120 }
 121
 122 # Chomp many records in-place; return nothing useful
 123 sub _chomp {
 124   my $self = shift;
 125   return unless $self->{autochomp};
 126   if ($self->{autochomp}) {
 127     for (@_) {
 128       next unless defined;
 129       substr($_, - $self->{recseplen}) = "";
 130     }
 131   }
 132 }
 133
 134 # Chomp one record in-place; return modified record
 135 sub _chomp1 {
 136   my ($self, $rec) = @_;
 137   return $rec unless $self->{autochomp};
 138   return unless defined $rec;
 139   substr($rec, - $self->{recseplen}) = "";
 140   $rec;
 141 }
 142
 143 sub _fetch {
 144   my ($self, $n) = @_;
 145
 146   # check the record cache
 147   { my $cached = $self->{cache}->lookup($n);
 148     return $cached if defined $cached;
 149   }
 150
 151   unless ($#{$self->{offsets}} >= $n) {
 152     my $o = $self->_fill_offsets_to($n);
 153     # If it's still undefined, there is no such record, so return 'undef'
 154     return unless defined $o;
 155   }
 156
 157   my $fh = $self->{FH};
 158   $self->_seek($n);             # we can do this now that offsets is populated
 159   my $rec = $self->_read_record;
 160
 161 # If we happen to have just read the first record, check to see if
 162 # the length of the record matches what 'tell' says.  If not, Tie::File
 163 # won't work, and should drop dead.
 164 #
 165 #  if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
 166 #    if (defined $self->{discipline}) {
 167 #      croak "I/O discipline $self->{discipline} not supported";
 168 #    } else {
 169 #      croak "File encoding not supported";
 170 #    }
 171 #  }
 172
 173   $self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing};
 174   $rec;
 175 }
 176
 177 sub STORE {
 178   my ($self, $n, $rec) = @_;
 179   die "STORE called from _check_integrity!" if $DIAGNOSTIC;
 180
 181   $self->_fixrecs($rec);
 182
 183   if ($self->{autodefer}) {
 184     $self->_annotate_ad_history($n);
 185   }
 186
 187   return $self->_store_deferred($n, $rec) if $self->_is_deferring;
 188
 189
 190   # We need this to decide whether the new record will fit
 191   # It incidentally populates the offsets table
 192   # Note we have to do this before we alter the cache
 193   # 20020324 Wait, but this DOES alter the cache.  TODO BUG?
 194   my $oldrec = $self->_fetch($n);
 195
 196   if (defined($self->{cache}->lookup($n))) {
 197     $self->{cache}->update($n, $rec);
 198   }
 199
 200   if (not defined $oldrec) {
 201     # We're storing a record beyond the end of the file
 202     $self->_extend_file_to($n+1);
 203     $oldrec = $self->{recsep};
 204   }
 205   my $len_diff = length($rec) - length($oldrec);
 206
 207   # length($oldrec) here is not consistent with text mode  TODO XXX BUG
 208   $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));
 209
 210   # now update the offsets
 211   # array slice goes from element $n+1 (the first one to move)
 212   # to the end
 213   for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
 214     $_ += $len_diff;
 215   }
 216 }
 217
 218 sub _store_deferred {
 219   my ($self, $n, $rec) = @_;
 220   $self->{cache}->remove($n);
 221   my $old_deferred = $self->{deferred}{$n};
 222
 223   if (defined $self->{deferred_max} && $n > $self->{deferred_max}) {
 224     $self->{deferred_max} = $n;
 225   }
 226   $self->{deferred}{$n} = $rec;
 227
 228   my $len_diff = length($rec);
 229   $len_diff -= length($old_deferred) if defined $old_deferred;
 230   $self->{deferred_s} += $len_diff;
 231   $self->{cache}->adj_limit(-$len_diff);
 232   if ($self->{deferred_s} > $self->{dw_size}) {
 233     $self->_flush;
 234   } elsif ($self->_cache_too_full) {
 235     $self->_cache_flush;
 236   }
 237 }
 238
 239 # Remove a single record from the deferred-write buffer without writing it
 240 # The record need not be present
 241 sub _delete_deferred {
 242   my ($self, $n) = @_;
 243   my $rec = delete $self->{deferred}{$n};
 244   return unless defined $rec;
 245
 246   if (defined $self->{deferred_max}
 247       && $n == $self->{deferred_max}) {
 248     undef $self->{deferred_max};
 249   }
 250
 251   $self->{deferred_s} -= length $rec;
 252   $self->{cache}->adj_limit(length $rec);
 253 }
 254
 255 sub FETCHSIZE {
 256   my $self = shift;
 257   my $n = $#{$self->{offsets}};
 258   # 20020317 Change this to binary search
 259   while (defined ($self->_fill_offsets_to($n+1))) {
 260     ++$n;
 261   }
 262   my $top_deferred = $self->_defer_max;
 263   $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1;
 264   $n;
 265 }
 266
 267 sub STORESIZE {
 268   my ($self, $len) = @_;
 269
 270   if ($self->{autodefer}) {
 271     $self->_annotate_ad_history('STORESIZE');
 272   }
 273
 274   my $olen = $self->FETCHSIZE;
 275   return if $len == $olen;      # Woo-hoo!
 276
 277   # file gets longer
 278   if ($len > $olen) {
 279     if ($self->_is_deferring) {
 280       for ($olen .. $len-1) {
 281         $self->_store_deferred($_, $self->{recsep});
 282       }
 283     } else {
 284       $self->_extend_file_to($len);
 285     }
 286     return;
 287   }
 288
 289   # file gets shorter
 290   if ($self->_is_deferring) {
 291     # TODO maybe replace this with map-plus-assignment?
 292     for (grep $_ >= $len, keys %{$self->{deferred}}) {
 293       $self->_delete_deferred($_);
 294     }
 295     $self->{deferred_max} = $len-1;
 296   }
 297
 298   $self->_seek($len);
 299   $self->_chop_file;
 300   $#{$self->{offsets}} = $len;
 301 #  $self->{offsets}[0] = 0;      # in case we just chopped this
 302
 303   $self->{cache}->remove(grep $_ >= $len, $self->{cache}->keys);
 304 }
 305
 306 sub PUSH {
 307   my $self = shift;
 308   $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
 309 #  $self->FETCHSIZE;  # av.c takes care of this for me
 310 }
 311
 312 sub POP {
 313   my $self = shift;
 314   my $size = $self->FETCHSIZE;
 315   return if $size == 0;
 316 #  print STDERR "# POPPITY POP POP POP\n";
 317   scalar $self->SPLICE($size-1, 1);
 318 }
 319
 320 sub SHIFT {
 321   my $self = shift;
 322   scalar $self->SPLICE(0, 1);
 323 }
 324
 325 sub UNSHIFT {
 326   my $self = shift;
 327   $self->SPLICE(0, 0, @_);
 328   # $self->FETCHSIZE; # av.c takes care of this for me
 329 }
 330
 331 sub CLEAR {
 332   my $self = shift;
 333
 334   if ($self->{autodefer}) {
 335     $self->_annotate_ad_history('CLEAR');
 336   }
 337
 338   $self->_seekb(0);
 339   $self->_chop_file;
 340     $self->{cache}->set_limit($self->{memory});
 341     $self->{cache}->empty;
 342   @{$self->{offsets}} = (0);
 343   %{$self->{deferred}}= ();
 344     $self->{deferred_s} = 0;
 345     $self->{deferred_max} = -1;
 346 }
 347
 348 sub EXTEND {
 349   my ($self, $n) = @_;
 350
 351   # No need to pre-extend anything in this case
 352   return if $self->_is_deferring;
 353
 354   $self->_fill_offsets_to($n);
 355   $self->_extend_file_to($n);
 356 }
 357
 358 sub DELETE {
 359   my ($self, $n) = @_;
 360
 361   if ($self->{autodefer}) {
 362     $self->_annotate_ad_history('DELETE');
 363   }
 364
 365   my $lastrec = $self->FETCHSIZE-1;
 366   my $rec = $self->FETCH($n);
 367   $self->_delete_deferred($n) if $self->_is_deferring;
 368   if ($n == $lastrec) {
 369     $self->_seek($n);
 370     $self->_chop_file;
 371     $#{$self->{offsets}}--;
 372     $self->{cache}->remove($n);
 373     # perhaps in this case I should also remove trailing null records?
 374     # 20020316
 375     # Note that delete @a[-3..-1] deletes the records in the wrong order,
 376     # so we only chop the very last one out of the file.  We could repair this
 377     # by tracking deleted records inside the object.
 378   } elsif ($n < $lastrec) {
 379     $self->STORE($n, "");
 380   }
 381   $rec;
 382 }
 383
 384 sub EXISTS {
 385   my ($self, $n) = @_;
 386   return 1 if exists $self->{deferred}{$n};
 387   $self->_fill_offsets_to($n);  # I think this is unnecessary
 388   $n < $self->FETCHSIZE;
 389 }
 390
 391 sub SPLICE {
 392   my $self = shift;
 393
 394   if ($self->{autodefer}) {
 395     $self->_annotate_ad_history('SPLICE');
 396   }
 397
 398   $self->_flush if $self->_is_deferring; # move this up?
 399   if (wantarray) {
 400     $self->_chomp(my @a = $self->_splice(@_));
 401     @a;
 402   } else {
 403     $self->_chomp1(scalar $self->_splice(@_));
 404   }
 405 }
 406
 407 sub DESTROY {
 408   my $self = shift;
 409   $self->flush if $self->_is_deferring;
 410   $self->{cache}->delink if defined $self->{cache}; # break circular link
 411 }
 412
 413 sub _splice {
 414   my ($self, $pos, $nrecs, @data) = @_;
 415   my @result;
 416
 417   $pos = 0 unless defined $pos;
 418
 419   # Deal with negative and other out-of-range positions
 420   # Also set default for $nrecs
 421   {
 422     my $oldsize = $self->FETCHSIZE;
 423     $nrecs = $oldsize unless defined $nrecs;
 424     my $oldpos = $pos;
 425
 426     if ($pos < 0) {
 427       $pos += $oldsize;
 428       if ($pos < 0) {
 429         croak "Modification of non-creatable array value attempted, subscript $oldpos";
 430       }
 431     }
 432
 433     if ($pos > $oldsize) {
 434       return unless @data;
 435       $pos = $oldsize;          # This is what perl does for normal arrays
 436     }
 437   }
 438
 439   $self->_fixrecs(@data);
 440   my $data = join '', @data;
 441   my $datalen = length $data;
 442   my $oldlen = 0;
 443
 444   # compute length of data being removed
 445   for ($pos .. $pos+$nrecs-1) {
 446     $self->_fill_offsets_to($_);
 447     my $rec = $self->_fetch($_);
 448     last unless defined $rec;
 449     push @result, $rec;
 450
 451     # Why don't we just use length($rec) here?
 452     # Because that record might have come from the cache.  _splice
 453     # might have been called to flush out the deferred-write records,
 454     # and in this case length($rec) is the length of the record to be *written*,
 455     # not the length of the actual record in the file.  But the offsets are
 456     # still true. 20020322
 457     $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_]
 458       if defined $self->{offsets}[$_+1];
 459   }
 460
 461   # Modify the file
 462   $self->_twrite($data, $self->{offsets}[$pos], $oldlen);
 463
 464   # update the offsets table part 1
 465   # compute the offsets of the new records:
 466   my @new_offsets;
 467   if (@data) {
 468     push @new_offsets, $self->{offsets}[$pos];
 469     for (0 .. $#data-1) {
 470       push @new_offsets, $new_offsets[-1] + length($data[$_]);
 471     }
 472   }
 473   splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);
 474
 475   # update the offsets table part 2
 476   # adjust the offsets of the following old records
 477   for ($pos+@data .. $#{$self->{offsets}}) {
 478     $self->{offsets}[$_] += $datalen - $oldlen;
 479   }
 480   # If we scrubbed out all known offsets, regenerate the trivial table
 481   # that knows that the file does indeed start at 0.
 482   $self->{offsets}[0] = 0 unless @{$self->{offsets}};
 483
 484   # Perhaps the following cache foolery could be factored out
 485   # into a bunch of mor opaque cache functions.  For example,
 486   # it's odd to delete a record from the cache and then remove
 487   # it from the LRU queue later on; there should be a function to
 488   # do both at once.
 489
 490   # update the read cache, part 1
 491   # modified records
 492   for ($pos .. $pos+$nrecs-1) {
 493     my $new = $data[$_-$pos];
 494     if (defined $new) {
 495       $self->{cache}->update($_, $new);
 496     } else {
 497       $self->{cache}->remove($_);
 498     }
 499   }
 500
 501   # update the read cache, part 2
 502   # moved records - records past the site of the change
 503   # need to be renumbered
 504   # Maybe merge this with the previous block?
 505   {
 506     my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->keys;
 507     my @newkeys = map $_-$nrecs+@data, @oldkeys;
 508     $self->{cache}->rekey(\@oldkeys, \@newkeys);
 509   }
 510
 511   # Now there might be too much data in the cache, if we spliced out
 512   # some short records and spliced in some long ones.  If so, flush
 513   # the cache.
 514   $self->_cache_flush;
 515
 516   # Yes, the return value of 'splice' *is* actually this complicated
 517   wantarray ? @result : @result ? $result[-1] : undef;
 518 }
 519
 520 # write data into the file
 521 # $data is the data to be written.
 522 # it should be written at position $pos, and should overwrite
 523 # exactly $len of the following bytes.
 524 # Note that if length($data) > $len, the subsequent bytes will have to
 525 # be moved up, and if length($data) < $len, they will have to
 526 # be moved down
 527 sub _twrite {
 528   my ($self, $data, $pos, $len) = @_;
 529
 530   unless (defined $pos) {
 531     die "\$pos was undefined in _twrite";
 532   }
 533
 534   my $len_diff = length($data) - $len;
 535
 536   if ($len_diff == 0) {          # Woo-hoo!
 537     my $fh = $self->{fh};
 538     $self->_seekb($pos);
 539     $self->_write_record($data);
 540     return;                     # well, that was easy.
 541   }
 542
 543   # the two records are of different lengths
 544   # our strategy here: rewrite the tail of the file,
 545   # reading ahead one buffer at a time
 546   # $bufsize is required to be at least as large as the data we're overwriting
 547   my $bufsize = _bufsize($len_diff);
 548   my ($writepos, $readpos) = ($pos, $pos+$len);
 549   my $next_block;
 550   my $more_data;
 551
 552   # Seems like there ought to be a way to avoid the repeated code
 553   # and the special case here.  The read(1) is also a little weird.
 554   # Think about this.
 555   do {
 556     $self->_seekb($readpos);
 557     my $br = read $self->{fh}, $next_block, $bufsize;
 558     $more_data = read $self->{fh}, my($dummy), 1;
 559     $self->_seekb($writepos);
 560     $self->_write_record($data);
 561     $readpos += $br;
 562     $writepos += length $data;
 563     $data = $next_block;
 564   } while $more_data;           # BUG XXX TODO how could this have worked?
 565   $self->_seekb($writepos);
 566   $self->_write_record($next_block);
 567
 568   # There might be leftover data at the end of the file
 569   $self->_chop_file if $len_diff < 0;
 570 }
 571
 572 # If a record does not already end with the appropriate terminator
 573 # string, append one.
 574 sub _fixrecs {
 575   my $self = shift;
 576   for (@_) {
 577     $_ .= $self->{recsep}
 578       unless substr($_, - $self->{recseplen}) eq $self->{recsep};
 579   }
 580 }
 581
 582
 583 ################################################################
 584 #
 585 # Basic read, write, and seek
 586 #
 587
 588 # seek to the beginning of record #$n
 589 # Assumes that the offsets table is already correctly populated
 590 #
 591 # Note that $n=-1 has a special meaning here: It means the start of
 592 # the last known record; this may or may not be the very last record
 593 # in the file, depending on whether the offsets table is fully populated.
 594 #
 595 sub _seek {
 596   my ($self, $n) = @_;
 597   my $o = $self->{offsets}[$n];
 598   defined($o)
 599     or confess("logic error: undefined offset for record $n");
 600   seek $self->{fh}, $o, SEEK_SET
 601     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 602 }
 603
 604 sub _seekb {
 605   my ($self, $b) = @_;
 606   seek $self->{fh}, $b, SEEK_SET
 607     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 608 }
 609
 610 # populate the offsets table up to the beginning of record $n
 611 # return the offset of record $n
 612 sub _fill_offsets_to {
 613   my ($self, $n) = @_;
 614   my $fh = $self->{fh};
 615   local *OFF = $self->{offsets};
 616   my $rec;
 617
 618   until ($#OFF >= $n) {
 619     my $o = $OFF[-1];
 620     $self->_seek(-1);           # tricky -- see comment at _seek
 621     $rec = $self->_read_record;
 622     if (defined $rec) {
 623       push @OFF, tell $fh;
 624     } else {
 625       return;                   # It turns out there is no such record
 626     }
 627   }
 628
 629   # we have now read all the records up to record n-1,
 630   # so we can return the offset of record n
 631   return $OFF[$n];
 632 }
 633
 634 # assumes that $rec is already suitably terminated
 635 sub _write_record {
 636   my ($self, $rec) = @_;
 637   my $fh = $self->{fh};
 638   print $fh $rec
 639     or die "Couldn't write record: $!";  # "Should never happen."
 640   $self->{_written} += length($rec);
 641 }
 642
 643 sub _read_record {
 644   my $self = shift;
 645   my $rec;
 646   { local $/ = $self->{recsep};
 647     my $fh = $self->{fh};
 648     $rec = <$fh>;
 649   }
 650   $self->{_read} += length($rec) if defined $rec;
 651   $rec;
 652 }
 653
 654 sub _rw_stats {
 655   @{$self}{'_read', '_written'};
 656 }
 657
 658 ################################################################
 659 #
 660 # Read cache management
 661
 662 sub _cache_flush {
 663   my ($self) = @_;
 664   $self->{cache}->reduce_size_to($self->{memory} - $self->{deferred_s});
 665 }
 666
 667 sub _cache_too_full {
 668   my $self = shift;
 669   $self->{cache}->bytes + $self->{deferred_s} >= $self->{memory};
 670 }
 671
 672 ################################################################
 673 #
 674 # File custodial services
 675 #
 676
 677
 678 # We have read to the end of the file and have the offsets table
 679 # entirely populated.  Now we need to write a new record beyond
 680 # the end of the file.  We prepare for this by writing
 681 # empty records into the file up to the position we want
 682 #
 683 # assumes that the offsets table already contains the offset of record $n,
 684 # if it exists, and extends to the end of the file if not.
 685 sub _extend_file_to {
 686   my ($self, $n) = @_;
 687   $self->_seek(-1);             # position after the end of the last record
 688   my $pos = $self->{offsets}[-1];
 689
 690   # the offsets table has one entry more than the total number of records
 691   my $extras = $n - $#{$self->{offsets}};
 692
 693   # Todo : just use $self->{recsep} x $extras here?
 694   while ($extras-- > 0) {
 695     $self->_write_record($self->{recsep});
 696     push @{$self->{offsets}}, tell $self->{fh};
 697   }
 698 }
 699
 700 # Truncate the file at the current position
 701 sub _chop_file {
 702   my $self = shift;
 703   truncate $self->{fh}, tell($self->{fh});
 704 }
 705
 706
 707 # compute the size of a buffer suitable for moving
 708 # all the data in a file forward $n bytes
 709 # ($n may be negative)
 710 # The result should be at least $n.
 711 sub _bufsize {
 712   my $n = shift;
 713   return 8192 if $n < 0;
 714   my $b = $n & ~8191;
 715   $b += 8192 if $n & 8191;
 716   $b;
 717 }
 718
 719 ################################################################
 720 #
 721 # Miscellaneous public methods
 722 #
 723
 724 # Lock the file
 725 sub flock {
 726   my ($self, $op) = @_;
 727   unless (@_ <= 3) {
 728     my $pack = ref $self;
 729     croak "Usage: $pack\->flock([OPERATION])";
 730   }
 731   my $fh = $self->{fh};
 732   $op = LOCK_EX unless defined $op;
 733   flock $fh, $op;
 734 }
 735
 736 # Get/set autochomp option
 737 sub autochomp {
 738   my $self = shift;
 739   if (@_) {
 740     my $old = $self->{autochomp};
 741     $self->{autochomp} = shift;
 742     $old;
 743   } else {
 744     $self->{autochomp};
 745   }
 746 }
 747
 748 ################################################################
 749 #
 750 # Matters related to deferred writing
 751 #
 752
 753 # Defer writes
 754 sub defer {
 755   my $self = shift;
 756   $self->_stop_autodeferring;
 757   @{$self->{ad_history}} = ();
 758   $self->{defer} = 1;
 759 }
 760
 761 # Flush deferred writes
 762 #
 763 # This could be better optimized to write the file in one pass, instead
 764 # of one pass per block of records.  But that will require modifications
 765 # to _twrite, so I should have a good _twite test suite first.
 766 sub flush {
 767   my $self = shift;
 768
 769   $self->_flush;
 770   $self->{defer} = 0;
 771 }
 772
 773 sub _flush {
 774   my $self = shift;
 775   my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
 776
 777   while (@writable) {
 778     # gather all consecutive records from the front of @writable
 779     my $first_rec = shift @writable;
 780     my $last_rec = $first_rec+1;
 781     ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
 782     --$last_rec;
 783     $self->_fill_offsets_to($last_rec);
 784     $self->_extend_file_to($last_rec);
 785     $self->_splice($first_rec, $last_rec-$first_rec+1,
 786                    @{$self->{deferred}}{$first_rec .. $last_rec});
 787   }
 788
 789   $self->_discard;               # clear out defered-write-cache
 790 }
 791
 792 # Discard deferred writes and disable future deferred writes
 793 sub discard {
 794   my $self = shift;
 795   $self->_discard;
 796   $self->{defer} = 0;
 797 }
 798
 799 # Discard deferred writes, but retain old deferred writing mode
 800 sub _discard {
 801   my $self = shift;
 802   %{$self->{deferred}} = ();
 803   $self->{deferred_s}  = 0;
 804   $self->{deferred_max}  = -1;
 805   $self->{cache}->set_limit($self->{memory});
 806 }
 807
 808 # Deferred writing is enabled, either explicitly ($self->{defer})
 809 # or automatically ($self->{autodeferring})
 810 sub _is_deferring {
 811   my $self = shift;
 812   $self->{defer} || $self->{autodeferring};
 813 }
 814
 815 # The largest record number of any deferred record
 816 sub _defer_max {
 817   my $self = shift;
 818   return $self->{deferred_max} if defined $self->{deferred_max};
 819   my $max = -1;
 820   for my $key (keys %{$self->{deferred}}) {
 821     $max = $key if $key > $max;
 822   }
 823   $self->{deferred_max} = $max;
 824   $max;
 825 }
 826
 827 ################################################################
 828 #
 829 # Matters related to autodeferment
 830 #
 831
 832 # Get/set autodefer option
 833 sub autodefer {
 834   my $self = shift;
 835   if (@_) {
 836     my $old = $self->{autodefer};
 837     $self->{autodefer} = shift;
 838     if ($old) {
 839       $self->_stop_autodeferring;
 840       @{$self->{ad_history}} = ();
 841     }
 842     $old;
 843   } else {
 844     $self->{autodefer};
 845   }
 846 }
 847
 848 # The user is trying to store record #$n Record that in the history,
 849 # and then enable (or disable) autodeferment if that seems useful.
 850 # Note that it's OK for $n to be a non-number, as long as the function
 851 # is prepared to deal with that.  Nobody else looks at the ad_history.
 852 #
 853 # Now, what does the ad_history mean, and what is this function doing?
 854 # Essentially, the idea is to enable autodeferring when we see that the
 855 # user has made three consecutive STORE calls to three consecutive records.
 856 # ("Three" is actually ->{autodefer_threshhold}.)
 857 # A STORE call for record #$n inserts $n into the autodefer history,
 858 # and if the history contains three consecutive records, we enable
 859 # autodeferment.  An ad_history of [X, Y] means that the most recent
 860 # STOREs were for records X, X+1, ..., Y, in that order.
 861 #
 862 # Inserting a nonconsecutive number erases the history and starts over.
 863 #
 864 # Performing a special operation like SPLICE erases the history.
 865 #
 866 # There's one special case: CLEAR means that CLEAR was just called.
 867 # In this case, we prime the history with [-2, -1] so that if the next
 868 # write is for record 0, autodeferring goes on immediately.  This is for
 869 # the common special case of "@a = (...)".
 870 #
 871 sub _annotate_ad_history {
 872   my ($self, $n) = @_;
 873   return unless $self->{autodefer}; # feature is disabled
 874   return if $self->{defer};     # already in explicit defer mode
 875   return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold};
 876
 877   local *H = $self->{ad_history};
 878   if ($n eq 'CLEAR') {
 879     @H = (-2, -1);              # prime the history with fake records
 880     $self->_stop_autodeferring;
 881   } elsif ($n =~ /^\d+$/) {
 882     if (@H == 0) {
 883       @H =  ($n, $n);
 884     } else {                    # @H == 2
 885       if ($H[1] == $n-1) {      # another consecutive record
 886         $H[1]++;
 887         if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) {
 888           $self->{autodeferring} = 1;
 889         }
 890       } else {                  # nonconsecutive- erase and start over
 891         @H = ($n, $n);
 892         $self->_stop_autodeferring;
 893       }
 894     }
 895   } else {                      # SPLICE or STORESIZE or some such
 896     @H = ();
 897     $self->_stop_autodeferring;
 898   }
 899 }
 900
 901 # If autodferring was enabled, cut it out and discard the history
 902 sub _stop_autodeferring {
 903   my $self = shift;
 904   if ($self->{autodeferring}) {
 905     $self->_flush;
 906   }
 907   $self->{autodeferring} = 0;
 908 }
 909
 910 ################################################################
 911
 912
 913 # This is NOT a method.  It is here for two reasons:
 914 #  1. To factor a fairly complicated block out of the constructor
 915 #  2. To provide access for the test suite, which need to be sure
 916 #     files are being written properly.
 917 sub _default_recsep {
 918   my $recsep = $/;
 919   if ($^O eq 'MSWin32') {       # Dos too?
 920     # Windows users expect files to be terminated with \r\n
 921     # But $/ is set to \n instead
 922     # Note that this also transforms \n\n into \r\n\r\n.
 923     # That is a feature.
 924     $recsep =~ s/\n/\r\n/g;
 925   }
 926   $recsep;
 927 }
 928
 929 # Utility function for _check_integrity
 930 sub _ci_warn {
 931   my $msg = shift;
 932   $msg =~ s/\n/\\n/g;
 933   $msg =~ s/\r/\\r/g;
 934   print "# $msg\n";
 935 }
 936
 937 # Given a file, make sure the cache is consistent with the
 938 # file contents and the internal data structures are consistent with
 939 # each other.  Returns true if everything checks out, false if not
 940 #
 941 # The $file argument is no longer used.  It is retained for compatibility
 942 # with the existing test suite.
 943 sub _check_integrity {
 944   my ($self, $file, $warn) = @_;
 945   my $rsl = $self->{recseplen};
 946   my $rs  = $self->{recsep};
 947   my $good = 1;
 948   local *_;                     # local $_ does not work here
 949   local $DIAGNOSTIC = 1;
 950
 951   if (not defined $rs) {
 952     _ci_warn("recsep is undef!");
 953     $good = 0;
 954   } elsif ($rs eq "") {
 955     _ci_warn("recsep is empty!");
 956     $good = 0;
 957   } elsif ($rsl != length $rs) {
 958     my $ln = length $rs;
 959     _ci_warn("recsep <$rs> has length $ln, should be $rsl");
 960     $good = 0;
 961   }
 962
 963   if (not defined $self->{offsets}[0]) {
 964     _ci_warn("offset 0 is missing!");
 965     $good = 0;
 966   } elsif ($self->{offsets}[0] != 0) {
 967     _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
 968     $good = 0;
 969   }
 970
 971   my $cached = 0;
 972   {
 973     local *F = $self->{fh};
 974     seek F, 0, SEEK_SET;
 975     local $. = 0;
 976     local $/ = $rs;
 977
 978     while (<F>) {
 979       my $n = $. - 1;
 980       my $cached = $self->{cache}->_produce($n);
 981       my $offset = $self->{offsets}[$.];
 982       my $ao = tell F;
 983       if (defined $offset && $offset != $ao) {
 984         _ci_warn("rec $n: offset <$offset> actual <$ao>");
 985         $good = 0;
 986       }
 987       if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) {
 988         $good = 0;
 989         _ci_warn("rec $n: cached <$cached> actual <$_>");
 990       }
 991       if (defined $cached && substr($cached, -$rsl) ne $rs) {
 992         _ci_warn("rec $n in the cache is missing the record separator");
 993       }
 994     }
 995
 996     my $deferring = $self->_is_deferring;
 997     for my $n ($self->{cache}->keys) {
 998       my $r = $self->{cache}->_produce($n);
 999       $cached += length($r);
1000       next if $n+1 <= $.;         # checked this already
1001       _ci_warn("spurious caching of record $n");
1002       $good = 0;
1003     }
1004     my $b = $self->{cache}->bytes;
1005     if ($cached != $b) {
1006       _ci_warn("cache size is $b, should be $cached");
1007       $good = 0;
1008     }
1009   }
1010
1011   $good = 0 unless $self->{cache}->_check_integrity;
1012
1013   # Now let's check the deferbuffer
1014   # Unless deferred writing is enabled, it should be empty
1015   if (! $self->_is_deferring && %{$self->{deferred}}) {
1016     _ci_warn("deferred writing disabled, but deferbuffer nonempty");
1017     $good = 0;
1018   }
1019
1020   # Any record in the deferbuffer should *not* be present in the readcache
1021   my $deferred_s = 0;
1022   while (my ($n, $r) = each %{$self->{deferred}}) {
1023     $deferred_s += length($r);
1024     if (defined $self->{cache}->_produce($n)) {
1025       _ci_warn("record $n is in the deferbuffer *and* the readcache");
1026       $good = 0;
1027     }
1028     if (substr($r, -$rsl) ne $rs) {
1029       _ci_warn("rec $n in the deferbuffer is missing the record separator");
1030       $good = 0;
1031     }
1032   }
1033
1034   # Total size of deferbuffer should match internal total
1035   if ($deferred_s != $self->{deferred_s}) {
1036     _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
1037     $good = 0;
1038   }
1039
1040   # Total size of deferbuffer should not exceed the specified limit
1041   if ($deferred_s > $self->{dw_size}) {
1042     _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit of $self->{dw_size}");
1043     $good = 0;
1044   }
1045
1046   # Total size of cached data should not exceed the specified limit
1047   if ($deferred_s + $cached > $self->{memory}) {
1048     my $total = $deferred_s + $cached;
1049     _ci_warn("total stored data size is $total which exceeds the limit of $self->{memory}");
1050     $good = 0;
1051   }
1052
1053   # Stuff related to autodeferment
1054   if (!$self->{autodefer} && @{$self->{ad_history}}) {
1055     _ci_warn("autodefer is disabled, but ad_history is nonempty");
1056     $good = 0;
1057   }
1058   if ($self->{autodeferring} && $self->{defer}) {
1059     _ci_warn("both autodeferring and explicit deferring are active");
1060     $good = 0;
1061   }
1062   if (@{$self->{ad_history}} == 0) {
1063     # That's OK, no additional tests required
1064   } elsif (@{$self->{ad_history}} == 2) {
1065     my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}};
1066     if (@non_number) {
1067       my $msg;
1068       { local $" = ')(';
1069         $msg = "ad_history contains non-numbers (@{$self->{ad_history}})";
1070       }
1071       _ci_warn($msg);
1072       $good = 0;
1073     } elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) {
1074       _ci_warn("ad_history has nonsensical values @{$self->{ad_history}}");
1075       $good = 0;
1076     }
1077   } else {
1078     _ci_warn("ad_history has bad length <@{$self->{ad_history}}>");
1079     $good = 0;
1080   }
1081
1082   $good;
1083 }
1084
1085 ################################################################
1086 #
1087 # Tie::File::Cache
1088 #
1089 # Read cache
1090
1091 package Tie::File::Cache;
1092 $Tie::File::Cache::VERSION = $Tie::File::VERSION;
1093 use Carp ':DEFAULT', 'confess';
1094
1095 sub HEAP () { 0 }
1096 sub HASH () { 1 }
1097 sub MAX  () { 2 }
1098 sub BYTES() { 3 }
1099 use strict 'vars';
1100
1101 sub new {
1102   my ($pack, $max) = @_;
1103   local *_;
1104   croak "missing argument to ->new" unless defined $max;
1105   my $self = [];
1106   bless $self => $pack;
1107   @$self = (Tie::File::Heap->new($self), {}, $max, 0);
1108   $self;
1109 }
1110
1111 sub adj_limit {
1112   my ($self, $n) = @_;
1113   $self->[MAX] += $n;
1114 }
1115
1116 sub set_limit {
1117   my ($self, $n) = @_;
1118   $self->[MAX] = $n;
1119 }
1120
1121 # For internal use only
1122 # Will be called by the heap structure to notify us that a certain
1123 # piece of data has moved from one heap element to another.
1124 # $k is the hash key of the item
1125 # $n is the new index into the heap at which it is stored
1126 # If $n is undefined, the item has been removed from the heap.
1127 sub _heap_move {
1128   my ($self, $k, $n) = @_;
1129   if (defined $n) {
1130     $self->[HASH]{$k} = $n;
1131   } else {
1132     delete $self->[HASH]{$k};
1133   }
1134 }
1135
1136 sub insert {
1137   my ($self, $key, $val) = @_;
1138   local *_;
1139   croak "missing argument to ->insert" unless defined $key;
1140   unless (defined $self->[MAX]) {
1141     confess "undefined max" ;
1142   }
1143   confess "undefined val" unless defined $val;
1144   return if length($val) > $self->[MAX];
1145   my $oldnode = $self->[HASH]{$key};
1146   if (defined $oldnode) {
1147     my $oldval = $self->[HEAP]->set_val($oldnode, $val);
1148     $self->[BYTES] -= length($oldval);
1149   } else {
1150     $self->[HEAP]->insert($key, $val);
1151   }
1152   $self->[BYTES] += length($val);
1153   $self->flush;
1154 }
1155
1156 sub expire {
1157   my $self = shift;
1158   my $old_data = $self->[HEAP]->popheap;
1159   return unless defined $old_data;
1160   $self->[BYTES] -= length $old_data;
1161   $old_data;
1162 }
1163
1164 sub remove {
1165   my ($self, @keys) = @_;
1166   my @result;
1167   for my $key (@keys) {
1168     next unless exists $self->[HASH]{$key};
1169     my $old_data = $self->[HEAP]->remove($self->[HASH]{$key});
1170     $self->[BYTES] -= length $old_data;
1171     push @result, $old_data;
1172   }
1173   @result;
1174 }
1175
1176 sub lookup {
1177   my ($self, $key) = @_;
1178   local *_;
1179   croak "missing argument to ->lookup" unless defined $key;
1180   if (exists $self->[HASH]{$key}) {
1181     $self->[HEAP]->lookup($self->[HASH]{$key});
1182   } else {
1183     return;
1184   }
1185 }
1186
1187 # For internal use only
1188 sub _produce {
1189   my ($self, $key) = @_;
1190   my $loc = $self->[HASH]{$key};
1191   return unless defined $loc;
1192   $self->[HEAP][$loc][2];
1193 }
1194
1195 # For internal use only
1196 sub _promote {
1197   my ($self, $key) = @_;
1198   $self->[HEAP]->promote($self->[HASH]{$key});
1199 }
1200
1201 sub empty {
1202   my ($self) = @_;
1203   %{$self->[HASH]} = ();
1204     $self->[BYTES] = 0;
1205     $self->[HEAP]->empty;
1206 }
1207
1208 sub is_empty {
1209   my ($self) = @_;
1210   keys %{$self->[HASH]} == 0;
1211 }
1212
1213 sub update {
1214   my ($self, $key, $val) = @_;
1215   local *_;
1216   croak "missing argument to ->update" unless defined $key;
1217   if (length($val) > $self->[MAX]) {
1218     my $oldval = $self->remove($key);
1219     $self->[BYTES] -= length($oldval) if defined $oldval;
1220   } elsif (exists $self->[HASH]{$key}) {
1221     my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val);
1222     $self->[BYTES] += length($val);
1223     $self->[BYTES] -= length($oldval) if defined $oldval;
1224   } else {
1225     $self->[HEAP]->insert($key, $val);
1226     $self->[BYTES] += length($val);
1227   }
1228   $self->flush;
1229 }
1230
1231 sub rekey {
1232   my ($self, $okeys, $nkeys) = @_;
1233   local *_;
1234   my %map;
1235   @map{@$okeys} = @$nkeys;
1236   croak "missing argument to ->rekey" unless defined $nkeys;
1237   croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys;
1238   my %adjusted;                 # map new keys to heap indices
1239   # You should be able to cut this to one loop TODO XXX
1240   for (0 .. $#$okeys) {
1241     $adjusted{$nkeys->[$_]} = delete $self->[HASH]{$okeys->[$_]};
1242   }
1243   while (my ($nk, $ix) = each %adjusted) {
1244     # @{$self->[HASH]}{keys %adjusted} = values %adjusted;
1245     $self->[HEAP]->rekey($ix, $nk);
1246     $self->[HASH]{$nk} = $ix;
1247   }
1248 }
1249
1250 sub keys {
1251   my $self = shift;
1252   my @a = keys %{$self->[HASH]};
1253   @a;
1254 }
1255
1256 sub bytes {
1257   my $self = shift;
1258   $self->[BYTES];
1259 }
1260
1261 sub reduce_size_to {
1262   my ($self, $max) = @_;
1263   until ($self->is_empty || $self->[BYTES] <= $max) {
1264     $self->expire;
1265   }
1266 }
1267
1268 sub flush {
1269   my $self = shift;
1270   until ($self->is_empty || $self->[BYTES] <= $self->[MAX]) {
1271     $self->expire;
1272   }
1273 }
1274
1275 # For internal use only
1276 sub _produce_lru {
1277   my $self = shift;
1278   $self->[HEAP]->expire_order;
1279 }
1280
1281 sub _check_integrity {
1282   my $self = shift;
1283   $self->[HEAP]->_check_integrity;
1284 }
1285
1286 sub delink {
1287   my $self = shift;
1288   $self->[HEAP] = undef;        # Bye bye heap
1289 }
1290
1291 ################################################################
1292 #
1293 # Tie::File::Heap
1294 #
1295 # Heap data structure for use by cache LRU routines
1296
1297 package Tie::File::Heap;
1298 use Carp ':DEFAULT', 'confess';
1299 $Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION;
1300 sub SEQ () { 0 };
1301 sub KEY () { 1 };
1302 sub DAT () { 2 };
1303
1304 sub new {
1305   my ($pack, $cache) = @_;
1306   die "$pack: Parent cache object $cache does not support _heap_move method"
1307     unless eval { $cache->can('_heap_move') };
1308   my $self = [[0,$cache,0]];
1309   bless $self => $pack;
1310 }
1311
1312 # Allocate a new sequence number, larger than all previously allocated numbers
1313 sub _nseq {
1314   my $self = shift;
1315   $self->[0][0]++;
1316 }
1317
1318 sub _cache {
1319   my $self = shift;
1320   $self->[0][1];
1321 }
1322
1323 sub _nelts {
1324   my $self = shift;
1325   $self->[0][2];
1326 }
1327
1328 sub _nelts_inc {
1329   my $self = shift;
1330   ++$self->[0][2];
1331 }
1332
1333 sub _nelts_dec {
1334   my $self = shift;
1335   --$self->[0][2];
1336 }
1337
1338 sub is_empty {
1339   my $self = shift;
1340   $self->_nelts == 0;
1341 }
1342
1343 sub empty {
1344   my $self = shift;
1345   $#$self = 0;
1346   $self->[0][2] = 0;
1347   $self->[0][0] = 0;            # might as well reset the sequence numbers
1348 }
1349
1350 # notify the parent cache objec tthat we moved something
1351 sub _heap_move {
1352   my $self = shift;
1353   $self->_cache->_heap_move(@_);
1354 }
1355
1356 # Insert a piece of data into the heap with the indicated sequence number.
1357 # The item with the smallest sequence number is always at the top.
1358 # If no sequence number is specified, allocate a new one and insert the
1359 # item at the bottom.
1360 sub insert {
1361   my ($self, $key, $data, $seq) = @_;
1362   $seq = $self->_nseq unless defined $seq;
1363   $self->_insert_new([$seq, $key, $data]);
1364 }
1365
1366 # Insert a new, fresh item at the bottom of the heap
1367 sub _insert_new {
1368   my ($self, $item) = @_;
1369   my $i = @$self;
1370   $i = int($i/2) until defined $self->[$i/2];
1371   $self->[$i] = $item;
1372   $self->_heap_move($self->[$i][KEY], $i);
1373   $self->_nelts_inc;
1374 }
1375
1376 # Insert [$data, $seq] pair at or below item $i in the heap.
1377 # If $i is omitted, default to 1 (the top element.)
1378 sub _insert {
1379   my ($self, $item, $i) = @_;
1380   $self->_check_loc($i) if defined $i;
1381   $i = 1 unless defined $i;
1382   until (! defined $self->[$i]) {
1383     if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older
1384       ($self->[$i], $item) = ($item, $self->[$i]);
1385       $self->_heap_move($self->[$i][KEY], $i);
1386     }
1387     # If either is undefined, go that way.  Otherwise, choose at random
1388     my $dir;
1389     $dir = 0 if !defined $self->[2*$i];
1390     $dir = 1 if !defined $self->[2*$i+1];
1391     $dir = int(rand(2)) unless defined $dir;
1392     $i = 2*$i + $dir;
1393   }
1394   $self->[$i] = $item;
1395   $self->_heap_move($self->[$i][KEY], $i);
1396   $self->_nelts_inc;
1397 }
1398
1399 # Remove the item at node $i from the heap, moving child items upwards.
1400 # The item with the smallest sequence number is always at the top.
1401 # Moving items upwards maintains this condition.
1402 # Return the removed item.
1403 sub remove {
1404   my ($self, $i) = @_;
1405   $i = 1 unless defined $i;
1406   my $top = $self->[$i];
1407   return unless defined $top;
1408   while (1) {
1409     my $ii;
1410     my ($L, $R) = (2*$i, 2*$i+1);
1411
1412     # If either is undefined, go the other way.
1413     # Otherwise, go towards the smallest.
1414     last unless defined $self->[$L] || defined $self->[$R];
1415     $ii = $R if not defined $self->[$L];
1416     $ii = $L if not defined $self->[$R];
1417     unless (defined $ii) {
1418       $ii = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1419     }
1420
1421     $self->[$i] = $self->[$ii]; # Promote child to fill vacated spot
1422     $self->_heap_move($self->[$i][KEY], $i);
1423     $i = $ii; # Fill new vacated spot
1424   }
1425   $self->_heap_move($top->[KEY], undef);
1426   undef $self->[$i];
1427   $self->_nelts_dec;
1428   return $top->[DAT];
1429 }
1430
1431 sub popheap {
1432   my $self = shift;
1433   $self->remove(1);
1434 }
1435
1436 # set the sequence number of the indicated item to a higher number
1437 # than any other item in the heap, and bubble the item down to the
1438 # bottom.
1439 sub promote {
1440   my ($self, $n) = @_;
1441   $self->_check_loc($n);
1442   $self->[$n][SEQ] = $self->_nseq;
1443   my $i = $n;
1444   while (1) {
1445     my ($L, $R) = (2*$i, 2*$i+1);
1446     my $dir;
1447     last unless defined $self->[$L] || defined $self->[$R];
1448     $dir = $R unless defined $self->[$L];
1449     $dir = $L unless defined $self->[$R];
1450     unless (defined $dir) {
1451       $dir = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1452     }
1453     @{$self}[$i, $dir] = @{$self}[$dir, $i];
1454     for ($i, $dir) {
1455       $self->_heap_move($self->[$_][KEY], $_) if defined $self->[$_];
1456     }
1457     $i = $dir;
1458   }
1459 }
1460
1461 # Return item $n from the heap, promoting its LRU status
1462 sub lookup {
1463   my ($self, $n) = @_;
1464   $self->_check_loc($n);
1465   my $val = $self->[$n];
1466   $self->promote($n);
1467   $val->[DAT];
1468 }
1469
1470
1471 # Assign a new value for node $n, promoting it to the bottom of the heap
1472 sub set_val {
1473   my ($self, $n, $val) = @_;
1474   $self->_check_loc($n);
1475   my $oval = $self->[$n][DAT];
1476   $self->[$n][DAT] = $val;
1477   $self->promote($n);
1478   return $oval;
1479 }
1480
1481 # The hask key has changed for an item;
1482 # alter the heap's record of the hash key
1483 sub rekey {
1484   my ($self, $n, $new_key) = @_;
1485   $self->_check_loc($n);
1486   $self->[$n][KEY] = $new_key;
1487 }
1488
1489 sub _check_loc {
1490   my ($self, $n) = @_;
1491   unless (defined $self->[$n]) {
1492     confess "_check_loc($n) failed";
1493   }
1494 }
1495
1496 sub _check_integrity {
1497   my $self = shift;
1498   my $good = 1;
1499   unless (eval {$self->[0][1]->isa("Tie::File::Cache")}) {
1500     print "# Element 0 of heap corrupt\n";
1501     $good = 0;
1502   }
1503   $good = 0 unless $self->_satisfies_heap_condition(1);
1504   for my $i (2 .. $#{$self}) {
1505     my $p = int($i/2);          # index of parent node
1506     if (defined $self->[$i] && ! defined $self->[$p]) {
1507       print "# Element $i of heap defined, but parent $p isn't\n";
1508       $good = 0;
1509     }
1510   }
1511   return $good;
1512 }
1513
1514 sub _satisfies_heap_condition {
1515   my $self = shift;
1516   my $n = shift || 1;
1517   my $good = 1;
1518   for (0, 1) {
1519     my $c = $n*2 + $_;
1520     next unless defined $self->[$c];
1521     if ($self->[$n][SEQ] >= $self->[$c]) {
1522       print "# Node $n of heap does not predate node $c\n";
1523       $good = 0 ;
1524     }
1525     $good = 0 unless $self->_satisfies_heap_condition($c);
1526   }
1527   return $good;
1528 }
1529
1530 # Return a list of all the values, sorted by expiration order
1531 sub expire_order {
1532   my $self = shift;
1533   my @nodes = sort {$a->[SEQ] <=> $b->[SEQ]} $self->_nodes;
1534   map { $_->[KEY] } @nodes;
1535 }
1536
1537 sub _nodes {
1538   my $self = shift;
1539   my $i = shift || 1;
1540   return unless defined $self->[$i];
1541   ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1));
1542 }
1543
1544 1;
1545
1546
1547
1548 "Cogito, ergo sum.";  # don't forget to return a true value from the file
1549
1550 =head1 NAME
1551
1552 Tie::File - Access the lines of a disk file via a Perl array
1553
1554 =head1 SYNOPSIS
1555
1556         # This file documents Tie::File version 0.90
1557
1558         tie @array, 'Tie::File', filename or die ...;
1559
1560         $array[13] = 'blah';     # line 13 of the file is now 'blah'
1561         print $array[42];        # display line 42 of the file
1562
1563         $n_recs = @array;        # how many records are in the file?
1564         $#array -= 2;            # chop two records off the end
1565
1566
1567         for (@array) {
1568           s/PERL/Perl/g;         # Replace PERL with Perl everywhere in the file
1569         }
1570
1571         # These are just like regular push, pop, unshift, shift, and splice
1572         # Except that they modify the file in the way you would expect
1573
1574         push @array, new recs...;
1575         my $r1 = pop @array;
1576         unshift @array, new recs...;
1577         my $r1 = shift @array;
1578         @old_recs = splice @array, 3, 7, new recs...;
1579
1580         untie @array;            # all finished
1581
1582
1583 =head1 DESCRIPTION
1584
1585 C<Tie::File> represents a regular text file as a Perl array.  Each
1586 element in the array corresponds to a record in the file.  The first
1587 line of the file is element 0 of the array; the second line is element
1588 1, and so on.
1589
1590 The file is I<not> loaded into memory, so this will work even for
1591 gigantic files.
1592
1593 Changes to the array are reflected in the file immediately.
1594
1595 Lazy people and beginners may now stop reading the manual.
1596
1597 =head2 C<recsep>
1598
1599 What is a 'record'?  By default, the meaning is the same as for the
1600 C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
1601 probably C<"\n">.  (Minor exception: on dos and Win32 systems, a
1602 'record' is a string terminated by C<"\r\n">.)  You may change the
1603 definition of "record" by supplying the C<recsep> option in the C<tie>
1604 call:
1605
1606         tie @array, 'Tie::File', $file, recsep => 'es';
1607
1608 This says that records are delimited by the string C<es>.  If the file
1609 contained the following data:
1610
1611         Curse these pesky flies!\n
1612
1613 then the C<@array> would appear to have four elements:
1614
1615         "Curse th"
1616         "e p"
1617         "ky fli"
1618         "!\n"
1619
1620 An undefined value is not permitted as a record separator.  Perl's
1621 special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
1622 emulated.
1623
1624 Records read from the tied array do not have the record separator
1625 string on the end; this is to allow
1626
1627         $array[17] .= "extra";
1628
1629 to work as expected.
1630
1631 (See L<"autochomp">, below.)  Records stored into the array will have
1632 the record separator string appended before they are written to the
1633 file, if they don't have one already.  For example, if the record
1634 separator string is C<"\n">, then the following two lines do exactly
1635 the same thing:
1636
1637         $array[17] = "Cherry pie";
1638         $array[17] = "Cherry pie\n";
1639
1640 The result is that the contents of line 17 of the file will be
1641 replaced with "Cherry pie"; a newline character will separate line 17
1642 from line 18.  This means that in particular, this will do nothing:
1643
1644         chomp $array[17];
1645
1646 Because the C<chomp>ed value will have the separator reattached when
1647 it is written back to the file.  There is no way to create a file
1648 whose trailing record separator string is missing.
1649
1650 Inserting records that I<contain> the record separator string will
1651 produce a reasonable result, but if you can't foresee what this result
1652 will be, you'd better avoid doing this.
1653
1654 =head2 C<autochomp>
1655
1656 Normally, array elements have the record separator removed, so that if
1657 the file contains the text
1658
1659         Gold
1660         Frankincense
1661         Myrrh
1662
1663 the tied array will appear to contain C<("Gold", "Frankincense",
1664 "Myrrh")>.  If you set C<autochomp> to a false value, the record
1665 separator will not be removed.  If the file above was tied with
1666
1667         tie @gifts, "Tie::File", $gifts, autochomp => 0;
1668
1669 then the array C<@gifts> would appear to contain C<("Gold\n",
1670 "Frankincense\n", "Myrrh\n")>, or (on Win32 systems) C<("Gold\r\n",
1671 "Frankincense\r\n", "Myrrh\r\n")>.
1672
1673 =head2 C<mode>
1674
1675 Normally, the specified file will be opened for read and write access,
1676 and will be created if it does not exist.  (That is, the flags
1677 C<O_RDWR | O_CREAT> are supplied in the C<open> call.)  If you want to
1678 change this, you may supply alternative flags in the C<mode> option.
1679 See L<Fcntl> for a listing of available flags.
1680 For example:
1681
1682         # open the file if it exists, but fail if it does not exist
1683         use Fcntl 'O_RDWR';
1684         tie @array, 'Tie::File', $file, mode => O_RDWR;
1685
1686         # create the file if it does not exist
1687         use Fcntl 'O_RDWR', 'O_CREAT';
1688         tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
1689
1690         # open an existing file in read-only mode
1691         use Fcntl 'O_RDONLY';
1692         tie @array, 'Tie::File', $file, mode => O_RDONLY;
1693
1694 Opening the data file in write-only or append mode is not supported.
1695
1696 =head2 C<memory>
1697
1698 This is an upper limit on the amount of memory that C<Tie::File> will
1699 consume at any time while managing the file.  This is used for two
1700 things: managing the I<read cache> and managing the I<deferred write
1701 buffer>.
1702
1703 Records read in from the file are cached, to avoid having to re-read
1704 them repeatedly.  If you read the same record twice, the first time it
1705 will be stored in memory, and the second time it will be fetched from
1706 the I<read cache>.  The amount of data in the read cache will not
1707 exceed the value you specified for C<memory>.  If C<Tie::File> wants
1708 to cache a new record, but the read cache is full, it will make room
1709 by expiring the least-recently visited records from the read cache.
1710
1711 The default memory limit is 2Mib.  You can adjust the maximum read
1712 cache size by supplying the C<memory> option.  The argument is the
1713 desired cache size, in bytes.
1714
1715         # I have a lot of memory, so use a large cache to speed up access
1716         tie @array, 'Tie::File', $file, memory => 20_000_000;
1717
1718 Setting the memory limit to 0 will inhibit caching; records will be
1719 fetched from disk every time you examine them.
1720
1721 =head2 C<dw_size>
1722
1723 (This is an advanced feature.  Skip this section on first reading.)
1724
1725 If you use deferred writing (See L<"Deferred Writing">, below) then
1726 data you write into the array will not be written directly to the
1727 file; instead, it will be saved in the I<deferred write buffer> to be
1728 written out later.  Data in the deferred write buffer is also charged
1729 against the memory limit you set with the C<memory> option.
1730
1731 You may set the C<dw_size> option to limit the amount of data that can
1732 be saved in the deferred write buffer.  This limit may not exceed the
1733 total memory limit.  For example, if you set C<dw_size> to 1000 and
1734 C<memory> to 2500, that means that no more than 1000 bytes of deferred
1735 writes will be saved up.  The space available for the read cache will
1736 vary, but it will always be at least 1500 bytes (if the deferred write
1737 buffer is full) and it could grow as large as 2500 bytes (if the
1738 deferred write buffer is empty.)
1739
1740 If you don't specify a C<dw_size>, it defaults to the entire memory
1741 limit.
1742
1743 =head2 Option Format
1744
1745 C<-mode> is a synonym for C<mode>.  C<-recsep> is a synonym for
1746 C<recsep>.  C<-memory> is a synonym for C<memory>.  You get the
1747 idea.
1748
1749 =head1 Public Methods
1750
1751 The C<tie> call returns an object, say C<$o>.  You may call
1752
1753         $rec = $o->FETCH($n);
1754         $o->STORE($n, $rec);
1755
1756 to fetch or store the record at line C<$n>, respectively; similarly
1757 the other tied array methods.  (See L<perltie> for details.)  You may
1758 also call the following methods on this object:
1759
1760 =head2 C<flock>
1761
1762         $o->flock(MODE)
1763
1764 will lock the tied file.  C<MODE> has the same meaning as the second
1765 argument to the Perl built-in C<flock> function; for example
1766 C<LOCK_SH> or C<LOCK_EX | LOCK_NB>.  (These constants are provided by
1767 the C<use Fcntl ':flock'> declaration.)
1768
1769 C<MODE> is optional; the default is C<LOCK_EX>.
1770
1771 C<Tie::File> promises that the following sequence of operations will
1772 be safe:
1773
1774         my $o = tie @array, "Tie::File", $filename;
1775         $o->flock;
1776
1777 In particular, C<Tie::File> will I<not> read or write the file during
1778 the C<tie> call.  (Exception: Using C<mode =E<gt> O_TRUNC> will, of
1779 course, erase the file during the C<tie> call.  If you want to do this
1780 safely, then open the file without C<O_TRUNC>, lock the file, and use
1781 C<@array = ()>.)
1782
1783 The best way to unlock a file is to discard the object and untie the
1784 array.  It is probably unsafe to unlock the file without also untying
1785 it, because if you do, changes may remain unwritten inside the object.
1786 That is why there is no shortcut for unlocking.  If you really want to
1787 unlock the file prematurely, you know what to do; if you don't know
1788 what to do, then don't do it.
1789
1790 All the usual warnings about file locking apply here.  In particular,
1791 note that file locking in Perl is B<advisory>, which means that
1792 holding a lock will not prevent anyone else from reading, writing, or
1793 erasing the file; it only prevents them from getting another lock at
1794 the same time.  Locks are analogous to green traffic lights: If you
1795 have a green light, that does not prevent the idiot coming the other
1796 way from plowing into you sideways; it merely guarantees to you that
1797 the idiot does not also have a green light at the same time.
1798
1799 =head2 C<autochomp>
1800
1801         my $old_value = $o->autochomp(0);    # disable autochomp option
1802         my $old_value = $o->autochomp(1);    #  enable autochomp option
1803
1804         my $ac = $o->autochomp();   # recover current value
1805
1806 See L<"autochomp">, above.
1807
1808 =head2 C<defer>, C<flush>, C<discard>, and C<autodefer>
1809
1810 See L<"Deferred Writing">, below.
1811
1812 =head1 Tying to an already-opened filehandle
1813
1814 If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
1815 of the other C<IO> modules, you may use:
1816
1817         tie @array, 'Tie::File', $fh, ...;
1818
1819 Similarly if you opened that handle C<FH> with regular C<open> or
1820 C<sysopen>, you may use:
1821
1822         tie @array, 'Tie::File', \*FH, ...;
1823
1824 Handles that were opened write-only won't work.  Handles that were
1825 opened read-only will work as long as you don't try to modify the
1826 array.  Handles must be attached to seekable sources of data---that
1827 means no pipes or sockets.  If you supply a non-seekable handle, the
1828 C<tie> call will try to throw an exception.  (On Unix systems, it
1829 B<will> throw an exception.)
1830
1831 =head1 Deferred Writing
1832
1833 (This is an advanced feature.  Skip this section on first reading.)
1834
1835 Normally, modifying a C<Tie::File> array writes to the underlying file
1836 immediately.  Every assignment like C<$a[3] = ...> rewrites as much of
1837 the file as is necessary; typically, everything from line 3 through
1838 the end will need to be rewritten.  This is the simplest and most
1839 transparent behavior.  Performance even for large files is reasonably
1840 good.
1841
1842 However, under some circumstances, this behavior may be excessively
1843 slow.  For example, suppose you have a million-record file, and you
1844 want to do:
1845
1846         for (@FILE) {
1847           $_ = "> $_";
1848         }
1849
1850 The first time through the loop, you will rewrite the entire file,
1851 from line 0 through the end.  The second time through the loop, you
1852 will rewrite the entire file from line 1 through the end.  The third
1853 time through the loop, you will rewrite the entire file from line 2 to
1854 the end.  And so on.
1855
1856 If the performance in such cases is unacceptable, you may defer the
1857 actual writing, and then have it done all at once.  The following loop
1858 will perform much better for large files:
1859
1860         (tied @a)->defer;
1861         for (@a) {
1862           $_ = "> $_";
1863         }
1864         (tied @a)->flush;
1865
1866 If C<Tie::File>'s memory limit is large enough, all the writing will
1867 done in memory.  Then, when you call C<-E<gt>flush>, the entire file
1868 will be rewritten in a single pass.
1869
1870 (Actually, the preceding discussion is something of a fib.  You don't
1871 need to enable deferred writing to get good performance for this
1872 common case, because C<Tie::File> will do it for you automatically
1873 unless you specifically tell it not to.  See L<"autodeferring">,
1874 below.)
1875
1876 Calling C<-E<gt>flush> returns the array to immediate-write mode.  If
1877 you wish to discard the deferred writes, you may call C<-E<gt>discard>
1878 instead of C<-E<gt>flush>.  Note that in some cases, some of the data
1879 will have been written already, and it will be too late for
1880 C<-E<gt>discard> to discard all the changes.  Support for
1881 C<-E<gt>discard> may be withdrawn in a future version of C<Tie::File>.
1882
1883 Deferred writes are cached in memory up to the limit specified by the
1884 C<dw_size> option (see above).  If the deferred-write buffer is full
1885 and you try to write still more deferred data, the buffer will be
1886 flushed.  All buffered data will be written immediately, the buffer
1887 will be emptied, and the now-empty space will be used for future
1888 deferred writes.
1889
1890 If the deferred-write buffer isn't yet full, but the total size of the
1891 buffer and the read cache would exceed the C<memory> limit, the oldest
1892 records will be flushed out of the read cache until total usage is
1893 under the limit.
1894
1895 C<push>, C<pop>, C<shift>, C<unshift>, and C<splice> cannot be
1896 deferred.  When you perform one of these operations, any deferred data
1897 is written to the file and the operation is performed immediately.
1898 This may change in a future version.
1899
1900 If you resize the array with deferred writing enabled, the file will
1901 be resized immediately, but deferred records will not be written.
1902
1903 =head2 Autodeferring
1904
1905 C<Tie::File> tries to guess when deferred writing might be helpful,
1906 and to turn it on and off automatically.  In the example above, only
1907 the first two assignments will be done immediately; after this, all
1908 the changes to the file will be deferred up to the user-specified
1909 memory limit.
1910
1911 You should usually be able to ignore this and just use the module
1912 without thinking about deferring.  However, special applications may
1913 require fine control over which writes are deferred, or may require
1914 that all writes be immediate.  To disable the autodeferment feature,
1915 use
1916
1917         (tied @o)->autodefer(0);
1918
1919 or
1920
1921         tie @array, 'Tie::File', $file, autodefer => 0;
1922
1923
1924 =head1 CAVEATS
1925
1926 (That's Latin for 'warnings'.)
1927
1928 =over 4
1929
1930 =item *
1931
1932 This is BETA RELEASE SOFTWARE.  It may have bugs.  See the discussion
1933 below about the (lack of any) warranty.
1934
1935 In particular, this means that the interface may change in
1936 incompatible ways from one version to the next, without warning.  That
1937 has happened at least once already.  The interface will freeze before
1938 Perl 5.8 is released, probably sometime in April 2002.
1939
1940 =item *
1941
1942 Reasonable effort was made to make this module efficient.  Nevertheless,
1943 changing the size of a record in the middle of a large file will
1944 always be fairly slow, because everything after the new record must be
1945 moved.
1946
1947 =item *
1948
1949 The behavior of tied arrays is not precisely the same as for regular
1950 arrays.  For example:
1951
1952         # This DOES print "How unusual!"
1953         undef $a[10];  print "How unusual!\n" if defined $a[10];
1954
1955 C<undef>-ing a C<Tie::File> array element just blanks out the
1956 corresponding record in the file.  When you read it back again, you'll
1957 get the empty string, so the supposedly-C<undef>'ed value will be
1958 defined.  Similarly, if you have C<autochomp> disabled, then
1959
1960         # This DOES print "How unusual!" if 'autochomp' is disabled
1961         undef $a[10];
1962         print "How unusual!\n" if $a[10];
1963
1964 Because when C<autochomp> is disabled, C<$a[10]> will read back as
1965 C<"\n"> (or whatever the record separator string is.)
1966
1967 There are other minor differences, particularly regarding C<exists>
1968 and C<delete>, but in general, the correspondence is extremely close.
1969
1970 =item *
1971
1972 Not quite every effort was made to make this module as efficient as
1973 possible.  C<FETCHSIZE> should use binary search instead of linear
1974 search.  The cache's LRU queue should be a heap instead of a list.
1975
1976 The performance of the C<flush> method could be improved.  At present,
1977 it still rewrites the tail of the file once for each block of
1978 contiguous lines to be changed.  In the typical case, this will result
1979 in only one rewrite, but in peculiar cases it might be bad.  It should
1980 be possible to perform I<all> deferred writing with a single rewrite.
1981
1982 These defects are probably minor; in any event, they will be fixed in
1983 a future version of the module.
1984
1985 =item *
1986
1987 The author has supposed that since this module is concerned with file
1988 I/O, almost all normal use of it will be heavily I/O bound, and that
1989 the time to maintain complicated data structures inside the module
1990 will be dominated by the time to actually perform the I/O.  This
1991 suggests, for example, that an LRU read-cache is a good tradeoff, even
1992 if it requires substantial bookkeeping following a C<splice>
1993 operation.
1994
1995 =item *
1996
1997 You might be tempted to think that deferred writing is like
1998 transactions, with C<flush> as C<commit> and C<discard> as
1999 C<rollback>, but it isn't, so don't.
2000
2001 =back
2002
2003 =head1 SUBCLASSING
2004
2005 This version promises absolutely nothing about the internals, which
2006 may change without notice.  A future version of the module will have a
2007 well-defined and stable subclassing API.
2008
2009 =head1 WHAT ABOUT C<DB_File>?
2010
2011 C<DB_File>'s C<DB_RECNO> feature does something similar to
2012 C<Tie::File>, but there are a number of reasons that you might prefer
2013 C<Tie::File>.  C<DB_File> is a great piece of software, but the
2014 C<DB_RECNO> part is less great than the rest of it.
2015
2016 =over 4
2017
2018 =item *
2019
2020 C<DB_File> reads your entire file into memory, modifies it in memory,
2021 and the writes out the entire file again when you untie the file.
2022 This is completely impractical for large files.
2023
2024 C<Tie::File> does not do any of those things.  It doesn't try to read
2025 the entire file into memory; instead it uses a lazy approach and
2026 caches recently-used records.  The cache size is strictly bounded by
2027 the C<memory> option.  DB_File's C<-E<gt>{cachesize}> doesn't prevent
2028 your process from blowing up when reading a big file.
2029
2030 =item *
2031
2032 C<DB_File> has an extremely poor writing strategy.  If you have a
2033 ten-megabyte file and tie it with C<DB_File>, and then use
2034
2035         $a[0] =~ s/PERL/Perl/;
2036
2037 C<DB_file> will then read the entire ten-megabyte file into memory, do
2038 the change, and write the entire file back to disk, reading ten
2039 megabytes and writing ten megabytes.  C<Tie::File> will read and write
2040 only the first record.
2041
2042 If you have a million-record file and tie it with C<DB_File>, and then
2043 use
2044
2045         $a[999998] =~ s/Larry/Larry Wall/;
2046
2047 C<DB_File> will read the entire million-record file into memory, do
2048 the change, and write the entire file back to disk.  C<Tie::File> will
2049 only rewrite records 999998 and 999999.  During the writing process,
2050 it will never have more than a few kilobytes of data in memory at any
2051 time, even if the two records are very large.
2052
2053 =item *
2054
2055 Since changes to C<DB_File> files only appear when you do C<untie>, it
2056 can be inconvenient to arrange for concurrent access to the same file
2057 by two or more processes.  Each process needs to call C<$db-E<gt>sync>
2058 after every write.  When you change a C<Tie::File> array, the changes
2059 are reflected in the file immediately; no explicit C<-E<gt>sync> call
2060 is required.  (Or you can enable deferred writing mode to require that
2061 changes be explicitly sync'ed.)
2062
2063 =item *
2064
2065 C<DB_File> is only installed by default if you already have the C<db>
2066 library on your system; C<Tie::File> is pure Perl and is installed by
2067 default no matter what.  Starting with Perl 5.7.3 you can be
2068 absolutely sure it will be everywhere.  You will never have that
2069 surety with C<DB_File>.  If you don't have C<DB_File> yet, it requires
2070 a C compiler.  You can install C<Tie::File> from CPAN in five minutes
2071 with no compiler.
2072
2073 =item *
2074
2075 C<DB_File> is written in C, so if you aren't allowed to install
2076 modules on your system, it is useless.  C<Tie::File> is written in Perl,
2077 so even if you aren't allowed to install modules, you can look into
2078 the source code, see how it works, and copy the subroutines or the
2079 ideas from the subroutines directly into your own Perl program.
2080
2081 =item *
2082
2083 Except in very old, unsupported versions, C<DB_File>'s free license
2084 requires that you distribute the source code for your entire
2085 application.  If you are not able to distribute the source code for
2086 your application, you must negotiate an alternative license from
2087 Sleepycat, possibly for a fee.  Tie::File is under the Perl Artistic
2088 license and can be distributed free under the same terms as Perl
2089 itself.
2090
2091 =back
2092
2093 =head1 AUTHOR
2094
2095 Mark Jason Dominus
2096
2097 To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
2098
2099 To receive an announcement whenever a new version of this module is
2100 released, send a blank email message to
2101 C<mjd-perl-tiefile-subscribe@plover.com>.
2102
2103 The most recent version of this module, including documentation and
2104 any news of importance, will be available at
2105
2106         http://perl.plover.com/TieFile/
2107
2108
2109 =head1 LICENSE
2110
2111 C<Tie::File> version 0.90 is copyright (C) 2002 Mark Jason Dominus.
2112
2113 This library is free software; you may redistribute it and/or modify
2114 it under the same terms as Perl itself.
2115
2116 These terms are your choice of any of (1) the Perl Artistic Licence,
2117 or (2) version 2 of the GNU General Public License as published by the
2118 Free Software Foundation, or (3) any later version of the GNU General
2119 Public License.
2120
2121 This library is distributed in the hope that it will be useful,
2122 but WITHOUT ANY WARRANTY; without even the implied warranty of
2123 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2124 GNU General Public License for more details.
2125
2126 You should have received a copy of the GNU General Public License
2127 along with this library program; it should be in the file C<COPYING>.
2128 If not, write to the Free Software Foundation, Inc., 59 Temple Place,
2129 Suite 330, Boston, MA 02111 USA
2130
2131 For licensing inquiries, contact the author at:
2132
2133         Mark Jason Dominus
2134         255 S. Warnock St.
2135         Philadelphia, PA 19107
2136
2137 =head1 WARRANTY
2138
2139 C<Tie::File> version 0.90 comes with ABSOLUTELY NO WARRANTY.
2140 For details, see the license.
2141
2142 =head1 THANKS
2143
2144 Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
2145 core when I hadn't written it yet, and for generally being helpful,
2146 supportive, and competent.  (Usually the rule is "choose any one.")
2147 Also big thanks to Abhijit Menon-Sen for all of the same things.
2148
2149 Special thanks to Craig Berry and Peter Prymmer (for VMS portability
2150 help), Randy Kobes (for Win32 portability help), Clinton Pierce and
2151 Autrijus Tang (for heroic eleventh-hour Win32 testing above and beyond
2152 the call of duty), Michael G Schwern (for testing advice), and the
2153 rest of the CPAN testers (for testing generally).
2154
2155 Additional thanks to:
2156 Edward Avis /
2157 Gerrit Haase /
2158 Nikola Knezevic /
2159 Nick Ing-Simmons /
2160 Tassilo von Parseval /
2161 H. Dieter Pearcey /
2162 Slaven Rezic /
2163 Peter Scott /
2164 Peter Somu /
2165 Autrijus Tang (again) /
2166 Tels
2167
2168 =head1 TODO
2169
2170 More tests.  (_twrite should be tested separately, because there are a
2171 lot of weird special cases lurking in there.)
2172
2173 Improve SPLICE algorithm to use deferred writing machinery.
2174
2175 More tests.  (Stuff I didn't think of yet.)
2176
2177 Paragraph mode?
2178
2179 Fixed-length mode.  Leave-blanks mode.
2180
2181 Maybe an autolocking mode?
2182
2183 Record locking with fcntl()?  Then the module might support an undo
2184 log and get real transactions.  What a tour de force that would be.
2185
2186 Cleverer strategy for flushing deferred writes.
2187
2188 oMore tests.
2189
2190 =cut
2191
2192
2193
2194
2195
2196
2197