lib/Tie/File.pm

   1
   2 package Tie::File;
   3 use Carp;
   4 use POSIX 'SEEK_SET';
   5 use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
   6 require 5.005;
   7
   8 $VERSION = "0.17";
   9
  10 # Idea: The object will always contain an array of byte offsets
  11 # this will be filled in as is necessary and convenient.
  12 # fetch will do seek-read.
  13 # There will be a cache parameter that controls the amount of cached *data*
  14 # Also an LRU queue of cached records
  15 # store will read the relevant record into the cache
  16 # If it's the same length as what is being written, it will overwrite it in
  17 #   place; if not, it will do a from-to copying write.
  18 # The record separator string is also a parameter
  19
  20 # Record numbers start at ZERO.
  21
  22 my $DEFAULT_CACHE_SIZE = 1<<21;    # 2 megabytes
  23
  24 sub TIEARRAY {
  25   if (@_ % 2 != 0) {
  26     croak "usage: tie \@array, $_[0], filename, [option => value]...";
  27   }
  28   my ($pack, $file, %opts) = @_;
  29
  30   # transform '-foo' keys into 'foo' keys
  31   for my $key (keys %opts) {
  32     my $okey = $key;
  33     if ($key =~ s/^-+//) {
  34       $opts{$key} = delete $opts{$okey};
  35     }
  36   }
  37
  38   $opts{cachesize} ||= $DEFAULT_CACHE_SIZE;
  39
  40   # the cache is a hash instead of an array because it is likely to be
  41   # sparsely populated
  42   $opts{cache} = {};
  43   $opts{cached} = 0;   # total size of cached data
  44   $opts{lru} = [];     # replace with heap in later version
  45
  46   $opts{offsets} = [0];
  47   $opts{filename} = $file;
  48   $opts{recsep} = $/ unless defined $opts{recsep};
  49   $opts{recseplen} = length($opts{recsep});
  50   if ($opts{recseplen} == 0) {
  51     croak "Empty record separator not supported by $pack";
  52   }
  53
  54   my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;
  55   my $fh;
  56
  57   if (UNIVERSAL::isa($file, 'GLOB')) {
  58     unless (seek $file, 0, SEEK_SET) {
  59       croak "$pack: your filehandle does not appear to be seekable";
  60     }
  61     $fh = $file;
  62   } elsif (ref $file) {
  63     croak "usage: tie \@array, $pack, filename, [option => value]...";
  64   } else {
  65     $fh = \do { local *FH };   # only works in 5.005 and later
  66     sysopen $fh, $file, $mode, 0666 or return;
  67     binmode $fh;
  68   }
  69   { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
  70   $opts{fh} = $fh;
  71
  72   bless \%opts => $pack;
  73 }
  74
  75 sub FETCH {
  76   my ($self, $n) = @_;
  77
  78   # check the record cache
  79   { my $cached = $self->_check_cache($n);
  80     return $cached if defined $cached;
  81   }
  82
  83   unless ($#{$self->{offsets}} >= $n) {
  84     my $o = $self->_fill_offsets_to($n);
  85     # If it's still undefined, there is no such record, so return 'undef'
  86     return unless defined $o;
  87   }
  88
  89   my $fh = $self->{FH};
  90   $self->_seek($n);             # we can do this now that offsets is populated
  91   my $rec = $self->_read_record;
  92   $self->_cache_insert($n, $rec) if defined $rec;
  93   $rec;
  94 }
  95
  96 sub STORE {
  97   my ($self, $n, $rec) = @_;
  98
  99   $self->_fixrecs($rec);
 100
 101   # TODO: what should we do about the cache?  Install the new record
 102   # in the cache only if the old version of the same record was
 103   # already there?
 104
 105   # We need this to decide whether the new record will fit
 106   # It incidentally populates the offsets table
 107   # Note we have to do this before we alter the cache
 108   my $oldrec = $self->FETCH($n);
 109
 110   # _check_cache promotes record $n to MRU.  Is this correct behavior?
 111   if (my $cached = $self->_check_cache($n)) {
 112     $self->{cache}{$n} = $rec;
 113     $self->{cached} += length($rec) - length($cached);
 114   }
 115
 116   if (not defined $oldrec) {
 117     # We're storing a record beyond the end of the file
 118     $self->_extend_file_to($n+1);
 119     $oldrec = $self->{recsep};
 120   }
 121   my $len_diff = length($rec) - length($oldrec);
 122
 123   $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));
 124
 125   # now update the offsets
 126   # array slice goes from element $n+1 (the first one to move)
 127   # to the end
 128   for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
 129     $_ += $len_diff;
 130   }
 131 }
 132
 133 sub FETCHSIZE {
 134   my $self = shift;
 135   my $n = $#{$self->{offsets}};
 136   while (defined ($self->_fill_offsets_to($n+1))) {
 137     ++$n;
 138   }
 139   $n;
 140 }
 141
 142 sub STORESIZE {
 143   my ($self, $len) = @_;
 144   my $olen = $self->FETCHSIZE;
 145   return if $len == $olen;      # Woo-hoo!
 146
 147   # file gets longer
 148   if ($len > $olen) {
 149     $self->_extend_file_to($len);
 150     return;
 151   }
 152
 153   # file gets shorter
 154   $self->_seek($len);
 155   $self->_chop_file;
 156   $#{$self->{offsets}} = $len;
 157   my @cached = grep $_ >= $len, keys %{$self->{cache}};
 158   $self->_uncache(@cached);
 159 }
 160
 161 sub PUSH {
 162   my $self = shift;
 163   $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
 164   $self->FETCHSIZE;
 165 }
 166
 167 sub POP {
 168   my $self = shift;
 169   my $size = $self->FETCHSIZE;
 170   return if $size == 0;
 171 #  print STDERR "# POPPITY POP POP POP\n";
 172   scalar $self->SPLICE($size-1, 1);
 173 }
 174
 175 sub SHIFT {
 176   my $self = shift;
 177   scalar $self->SPLICE(0, 1);
 178 }
 179
 180 sub UNSHIFT {
 181   my $self = shift;
 182   $self->SPLICE(0, 0, @_);
 183   $self->FETCHSIZE;
 184 }
 185
 186 sub CLEAR {
 187   # And enable auto-defer mode, since it's likely that they just
 188   # did @a = (...);
 189   my $self = shift;
 190   $self->_seekb(0);
 191   $self->_chop_file;
 192   %{$self->{cache}}   = ();
 193     $self->{cached}   = 0;
 194   @{$self->{lru}}     = ();
 195   @{$self->{offsets}} = (0);
 196 }
 197
 198 sub EXTEND {
 199   my ($self, $n) = @_;
 200   $self->_fill_offsets_to($n);
 201   $self->_extend_file_to($n);
 202 }
 203
 204 sub DELETE {
 205   my ($self, $n) = @_;
 206   my $lastrec = $self->FETCHSIZE-1;
 207   if ($n == $lastrec) {
 208     $self->_seek($n);
 209     $self->_chop_file;
 210     $#{$self->{offsets}}--;
 211     $self->_uncache($n);
 212     # perhaps in this case I should also remove trailing null records?
 213   } else {
 214     $self->STORE($n, "");
 215   }
 216 }
 217
 218 sub EXISTS {
 219   my ($self, $n) = @_;
 220   $self->_fill_offsets_to($n);
 221   0 <= $n && $n < $self->FETCHSIZE;
 222 }
 223
 224 sub SPLICE {
 225   my ($self, $pos, $nrecs, @data) = @_;
 226   my @result;
 227
 228   $pos = 0 unless defined $pos;
 229
 230   # Deal with negative and other out-of-range positions
 231   # Also set default for $nrecs
 232   {
 233     my $oldsize = $self->FETCHSIZE;
 234     $nrecs = $oldsize unless defined $nrecs;
 235     my $oldpos = $pos;
 236
 237     if ($pos < 0) {
 238       $pos += $oldsize;
 239       if ($pos < 0) {
 240         croak "Modification of non-creatable array value attempted, subscript $oldpos";
 241       }
 242     }
 243
 244     if ($pos > $oldsize) {
 245       return unless @data;
 246       $pos = $oldsize;          # This is what perl does for normal arrays
 247     }
 248   }
 249
 250   $self->_fixrecs(@data);
 251   my $data = join '', @data;
 252   my $datalen = length $data;
 253   my $oldlen = 0;
 254
 255   # compute length of data being removed
 256   # Incidentally fills offsets table
 257   for ($pos .. $pos+$nrecs-1) {
 258     my $rec = $self->FETCH($_);
 259     last unless defined $rec;
 260     push @result, $rec;
 261     $oldlen += length($rec);
 262   }
 263
 264   # Modify the file
 265   $self->_twrite($data, $self->{offsets}[$pos], $oldlen);
 266
 267   # update the offsets table part 1
 268   # compute the offsets of the new records:
 269   my @new_offsets;
 270   if (@data) {
 271     push @new_offsets, $self->{offsets}[$pos];
 272     for (0 .. $#data-1) {
 273       push @new_offsets, $new_offsets[-1] + length($data[$_]);
 274     }
 275   }
 276   splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);
 277
 278   # update the offsets table part 2
 279   # adjust the offsets of the following old records
 280   for ($pos+@data .. $#{$self->{offsets}}) {
 281     $self->{offsets}[$_] += $datalen - $oldlen;
 282   }
 283   # If we scrubbed out all known offsets, regenerate the trivial table
 284   # that knows that the file does indeed start at 0.
 285   $self->{offsets}[0] = 0 unless @{$self->{offsets}};
 286
 287   # Perhaps the following cache foolery could be factored out
 288   # into a bunch of mor opaque cache functions.  For example,
 289   # it's odd to delete a record from the cache and then remove
 290   # it from the LRU queue later on; there should be a function to
 291   # do both at once.
 292
 293   # update the read cache, part 1
 294   # modified records
 295   # Consider this carefully for correctness
 296   for ($pos .. $pos+$nrecs-1) {
 297     my $cached = $self->{cache}{$_};
 298     next unless defined $cached;
 299     my $new = $data[$_-$pos];
 300     if (defined $new) {
 301       $self->{cached} += length($new) - length($cached);
 302       $self->{cache}{$_} = $new;
 303     } else {
 304       $self->_uncache($_);
 305     }
 306   }
 307   # update the read cache, part 2
 308   # moved records - records past the site of the change
 309   # need to be renumbered
 310   # Maybe merge this with the previous block?
 311   for (keys %{$self->{cache}}) {
 312     next unless $_ >= $pos + $nrecs;
 313     $self->{cache}{$_-$nrecs+@data} = delete $self->{cache}{$_};
 314   }
 315
 316   # fix the LRU queue
 317   my(@new, @changed);
 318   for (@{$self->{lru}}) {
 319     if ($_ >= $pos + $nrecs) {
 320       push @new, $_ + @data - $nrecs;
 321     } elsif ($_ >= $pos) {
 322       push @changed, $_ if $_ < $pos + @data;
 323     } else {
 324       push @new, $_;
 325     }
 326   }
 327   @{$self->{lru}} = (@new, @changed);
 328
 329   # Yes, the return value of 'splice' *is* actually this complicated
 330   wantarray ? @result : @result ? $result[-1] : undef;
 331 }
 332
 333 # write data into the file
 334 # $data is the data to be written.
 335 # it should be written at position $pos, and should overwrite
 336 # exactly $len of the following bytes.
 337 # Note that if length($data) > $len, the subsequent bytes will have to
 338 # be moved up, and if length($data) < $len, they will have to
 339 # be moved down
 340 sub _twrite {
 341   my ($self, $data, $pos, $len) = @_;
 342
 343   unless (defined $pos) {
 344     die "\$pos was undefined in _twrite";
 345   }
 346
 347   my $len_diff = length($data) - $len;
 348
 349   if ($len_diff == 0) {          # Woo-hoo!
 350     my $fh = $self->{fh};
 351     $self->_seekb($pos);
 352     $self->_write_record($data);
 353     return;                     # well, that was easy.
 354   }
 355
 356   # the two records are of different lengths
 357   # our strategy here: rewrite the tail of the file,
 358   # reading ahead one buffer at a time
 359   # $bufsize is required to be at least as large as the data we're overwriting
 360   my $bufsize = _bufsize($len_diff);
 361   my ($writepos, $readpos) = ($pos, $pos+$len);
 362   my $next_block;
 363
 364   # Seems like there ought to be a way to avoid the repeated code
 365   # and the special case here.  The read(1) is also a little weird.
 366   # Think about this.
 367   do {
 368     $self->_seekb($readpos);
 369     my $br = read $self->{fh}, $next_block, $bufsize;
 370     my $more_data = read $self->{fh}, my($dummy), 1;
 371     $self->_seekb($writepos);
 372     $self->_write_record($data);
 373     $readpos += $br;
 374     $writepos += length $data;
 375     $data = $next_block;
 376   } while $more_data;
 377   $self->_seekb($writepos);
 378   $self->_write_record($next_block);
 379
 380   # There might be leftover data at the end of the file
 381   $self->_chop_file if $len_diff < 0;
 382 }
 383
 384 # If a record does not already end with the appropriate terminator
 385 # string, append one.
 386 sub _fixrecs {
 387   my $self = shift;
 388   for (@_) {
 389     $_ .= $self->{recsep}
 390       unless substr($_, - $self->{recseplen}) eq $self->{recsep};
 391   }
 392 }
 393
 394 # seek to the beginning of record #$n
 395 # Assumes that the offsets table is already correctly populated
 396 #
 397 # Note that $n=-1 has a special meaning here: It means the start of
 398 # the last known record; this may or may not be the very last record
 399 # in the file, depending on whether the offsets table is fully populated.
 400 #
 401 sub _seek {
 402   my ($self, $n) = @_;
 403   my $o = $self->{offsets}[$n];
 404   defined($o)
 405     or confess("logic error: undefined offset for record $n");
 406   seek $self->{fh}, $o, SEEK_SET
 407     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 408 }
 409
 410 sub _seekb {
 411   my ($self, $b) = @_;
 412   seek $self->{fh}, $b, SEEK_SET
 413     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 414 }
 415
 416 # populate the offsets table up to the beginning of record $n
 417 # return the offset of record $n
 418 sub _fill_offsets_to {
 419   my ($self, $n) = @_;
 420   my $fh = $self->{fh};
 421   local *OFF = $self->{offsets};
 422   my $rec;
 423
 424   until ($#OFF >= $n) {
 425     my $o = $OFF[-1];
 426     $self->_seek(-1);           # tricky -- see comment at _seek
 427     $rec = $self->_read_record;
 428     if (defined $rec) {
 429       push @OFF, tell $fh;
 430     } else {
 431       return;                   # It turns out there is no such record
 432     }
 433   }
 434
 435   # we have now read all the records up to record n-1,
 436   # so we can return the offset of record n
 437   return $OFF[$n];
 438 }
 439
 440 # assumes that $rec is already suitably terminated
 441 sub _write_record {
 442   my ($self, $rec) = @_;
 443   my $fh = $self->{fh};
 444   print $fh $rec
 445     or die "Couldn't write record: $!";  # "Should never happen."
 446
 447 }
 448
 449 sub _read_record {
 450   my $self = shift;
 451   my $rec;
 452   { local $/ = $self->{recsep};
 453     my $fh = $self->{fh};
 454     $rec = <$fh>;
 455   }
 456   $rec;
 457 }
 458
 459 sub _cache_insert {
 460   my ($self, $n, $rec) = @_;
 461
 462   # Do not cache records that are too big to fit in the cache.
 463   return unless length $rec <= $self->{cachesize};
 464
 465   $self->{cache}{$n} = $rec;
 466   $self->{cached} += length $rec;
 467   push @{$self->{lru}}, $n;     # most-recently-used is at the END
 468
 469   $self->_cache_flush if $self->{cached} > $self->{cachesize};
 470 }
 471
 472 sub _uncache {
 473   my $self = shift;
 474   for my $n (@_) {
 475     my $cached = delete $self->{cache}{$n};
 476     next unless defined $cached;
 477     @{$self->{lru}} = grep $_ != $n, @{$self->{lru}};
 478     $self->{cached} -= length($cached);
 479   }
 480 }
 481
 482 sub _check_cache {
 483   my ($self, $n) = @_;
 484   my $rec;
 485   return unless defined($rec = $self->{cache}{$n});
 486
 487   # cache hit; update LRU queue and return $rec
 488   # replace this with a heap in a later version
 489   @{$self->{lru}} = ((grep $_ ne $n, @{$self->{lru}}), $n);
 490   $rec;
 491 }
 492
 493 sub _cache_flush {
 494   my ($self) = @_;
 495   while ($self->{cached} > $self->{cachesize}) {
 496     my $lru = shift @{$self->{lru}};
 497     $self->{cached} -= length $lru;
 498     delete $self->{cache}{$lru};
 499   }
 500 }
 501
 502 # We have read to the end of the file and have the offsets table
 503 # entirely populated.  Now we need to write a new record beyond
 504 # the end of the file.  We prepare for this by writing
 505 # empty records into the file up to the position we want
 506 #
 507 # assumes that the offsets table already contains the offset of record $n,
 508 # if it exists, and extends to the end of the file if not.
 509 sub _extend_file_to {
 510   my ($self, $n) = @_;
 511   $self->_seek(-1);             # position after the end of the last record
 512   my $pos = $self->{offsets}[-1];
 513
 514   # the offsets table has one entry more than the total number of records
 515   $extras = $n - $#{$self->{offsets}};
 516
 517   # Todo : just use $self->{recsep} x $extras here?
 518   while ($extras-- > 0) {
 519     $self->_write_record($self->{recsep});
 520     push @{$self->{offsets}}, tell $self->{fh};
 521   }
 522 }
 523
 524 # Truncate the file at the current position
 525 sub _chop_file {
 526   my $self = shift;
 527   truncate $self->{fh}, tell($self->{fh});
 528 }
 529
 530 # compute the size of a buffer suitable for moving
 531 # all the data in a file forward $n bytes
 532 # ($n may be negative)
 533 # The result should be at least $n.
 534 sub _bufsize {
 535   my $n = shift;
 536   return 8192 if $n < 0;
 537   my $b = $n & ~8191;
 538   $b += 8192 if $n & 8191;
 539   $b;
 540 }
 541
 542 # Lock the file
 543 sub flock {
 544   my ($self, $op) = @_;
 545   unless (@_ <= 3) {
 546     my $pack = ref $self;
 547     croak "Usage: $pack\->flock([OPERATION])";
 548   }
 549   my $fh = $self->{fh};
 550   $op = LOCK_EX unless defined $op;
 551   flock $fh, $op;
 552 }
 553
 554 # Given a file, make sure the cache is consistent with the
 555 # file contents
 556 sub _check_integrity {
 557   my ($self, $file, $warn) = @_;
 558   my $good = 1;
 559
 560
 561   if (not defined $self->{offsets}[0]) {
 562     $warn && print STDERR "# offset 0 is missing!\n";
 563     $good = 0;
 564   } elsif ($self->{offsets}[0] != 0) {
 565     $warn && print STDERR "# offset 0 is missing!\n";
 566     $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n";
 567     $good = 0;
 568   }
 569
 570   local *F = $self->{fh};
 571   seek F, 0, SEEK_SET;
 572   local $/ = $self->{recsep};
 573   $. = 0;
 574
 575   while (<F>) {
 576     my $n = $. - 1;
 577     my $cached = $self->{cache}{$n};
 578     my $offset = $self->{offsets}[$.];
 579     my $ao = tell F;
 580     if (defined $offset && $offset != $ao) {
 581       $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n";
 582       $good = 0;
 583     }
 584     if (defined $cached && $_ ne $cached) {
 585       $good = 0;
 586       chomp $cached;
 587       chomp;
 588       $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n";
 589     }
 590   }
 591
 592   my $cachesize = 0;
 593   while (my ($n, $r) = each %{$self->{cache}}) {
 594     $cachesize += length($r);
 595     next if $n+1 <= $.;         # checked this already
 596     $warn && print STDERR "# spurious caching of record $n\n";
 597     $good = 0;
 598   }
 599   if ($cachesize != $self->{cached}) {
 600     $warn && print STDERR "# cache size is $self->{cached}, should be $cachesize\n";
 601     $good = 0;
 602   }
 603
 604   my (%seen, @duplicate);
 605   for (@{$self->{lru}}) {
 606     $seen{$_}++;
 607     if (not exists $self->{cache}{$_}) {
 608       print "# $_ is mentioned in the LRU queue, but not in the cache\n";
 609       $good = 0;
 610     }
 611   }
 612   @duplicate = grep $seen{$_}>1, keys %seen;
 613   if (@duplicate) {
 614     my $records = @duplicate == 1 ? 'Record' : 'Records';
 615     my $appear  = @duplicate == 1 ? 'appears' : 'appear';
 616     print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n";
 617     $good = 0;
 618   }
 619   for (keys %{$self->{cache}}) {
 620     unless (exists $seen{$_}) {
 621       print "# record $_ is in the cache but not the LRU queue\n";
 622       $good = 0;
 623     }
 624   }
 625
 626   $good;
 627 }
 628
 629 "Cogito, ergo sum.";  # don't forget to return a true value from the file
 630
 631 =head1 NAME
 632
 633 Tie::File - Access the lines of a disk file via a Perl array
 634
 635 =head1 SYNOPSIS
 636
 637         # This file documents Tie::File version 0.17
 638
 639         tie @array, 'Tie::File', filename or die ...;
 640
 641         $array[13] = 'blah';     # line 13 of the file is now 'blah'
 642         print $array[42];        # display line 42 of the file
 643
 644         $n_recs = @array;        # how many records are in the file?
 645         $#array = $n_recs - 2;   # chop records off the end
 646
 647         # As you would expect:
 648
 649         push @array, new recs...;
 650         my $r1 = pop @array;
 651         unshift @array, new recs...;
 652         my $r1 = shift @array;
 653         @old_recs = splice @array, 3, 7, new recs...;
 654
 655         untie @array;            # all finished
 656
 657 =head1 DESCRIPTION
 658
 659 C<Tie::File> represents a regular text file as a Perl array.  Each
 660 element in the array corresponds to a record in the file.  The first
 661 line of the file is element 0 of the array; the second line is element
 662 1, and so on.
 663
 664 The file is I<not> loaded into memory, so this will work even for
 665 gigantic files.
 666
 667 Changes to the array are reflected in the file immediately.
 668
 669 =head2 C<recsep>
 670
 671 What is a 'record'?  By default, the meaning is the same as for the
 672 C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
 673 probably C<"\n"> or C<"\r\n">.  You may change the definition of
 674 "record" by supplying the C<recsep> option in the C<tie> call:
 675
 676         tie @array, 'Tie::File', $file, recsep => 'es';
 677
 678 This says that records are delimited by the string C<es>.  If the file contained the following data:
 679
 680         Curse these pesky flies!\n
 681
 682 then the C<@array> would appear to have four elements:
 683
 684         "Curse thes"
 685         "e pes"
 686         "ky flies"
 687         "!\n"
 688
 689 An undefined value is not permitted as a record separator.  Perl's
 690 special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
 691 emulated.
 692
 693 Records read from the tied array will have the record separator string
 694 on the end, just as if they were read from the C<E<lt>...E<gt>>
 695 operator.  Records stored into the array will have the record
 696 separator string appended before they are written to the file, if they
 697 don't have one already.  For example, if the record separator string
 698 is C<"\n">, then the following two lines do exactly the same thing:
 699
 700         $array[17] = "Cherry pie";
 701         $array[17] = "Cherry pie\n";
 702
 703 The result is that the contents of line 17 of the file will be
 704 replaced with "Cherry pie"; a newline character will separate line 17
 705 from line 18.  This means that in particular, this will do nothing:
 706
 707         chomp $array[17];
 708
 709 Because the C<chomp>ed value will have the separator reattached when
 710 it is written back to the file.  There is no way to create a file
 711 whose trailing record separator string is missing.
 712
 713 Inserting records that I<contain> the record separator string will
 714 produce a reasonable result, but if you can't foresee what this result
 715 will be, you'd better avoid doing this.
 716
 717 =head2 C<mode>
 718
 719 Normally, the specified file will be opened for read and write access,
 720 and will be created if it does not exist.  (That is, the flags
 721 C<O_RDWR | O_CREAT> are supplied in the C<open> call.)  If you want to
 722 change this, you may supply alternative flags in the C<mode> option.
 723 See L<Fcntl> for a listing of available flags.
 724 For example:
 725
 726         # open the file if it exists, but fail if it does not exist
 727         use Fcntl 'O_RDWR';
 728         tie @array, 'Tie::File', $file, mode => O_RDWR;
 729
 730         # create the file if it does not exist
 731         use Fcntl 'O_RDWR', 'O_CREAT';
 732         tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
 733
 734         # open an existing file in read-only mode
 735         use Fcntl 'O_RDONLY';
 736         tie @array, 'Tie::File', $file, mode => O_RDONLY;
 737
 738 Opening the data file in write-only or append mode is not supported.
 739
 740 =head2 C<cachesize>
 741
 742 Records read in from the file are cached, to avoid having to re-read
 743 them repeatedly.  If you read the same record twice, the first time it
 744 will be stored in memory, and the second time it will be fetched from
 745 memory.
 746
 747 The cache has a bounded size; when it exceeds this size, the
 748 least-recently visited records will be purged from the cache.  The
 749 default size is 2Mib.  You can adjust the amount of space used for the
 750 cache by supplying the C<cachesize> option.  The argument is the desired cache size, in bytes.
 751
 752         # I have a lot of memory, so use a large cache to speed up access
 753         tie @array, 'Tie::File', $file, cachesize => 20_000_000;
 754
 755 Setting the cache size to 0 will inhibit caching; records will be
 756 fetched from disk every time you examine them.
 757
 758 =head2 Option Format
 759
 760 C<-mode> is a synonym for C<mode>.  C<-recsep> is a synonym for
 761 C<recsep>.  C<-cachesize> is a synonym for C<cachesize>.  You get the
 762 idea.
 763
 764 =head1 Public Methods
 765
 766 The C<tie> call returns an object, say C<$o>.  You may call
 767
 768         $rec = $o->FETCH($n);
 769         $o->STORE($n, $rec);
 770
 771 to fetch or store the record at line C<$n>, respectively.  The only other public method in this package is:
 772
 773 =head2 C<flock>
 774
 775         $o->flock(MODE)
 776
 777 will lock the tied file.  C<MODE> has the same meaning as the second
 778 argument to the Perl built-in C<flock> function; for example
 779 C<LOCK_SH> or C<LOCK_EX | LOCK_NB>.  (These constants are provided by
 780 the C<use Fcntl ':flock'> declaration.)
 781
 782 C<MODE> is optional; C<$o-E<gt>flock> simply locks the file with
 783 C<LOCK_EX>.
 784
 785 The best way to unlock a file is to discard the object and untie the
 786 array.  It is probably unsafe to unlock the file without also untying
 787 it, because if you do, changes may remain unwritten inside the object.
 788 That is why there is no shortcut for unlocking.  If you really want to
 789 unlock the file prematurely, you know what to do; if you don't know
 790 what to do, then don't do it.
 791
 792 All the usual warnings about file locking apply here.  In particular,
 793 note that file locking in Perl is B<advisory>, which means that
 794 holding a lock will not prevent anyone else from reading, writing, or
 795 erasing the file; it only prevents them from getting another lock at
 796 the same time.  Locks are analogous to green traffic lights: If you
 797 have a green light, that does not prevent the idiot coming the other
 798 way from plowing into you sideways; it merely guarantees to you that
 799 the idiot does not also have a green light at the same time.
 800
 801 =head2 Tying to an already-opened filehandle
 802
 803 If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
 804 of the other C<IO> modules, you may use:
 805
 806         tie @array, 'Tie::File', $fh, ...;
 807
 808 Similarly if you opened that handle C<FH> with regular C<open> or
 809 C<sysopen>, you may use:
 810
 811         tie @array, 'Tie::File', \*FH, ...;
 812
 813 Handles that were opened write-only won't work.  Handles that were
 814 opened read-only will work as long as you don't try to write to them.
 815 Handles must be attached to seekable sources of data---that means no
 816 pipes or sockets.  If you try to supply a non-seekable handle, the
 817 C<tie> call will try to abort your program.  This feature is not yet
 818 supported under VMS.
 819
 820 =head1 CAVEATS
 821
 822 (That's Latin for 'warnings'.)
 823
 824 =head2 Efficiency Note
 825
 826 Every effort was made to make this module efficient.  Nevertheless,
 827 changing the size of a record in the middle of a large file will
 828 always be slow, because everything after the new record must be moved.
 829
 830 In particular, note that:
 831
 832         # million-line file
 833         for (@file_array) {
 834           $_ .= 'x';
 835         }
 836
 837 is likely to be very slow, because the first iteration must relocate
 838 lines 1 through 999,999; the second iteration must relocate lines 2
 839 through 999,999, and so on.  The relocation is done using block
 840 writes, however, so it's not as slow as it might be.
 841
 842 A soon-to-be-released version of this module will provide a mechanism
 843 for getting better performance in such cases, by deferring the writing
 844 until it can be done all at once.
 845
 846 =head2 Efficiency Note 2
 847
 848 Not every effort was made to make this module as efficient as
 849 possible.  C<FETCHSIZE> should use binary search instead of linear
 850 search.  The cache's LRU queue should be a heap instead of a list.
 851 These defects are probably minor; in any event, they will be fixed in
 852 a later version of the module.
 853
 854 =head2 Efficiency Note 3
 855
 856 The author has supposed that since this module is concerned with file
 857 I/O, almost all normal use of it will be heavily I/O bound, and that
 858 the time to maintain complicated data structures inside the module
 859 will be dominated by the time to actually perform the I/O.  This
 860 suggests, for example, that an LRU read-cache is a good tradeoff,
 861 even if it requires substantial adjustment following a C<splice>
 862 operation.
 863
 864 =head1 CAVEATS
 865
 866 (That's Latin for 'warnings'.)
 867
 868 The behavior of tied arrays is not precisely the same as for regular
 869 arrays.  For example:
 870
 871         undef $a[10];  print "How unusual!\n" if $a[10];
 872
 873 C<undef>-ing a C<Tie::File> array element just blanks out the
 874 corresponding record in the file.  When you read it back again, you'll
 875 see the record separator (typically, $a[10] will appear to contain
 876 "\n") so the supposedly-C<undef>'ed value will be true.
 877
 878 There are other minor differences, but in general, the correspondence
 879 is extremely close.
 880
 881 =head1 AUTHOR
 882
 883 Mark Jason Dominus
 884
 885 To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
 886
 887 To receive an announcement whenever a new version of this module is
 888 released, send a blank email message to
 889 C<mjd-perl-tiefile-subscribe@plover.com>.
 890
 891 =head1 LICENSE
 892
 893 C<Tie::File> version 0.17 is copyright (C) 2002 Mark Jason Dominus.
 894
 895 This library is free software; you may redistribute it and/or modify
 896 it under the same terms as Perl itself.
 897
 898 These terms include your choice of (1) the Perl Artistic Licence, or
 899 (2) version 2 of the GNU General Public License as published by the
 900 Free Software Foundation, or (3) any later version of the GNU General
 901 Public License.
 902
 903 This library is distributed in the hope that it will be useful,
 904 but WITHOUT ANY WARRANTY; without even the implied warranty of
 905 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 906 GNU General Public License for more details.
 907
 908 You should have received a copy of the GNU General Public License
 909 along with this library program; it should be in the file C<COPYING>.
 910 If not, write to the Free Software Foundation, Inc., 59 Temple Place,
 911 Suite 330, Boston, MA 02111 USA
 912
 913 For licensing inquiries, contact the author at:
 914
 915         Mark Jason Dominus
 916         255 S. Warnock St.
 917         Philadelphia, PA 19107
 918
 919 =head1 WARRANTY
 920
 921 C<Tie::File> version 0.17 comes with ABSOLUTELY NO WARRANTY.
 922 For details, see the license.
 923
 924 =head1 THANKS
 925
 926 Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
 927 core when I hadn't written it yet, and for generally being helpful,
 928 supportive, and competent.  (Usually the rule is "choose any one.")
 929 Also big thanks to Abhijit Menon-Sen for all of the same things.
 930
 931 Special thanks to Craig Berry (for VMS portability help), Randy Kobes
 932 (for Win32 portability help), the rest of the CPAN testers (for
 933 testing).
 934
 935 More thanks to:
 936 Gerrit Haase /
 937 Nick Ing-Simmons /
 938 Tassilo von Parseval /
 939 H. Dieter Pearcey /
 940 Peter Somu /
 941 Tels
 942
 943 =head1 TODO
 944
 945 Test DELETE machinery more carefully.
 946
 947 More tests.  (Configuration options, cache flushery.  _twrite should
 948 be tested separately, because there are a lot of weird special cases
 949 lurking in there.)
 950
 951 More tests.  (Stuff I didn't think of yet.)
 952
 953 Deferred writing. (!!!)
 954
 955 Paragraph mode?
 956
 957 More tests.
 958
 959 Fixed-length mode.
 960
 961 Maybe an autolocking mode?
 962
 963 =cut
 964