lib/Tie/File.pm

   1
   2 package Tie::File;
   3 use Carp;
   4 use POSIX 'SEEK_SET';
   5 use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
   6 require 5.005;
   7
   8 $VERSION = "0.14";
   9
  10 # Idea: The object will always contain an array of byte offsets
  11 # this will be filled in as is necessary and convenient.
  12 # fetch will do seek-read.
  13 # There will be a cache parameter that controls the amount of cached *data*
  14 # Also an LRU queue of cached records
  15 # store will read the relevant record into the cache
  16 # If it's the same length as what is being written, it will overwrite it in
  17 #   place; if not, it will do a from-to copying write.
  18 # The record separator string is also a parameter
  19
  20 # Record numbers start at ZERO.
  21
  22 my $DEFAULT_CACHE_SIZE = 1<<21;    # 2 megabytes
  23
  24 sub TIEARRAY {
  25   if (@_ % 2 != 0) {
  26     croak "usage: tie \@array, $_[0], filename, [option => value]...";
  27   }
  28   my ($pack, $file, %opts) = @_;
  29
  30   # transform '-foo' keys into 'foo' keys
  31   for my $key (keys %opts) {
  32     my $okey = $key;
  33     if ($key =~ s/^-+//) {
  34       $opts{$key} = delete $opts{$okey};
  35     }
  36   }
  37
  38   $opts{cachesize} ||= $DEFAULT_CACHE_SIZE;
  39
  40   # the cache is a hash instead of an array because it is likely to be
  41   # sparsely populated
  42   $opts{cache} = {};
  43   $opts{cached} = 0;   # total size of cached data
  44   $opts{lru} = [];     # replace with heap in later version
  45
  46   $opts{offsets} = [0];
  47   $opts{filename} = $file;
  48   $opts{recsep} = $/ unless defined $opts{recsep};
  49   $opts{recseplen} = length($opts{recsep});
  50   if ($opts{recseplen} == 0) {
  51     croak "Empty record separator not supported by $pack";
  52   }
  53
  54   my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;
  55
  56   my $fh = \do { local *FH };   # only works in 5.005 and later
  57   sysopen $fh, $file, $mode, 0666 or return;
  58   binmode $fh;
  59   { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
  60   $opts{fh} = $fh;
  61
  62   bless \%opts => $pack;
  63 }
  64
  65 sub FETCH {
  66   my ($self, $n) = @_;
  67
  68   # check the record cache
  69   { my $cached = $self->_check_cache($n);
  70     return $cached if defined $cached;
  71   }
  72
  73   unless ($#{$self->{offsets}} >= $n) {
  74     my $o = $self->_fill_offsets_to($n);
  75     # If it's still undefined, there is no such record, so return 'undef'
  76     return unless defined $o;
  77   }
  78
  79   my $fh = $self->{FH};
  80   $self->_seek($n);             # we can do this now that offsets is populated
  81   my $rec = $self->_read_record;
  82   $self->_cache_insert($n, $rec) if defined $rec;
  83   $rec;
  84 }
  85
  86 sub STORE {
  87   my ($self, $n, $rec) = @_;
  88
  89   $self->_fixrecs($rec);
  90
  91   # TODO: what should we do about the cache?  Install the new record
  92   # in the cache only if the old version of the same record was
  93   # already there?
  94
  95   # We need this to decide whether the new record will fit
  96   # It incidentally populates the offsets table
  97   # Note we have to do this before we alter the cache
  98   my $oldrec = $self->FETCH($n);
  99
 100   # _check_cache promotes record $n to MRU.  Is this correct behavior?
 101   $self->{cache}{$n} = $rec if $self->_check_cache($n);
 102
 103   if (not defined $oldrec) {
 104     # We're storing a record beyond the end of the file
 105     $self->_extend_file_to($n+1);
 106     $oldrec = $self->{recsep};
 107   }
 108   my $len_diff = length($rec) - length($oldrec);
 109
 110   $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));
 111
 112   # now update the offsets
 113   # array slice goes from element $n+1 (the first one to move)
 114   # to the end
 115   for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
 116     $_ += $len_diff;
 117   }
 118 }
 119
 120 sub FETCHSIZE {
 121   my $self = shift;
 122   my $n = $#{$self->{offsets}};
 123   while (defined ($self->_fill_offsets_to($n+1))) {
 124     ++$n;
 125   }
 126   $n;
 127 }
 128
 129 sub STORESIZE {
 130   my ($self, $len) = @_;
 131   my $olen = $self->FETCHSIZE;
 132   return if $len == $olen;      # Woo-hoo!
 133
 134   # file gets longer
 135   if ($len > $olen) {
 136     $self->_extend_file_to($len);
 137     return;
 138   }
 139
 140   # file gets shorter
 141   $self->_seek($len);
 142   $self->_chop_file;
 143   $#{$self->{offsets}} = $len-1;
 144   my @cached = grep $_ > $len, keys %{$self->{cache}};
 145   delete @{$self->{cache}}{@cached} if @cached;
 146 }
 147
 148 sub PUSH {
 149   my $self = shift;
 150   $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
 151   $self->FETCHSIZE;
 152 }
 153
 154 sub POP {
 155   my $self = shift;
 156   scalar $self->SPLICE(-1, 1);
 157 }
 158
 159 sub SHIFT {
 160   my $self = shift;
 161   scalar $self->SPLICE(0, 1);
 162 }
 163
 164 sub UNSHIFT {
 165   my $self = shift;
 166   $self->SPLICE(0, 0, @_);
 167   $self->FETCHSIZE;
 168 }
 169
 170 sub CLEAR {
 171   # And enable auto-defer mode, since it's likely that they just
 172   # did @a = (...);
 173   my $self = shift;
 174   $self->_seekb(0);
 175   $self->_chop_file;
 176   %{$self->{cache}}   = ();
 177     $self->{cached}   = 0;
 178   @{$self->{lru}}     = ();
 179   @{$self->{offsets}} = (0);
 180 }
 181
 182 sub EXTEND {
 183   my ($self, $n) = @_;
 184   $self->_fill_offsets_to($n);
 185   $self->_extend_file_to($n);
 186 }
 187
 188 sub DELETE {
 189   my ($self, $n) = @_;
 190   my $lastrec = $self->FETCHSIZE-1;
 191   if ($n == $lastrec) {
 192     $self->_seek($n);
 193     $self->_chop_file;
 194     # perhaps in this case I should also remove trailing null records?
 195   } else {
 196     $self->STORE($n, "");
 197   }
 198 }
 199
 200 sub EXISTS {
 201   my ($self, $n) = @_;
 202   $self->_fill_offsets_to($n);
 203   0 <= $n && $n < $self->FETCHSIZE;
 204 }
 205
 206 sub SPLICE {
 207   my ($self, $pos, $nrecs, @data) = @_;
 208   my @result;
 209
 210   {
 211     my $oldsize = $self->FETCHSIZE;
 212     my $oldpos = $pos;
 213
 214     if ($pos < 0) {
 215       $pos += $oldsize;
 216       if ($pos < 0) {
 217         croak "Modification of non-creatable array value attempted, subscript $oldpos";
 218       }
 219     }
 220
 221     if ($pos > $oldsize) {
 222       return unless @data;
 223       $pos = $oldsize;          # This is what perl does for normal arrays
 224     }
 225   }
 226
 227   $self->_fixrecs(@data);
 228   my $data = join '', @data;
 229   my $datalen = length $data;
 230   my $oldlen = 0;
 231
 232   # compute length of data being removed
 233   # Incidentally fills offsets table
 234   for ($pos .. $pos+$nrecs-1) {
 235     my $rec = $self->FETCH($_);
 236     last unless defined $rec;
 237     push @result, $rec;
 238     $oldlen += length($rec);
 239   }
 240
 241   # Modify the file
 242   $self->_twrite($data, $self->{offsets}[$pos], $oldlen);
 243
 244   # update the offsets table part 1
 245   # compute the offsets of the new records:
 246   my @new_offsets;
 247   if (@data) {
 248     push @new_offsets, $self->{offsets}[$pos];
 249     for (0 .. $#data-1) {
 250       push @new_offsets, $new_offsets[-1] + length($data[$_]);
 251     }
 252   }
 253   splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);
 254
 255   # update the offsets table part 2
 256   # adjust the offsets of the following old records
 257   for ($pos+@data .. $#{$self->{offsets}}) {
 258     $self->{offsets}[$_] += $datalen - $oldlen;
 259   }
 260   # If we scrubbed out all known offsets, regenerate the trivial table
 261   # that knows that the file does indeed start at 0.
 262   $self->{offsets}[0] = 0 unless @{$self->{offsets}};
 263
 264   # Perhaps the following cache foolery could be factored out
 265   # into a bunch of mor opaque cache functions.  For example,
 266   # it's odd to delete a record from the cache and then remove
 267   # it from the LRU queue later on; there should be a function to
 268   # do both at once.
 269
 270   # update the read cache, part 1
 271   # modified records
 272   # Consider this carefully for correctness
 273   for ($pos .. $pos+$nrecs-1) {
 274     my $cached = $self->{cache}{$_};
 275     next unless defined $cached;
 276     my $new = $data[$_-$pos];
 277     if (defined $new) {
 278       $self->{cached} += length($new) - length($cached);
 279       $self->{cache}{$_} = $new;
 280     } else {
 281       delete $self->{cache}{$_};
 282       $self->{cached} -= length($cached);
 283     }
 284   }
 285   # update the read cache, part 2
 286   # moved records - records past the site of the change
 287   # need to be renumbered
 288   # Maybe merge this with the previous block?
 289   for (keys %{$self->{cache}}) {
 290     next unless $_ >= $pos + $nrecs;
 291     $self->{cache}{$_-$nrecs+@data} = delete $self->{cache}{$_};
 292   }
 293
 294   # fix the LRU queue
 295   my(@new, @changed);
 296   for (@{$self->{lru}}) {
 297     if ($_ >= $pos + $nrecs) {
 298       push @new, $_ + @data - $nrecs;
 299     } elsif ($_ >= $pos) {
 300       push @changed, $_ if $_ < $pos + @data;
 301     } else {
 302       push @new, $_;
 303     }
 304   }
 305   @{$self->{lru}} = (@new, @changed);
 306
 307   # Yes, the return value of 'splice' *is* actually this complicated
 308   wantarray ? @result : @result ? $result[-1] : undef;
 309 }
 310
 311 # write data into the file
 312 # $data is the data to be written.
 313 # it should be written at position $pos, and should overwrite
 314 # exactly $len of the following bytes.
 315 # Note that if length($data) > $len, the subsequent bytes will have to
 316 # be moved up, and if length($data) < $len, they will have to
 317 # be moved down
 318 sub _twrite {
 319   my ($self, $data, $pos, $len) = @_;
 320
 321   unless (defined $pos) {
 322     die "\$pos was undefined in _twrite";
 323   }
 324
 325   my $len_diff = length($data) - $len;
 326
 327   if ($len_diff == 0) {          # Woo-hoo!
 328     my $fh = $self->{fh};
 329     $self->_seekb($pos);
 330     $self->_write_record($data);
 331     return;                     # well, that was easy.
 332   }
 333
 334   # the two records are of different lengths
 335   # our strategy here: rewrite the tail of the file,
 336   # reading ahead one buffer at a time
 337   # $bufsize is required to be at least as large as the data we're overwriting
 338   my $bufsize = _bufsize($len_diff);
 339   my ($writepos, $readpos) = ($pos, $pos+$len);
 340   my $next_block;
 341
 342   # Seems like there ought to be a way to avoid the repeated code
 343   # and the special case here.  The read(1) is also a little weird.
 344   # Think about this.
 345   do {
 346     $self->_seekb($readpos);
 347     my $br = read $self->{fh}, $next_block, $bufsize;
 348     my $more_data = read $self->{fh}, my($dummy), 1;
 349     $self->_seekb($writepos);
 350     $self->_write_record($data);
 351     $readpos += $br;
 352     $writepos += length $data;
 353     $data = $next_block;
 354   } while $more_data;
 355   $self->_seekb($writepos);
 356   $self->_write_record($next_block);
 357
 358   # There might be leftover data at the end of the file
 359   $self->_chop_file if $len_diff < 0;
 360 }
 361
 362 # If a record does not already end with the appropriate terminator
 363 # string, append one.
 364 sub _fixrecs {
 365   my $self = shift;
 366   for (@_) {
 367     $_ .= $self->{recsep}
 368       unless substr($_, - $self->{recseplen}) eq $self->{recsep};
 369   }
 370 }
 371
 372 # seek to the beginning of record #$n
 373 # Assumes that the offsets table is already correctly populated
 374 #
 375 # Note that $n=-1 has a special meaning here: It means the start of
 376 # the last known record; this may or may not be the very last record
 377 # in the file, depending on whether the offsets table is fully populated.
 378 #
 379 sub _seek {
 380   my ($self, $n) = @_;
 381   my $o = $self->{offsets}[$n];
 382   defined($o)
 383     or confess("logic error: undefined offset for record $n");
 384   seek $self->{fh}, $o, SEEK_SET
 385     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 386 }
 387
 388 sub _seekb {
 389   my ($self, $b) = @_;
 390   seek $self->{fh}, $b, SEEK_SET
 391     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 392 }
 393
 394 # populate the offsets table up to the beginning of record $n
 395 # return the offset of record $n
 396 sub _fill_offsets_to {
 397   my ($self, $n) = @_;
 398   my $fh = $self->{fh};
 399   local *OFF = $self->{offsets};
 400   my $rec;
 401
 402   until ($#OFF >= $n) {
 403     my $o = $OFF[-1];
 404     $self->_seek(-1);           # tricky -- see comment at _seek
 405     $rec = $self->_read_record;
 406     if (defined $rec) {
 407       push @OFF, tell $fh;
 408     } else {
 409       return;                   # It turns out there is no such record
 410     }
 411   }
 412
 413   # we have now read all the records up to record n-1,
 414   # so we can return the offset of record n
 415   return $OFF[$n];
 416 }
 417
 418 # assumes that $rec is already suitably terminated
 419 sub _write_record {
 420   my ($self, $rec) = @_;
 421   my $fh = $self->{fh};
 422   print $fh $rec
 423     or die "Couldn't write record: $!";  # "Should never happen."
 424
 425 }
 426
 427 sub _read_record {
 428   my $self = shift;
 429   my $rec;
 430   { local $/ = $self->{recsep};
 431     my $fh = $self->{fh};
 432     $rec = <$fh>;
 433   }
 434   $rec;
 435 }
 436
 437 sub _cache_insert {
 438   my ($self, $n, $rec) = @_;
 439
 440   # Do not cache records that are too big to fit in the cache.
 441   return unless length $rec <= $self->{cachesize};
 442
 443   $self->{cache}{$n} = $rec;
 444   $self->{cached} += length $rec;
 445   push @{$self->{lru}}, $n;     # most-recently-used is at the END
 446
 447   $self->_cache_flush if $self->{cached} > $self->{cachesize};
 448 }
 449
 450 sub _check_cache {
 451   my ($self, $n) = @_;
 452   my $rec;
 453   return unless defined($rec = $self->{cache}{$n});
 454
 455   # cache hit; update LRU queue and return $rec
 456   # replace this with a heap in a later version
 457   @{$self->{lru}} = ((grep $_ ne $n, @{$self->{lru}}), $n);
 458   $rec;
 459 }
 460
 461 sub _cache_flush {
 462   my ($self) = @_;
 463   while ($self->{cached} > $self->{cachesize}) {
 464     my $lru = shift @{$self->{lru}};
 465     $self->{cached} -= length $lru;
 466     delete $self->{cache}{$lru};
 467   }
 468 }
 469
 470 # We have read to the end of the file and have the offsets table
 471 # entirely populated.  Now we need to write a new record beyond
 472 # the end of the file.  We prepare for this by writing
 473 # empty records into the file up to the position we want
 474 #
 475 # assumes that the offsets table already contains the offset of record $n,
 476 # if it exists, and extends to the end of the file if not.
 477 sub _extend_file_to {
 478   my ($self, $n) = @_;
 479   $self->_seek(-1);             # position after the end of the last record
 480   my $pos = $self->{offsets}[-1];
 481
 482   # the offsets table has one entry more than the total number of records
 483   $extras = $n - $#{$self->{offsets}};
 484
 485   # Todo : just use $self->{recsep} x $extras here?
 486   while ($extras-- > 0) {
 487     $self->_write_record($self->{recsep});
 488     $pos += $self->{recseplen};
 489     push @{$self->{offsets}}, $pos;
 490   }
 491 }
 492
 493 # Truncate the file at the current position
 494 sub _chop_file {
 495   my $self = shift;
 496   truncate $self->{fh}, tell($self->{fh});
 497 }
 498
 499 # compute the size of a buffer suitable for moving
 500 # all the data in a file forward $n bytes
 501 # ($n may be negative)
 502 # The result should be at least $n.
 503 sub _bufsize {
 504   my $n = shift;
 505   return 8192 if $n < 0;
 506   my $b = $n & ~8191;
 507   $b += 8192 if $n & 8191;
 508   $b;
 509 }
 510
 511 # Lock the file
 512 sub flock {
 513   my ($self, $op) = @_;
 514   unless (@_ <= 3) {
 515     my $pack = ref $self;
 516     croak "Usage: $pack\->flock([OPERATION])";
 517   }
 518   my $fh = $self->{fh};
 519   $op = LOCK_EX unless defined $op;
 520   flock $fh, $op;
 521 }
 522
 523 # Given a file, make sure the cache is consistent with the
 524 # file contents
 525 sub _check_integrity {
 526   my ($self, $file, $warn) = @_;
 527   my $good = 1;
 528   local *F;
 529   open F, $file or die "Couldn't open file $file: $!";
 530   binmode F;
 531   local $/ = $self->{recsep};
 532   unless ($self->{offsets}[0] == 0) {
 533     $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n";
 534     $good = 0;
 535   }
 536   while (<F>) {
 537     my $n = $. - 1;
 538     my $cached = $self->{cache}{$n};
 539     my $offset = $self->{offsets}[$.];
 540     my $ao = tell F;
 541     if (defined $offset && $offset != $ao) {
 542       $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n";
 543     }
 544     if (defined $cached && $_ ne $cached) {
 545       $good = 0;
 546       chomp $cached;
 547       chomp;
 548       $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n";
 549     }
 550   }
 551
 552   my $cachesize = 0;
 553   while (my ($n, $r) = each %{$self->{cache}}) {
 554     $cachesize += length($r);
 555     next if $n+1 <= $.;         # checked this already
 556     $warn && print STDERR "# spurious caching of record $n\n";
 557     $good = 0;
 558   }
 559   if ($cachesize != $self->{cached}) {
 560     $warn && print STDERR "# cache size is $self->{cached}, should be $cachesize\n";
 561     $good = 0;
 562   }
 563
 564   my (%seen, @duplicate);
 565   for (@{$self->{lru}}) {
 566     $seen{$_}++;
 567     if (not exists $self->{cache}{$_}) {
 568       print "# $_ is mentioned in the LRU queue, but not in the cache\n";
 569       $good = 0;
 570     }
 571   }
 572   @duplicate = grep $seen{$_}>1, keys %seen;
 573   if (@duplicate) {
 574     my $records = @duplicate == 1 ? 'Record' : 'Records';
 575     my $appear  = @duplicate == 1 ? 'appears' : 'appear';
 576     print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n";
 577     $good = 0;
 578   }
 579   for (keys %{$self->{cache}}) {
 580     unless (exists $seen{$_}) {
 581       print "# $record $_ is in the cache but not the LRU queue\n";
 582       $good = 0;
 583     }
 584   }
 585
 586   $good;
 587 }
 588
 589 =head1 NAME
 590
 591 Tie::File - Access the lines of a disk file via a Perl array
 592
 593 =head1 SYNOPSIS
 594
 595         # This file documents Tie::File version 0.14
 596
 597         tie @array, 'Tie::File', filename or die ...;
 598
 599         $array[13] = 'blah';     # line 13 of the file is now 'blah'
 600         print $array[42];        # display line 42 of the file
 601
 602         $n_recs = @array;        # how many records are in the file?
 603         $#array = $n_recs - 2;   # chop records off the end
 604
 605         # As you would expect:
 606
 607         push @array, new recs...;
 608         my $r1 = pop @array;
 609         unshift @array, new recs...;
 610         my $r1 = shift @array;
 611         @old_recs = splice @array, 3, 7, new recs...;
 612
 613         untie @array;            # all finished
 614
 615 =head1 DESCRIPTION
 616
 617 C<Tie::File> represents a regular text file as a Perl array.  Each
 618 element in the array corresponds to a record in the file.  The first
 619 line of the file is element 0 of the array; the second line is element
 620 1, and so on.
 621
 622 The file is I<not> loaded into memory, so this will work even for
 623 gigantic files.
 624
 625 Changes to the array are reflected in the file immediately.
 626
 627 =head2 C<recsep>
 628
 629 What is a 'record'?  By default, the meaning is the same as for the
 630 C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
 631 probably C<"\n"> or C<"\r\n">.  You may change the definition of
 632 "record" by supplying the C<recsep> option in the C<tie> call:
 633
 634         tie @array, 'Tie::File', $file, recsep => 'es';
 635
 636 This says that records are delimited by the string C<es>.  If the file contained the following data:
 637
 638         Curse these pesky flies!\n
 639
 640 then the C<@array> would appear to have four elements:
 641
 642         "Curse thes"
 643         "e pes"
 644         "ky flies"
 645         "!\n"
 646
 647 An undefined value is not permitted as a record separator.  Perl's
 648 special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
 649 emulated.
 650
 651 Records read from the tied array will have the record separator string
 652 on the end, just as if they were read from the C<E<lt>...E<gt>>
 653 operator.  Records stored into the array will have the record
 654 separator string appended before they are written to the file, if they
 655 don't have one already.  For example, if the record separator string
 656 is C<"\n">, then the following two lines do exactly the same thing:
 657
 658         $array[17] = "Cherry pie";
 659         $array[17] = "Cherry pie\n";
 660
 661 The result is that the contents of line 17 of the file will be
 662 replaced with "Cherry pie"; a newline character will separate line 17
 663 from line 18.  This means that inparticular, this will do nothing:
 664
 665         chomp $array[17];
 666
 667 Because the C<chomp>ed value will have the separator reattached when
 668 it is written back to the file.  There is no way to create a file
 669 whose trailing record separator string is missing.
 670
 671 Inserting records that I<contain> the record separator string will
 672 produce a reasonable result, but if you can't foresee what this result
 673 will be, you'd better avoid doing this.
 674
 675 =head2 C<mode>
 676
 677 Normally, the specified file will be opened for read and write access,
 678 and will be created if it does not exist.  (That is, the flags
 679 C<O_RDWR | O_CREAT> are supplied in the C<open> call.)  If you want to
 680 change this, you may supply alternative flags in the C<mode> option.
 681 See L<Fcntl> for a listing of available flags.
 682 For example:
 683
 684         # open the file if it exists, but fail if it does not exist
 685         use Fcntl 'O_RDWR';
 686         tie @array, 'Tie::File', $file, mode => O_RDWR;
 687
 688         # create the file if it does not exist
 689         use Fcntl 'O_RDWR', 'O_CREAT';
 690         tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
 691
 692         # open an existing file in read-only mode
 693         use Fcntl 'O_RDONLY';
 694         tie @array, 'Tie::File', $file, mode => O_RDONLY;
 695
 696 Opening the data file in write-only or append mode is not supported.
 697
 698 =head2 C<cachesize>
 699
 700 Records read in from the file are cached, to avoid having to re-read
 701 them repeatedly.  If you read the same record twice, the first time it
 702 will be stored in memory, and the second time it will be fetched from
 703 memory.
 704
 705 The cache has a bounded size; when it exceeds this size, the
 706 least-recently visited records will be purged from the cache.  The
 707 default size is 2Mib.  You can adjust the amount of space used for the
 708 cache by supplying the C<cachesize> option.  The argument is the desired cache size, in bytes.
 709
 710         # I have a lot of memory, so use a large cache to speed up access
 711         tie @array, 'Tie::File', $file, cachesize => 20_000_000;
 712
 713 Setting the cache size to 0 will inhibit caching; records will be
 714 fetched from disk every time you examine them.
 715
 716 =head2 Option Format
 717
 718 C<-mode> is a synonym for C<mode>.  C<-recsep> is a synonym for
 719 C<recsep>.  C<-cachesize> is a synonym for C<cachesize>.  You get the
 720 idea.
 721
 722 =head1 Public Methods
 723
 724 The C<tie> call returns an object, say C<$o>.  You may call
 725
 726         $rec = $o->FETCH($n);
 727         $o->STORE($n, $rec);
 728
 729 to fetch or store the record at line C<$n>, respectively.  The only other public method in this package is:
 730
 731 =head2 C<flock>
 732
 733         $o->flock(MODE)
 734
 735 will lock the tied file.  C<MODE> has the same meaning as the second
 736 argument to the Perl built-in C<flock> function; for example
 737 C<LOCK_SH> or C<LOCK_EX | LOCK_NB>.  (These constants are provided by
 738 the C<use Fcntl ':flock'> declaration.)
 739
 740 C<MODE> is optional; C<< $o->flock >> simply locks the file with
 741 C<LOCK_EX>.
 742
 743 The best way to unlock a file is to discard the object and untie the
 744 array.  It is probably unsafe to unlock the file without also untying
 745 it, because if you do, changes may remain unwritten inside the object.
 746 That is why there is no shortcut for unlocking.  If you really want to
 747 unlock the file prematurely, you know what to do; if you don't know
 748 what to do, then don't do it.
 749
 750 All the usual warnings about file locking apply here.  In particular,
 751 note that file locking in Perl is B<advisory>, which means that
 752 holding a lock will not prevent anyone else from reading, writing, or
 753 erasing the file; it only prevents them from getting another lock at
 754 the same time.  Locks are analogous to green traffic lights: If you
 755 have a green light, that does not prevent the idiot coming the other
 756 way from plowing into you sideways; it merely guarantees to you that
 757 the idiot does not also have a green light at the same time.
 758
 759 =head1 CAVEATS
 760
 761 (That's Latin for 'warnings'.)
 762
 763 =head2 Efficiency Note
 764
 765 Every effort was made to make this module efficient.  Nevertheless,
 766 changing the size of a record in the middle of a large file will
 767 always be slow, because everything after the new record must be move.
 768
 769 In particular, note that:
 770
 771         # million-line file
 772         for (@file_array) {
 773           $_ .= 'x';
 774         }
 775
 776 is likely to be very slow, because the first iteration must relocate
 777 lines 1 through 999,999; the second iteration must relocate lines 2
 778 through 999,999, and so on.  The relocation is done using block
 779 writes, however, so it's not as slow as it might be.
 780
 781 A future version of this module will provide some mechanism for
 782 getting better performance in such cases, by deferring the writing
 783 until it can be done all at once.
 784
 785 =head2 Efficiency Note 2
 786
 787 Not every effort was made to make this module as efficient as
 788 possible.  C<FETCHSIZE> should use binary search instead of linear
 789 search.  The cache's LRU queue should be a heap instead of a list.
 790 These defects are probably minor; in any event, they will be fixed in
 791 a later version of the module.
 792
 793 =head2 Efficiency Note 3
 794
 795 The author has supposed that since this module is concerned with file
 796 I/O, almost all normal use of it will be heavily I/O bound, and that
 797 the time to maintain complicated data structures inside the module
 798 will be dominated by the time to actually perform the I/O.  This
 799 suggests, for example, that and LRU read-cache is a good tradeoff,
 800 even if it requires substantial adjustment following a C<splice>
 801 operation.
 802
 803 =head1 CAVEATS
 804
 805 (That's Latin for 'warnings'.)
 806
 807 The behavior of tied arrays is not precisely the same as for regular
 808 arrays.  For example:
 809
 810         undef $a[10];  print "How unusual!\n" if $a[10];
 811
 812 C<undef>-ing a C<Tie::File> array element just blanks out the
 813 corresponding record in the file.  When you read it back again, you'll
 814 see the record separator (typically, $a[10] will appear to contain
 815 "\n") so the supposedly-C<undef>'ed value will be true.
 816
 817 There are other minor differences, but in general, the correspondence
 818 is extremely close.
 819
 820 =head1 AUTHOR
 821
 822 Mark Jason Dominus
 823
 824 To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
 825
 826 To receive an announcement whenever a new version of this module is
 827 released, send a blank email message to
 828 C<mjd-perl-tiefile-subscribe@plover.com>.
 829
 830 =head1 LICENSE
 831
 832 C<Tie::File> version 0.14 is copyright (C) 2002 Mark Jason Dominus.
 833
 834 This program is free software; you can redistribute it and/or modify
 835 it under the terms of the GNU General Public License as published by
 836 the Free Software Foundation; either version 2 of the License, or (at
 837 your option) any later version.
 838
 839 This program is distributed in the hope that it will be useful,
 840 but WITHOUT ANY WARRANTY; without even the implied warranty of
 841 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 842 GNU General Public License for more details.
 843
 844 You should have received a copy of the GNU General Public License
 845 along with this program; it should be in the file C<COPYING>.  If not,
 846 write to the Free Software Foundation, Inc., 59 Temple Place, Suite
 847 330, Boston, MA 02111 USA
 848
 849 For licensing inquiries, contact the author at:
 850
 851         Mark Jason Dominus
 852         255 S. Warnock St.
 853         Philadelphia, PA 19107
 854
 855 =head1 WARRANTY
 856
 857 C<Tie::File> version 0.14 comes with ABSOLUTELY NO WARRANTY.
 858 For details, see the license.
 859
 860 =head1 TODO
 861
 862 Tests for default arguments to SPLICE.  Tests for CLEAR/EXTEND.
 863 Tests for DELETE/EXISTS.
 864
 865 More tests.  (Configuration options, cache flushery, locking.  _twrite
 866 should be tested separately, because there are a lot of weird special
 867 cases lurking in there.)
 868
 869 More tests.  (Stuff I didn't think of yet.)
 870
 871 Deferred writing. (!!!)
 872
 873 Paragraph mode?
 874
 875 More tests.
 876
 877 Fixed-length mode.
 878
 879 =cut
 880