lib/Tie/File.pm

   1
   2 package Tie::File;
   3 use Carp;
   4 use POSIX 'SEEK_SET';
   5 use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
   6 require 5.005;
   7
   8 $VERSION = "0.16";
   9
  10 # Idea: The object will always contain an array of byte offsets
  11 # this will be filled in as is necessary and convenient.
  12 # fetch will do seek-read.
  13 # There will be a cache parameter that controls the amount of cached *data*
  14 # Also an LRU queue of cached records
  15 # store will read the relevant record into the cache
  16 # If it's the same length as what is being written, it will overwrite it in
  17 #   place; if not, it will do a from-to copying write.
  18 # The record separator string is also a parameter
  19
  20 # Record numbers start at ZERO.
  21
  22 my $DEFAULT_CACHE_SIZE = 1<<21;    # 2 megabytes
  23
  24 sub TIEARRAY {
  25   if (@_ % 2 != 0) {
  26     croak "usage: tie \@array, $_[0], filename, [option => value]...";
  27   }
  28   my ($pack, $file, %opts) = @_;
  29
  30   # transform '-foo' keys into 'foo' keys
  31   for my $key (keys %opts) {
  32     my $okey = $key;
  33     if ($key =~ s/^-+//) {
  34       $opts{$key} = delete $opts{$okey};
  35     }
  36   }
  37
  38   $opts{cachesize} ||= $DEFAULT_CACHE_SIZE;
  39
  40   # the cache is a hash instead of an array because it is likely to be
  41   # sparsely populated
  42   $opts{cache} = {};
  43   $opts{cached} = 0;   # total size of cached data
  44   $opts{lru} = [];     # replace with heap in later version
  45
  46   $opts{offsets} = [0];
  47   $opts{filename} = $file;
  48   $opts{recsep} = $/ unless defined $opts{recsep};
  49   $opts{recseplen} = length($opts{recsep});
  50   if ($opts{recseplen} == 0) {
  51     croak "Empty record separator not supported by $pack";
  52   }
  53
  54   my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;
  55   my $fh;
  56
  57   if (UNIVERSAL::isa($file, 'GLOB')) {
  58     unless (seek $file, 0, SEEK_SET) {
  59       croak "$pack: your filehandle does not appear to be seekable";
  60     }
  61     $fh = $file;
  62   } elsif (ref $file) {
  63     croak "usage: tie \@array, $pack, filename, [option => value]...";
  64   } else {
  65     $fh = \do { local *FH };   # only works in 5.005 and later
  66     sysopen $fh, $file, $mode, 0666 or return;
  67     binmode $fh;
  68   }
  69   { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
  70   $opts{fh} = $fh;
  71
  72   bless \%opts => $pack;
  73 }
  74
  75 sub FETCH {
  76   my ($self, $n) = @_;
  77
  78   # check the record cache
  79   { my $cached = $self->_check_cache($n);
  80     return $cached if defined $cached;
  81   }
  82
  83   unless ($#{$self->{offsets}} >= $n) {
  84     my $o = $self->_fill_offsets_to($n);
  85     # If it's still undefined, there is no such record, so return 'undef'
  86     return unless defined $o;
  87   }
  88
  89   my $fh = $self->{FH};
  90   $self->_seek($n);             # we can do this now that offsets is populated
  91   my $rec = $self->_read_record;
  92   $self->_cache_insert($n, $rec) if defined $rec;
  93   $rec;
  94 }
  95
  96 sub STORE {
  97   my ($self, $n, $rec) = @_;
  98
  99   $self->_fixrecs($rec);
 100
 101   # TODO: what should we do about the cache?  Install the new record
 102   # in the cache only if the old version of the same record was
 103   # already there?
 104
 105   # We need this to decide whether the new record will fit
 106   # It incidentally populates the offsets table
 107   # Note we have to do this before we alter the cache
 108   my $oldrec = $self->FETCH($n);
 109
 110   # _check_cache promotes record $n to MRU.  Is this correct behavior?
 111   if (my $cached = $self->_check_cache($n)) {
 112     $self->{cache}{$n} = $rec;
 113     $self->{cached} += length($rec) - length($cached);
 114   }
 115
 116   if (not defined $oldrec) {
 117     # We're storing a record beyond the end of the file
 118     $self->_extend_file_to($n+1);
 119     $oldrec = $self->{recsep};
 120   }
 121   my $len_diff = length($rec) - length($oldrec);
 122
 123   $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));
 124
 125   # now update the offsets
 126   # array slice goes from element $n+1 (the first one to move)
 127   # to the end
 128   for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
 129     $_ += $len_diff;
 130   }
 131 }
 132
 133 sub FETCHSIZE {
 134   my $self = shift;
 135   my $n = $#{$self->{offsets}};
 136   while (defined ($self->_fill_offsets_to($n+1))) {
 137     ++$n;
 138   }
 139   $n;
 140 }
 141
 142 sub STORESIZE {
 143   my ($self, $len) = @_;
 144   my $olen = $self->FETCHSIZE;
 145   return if $len == $olen;      # Woo-hoo!
 146
 147   # file gets longer
 148   if ($len > $olen) {
 149     $self->_extend_file_to($len);
 150     return;
 151   }
 152
 153   # file gets shorter
 154   $self->_seek($len);
 155   $self->_chop_file;
 156   $#{$self->{offsets}} = $len-1;
 157   my @cached = grep $_ > $len, keys %{$self->{cache}};
 158   delete @{$self->{cache}}{@cached} if @cached;
 159 }
 160
 161 sub PUSH {
 162   my $self = shift;
 163   $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
 164   $self->FETCHSIZE;
 165 }
 166
 167 sub POP {
 168   my $self = shift;
 169   my $size = $self->FETCHSIZE;
 170   return if $size == 0;
 171 #  print STDERR "# POPPITY POP POP POP\n";
 172   scalar $self->SPLICE($size-1, 1);
 173 }
 174
 175 sub SHIFT {
 176   my $self = shift;
 177   scalar $self->SPLICE(0, 1);
 178 }
 179
 180 sub UNSHIFT {
 181   my $self = shift;
 182   $self->SPLICE(0, 0, @_);
 183   $self->FETCHSIZE;
 184 }
 185
 186 sub CLEAR {
 187   # And enable auto-defer mode, since it's likely that they just
 188   # did @a = (...);
 189   my $self = shift;
 190   $self->_seekb(0);
 191   $self->_chop_file;
 192   %{$self->{cache}}   = ();
 193     $self->{cached}   = 0;
 194   @{$self->{lru}}     = ();
 195   @{$self->{offsets}} = (0);
 196 }
 197
 198 sub EXTEND {
 199   my ($self, $n) = @_;
 200   $self->_fill_offsets_to($n);
 201   $self->_extend_file_to($n);
 202 }
 203
 204 sub DELETE {
 205   my ($self, $n) = @_;
 206   my $lastrec = $self->FETCHSIZE-1;
 207   if ($n == $lastrec) {
 208     $self->_seek($n);
 209     $self->_chop_file;
 210     $#{$self->{offsets}}--;
 211     delete $self->{cached}{$n};
 212     @{$self->{lru}} = grep $_ != $n, @{$self->{lru}};
 213     # perhaps in this case I should also remove trailing null records?
 214   } else {
 215     $self->STORE($n, "");
 216   }
 217 }
 218
 219 sub EXISTS {
 220   my ($self, $n) = @_;
 221   $self->_fill_offsets_to($n);
 222   0 <= $n && $n < $self->FETCHSIZE;
 223 }
 224
 225 sub SPLICE {
 226   my ($self, $pos, $nrecs, @data) = @_;
 227   my @result;
 228
 229   $pos = 0 unless defined $pos;
 230
 231   # Deal with negative and other out-of-range positions
 232   # Also set default for $nrecs
 233   {
 234     my $oldsize = $self->FETCHSIZE;
 235     $nrecs = $oldsize unless defined $nrecs;
 236     my $oldpos = $pos;
 237
 238     if ($pos < 0) {
 239       $pos += $oldsize;
 240       if ($pos < 0) {
 241         croak "Modification of non-creatable array value attempted, subscript $oldpos";
 242       }
 243     }
 244
 245     if ($pos > $oldsize) {
 246       return unless @data;
 247       $pos = $oldsize;          # This is what perl does for normal arrays
 248     }
 249   }
 250
 251   $self->_fixrecs(@data);
 252   my $data = join '', @data;
 253   my $datalen = length $data;
 254   my $oldlen = 0;
 255
 256   # compute length of data being removed
 257   # Incidentally fills offsets table
 258   for ($pos .. $pos+$nrecs-1) {
 259     my $rec = $self->FETCH($_);
 260     last unless defined $rec;
 261     push @result, $rec;
 262     $oldlen += length($rec);
 263   }
 264
 265   # Modify the file
 266   $self->_twrite($data, $self->{offsets}[$pos], $oldlen);
 267
 268   # update the offsets table part 1
 269   # compute the offsets of the new records:
 270   my @new_offsets;
 271   if (@data) {
 272     push @new_offsets, $self->{offsets}[$pos];
 273     for (0 .. $#data-1) {
 274       push @new_offsets, $new_offsets[-1] + length($data[$_]);
 275     }
 276   }
 277   splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);
 278
 279   # update the offsets table part 2
 280   # adjust the offsets of the following old records
 281   for ($pos+@data .. $#{$self->{offsets}}) {
 282     $self->{offsets}[$_] += $datalen - $oldlen;
 283   }
 284   # If we scrubbed out all known offsets, regenerate the trivial table
 285   # that knows that the file does indeed start at 0.
 286   $self->{offsets}[0] = 0 unless @{$self->{offsets}};
 287
 288   # Perhaps the following cache foolery could be factored out
 289   # into a bunch of mor opaque cache functions.  For example,
 290   # it's odd to delete a record from the cache and then remove
 291   # it from the LRU queue later on; there should be a function to
 292   # do both at once.
 293
 294   # update the read cache, part 1
 295   # modified records
 296   # Consider this carefully for correctness
 297   for ($pos .. $pos+$nrecs-1) {
 298     my $cached = $self->{cache}{$_};
 299     next unless defined $cached;
 300     my $new = $data[$_-$pos];
 301     if (defined $new) {
 302       $self->{cached} += length($new) - length($cached);
 303       $self->{cache}{$_} = $new;
 304     } else {
 305       delete $self->{cache}{$_};
 306       $self->{cached} -= length($cached);
 307     }
 308   }
 309   # update the read cache, part 2
 310   # moved records - records past the site of the change
 311   # need to be renumbered
 312   # Maybe merge this with the previous block?
 313   for (keys %{$self->{cache}}) {
 314     next unless $_ >= $pos + $nrecs;
 315     $self->{cache}{$_-$nrecs+@data} = delete $self->{cache}{$_};
 316   }
 317
 318   # fix the LRU queue
 319   my(@new, @changed);
 320   for (@{$self->{lru}}) {
 321     if ($_ >= $pos + $nrecs) {
 322       push @new, $_ + @data - $nrecs;
 323     } elsif ($_ >= $pos) {
 324       push @changed, $_ if $_ < $pos + @data;
 325     } else {
 326       push @new, $_;
 327     }
 328   }
 329   @{$self->{lru}} = (@new, @changed);
 330
 331   # Yes, the return value of 'splice' *is* actually this complicated
 332   wantarray ? @result : @result ? $result[-1] : undef;
 333 }
 334
 335 # write data into the file
 336 # $data is the data to be written.
 337 # it should be written at position $pos, and should overwrite
 338 # exactly $len of the following bytes.
 339 # Note that if length($data) > $len, the subsequent bytes will have to
 340 # be moved up, and if length($data) < $len, they will have to
 341 # be moved down
 342 sub _twrite {
 343   my ($self, $data, $pos, $len) = @_;
 344
 345   unless (defined $pos) {
 346     die "\$pos was undefined in _twrite";
 347   }
 348
 349   my $len_diff = length($data) - $len;
 350
 351   if ($len_diff == 0) {          # Woo-hoo!
 352     my $fh = $self->{fh};
 353     $self->_seekb($pos);
 354     $self->_write_record($data);
 355     return;                     # well, that was easy.
 356   }
 357
 358   # the two records are of different lengths
 359   # our strategy here: rewrite the tail of the file,
 360   # reading ahead one buffer at a time
 361   # $bufsize is required to be at least as large as the data we're overwriting
 362   my $bufsize = _bufsize($len_diff);
 363   my ($writepos, $readpos) = ($pos, $pos+$len);
 364   my $next_block;
 365
 366   # Seems like there ought to be a way to avoid the repeated code
 367   # and the special case here.  The read(1) is also a little weird.
 368   # Think about this.
 369   do {
 370     $self->_seekb($readpos);
 371     my $br = read $self->{fh}, $next_block, $bufsize;
 372     my $more_data = read $self->{fh}, my($dummy), 1;
 373     $self->_seekb($writepos);
 374     $self->_write_record($data);
 375     $readpos += $br;
 376     $writepos += length $data;
 377     $data = $next_block;
 378   } while $more_data;
 379   $self->_seekb($writepos);
 380   $self->_write_record($next_block);
 381
 382   # There might be leftover data at the end of the file
 383   $self->_chop_file if $len_diff < 0;
 384 }
 385
 386 # If a record does not already end with the appropriate terminator
 387 # string, append one.
 388 sub _fixrecs {
 389   my $self = shift;
 390   for (@_) {
 391     $_ .= $self->{recsep}
 392       unless substr($_, - $self->{recseplen}) eq $self->{recsep};
 393   }
 394 }
 395
 396 # seek to the beginning of record #$n
 397 # Assumes that the offsets table is already correctly populated
 398 #
 399 # Note that $n=-1 has a special meaning here: It means the start of
 400 # the last known record; this may or may not be the very last record
 401 # in the file, depending on whether the offsets table is fully populated.
 402 #
 403 sub _seek {
 404   my ($self, $n) = @_;
 405   my $o = $self->{offsets}[$n];
 406   defined($o)
 407     or confess("logic error: undefined offset for record $n");
 408   seek $self->{fh}, $o, SEEK_SET
 409     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 410 }
 411
 412 sub _seekb {
 413   my ($self, $b) = @_;
 414   seek $self->{fh}, $b, SEEK_SET
 415     or die "Couldn't seek filehandle: $!";  # "Should never happen."
 416 }
 417
 418 # populate the offsets table up to the beginning of record $n
 419 # return the offset of record $n
 420 sub _fill_offsets_to {
 421   my ($self, $n) = @_;
 422   my $fh = $self->{fh};
 423   local *OFF = $self->{offsets};
 424   my $rec;
 425
 426   until ($#OFF >= $n) {
 427     my $o = $OFF[-1];
 428     $self->_seek(-1);           # tricky -- see comment at _seek
 429     $rec = $self->_read_record;
 430     if (defined $rec) {
 431       push @OFF, tell $fh;
 432     } else {
 433       return;                   # It turns out there is no such record
 434     }
 435   }
 436
 437   # we have now read all the records up to record n-1,
 438   # so we can return the offset of record n
 439   return $OFF[$n];
 440 }
 441
 442 # assumes that $rec is already suitably terminated
 443 sub _write_record {
 444   my ($self, $rec) = @_;
 445   my $fh = $self->{fh};
 446   print $fh $rec
 447     or die "Couldn't write record: $!";  # "Should never happen."
 448
 449 }
 450
 451 sub _read_record {
 452   my $self = shift;
 453   my $rec;
 454   { local $/ = $self->{recsep};
 455     my $fh = $self->{fh};
 456     $rec = <$fh>;
 457   }
 458   $rec;
 459 }
 460
 461 sub _cache_insert {
 462   my ($self, $n, $rec) = @_;
 463
 464   # Do not cache records that are too big to fit in the cache.
 465   return unless length $rec <= $self->{cachesize};
 466
 467   $self->{cache}{$n} = $rec;
 468   $self->{cached} += length $rec;
 469   push @{$self->{lru}}, $n;     # most-recently-used is at the END
 470
 471   $self->_cache_flush if $self->{cached} > $self->{cachesize};
 472 }
 473
 474 sub _check_cache {
 475   my ($self, $n) = @_;
 476   my $rec;
 477   return unless defined($rec = $self->{cache}{$n});
 478
 479   # cache hit; update LRU queue and return $rec
 480   # replace this with a heap in a later version
 481   @{$self->{lru}} = ((grep $_ ne $n, @{$self->{lru}}), $n);
 482   $rec;
 483 }
 484
 485 sub _cache_flush {
 486   my ($self) = @_;
 487   while ($self->{cached} > $self->{cachesize}) {
 488     my $lru = shift @{$self->{lru}};
 489     $self->{cached} -= length $lru;
 490     delete $self->{cache}{$lru};
 491   }
 492 }
 493
 494 # We have read to the end of the file and have the offsets table
 495 # entirely populated.  Now we need to write a new record beyond
 496 # the end of the file.  We prepare for this by writing
 497 # empty records into the file up to the position we want
 498 #
 499 # assumes that the offsets table already contains the offset of record $n,
 500 # if it exists, and extends to the end of the file if not.
 501 sub _extend_file_to {
 502   my ($self, $n) = @_;
 503   $self->_seek(-1);             # position after the end of the last record
 504   my $pos = $self->{offsets}[-1];
 505
 506   # the offsets table has one entry more than the total number of records
 507   $extras = $n - $#{$self->{offsets}};
 508
 509   # Todo : just use $self->{recsep} x $extras here?
 510   while ($extras-- > 0) {
 511     $self->_write_record($self->{recsep});
 512     push @{$self->{offsets}}, tell $self->{fh};
 513   }
 514 }
 515
 516 # Truncate the file at the current position
 517 sub _chop_file {
 518   my $self = shift;
 519   truncate $self->{fh}, tell($self->{fh});
 520 }
 521
 522 # compute the size of a buffer suitable for moving
 523 # all the data in a file forward $n bytes
 524 # ($n may be negative)
 525 # The result should be at least $n.
 526 sub _bufsize {
 527   my $n = shift;
 528   return 8192 if $n < 0;
 529   my $b = $n & ~8191;
 530   $b += 8192 if $n & 8191;
 531   $b;
 532 }
 533
 534 # Lock the file
 535 sub flock {
 536   my ($self, $op) = @_;
 537   unless (@_ <= 3) {
 538     my $pack = ref $self;
 539     croak "Usage: $pack\->flock([OPERATION])";
 540   }
 541   my $fh = $self->{fh};
 542   $op = LOCK_EX unless defined $op;
 543   flock $fh, $op;
 544 }
 545
 546 # Given a file, make sure the cache is consistent with the
 547 # file contents
 548 sub _check_integrity {
 549   my ($self, $file, $warn) = @_;
 550   my $good = 1;
 551
 552   unless ($self->{offsets}[0] == 0) {
 553     $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n";
 554     $good = 0;
 555   }
 556
 557   local *F = $self->{fh};
 558   seek F, 0, SEEK_SET;
 559   local $/ = $self->{recsep};
 560   $. = 0;
 561
 562   while (<F>) {
 563     my $n = $. - 1;
 564     my $cached = $self->{cache}{$n};
 565     my $offset = $self->{offsets}[$.];
 566     my $ao = tell F;
 567     if (defined $offset && $offset != $ao) {
 568       $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n";
 569       $good = 0;
 570     }
 571     if (defined $cached && $_ ne $cached) {
 572       $good = 0;
 573       chomp $cached;
 574       chomp;
 575       $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n";
 576     }
 577   }
 578
 579   my $cachesize = 0;
 580   while (my ($n, $r) = each %{$self->{cache}}) {
 581     $cachesize += length($r);
 582     next if $n+1 <= $.;         # checked this already
 583     $warn && print STDERR "# spurious caching of record $n\n";
 584     $good = 0;
 585   }
 586   if ($cachesize != $self->{cached}) {
 587     $warn && print STDERR "# cache size is $self->{cached}, should be $cachesize\n";
 588     $good = 0;
 589   }
 590
 591   my (%seen, @duplicate);
 592   for (@{$self->{lru}}) {
 593     $seen{$_}++;
 594     if (not exists $self->{cache}{$_}) {
 595       print "# $_ is mentioned in the LRU queue, but not in the cache\n";
 596       $good = 0;
 597     }
 598   }
 599   @duplicate = grep $seen{$_}>1, keys %seen;
 600   if (@duplicate) {
 601     my $records = @duplicate == 1 ? 'Record' : 'Records';
 602     my $appear  = @duplicate == 1 ? 'appears' : 'appear';
 603     print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n";
 604     $good = 0;
 605   }
 606   for (keys %{$self->{cache}}) {
 607     unless (exists $seen{$_}) {
 608       print "# $record $_ is in the cache but not the LRU queue\n";
 609       $good = 0;
 610     }
 611   }
 612
 613   $good;
 614 }
 615
 616 "Cogito, ergo sum.";  # don't forget to return a true value from the file
 617
 618 =head1 NAME
 619
 620 Tie::File - Access the lines of a disk file via a Perl array
 621
 622 =head1 SYNOPSIS
 623
 624         # This file documents Tie::File version 0.16
 625
 626         tie @array, 'Tie::File', filename or die ...;
 627
 628         $array[13] = 'blah';     # line 13 of the file is now 'blah'
 629         print $array[42];        # display line 42 of the file
 630
 631         $n_recs = @array;        # how many records are in the file?
 632         $#array = $n_recs - 2;   # chop records off the end
 633
 634         # As you would expect:
 635
 636         push @array, new recs...;
 637         my $r1 = pop @array;
 638         unshift @array, new recs...;
 639         my $r1 = shift @array;
 640         @old_recs = splice @array, 3, 7, new recs...;
 641
 642         untie @array;            # all finished
 643
 644 =head1 DESCRIPTION
 645
 646 C<Tie::File> represents a regular text file as a Perl array.  Each
 647 element in the array corresponds to a record in the file.  The first
 648 line of the file is element 0 of the array; the second line is element
 649 1, and so on.
 650
 651 The file is I<not> loaded into memory, so this will work even for
 652 gigantic files.
 653
 654 Changes to the array are reflected in the file immediately.
 655
 656 =head2 C<recsep>
 657
 658 What is a 'record'?  By default, the meaning is the same as for the
 659 C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
 660 probably C<"\n"> or C<"\r\n">.  You may change the definition of
 661 "record" by supplying the C<recsep> option in the C<tie> call:
 662
 663         tie @array, 'Tie::File', $file, recsep => 'es';
 664
 665 This says that records are delimited by the string C<es>.  If the file contained the following data:
 666
 667         Curse these pesky flies!\n
 668
 669 then the C<@array> would appear to have four elements:
 670
 671         "Curse thes"
 672         "e pes"
 673         "ky flies"
 674         "!\n"
 675
 676 An undefined value is not permitted as a record separator.  Perl's
 677 special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
 678 emulated.
 679
 680 Records read from the tied array will have the record separator string
 681 on the end, just as if they were read from the C<E<lt>...E<gt>>
 682 operator.  Records stored into the array will have the record
 683 separator string appended before they are written to the file, if they
 684 don't have one already.  For example, if the record separator string
 685 is C<"\n">, then the following two lines do exactly the same thing:
 686
 687         $array[17] = "Cherry pie";
 688         $array[17] = "Cherry pie\n";
 689
 690 The result is that the contents of line 17 of the file will be
 691 replaced with "Cherry pie"; a newline character will separate line 17
 692 from line 18.  This means that in particular, this will do nothing:
 693
 694         chomp $array[17];
 695
 696 Because the C<chomp>ed value will have the separator reattached when
 697 it is written back to the file.  There is no way to create a file
 698 whose trailing record separator string is missing.
 699
 700 Inserting records that I<contain> the record separator string will
 701 produce a reasonable result, but if you can't foresee what this result
 702 will be, you'd better avoid doing this.
 703
 704 =head2 C<mode>
 705
 706 Normally, the specified file will be opened for read and write access,
 707 and will be created if it does not exist.  (That is, the flags
 708 C<O_RDWR | O_CREAT> are supplied in the C<open> call.)  If you want to
 709 change this, you may supply alternative flags in the C<mode> option.
 710 See L<Fcntl> for a listing of available flags.
 711 For example:
 712
 713         # open the file if it exists, but fail if it does not exist
 714         use Fcntl 'O_RDWR';
 715         tie @array, 'Tie::File', $file, mode => O_RDWR;
 716
 717         # create the file if it does not exist
 718         use Fcntl 'O_RDWR', 'O_CREAT';
 719         tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
 720
 721         # open an existing file in read-only mode
 722         use Fcntl 'O_RDONLY';
 723         tie @array, 'Tie::File', $file, mode => O_RDONLY;
 724
 725 Opening the data file in write-only or append mode is not supported.
 726
 727 =head2 C<cachesize>
 728
 729 Records read in from the file are cached, to avoid having to re-read
 730 them repeatedly.  If you read the same record twice, the first time it
 731 will be stored in memory, and the second time it will be fetched from
 732 memory.
 733
 734 The cache has a bounded size; when it exceeds this size, the
 735 least-recently visited records will be purged from the cache.  The
 736 default size is 2Mib.  You can adjust the amount of space used for the
 737 cache by supplying the C<cachesize> option.  The argument is the desired cache size, in bytes.
 738
 739         # I have a lot of memory, so use a large cache to speed up access
 740         tie @array, 'Tie::File', $file, cachesize => 20_000_000;
 741
 742 Setting the cache size to 0 will inhibit caching; records will be
 743 fetched from disk every time you examine them.
 744
 745 =head2 Option Format
 746
 747 C<-mode> is a synonym for C<mode>.  C<-recsep> is a synonym for
 748 C<recsep>.  C<-cachesize> is a synonym for C<cachesize>.  You get the
 749 idea.
 750
 751 =head1 Public Methods
 752
 753 The C<tie> call returns an object, say C<$o>.  You may call
 754
 755         $rec = $o->FETCH($n);
 756         $o->STORE($n, $rec);
 757
 758 to fetch or store the record at line C<$n>, respectively.  The only other public method in this package is:
 759
 760 =head2 C<flock>
 761
 762         $o->flock(MODE)
 763
 764 will lock the tied file.  C<MODE> has the same meaning as the second
 765 argument to the Perl built-in C<flock> function; for example
 766 C<LOCK_SH> or C<LOCK_EX | LOCK_NB>.  (These constants are provided by
 767 the C<use Fcntl ':flock'> declaration.)
 768
 769 C<MODE> is optional; C<$o-E<gt>flock> simply locks the file with
 770 C<LOCK_EX>.
 771
 772 The best way to unlock a file is to discard the object and untie the
 773 array.  It is probably unsafe to unlock the file without also untying
 774 it, because if you do, changes may remain unwritten inside the object.
 775 That is why there is no shortcut for unlocking.  If you really want to
 776 unlock the file prematurely, you know what to do; if you don't know
 777 what to do, then don't do it.
 778
 779 All the usual warnings about file locking apply here.  In particular,
 780 note that file locking in Perl is B<advisory>, which means that
 781 holding a lock will not prevent anyone else from reading, writing, or
 782 erasing the file; it only prevents them from getting another lock at
 783 the same time.  Locks are analogous to green traffic lights: If you
 784 have a green light, that does not prevent the idiot coming the other
 785 way from plowing into you sideways; it merely guarantees to you that
 786 the idiot does not also have a green light at the same time.
 787
 788 =head2 Tying to an already-opened filehandle
 789
 790 If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
 791 of the other C<IO> modules, you may use:
 792
 793         tie @array, 'Tie::File', $fh, ...;
 794
 795 Similarly if you opened that handle C<FH> with regular C<open> or
 796 C<sysopen>, you may use:
 797
 798         tie @array, 'Tie::File', \*FH, ...;
 799
 800 Handles that were opened write-only won't work.  Handles that were
 801 opened read-only will work as long as you don't try to write to them.
 802 Handles must be attached to seekable sources of data---that means no
 803 pipes or sockets.  If you try to supply a non-seekable handle, the
 804 C<tie> call will abort your program.
 805
 806 =head1 CAVEATS
 807
 808 (That's Latin for 'warnings'.)
 809
 810 =head2 Efficiency Note
 811
 812 Every effort was made to make this module efficient.  Nevertheless,
 813 changing the size of a record in the middle of a large file will
 814 always be slow, because everything after the new record must be moved.
 815
 816 In particular, note that:
 817
 818         # million-line file
 819         for (@file_array) {
 820           $_ .= 'x';
 821         }
 822
 823 is likely to be very slow, because the first iteration must relocate
 824 lines 1 through 999,999; the second iteration must relocate lines 2
 825 through 999,999, and so on.  The relocation is done using block
 826 writes, however, so it's not as slow as it might be.
 827
 828 A future version of this module will provide a mechanism for getting
 829 better performance in such cases, by deferring the writing until it
 830 can be done all at once.
 831
 832 =head2 Efficiency Note 2
 833
 834 Not every effort was made to make this module as efficient as
 835 possible.  C<FETCHSIZE> should use binary search instead of linear
 836 search.  The cache's LRU queue should be a heap instead of a list.
 837 These defects are probably minor; in any event, they will be fixed in
 838 a later version of the module.
 839
 840 =head2 Efficiency Note 3
 841
 842 The author has supposed that since this module is concerned with file
 843 I/O, almost all normal use of it will be heavily I/O bound, and that
 844 the time to maintain complicated data structures inside the module
 845 will be dominated by the time to actually perform the I/O.  This
 846 suggests, for example, that an LRU read-cache is a good tradeoff,
 847 even if it requires substantial adjustment following a C<splice>
 848 operation.
 849
 850 =head1 CAVEATS
 851
 852 (That's Latin for 'warnings'.)
 853
 854 The behavior of tied arrays is not precisely the same as for regular
 855 arrays.  For example:
 856
 857         undef $a[10];  print "How unusual!\n" if $a[10];
 858
 859 C<undef>-ing a C<Tie::File> array element just blanks out the
 860 corresponding record in the file.  When you read it back again, you'll
 861 see the record separator (typically, $a[10] will appear to contain
 862 "\n") so the supposedly-C<undef>'ed value will be true.
 863
 864 There are other minor differences, but in general, the correspondence
 865 is extremely close.
 866
 867 =head1 AUTHOR
 868
 869 Mark Jason Dominus
 870
 871 To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
 872
 873 To receive an announcement whenever a new version of this module is
 874 released, send a blank email message to
 875 C<mjd-perl-tiefile-subscribe@plover.com>.
 876
 877 =head1 LICENSE
 878
 879 C<Tie::File> version 0.16 is copyright (C) 2002 Mark Jason Dominus.
 880
 881 This library is free software; you may redistribute it and/or modify
 882 it under the same terms as Perl itself.
 883
 884 These terms include your choice of (1) the Perl Artistic Licence, or
 885 (2) version 2 of the GNU General Public License as published by the
 886 Free Software Foundation, or (3) any later version of the GNU General
 887 Public License.
 888
 889 This library is distributed in the hope that it will be useful,
 890 but WITHOUT ANY WARRANTY; without even the implied warranty of
 891 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 892 GNU General Public License for more details.
 893
 894 You should have received a copy of the GNU General Public License
 895 along with this library program; it should be in the file C<COPYING>.
 896 If not, write to the Free Software Foundation, Inc., 59 Temple Place,
 897 Suite 330, Boston, MA 02111 USA
 898
 899 For licensing inquiries, contact the author at:
 900
 901         Mark Jason Dominus
 902         255 S. Warnock St.
 903         Philadelphia, PA 19107
 904
 905 =head1 WARRANTY
 906
 907 C<Tie::File> version 0.16 comes with ABSOLUTELY NO WARRANTY.
 908 For details, see the license.
 909
 910 =head1 THANKS
 911
 912 Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
 913 core when I hadn't written it yet, and for generally being helpful,
 914 supportive, and competent.  (Usually the rule is "choose any one.")
 915 Also big thanks to Abhijit Menon-Sen for all of the same things.
 916
 917 Special thanks to Craig Berry (for VMS portability help), Randy Kobes
 918 (for Win32 portability help), the rest of the CPAN testers (for
 919 testing).
 920
 921 More thanks to:
 922 Gerrit Haase /
 923 Tassilo von Parseval /
 924 H. Dieter Pearcey /
 925 Peter Somu /
 926 Tels
 927
 928 =head1 TODO
 929
 930 Test DELETE machinery more carefully.
 931
 932 More tests.  (Configuration options, cache flushery.  _twrite should
 933 be tested separately, because there are a lot of weird special cases
 934 lurking in there.)
 935
 936 More tests.  (Stuff I didn't think of yet.)
 937
 938 Deferred writing. (!!!)
 939
 940 Paragraph mode?
 941
 942 More tests.
 943
 944 Fixed-length mode.
 945
 946 Maybe an autolocking mode?
 947
 948 =cut
 949