5 use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
10 # Idea: The object will always contain an array of byte offsets
11 # this will be filled in as is necessary and convenient.
12 # fetch will do seek-read.
13 # There will be a cache parameter that controls the amount of cached *data*
14 # Also an LRU queue of cached records
15 # store will read the relevant record into the cache
16 # If it's the same length as what is being written, it will overwrite it in
17 # place; if not, it will do a from-to copying write.
18 # The record separator string is also a parameter
20 # Record numbers start at ZERO.
22 my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
24 my %good_opt = map {$_ => 1, "-$_" => 1}
25 qw(memory dw_size mode recsep discipline autochomp);
29 croak "usage: tie \@array, $_[0], filename, [option => value]...";
31 my ($pack, $file, %opts) = @_;
33 # transform '-foo' keys into 'foo' keys
34 for my $key (keys %opts) {
35 unless ($good_opt{$key}) {
36 croak("$pack: Unrecognized option '$key'\n");
39 if ($key =~ s/^-+//) {
40 $opts{$key} = delete $opts{$okey};
44 unless (defined $opts{memory}) {
45 # default is the larger of the default cache size and the
46 # deferred-write buffer size (if specified)
47 $opts{memory} = $DEFAULT_MEMORY_SIZE;
48 $opts{memory} = $opts{dw_size}
49 if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
51 $opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
52 if ($opts{dw_size} > $opts{memory}) {
53 croak("$pack: dw_size may not be larger than total memory allocation\n");
55 $opts{deferred} = {}; # no records presently deferred
56 $opts{deferred_s} = 0; # count of total bytes in ->{deferred}
58 # the cache is a hash instead of an array because it is likely to be
61 $opts{cached} = 0; # total size of cached data
62 $opts{lru} = []; # replace with heap in later version
65 $opts{filename} = $file;
66 unless (defined $opts{recsep}) {
67 $opts{recsep} = _default_recsep();
69 $opts{recseplen} = length($opts{recsep});
70 if ($opts{recseplen} == 0) {
71 croak "Empty record separator not supported by $pack";
74 $opts{autochomp} = 1 unless defined $opts{autochomp};
76 my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;
79 if (UNIVERSAL::isa($file, 'GLOB')) {
80 unless (seek $file, 0, SEEK_SET) {
81 croak "$pack: your filehandle does not appear to be seekable";
85 croak "usage: tie \@array, $pack, filename, [option => value]...";
87 $fh = \do { local *FH }; # only works in 5.005 and later
88 sysopen $fh, $file, $mode, 0666 or return;
91 { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
92 if (defined $opts{discipline} && $] >= 5.006) {
93 # This avoids a compile-time warning under 5.005
94 eval 'binmode($fh, $opts{discipline})';
95 croak $@ if $@ =~ /unknown discipline/i;
100 bless \%opts => $pack;
105 $self->_chomp1($self->_fetch($n));
108 # Chomp many records in-place; return nothing useful
111 return unless $self->{autochomp};
112 if ($self->{autochomp}) {
115 substr($_, - $self->{recseplen}) = "";
120 # Chomp one record in-place; return modified record
122 my ($self, $rec) = @_;
123 return $rec unless $self->{autochomp};
124 return unless defined $rec;
125 substr($rec, - $self->{recseplen}) = "";
132 # check the record cache
133 { my $cached = $self->_check_cache($n);
134 return $cached if defined $cached;
137 unless ($#{$self->{offsets}} >= $n) {
138 my $o = $self->_fill_offsets_to($n);
139 # If it's still undefined, there is no such record, so return 'undef'
140 return unless defined $o;
143 my $fh = $self->{FH};
144 $self->_seek($n); # we can do this now that offsets is populated
145 my $rec = $self->_read_record;
147 # If we happen to have just read the first record, check to see if
148 # the length of the record matches what 'tell' says. If not, Tie::File
149 # won't work, and should drop dead.
151 # if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
152 # if (defined $self->{discipline}) {
153 # croak "I/O discipline $self->{discipline} not supported";
155 # croak "File encoding not supported";
159 $self->_cache_insert($n, $rec) if defined $rec;
164 my ($self, $n, $rec) = @_;
166 $self->_fixrecs($rec);
168 return $self->_store_deferred($n, $rec) if $self->{defer};
170 # We need this to decide whether the new record will fit
171 # It incidentally populates the offsets table
172 # Note we have to do this before we alter the cache
173 my $oldrec = $self->_fetch($n);
175 # _check_cache promotes record $n to MRU. Is this correct behavior?
176 if (my $cached = $self->_check_cache($n)) {
177 my $len_diff = length($rec) - length($cached);
178 $self->{cache}{$n} = $rec;
179 $self->{cached} += $len_diff;
182 && $self->{deferred_s} + $self->{cached} > $self->{memory};
185 if (not defined $oldrec) {
186 # We're storing a record beyond the end of the file
187 $self->_extend_file_to($n+1);
188 $oldrec = $self->{recsep};
190 my $len_diff = length($rec) - length($oldrec);
192 # length($oldrec) here is not consistent with text mode TODO XXX BUG
193 $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));
195 # now update the offsets
196 # array slice goes from element $n+1 (the first one to move)
198 for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
203 sub _store_deferred {
204 my ($self, $n, $rec) = @_;
206 my $old_deferred = $self->{deferred}{$n};
207 $self->{deferred}{$n} = $rec;
208 $self->{deferred_s} += length($rec);
209 $self->{deferred_s} -= length($old_deferred) if defined $old_deferred;
210 if ($self->{deferred_s} > $self->{dw_size}) {
212 $self->defer; # flush clears the 'defer' flag
213 } elsif ($self->{deferred_s} + $self->{cached} > $self->{memory}) {
220 my $n = $#{$self->{offsets}};
221 while (defined ($self->_fill_offsets_to($n+1))) {
228 my ($self, $len) = @_;
229 my $olen = $self->FETCHSIZE;
230 return if $len == $olen; # Woo-hoo!
234 $self->_extend_file_to($len);
241 $#{$self->{offsets}} = $len;
242 # $self->{offsets}[0] = 0; # in case we just chopped this
243 my @cached = grep $_ >= $len, keys %{$self->{cache}};
244 $self->_uncache(@cached);
249 $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
255 my $size = $self->FETCHSIZE;
256 return if $size == 0;
257 # print STDERR "# POPPITY POP POP POP\n";
258 scalar $self->SPLICE($size-1, 1);
263 scalar $self->SPLICE(0, 1);
268 $self->SPLICE(0, 0, @_);
273 # And enable auto-defer mode, since it's likely that they just
278 %{$self->{cache}} = ();
280 @{$self->{lru}} = ();
281 @{$self->{offsets}} = (0);
286 $self->_fill_offsets_to($n);
287 $self->_extend_file_to($n);
292 my $lastrec = $self->FETCHSIZE-1;
293 if ($n == $lastrec) {
296 $#{$self->{offsets}}--;
298 # perhaps in this case I should also remove trailing null records?
300 $self->STORE($n, "");
306 $self->_fill_offsets_to($n);
307 0 <= $n && $n < $self->FETCHSIZE;
312 $self->_flush if $self->{defer};
314 $self->_chomp(my @a = $self->_splice(@_));
317 $self->_chomp1(scalar $self->_splice(@_));
322 $self->flush if $self->{defer};
326 my ($self, $pos, $nrecs, @data) = @_;
329 $pos = 0 unless defined $pos;
331 # Deal with negative and other out-of-range positions
332 # Also set default for $nrecs
334 my $oldsize = $self->FETCHSIZE;
335 $nrecs = $oldsize unless defined $nrecs;
341 croak "Modification of non-creatable array value attempted, subscript $oldpos";
345 if ($pos > $oldsize) {
347 $pos = $oldsize; # This is what perl does for normal arrays
351 $self->_fixrecs(@data);
352 my $data = join '', @data;
353 my $datalen = length $data;
356 # compute length of data being removed
357 # Incidentally fills offsets table
358 for ($pos .. $pos+$nrecs-1) {
359 my $rec = $self->_fetch($_);
360 last unless defined $rec;
362 $oldlen += length($rec);
366 $self->_twrite($data, $self->{offsets}[$pos], $oldlen);
368 # update the offsets table part 1
369 # compute the offsets of the new records:
372 push @new_offsets, $self->{offsets}[$pos];
373 for (0 .. $#data-1) {
374 push @new_offsets, $new_offsets[-1] + length($data[$_]);
377 splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);
379 # update the offsets table part 2
380 # adjust the offsets of the following old records
381 for ($pos+@data .. $#{$self->{offsets}}) {
382 $self->{offsets}[$_] += $datalen - $oldlen;
384 # If we scrubbed out all known offsets, regenerate the trivial table
385 # that knows that the file does indeed start at 0.
386 $self->{offsets}[0] = 0 unless @{$self->{offsets}};
388 # Perhaps the following cache foolery could be factored out
389 # into a bunch of mor opaque cache functions. For example,
390 # it's odd to delete a record from the cache and then remove
391 # it from the LRU queue later on; there should be a function to
394 # update the read cache, part 1
396 # Consider this carefully for correctness
397 for ($pos .. $pos+$nrecs-1) {
398 my $cached = $self->{cache}{$_};
399 next unless defined $cached;
400 my $new = $data[$_-$pos];
402 $self->{cached} += length($new) - length($cached);
403 $self->{cache}{$_} = $new;
408 # update the read cache, part 2
409 # moved records - records past the site of the change
410 # need to be renumbered
411 # Maybe merge this with the previous block?
414 for (keys %{$self->{cache}}) {
415 next unless $_ >= $pos + $nrecs;
416 $adjusted{$_-$nrecs+@data} = delete $self->{cache}{$_};
418 @{$self->{cache}}{keys %adjusted} = values %adjusted;
419 # for (keys %{$self->{cache}}) {
420 # next unless $_ >= $pos + $nrecs;
421 # $self->{cache}{$_-$nrecs+@data} = delete $self->{cache}{$_};
427 for (@{$self->{lru}}) {
428 if ($_ >= $pos + $nrecs) {
429 push @new, $_ + @data - $nrecs;
430 } elsif ($_ >= $pos) {
431 push @changed, $_ if $_ < $pos + @data;
436 @{$self->{lru}} = (@new, @changed);
438 # Now there might be too much data in the cache, if we spliced out
439 # some short records and spliced in some long ones. If so, flush
443 # Yes, the return value of 'splice' *is* actually this complicated
444 wantarray ? @result : @result ? $result[-1] : undef;
447 # write data into the file
448 # $data is the data to be written.
449 # it should be written at position $pos, and should overwrite
450 # exactly $len of the following bytes.
451 # Note that if length($data) > $len, the subsequent bytes will have to
452 # be moved up, and if length($data) < $len, they will have to
455 my ($self, $data, $pos, $len) = @_;
457 unless (defined $pos) {
458 die "\$pos was undefined in _twrite";
461 my $len_diff = length($data) - $len;
463 if ($len_diff == 0) { # Woo-hoo!
464 my $fh = $self->{fh};
466 $self->_write_record($data);
467 return; # well, that was easy.
470 # the two records are of different lengths
471 # our strategy here: rewrite the tail of the file,
472 # reading ahead one buffer at a time
473 # $bufsize is required to be at least as large as the data we're overwriting
474 my $bufsize = _bufsize($len_diff);
475 my ($writepos, $readpos) = ($pos, $pos+$len);
478 # Seems like there ought to be a way to avoid the repeated code
479 # and the special case here. The read(1) is also a little weird.
482 $self->_seekb($readpos);
483 my $br = read $self->{fh}, $next_block, $bufsize;
484 my $more_data = read $self->{fh}, my($dummy), 1;
485 $self->_seekb($writepos);
486 $self->_write_record($data);
488 $writepos += length $data;
491 $self->_seekb($writepos);
492 $self->_write_record($next_block);
494 # There might be leftover data at the end of the file
495 $self->_chop_file if $len_diff < 0;
498 # If a record does not already end with the appropriate terminator
499 # string, append one.
503 $_ .= $self->{recsep}
504 unless substr($_, - $self->{recseplen}) eq $self->{recsep};
508 # seek to the beginning of record #$n
509 # Assumes that the offsets table is already correctly populated
511 # Note that $n=-1 has a special meaning here: It means the start of
512 # the last known record; this may or may not be the very last record
513 # in the file, depending on whether the offsets table is fully populated.
517 my $o = $self->{offsets}[$n];
519 or confess("logic error: undefined offset for record $n");
520 seek $self->{fh}, $o, SEEK_SET
521 or die "Couldn't seek filehandle: $!"; # "Should never happen."
526 seek $self->{fh}, $b, SEEK_SET
527 or die "Couldn't seek filehandle: $!"; # "Should never happen."
530 # populate the offsets table up to the beginning of record $n
531 # return the offset of record $n
532 sub _fill_offsets_to {
534 my $fh = $self->{fh};
535 local *OFF = $self->{offsets};
538 until ($#OFF >= $n) {
540 $self->_seek(-1); # tricky -- see comment at _seek
541 $rec = $self->_read_record;
545 return; # It turns out there is no such record
549 # we have now read all the records up to record n-1,
550 # so we can return the offset of record n
554 # assumes that $rec is already suitably terminated
556 my ($self, $rec) = @_;
557 my $fh = $self->{fh};
559 or die "Couldn't write record: $!"; # "Should never happen."
566 { local $/ = $self->{recsep};
567 my $fh = $self->{fh};
574 my ($self, $n, $rec) = @_;
576 # Do not cache records that are too big to fit in the cache.
577 return unless length $rec <= $self->{memory};
579 $self->{cache}{$n} = $rec;
580 $self->{cached} += length $rec;
581 push @{$self->{lru}}, $n; # most-recently-used is at the END
583 $self->_cache_flush if $self->{cached} > $self->{memory};
589 my $cached = delete $self->{cache}{$n};
590 next unless defined $cached;
591 @{$self->{lru}} = grep $_ != $n, @{$self->{lru}};
592 $self->{cached} -= length($cached);
599 return unless defined($rec = $self->{cache}{$n});
601 # cache hit; update LRU queue and return $rec
602 # replace this with a heap in a later version
603 @{$self->{lru}} = ((grep $_ ne $n, @{$self->{lru}}), $n);
609 while ($self->{cached} + $self->{deferred_s} > $self->{memory}) {
610 my $lru = shift @{$self->{lru}};
611 my $rec = delete $self->{cache}{$lru};
612 $self->{cached} -= length $rec;
616 # We have read to the end of the file and have the offsets table
617 # entirely populated. Now we need to write a new record beyond
618 # the end of the file. We prepare for this by writing
619 # empty records into the file up to the position we want
621 # assumes that the offsets table already contains the offset of record $n,
622 # if it exists, and extends to the end of the file if not.
623 sub _extend_file_to {
625 $self->_seek(-1); # position after the end of the last record
626 my $pos = $self->{offsets}[-1];
628 # the offsets table has one entry more than the total number of records
629 $extras = $n - $#{$self->{offsets}};
631 # Todo : just use $self->{recsep} x $extras here?
632 while ($extras-- > 0) {
633 $self->_write_record($self->{recsep});
634 push @{$self->{offsets}}, tell $self->{fh};
638 # Truncate the file at the current position
641 truncate $self->{fh}, tell($self->{fh});
644 # compute the size of a buffer suitable for moving
645 # all the data in a file forward $n bytes
646 # ($n may be negative)
647 # The result should be at least $n.
650 return 8192 if $n < 0;
652 $b += 8192 if $n & 8191;
658 my ($self, $op) = @_;
660 my $pack = ref $self;
661 croak "Usage: $pack\->flock([OPERATION])";
663 my $fh = $self->{fh};
664 $op = LOCK_EX unless defined $op;
674 # Get/set autochomp option
678 my $old = $self->{autochomp};
679 $self->{autochomp} = shift;
686 # Flush deferred writes
688 # This could be better optimized to write the file in one pass, instead
689 # of one pass per block of records. But that will require modifications
690 # to _twrite, so I should have a good _twite test suite first.
700 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
703 # gather all consecutive records from the front of @writable
704 my $first_rec = shift @writable;
705 my $last_rec = $first_rec+1;
706 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
708 $self->_fill_offsets_to($last_rec);
709 $self->_extend_file_to($last_rec);
710 $self->_splice($first_rec, $last_rec-$first_rec+1,
711 @{$self->{deferred}}{$first_rec .. $last_rec});
714 $self->discard; # clear out defered-write-cache
717 # Discard deferred writes
720 undef $self->{deferred};
721 $self->{deferred_s} = 0;
725 # Not yet implemented
728 sub _default_recsep {
730 if ($^O eq 'MSWin32') {
731 # Windows users expect files to be terminated with \r\n
732 # But $/ is set to \n instead
733 # Note that this also transforms \n\n into \r\n\r\n.
735 $recsep =~ s/\n/\r\n/g;
740 # Given a file, make sure the cache is consistent with the
742 sub _check_integrity {
743 my ($self, $file, $warn) = @_;
746 if (not defined $self->{offsets}[0]) {
747 $warn && print STDERR "# offset 0 is missing!\n";
749 } elsif ($self->{offsets}[0] != 0) {
750 $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n";
754 local *F = $self->{fh};
756 local $/ = $self->{recsep};
761 my $cached = $self->{cache}{$n};
762 my $offset = $self->{offsets}[$.];
764 if (defined $offset && $offset != $ao) {
765 $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n";
768 if (defined $cached && $_ ne $cached) {
772 $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n";
777 while (my ($n, $r) = each %{$self->{cache}}) {
778 $memory += length($r);
779 next if $n+1 <= $.; # checked this already
780 $warn && print STDERR "# spurious caching of record $n\n";
783 if ($memory != $self->{cached}) {
784 $warn && print STDERR "# cache size is $self->{cached}, should be $memory\n";
788 my (%seen, @duplicate);
789 for (@{$self->{lru}}) {
791 if (not exists $self->{cache}{$_}) {
792 $warn && print "# $_ is mentioned in the LRU queue, but not in the cache\n";
796 @duplicate = grep $seen{$_}>1, keys %seen;
798 my $records = @duplicate == 1 ? 'Record' : 'Records';
799 my $appear = @duplicate == 1 ? 'appears' : 'appear';
800 $warn && print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n";
803 for (keys %{$self->{cache}}) {
804 unless (exists $seen{$_}) {
805 print "# record $_ is in the cache but not the LRU queue\n";
813 "Cogito, ergo sum."; # don't forget to return a true value from the file
817 Tie::File - Access the lines of a disk file via a Perl array
821 # This file documents Tie::File version 0.20
823 tie @array, 'Tie::File', filename or die ...;
825 $array[13] = 'blah'; # line 13 of the file is now 'blah'
826 print $array[42]; # display line 42 of the file
828 $n_recs = @array; # how many records are in the file?
829 $#array = $n_recs - 2; # chop records off the end
831 # As you would expect:
833 push @array, new recs...;
835 unshift @array, new recs...;
836 my $r1 = shift @array;
837 @old_recs = splice @array, 3, 7, new recs...;
839 untie @array; # all finished
843 C<Tie::File> represents a regular text file as a Perl array. Each
844 element in the array corresponds to a record in the file. The first
845 line of the file is element 0 of the array; the second line is element
848 The file is I<not> loaded into memory, so this will work even for
851 Changes to the array are reflected in the file immediately.
853 Lazy people may now stop reading the manual.
857 What is a 'record'? By default, the meaning is the same as for the
858 C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
859 probably C<"\n">. (Minor exception: on dos and Win32 systems, a
860 'record' is a string terminated by C<"\r\n">.) You may change the
861 definition of "record" by supplying the C<recsep> option in the C<tie>
864 tie @array, 'Tie::File', $file, recsep => 'es';
866 This says that records are delimited by the string C<es>. If the file
867 contained the following data:
869 Curse these pesky flies!\n
871 then the C<@array> would appear to have four elements:
878 An undefined value is not permitted as a record separator. Perl's
879 special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
882 Records read from the tied array do not have the record separator
883 string on the end; this is to allow
885 $array[17] .= "extra";
889 (See L<"autochomp">, below.) Records stored into the array will have
890 the record separator string appended before they are written to the
891 file, if they don't have one already. For example, if the record
892 separator string is C<"\n">, then the following two lines do exactly
895 $array[17] = "Cherry pie";
896 $array[17] = "Cherry pie\n";
898 The result is that the contents of line 17 of the file will be
899 replaced with "Cherry pie"; a newline character will separate line 17
900 from line 18. This means that in particular, this will do nothing:
904 Because the C<chomp>ed value will have the separator reattached when
905 it is written back to the file. There is no way to create a file
906 whose trailing record separator string is missing.
908 Inserting records that I<contain> the record separator string will
909 produce a reasonable result, but if you can't foresee what this result
910 will be, you'd better avoid doing this.
914 Normally, array elements have the record separator removed, so that if
915 the file contains the text
921 the tied array will appear to contain C<("Gold", "Frankincense", "Myrrh")>.
922 If you set C<autochomp> to a false value, the record separator will not be removed. If the file above was tied with
924 tie @gifts, "Tie::File", $gifts, autochomp => 0;
926 then the array C<@gifts> would appear to contain C<("Gold\n",
927 "Frankincense\n", "Myrrh\n")>, or (on Win32 systems) C<("Gold\r\n",
928 "Frankincense\r\n", "Myrrh\r\n")>.
932 Normally, the specified file will be opened for read and write access,
933 and will be created if it does not exist. (That is, the flags
934 C<O_RDWR | O_CREAT> are supplied in the C<open> call.) If you want to
935 change this, you may supply alternative flags in the C<mode> option.
936 See L<Fcntl> for a listing of available flags.
939 # open the file if it exists, but fail if it does not exist
941 tie @array, 'Tie::File', $file, mode => O_RDWR;
943 # create the file if it does not exist
944 use Fcntl 'O_RDWR', 'O_CREAT';
945 tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
947 # open an existing file in read-only mode
948 use Fcntl 'O_RDONLY';
949 tie @array, 'Tie::File', $file, mode => O_RDONLY;
951 Opening the data file in write-only or append mode is not supported.
955 This is an (inexact) upper limit on the amount of memory that
956 C<Tie::File> will consume at any time while managing the file.
957 At present, this is used as a bound on the size of the read cache.
959 Records read in from the file are cached, to avoid having to re-read
960 them repeatedly. If you read the same record twice, the first time it
961 will be stored in memory, and the second time it will be fetched from
962 the I<read cache>. The amount of data in the read cache will not
963 exceed the value you specified for C<memory>. If C<Tie::File> wants
964 to cache a new record, but the read cache is full, it will make room
965 by expiring the least-recently visited records from the read cache.
967 The default memory limit is 2Mib. You can adjust the maximum read
968 cache size by supplying the C<memory> option. The argument is the
969 desired cache size, in bytes.
971 # I have a lot of memory, so use a large cache to speed up access
972 tie @array, 'Tie::File', $file, memory => 20_000_000;
974 Setting the memory limit to 0 will inhibit caching; records will be
975 fetched from disk every time you examine them.
979 C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for
980 C<recsep>. C<-memory> is a synonym for C<memory>. You get the
983 =head1 Public Methods
985 The C<tie> call returns an object, say C<$o>. You may call
987 $rec = $o->FETCH($n);
990 to fetch or store the record at line C<$n>, respectively; similarly
991 the other tied array methods. (See L<perltie> for details.) You may
992 also call the following methods on this object:
998 will lock the tied file. C<MODE> has the same meaning as the second
999 argument to the Perl built-in C<flock> function; for example
1000 C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
1001 the C<use Fcntl ':flock'> declaration.)
1003 C<MODE> is optional; C<$o-E<gt>flock> simply locks the file with
1006 The best way to unlock a file is to discard the object and untie the
1007 array. It is probably unsafe to unlock the file without also untying
1008 it, because if you do, changes may remain unwritten inside the object.
1009 That is why there is no shortcut for unlocking. If you really want to
1010 unlock the file prematurely, you know what to do; if you don't know
1011 what to do, then don't do it.
1013 All the usual warnings about file locking apply here. In particular,
1014 note that file locking in Perl is B<advisory>, which means that
1015 holding a lock will not prevent anyone else from reading, writing, or
1016 erasing the file; it only prevents them from getting another lock at
1017 the same time. Locks are analogous to green traffic lights: If you
1018 have a green light, that does not prevent the idiot coming the other
1019 way from plowing into you sideways; it merely guarantees to you that
1020 the idiot does not also have a green light at the same time.
1024 my $old_value = $o->autochomp(0); # disable autochomp option
1025 my $old_value = $o->autochomp(1); # enable autochomp option
1027 my $ac = $o->autochomp(); # recover current value
1029 See L<"autochomp">, above.
1031 =head1 Tying to an already-opened filehandle
1033 If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
1034 of the other C<IO> modules, you may use:
1036 tie @array, 'Tie::File', $fh, ...;
1038 Similarly if you opened that handle C<FH> with regular C<open> or
1039 C<sysopen>, you may use:
1041 tie @array, 'Tie::File', \*FH, ...;
1043 Handles that were opened write-only won't work. Handles that were
1044 opened read-only will work as long as you don't try to write to them.
1045 Handles must be attached to seekable sources of data---that means no
1046 pipes or sockets. If you supply a non-seekable handle, the C<tie>
1047 call will try to abort your program.
1051 (That's Latin for 'warnings'.)
1057 This is BETA RELEASE SOFTWARE. It may have bugs. See the discussion
1058 below about the (lack of any) warranty.
1062 Every effort was made to make this module efficient. Nevertheless,
1063 changing the size of a record in the middle of a large file will
1064 always be fairly slow, because everything after the new record must be
1067 In particular, note that the following innocent-looking loop has very
1075 This is likely to be very slow, because the first iteration must
1076 relocate lines 1 through 999,999; the second iteration must relocate
1077 lines 2 through 999,999, and so on. The relocation is done using
1078 block writes, however, so it's not as slow as it might be.
1080 A soon-to-be-released version of this module will provide a mechanism
1081 for getting better performance in such cases, by deferring the writing
1082 until it can be done all at once. This deferred writing feature might
1083 be enabled automagically if C<Tie::File> guesses that you are about to write many consecutive records. To disable this feature, use
1085 (tied @o)->autodefer(0);
1087 (At present, this call does nothing.)
1091 The behavior of tied arrays is not precisely the same as for regular
1092 arrays. For example:
1094 undef $a[10]; print "How unusual!\n" if $a[10];
1096 C<undef>-ing a C<Tie::File> array element just blanks out the
1097 corresponding record in the file. When you read it back again, you'll
1098 see the record separator (typically, $a[10] will appear to contain
1099 "\n") so the supposedly-C<undef>'ed value will be true.
1101 There are other minor differences, but in general, the correspondence
1106 Not quite every effort was made to make this module as efficient as
1107 possible. C<FETCHSIZE> should use binary search instead of linear
1108 search. The cache's LRU queue should be a heap instead of a list.
1109 These defects are probably minor; in any event, they will be fixed in
1110 a later version of the module.
1114 The author has supposed that since this module is concerned with file
1115 I/O, almost all normal use of it will be heavily I/O bound, and that
1116 the time to maintain complicated data structures inside the module
1117 will be dominated by the time to actually perform the I/O. This
1118 suggests, for example, that an LRU read-cache is a good tradeoff,
1119 even if it requires substantial adjustment following a C<splice>
1124 =head1 WHAT ABOUT C<DB_File>?
1126 C<DB_File>'s C<DB_RECNO> feature does something similar to
1127 C<Tie::File>, but there are a number of reasons that you might prefer
1128 C<Tie::File>. C<DB_File> is a great piece of software, but the
1129 C<DB_RECNO> part is less great than the rest of it.
1135 C<DB_File> reads your entire file into memory, modifies it in memory,
1136 and the writes out the entire file again when you untie the file.
1137 This is completely impractical for large files.
1139 C<Tie::File> does not do any of those things. It doesn't try to read
1140 the entire file into memory; instead it uses a lazy approach and
1141 caches recently-used records. The cache size is strictly bounded by
1142 the C<memory> option. DB_File's C<-E<gt>{cachesize}> doesn't prevent
1143 your process from blowing up when reading a big file.
1147 C<DB_File> has an extremely poor writing strategy. If you have a
1148 ten-megabyte file and tie it with C<DB_File>, and then use
1150 $a[0] =~ s/PERL/Perl/;
1152 C<DB_file> will then read the entire ten-megabyte file into memory, do
1153 the change, and write the entire file back to disk, reading ten
1154 megabytes and writing ten megabytes. C<Tie::File> will read and write
1155 only the first record.
1157 If you have a million-record file and tie it with C<DB_File>, and then
1160 $a[999998] =~ s/Larry/Larry Wall/;
1162 C<DB_File> will read the entire million-record file into memory, do
1163 the change, and write the entire file back to disk. C<Tie::File> will
1164 only rewrite records 999998 and 999999. During the writing process,
1165 it will never have more than a few kilobytes of data in memory at any
1166 time, even if the two records are very large.
1170 Since changes to C<DB_File> files only appear when you do C<untie>, it
1171 can be inconvenient to arrange for concurrent access to the same file
1172 by two or more processes. Each process needs to call C<$db-E<gt>sync>
1173 after every write. When you change a C<Tie::File> array, the changes
1174 are reflected in the file immediately; no explicit C<-E<gt>sync> call
1175 is required. (The forthcoming "deferred writing" mode will allow you
1176 to request that writes be held in memory until explicitly C<sync>'ed.)
1180 C<DB_File> is only installed by default if you already have the C<db>
1181 library on your system; C<Tie::File> is pure Perl and is installed by
1182 default no matter what. Starting with Perl 5.7.3 you can be
1183 absolutely sure it will be everywhere. You will never have that
1184 surety with C<DB_File>. If you don't have C<DB_File> yet, it requires
1185 a C compiler. You can install C<Tie::File> from CPAN in five minutes
1190 C<DB_File> is written in C, so if you aren't allowed to install
1191 modules on your system, it is useless. C<Tie::File> is written in Perl,
1192 so even if you aren't allowed to install modules, you can look into
1193 the source code, see how it works, and copy the subroutines or the
1194 ideas from the subroutines directly into your own Perl program.
1198 Except in very old, unsupported versions, C<DB_File>'s free license
1199 requires that you distribute the source code for your entire
1200 application. If you are not able to distribute the source code for
1201 your application, you must negotiate an alternative license from
1202 Sleepycat, possibly for a fee. Tie::File is under the Perl Artistic
1203 license and can be distributed free under the same terms as Perl
1212 To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
1214 To receive an announcement whenever a new version of this module is
1215 released, send a blank email message to
1216 C<mjd-perl-tiefile-subscribe@plover.com>.
1220 C<Tie::File> version 0.20 is copyright (C) 2002 Mark Jason Dominus.
1222 This library is free software; you may redistribute it and/or modify
1223 it under the same terms as Perl itself.
1225 These terms include your choice of (1) the Perl Artistic Licence, or
1226 (2) version 2 of the GNU General Public License as published by the
1227 Free Software Foundation, or (3) any later version of the GNU General
1230 This library is distributed in the hope that it will be useful,
1231 but WITHOUT ANY WARRANTY; without even the implied warranty of
1232 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1233 GNU General Public License for more details.
1235 You should have received a copy of the GNU General Public License
1236 along with this library program; it should be in the file C<COPYING>.
1237 If not, write to the Free Software Foundation, Inc., 59 Temple Place,
1238 Suite 330, Boston, MA 02111 USA
1240 For licensing inquiries, contact the author at:
1244 Philadelphia, PA 19107
1248 C<Tie::File> version 0.20 comes with ABSOLUTELY NO WARRANTY.
1249 For details, see the license.
1253 Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
1254 core when I hadn't written it yet, and for generally being helpful,
1255 supportive, and competent. (Usually the rule is "choose any one.")
1256 Also big thanks to Abhijit Menon-Sen for all of the same things.
1258 Special thanks to Craig Berry (for VMS portability help), Randy Kobes
1259 (for Win32 portability help), Clinton Pierce and Autrijus Tang (for
1260 heroic eleventh-hour Win32 testing above and beyond the call of duty),
1261 and the rest of the CPAN testers (for testing generally).
1268 Tassilo von Parseval /
1276 Test DELETE machinery more carefully.
1278 More tests. (C<mode> option. _twrite should be tested separately,
1279 because there are a lot of weird special cases lurking in there.)
1281 More tests. (Stuff I didn't think of yet.)
1289 Maybe an autolocking mode?
1291 Finish deferred writing.
1295 Record locking with fcntl()? Then you might support an undo log and
1296 get real transactions. What a coup that would be.