test.pl-isation.
[p5sagit/p5-mst-13.2.git] / lib / Tie / File.pm
CommitLineData
b5aed31e 1
2package Tie::File;
6fc0ea7e 3require 5.005;
6ae23f41 4use Carp ':DEFAULT', 'confess';
b5aed31e 5use POSIX 'SEEK_SET';
6ae23f41 6use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'LOCK_SH', 'O_WRONLY', 'O_RDONLY';
95f36366 7sub O_ACCMODE () { O_RDONLY | O_RDWR | O_WRONLY }
b5aed31e 8
6ae23f41 9
10$VERSION = "0.95";
b3fe5a4c 11my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes
6fc0ea7e 12my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records
13my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful
b3fe5a4c 14
6ae23f41 15my %good_opt = map {$_ => 1, "-$_" => 1}
16 qw(memory dw_size mode recsep discipline
17 autodefer autochomp autodefer_threshhold);
b5aed31e 18
19sub TIEARRAY {
20 if (@_ % 2 != 0) {
21 croak "usage: tie \@array, $_[0], filename, [option => value]...";
22 }
23 my ($pack, $file, %opts) = @_;
24
25 # transform '-foo' keys into 'foo' keys
26 for my $key (keys %opts) {
b3fe5a4c 27 unless ($good_opt{$key}) {
28 croak("$pack: Unrecognized option '$key'\n");
29 }
b5aed31e 30 my $okey = $key;
31 if ($key =~ s/^-+//) {
32 $opts{$key} = delete $opts{$okey};
33 }
34 }
35
b3fe5a4c 36 unless (defined $opts{memory}) {
37 # default is the larger of the default cache size and the
38 # deferred-write buffer size (if specified)
39 $opts{memory} = $DEFAULT_MEMORY_SIZE;
6ae23f41 40 $opts{memory} = $opts{dw_size}
b3fe5a4c 41 if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE;
57c7bc08 42 # Dora Winifred Read
b3fe5a4c 43 }
44 $opts{dw_size} = $opts{memory} unless defined $opts{dw_size};
45 if ($opts{dw_size} > $opts{memory}) {
46 croak("$pack: dw_size may not be larger than total memory allocation\n");
47 }
57c7bc08 48 # are we in deferred-write mode?
49 $opts{defer} = 0 unless defined $opts{defer};
50 $opts{deferred} = {}; # no records are presently deferred
b3fe5a4c 51 $opts{deferred_s} = 0; # count of total bytes in ->{deferred}
6fc0ea7e 52 $opts{deferred_max} = -1; # empty
b5aed31e 53
6ae23f41 54 # What's a good way to arrange that this class can be overridden?
55 $opts{cache} = Tie::File::Cache->new($opts{memory});
6fc0ea7e 56
57 # autodeferment is enabled by default
58 $opts{autodefer} = 1 unless defined $opts{autodefer};
59 $opts{autodeferring} = 0; # but is not initially active
60 $opts{ad_history} = [];
61 $opts{autodefer_threshhold} = $DEFAULT_AUTODEFER_THRESHHOLD
62 unless defined $opts{autodefer_threshhold};
63 $opts{autodefer_filelen_threshhold} = $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD
64 unless defined $opts{autodefer_filelen_threshhold};
b5aed31e 65
66 $opts{offsets} = [0];
67 $opts{filename} = $file;
b3fe5a4c 68 unless (defined $opts{recsep}) {
69 $opts{recsep} = _default_recsep();
70 }
b5aed31e 71 $opts{recseplen} = length($opts{recsep});
72 if ($opts{recseplen} == 0) {
73 croak "Empty record separator not supported by $pack";
74 }
75
0b28bc9a 76 $opts{autochomp} = 1 unless defined $opts{autochomp};
77
27531ffb 78 $opts{mode} = O_CREAT|O_RDWR unless defined $opts{mode};
79 $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
bf919750 80 $opts{sawlastrec} = undef;
27531ffb 81
fa408a35 82 my $fh;
b5aed31e 83
fa408a35 84 if (UNIVERSAL::isa($file, 'GLOB')) {
57c7bc08 85 # We use 1 here on the theory that some systems
86 # may not indicate failure if we use 0.
87 # MSWin32 does not indicate failure with 0, but I don't know if
88 # it will indicate failure with 1 or not.
89 unless (seek $file, 1, SEEK_SET) {
fa408a35 90 croak "$pack: your filehandle does not appear to be seekable";
91 }
57c7bc08 92 seek $file, 0, SEEK_SET # put it back
93 $fh = $file; # setting binmode is the user's problem
fa408a35 94 } elsif (ref $file) {
95 croak "usage: tie \@array, $pack, filename, [option => value]...";
96 } else {
f7363f9d 97 # $fh = \do { local *FH }; # XXX this is buggy
e0cf91d5 98 if ($] < 5.006) {
99 # perl 5.005 and earlier don't autovivify filehandles
100 require Symbol;
101 $fh = Symbol::gensym();
102 }
27531ffb 103 sysopen $fh, $file, $opts{mode}, 0666 or return;
fa408a35 104 binmode $fh;
2a00e2a4 105 ++$opts{ourfh};
fa408a35 106 }
b5aed31e 107 { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
b3fe5a4c 108 if (defined $opts{discipline} && $] >= 5.006) {
109 # This avoids a compile-time warning under 5.005
110 eval 'binmode($fh, $opts{discipline})';
111 croak $@ if $@ =~ /unknown discipline/i;
112 die if $@;
113 }
b5aed31e 114 $opts{fh} = $fh;
115
116 bless \%opts => $pack;
117}
118
119sub FETCH {
120 my ($self, $n) = @_;
6fc0ea7e 121 my $rec;
122
123 # check the defer buffer
6ae23f41 124 $rec = $self->{deferred}{$n} if exists $self->{deferred}{$n};
125 $rec = $self->_fetch($n) unless defined $rec;
6fc0ea7e 126
6ae23f41 127 # inlined _chomp1
128 substr($rec, - $self->{recseplen}) = ""
129 if defined $rec && $self->{autochomp};
130 $rec;
0b28bc9a 131}
132
133# Chomp many records in-place; return nothing useful
134sub _chomp {
135 my $self = shift;
136 return unless $self->{autochomp};
137 if ($self->{autochomp}) {
138 for (@_) {
139 next unless defined;
140 substr($_, - $self->{recseplen}) = "";
141 }
142 }
143}
144
145# Chomp one record in-place; return modified record
146sub _chomp1 {
147 my ($self, $rec) = @_;
148 return $rec unless $self->{autochomp};
149 return unless defined $rec;
150 substr($rec, - $self->{recseplen}) = "";
151 $rec;
152}
153
154sub _fetch {
155 my ($self, $n) = @_;
b5aed31e 156
157 # check the record cache
6fc0ea7e 158 { my $cached = $self->{cache}->lookup($n);
b5aed31e 159 return $cached if defined $cached;
160 }
161
27531ffb 162 if ($#{$self->{offsets}} < $n) {
6ae23f41 163 return if $self->{eof}; # request for record beyond end of file
b5aed31e 164 my $o = $self->_fill_offsets_to($n);
165 # If it's still undefined, there is no such record, so return 'undef'
166 return unless defined $o;
167 }
168
169 my $fh = $self->{FH};
170 $self->_seek($n); # we can do this now that offsets is populated
171 my $rec = $self->_read_record;
b3fe5a4c 172
173# If we happen to have just read the first record, check to see if
174# the length of the record matches what 'tell' says. If not, Tie::File
175# won't work, and should drop dead.
176#
177# if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) {
178# if (defined $self->{discipline}) {
179# croak "I/O discipline $self->{discipline} not supported";
180# } else {
181# croak "File encoding not supported";
182# }
183# }
184
6fc0ea7e 185 $self->{cache}->insert($n, $rec) if defined $rec && not $self->{flushing};
b5aed31e 186 $rec;
187}
188
189sub STORE {
190 my ($self, $n, $rec) = @_;
6fc0ea7e 191 die "STORE called from _check_integrity!" if $DIAGNOSTIC;
b5aed31e 192
193 $self->_fixrecs($rec);
194
6fc0ea7e 195 if ($self->{autodefer}) {
196 $self->_annotate_ad_history($n);
197 }
198
199 return $self->_store_deferred($n, $rec) if $self->_is_deferring;
200
b5aed31e 201
202 # We need this to decide whether the new record will fit
203 # It incidentally populates the offsets table
204 # Note we have to do this before we alter the cache
6fc0ea7e 205 # 20020324 Wait, but this DOES alter the cache. TODO BUG?
0b28bc9a 206 my $oldrec = $self->_fetch($n);
b5aed31e 207
b5aed31e 208 if (not defined $oldrec) {
209 # We're storing a record beyond the end of the file
51efdd02 210 $self->_extend_file_to($n+1);
b5aed31e 211 $oldrec = $self->{recsep};
212 }
6ae23f41 213# return if $oldrec eq $rec; # don't bother
b5aed31e 214 my $len_diff = length($rec) - length($oldrec);
215
b3fe5a4c 216 # length($oldrec) here is not consistent with text mode TODO XXX BUG
6ae23f41 217 $self->_mtwrite($rec, $self->{offsets}[$n], length($oldrec));
218 $self->_oadjust([$n, 1, $rec]);
219 $self->{cache}->update($n, $rec);
b5aed31e 220}
221
b3fe5a4c 222sub _store_deferred {
223 my ($self, $n, $rec) = @_;
6fc0ea7e 224 $self->{cache}->remove($n);
b3fe5a4c 225 my $old_deferred = $self->{deferred}{$n};
6fc0ea7e 226
227 if (defined $self->{deferred_max} && $n > $self->{deferred_max}) {
228 $self->{deferred_max} = $n;
229 }
b3fe5a4c 230 $self->{deferred}{$n} = $rec;
6fc0ea7e 231
232 my $len_diff = length($rec);
233 $len_diff -= length($old_deferred) if defined $old_deferred;
234 $self->{deferred_s} += $len_diff;
235 $self->{cache}->adj_limit(-$len_diff);
b3fe5a4c 236 if ($self->{deferred_s} > $self->{dw_size}) {
57c7bc08 237 $self->_flush;
238 } elsif ($self->_cache_too_full) {
b3fe5a4c 239 $self->_cache_flush;
240 }
241}
242
57c7bc08 243# Remove a single record from the deferred-write buffer without writing it
244# The record need not be present
245sub _delete_deferred {
246 my ($self, $n) = @_;
247 my $rec = delete $self->{deferred}{$n};
248 return unless defined $rec;
6fc0ea7e 249
250 if (defined $self->{deferred_max}
251 && $n == $self->{deferred_max}) {
252 undef $self->{deferred_max};
253 }
254
57c7bc08 255 $self->{deferred_s} -= length $rec;
6fc0ea7e 256 $self->{cache}->adj_limit(length $rec);
57c7bc08 257}
258
b5aed31e 259sub FETCHSIZE {
260 my $self = shift;
6ae23f41 261 my $n = $self->{eof} ? $#{$self->{offsets}} : $self->_fill_offsets;
262
6fc0ea7e 263 my $top_deferred = $self->_defer_max;
264 $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1;
b5aed31e 265 $n;
266}
267
268sub STORESIZE {
269 my ($self, $len) = @_;
6fc0ea7e 270
271 if ($self->{autodefer}) {
272 $self->_annotate_ad_history('STORESIZE');
273 }
274
b5aed31e 275 my $olen = $self->FETCHSIZE;
276 return if $len == $olen; # Woo-hoo!
277
278 # file gets longer
279 if ($len > $olen) {
6fc0ea7e 280 if ($self->_is_deferring) {
57c7bc08 281 for ($olen .. $len-1) {
282 $self->_store_deferred($_, $self->{recsep});
283 }
284 } else {
285 $self->_extend_file_to($len);
286 }
b5aed31e 287 return;
288 }
289
290 # file gets shorter
6fc0ea7e 291 if ($self->_is_deferring) {
292 # TODO maybe replace this with map-plus-assignment?
57c7bc08 293 for (grep $_ >= $len, keys %{$self->{deferred}}) {
294 $self->_delete_deferred($_);
295 }
6fc0ea7e 296 $self->{deferred_max} = $len-1;
57c7bc08 297 }
298
b5aed31e 299 $self->_seek($len);
300 $self->_chop_file;
836d9961 301 $#{$self->{offsets}} = $len;
b3fe5a4c 302# $self->{offsets}[0] = 0; # in case we just chopped this
6fc0ea7e 303
bf919750 304 $self->{cache}->remove(grep $_ >= $len, $self->{cache}->ckeys);
b5aed31e 305}
306
6ae23f41 307### OPTIMIZE ME
308### It should not be necessary to do FETCHSIZE
309### Just seek to the end of the file.
51efdd02 310sub PUSH {
311 my $self = shift;
312 $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
6ae23f41 313
314 # No need to return:
315 # $self->FETCHSIZE; # because av.c takes care of this for me
51efdd02 316}
317
318sub POP {
319 my $self = shift;
7b6b3db1 320 my $size = $self->FETCHSIZE;
321 return if $size == 0;
322# print STDERR "# POPPITY POP POP POP\n";
323 scalar $self->SPLICE($size-1, 1);
51efdd02 324}
325
326sub SHIFT {
327 my $self = shift;
328 scalar $self->SPLICE(0, 1);
329}
330
331sub UNSHIFT {
332 my $self = shift;
333 $self->SPLICE(0, 0, @_);
57c7bc08 334 # $self->FETCHSIZE; # av.c takes care of this for me
51efdd02 335}
336
337sub CLEAR {
51efdd02 338 my $self = shift;
6fc0ea7e 339
340 if ($self->{autodefer}) {
341 $self->_annotate_ad_history('CLEAR');
342 }
343
51efdd02 344 $self->_seekb(0);
345 $self->_chop_file;
6fc0ea7e 346 $self->{cache}->set_limit($self->{memory});
347 $self->{cache}->empty;
51efdd02 348 @{$self->{offsets}} = (0);
57c7bc08 349 %{$self->{deferred}}= ();
350 $self->{deferred_s} = 0;
6fc0ea7e 351 $self->{deferred_max} = -1;
51efdd02 352}
353
354sub EXTEND {
355 my ($self, $n) = @_;
57c7bc08 356
357 # No need to pre-extend anything in this case
6fc0ea7e 358 return if $self->_is_deferring;
57c7bc08 359
51efdd02 360 $self->_fill_offsets_to($n);
361 $self->_extend_file_to($n);
362}
363
364sub DELETE {
365 my ($self, $n) = @_;
6fc0ea7e 366
367 if ($self->{autodefer}) {
368 $self->_annotate_ad_history('DELETE');
369 }
370
51efdd02 371 my $lastrec = $self->FETCHSIZE-1;
57c7bc08 372 my $rec = $self->FETCH($n);
6fc0ea7e 373 $self->_delete_deferred($n) if $self->_is_deferring;
51efdd02 374 if ($n == $lastrec) {
375 $self->_seek($n);
376 $self->_chop_file;
fa408a35 377 $#{$self->{offsets}}--;
6fc0ea7e 378 $self->{cache}->remove($n);
51efdd02 379 # perhaps in this case I should also remove trailing null records?
57c7bc08 380 # 20020316
381 # Note that delete @a[-3..-1] deletes the records in the wrong order,
382 # so we only chop the very last one out of the file. We could repair this
383 # by tracking deleted records inside the object.
384 } elsif ($n < $lastrec) {
51efdd02 385 $self->STORE($n, "");
386 }
57c7bc08 387 $rec;
51efdd02 388}
389
390sub EXISTS {
391 my ($self, $n) = @_;
57c7bc08 392 return 1 if exists $self->{deferred}{$n};
57c7bc08 393 $n < $self->FETCHSIZE;
51efdd02 394}
395
b5aed31e 396sub SPLICE {
b3fe5a4c 397 my $self = shift;
6fc0ea7e 398
399 if ($self->{autodefer}) {
400 $self->_annotate_ad_history('SPLICE');
401 }
402
403 $self->_flush if $self->_is_deferring; # move this up?
0b28bc9a 404 if (wantarray) {
405 $self->_chomp(my @a = $self->_splice(@_));
406 @a;
407 } else {
408 $self->_chomp1(scalar $self->_splice(@_));
409 }
b3fe5a4c 410}
411
412sub DESTROY {
57c7bc08 413 my $self = shift;
6fc0ea7e 414 $self->flush if $self->_is_deferring;
415 $self->{cache}->delink if defined $self->{cache}; # break circular link
2a00e2a4 416 if ($self->{fh} and $self->{ourfh}) {
417 delete $self->{ourfh};
418 close delete $self->{fh};
2a00e2a4 419 }
b3fe5a4c 420}
421
422sub _splice {
b5aed31e 423 my ($self, $pos, $nrecs, @data) = @_;
424 my @result;
425
7b6b3db1 426 $pos = 0 unless defined $pos;
427
428 # Deal with negative and other out-of-range positions
429 # Also set default for $nrecs
51efdd02 430 {
431 my $oldsize = $self->FETCHSIZE;
7b6b3db1 432 $nrecs = $oldsize unless defined $nrecs;
51efdd02 433 my $oldpos = $pos;
434
435 if ($pos < 0) {
436 $pos += $oldsize;
437 if ($pos < 0) {
438 croak "Modification of non-creatable array value attempted, subscript $oldpos";
439 }
440 }
441
442 if ($pos > $oldsize) {
443 return unless @data;
444 $pos = $oldsize; # This is what perl does for normal arrays
445 }
bf919750 446
447 # The manual is very unclear here
448 if ($nrecs < 0) {
449 $nrecs = $oldsize - $pos + $nrecs;
450 $nrecs = 0 if $nrecs < 0;
451 }
6ae23f41 452
453 # nrecs is too big---it really means "until the end"
454 # 20030507
455 if ($nrecs + $pos > $oldsize) {
456 $nrecs = $oldsize - $pos;
457 }
51efdd02 458 }
b5aed31e 459
460 $self->_fixrecs(@data);
461 my $data = join '', @data;
462 my $datalen = length $data;
463 my $oldlen = 0;
464
465 # compute length of data being removed
466 for ($pos .. $pos+$nrecs-1) {
27531ffb 467 last unless defined $self->_fill_offsets_to($_);
0b28bc9a 468 my $rec = $self->_fetch($_);
b5aed31e 469 last unless defined $rec;
470 push @result, $rec;
6fc0ea7e 471
472 # Why don't we just use length($rec) here?
473 # Because that record might have come from the cache. _splice
474 # might have been called to flush out the deferred-write records,
27531ffb 475 # and in this case length($rec) is the length of the record to be
476 # *written*, not the length of the actual record in the file. But
477 # the offsets are still true. 20020322
6fc0ea7e 478 $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_]
479 if defined $self->{offsets}[$_+1];
b5aed31e 480 }
6ae23f41 481 $self->_fill_offsets_to($pos+$nrecs);
b5aed31e 482
51efdd02 483 # Modify the file
6ae23f41 484 $self->_mtwrite($data, $self->{offsets}[$pos], $oldlen);
485 # Adjust the offsets table
486 $self->_oadjust([$pos, $nrecs, @data]);
487
488 { # Take this read cache stuff out into a separate function
489 # You made a half-attempt to put it into _oadjust.
490 # Finish something like that up eventually.
491 # STORE also needs to do something similarish
492
493 # update the read cache, part 1
494 # modified records
495 for ($pos .. $pos+$nrecs-1) {
496 my $new = $data[$_-$pos];
497 if (defined $new) {
498 $self->{cache}->update($_, $new);
499 } else {
500 $self->{cache}->remove($_);
501 }
b5aed31e 502 }
6ae23f41 503
504 # update the read cache, part 2
505 # moved records - records past the site of the change
506 # need to be renumbered
507 # Maybe merge this with the previous block?
508 {
509 my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys;
510 my @newkeys = map $_-$nrecs+@data, @oldkeys;
511 $self->{cache}->rekey(\@oldkeys, \@newkeys);
b5aed31e 512 }
6fc0ea7e 513
6ae23f41 514 # Now there might be too much data in the cache, if we spliced out
515 # some short records and spliced in some long ones. If so, flush
516 # the cache.
517 $self->_cache_flush;
b5aed31e 518 }
b5aed31e 519
51efdd02 520 # Yes, the return value of 'splice' *is* actually this complicated
521 wantarray ? @result : @result ? $result[-1] : undef;
b5aed31e 522}
523
6ae23f41 524
b5aed31e 525# write data into the file
6ae23f41 526# $data is the data to be written.
b5aed31e 527# it should be written at position $pos, and should overwrite
528# exactly $len of the following bytes.
529# Note that if length($data) > $len, the subsequent bytes will have to
530# be moved up, and if length($data) < $len, they will have to
531# be moved down
532sub _twrite {
533 my ($self, $data, $pos, $len) = @_;
534
535 unless (defined $pos) {
536 die "\$pos was undefined in _twrite";
537 }
538
539 my $len_diff = length($data) - $len;
540
541 if ($len_diff == 0) { # Woo-hoo!
542 my $fh = $self->{fh};
543 $self->_seekb($pos);
544 $self->_write_record($data);
545 return; # well, that was easy.
546 }
547
548 # the two records are of different lengths
549 # our strategy here: rewrite the tail of the file,
550 # reading ahead one buffer at a time
551 # $bufsize is required to be at least as large as the data we're overwriting
552 my $bufsize = _bufsize($len_diff);
553 my ($writepos, $readpos) = ($pos, $pos+$len);
51efdd02 554 my $next_block;
6fc0ea7e 555 my $more_data;
b5aed31e 556
557 # Seems like there ought to be a way to avoid the repeated code
558 # and the special case here. The read(1) is also a little weird.
559 # Think about this.
560 do {
561 $self->_seekb($readpos);
51efdd02 562 my $br = read $self->{fh}, $next_block, $bufsize;
6fc0ea7e 563 $more_data = read $self->{fh}, my($dummy), 1;
b5aed31e 564 $self->_seekb($writepos);
565 $self->_write_record($data);
566 $readpos += $br;
567 $writepos += length $data;
568 $data = $next_block;
6ae23f41 569 } while $more_data;
51efdd02 570 $self->_seekb($writepos);
571 $self->_write_record($next_block);
b5aed31e 572
573 # There might be leftover data at the end of the file
574 $self->_chop_file if $len_diff < 0;
575}
576
6ae23f41 577# _iwrite(D, S, E)
578# Insert text D at position S.
579# Let C = E-S-|D|. If C < 0; die.
580# Data in [S,S+C) is copied to [S+D,S+D+C) = [S+D,E).
581# Data in [S+C = E-D, E) is returned. Data in [E, oo) is untouched.
582#
583# In a later version, don't read the entire intervening area into
584# memory at once; do the copying block by block.
585sub _iwrite {
586 my $self = shift;
587 my ($D, $s, $e) = @_;
588 my $d = length $D;
589 my $c = $e-$s-$d;
590 local *FH = $self->{fh};
591 confess "Not enough space to insert $d bytes between $s and $e"
592 if $c < 0;
593 confess "[$s,$e) is an invalid insertion range" if $e < $s;
594
595 $self->_seekb($s);
596 read FH, my $buf, $e-$s;
597
598 $D .= substr($buf, 0, $c, "");
599
600 $self->_seekb($s);
601 $self->_write_record($D);
602
603 return $buf;
604}
605
606# Like _twrite, but the data-pos-len triple may be repeated; you may
607# write several chunks. All the writing will be done in
608# one pass. Chunks SHALL be in ascending order and SHALL NOT overlap.
609sub _mtwrite {
610 my $self = shift;
611 my $unwritten = "";
612 my $delta = 0;
613
614 @_ % 3 == 0
615 or die "Arguments to _mtwrite did not come in groups of three";
616
617 while (@_) {
618 my ($data, $pos, $len) = splice @_, 0, 3;
619 my $end = $pos + $len; # The OLD end of the segment to be replaced
620 $data = $unwritten . $data;
621 $delta -= length($unwritten);
622 $unwritten = "";
623 $pos += $delta; # This is where the data goes now
624 my $dlen = length $data;
625 $self->_seekb($pos);
626 if ($len >= $dlen) { # the data will fit
627 $self->_write_record($data);
628 $delta += ($dlen - $len); # everything following moves down by this much
629 $data = ""; # All the data in the buffer has been written
630 } else { # won't fit
631 my $writable = substr($data, 0, $len - $delta, "");
632 $self->_write_record($writable);
633 $delta += ($dlen - $len); # everything following moves down by this much
634 }
635
636 # At this point we've written some but maybe not all of the data.
637 # There might be a gap to close up, or $data might still contain a
638 # bunch of unwritten data that didn't fit.
639 my $ndlen = length $data;
640 if ($delta == 0) {
641 $self->_write_record($data);
642 } elsif ($delta < 0) {
643 # upcopy (close up gap)
644 if (@_) {
645 $self->_upcopy($end, $end + $delta, $_[1] - $end);
646 } else {
647 $self->_upcopy($end, $end + $delta);
648 }
649 } else {
650 # downcopy (insert data that didn't fit; replace this data in memory
651 # with _later_ data that doesn't fit)
652 if (@_) {
653 $unwritten = $self->_downcopy($data, $end, $_[1] - $end);
654 } else {
655 # Make the file longer to accomodate the last segment that doesn'
656 $unwritten = $self->_downcopy($data, $end);
657 }
658 }
659 }
660}
661
662# Copy block of data of length $len from position $spos to position $dpos
663# $dpos must be <= $spos
664#
665# If $len is undefined, go all the way to the end of the file
666# and then truncate it ($spos - $dpos bytes will be removed)
667sub _upcopy {
668 my $blocksize = 8192;
669 my ($self, $spos, $dpos, $len) = @_;
670 if ($dpos > $spos) {
671 die "source ($spos) was upstream of destination ($dpos) in _upcopy";
672 } elsif ($dpos == $spos) {
673 return;
674 }
675
676 while (! defined ($len) || $len > 0) {
677 my $readsize = ! defined($len) ? $blocksize
678 : $len > $blocksize ? $blocksize
679 : $len;
680
681 my $fh = $self->{fh};
682 $self->_seekb($spos);
86e5a81e 683 my $bytes_read = read $fh, my($data), $readsize;
6ae23f41 684 $self->_seekb($dpos);
685 if ($data eq "") {
686 $self->_chop_file;
687 last;
688 }
689 $self->_write_record($data);
690 $spos += $bytes_read;
691 $dpos += $bytes_read;
692 $len -= $bytes_read if defined $len;
693 }
694}
695
696# Write $data into a block of length $len at position $pos,
697# moving everything in the block forwards to make room.
698# Instead of writing the last length($data) bytes from the block
699# (because there isn't room for them any longer) return them.
700sub _downcopy {
701 my $blocksize = 8192;
702 my ($self, $data, $pos, $len) = @_;
703 my $fh = $self->{fh};
704
705 while (! defined $len || $len > 0) {
706 my $readsize = ! defined($len) ? $blocksize
707 : $len > $blocksize? $blocksize : $len;
708 $self->_seekb($pos);
86e5a81e 709 read $fh, my($old), $readsize;
6ae23f41 710 $data .= $old;
711 $self->_seekb($pos);
712 my $writable = substr($data, 0, $readsize, "");
713 last if $writable eq "";
714 $self->_write_record($writable);
715 $len -= $readsize if defined $len;
716 $pos += $readsize;
717 }
718 return $data;
719}
720
721# Adjust the object data structures following an '_mtwrite'
722# Arguments are
723# [$pos, $nrecs, @length] items
724# indicating that $nrecs records were removed at $recpos (a record offset)
725# and replaced with records of length @length...
726# Arguments guarantee that $recpos is strictly increasing.
727# No return value
728sub _oadjust {
729 my $self = shift;
730 my $delta = 0;
731 my $delta_recs = 0;
732 my $prev_end = -1;
733 my %newkeys;
734
735 for (@_) {
736 my ($pos, $nrecs, @data) = @$_;
737 $pos += $delta_recs;
738
739 # Adjust the offsets of the records after the previous batch up
740 # to the first new one of this batch
741 for my $i ($prev_end+2 .. $pos - 1) {
742 $self->{offsets}[$i] += $delta;
743 $newkey{$i} = $i + $delta_recs;
744 }
745
746 $prev_end = $pos + @data - 1; # last record moved on this pass
747
748 # Remove the offsets for the removed records;
749 # replace with the offsets for the inserted records
750 my @newoff = ($self->{offsets}[$pos] + $delta);
751 for my $i (0 .. $#data) {
752 my $newlen = length $data[$i];
753 push @newoff, $newoff[$i] + $newlen;
754 $delta += $newlen;
755 }
756
757 for my $i ($pos .. $pos+$nrecs-1) {
758 last if $i+1 > $#{$self->{offsets}};
759 my $oldlen = $self->{offsets}[$i+1] - $self->{offsets}[$i];
760 $delta -= $oldlen;
761 }
762
763# # also this data has changed, so update it in the cache
764# for (0 .. $#data) {
765# $self->{cache}->update($pos + $_, $data[$_]);
766# }
767# if ($delta_recs) {
768# my @oldkeys = grep $_ >= $pos + @data, $self->{cache}->ckeys;
769# my @newkeys = map $_ + $delta_recs, @oldkeys;
770# $self->{cache}->rekey(\@oldkeys, \@newkeys);
771# }
772
773 # replace old offsets with new
774 splice @{$self->{offsets}}, $pos, $nrecs+1, @newoff;
775 # What if we just spliced out the end of the offsets table?
776 # shouldn't we clear $self->{eof}? Test for this XXX BUG TODO
777
778 $delta_recs += @data - $nrecs; # net change in total number of records
779 }
780
781 # The trailing records at the very end of the file
782 if ($delta) {
783 for my $i ($prev_end+2 .. $#{$self->{offsets}}) {
784 $self->{offsets}[$i] += $delta;
785 }
786 }
787
788 # If we scrubbed out all known offsets, regenerate the trivial table
789 # that knows that the file does indeed start at 0.
790 $self->{offsets}[0] = 0 unless @{$self->{offsets}};
791 # If the file got longer, the offsets table is no longer complete
792 # $self->{eof} = 0 if $delta_recs > 0;
793
794 # Now there might be too much data in the cache, if we spliced out
795 # some short records and spliced in some long ones. If so, flush
796 # the cache.
797 $self->_cache_flush;
798}
799
b5aed31e 800# If a record does not already end with the appropriate terminator
801# string, append one.
802sub _fixrecs {
803 my $self = shift;
804 for (@_) {
27531ffb 805 $_ = "" unless defined $_;
b5aed31e 806 $_ .= $self->{recsep}
807 unless substr($_, - $self->{recseplen}) eq $self->{recsep};
808 }
809}
810
57c7bc08 811
812################################################################
813#
814# Basic read, write, and seek
815#
816
b5aed31e 817# seek to the beginning of record #$n
818# Assumes that the offsets table is already correctly populated
819#
820# Note that $n=-1 has a special meaning here: It means the start of
821# the last known record; this may or may not be the very last record
822# in the file, depending on whether the offsets table is fully populated.
823#
824sub _seek {
825 my ($self, $n) = @_;
826 my $o = $self->{offsets}[$n];
827 defined($o)
828 or confess("logic error: undefined offset for record $n");
829 seek $self->{fh}, $o, SEEK_SET
6ae23f41 830 or confess "Couldn't seek filehandle: $!"; # "Should never happen."
b5aed31e 831}
832
6ae23f41 833# seek to byte $b in the file
b5aed31e 834sub _seekb {
835 my ($self, $b) = @_;
836 seek $self->{fh}, $b, SEEK_SET
837 or die "Couldn't seek filehandle: $!"; # "Should never happen."
838}
839
840# populate the offsets table up to the beginning of record $n
841# return the offset of record $n
842sub _fill_offsets_to {
843 my ($self, $n) = @_;
27531ffb 844
845 return $self->{offsets}[$n] if $self->{eof};
846
b5aed31e 847 my $fh = $self->{fh};
848 local *OFF = $self->{offsets};
849 my $rec;
850
851 until ($#OFF >= $n) {
b5aed31e 852 $self->_seek(-1); # tricky -- see comment at _seek
853 $rec = $self->_read_record;
854 if (defined $rec) {
6ae23f41 855 push @OFF, int(tell $fh); # Tels says that int() saves memory here
b5aed31e 856 } else {
27531ffb 857 $self->{eof} = 1;
b5aed31e 858 return; # It turns out there is no such record
859 }
860 }
861
862 # we have now read all the records up to record n-1,
863 # so we can return the offset of record n
6ae23f41 864 $OFF[$n];
865}
866
867sub _fill_offsets {
868 my ($self) = @_;
869
870 my $fh = $self->{fh};
871 local *OFF = $self->{offsets};
872
873 $self->_seek(-1); # tricky -- see comment at _seek
874
875 # Tels says that inlining read_record() would make this loop
876 # five times faster. 20030508
877 while ( defined $self->_read_record()) {
878 # int() saves us memory here
879 push @OFF, int(tell $fh);
880 }
881
882 $self->{eof} = 1;
883 $#OFF;
b5aed31e 884}
885
886# assumes that $rec is already suitably terminated
887sub _write_record {
888 my ($self, $rec) = @_;
889 my $fh = $self->{fh};
bf919750 890 local $\ = "";
b5aed31e 891 print $fh $rec
892 or die "Couldn't write record: $!"; # "Should never happen."
27531ffb 893# $self->{_written} += length($rec);
b5aed31e 894}
895
896sub _read_record {
897 my $self = shift;
898 my $rec;
899 { local $/ = $self->{recsep};
900 my $fh = $self->{fh};
901 $rec = <$fh>;
902 }
27531ffb 903 return unless defined $rec;
bf919750 904 if (! $self->{sawlastrec} &&
905 substr($rec, -$self->{recseplen}) ne $self->{recsep}) {
27531ffb 906 # improperly terminated final record --- quietly fix it.
907# my $ac = substr($rec, -$self->{recseplen});
908# $ac =~ s/\n/\\n/g;
bf919750 909 $self->{sawlastrec} = 1;
27531ffb 910 unless ($self->{rdonly}) {
bf919750 911 local $\ = "";
27531ffb 912 my $fh = $self->{fh};
913 print $fh $self->{recsep};
914 }
915 $rec .= $self->{recsep};
916 }
917# $self->{_read} += length($rec) if defined $rec;
b5aed31e 918 $rec;
919}
920
6fc0ea7e 921sub _rw_stats {
27531ffb 922 my $self = shift;
6fc0ea7e 923 @{$self}{'_read', '_written'};
924}
925
57c7bc08 926################################################################
927#
928# Read cache management
929
6fc0ea7e 930sub _cache_flush {
931 my ($self) = @_;
932 $self->{cache}->reduce_size_to($self->{memory} - $self->{deferred_s});
b5aed31e 933}
934
57c7bc08 935sub _cache_too_full {
936 my $self = shift;
6fc0ea7e 937 $self->{cache}->bytes + $self->{deferred_s} >= $self->{memory};
b5aed31e 938}
939
57c7bc08 940################################################################
941#
942# File custodial services
943#
944
945
b5aed31e 946# We have read to the end of the file and have the offsets table
947# entirely populated. Now we need to write a new record beyond
948# the end of the file. We prepare for this by writing
949# empty records into the file up to the position we want
51efdd02 950#
951# assumes that the offsets table already contains the offset of record $n,
952# if it exists, and extends to the end of the file if not.
b5aed31e 953sub _extend_file_to {
954 my ($self, $n) = @_;
955 $self->_seek(-1); # position after the end of the last record
956 my $pos = $self->{offsets}[-1];
957
958 # the offsets table has one entry more than the total number of records
6fc0ea7e 959 my $extras = $n - $#{$self->{offsets}};
b5aed31e 960
961 # Todo : just use $self->{recsep} x $extras here?
962 while ($extras-- > 0) {
963 $self->_write_record($self->{recsep});
6ae23f41 964 push @{$self->{offsets}}, int(tell $self->{fh});
b5aed31e 965 }
966}
967
968# Truncate the file at the current position
969sub _chop_file {
970 my $self = shift;
971 truncate $self->{fh}, tell($self->{fh});
972}
973
57c7bc08 974
b5aed31e 975# compute the size of a buffer suitable for moving
976# all the data in a file forward $n bytes
977# ($n may be negative)
978# The result should be at least $n.
979sub _bufsize {
980 my $n = shift;
6ae23f41 981 return 8192 if $n <= 0;
b5aed31e 982 my $b = $n & ~8191;
983 $b += 8192 if $n & 8191;
984 $b;
985}
986
57c7bc08 987################################################################
988#
989# Miscellaneous public methods
990#
991
51efdd02 992# Lock the file
993sub flock {
994 my ($self, $op) = @_;
995 unless (@_ <= 3) {
996 my $pack = ref $self;
997 croak "Usage: $pack\->flock([OPERATION])";
998 }
999 my $fh = $self->{fh};
1000 $op = LOCK_EX unless defined $op;
6ae23f41 1001 my $locked = flock $fh, $op;
1002
1003 if ($locked && ($op & (LOCK_EX | LOCK_SH))) {
1004 # If you're locking the file, then presumably it's because
1005 # there might have been a write access by another process.
1006 # In that case, the read cache contents and the offsets table
1007 # might be invalid, so discard them. 20030508
1008 $self->{offsets} = [0];
1009 $self->{cache}->empty;
1010 }
1011
1012 $locked;
51efdd02 1013}
b5aed31e 1014
0b28bc9a 1015# Get/set autochomp option
1016sub autochomp {
1017 my $self = shift;
1018 if (@_) {
1019 my $old = $self->{autochomp};
1020 $self->{autochomp} = shift;
1021 $old;
1022 } else {
1023 $self->{autochomp};
1024 }
1025}
1026
6ae23f41 1027# Get offset table entries; returns offset of nth record
1028sub offset {
1029 my ($self, $n) = @_;
1030
1031 if ($#{$self->{offsets}} < $n) {
1032 return if $self->{eof}; # request for record beyond the end of file
1033 my $o = $self->_fill_offsets_to($n);
1034 # If it's still undefined, there is no such record, so return 'undef'
1035 return unless defined $o;
1036 }
1037
1038 $self->{offsets}[$n];
1039}
1040
1041sub discard_offsets {
1042 my $self = shift;
1043 $self->{offsets} = [0];
1044}
1045
57c7bc08 1046################################################################
1047#
1048# Matters related to deferred writing
1049#
1050
1051# Defer writes
1052sub defer {
1053 my $self = shift;
6fc0ea7e 1054 $self->_stop_autodeferring;
1055 @{$self->{ad_history}} = ();
57c7bc08 1056 $self->{defer} = 1;
1057}
1058
b3fe5a4c 1059# Flush deferred writes
1060#
1061# This could be better optimized to write the file in one pass, instead
1062# of one pass per block of records. But that will require modifications
6ae23f41 1063# to _twrite, so I should have a good _twrite test suite first.
b3fe5a4c 1064sub flush {
1065 my $self = shift;
1066
1067 $self->_flush;
1068 $self->{defer} = 0;
1069}
1070
6ae23f41 1071sub _old_flush {
b3fe5a4c 1072 my $self = shift;
1073 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
6ae23f41 1074
b3fe5a4c 1075 while (@writable) {
1076 # gather all consecutive records from the front of @writable
1077 my $first_rec = shift @writable;
1078 my $last_rec = $first_rec+1;
1079 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1080 --$last_rec;
1081 $self->_fill_offsets_to($last_rec);
1082 $self->_extend_file_to($last_rec);
1083 $self->_splice($first_rec, $last_rec-$first_rec+1,
1084 @{$self->{deferred}}{$first_rec .. $last_rec});
1085 }
1086
57c7bc08 1087 $self->_discard; # clear out defered-write-cache
b3fe5a4c 1088}
1089
6ae23f41 1090sub _flush {
1091 my $self = shift;
1092 my @writable = sort {$a<=>$b} (keys %{$self->{deferred}});
1093 my @args;
1094 my @adjust;
1095
1096 while (@writable) {
1097 # gather all consecutive records from the front of @writable
1098 my $first_rec = shift @writable;
1099 my $last_rec = $first_rec+1;
1100 ++$last_rec, shift @writable while @writable && $last_rec == $writable[0];
1101 --$last_rec;
1102 my $end = $self->_fill_offsets_to($last_rec+1);
1103 if (not defined $end) {
1104 $self->_extend_file_to($last_rec);
1105 $end = $self->{offsets}[$last_rec];
1106 }
1107 my ($start) = $self->{offsets}[$first_rec];
1108 push @args,
1109 join("", @{$self->{deferred}}{$first_rec .. $last_rec}), # data
1110 $start, # position
1111 $end-$start; # length
1112 push @adjust, [$first_rec, # starting at this position...
1113 $last_rec-$first_rec+1, # this many records...
1114 # are replaced with these...
1115 @{$self->{deferred}}{$first_rec .. $last_rec},
1116 ];
1117 }
1118
1119 $self->_mtwrite(@args); # write multiple record groups
1120 $self->_discard; # clear out defered-write-cache
1121 $self->_oadjust(@adjust);
1122}
1123
57c7bc08 1124# Discard deferred writes and disable future deferred writes
b3fe5a4c 1125sub discard {
1126 my $self = shift;
57c7bc08 1127 $self->_discard;
b3fe5a4c 1128 $self->{defer} = 0;
1129}
1130
57c7bc08 1131# Discard deferred writes, but retain old deferred writing mode
1132sub _discard {
1133 my $self = shift;
6fc0ea7e 1134 %{$self->{deferred}} = ();
1135 $self->{deferred_s} = 0;
1136 $self->{deferred_max} = -1;
1137 $self->{cache}->set_limit($self->{memory});
1138}
1139
1140# Deferred writing is enabled, either explicitly ($self->{defer})
1141# or automatically ($self->{autodeferring})
1142sub _is_deferring {
1143 my $self = shift;
1144 $self->{defer} || $self->{autodeferring};
1145}
1146
1147# The largest record number of any deferred record
1148sub _defer_max {
1149 my $self = shift;
1150 return $self->{deferred_max} if defined $self->{deferred_max};
1151 my $max = -1;
1152 for my $key (keys %{$self->{deferred}}) {
1153 $max = $key if $key > $max;
1154 }
1155 $self->{deferred_max} = $max;
1156 $max;
57c7bc08 1157}
1158
6fc0ea7e 1159################################################################
1160#
1161# Matters related to autodeferment
1162#
1163
1164# Get/set autodefer option
1165sub autodefer {
1166 my $self = shift;
1167 if (@_) {
1168 my $old = $self->{autodefer};
1169 $self->{autodefer} = shift;
1170 if ($old) {
1171 $self->_stop_autodeferring;
1172 @{$self->{ad_history}} = ();
1173 }
1174 $old;
1175 } else {
1176 $self->{autodefer};
1177 }
1178}
1179
1180# The user is trying to store record #$n Record that in the history,
1181# and then enable (or disable) autodeferment if that seems useful.
1182# Note that it's OK for $n to be a non-number, as long as the function
1183# is prepared to deal with that. Nobody else looks at the ad_history.
1184#
1185# Now, what does the ad_history mean, and what is this function doing?
1186# Essentially, the idea is to enable autodeferring when we see that the
1187# user has made three consecutive STORE calls to three consecutive records.
1188# ("Three" is actually ->{autodefer_threshhold}.)
1189# A STORE call for record #$n inserts $n into the autodefer history,
1190# and if the history contains three consecutive records, we enable
1191# autodeferment. An ad_history of [X, Y] means that the most recent
1192# STOREs were for records X, X+1, ..., Y, in that order.
1193#
1194# Inserting a nonconsecutive number erases the history and starts over.
1195#
1196# Performing a special operation like SPLICE erases the history.
1197#
1198# There's one special case: CLEAR means that CLEAR was just called.
1199# In this case, we prime the history with [-2, -1] so that if the next
1200# write is for record 0, autodeferring goes on immediately. This is for
1201# the common special case of "@a = (...)".
1202#
1203sub _annotate_ad_history {
1204 my ($self, $n) = @_;
1205 return unless $self->{autodefer}; # feature is disabled
1206 return if $self->{defer}; # already in explicit defer mode
1207 return unless $self->{offsets}[-1] >= $self->{autodefer_filelen_threshhold};
1208
1209 local *H = $self->{ad_history};
1210 if ($n eq 'CLEAR') {
1211 @H = (-2, -1); # prime the history with fake records
1212 $self->_stop_autodeferring;
1213 } elsif ($n =~ /^\d+$/) {
1214 if (@H == 0) {
1215 @H = ($n, $n);
1216 } else { # @H == 2
1217 if ($H[1] == $n-1) { # another consecutive record
1218 $H[1]++;
1219 if ($H[1] - $H[0] + 1 >= $self->{autodefer_threshhold}) {
1220 $self->{autodeferring} = 1;
1221 }
1222 } else { # nonconsecutive- erase and start over
1223 @H = ($n, $n);
1224 $self->_stop_autodeferring;
1225 }
1226 }
1227 } else { # SPLICE or STORESIZE or some such
1228 @H = ();
1229 $self->_stop_autodeferring;
1230 }
1231}
1232
6ae23f41 1233# If autodeferring was enabled, cut it out and discard the history
6fc0ea7e 1234sub _stop_autodeferring {
1235 my $self = shift;
1236 if ($self->{autodeferring}) {
1237 $self->_flush;
1238 }
1239 $self->{autodeferring} = 0;
1240}
1241
1242################################################################
1243
b3fe5a4c 1244
57c7bc08 1245# This is NOT a method. It is here for two reasons:
1246# 1. To factor a fairly complicated block out of the constructor
1247# 2. To provide access for the test suite, which need to be sure
1248# files are being written properly.
b3fe5a4c 1249sub _default_recsep {
1250 my $recsep = $/;
57c7bc08 1251 if ($^O eq 'MSWin32') { # Dos too?
b3fe5a4c 1252 # Windows users expect files to be terminated with \r\n
1253 # But $/ is set to \n instead
1254 # Note that this also transforms \n\n into \r\n\r\n.
1255 # That is a feature.
1256 $recsep =~ s/\n/\r\n/g;
1257 }
1258 $recsep;
1259}
1260
57c7bc08 1261# Utility function for _check_integrity
1262sub _ci_warn {
1263 my $msg = shift;
1264 $msg =~ s/\n/\\n/g;
1265 $msg =~ s/\r/\\r/g;
1266 print "# $msg\n";
1267}
1268
b5aed31e 1269# Given a file, make sure the cache is consistent with the
57c7bc08 1270# file contents and the internal data structures are consistent with
1271# each other. Returns true if everything checks out, false if not
1272#
1273# The $file argument is no longer used. It is retained for compatibility
1274# with the existing test suite.
b5aed31e 1275sub _check_integrity {
1276 my ($self, $file, $warn) = @_;
6fc0ea7e 1277 my $rsl = $self->{recseplen};
1278 my $rs = $self->{recsep};
b5aed31e 1279 my $good = 1;
6fc0ea7e 1280 local *_; # local $_ does not work here
1281 local $DIAGNOSTIC = 1;
1282
1283 if (not defined $rs) {
1284 _ci_warn("recsep is undef!");
1285 $good = 0;
1286 } elsif ($rs eq "") {
1287 _ci_warn("recsep is empty!");
1288 $good = 0;
1289 } elsif ($rsl != length $rs) {
1290 my $ln = length $rs;
1291 _ci_warn("recsep <$rs> has length $ln, should be $rsl");
1292 $good = 0;
1293 }
fa408a35 1294
836d9961 1295 if (not defined $self->{offsets}[0]) {
57c7bc08 1296 _ci_warn("offset 0 is missing!");
836d9961 1297 $good = 0;
bf919750 1298
836d9961 1299 } elsif ($self->{offsets}[0] != 0) {
57c7bc08 1300 _ci_warn("rec 0: offset <$self->{offsets}[0]> s/b 0!");
b5aed31e 1301 $good = 0;
1302 }
fa408a35 1303
57c7bc08 1304 my $cached = 0;
6fc0ea7e 1305 {
1306 local *F = $self->{fh};
1307 seek F, 0, SEEK_SET;
1308 local $. = 0;
1309 local $/ = $rs;
1310
1311 while (<F>) {
1312 my $n = $. - 1;
1313 my $cached = $self->{cache}->_produce($n);
1314 my $offset = $self->{offsets}[$.];
1315 my $ao = tell F;
1316 if (defined $offset && $offset != $ao) {
1317 _ci_warn("rec $n: offset <$offset> actual <$ao>");
1318 $good = 0;
1319 }
1320 if (defined $cached && $_ ne $cached && ! $self->{deferred}{$n}) {
1321 $good = 0;
1322 _ci_warn("rec $n: cached <$cached> actual <$_>");
1323 }
1324 if (defined $cached && substr($cached, -$rsl) ne $rs) {
27531ffb 1325 $good = 0;
6fc0ea7e 1326 _ci_warn("rec $n in the cache is missing the record separator");
1327 }
27531ffb 1328 if (! defined $offset && $self->{eof}) {
1329 $good = 0;
1330 _ci_warn("The offset table was marked complete, but it is missing element $.");
1331 }
1332 }
1333 if (@{$self->{offsets}} > $.+1) {
1334 $good = 0;
1335 my $n = @{$self->{offsets}};
1336 _ci_warn("The offset table has $n items, but the file has only $.");
6fc0ea7e 1337 }
b5aed31e 1338
6fc0ea7e 1339 my $deferring = $self->_is_deferring;
bf919750 1340 for my $n ($self->{cache}->ckeys) {
6fc0ea7e 1341 my $r = $self->{cache}->_produce($n);
1342 $cached += length($r);
1343 next if $n+1 <= $.; # checked this already
1344 _ci_warn("spurious caching of record $n");
b5aed31e 1345 $good = 0;
1346 }
6fc0ea7e 1347 my $b = $self->{cache}->bytes;
1348 if ($cached != $b) {
1349 _ci_warn("cache size is $b, should be $cached");
b5aed31e 1350 $good = 0;
1351 }
1352 }
1353
bf919750 1354 # That cache has its own set of tests
6fc0ea7e 1355 $good = 0 unless $self->{cache}->_check_integrity;
1356
57c7bc08 1357 # Now let's check the deferbuffer
1358 # Unless deferred writing is enabled, it should be empty
6fc0ea7e 1359 if (! $self->_is_deferring && %{$self->{deferred}}) {
57c7bc08 1360 _ci_warn("deferred writing disabled, but deferbuffer nonempty");
1361 $good = 0;
1362 }
1363
1364 # Any record in the deferbuffer should *not* be present in the readcache
1365 my $deferred_s = 0;
1366 while (my ($n, $r) = each %{$self->{deferred}}) {
1367 $deferred_s += length($r);
6fc0ea7e 1368 if (defined $self->{cache}->_produce($n)) {
57c7bc08 1369 _ci_warn("record $n is in the deferbuffer *and* the readcache");
1370 $good = 0;
1371 }
6fc0ea7e 1372 if (substr($r, -$rsl) ne $rs) {
57c7bc08 1373 _ci_warn("rec $n in the deferbuffer is missing the record separator");
1374 $good = 0;
1375 }
1376 }
1377
1378 # Total size of deferbuffer should match internal total
1379 if ($deferred_s != $self->{deferred_s}) {
1380 _ci_warn("buffer size is $self->{deferred_s}, should be $deferred_s");
1381 $good = 0;
1382 }
1383
1384 # Total size of deferbuffer should not exceed the specified limit
1385 if ($deferred_s > $self->{dw_size}) {
1386 _ci_warn("buffer size is $self->{deferred_s} which exceeds the limit of $self->{dw_size}");
1387 $good = 0;
1388 }
1389
1390 # Total size of cached data should not exceed the specified limit
1391 if ($deferred_s + $cached > $self->{memory}) {
1392 my $total = $deferred_s + $cached;
1393 _ci_warn("total stored data size is $total which exceeds the limit of $self->{memory}");
1394 $good = 0;
1395 }
1396
6fc0ea7e 1397 # Stuff related to autodeferment
1398 if (!$self->{autodefer} && @{$self->{ad_history}}) {
1399 _ci_warn("autodefer is disabled, but ad_history is nonempty");
1400 $good = 0;
1401 }
1402 if ($self->{autodeferring} && $self->{defer}) {
1403 _ci_warn("both autodeferring and explicit deferring are active");
1404 $good = 0;
1405 }
1406 if (@{$self->{ad_history}} == 0) {
1407 # That's OK, no additional tests required
1408 } elsif (@{$self->{ad_history}} == 2) {
1409 my @non_number = grep !/^-?\d+$/, @{$self->{ad_history}};
1410 if (@non_number) {
1411 my $msg;
1412 { local $" = ')(';
1413 $msg = "ad_history contains non-numbers (@{$self->{ad_history}})";
1414 }
1415 _ci_warn($msg);
1416 $good = 0;
1417 } elsif ($self->{ad_history}[1] < $self->{ad_history}[0]) {
1418 _ci_warn("ad_history has nonsensical values @{$self->{ad_history}}");
1419 $good = 0;
1420 }
1421 } else {
1422 _ci_warn("ad_history has bad length <@{$self->{ad_history}}>");
1423 $good = 0;
1424 }
1425
b5aed31e 1426 $good;
1427}
1428
6fc0ea7e 1429################################################################
1430#
1431# Tie::File::Cache
1432#
1433# Read cache
1434
1435package Tie::File::Cache;
1436$Tie::File::Cache::VERSION = $Tie::File::VERSION;
1437use Carp ':DEFAULT', 'confess';
1438
1439sub HEAP () { 0 }
1440sub HASH () { 1 }
1441sub MAX () { 2 }
1442sub BYTES() { 3 }
6ae23f41 1443#sub STAT () { 4 } # Array with request statistics for each record
1444#sub MISS () { 5 } # Total number of cache misses
1445#sub REQ () { 6 } # Total number of cache requests
6fc0ea7e 1446use strict 'vars';
1447
1448sub new {
1449 my ($pack, $max) = @_;
1450 local *_;
1451 croak "missing argument to ->new" unless defined $max;
1452 my $self = [];
1453 bless $self => $pack;
1454 @$self = (Tie::File::Heap->new($self), {}, $max, 0);
1455 $self;
1456}
1457
1458sub adj_limit {
1459 my ($self, $n) = @_;
1460 $self->[MAX] += $n;
1461}
1462
1463sub set_limit {
1464 my ($self, $n) = @_;
1465 $self->[MAX] = $n;
1466}
1467
1468# For internal use only
1469# Will be called by the heap structure to notify us that a certain
1470# piece of data has moved from one heap element to another.
1471# $k is the hash key of the item
1472# $n is the new index into the heap at which it is stored
1473# If $n is undefined, the item has been removed from the heap.
1474sub _heap_move {
1475 my ($self, $k, $n) = @_;
1476 if (defined $n) {
1477 $self->[HASH]{$k} = $n;
1478 } else {
6ae23f41 1479 delete $self->[HASH]{$k};
6fc0ea7e 1480 }
1481}
1482
1483sub insert {
1484 my ($self, $key, $val) = @_;
1485 local *_;
1486 croak "missing argument to ->insert" unless defined $key;
1487 unless (defined $self->[MAX]) {
1488 confess "undefined max" ;
1489 }
1490 confess "undefined val" unless defined $val;
1491 return if length($val) > $self->[MAX];
6ae23f41 1492
1493# if ($self->[STAT]) {
1494# $self->[STAT][$key] = 1;
1495# return;
1496# }
1497
6fc0ea7e 1498 my $oldnode = $self->[HASH]{$key};
1499 if (defined $oldnode) {
1500 my $oldval = $self->[HEAP]->set_val($oldnode, $val);
1501 $self->[BYTES] -= length($oldval);
1502 } else {
1503 $self->[HEAP]->insert($key, $val);
1504 }
1505 $self->[BYTES] += length($val);
6ae23f41 1506 $self->flush if $self->[BYTES] > $self->[MAX];
6fc0ea7e 1507}
1508
1509sub expire {
1510 my $self = shift;
1511 my $old_data = $self->[HEAP]->popheap;
1512 return unless defined $old_data;
1513 $self->[BYTES] -= length $old_data;
1514 $old_data;
1515}
1516
1517sub remove {
1518 my ($self, @keys) = @_;
1519 my @result;
6ae23f41 1520
1521# if ($self->[STAT]) {
1522# for my $key (@keys) {
1523# $self->[STAT][$key] = 0;
1524# }
1525# return;
1526# }
1527
6fc0ea7e 1528 for my $key (@keys) {
1529 next unless exists $self->[HASH]{$key};
1530 my $old_data = $self->[HEAP]->remove($self->[HASH]{$key});
1531 $self->[BYTES] -= length $old_data;
1532 push @result, $old_data;
1533 }
1534 @result;
1535}
1536
1537sub lookup {
1538 my ($self, $key) = @_;
1539 local *_;
1540 croak "missing argument to ->lookup" unless defined $key;
6ae23f41 1541
1542# if ($self->[STAT]) {
1543# $self->[MISS]++ if $self->[STAT][$key]++ == 0;
1544# $self->[REQ]++;
1545# my $hit_rate = 1 - $self->[MISS] / $self->[REQ];
1546# # Do some testing to determine this threshhold
1547# $#$self = STAT - 1 if $hit_rate > 0.20;
1548# }
1549
6fc0ea7e 1550 if (exists $self->[HASH]{$key}) {
1551 $self->[HEAP]->lookup($self->[HASH]{$key});
1552 } else {
1553 return;
1554 }
1555}
1556
1557# For internal use only
1558sub _produce {
1559 my ($self, $key) = @_;
1560 my $loc = $self->[HASH]{$key};
1561 return unless defined $loc;
1562 $self->[HEAP][$loc][2];
1563}
1564
1565# For internal use only
1566sub _promote {
1567 my ($self, $key) = @_;
1568 $self->[HEAP]->promote($self->[HASH]{$key});
1569}
1570
1571sub empty {
1572 my ($self) = @_;
1573 %{$self->[HASH]} = ();
1574 $self->[BYTES] = 0;
1575 $self->[HEAP]->empty;
6ae23f41 1576# @{$self->[STAT]} = ();
1577# $self->[MISS] = 0;
1578# $self->[REQ] = 0;
6fc0ea7e 1579}
1580
1581sub is_empty {
1582 my ($self) = @_;
1583 keys %{$self->[HASH]} == 0;
1584}
1585
1586sub update {
1587 my ($self, $key, $val) = @_;
1588 local *_;
1589 croak "missing argument to ->update" unless defined $key;
1590 if (length($val) > $self->[MAX]) {
6ae23f41 1591 my ($oldval) = $self->remove($key);
6fc0ea7e 1592 $self->[BYTES] -= length($oldval) if defined $oldval;
1593 } elsif (exists $self->[HASH]{$key}) {
1594 my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val);
1595 $self->[BYTES] += length($val);
1596 $self->[BYTES] -= length($oldval) if defined $oldval;
1597 } else {
1598 $self->[HEAP]->insert($key, $val);
1599 $self->[BYTES] += length($val);
1600 }
1601 $self->flush;
1602}
1603
1604sub rekey {
1605 my ($self, $okeys, $nkeys) = @_;
1606 local *_;
1607 my %map;
1608 @map{@$okeys} = @$nkeys;
1609 croak "missing argument to ->rekey" unless defined $nkeys;
1610 croak "length mismatch in ->rekey arguments" unless @$nkeys == @$okeys;
1611 my %adjusted; # map new keys to heap indices
1612 # You should be able to cut this to one loop TODO XXX
1613 for (0 .. $#$okeys) {
1614 $adjusted{$nkeys->[$_]} = delete $self->[HASH]{$okeys->[$_]};
1615 }
1616 while (my ($nk, $ix) = each %adjusted) {
1617 # @{$self->[HASH]}{keys %adjusted} = values %adjusted;
1618 $self->[HEAP]->rekey($ix, $nk);
1619 $self->[HASH]{$nk} = $ix;
1620 }
1621}
1622
bf919750 1623sub ckeys {
6fc0ea7e 1624 my $self = shift;
1625 my @a = keys %{$self->[HASH]};
1626 @a;
1627}
1628
6ae23f41 1629# Return total amount of cached data
6fc0ea7e 1630sub bytes {
1631 my $self = shift;
1632 $self->[BYTES];
1633}
1634
6ae23f41 1635# Expire oldest item from cache until cache size is smaller than $max
6fc0ea7e 1636sub reduce_size_to {
1637 my ($self, $max) = @_;
6ae23f41 1638 until ($self->[BYTES] <= $max) {
1639 # Note that Tie::File::Cache::expire has been inlined here
1640 my $old_data = $self->[HEAP]->popheap;
1641 return unless defined $old_data;
1642 $self->[BYTES] -= length $old_data;
6fc0ea7e 1643 }
1644}
1645
6ae23f41 1646# Why not just $self->reduce_size_to($self->[MAX])?
1647# Try this when things stabilize TODO XXX
1648# If the cache is too full, expire the oldest records
6fc0ea7e 1649sub flush {
1650 my $self = shift;
6ae23f41 1651 $self->reduce_size_to($self->[MAX]) if $self->[BYTES] > $self->[MAX];
6fc0ea7e 1652}
1653
1654# For internal use only
1655sub _produce_lru {
1656 my $self = shift;
1657 $self->[HEAP]->expire_order;
1658}
1659
bf919750 1660BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1661
1662sub _check_integrity { # For CACHE
6fc0ea7e 1663 my $self = shift;
bf919750 1664 my $good = 1;
1665
1666 # Test HEAP
1667 $self->[HEAP]->_check_integrity or $good = 0;
1668
1669 # Test HASH
1670 my $bytes = 0;
1671 for my $k (keys %{$self->[HASH]}) {
1672 if ($k ne '0' && $k !~ /^[1-9][0-9]*$/) {
1673 $good = 0;
1674 _ci_warn "Cache hash key <$k> is non-numeric";
1675 }
1676
1677 my $h = $self->[HASH]{$k};
1678 if (! defined $h) {
1679 $good = 0;
1680 _ci_warn "Heap index number for key $k is undefined";
1681 } elsif ($h == 0) {
1682 $good = 0;
1683 _ci_warn "Heap index number for key $k is zero";
1684 } else {
1685 my $j = $self->[HEAP][$h];
1686 if (! defined $j) {
1687 $good = 0;
1688 _ci_warn "Heap contents key $k (=> $h) are undefined";
1689 } else {
1690 $bytes += length($j->[2]);
1691 if ($k ne $j->[1]) {
1692 $good = 0;
1693 _ci_warn "Heap contents key $k (=> $h) is $j->[1], should be $k";
1694 }
1695 }
1696 }
1697 }
1698
1699 # Test BYTES
1700 if ($bytes != $self->[BYTES]) {
1701 $good = 0;
1702 _ci_warn "Total data in cache is $bytes, expected $self->[BYTES]";
1703 }
1704
1705 # Test MAX
1706 if ($bytes > $self->[MAX]) {
1707 $good = 0;
1708 _ci_warn "Total data in cache is $bytes, exceeds maximum $self->[MAX]";
1709 }
1710
1711 return $good;
6fc0ea7e 1712}
1713
1714sub delink {
1715 my $self = shift;
1716 $self->[HEAP] = undef; # Bye bye heap
1717}
1718
1719################################################################
1720#
1721# Tie::File::Heap
1722#
1723# Heap data structure for use by cache LRU routines
1724
1725package Tie::File::Heap;
1726use Carp ':DEFAULT', 'confess';
1727$Tie::File::Heap::VERSION = $Tie::File::Cache::VERSION;
1728sub SEQ () { 0 };
1729sub KEY () { 1 };
1730sub DAT () { 2 };
1731
1732sub new {
1733 my ($pack, $cache) = @_;
1734 die "$pack: Parent cache object $cache does not support _heap_move method"
1735 unless eval { $cache->can('_heap_move') };
1736 my $self = [[0,$cache,0]];
1737 bless $self => $pack;
1738}
1739
1740# Allocate a new sequence number, larger than all previously allocated numbers
1741sub _nseq {
1742 my $self = shift;
1743 $self->[0][0]++;
1744}
1745
1746sub _cache {
1747 my $self = shift;
1748 $self->[0][1];
1749}
1750
1751sub _nelts {
1752 my $self = shift;
1753 $self->[0][2];
1754}
1755
1756sub _nelts_inc {
1757 my $self = shift;
1758 ++$self->[0][2];
1759}
1760
1761sub _nelts_dec {
1762 my $self = shift;
1763 --$self->[0][2];
1764}
1765
1766sub is_empty {
1767 my $self = shift;
1768 $self->_nelts == 0;
1769}
1770
1771sub empty {
1772 my $self = shift;
1773 $#$self = 0;
1774 $self->[0][2] = 0;
1775 $self->[0][0] = 0; # might as well reset the sequence numbers
1776}
1777
27531ffb 1778# notify the parent cache object that we moved something
6fc0ea7e 1779sub _heap_move {
1780 my $self = shift;
1781 $self->_cache->_heap_move(@_);
1782}
1783
1784# Insert a piece of data into the heap with the indicated sequence number.
1785# The item with the smallest sequence number is always at the top.
1786# If no sequence number is specified, allocate a new one and insert the
1787# item at the bottom.
1788sub insert {
1789 my ($self, $key, $data, $seq) = @_;
1790 $seq = $self->_nseq unless defined $seq;
1791 $self->_insert_new([$seq, $key, $data]);
1792}
1793
1794# Insert a new, fresh item at the bottom of the heap
1795sub _insert_new {
1796 my ($self, $item) = @_;
1797 my $i = @$self;
1798 $i = int($i/2) until defined $self->[$i/2];
1799 $self->[$i] = $item;
27531ffb 1800 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
6fc0ea7e 1801 $self->_nelts_inc;
1802}
1803
1804# Insert [$data, $seq] pair at or below item $i in the heap.
1805# If $i is omitted, default to 1 (the top element.)
1806sub _insert {
1807 my ($self, $item, $i) = @_;
bf919750 1808# $self->_check_loc($i) if defined $i;
6fc0ea7e 1809 $i = 1 unless defined $i;
1810 until (! defined $self->[$i]) {
1811 if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older
1812 ($self->[$i], $item) = ($item, $self->[$i]);
27531ffb 1813 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
6fc0ea7e 1814 }
1815 # If either is undefined, go that way. Otherwise, choose at random
1816 my $dir;
1817 $dir = 0 if !defined $self->[2*$i];
1818 $dir = 1 if !defined $self->[2*$i+1];
1819 $dir = int(rand(2)) unless defined $dir;
1820 $i = 2*$i + $dir;
1821 }
1822 $self->[$i] = $item;
27531ffb 1823 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
6fc0ea7e 1824 $self->_nelts_inc;
1825}
1826
1827# Remove the item at node $i from the heap, moving child items upwards.
1828# The item with the smallest sequence number is always at the top.
1829# Moving items upwards maintains this condition.
6ae23f41 1830# Return the removed item. Return undef if there was no item at node $i.
6fc0ea7e 1831sub remove {
1832 my ($self, $i) = @_;
1833 $i = 1 unless defined $i;
1834 my $top = $self->[$i];
1835 return unless defined $top;
1836 while (1) {
1837 my $ii;
1838 my ($L, $R) = (2*$i, 2*$i+1);
1839
1840 # If either is undefined, go the other way.
1841 # Otherwise, go towards the smallest.
1842 last unless defined $self->[$L] || defined $self->[$R];
1843 $ii = $R if not defined $self->[$L];
1844 $ii = $L if not defined $self->[$R];
1845 unless (defined $ii) {
1846 $ii = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1847 }
1848
1849 $self->[$i] = $self->[$ii]; # Promote child to fill vacated spot
27531ffb 1850 $self->[0][1]->_heap_move($self->[$i][KEY], $i);
6fc0ea7e 1851 $i = $ii; # Fill new vacated spot
1852 }
27531ffb 1853 $self->[0][1]->_heap_move($top->[KEY], undef);
6fc0ea7e 1854 undef $self->[$i];
1855 $self->_nelts_dec;
1856 return $top->[DAT];
1857}
1858
1859sub popheap {
1860 my $self = shift;
1861 $self->remove(1);
1862}
1863
1864# set the sequence number of the indicated item to a higher number
1865# than any other item in the heap, and bubble the item down to the
1866# bottom.
1867sub promote {
1868 my ($self, $n) = @_;
bf919750 1869# $self->_check_loc($n);
6fc0ea7e 1870 $self->[$n][SEQ] = $self->_nseq;
1871 my $i = $n;
1872 while (1) {
1873 my ($L, $R) = (2*$i, 2*$i+1);
1874 my $dir;
1875 last unless defined $self->[$L] || defined $self->[$R];
1876 $dir = $R unless defined $self->[$L];
1877 $dir = $L unless defined $self->[$R];
1878 unless (defined $dir) {
1879 $dir = $self->[$L][SEQ] < $self->[$R][SEQ] ? $L : $R;
1880 }
1881 @{$self}[$i, $dir] = @{$self}[$dir, $i];
1882 for ($i, $dir) {
27531ffb 1883 $self->[0][1]->_heap_move($self->[$_][KEY], $_) if defined $self->[$_];
6fc0ea7e 1884 }
1885 $i = $dir;
1886 }
1887}
1888
1889# Return item $n from the heap, promoting its LRU status
1890sub lookup {
1891 my ($self, $n) = @_;
bf919750 1892# $self->_check_loc($n);
6fc0ea7e 1893 my $val = $self->[$n];
1894 $self->promote($n);
1895 $val->[DAT];
1896}
1897
1898
1899# Assign a new value for node $n, promoting it to the bottom of the heap
1900sub set_val {
1901 my ($self, $n, $val) = @_;
bf919750 1902# $self->_check_loc($n);
6fc0ea7e 1903 my $oval = $self->[$n][DAT];
1904 $self->[$n][DAT] = $val;
1905 $self->promote($n);
1906 return $oval;
1907}
1908
1909# The hask key has changed for an item;
1910# alter the heap's record of the hash key
1911sub rekey {
1912 my ($self, $n, $new_key) = @_;
bf919750 1913# $self->_check_loc($n);
6fc0ea7e 1914 $self->[$n][KEY] = $new_key;
1915}
1916
1917sub _check_loc {
1918 my ($self, $n) = @_;
bf919750 1919 unless (1 || defined $self->[$n]) {
6fc0ea7e 1920 confess "_check_loc($n) failed";
1921 }
1922}
1923
bf919750 1924BEGIN { *_ci_warn = \&Tie::File::_ci_warn }
1925
6fc0ea7e 1926sub _check_integrity {
1927 my $self = shift;
1928 my $good = 1;
bf919750 1929 my %seq;
1930
6fc0ea7e 1931 unless (eval {$self->[0][1]->isa("Tie::File::Cache")}) {
bf919750 1932 _ci_warn "Element 0 of heap corrupt";
6fc0ea7e 1933 $good = 0;
1934 }
1935 $good = 0 unless $self->_satisfies_heap_condition(1);
1936 for my $i (2 .. $#{$self}) {
1937 my $p = int($i/2); # index of parent node
1938 if (defined $self->[$i] && ! defined $self->[$p]) {
bf919750 1939 _ci_warn "Element $i of heap defined, but parent $p isn't";
6fc0ea7e 1940 $good = 0;
1941 }
bf919750 1942
1943 if (defined $self->[$i]) {
1944 if ($seq{$self->[$i][SEQ]}) {
1945 my $seq = $self->[$i][SEQ];
1946 _ci_warn "Nodes $i and $seq{$seq} both have SEQ=$seq";
1947 $good = 0;
1948 } else {
1949 $seq{$self->[$i][SEQ]} = $i;
1950 }
1951 }
6fc0ea7e 1952 }
bf919750 1953
6fc0ea7e 1954 return $good;
1955}
1956
1957sub _satisfies_heap_condition {
1958 my $self = shift;
1959 my $n = shift || 1;
1960 my $good = 1;
1961 for (0, 1) {
1962 my $c = $n*2 + $_;
1963 next unless defined $self->[$c];
1964 if ($self->[$n][SEQ] >= $self->[$c]) {
bf919750 1965 _ci_warn "Node $n of heap does not predate node $c";
6fc0ea7e 1966 $good = 0 ;
1967 }
1968 $good = 0 unless $self->_satisfies_heap_condition($c);
1969 }
1970 return $good;
1971}
1972
1973# Return a list of all the values, sorted by expiration order
1974sub expire_order {
1975 my $self = shift;
1976 my @nodes = sort {$a->[SEQ] <=> $b->[SEQ]} $self->_nodes;
1977 map { $_->[KEY] } @nodes;
1978}
1979
1980sub _nodes {
1981 my $self = shift;
1982 my $i = shift || 1;
1983 return unless defined $self->[$i];
1984 ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1));
1985}
1986
fa408a35 1987"Cogito, ergo sum."; # don't forget to return a true value from the file
1988
6ae23f41 1989__END__
1990
b5aed31e 1991=head1 NAME
1992
1993Tie::File - Access the lines of a disk file via a Perl array
1994
1995=head1 SYNOPSIS
1996
6ae23f41 1997 # This file documents Tie::File version 0.95
1998 use Tie::File;
b5aed31e 1999
2000 tie @array, 'Tie::File', filename or die ...;
2001
2002 $array[13] = 'blah'; # line 13 of the file is now 'blah'
2003 print $array[42]; # display line 42 of the file
2004
2005 $n_recs = @array; # how many records are in the file?
57c7bc08 2006 $#array -= 2; # chop two records off the end
2007
b5aed31e 2008
57c7bc08 2009 for (@array) {
2010 s/PERL/Perl/g; # Replace PERL with Perl everywhere in the file
2011 }
2012
2013 # These are just like regular push, pop, unshift, shift, and splice
2014 # Except that they modify the file in the way you would expect
51efdd02 2015
2016 push @array, new recs...;
2017 my $r1 = pop @array;
2018 unshift @array, new recs...;
6ae23f41 2019 my $r2 = shift @array;
b5aed31e 2020 @old_recs = splice @array, 3, 7, new recs...;
2021
2022 untie @array; # all finished
2023
57c7bc08 2024
b5aed31e 2025=head1 DESCRIPTION
2026
2027C<Tie::File> represents a regular text file as a Perl array. Each
2028element in the array corresponds to a record in the file. The first
2029line of the file is element 0 of the array; the second line is element
20301, and so on.
2031
2032The file is I<not> loaded into memory, so this will work even for
2033gigantic files.
2034
2035Changes to the array are reflected in the file immediately.
2036
57c7bc08 2037Lazy people and beginners may now stop reading the manual.
b3fe5a4c 2038
b5aed31e 2039=head2 C<recsep>
2040
2041What is a 'record'? By default, the meaning is the same as for the
2042C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
6ae23f41 2043probably C<"\n">. (Minor exception: on DOS and Win32 systems, a
b3fe5a4c 2044'record' is a string terminated by C<"\r\n">.) You may change the
2045definition of "record" by supplying the C<recsep> option in the C<tie>
2046call:
b5aed31e 2047
2048 tie @array, 'Tie::File', $file, recsep => 'es';
2049
b3fe5a4c 2050This says that records are delimited by the string C<es>. If the file
2051contained the following data:
b5aed31e 2052
2053 Curse these pesky flies!\n
2054
27531ffb 2055then the C<@array> would appear to have four elements:
b5aed31e 2056
0b28bc9a 2057 "Curse th"
2058 "e p"
2059 "ky fli"
b5aed31e 2060 "!\n"
2061
2062An undefined value is not permitted as a record separator. Perl's
2063special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
2064emulated.
2065
0b28bc9a 2066Records read from the tied array do not have the record separator
27531ffb 2067string on the end; this is to allow
0b28bc9a 2068
2069 $array[17] .= "extra";
2070
2071to work as expected.
2072
2073(See L<"autochomp">, below.) Records stored into the array will have
2074the record separator string appended before they are written to the
2075file, if they don't have one already. For example, if the record
2076separator string is C<"\n">, then the following two lines do exactly
2077the same thing:
b5aed31e 2078
2079 $array[17] = "Cherry pie";
2080 $array[17] = "Cherry pie\n";
2081
2082The result is that the contents of line 17 of the file will be
2083replaced with "Cherry pie"; a newline character will separate line 17
27531ffb 2084from line 18. This means that this code will do nothing:
b5aed31e 2085
2086 chomp $array[17];
2087
2088Because the C<chomp>ed value will have the separator reattached when
2089it is written back to the file. There is no way to create a file
2090whose trailing record separator string is missing.
2091
27531ffb 2092Inserting records that I<contain> the record separator string is not
2093supported by this module. It will probably produce a reasonable
2094result, but what this result will be may change in a future version.
2095Use 'splice' to insert records or to replace one record with several.
b5aed31e 2096
0b28bc9a 2097=head2 C<autochomp>
2098
2099Normally, array elements have the record separator removed, so that if
2100the file contains the text
2101
2102 Gold
2103 Frankincense
2104 Myrrh
2105
57c7bc08 2106the tied array will appear to contain C<("Gold", "Frankincense",
2107"Myrrh")>. If you set C<autochomp> to a false value, the record
2108separator will not be removed. If the file above was tied with
0b28bc9a 2109
2110 tie @gifts, "Tie::File", $gifts, autochomp => 0;
2111
2112then the array C<@gifts> would appear to contain C<("Gold\n",
2113"Frankincense\n", "Myrrh\n")>, or (on Win32 systems) C<("Gold\r\n",
2114"Frankincense\r\n", "Myrrh\r\n")>.
2115
b5aed31e 2116=head2 C<mode>
2117
2118Normally, the specified file will be opened for read and write access,
2119and will be created if it does not exist. (That is, the flags
2120C<O_RDWR | O_CREAT> are supplied in the C<open> call.) If you want to
2121change this, you may supply alternative flags in the C<mode> option.
2122See L<Fcntl> for a listing of available flags.
2123For example:
2124
2125 # open the file if it exists, but fail if it does not exist
2126 use Fcntl 'O_RDWR';
2127 tie @array, 'Tie::File', $file, mode => O_RDWR;
2128
2129 # create the file if it does not exist
2130 use Fcntl 'O_RDWR', 'O_CREAT';
2131 tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
2132
2133 # open an existing file in read-only mode
2134 use Fcntl 'O_RDONLY';
2135 tie @array, 'Tie::File', $file, mode => O_RDONLY;
2136
2137Opening the data file in write-only or append mode is not supported.
2138
b3fe5a4c 2139=head2 C<memory>
2140
57c7bc08 2141This is an upper limit on the amount of memory that C<Tie::File> will
2142consume at any time while managing the file. This is used for two
2143things: managing the I<read cache> and managing the I<deferred write
2144buffer>.
b5aed31e 2145
2146Records read in from the file are cached, to avoid having to re-read
2147them repeatedly. If you read the same record twice, the first time it
2148will be stored in memory, and the second time it will be fetched from
b3fe5a4c 2149the I<read cache>. The amount of data in the read cache will not
2150exceed the value you specified for C<memory>. If C<Tie::File> wants
2151to cache a new record, but the read cache is full, it will make room
2152by expiring the least-recently visited records from the read cache.
b5aed31e 2153
b3fe5a4c 2154The default memory limit is 2Mib. You can adjust the maximum read
2155cache size by supplying the C<memory> option. The argument is the
2156desired cache size, in bytes.
b5aed31e 2157
2158 # I have a lot of memory, so use a large cache to speed up access
b3fe5a4c 2159 tie @array, 'Tie::File', $file, memory => 20_000_000;
b5aed31e 2160
b3fe5a4c 2161Setting the memory limit to 0 will inhibit caching; records will be
b5aed31e 2162fetched from disk every time you examine them.
2163
27531ffb 2164The C<memory> value is not an absolute or exact limit on the memory
2165used. C<Tie::File> objects contains some structures besides the read
2166cache and the deferred write buffer, whose sizes are not charged
6ae23f41 2167against C<memory>.
2168
2169The cache itself consumes about 310 bytes per cached record, so if
2170your file has many short records, you may want to decrease the cache
2171memory limit, or else the cache overhead may exceed the size of the
2172cached data.
2173
27531ffb 2174
57c7bc08 2175=head2 C<dw_size>
2176
2177(This is an advanced feature. Skip this section on first reading.)
27531ffb 2178
57c7bc08 2179If you use deferred writing (See L<"Deferred Writing">, below) then
2180data you write into the array will not be written directly to the
2181file; instead, it will be saved in the I<deferred write buffer> to be
2182written out later. Data in the deferred write buffer is also charged
2183against the memory limit you set with the C<memory> option.
2184
2185You may set the C<dw_size> option to limit the amount of data that can
2186be saved in the deferred write buffer. This limit may not exceed the
2187total memory limit. For example, if you set C<dw_size> to 1000 and
2188C<memory> to 2500, that means that no more than 1000 bytes of deferred
2189writes will be saved up. The space available for the read cache will
2190vary, but it will always be at least 1500 bytes (if the deferred write
2191buffer is full) and it could grow as large as 2500 bytes (if the
2192deferred write buffer is empty.)
2193
2194If you don't specify a C<dw_size>, it defaults to the entire memory
2195limit.
2196
b5aed31e 2197=head2 Option Format
2198
2199C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for
b3fe5a4c 2200C<recsep>. C<-memory> is a synonym for C<memory>. You get the
b5aed31e 2201idea.
2202
2203=head1 Public Methods
2204
27531ffb 2205The C<tie> call returns an object, say C<$o>. You may call
b5aed31e 2206
2207 $rec = $o->FETCH($n);
2208 $o->STORE($n, $rec);
2209
b3fe5a4c 2210to fetch or store the record at line C<$n>, respectively; similarly
2211the other tied array methods. (See L<perltie> for details.) You may
2212also call the following methods on this object:
51efdd02 2213
2214=head2 C<flock>
2215
2216 $o->flock(MODE)
2217
2218will lock the tied file. C<MODE> has the same meaning as the second
2219argument to the Perl built-in C<flock> function; for example
2220C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
2221the C<use Fcntl ':flock'> declaration.)
2222
57c7bc08 2223C<MODE> is optional; the default is C<LOCK_EX>.
2224
6ae23f41 2225C<Tie::File> maintains an internal table of the byte offset of each
2226record it has seen in the file.
2227
2228When you use C<flock> to lock the file, C<Tie::File> assumes that the
2229read cache is no longer trustworthy, because another process might
2230have modified the file since the last time it was read. Therefore, a
2231successful call to C<flock> discards the contents of the read cache
2232and the internal record offset table.
2233
57c7bc08 2234C<Tie::File> promises that the following sequence of operations will
2235be safe:
2236
2237 my $o = tie @array, "Tie::File", $filename;
2238 $o->flock;
2239
2240In particular, C<Tie::File> will I<not> read or write the file during
2241the C<tie> call. (Exception: Using C<mode =E<gt> O_TRUNC> will, of
2242course, erase the file during the C<tie> call. If you want to do this
2243safely, then open the file without C<O_TRUNC>, lock the file, and use
2244C<@array = ()>.)
51efdd02 2245
2246The best way to unlock a file is to discard the object and untie the
2247array. It is probably unsafe to unlock the file without also untying
2248it, because if you do, changes may remain unwritten inside the object.
2249That is why there is no shortcut for unlocking. If you really want to
2250unlock the file prematurely, you know what to do; if you don't know
2251what to do, then don't do it.
2252
2253All the usual warnings about file locking apply here. In particular,
2254note that file locking in Perl is B<advisory>, which means that
2255holding a lock will not prevent anyone else from reading, writing, or
2256erasing the file; it only prevents them from getting another lock at
2257the same time. Locks are analogous to green traffic lights: If you
2258have a green light, that does not prevent the idiot coming the other
2259way from plowing into you sideways; it merely guarantees to you that
2260the idiot does not also have a green light at the same time.
b5aed31e 2261
0b28bc9a 2262=head2 C<autochomp>
2263
2264 my $old_value = $o->autochomp(0); # disable autochomp option
2265 my $old_value = $o->autochomp(1); # enable autochomp option
2266
2267 my $ac = $o->autochomp(); # recover current value
2268
2269See L<"autochomp">, above.
2270
6fc0ea7e 2271=head2 C<defer>, C<flush>, C<discard>, and C<autodefer>
57c7bc08 2272
2273See L<"Deferred Writing">, below.
2274
6ae23f41 2275=head2 C<offset>
2276
2277 $off = $o->offset($n);
2278
2279This method returns the byte offset of the start of the C<$n>th record
2280in the file. If there is no such record, it returns an undefined
2281value.
2282
0b28bc9a 2283=head1 Tying to an already-opened filehandle
fa408a35 2284
2285If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
2286of the other C<IO> modules, you may use:
2287
2288 tie @array, 'Tie::File', $fh, ...;
2289
2290Similarly if you opened that handle C<FH> with regular C<open> or
2291C<sysopen>, you may use:
2292
2293 tie @array, 'Tie::File', \*FH, ...;
2294
2295Handles that were opened write-only won't work. Handles that were
57c7bc08 2296opened read-only will work as long as you don't try to modify the
2297array. Handles must be attached to seekable sources of data---that
27531ffb 2298means no pipes or sockets. If C<Tie::File> can detect that you
2299supplied a non-seekable handle, the C<tie> call will throw an
2300exception. (On Unix systems, it can detect this.)
57c7bc08 2301
2a00e2a4 2302Note that Tie::File will only close any filehandles that it opened
2303internally. If you passed it a filehandle as above, you "own" the
2304filehandle, and are responsible for closing it after you have untied
2305the @array.
2306
57c7bc08 2307=head1 Deferred Writing
2308
2309(This is an advanced feature. Skip this section on first reading.)
2310
2311Normally, modifying a C<Tie::File> array writes to the underlying file
2312immediately. Every assignment like C<$a[3] = ...> rewrites as much of
2313the file as is necessary; typically, everything from line 3 through
2314the end will need to be rewritten. This is the simplest and most
2315transparent behavior. Performance even for large files is reasonably
2316good.
2317
2318However, under some circumstances, this behavior may be excessively
2319slow. For example, suppose you have a million-record file, and you
2320want to do:
2321
2322 for (@FILE) {
2323 $_ = "> $_";
2324 }
2325
2326The first time through the loop, you will rewrite the entire file,
2327from line 0 through the end. The second time through the loop, you
2328will rewrite the entire file from line 1 through the end. The third
2329time through the loop, you will rewrite the entire file from line 2 to
2330the end. And so on.
2331
2332If the performance in such cases is unacceptable, you may defer the
2333actual writing, and then have it done all at once. The following loop
2334will perform much better for large files:
2335
2336 (tied @a)->defer;
2337 for (@a) {
2338 $_ = "> $_";
2339 }
2340 (tied @a)->flush;
2341
2342If C<Tie::File>'s memory limit is large enough, all the writing will
2343done in memory. Then, when you call C<-E<gt>flush>, the entire file
2344will be rewritten in a single pass.
2345
6fc0ea7e 2346(Actually, the preceding discussion is something of a fib. You don't
2347need to enable deferred writing to get good performance for this
2348common case, because C<Tie::File> will do it for you automatically
2349unless you specifically tell it not to. See L<"autodeferring">,
2350below.)
2351
57c7bc08 2352Calling C<-E<gt>flush> returns the array to immediate-write mode. If
2353you wish to discard the deferred writes, you may call C<-E<gt>discard>
2354instead of C<-E<gt>flush>. Note that in some cases, some of the data
2355will have been written already, and it will be too late for
6fc0ea7e 2356C<-E<gt>discard> to discard all the changes. Support for
2357C<-E<gt>discard> may be withdrawn in a future version of C<Tie::File>.
57c7bc08 2358
2359Deferred writes are cached in memory up to the limit specified by the
2360C<dw_size> option (see above). If the deferred-write buffer is full
2361and you try to write still more deferred data, the buffer will be
2362flushed. All buffered data will be written immediately, the buffer
2363will be emptied, and the now-empty space will be used for future
2364deferred writes.
2365
2366If the deferred-write buffer isn't yet full, but the total size of the
2367buffer and the read cache would exceed the C<memory> limit, the oldest
27531ffb 2368records will be expired from the read cache until the total size is
57c7bc08 2369under the limit.
2370
2371C<push>, C<pop>, C<shift>, C<unshift>, and C<splice> cannot be
2372deferred. When you perform one of these operations, any deferred data
2373is written to the file and the operation is performed immediately.
2374This may change in a future version.
2375
6fc0ea7e 2376If you resize the array with deferred writing enabled, the file will
2377be resized immediately, but deferred records will not be written.
27531ffb 2378This has a surprising consequence: C<@a = (...)> erases the file
2379immediately, but the writing of the actual data is deferred. This
2380might be a bug. If it is a bug, it will be fixed in a future version.
6fc0ea7e 2381
2382=head2 Autodeferring
2383
2384C<Tie::File> tries to guess when deferred writing might be helpful,
27531ffb 2385and to turn it on and off automatically.
2386
2387 for (@a) {
2388 $_ = "> $_";
2389 }
2390
2391In this example, only the first two assignments will be done
2392immediately; after this, all the changes to the file will be deferred
2393up to the user-specified memory limit.
6fc0ea7e 2394
2395You should usually be able to ignore this and just use the module
2396without thinking about deferring. However, special applications may
2397require fine control over which writes are deferred, or may require
2398that all writes be immediate. To disable the autodeferment feature,
2399use
57c7bc08 2400
2401 (tied @o)->autodefer(0);
2402
6fc0ea7e 2403or
2404
2405 tie @array, 'Tie::File', $file, autodefer => 0;
2406
fa408a35 2407
27531ffb 2408Similarly, C<-E<gt>autodefer(1)> re-enables autodeferment, and
2409C<-E<gt>autodefer()> recovers the current value of the autodefer setting.
2410
b5aed31e 2411
6ae23f41 2412=head1 CONCURRENT ACCESS TO FILES
b5aed31e 2413
6ae23f41 2414Caching and deferred writing are inappropriate if you want the same
2415file to be accessed simultaneously from more than one process. You
2416will want to disable these features. You should do that by including
2417the C<memory =E<gt> 0> option in your C<tie> calls; this will inhibit
2418caching and deferred writing.
b3fe5a4c 2419
6ae23f41 2420You will also want to lock the file while reading or writing it. You
2421can use the C<-E<gt>flock> method for this. A future version of this
2422module may provide an 'autolocking' mode.
2423
2424=head1 CAVEATS
b3fe5a4c 2425
6ae23f41 2426(That's Latin for 'warnings'.)
b3fe5a4c 2427
6ae23f41 2428=over 4
6fc0ea7e 2429
27531ffb 2430=item *
b5aed31e 2431
6fc0ea7e 2432Reasonable effort was made to make this module efficient. Nevertheless,
b5aed31e 2433changing the size of a record in the middle of a large file will
b3fe5a4c 2434always be fairly slow, because everything after the new record must be
2435moved.
b5aed31e 2436
b3fe5a4c 2437=item *
2438
2439The behavior of tied arrays is not precisely the same as for regular
2440arrays. For example:
b5aed31e 2441
57c7bc08 2442 # This DOES print "How unusual!"
2443 undef $a[10]; print "How unusual!\n" if defined $a[10];
b3fe5a4c 2444
2445C<undef>-ing a C<Tie::File> array element just blanks out the
2446corresponding record in the file. When you read it back again, you'll
57c7bc08 2447get the empty string, so the supposedly-C<undef>'ed value will be
2448defined. Similarly, if you have C<autochomp> disabled, then
2449
2450 # This DOES print "How unusual!" if 'autochomp' is disabled
27531ffb 2451 undef $a[10];
57c7bc08 2452 print "How unusual!\n" if $a[10];
2453
2454Because when C<autochomp> is disabled, C<$a[10]> will read back as
2455C<"\n"> (or whatever the record separator string is.)
b5aed31e 2456
6fc0ea7e 2457There are other minor differences, particularly regarding C<exists>
2458and C<delete>, but in general, the correspondence is extremely close.
b3fe5a4c 2459
2460=item *
2461
27531ffb 2462I have supposed that since this module is concerned with file I/O,
2463almost all normal use of it will be heavily I/O bound. This means
2464that the time to maintain complicated data structures inside the
2465module will be dominated by the time to actually perform the I/O.
2466When there was an opportunity to spend CPU time to avoid doing I/O, I
6ae23f41 2467usually tried to take it.
b5aed31e 2468
57c7bc08 2469=item *
6fc0ea7e 2470
57c7bc08 2471You might be tempted to think that deferred writing is like
2472transactions, with C<flush> as C<commit> and C<discard> as
6fc0ea7e 2473C<rollback>, but it isn't, so don't.
57c7bc08 2474
6ae23f41 2475=item *
2476
2477There is a large memory overhead for each record offset and for each
2478cache entry: about 310 bytes per cached data record, and about 21 bytes per offset table entry.
2479
2480The per-record overhead will limit the maximum number of records you
2481can access per file. Note that I<accessing> the length of the array
2482via C<$x = scalar @tied_file> accesses B<all> records and stores their
2483offsets. The same for C<foreach (@tied_file)>, even if you exit the
2484loop early.
2485
b3fe5a4c 2486=back
51efdd02 2487
57c7bc08 2488=head1 SUBCLASSING
2489
2490This version promises absolutely nothing about the internals, which
2491may change without notice. A future version of the module will have a
2492well-defined and stable subclassing API.
2493
b3fe5a4c 2494=head1 WHAT ABOUT C<DB_File>?
51efdd02 2495
27531ffb 2496People sometimes point out that L<DB_File> will do something similar,
2497and ask why C<Tie::File> module is necessary.
b3fe5a4c 2498
27531ffb 2499There are a number of reasons that you might prefer C<Tie::File>.
2500A list is available at C<http://perl.plover.com/TieFile/why-not-DB_File>.
b5aed31e 2501
2502=head1 AUTHOR
2503
2504Mark Jason Dominus
2505
2506To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
2507
2508To receive an announcement whenever a new version of this module is
2509released, send a blank email message to
2510C<mjd-perl-tiefile-subscribe@plover.com>.
2511
57c7bc08 2512The most recent version of this module, including documentation and
2513any news of importance, will be available at
2514
2515 http://perl.plover.com/TieFile/
2516
2517
b5aed31e 2518=head1 LICENSE
2519
6ae23f41 2520C<Tie::File> version 0.95 is copyright (C) 2002 Mark Jason Dominus.
7b6b3db1 2521
2522This library is free software; you may redistribute it and/or modify
2523it under the same terms as Perl itself.
b5aed31e 2524
57c7bc08 2525These terms are your choice of any of (1) the Perl Artistic Licence,
2526or (2) version 2 of the GNU General Public License as published by the
7b6b3db1 2527Free Software Foundation, or (3) any later version of the GNU General
2528Public License.
b5aed31e 2529
7b6b3db1 2530This library is distributed in the hope that it will be useful,
b5aed31e 2531but WITHOUT ANY WARRANTY; without even the implied warranty of
2532MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2533GNU General Public License for more details.
2534
2535You should have received a copy of the GNU General Public License
7b6b3db1 2536along with this library program; it should be in the file C<COPYING>.
2537If not, write to the Free Software Foundation, Inc., 59 Temple Place,
2538Suite 330, Boston, MA 02111 USA
b5aed31e 2539
2540For licensing inquiries, contact the author at:
2541
2542 Mark Jason Dominus
2543 255 S. Warnock St.
2544 Philadelphia, PA 19107
2545
2546=head1 WARRANTY
2547
6ae23f41 2548C<Tie::File> version 0.95 comes with ABSOLUTELY NO WARRANTY.
b5aed31e 2549For details, see the license.
2550
fa408a35 2551=head1 THANKS
2552
2553Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
2554core when I hadn't written it yet, and for generally being helpful,
2555supportive, and competent. (Usually the rule is "choose any one.")
2556Also big thanks to Abhijit Menon-Sen for all of the same things.
2557
57c7bc08 2558Special thanks to Craig Berry and Peter Prymmer (for VMS portability
2559help), Randy Kobes (for Win32 portability help), Clinton Pierce and
2560Autrijus Tang (for heroic eleventh-hour Win32 testing above and beyond
6fc0ea7e 2561the call of duty), Michael G Schwern (for testing advice), and the
2562rest of the CPAN testers (for testing generally).
b5aed31e 2563
6ae23f41 2564Special thanks to Tels for suggesting several speed and memory
2565optimizations.
2566
57c7bc08 2567Additional thanks to:
b3fe5a4c 2568Edward Avis /
6ae23f41 2569Mattia Barbon /
fa408a35 2570Gerrit Haase /
6ae23f41 2571Jarkko Hietaniemi (again) /
b3fe5a4c 2572Nikola Knezevic /
6ae23f41 2573John Kominetz /
836d9961 2574Nick Ing-Simmons /
fa408a35 2575Tassilo von Parseval /
2576H. Dieter Pearcey /
b3fe5a4c 2577Slaven Rezic /
6ae23f41 2578Eric Roode /
6fc0ea7e 2579Peter Scott /
fa408a35 2580Peter Somu /
57c7bc08 2581Autrijus Tang (again) /
6ae23f41 2582Tels (again) /
2583Juerd Waalboer
7b6b3db1 2584
fa408a35 2585=head1 TODO
2586
b5aed31e 2587More tests. (Stuff I didn't think of yet.)
2588
b5aed31e 2589Paragraph mode?
2590
6fc0ea7e 2591Fixed-length mode. Leave-blanks mode.
b5aed31e 2592
fa408a35 2593Maybe an autolocking mode?
2594
6ae23f41 2595For many common uses of the module, the read cache is a liability.
2596For example, a program that inserts a single record, or that scans the
2597file once, will have a cache hit rate of zero. This suggests a major
2598optimization: The cache should be initially disabled. Here's a hybrid
2599approach: Initially, the cache is disabled, but the cache code
2600maintains statistics about how high the hit rate would be *if* it were
2601enabled. When it sees the hit rate get high enough, it enables
2602itself. The STAT comments in this code are the beginning of an
2603implementation of this.
2604
6fc0ea7e 2605Record locking with fcntl()? Then the module might support an undo
2606log and get real transactions. What a tour de force that would be.
b3fe5a4c 2607
6ae23f41 2608Keeping track of the highest cached record. This would allow reads-in-a-row
2609to skip the cache lookup faster (if reading from 1..N with empty cache at
2610start, the last cached value will be always N-1).
2611
27531ffb 2612More tests.
b3fe5a4c 2613
b5aed31e 2614=cut
2615