From: Ilya Zakharevich Date: Mon, 31 Dec 2001 20:28:46 +0000 (-0500) Subject: Re: [PATCH @13746] tied hashes: memoization X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=17e638fee421d7249f41de02ebe6cb9c4efb477e;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH @13746] tied hashes: memoization Message-ID: <20011231202845.A1323@math.ohio-state.edu> p4raw-id: //depot/perl@14686 --- diff --git a/MANIFEST b/MANIFEST index 58ec65f..e464ac6 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1386,6 +1386,8 @@ lib/Tie/Array/stdpush.t Test for Tie::StdArray lib/Tie/Handle.pm Base class for tied handles lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle lib/Tie/Hash.pm Base class for tied hashes +lib/Tie/Memoize.pm Base class for memoized tied hashes +lib/Tie/Memoize.t Test for Memoize.t lib/Tie/RefHash.pm Base class for tied hashes with references as keys lib/Tie/RefHash.t Test for Tie::RefHash and Tie::RefHash::Nestable lib/Tie/Scalar.pm Base class for tied scalars diff --git a/lib/Tie/Memoize.pm b/lib/Tie/Memoize.pm new file mode 100644 index 0000000..0b3d320 --- /dev/null +++ b/lib/Tie/Memoize.pm @@ -0,0 +1,127 @@ +use strict; +package Tie::Memoize; +use Tie::Hash; +our @ISA = 'Tie::ExtraHash'; + +our $exists_token = \undef; + +sub croak {require Carp; goto &Carp::croak} + +# Format: [0: STORAGE, 1: EXISTS-CACHE, 2: FETCH_function; +# 3: EXISTS_function, 4: DATA, 5: EXISTS_different ] + +sub FETCH { + my ($h,$key) = ($_[0][0], $_[1]); + my $res = $h->{$key}; + return $res if defined $res; # Shortcut if accessible + return $res if exists $h->{$key}; # Accessible, but undef + my $cache = $_[0][1]{$key}; + return if defined $cache and not $cache; # Known to not exist + my @res = $_[0][2]->($key, $_[0][4]); # Autoload + $_[0][1]{$key} = 0, return unless @res; # Cache non-existence + delete $_[0][1]{$key}; # Clear existence cache, not needed any more + $_[0][0]{$key} = $res[0]; # Store data and return +} + +sub EXISTS { + my ($a,$key) = (shift, shift); + return 1 if exists $a->[0]{$key}; # Have data + my $cache = $a->[1]{$key}; + return $cache if defined $cache; # Existence cache + my @res = $a->[3]($key,$a->[4]); + $_[0][1]{$key} = 0, return unless @res; # Cache non-existence + # Now we know it exists + return ($_[0][1]{$key} = 1) if $a->[5]; # Only existence reported + # Now know the value + $_[0][0]{$key} = $res[0]; # Store data + return 1 +} + +sub TIEHASH { + croak 'syntax: tie %hash, \'Tie::AutoLoad\', \&fetch_subr' if @_ < 2; + croak 'syntax: tie %hash, \'Tie::AutoLoad\', \&fetch_subr, $data, \&exists_subr, \%data_cache, \%existence_cache' if @_ > 6; + push @_, undef if @_ < 3; # Data + push @_, $_[1] if @_ < 4; # exists + push @_, {} while @_ < 6; # initial value and caches + bless [ @_[4,5,1,3,2], $_[1] ne $_[3]], $_[0] +} + +1; + +=head1 NAME + +Tiel::Memoize - add data to hash when needed + +=head1 SYNOPSIS + + require Tie::Memoize; + tie %hash, 'Tie::Memoize', + \&fetch, # The rest is optional + $DATA, \&exists, + {%ini_value}, {%ini_existence}; + +=head1 DESCRIPTION + +This package allows a tied hash to autoload its values on the first access, +and to use the cached value on the following accesses. + +Only read-accesses (via fetching the value or C) result in calls to +the functions; the modify-accesses are performed as on a normal hash. + +The required arguments during C are the hash, the package, and +the reference to the Cing function. The optional arguments are +an arbitrary scalar $data, the reference to the C function, +and initial values of the hash and of the existence cache. + +Both the Cing function and the C functions have the +same signature: the arguments are C<$key, $data>; $data is the same +value as given as argument during tie()ing. Both functions should +return an empty list if the value does not exist. If C +function is different from the Cing function, it should return +a TRUE value on success. The Cing function should return the +intended value if the key is valid. + +=head1 Inheriting from B + +The structure of the tied() data is an array reference with elements + + 0: cache of known values + 1: cache of known existence of keys + 2: FETCH function + 3: EXISTS function + 4: $data + +The rest is for internal usage of this package. In particular, if +TIEHASH is overwritten, it should call SUPER::TIEHASH. + +=head1 EXAMPLE + + sub slurp { + my ($key, $dir) = shift; + open my $h, '<', "$dir/$key" or return; + local $/; <$h> # slurp it all + } + sub exists { my ($key, $dir) = shift; return -f "$dir/$key" } + + tie %hash, 'Tie::Memoize', \&slurp, $directory, \&exists, + { fake_file1 => $content1, fake_file2 => $content2 }, + { pretend_does_not_exists => 0, known_to_exist => 1 }; + +This example treats the slightly modified contents of $directory as a +hash. The modifications are that the keys F and +F fetch values $content1 and $content2, and +F will never be accessed. Additionally, the +existence of F is never checked (so if it does not +exists when its content is needed, the user of %hash may be confused). + +=head1 BUGS + +FIRSTKEY and NEXTKEY methods go through the keys which were already read, +not all the possible keys of the hash. + +=head1 AUTHOR + +Ilya Zakharevich L. + +=cut + diff --git a/lib/Tie/Memoize.t b/lib/Tie/Memoize.t new file mode 100644 index 0000000..defb437 --- /dev/null +++ b/lib/Tie/Memoize.t @@ -0,0 +1,61 @@ +#!./perl -w + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use strict; +use Tie::Memoize; +use Test::More tests => 28; +use File::Spec; + +sub slurp { + my ($key, $dir) = @_; + open my $h, '<', File::Spec->catfile($dir, $key) or return; + local $/; + <$h> # slurp it all +} +sub exists { my ($key, $dir) = @_; return -f File::Spec->catfile($dir, $key) } + +my $directory = File::Spec->catdir(File::Spec->updir, 'lib'); + +tie my %hash, 'Tie::Memoize', \&slurp, $directory, \&exists, + { fake_file1 => 123, fake_file2 => 45678 }, + { 'strict.pm' => 0, known_to_exist => 1 }; + +ok(not exists $hash{'strict.pm'}); +ok(exists $hash{known_to_exist}); +ok($hash{fake_file2} eq 45678); +ok($hash{fake_file1} eq 123); +ok(exists $hash{known_to_exist}); +ok(not exists $hash{'strict.pm'}); +ok(not defined $hash{fake_file3}); +ok(not defined $hash{known_to_exist}); +ok(not exists $hash{known_to_exist}); +ok(not exists $hash{'strict.pm'}); +my $c = slurp('constant.pm', $directory); +ok($c); +ok($hash{'constant.pm'} eq $c); +ok($hash{'constant.pm'} eq $c); +ok(not exists $hash{'strict.pm'}); +ok(exists $hash{'blib.pm'}); + +untie %hash; + +tie %hash, 'Tie::Memoize', \&slurp, $directory; + +ok(exists $hash{'strict.pm'}, 'existing file'); +ok(not exists $hash{fake_file2}); +ok(not exists $hash{fake_file1}); +ok(not exists $hash{known_to_exist}); +ok(exists $hash{'strict.pm'}, 'existing file again'); +ok(not defined $hash{fake_file3}); +ok(not defined $hash{known_to_exist}); +ok(not exists $hash{known_to_exist}); +ok(exists $hash{'strict.pm'}, 'existing file again'); +ok($hash{'constant.pm'} eq $c); +ok($hash{'constant.pm'} eq $c); +ok(exists $hash{'strict.pm'}, 'existing file again'); +ok(exists $hash{'blib.pm'}, 'another existing file'); +