Once again syncing after too long an absence
[p5sagit/p5-mst-13.2.git] / lib / Tie / SubstrHash.pm
CommitLineData
64d0c973 1package Tie::SubstrHash;
2
0e06870b 3our $VERSION = '1.00';
4
64d0c973 5=head1 NAME
6
7Tie::SubstrHash - Fixed-table-size, fixed-key-length hashing
8
9=head1 SYNOPSIS
10
11 require Tie::SubstrHash;
12
c954a603 13 tie %myhash, 'Tie::SubstrHash', $key_len, $value_len, $table_size;
64d0c973 14
15=head1 DESCRIPTION
16
17The B<Tie::SubstrHash> package provides a hash-table-like interface to
18an array of determinate size, with constant key size and record size.
19
20Upon tying a new hash to this package, the developer must specify the
21size of the keys that will be used, the size of the value fields that the
22keys will index, and the size of the overall table (in terms of key-value
23pairs, not size in hard memory). I<These values will not change for the
24duration of the tied hash>. The newly-allocated hash table may now have
25data stored and retrieved. Efforts to store more than C<$table_size>
26elements will result in a fatal error, as will efforts to store a value
27not exactly C<$value_len> characters in length, or reference through a
28key not exactly C<$key_len> characters in length. While these constraints
29may seem excessive, the result is a hash table using much less internal
30memory than an equivalent freely-allocated hash table.
31
32=head1 CAVEATS
33
34Because the current implementation uses the table and key sizes for the
35hashing algorithm, there is no means by which to dynamically change the
36value of any of the initialization parameters.
37
0e06870b 38The hash does not support exists().
39
64d0c973 40=cut
41
748a9306 42use Carp;
43
44sub TIEHASH {
45 my $pack = shift;
46 my ($klen, $vlen, $tsize) = @_;
47 my $rlen = 1 + $klen + $vlen;
0e06870b 48 $tsize = [$tsize,
49 findgteprime($tsize * 1.1)]; # Allow 10% empty.
748a9306 50 $self = bless ["\0", $klen, $vlen, $tsize, $rlen, 0, -1];
0e06870b 51 $$self[0] x= $rlen * $tsize->[1];
748a9306 52 $self;
53}
54
0e06870b 55sub CLEAR {
56 local($self) = @_;
57 $$self[0] = "\0" x ($$self[4] * $$self[3]->[1]);
58 $$self[5] = 0;
59 $$self[6] = -1;
60}
61
748a9306 62sub FETCH {
63 local($self,$key) = @_;
64 local($klen, $vlen, $tsize, $rlen) = @$self[1..4];
65 &hashkey;
66 for (;;) {
67 $offset = $hash * $rlen;
68 $record = substr($$self[0], $offset, $rlen);
69 if (ord($record) == 0) {
70 return undef;
71 }
72 elsif (ord($record) == 1) {
73 }
74 elsif (substr($record, 1, $klen) eq $key) {
75 return substr($record, 1+$klen, $vlen);
76 }
77 &rehash;
78 }
79}
80
81sub STORE {
82 local($self,$key,$val) = @_;
83 local($klen, $vlen, $tsize, $rlen) = @$self[1..4];
0e06870b 84 croak("Table is full ($tsize->[0] elements)") if $$self[5] > $tsize->[0];
85 croak(qq/Value "$val" is not $vlen characters long/)
748a9306 86 if length($val) != $vlen;
87 my $writeoffset;
88
89 &hashkey;
90 for (;;) {
91 $offset = $hash * $rlen;
92 $record = substr($$self[0], $offset, $rlen);
93 if (ord($record) == 0) {
94 $record = "\2". $key . $val;
95 die "panic" unless length($record) == $rlen;
96 $writeoffset = $offset unless defined $writeoffset;
97 substr($$self[0], $writeoffset, $rlen) = $record;
98 ++$$self[5];
99 return;
100 }
101 elsif (ord($record) == 1) {
102 $writeoffset = $offset unless defined $writeoffset;
103 }
104 elsif (substr($record, 1, $klen) eq $key) {
105 $record = "\2". $key . $val;
106 die "panic" unless length($record) == $rlen;
107 substr($$self[0], $offset, $rlen) = $record;
108 return;
109 }
110 &rehash;
111 }
112}
113
114sub DELETE {
115 local($self,$key) = @_;
116 local($klen, $vlen, $tsize, $rlen) = @$self[1..4];
117 &hashkey;
118 for (;;) {
119 $offset = $hash * $rlen;
120 $record = substr($$self[0], $offset, $rlen);
121 if (ord($record) == 0) {
122 return undef;
123 }
124 elsif (ord($record) == 1) {
125 }
126 elsif (substr($record, 1, $klen) eq $key) {
127 substr($$self[0], $offset, 1) = "\1";
128 return substr($record, 1+$klen, $vlen);
129 --$$self[5];
130 }
131 &rehash;
132 }
133}
134
135sub FIRSTKEY {
136 local($self) = @_;
137 $$self[6] = -1;
138 &NEXTKEY;
139}
140
141sub NEXTKEY {
142 local($self) = @_;
143 local($klen, $vlen, $tsize, $rlen, $entries, $iterix) = @$self[1..6];
0e06870b 144 for (++$iterix; $iterix < $tsize->[1]; ++$iterix) {
748a9306 145 next unless substr($$self[0], $iterix * $rlen, 1) eq "\2";
146 $$self[6] = $iterix;
147 return substr($$self[0], $iterix * $rlen + 1, $klen);
148 }
149 $$self[6] = -1;
150 undef;
151}
152
0e06870b 153sub EXISTS {
154 croak "Tie::SubstrHash does not support exists()";
155}
156
748a9306 157sub hashkey {
0e06870b 158 croak(qq/Key "$key" is not $klen characters long/)
748a9306 159 if length($key) != $klen;
160 $hash = 2;
161 for (unpack('C*', $key)) {
162 $hash = $hash * 33 + $_;
77bc6408 163 &_hashwrap if $hash >= 1e13;
748a9306 164 }
0e06870b 165 &_hashwrap if $hash >= $tsize->[1];
748a9306 166 $hash = 1 unless $hash;
167 $hashbase = $hash;
168}
169
77bc6408 170sub _hashwrap {
0e06870b 171 $hash -= int($hash / $tsize->[1]) * $tsize->[1];
77bc6408 172}
173
748a9306 174sub rehash {
175 $hash += $hashbase;
0e06870b 176 $hash -= $tsize->[1] if $hash >= $tsize->[1];
748a9306 177}
178
0e06870b 179# using POSIX::ceil() would be too heavy, and not all platforms have it.
180sub ceil {
181 my $num = shift;
182 $num = int($num + 1) unless $num == int $num;
183 return $num;
184}
185
186sub findgteprime { # find the smallest prime integer greater than or equal to
748a9306 187 use integer;
188
0e06870b 189# It may be sufficient (and more efficient, IF IT IS CORRECT) to use
190# $max = 1 + int sqrt $num and calculate it once only, but is it correct?
191
192 my $num = ceil(shift);
193 return 2 if $num <= 2;
748a9306 194
0e06870b 195 $num++ unless $num % 2;
748a9306 196
197 NUM:
198 for (;; $num += 2) {
0e06870b 199 my $max = int sqrt $num;
200 for ($i = 3; $i <= $max; $i += 2) {
201 next NUM unless $num % $i;
202 }
203 return $num;
748a9306 204 }
205}
206
2071;