use strict;
-plan tests => 3;
+plan tests => 5;
my %h;
}
ok (Internals::HvREHASH(%h), "20 entries triggers rehash");
+
+
+
+
+# second part using an emulation of the PERL_HASH in perl, mounting an
+# attack on a prepopulated hash. This is also useful if you need normal
+# keys which don't contain \0 -- suitable for stashes
+
+use constant MASK_U32 => 2**32;
+use constant HASH_SEED => 0;
+use constant THRESHOLD => 14;
+use constant START => "a";
+
+# some initial hash data
+my %h2 = map {$_ => 1} 'a'..'cc';
+
+ok (!Internals::HvREHASH(%h2),
+ "starting with pre-populated non-pathalogical hash (rehash flag if off)");
+
+my @keys = get_keys(\%h2);
+$h2{$_}++ for @keys;
+ok (Internals::HvREHASH(%h2),
+ scalar(@keys) . " colliding into the same bucket keys are triggerring rehash");
+
+sub get_keys {
+ my $hr = shift;
+
+ # the minimum of bits required to mount the attack on a hash
+ my $min_bits = log(THRESHOLD)/log(2);
+
+ # if the hash has already been populated with a significant amount
+ # of entries the number of mask bits can be higher
+ my $keys = scalar keys %$hr;
+ my $bits = $keys ? log($keys)/log(2) : 0;
+ $bits = $min_bits if $min_bits > $bits;
+
+ $bits = int($bits) < $bits ? int($bits) + 1 : int($bits);
+ # need to add 2 bits to cover the internal split cases
+ $bits += 2;
+ my $mask = 2**$bits-1;
+ print "# using mask: $mask ($bits)\n";
+
+ my @keys;
+ my $s = START;
+ my $c = 0;
+ # get 2 keys on top of the THRESHOLD
+ my $hash;
+ while (@keys < THRESHOLD+2) {
+ # next if exists $hash->{$s};
+ $hash = hash($s);
+ next unless ($hash & $mask) == 0;
+ $c++;
+ printf "# %2d: %5s, %10s\n", $c, $s, $hash;
+ push @keys, $s;
+ } continue {
+ $s++;
+ }
+
+ return @keys;
+}
+
+
+# trying to provide the fastest equivalent of C macro's PERL_HASH in
+# Perl - the main complication is that it uses U32 integer, which we
+# can't do it perl, without doing some tricks
+sub hash {
+ my $s = shift;
+ my @c = split //, $s;
+ my $u = HASH_SEED;
+ for (@c) {
+ # (A % M) + (B % M) == (A + B) % M
+ # This works because '+' produces a NV, which is big enough to hold
+ # the intermidiate result. We only need the % before any "^" and "&"
+ # to get the result in the range for an I32.
+ # and << doesn't work on NV, so using 1 << 10
+ $u += ord;
+ $u += $u * (1 << 10); $u %= MASK_U32;
+ $u ^= $u >> 6;
+ }
+ $u += $u << 3; $u %= MASK_U32;
+ $u ^= $u >> 11; $u %= MASK_U32;
+ $u += $u << 15; $u %= MASK_U32;
+ $u;
+}