Small optimisations, by Brandon Black
[p5sagit/p5-mst-13.2.git] / t / op / hash.t
CommitLineData
05619474 1#!./perl -w
2
3BEGIN {
4 chdir 't' if -d 't';
5 @INC = '../lib';
6 require './test.pl';
7}
8
9use strict;
10
d0c79590 11plan tests => 5;
05619474 12
13my %h;
14
15ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on");
16
17foreach (1..10) {
18 $h{"\0"x$_}++;
19}
20
21ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash");
22
23foreach (11..20) {
24 $h{"\0"x$_}++;
25}
26
27ok (Internals::HvREHASH(%h), "20 entries triggers rehash");
d0c79590 28
29
30
31
32# second part using an emulation of the PERL_HASH in perl, mounting an
33# attack on a prepopulated hash. This is also useful if you need normal
34# keys which don't contain \0 -- suitable for stashes
35
36use constant MASK_U32 => 2**32;
37use constant HASH_SEED => 0;
38use constant THRESHOLD => 14;
39use constant START => "a";
40
41# some initial hash data
42my %h2 = map {$_ => 1} 'a'..'cc';
43
44ok (!Internals::HvREHASH(%h2),
45 "starting with pre-populated non-pathalogical hash (rehash flag if off)");
46
47my @keys = get_keys(\%h2);
48$h2{$_}++ for @keys;
49ok (Internals::HvREHASH(%h2),
50 scalar(@keys) . " colliding into the same bucket keys are triggerring rehash");
51
52sub get_keys {
53 my $hr = shift;
54
55 # the minimum of bits required to mount the attack on a hash
56 my $min_bits = log(THRESHOLD)/log(2);
57
58 # if the hash has already been populated with a significant amount
59 # of entries the number of mask bits can be higher
60 my $keys = scalar keys %$hr;
61 my $bits = $keys ? log($keys)/log(2) : 0;
62 $bits = $min_bits if $min_bits > $bits;
63
64 $bits = int($bits) < $bits ? int($bits) + 1 : int($bits);
65 # need to add 2 bits to cover the internal split cases
66 $bits += 2;
67 my $mask = 2**$bits-1;
68 print "# using mask: $mask ($bits)\n";
69
70 my @keys;
71 my $s = START;
72 my $c = 0;
73 # get 2 keys on top of the THRESHOLD
74 my $hash;
75 while (@keys < THRESHOLD+2) {
76 # next if exists $hash->{$s};
77 $hash = hash($s);
78 next unless ($hash & $mask) == 0;
79 $c++;
80 printf "# %2d: %5s, %10s\n", $c, $s, $hash;
81 push @keys, $s;
82 } continue {
83 $s++;
84 }
85
86 return @keys;
87}
88
89
90# trying to provide the fastest equivalent of C macro's PERL_HASH in
91# Perl - the main complication is that it uses U32 integer, which we
92# can't do it perl, without doing some tricks
93sub hash {
94 my $s = shift;
95 my @c = split //, $s;
96 my $u = HASH_SEED;
97 for (@c) {
98 # (A % M) + (B % M) == (A + B) % M
99 # This works because '+' produces a NV, which is big enough to hold
100 # the intermidiate result. We only need the % before any "^" and "&"
101 # to get the result in the range for an I32.
102 # and << doesn't work on NV, so using 1 << 10
103 $u += ord;
104 $u += $u * (1 << 10); $u %= MASK_U32;
105 $u ^= $u >> 6;
106 }
107 $u += $u << 3; $u %= MASK_U32;
108 $u ^= $u >> 11; $u %= MASK_U32;
109 $u += $u << 15; $u %= MASK_U32;
110 $u;
111}