Commit | Line | Data |
1e73acc8 |
1 | #!./perl -w |
1e73acc8 |
2 | use Test::More; |
3 | |
4 | use strict; |
5 | use Hash::Util::FieldHash qw( :all); |
6 | |
7 | no warnings 'misc'; |
8 | |
9 | plan tests => 5; |
10 | |
11 | fieldhash my %h; |
12 | |
13 | ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on"); |
14 | |
15 | foreach (1..10) { |
16 | $h{"\0"x$_}++; |
17 | } |
18 | |
19 | ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash"); |
20 | |
21 | foreach (11..20) { |
22 | $h{"\0"x$_}++; |
23 | } |
24 | |
25 | ok (Internals::HvREHASH(%h), "20 entries triggers rehash"); |
26 | |
27 | |
28 | |
29 | |
30 | # second part using an emulation of the PERL_HASH in perl, mounting an |
33647f77 |
31 | # attack on a pre-populated hash. This is also useful if you need normal |
1e73acc8 |
32 | # keys which don't contain \0 -- suitable for stashes |
33 | |
34 | use constant MASK_U32 => 2**32; |
35 | use constant HASH_SEED => 0; |
36 | use constant THRESHOLD => 14; |
37 | use constant START => "a"; |
38 | |
39 | # some initial hash data |
40 | fieldhash my %h2; |
41 | %h2 = map {$_ => 1} 'a'..'cc'; |
42 | |
43 | ok (!Internals::HvREHASH(%h2), |
33647f77 |
44 | "starting with pre-populated non-pathological hash (rehash flag if off)"); |
1e73acc8 |
45 | |
46 | my @keys = get_keys(\%h2); |
47 | $h2{$_}++ for @keys; |
48 | ok (Internals::HvREHASH(%h2), |
33647f77 |
49 | scalar(@keys) . " colliding into the same bucket keys are triggering rehash"); |
1e73acc8 |
50 | |
51 | sub get_keys { |
52 | my $hr = shift; |
53 | |
54 | # the minimum of bits required to mount the attack on a hash |
55 | my $min_bits = log(THRESHOLD)/log(2); |
56 | |
57 | # if the hash has already been populated with a significant amount |
58 | # of entries the number of mask bits can be higher |
59 | my $keys = scalar keys %$hr; |
60 | my $bits = $keys ? log($keys)/log(2) : 0; |
61 | $bits = $min_bits if $min_bits > $bits; |
62 | |
63 | $bits = int($bits) < $bits ? int($bits) + 1 : int($bits); |
64 | # need to add 2 bits to cover the internal split cases |
65 | $bits += 2; |
66 | my $mask = 2**$bits-1; |
67 | print "# using mask: $mask ($bits)\n"; |
68 | |
69 | my @keys; |
70 | my $s = START; |
71 | my $c = 0; |
72 | # get 2 keys on top of the THRESHOLD |
73 | my $hash; |
74 | while (@keys < THRESHOLD+2) { |
75 | # next if exists $hash->{$s}; |
76 | $hash = hash($s); |
77 | next unless ($hash & $mask) == 0; |
78 | $c++; |
79 | printf "# %2d: %5s, %10s\n", $c, $s, $hash; |
80 | push @keys, $s; |
81 | } continue { |
82 | $s++; |
83 | } |
84 | |
85 | return @keys; |
86 | } |
87 | |
88 | |
89 | # trying to provide the fastest equivalent of C macro's PERL_HASH in |
90 | # Perl - the main complication is that it uses U32 integer, which we |
91 | # can't do it perl, without doing some tricks |
92 | sub hash { |
93 | my $s = shift; |
94 | my @c = split //, $s; |
95 | my $u = HASH_SEED; |
96 | for (@c) { |
97 | # (A % M) + (B % M) == (A + B) % M |
98 | # This works because '+' produces a NV, which is big enough to hold |
33647f77 |
99 | # the intermediate result. We only need the % before any "^" and "&" |
1e73acc8 |
100 | # to get the result in the range for an I32. |
101 | # and << doesn't work on NV, so using 1 << 10 |
102 | $u += ord; |
103 | $u += $u * (1 << 10); $u %= MASK_U32; |
104 | $u ^= $u >> 6; |
105 | } |
106 | $u += $u << 3; $u %= MASK_U32; |
107 | $u ^= $u >> 11; $u %= MASK_U32; |
108 | $u += $u << 15; $u %= MASK_U32; |
109 | $u; |
110 | } |