[p5sagit/p5-mst-13.2.git] / ext / Hash-Util-FieldHash / t / 10_hash.t

#!./perl -w
use Test::More;

use strict;
use Hash::Util::FieldHash qw( :all);

no warnings 'misc';

plan tests => 5;

fieldhash my %h;

ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on");

foreach (1..10) {
  $h{"\0"x$_}++;
}

ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash");

foreach (11..20) {
  $h{"\0"x$_}++;
}

ok (Internals::HvREHASH(%h), "20 entries triggers rehash");


# second part using an emulation of the PERL_HASH in perl, mounting an
# attack on a pre-populated hash. This is also useful if you need normal
# keys which don't contain \0 -- suitable for stashes

use constant MASK_U32  => 2**32;
use constant HASH_SEED => 0;
use constant THRESHOLD => 14;
use constant START     => "a";

# some initial hash data
fieldhash my %h2;
%h2 = map {$_ => 1} 'a'..'cc';

ok (!Internals::HvREHASH(%h2), 
    "starting with pre-populated non-pathological hash (rehash flag if off)");

my @keys = get_keys(\%h2);
$h2{$_}++ for @keys;
ok (Internals::HvREHASH(%h2), 
    scalar(@keys) . " colliding into the same bucket keys are triggering rehash");

sub get_keys {
    my $hr = shift;

    # the minimum of bits required to mount the attack on a hash
    my $min_bits = log(THRESHOLD)/log(2);

    # if the hash has already been populated with a significant amount
    # of entries the number of mask bits can be higher
    my $keys = scalar keys %$hr;
    my $bits = $keys ? log($keys)/log(2) : 0;
    $bits = $min_bits if $min_bits > $bits;

    $bits = int($bits) < $bits ? int($bits) + 1 : int($bits);
    # need to add 2 bits to cover the internal split cases
    $bits += 2;
    my $mask = 2**$bits-1;
    print "# using mask: $mask ($bits)\n";

    my @keys;
    my $s = START;
    my $c = 0;
    # get 2 keys on top of the THRESHOLD
    my $hash;
    while (@keys < THRESHOLD+2) {
        # next if exists $hash->{$s};
        $hash = hash($s);
        next unless ($hash & $mask) == 0;
        $c++;
        printf "# %2d: %5s, %10s\n", $c, $s, $hash;
        push @keys, $s;
    } continue {
        $s++;
    }

    return @keys;
}


# trying to provide the fastest equivalent of C macro's PERL_HASH in
# Perl - the main complication is that it uses U32 integer, which we
# can't do it perl, without doing some tricks
sub hash {
    my $s = shift;
    my @c = split //, $s;
    my $u = HASH_SEED;
    for (@c) {
        # (A % M) + (B % M) == (A + B) % M
        # This works because '+' produces a NV, which is big enough to hold
        # the intermediate result. We only need the % before any "^" and "&"
        # to get the result in the range for an I32.
        # and << doesn't work on NV, so using 1 << 10
        $u += ord;
        $u += $u * (1 << 10); $u %= MASK_U32;
        $u ^= $u >> 6;
    }
    $u += $u << 3;  $u %= MASK_U32;
    $u ^= $u >> 11; $u %= MASK_U32;
    $u += $u << 15; $u %= MASK_U32;
    $u;
}
Commit	Line	Data
1e73acc8	1	#!./perl -w
1e73acc8	2	use Test::More;
	3
	4	use strict;
	5	use Hash::Util::FieldHash qw( :all);
	6
	7	no warnings 'misc';
	8
	9	plan tests => 5;
	10
	11	fieldhash my %h;
	12
	13	ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on");
	14
	15	foreach (1..10) {
	16	$h{"\0"x$_}++;
	17	}
	18
	19	ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash");
	20
	21	foreach (11..20) {
	22	$h{"\0"x$_}++;
	23	}
	24
	25	ok (Internals::HvREHASH(%h), "20 entries triggers rehash");
	26
	27
	28
	29
	30	# second part using an emulation of the PERL_HASH in perl, mounting an
33647f77	31	# attack on a pre-populated hash. This is also useful if you need normal
1e73acc8	32	# keys which don't contain \0 -- suitable for stashes
	33
	34	use constant MASK_U32 => 2**32;
	35	use constant HASH_SEED => 0;
	36	use constant THRESHOLD => 14;
	37	use constant START => "a";
	38
	39	# some initial hash data
	40	fieldhash my %h2;
	41	%h2 = map {$_ => 1} 'a'..'cc';
	42
	43	ok (!Internals::HvREHASH(%h2),
33647f77	44	"starting with pre-populated non-pathological hash (rehash flag if off)");
1e73acc8	45
	46	my @keys = get_keys(\%h2);
	47	$h2{$_}++ for @keys;
	48	ok (Internals::HvREHASH(%h2),
33647f77	49	scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
1e73acc8	50
	51	sub get_keys {
	52	my $hr = shift;
	53
	54	# the minimum of bits required to mount the attack on a hash
	55	my $min_bits = log(THRESHOLD)/log(2);
	56
	57	# if the hash has already been populated with a significant amount
	58	# of entries the number of mask bits can be higher
	59	my $keys = scalar keys %$hr;
	60	my $bits = $keys ? log($keys)/log(2) : 0;
	61	$bits = $min_bits if $min_bits > $bits;
	62
	63	$bits = int($bits) < $bits ? int($bits) + 1 : int($bits);
	64	# need to add 2 bits to cover the internal split cases
	65	$bits += 2;
	66	my $mask = 2**$bits-1;
	67	print "# using mask: $mask ($bits)\n";
	68
	69	my @keys;
	70	my $s = START;
	71	my $c = 0;
	72	# get 2 keys on top of the THRESHOLD
	73	my $hash;
	74	while (@keys < THRESHOLD+2) {
	75	# next if exists $hash->{$s};
	76	$hash = hash($s);
	77	next unless ($hash & $mask) == 0;
	78	$c++;
	79	printf "# %2d: %5s, %10s\n", $c, $s, $hash;
	80	push @keys, $s;
	81	} continue {
	82	$s++;
	83	}
	84
	85	return @keys;
	86	}
	87
	88
	89	# trying to provide the fastest equivalent of C macro's PERL_HASH in
	90	# Perl - the main complication is that it uses U32 integer, which we
	91	# can't do it perl, without doing some tricks
	92	sub hash {
	93	my $s = shift;
	94	my @c = split //, $s;
	95	my $u = HASH_SEED;
	96	for (@c) {
	97	# (A % M) + (B % M) == (A + B) % M
	98	# This works because '+' produces a NV, which is big enough to hold
33647f77	99	# the intermediate result. We only need the % before any "^" and "&"
1e73acc8	100	# to get the result in the range for an I32.
	101	# and << doesn't work on NV, so using 1 << 10
	102	$u += ord;
	103	$u += $u * (1 << 10); $u %= MASK_U32;
	104	$u ^= $u >> 6;
	105	}
	106	$u += $u << 3; $u %= MASK_U32;
	107	$u ^= $u >> 11; $u %= MASK_U32;
	108	$u += $u << 15; $u %= MASK_U32;
	109	$u;
	110	}