From: Mike Guy Date: Fri, 6 Apr 2001 12:47:06 +0000 (+0100) Subject: Re: [PATCH dump.c] hash quality X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b8fa94d833cd9fcbc2e9db4b24f4b6f366a204a4;p=p5sagit%2Fp5-mst-13.2.git Re: [PATCH dump.c] hash quality Message-Id: p4raw-id: //depot/perl@9612 --- diff --git a/dump.c b/dump.c index 9010bc5..9d2595f 100644 --- a/dump.c +++ b/dump.c @@ -1044,15 +1044,24 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo } } PerlIO_putc(file, ')'); - /* Now calculate quality wrt theoretical value */ + /* The "quality" of a hash is defined as the total number of + comparisons needed to access every element once, relative + to the expected number needed for a random hash. + + The total number of comparisons is equal to the sum of + the squares of the number of entries in each backet. + For a random hash of n keys into k backets, the expected + value is + n + n(n-1)/2k + */ + for (i = max; i > 0; i--) { /* Precision: count down. */ sum += freq[i] * i * i; } while ((keys = keys >> 1)) pow2 = pow2 << 1; - /* Approximate by Poisson distribution */ theoret = HvKEYS(sv); - theoret += theoret * theoret/pow2; + theoret += theoret * (theoret-1)/pow2; PerlIO_putc(file, '\n'); Perl_dump_indent(aTHX_ level, file, " hash quality = %.1"NVff"%%", theoret/sum*100); } diff --git a/t/lib/peek.t b/t/lib/peek.t index 7bf1793..96e24a2 100644 --- a/t/lib/peek.t +++ b/t/lib/peek.t @@ -171,7 +171,7 @@ do_test(12, IV = 1 NV = 0 ARRAY = $ADDR \\(0:7, 1:1\\) - hash quality = 150.0% + hash quality = 100.0% KEYS = 1 FILL = 1 MAX = 7