From: Nicholas Clark
Date: Sun, 28 Dec 2008 09:36:16 +0000 (+0000)
Subject: Precomputing the hash value for a string representable in bytes, but passed in
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=527df579b2667fd0b8db4c07bcab268e0f401be9;p=p5sagit%2Fp5-mst-13.2.git

Precomputing the hash value for a string representable in bytes, but passed in
in UTF-8, would result in storing the wrong hash value in the hash, and hence
failing lookups. I guess not that much XS code precomputes hash values.
---

diff --git a/ext/XS/APItest/APItest.xs b/ext/XS/APItest/APItest.xs
index 78ee526..e63d804 100644
--- a/ext/XS/APItest/APItest.xs
+++ b/ext/XS/APItest/APItest.xs
@@ -448,10 +448,14 @@ common(params)
         if ((svp = hv_fetchs(params, "action", 0)))
             action = SvIV(*svp);
         if ((svp = hv_fetchs(params, "val", 0)))
-            val = *svp;
+            val = newSVsv(*svp);
         if ((svp = hv_fetchs(params, "hash", 0)))
             hash = SvUV(*svp);
 
+        if ((svp = hv_fetchs(params, "hash_pv", 0))) {
+            PERL_HASH(hash, key, klen);
+        }
+
         result = (HE *)hv_common(hv, keysv, key, klen, flags, action, val, hash);
         if (!result) {
             XSRETURN_EMPTY;
diff --git a/ext/XS/APItest/t/hash.t b/ext/XS/APItest/t/hash.t
index 1ef99ed..47d4437 100644
--- a/ext/XS/APItest/t/hash.t
+++ b/ext/XS/APItest/t/hash.t
@@ -150,6 +150,27 @@ if ($] > 5.009) {
     }
 }
 
+{
+    my $as_utf8 = "\241" . chr 256;
+    chop $as_utf8;
+    my $as_bytes = "\243";
+    foreach my $key ('N', $as_bytes, $as_utf8, "\x{2623}") {
+        my $ord = ord $key;
+        foreach my $hash_pv (0, 1) {
+            my %hash;
+            is (XS::APItest::Hash::common({hv => \%hash, keypv => $key,
+                                           val => $ord, hash_pv => $hash_pv,
+                                           action =>
+                                           XS::APItest::HV_FETCH_ISSTORE}),
+                $ord, "store $ord \$hash_pv = $hash_pv");
+            is_deeply ([each %hash], [$key, $ord], "First key read is good");
+            is_deeply ([each %hash], [], "No second key good");
+
+            is ($hash{$key}, $ord, "Direct hash read finds $ord");
+        }
+    }
+}
+
 exit;
 
 ################################ The End ################################
diff --git a/hv.c b/hv.c
index 78a1097..b764c55 100644
--- a/hv.c
+++ b/hv.c
@@ -606,6 +606,11 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
                 if (flags & HVhek_FREEKEY)
                     Safefree(keysave);
                 flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
+                /* If the caller calculated a hash, it was on the sequence of
+                   octets that are the UTF-8 form. We've now changed the sequence
+                   of octets stored to that of the equivalent byte representation,
+                   so the hash we need is different. */
+                hash = 0;
             }
         }
 