From: Nicholas Clark Date: Sat, 25 Oct 2003 22:32:40 +0000 (+0000) Subject: Return 21533 (with modifications) having found the problem X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=fdcd69b65ac55392f341cae1cd6046f4815fa698;p=p5sagit%2Fp5-mst-13.2.git Return 21533 (with modifications) having found the problem p4raw-id: //depot/perl@21535 --- diff --git a/hv.c b/hv.c index 7a1d25b..46c63cd 100644 --- a/hv.c +++ b/hv.c @@ -20,6 +20,8 @@ #define PERL_IN_HV_C #include "perl.h" +#define HV_MAX_LENGTH_BEFORE_SPLIT 4 + STATIC HE* S_new_he(pTHX) { @@ -277,6 +279,10 @@ S_hv_fetch_flags(pTHX_ HV *hv, const char *key, I32 klen, I32 lval, int flags) if (HvREHASH(hv)) { PERL_HASH_INTERNAL(hash, key, klen); + /* Yes, you do need this even though you are not "storing" because + you can flip the flags below if doing an lval lookup. (And that + was put in to give the semantics Andreas was expecting.) */ + flags |= HVhek_REHASH; } else { PERL_HASH(hash, key, klen); } @@ -313,7 +319,7 @@ S_hv_fetch_flags(pTHX_ HV *hv, const char *key, I32 klen, I32 lval, int flags) } else HeKFLAGS(entry) = flags; - if (flags) + if (flags & HVhek_ENABLEHVKFLAGS) HvHASKFLAGS_on(hv); } if (flags & HVhek_FREEKEY) @@ -452,6 +458,10 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash) if (HvREHASH(hv)) { PERL_HASH_INTERNAL(hash, key, klen); + /* Yes, you do need this even though you are not "storing" because + you can flip the flags below if doing an lval lookup. (And that + was put in to give the semantics Andreas was expecting.) */ + flags |= HVhek_REHASH; } else if (!hash) { if SvIsCOW_shared_hash(keysv) { hash = SvUVX(keysv); @@ -487,7 +497,7 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash) } else HeKFLAGS(entry) = flags; - if (flags) + if (flags & HVhek_ENABLEHVKFLAGS) HvHASKFLAGS_on(hv); } if (key != keysave) @@ -603,7 +613,7 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 hash, int flags) { register XPVHV* xhv; - register I32 i; + register U32 n_links; register HE *entry; register HE **oentry; @@ -650,9 +660,10 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; - i = 1; - for (entry = *oentry; entry; i=0, entry = HeNEXT(entry)) { + n_links = 0; + + for (entry = *oentry; entry; ++n_links, entry = HeNEXT(entry)) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; if (HeKLEN(entry) != (I32)klen) @@ -719,9 +730,16 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, *oentry = entry; xhv->xhv_keys++; /* HvKEYS(hv)++ */ - if (i) { /* initial entry? */ + if (!n_links) { /* initial entry? */ xhv->xhv_fill++; /* HvFILL(hv)++ */ - } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) { + } else if ((xhv->xhv_keys > (IV)xhv->xhv_max) + || ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT) && !HvREHASH(hv))) { + /* Use the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket + splits on a rehashed hash, as we're not going to split it again, + and if someone is lucky (evil) enough to get all the keys in one + list they could exhaust our memory as we repeatedly double the + number of buckets on every entry. Linear search feels a less worse + thing to do. */ hsplit(hv); } @@ -763,7 +781,7 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash) XPVHV* xhv; char *key; STRLEN klen; - I32 i; + U32 n_links; HE *entry; HE **oentry; bool is_utf8; @@ -830,9 +848,9 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash) /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; - i = 1; + n_links = 0; entry = *oentry; - for (; entry; i=0, entry = HeNEXT(entry)) { + for (; entry; ++n_links, entry = HeNEXT(entry)) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; if (HeKLEN(entry) != (I32)klen) @@ -886,10 +904,17 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash) *oentry = entry; xhv->xhv_keys++; /* HvKEYS(hv)++ */ - if (i) { /* initial entry? */ + if (!n_links) { /* initial entry? */ xhv->xhv_fill++; /* HvFILL(hv)++ */ - } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) { - hsplit(hv); + } else if ((xhv->xhv_keys > (IV)xhv->xhv_max) + || ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT) && !HvREHASH(hv))) { + /* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket + splits on a rehashed hash, as we're not going to split it again, + and if someone is lucky (evil) enough to get all the keys in one + list they could exhaust our memory as we repeatedly double the + number of buckets on every entry. Linear search feels a less worse + thing to do. */ + hsplit(hv); } return entry; @@ -1511,7 +1536,7 @@ S_hsplit(pTHX_ HV *hv) /* Pick your policy for "hashing isn't working" here: */ - if (longest_chain < 8 || longest_chain * 2 < HvTOTALKEYS(hv) + if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */ || HvREHASH(hv)) { return; } @@ -1523,7 +1548,7 @@ S_hsplit(pTHX_ HV *hv) } /* Awooga. Awooga. Pathological data. */ - /*PerlIO_printf(PerlIO_stderr(), "Awooga %d of %d with %d/%d buckets\n", + /*PerlIO_printf(PerlIO_stderr(), "%p %d of %d with %d/%d buckets\n", hv, longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/ ++newsize; @@ -1533,7 +1558,6 @@ S_hsplit(pTHX_ HV *hv) xhv->xhv_fill = 0; HvSHAREKEYS_off(hv); HvREHASH_on(hv); - HvHASKFLAGS_on(hv); aep = (HE **) xhv->xhv_array; @@ -2096,6 +2120,9 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) hv_free_ent(hv, oldentry); } + /*if (HvREHASH(hv) && entry && !HeKREHASH(entry)) + PerlIO_printf(PerlIO_stderr(), "Awooga %p %p\n", hv, entry);*/ + xhv->xhv_eiter = entry; /* HvEITER(hv) = entry */ return entry; } @@ -2385,6 +2412,9 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags) if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE))) hv_store(PL_strtab, str, len, Nullsv, hash); + + Can't rehash the shared string table, so not sure if it's worth + counting the number of entries in the linked list */ xhv = (XPVHV*)SvANY(PL_strtab); /* assert(xhv_array != 0) */ diff --git a/hv.h b/hv.h index 1c7fcda..5f83440 100644 --- a/hv.h +++ b/hv.h @@ -212,6 +212,7 @@ C. * is utf8 (including 8 bit keys that were entered as utf8, and need upgrading * when retrieved during iteration. It may still be set when there are no longer * any utf8 keys. + * See HVhek_ENABLEHVKFLAGS for the trigger. */ #define HvHASKFLAGS(hv) (SvFLAGS(hv) & SVphv_HASKFLAGS) #define HvHASKFLAGS_on(hv) (SvFLAGS(hv) |= SVphv_HASKFLAGS) @@ -283,6 +284,16 @@ C. * (may change, but Storable is a core module) */ #define HVhek_MASK 0xFF +/* Which flags enable HvHASKFLAGS? Somewhat a hack on a hack, as + HVhek_REHASH is only needed because the rehash flag has to be duplicated + into all keys as hv_iternext has no access to the hash flags. At this + point Storable's tests get upset, because sometimes hashes are "keyed" + and sometimes not, depending on the order of data insertion, and whether + it triggered rehashing. So currently HVhek_REHAS is exempt. +*/ + +#define HVhek_ENABLEHVKFLAGS (HVhek_MASK - HVhek_REHASH) + #define HEK_UTF8(hek) (HEK_FLAGS(hek) & HVhek_UTF8) #define HEK_UTF8_on(hek) (HEK_FLAGS(hek) |= HVhek_UTF8) #define HEK_UTF8_off(hek) (HEK_FLAGS(hek) &= ~HVhek_UTF8)