/* hv.c
*
- * Copyright (c) 1991-2002, Larry Wall
+ * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ * 2000, 2001, 2002, 2003, by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
#include "EXTERN.h"
#define PERL_IN_HV_C
+#define PERL_HASH_INTERNAL_ACCESS
#include "perl.h"
+#define HV_MAX_LENGTH_BEFORE_SPLIT 14
+
STATIC HE*
S_new_he(pTHX)
{
return hek;
}
+/* free the pool of temporary HE/HEK pairs retunrned by hv_fetch_ent
+ * for tied hashes */
+
+void
+Perl_free_tied_hv_pool(pTHX)
+{
+ HE *ohe;
+ HE *he = PL_hv_fetch_ent_mh;
+ while (he) {
+ Safefree(HeKEY_hek(he));
+ ohe = he;
+ he = HeNEXT(he);
+ del_HE(ohe);
+ }
+ PL_hv_fetch_ent_mh = Nullhe;
+}
+
#if defined(USE_ITHREADS)
HE *
Perl_he_dup(pTHX_ HE *e, bool shared, CLONE_PARAMS* param)
ptr_table_store(PL_ptr_table, e, ret);
HeNEXT(ret) = he_dup(HeNEXT(e),shared, param);
- if (HeKLEN(e) == HEf_SVKEY)
+ if (HeKLEN(e) == HEf_SVKEY) {
+ char *k;
+ New(54, k, HEK_BASESIZE + sizeof(SV*), char);
+ HeKEY_hek(ret) = (HEK*)k;
HeKEY_sv(ret) = SvREFCNT_inc(sv_dup(HeKEY_sv(e), param));
+ }
else if (shared)
HeKEY_hek(ret) = share_hek_flags(HeKEY(e), HeKLEN(e), HeHASH(e),
HeKFLAGS(e));
}
else {
/* Need to free saved eventually assign to mortal SV */
- SV *sv = sv_newmortal();
+ /* XXX is this line an error ???: SV *sv = sv_newmortal(); */
sv_usepvn(sv, (char *) key, klen);
}
if (flags & HVhek_UTF8) {
return 0;
if (SvRMAGICAL(hv)) {
- /* All this clause seems to be utf8 unaware.
- By moving the utf8 stuff out to hv_fetch_flags I need to ensure
- key doesn't leak. I've not tried solving the utf8-ness.
- NWC.
- */
if (mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)) {
sv = sv_newmortal();
- mg_copy((SV*)hv, sv, key, klen);
+ sv_upgrade(sv, SVt_PVLV);
+ if (flags & HVhek_UTF8) {
+ /* This hack based on the code in hv_exists_ent seems to be
+ the easiest way to pass the utf8 flag through and fix
+ the bug in hv_exists for tied hashes with utf8 keys. */
+ SV *keysv = sv_2mortal(newSVpvn(key, klen));
+ SvUTF8_on(keysv);
+ mg_copy((SV*)hv, sv, (char *)keysv, HEf_SVKEY);
+ } else {
+ mg_copy((SV*)hv, sv, key, klen);
+ }
if (flags & HVhek_FREEKEY)
Safefree(key);
- PL_hv_fetch_sv = sv;
- return &PL_hv_fetch_sv;
+ LvTYPE(sv) = 't';
+ LvTARG(sv) = sv; /* fake (SV**) */
+ return &(LvTARG(sv));
}
#ifdef ENV_IS_CASELESS
else if (mg_find((SV*)hv, PERL_MAGIC_env)) {
}
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ /* Yes, you do need this even though you are not "storing" because
+ you can flip the flags below if doing an lval lookup. (And that
+ was put in to give the semantics Andreas was expecting.) */
+ flags |= HVhek_REHASH;
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
/* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
for (; entry; entry = HeNEXT(entry)) {
+ if (!HeKEY_hek(entry))
+ continue;
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
}
else
HeKFLAGS(entry) = flags;
+ if (flags & HVhek_ENABLEHVKFLAGS)
+ HvHASKFLAGS_on(hv);
}
if (flags & HVhek_FREEKEY)
Safefree(key);
/* if we find a placeholder, we pretend we haven't found anything */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
break;
return &HeVAL(entry);
if (SvRMAGICAL(hv)) {
if (mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)) {
sv = sv_newmortal();
- keysv = sv_2mortal(newSVsv(keysv));
+ keysv = newSVsv(keysv);
mg_copy((SV*)hv, sv, (char*)keysv, HEf_SVKEY);
- if (!HeKEY_hek(&PL_hv_fetch_ent_mh)) {
+ /* grab a fake HE/HEK pair from the pool or make a new one */
+ entry = PL_hv_fetch_ent_mh;
+ if (entry)
+ PL_hv_fetch_ent_mh = HeNEXT(entry);
+ else {
char *k;
+ entry = new_HE();
New(54, k, HEK_BASESIZE + sizeof(SV*), char);
- HeKEY_hek(&PL_hv_fetch_ent_mh) = (HEK*)k;
+ HeKEY_hek(entry) = (HEK*)k;
}
- HeSVKEY_set(&PL_hv_fetch_ent_mh, keysv);
- HeVAL(&PL_hv_fetch_ent_mh) = sv;
- return &PL_hv_fetch_ent_mh;
- }
+ HeNEXT(entry) = Nullhe;
+ HeSVKEY_set(entry, keysv);
+ HeVAL(entry) = sv;
+ sv_upgrade(sv, SVt_PVLV);
+ LvTYPE(sv) = 'T';
+ LvTARG(sv) = (SV*)entry; /* so we can free entry when freeing sv */
+ return entry;
+ }
#ifdef ENV_IS_CASELESS
else if (mg_find((SV*)hv, PERL_MAGIC_env)) {
U32 i;
#endif
}
+ keysave = key = SvPV(keysv, klen);
xhv = (XPVHV*)SvANY(hv);
if (!xhv->xhv_array /* !HvARRAY(hv) */) {
if (lval
return 0;
}
- keysave = key = SvPV(keysv, klen);
is_utf8 = (SvUTF8(keysv)!=0);
if (is_utf8) {
flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
}
- if (!hash)
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ /* Yes, you do need this even though you are not "storing" because
+ you can flip the flags below if doing an lval lookup. (And that
+ was put in to give the semantics Andreas was expecting.) */
+ flags |= HVhek_REHASH;
+ } else if (!hash) {
+ if SvIsCOW_shared_hash(keysv) {
+ hash = SvUVX(keysv);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
+ }
/* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
}
else
HeKFLAGS(entry) = flags;
+ if (flags & HVhek_ENABLEHVKFLAGS)
+ HvHASKFLAGS_on(hv);
}
if (key != keysave)
Safefree(key);
/* if we find a placeholder, we pretend we haven't found anything */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
break;
return entry;
}
stored within the hash (as in the case of tied hashes). Otherwise it can
be dereferenced to get the original C<SV*>. Note that the caller is
responsible for suitably incrementing the reference count of C<val> before
-the call, and decrementing it if the function returned NULL.
+the call, and decrementing it if the function returned NULL. Effectively
+a successful hv_store takes ownership of one reference to C<val>. This is
+usually what you want; a newly created SV has a reference count of one, so
+if all your code does is create SVs then store them in a hash, hv_store
+will own the only reference to the new SV, and your code doesn't need to do
+anything further to tidy up. hv_store is not implemented as a call to
+hv_store_ent, and does not create a temporary SV for the key, so if your
+key data is not already in SV form then use hv_store in preference to
+hv_store_ent.
See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
information on how to use this function on tied hashes.
register U32 hash, int flags)
{
register XPVHV* xhv;
- register I32 i;
+ register U32 n_links;
register HE *entry;
register HE **oentry;
bool needs_store;
hv_magic_check (hv, &needs_copy, &needs_store);
if (needs_copy) {
- mg_copy((SV*)hv, val, key, klen);
+ if (flags & HVhek_UTF8) {
+ /* This hack based on the code in hv_exists_ent seems to be
+ the easiest way to pass the utf8 flag through and fix
+ the bug in hv_exists for tied hashes with utf8 keys. */
+ SV *keysv = sv_2mortal(newSVpvn(key, klen));
+ SvUTF8_on(keysv);
+ mg_copy((SV*)hv, val, (char *)keysv, HEf_SVKEY);
+ } else {
+ mg_copy((SV*)hv, val, key, klen);
+ }
if (!xhv->xhv_array /* !HvARRAY */ && !needs_store) {
if (flags & HVhek_FREEKEY)
Safefree(key);
if (flags)
HvHASKFLAGS_on((SV*)hv);
- if (!hash)
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
if (!xhv->xhv_array /* !HvARRAY(hv) */)
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
- i = 1;
- for (entry = *oentry; entry; i=0, entry = HeNEXT(entry)) {
+ n_links = 0;
+
+ for (entry = *oentry; entry; ++n_links, entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
continue;
if ((HeKFLAGS(entry) ^ flags) & HVhek_UTF8)
continue;
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
xhv->xhv_placeholders--; /* yes, can store into placeholder slot */
else
SvREFCNT_dec(HeVAL(entry));
/* We have been requested to insert a placeholder. Currently
only Storable is allowed to do this. */
xhv->xhv_placeholders++;
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
} else
HeVAL(entry) = val;
/* We have been requested to insert a placeholder. Currently
only Storable is allowed to do this. */
xhv->xhv_placeholders++;
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
} else
HeVAL(entry) = val;
HeNEXT(entry) = *oentry;
*oentry = entry;
xhv->xhv_keys++; /* HvKEYS(hv)++ */
- if (i) { /* initial entry? */
+ if (!n_links) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
- if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */)
- hsplit(hv);
+ } else if ((xhv->xhv_keys > (IV)xhv->xhv_max)
+ || ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT) && !HvREHASH(hv))) {
+ /* Use the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket
+ splits on a rehashed hash, as we're not going to split it again,
+ and if someone is lucky (evil) enough to get all the keys in one
+ list they could exhaust our memory as we repeatedly double the
+ number of buckets on every entry. Linear search feels a less worse
+ thing to do. */
+ hsplit(hv);
}
return &HeVAL(entry);
contents of the return value can be accessed using the C<He?> macros
described here. Note that the caller is responsible for suitably
incrementing the reference count of C<val> before the call, and
-decrementing it if the function returned NULL.
+decrementing it if the function returned NULL. Effectively a successful
+hv_store_ent takes ownership of one reference to C<val>. This is
+usually what you want; a newly created SV has a reference count of one, so
+if all your code does is create SVs then store them in a hash, hv_store
+will own the only reference to the new SV, and your code doesn't need to do
+anything further to tidy up. Note that hv_store_ent only reads the C<key>;
+unlike C<val> it does not take ownership of it, so maintaining the correct
+reference count on C<key> is entirely the caller's responsibility. hv_store
+is not implemented as a call to hv_store_ent, and does not create a temporary
+SV for the key, so if your key data is not already in SV form then use
+hv_store in preference to hv_store_ent.
See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
information on how to use this function on tied hashes.
XPVHV* xhv;
char *key;
STRLEN klen;
- I32 i;
+ U32 n_links;
HE *entry;
HE **oentry;
bool is_utf8;
HvHASKFLAGS_on((SV*)hv);
}
- if (!hash)
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
+ if SvIsCOW_shared_hash(keysv) {
+ hash = SvUVX(keysv);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
+ }
if (!xhv->xhv_array /* !HvARRAY(hv) */)
Newz(505, xhv->xhv_array /* HvARRAY(hv) */,
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
- i = 1;
+ n_links = 0;
entry = *oentry;
- for (; entry; i=0, entry = HeNEXT(entry)) {
+ for (; entry; ++n_links, entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
continue;
if ((HeKFLAGS(entry) ^ flags) & HVhek_UTF8)
continue;
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
xhv->xhv_placeholders--; /* yes, can store into placeholder slot */
else
SvREFCNT_dec(HeVAL(entry));
*oentry = entry;
xhv->xhv_keys++; /* HvKEYS(hv)++ */
- if (i) { /* initial entry? */
+ if (!n_links) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
- if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */)
- hsplit(hv);
+ } else if ((xhv->xhv_keys > (IV)xhv->xhv_max)
+ || ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT) && !HvREHASH(hv))) {
+ /* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket
+ splits on a rehashed hash, as we're not going to split it again,
+ and if someone is lucky (evil) enough to get all the keys in one
+ list they could exhaust our memory as we repeatedly double the
+ number of buckets on every entry. Linear search feels a less worse
+ thing to do. */
+ hsplit(hv);
}
return entry;
SV *
Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
{
- register XPVHV* xhv;
- register I32 i;
- register U32 hash;
- register HE *entry;
- register HE **oentry;
- SV **svp;
- SV *sv;
- bool is_utf8 = FALSE;
- int k_flags = 0;
- const char *keysave = key;
-
- if (!hv)
- return Nullsv;
- if (klen < 0) {
- klen = -klen;
- is_utf8 = TRUE;
- }
- if (SvRMAGICAL(hv)) {
- bool needs_copy;
- bool needs_store;
- hv_magic_check (hv, &needs_copy, &needs_store);
-
- if (needs_copy && (svp = hv_fetch(hv, key, klen, TRUE))) {
- sv = *svp;
- mg_clear(sv);
- if (!needs_store) {
- if (mg_find(sv, PERL_MAGIC_tiedelem)) {
- /* No longer an element */
- sv_unmagic(sv, PERL_MAGIC_tiedelem);
- return sv;
- }
- return Nullsv; /* element cannot be deleted */
- }
-#ifdef ENV_IS_CASELESS
- else if (mg_find((SV*)hv, PERL_MAGIC_env)) {
- sv = sv_2mortal(newSVpvn(key,klen));
- key = strupr(SvPVX(sv));
- }
-#endif
- }
- }
- xhv = (XPVHV*)SvANY(hv);
- if (!xhv->xhv_array /* !HvARRAY(hv) */)
- return Nullsv;
-
- if (is_utf8) {
- STRLEN tmplen = klen;
- /* See the note in hv_fetch(). --jhi */
- key = (char*)bytes_from_utf8((U8*)key, &tmplen, &is_utf8);
- klen = tmplen;
- if (is_utf8)
- k_flags = HVhek_UTF8;
- if (key != keysave)
- k_flags |= HVhek_FREEKEY;
- }
-
- PERL_HASH(hash, key, klen);
-
- /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
- oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
- entry = *oentry;
- i = 1;
- for (; entry; i=0, oentry = &HeNEXT(entry), entry = *oentry) {
- if (HeHASH(entry) != hash) /* strings can't be equal */
- continue;
- if (HeKLEN(entry) != (I32)klen)
- continue;
- if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
- continue;
- if ((HeKFLAGS(entry) ^ k_flags) & HVhek_UTF8)
- continue;
- if (k_flags & HVhek_FREEKEY)
- Safefree(key);
- /* if placeholder is here, it's already been deleted.... */
- if (HeVAL(entry) == &PL_sv_undef)
- {
- if (SvREADONLY(hv))
- return Nullsv; /* if still SvREADONLY, leave it deleted. */
- else {
- /* okay, really delete the placeholder... */
- *oentry = HeNEXT(entry);
- if (i && !*oentry)
- xhv->xhv_fill--; /* HvFILL(hv)-- */
- if (entry == xhv->xhv_eiter /* HvEITER(hv) */)
- HvLAZYDEL_on(hv);
- else
- hv_free_ent(hv, entry);
- xhv->xhv_keys--; /* HvKEYS(hv)-- */
- if (xhv->xhv_keys == 0)
- HvHASKFLAGS_off(hv);
- xhv->xhv_placeholders--;
- return Nullsv;
- }
- }
- else if (SvREADONLY(hv) && HeVAL(entry) && SvREADONLY(HeVAL(entry))) {
- S_hv_notallowed(aTHX_ k_flags, key, klen,
- "delete readonly key '%"SVf"' from"
- );
- }
-
- if (flags & G_DISCARD)
- sv = Nullsv;
- else {
- sv = sv_2mortal(HeVAL(entry));
- HeVAL(entry) = &PL_sv_undef;
- }
-
- /*
- * If a restricted hash, rather than really deleting the entry, put
- * a placeholder there. This marks the key as being "approved", so
- * we can still access via not-really-existing key without raising
- * an error.
- */
- if (SvREADONLY(hv)) {
- HeVAL(entry) = &PL_sv_undef;
- /* We'll be saving this slot, so the number of allocated keys
- * doesn't go down, but the number placeholders goes up */
- xhv->xhv_placeholders++; /* HvPLACEHOLDERS(hv)++ */
- } else {
- *oentry = HeNEXT(entry);
- if (i && !*oentry)
- xhv->xhv_fill--; /* HvFILL(hv)-- */
- if (entry == xhv->xhv_eiter /* HvEITER(hv) */)
- HvLAZYDEL_on(hv);
- else
- hv_free_ent(hv, entry);
- xhv->xhv_keys--; /* HvKEYS(hv)-- */
- if (xhv->xhv_keys == 0)
- HvHASKFLAGS_off(hv);
- }
- return sv;
- }
- if (SvREADONLY(hv)) {
- S_hv_notallowed(aTHX_ k_flags, key, klen,
- "access disallowed key '%"SVf"' from"
- );
- }
-
- if (k_flags & HVhek_FREEKEY)
- Safefree(key);
- return Nullsv;
+ return hv_delete_common(hv, NULL, key, klen, flags, 0);
}
/*
SV *
Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
{
+ return hv_delete_common(hv, keysv, NULL, 0, flags, hash);
+}
+
+SV *
+S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen_i32,
+ I32 flags, U32 hash)
+{
register XPVHV* xhv;
register I32 i;
- register char *key;
STRLEN klen;
register HE *entry;
register HE **oentry;
SV *sv;
bool is_utf8;
int k_flags = 0;
- char *keysave;
+ const char *keysave;
if (!hv)
return Nullsv;
+
+ if (keysv) {
+ key = SvPV(keysv, klen);
+ is_utf8 = (SvUTF8(keysv) != 0);
+ } else {
+ if (klen_i32 < 0) {
+ klen = -klen_i32;
+ is_utf8 = TRUE;
+ } else {
+ klen = klen_i32;
+ is_utf8 = FALSE;
+ }
+ }
+ keysave = key;
+
if (SvRMAGICAL(hv)) {
bool needs_copy;
bool needs_store;
hv_magic_check (hv, &needs_copy, &needs_store);
- if (needs_copy && (entry = hv_fetch_ent(hv, keysv, TRUE, hash))) {
- sv = HeVAL(entry);
- mg_clear(sv);
- if (!needs_store) {
- if (mg_find(sv, PERL_MAGIC_tiedelem)) {
- /* No longer an element */
- sv_unmagic(sv, PERL_MAGIC_tiedelem);
- return sv;
- }
- return Nullsv; /* element cannot be deleted */
+ if (needs_copy) {
+ sv = NULL;
+ if (keysv) {
+ if ((entry = hv_fetch_ent(hv, keysv, TRUE, hash))) {
+ sv = HeVAL(entry);
+ }
+ } else {
+ SV **svp;
+ if ((svp = hv_fetch(hv, key, is_utf8 ? -klen : klen, TRUE))) {
+ sv = *svp;
+ }
+ }
+ if (sv) {
+ if (SvMAGICAL(sv)) {
+ mg_clear(sv);
+ }
+ if (!needs_store) {
+ if (mg_find(sv, PERL_MAGIC_tiedelem)) {
+ /* No longer an element */
+ sv_unmagic(sv, PERL_MAGIC_tiedelem);
+ return sv;
+ }
+ return Nullsv; /* element cannot be deleted */
+ }
}
#ifdef ENV_IS_CASELESS
else if (mg_find((SV*)hv, PERL_MAGIC_env)) {
- key = SvPV(keysv, klen);
+ /* XXX This code isn't UTF8 clean. */
keysv = sv_2mortal(newSVpvn(key,klen));
- (void)strupr(SvPVX(keysv));
+ keysave = key = strupr(SvPVX(keysv));
+ is_utf8 = 0;
hash = 0;
}
#endif
if (!xhv->xhv_array /* !HvARRAY(hv) */)
return Nullsv;
- keysave = key = SvPV(keysv, klen);
- is_utf8 = (SvUTF8(keysv) != 0);
-
if (is_utf8) {
key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
if (is_utf8)
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
PERL_HASH(hash, key, klen);
+ }
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
Safefree(key);
/* if placeholder is here, it's already been deleted.... */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
{
if (SvREADONLY(hv))
return Nullsv; /* if still SvREADONLY, leave it deleted. */
sv = Nullsv;
else {
sv = sv_2mortal(HeVAL(entry));
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
}
/*
* an error.
*/
if (SvREADONLY(hv)) {
- HeVAL(entry) = &PL_sv_undef;
+ HeVAL(entry) = &PL_sv_placeholder;
/* We'll be saving this slot, so the number of allocated keys
* doesn't go down, but the number placeholders goes up */
xhv->xhv_placeholders++; /* HvPLACEHOLDERS(hv)++ */
bool
Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
{
- register XPVHV* xhv;
- register U32 hash;
- register HE *entry;
- SV *sv;
- bool is_utf8 = FALSE;
- const char *keysave = key;
- int k_flags = 0;
-
- if (!hv)
- return 0;
-
- if (klen < 0) {
- klen = -klen;
- is_utf8 = TRUE;
- }
-
- if (SvRMAGICAL(hv)) {
- if (mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)) {
- sv = sv_newmortal();
- mg_copy((SV*)hv, sv, key, klen);
- magic_existspack(sv, mg_find(sv, PERL_MAGIC_tiedelem));
- return SvTRUE(sv);
- }
-#ifdef ENV_IS_CASELESS
- else if (mg_find((SV*)hv, PERL_MAGIC_env)) {
- sv = sv_2mortal(newSVpvn(key,klen));
- key = strupr(SvPVX(sv));
- }
-#endif
- }
-
- xhv = (XPVHV*)SvANY(hv);
-#ifndef DYNAMIC_ENV_FETCH
- if (!xhv->xhv_array /* !HvARRAY(hv) */)
- return 0;
-#endif
-
- if (is_utf8) {
- STRLEN tmplen = klen;
- /* See the note in hv_fetch(). --jhi */
- key = (char*)bytes_from_utf8((U8*)key, &tmplen, &is_utf8);
- klen = tmplen;
- if (is_utf8)
- k_flags = HVhek_UTF8;
- if (key != keysave)
- k_flags |= HVhek_FREEKEY;
- }
-
- PERL_HASH(hash, key, klen);
-
-#ifdef DYNAMIC_ENV_FETCH
- if (!xhv->xhv_array /* !HvARRAY(hv) */) entry = Null(HE*);
- else
-#endif
- /* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
- entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
- for (; entry; entry = HeNEXT(entry)) {
- if (HeHASH(entry) != hash) /* strings can't be equal */
- continue;
- if (HeKLEN(entry) != klen)
- continue;
- if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
- continue;
- if ((HeKFLAGS(entry) ^ k_flags) & HVhek_UTF8)
- continue;
- if (k_flags & HVhek_FREEKEY)
- Safefree(key);
- /* If we find the key, but the value is a placeholder, return false. */
- if (HeVAL(entry) == &PL_sv_undef)
- return FALSE;
-
- return TRUE;
- }
-#ifdef DYNAMIC_ENV_FETCH /* is it out there? */
- if (SvRMAGICAL((SV*)hv) && mg_find((SV*)hv, PERL_MAGIC_env)) {
- unsigned long len;
- char *env = PerlEnv_ENVgetenv_len(key,&len);
- if (env) {
- sv = newSVpvn(env,len);
- SvTAINTED_on(sv);
- (void)hv_store(hv,key,klen,sv,hash);
- if (k_flags & HVhek_FREEKEY)
- Safefree(key);
- return TRUE;
- }
- }
-#endif
- if (k_flags & HVhek_FREEKEY)
- Safefree(key);
- return FALSE;
+ return hv_exists_common(hv, NULL, key, klen, 0);
}
-
/*
=for apidoc hv_exists_ent
bool
Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
{
+ return hv_exists_common(hv, keysv, NULL, 0, hash);
+}
+
+bool
+S_hv_exists_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen_i32,
+ U32 hash)
+{
register XPVHV* xhv;
- register char *key;
STRLEN klen;
register HE *entry;
SV *sv;
bool is_utf8;
- char *keysave;
+ const char *keysave;
int k_flags = 0;
if (!hv)
return 0;
+ if (keysv) {
+ key = SvPV(keysv, klen);
+ is_utf8 = (SvUTF8(keysv) != 0);
+ } else {
+ if (klen_i32 < 0) {
+ klen = -klen_i32;
+ is_utf8 = TRUE;
+ } else {
+ klen = klen_i32;
+ is_utf8 = FALSE;
+ }
+ }
+ keysave = key;
+
if (SvRMAGICAL(hv)) {
if (mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)) {
- SV* svret = sv_newmortal();
+ SV* svret;
+
+ if (keysv || is_utf8) {
+ if (!keysv) {
+ keysv = newSVpvn(key, klen);
+ SvUTF8_on(keysv);
+ } else {
+ keysv = newSVsv(keysv);
+ }
+ key = (char *)sv_2mortal(keysv);
+ klen = HEf_SVKEY;
+ }
+
+ /* I don't understand why hv_exists_ent has svret and sv,
+ whereas hv_exists only had one. */
+ svret = sv_newmortal();
sv = sv_newmortal();
- keysv = sv_2mortal(newSVsv(keysv));
- mg_copy((SV*)hv, sv, (char*)keysv, HEf_SVKEY);
- magic_existspack(svret, mg_find(sv, PERL_MAGIC_tiedelem));
- return SvTRUE(svret);
+ mg_copy((SV*)hv, sv, key, klen);
+ magic_existspack(svret, mg_find(sv, PERL_MAGIC_tiedelem));
+ return (bool)SvTRUE(svret);
}
#ifdef ENV_IS_CASELESS
else if (mg_find((SV*)hv, PERL_MAGIC_env)) {
- key = SvPV(keysv, klen);
+ /* XXX This code isn't UTF8 clean. */
keysv = sv_2mortal(newSVpvn(key,klen));
- (void)strupr(SvPVX(keysv));
+ keysave = key = strupr(SvPVX(keysv));
+ is_utf8 = 0;
hash = 0;
}
#endif
return 0;
#endif
- keysave = key = SvPV(keysv, klen);
- is_utf8 = (SvUTF8(keysv) != 0);
if (is_utf8) {
key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
if (is_utf8)
if (key != keysave)
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
#ifdef DYNAMIC_ENV_FETCH
if (k_flags & HVhek_FREEKEY)
Safefree(key);
/* If we find the key, but the value is a placeholder, return false. */
- if (HeVAL(entry) == &PL_sv_undef)
+ if (HeVAL(entry) == &PL_sv_placeholder)
return FALSE;
return TRUE;
}
return FALSE;
}
+
STATIC void
S_hsplit(pTHX_ HV *hv)
{
register HE **bep;
register HE *entry;
register HE **oentry;
+ int longest_chain = 0;
+ int was_shared;
PL_nomemok = TRUE;
#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
aep = (HE**)a;
for (i=0; i<oldsize; i++,aep++) {
+ int left_length = 0;
+ int right_length = 0;
+
if (!*aep) /* non-existent */
continue;
bep = aep+oldsize;
if (!*bep)
xhv->xhv_fill++; /* HvFILL(hv)++ */
*bep = entry;
+ right_length++;
continue;
}
- else
+ else {
oentry = &HeNEXT(entry);
+ left_length++;
+ }
}
if (!*aep) /* everything moved */
xhv->xhv_fill--; /* HvFILL(hv)-- */
+ /* I think we don't actually need to keep track of the longest length,
+ merely flag if anything is too long. But for the moment while
+ developing this code I'll track it. */
+ if (left_length > longest_chain)
+ longest_chain = left_length;
+ if (right_length > longest_chain)
+ longest_chain = right_length;
}
+
+
+ /* Pick your policy for "hashing isn't working" here: */
+ if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */
+ || HvREHASH(hv)) {
+ return;
+ }
+
+ if (hv == PL_strtab) {
+ /* Urg. Someone is doing something nasty to the string table.
+ Can't win. */
+ return;
+ }
+
+ /* Awooga. Awooga. Pathological data. */
+ /*PerlIO_printf(PerlIO_stderr(), "%p %d of %d with %d/%d buckets\n", hv,
+ longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/
+
+ ++newsize;
+ Newz(2, a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
+ was_shared = HvSHAREKEYS(hv);
+
+ xhv->xhv_fill = 0;
+ HvSHAREKEYS_off(hv);
+ HvREHASH_on(hv);
+
+ aep = (HE **) xhv->xhv_array;
+
+ for (i=0; i<newsize; i++,aep++) {
+ entry = *aep;
+ while (entry) {
+ /* We're going to trash this HE's next pointer when we chain it
+ into the new hash below, so store where we go next. */
+ HE *next = HeNEXT(entry);
+ UV hash;
+
+ /* Rehash it */
+ PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry));
+
+ if (was_shared) {
+ /* Unshare it. */
+ HEK *new_hek
+ = save_hek_flags(HeKEY(entry), HeKLEN(entry),
+ hash, HeKFLAGS(entry));
+ unshare_hek (HeKEY_hek(entry));
+ HeKEY_hek(entry) = new_hek;
+ } else {
+ /* Not shared, so simply write the new hash in. */
+ HeHASH(entry) = hash;
+ }
+ /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/
+ HEK_REHASH_on(HeKEY_hek(entry));
+ /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/
+
+ /* Copy oentry to the correct new chain. */
+ bep = ((HE**)a) + (hash & (I32) xhv->xhv_max);
+ if (!*bep)
+ xhv->xhv_fill++; /* HvFILL(hv)++ */
+ HeNEXT(entry) = *bep;
+ *bep = entry;
+
+ entry = next;
+ }
+ }
+ Safefree (xhv->xhv_array);
+ xhv->xhv_array = a; /* HvARRAY(hv) = a */
}
void
#ifndef NODEFAULT_SHAREKEYS
HvSHAREKEYS_on(hv); /* key-sharing on by default */
#endif
+
xhv->xhv_max = 7; /* HvMAX(hv) = 7 (start with 8 buckets) */
xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */
xhv->xhv_pmroot = 0; /* HvPMROOT(hv) = 0 */
if (!hv)
return;
- if(SvREADONLY(hv)) {
- Perl_croak(aTHX_ "Attempt to clear a restricted hash");
- }
+ DEBUG_A(Perl_hv_assert(aTHX_ hv));
xhv = (XPVHV*)SvANY(hv);
+
+ if (SvREADONLY(hv) && xhv->xhv_array != NULL) {
+ /* restricted hash: convert all keys to placeholders */
+ I32 i;
+ HE* entry;
+ for (i = 0; i <= (I32) xhv->xhv_max; i++) {
+ entry = ((HE**)xhv->xhv_array)[i];
+ for (; entry; entry = HeNEXT(entry)) {
+ /* not already placeholder */
+ if (HeVAL(entry) != &PL_sv_placeholder) {
+ if (HeVAL(entry) && SvREADONLY(HeVAL(entry))) {
+ SV* keysv = hv_iterkeysv(entry);
+ Perl_croak(aTHX_
+ "Attempt to delete readonly key '%"SVf"' from a restricted hash",
+ keysv);
+ }
+ SvREFCNT_dec(HeVAL(entry));
+ HeVAL(entry) = &PL_sv_placeholder;
+ xhv->xhv_placeholders++; /* HvPLACEHOLDERS(hv)++ */
+ }
+ }
+ }
+ return;
+ }
+
hfreeentries(hv);
- xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */
- xhv->xhv_keys = 0; /* HvKEYS(hv) = 0 */
xhv->xhv_placeholders = 0; /* HvPLACEHOLDERS(hv) = 0 */
if (xhv->xhv_array /* HvARRAY(hv) */)
(void)memzero(xhv->xhv_array /* HvARRAY(hv) */,
mg_clear((SV*)hv);
HvHASKFLAGS_off(hv);
+ HvREHASH_off(hv);
}
STATIC void
riter = 0;
max = HvMAX(hv);
array = HvARRAY(hv);
+ /* make everyone else think the array is empty, so that the destructors
+ * called for freed entries can't recusively mess with us */
+ HvARRAY(hv) = Null(HE**);
+ HvFILL(hv) = 0;
+ ((XPVHV*) SvANY(hv))->xhv_keys = 0;
+
entry = array[0];
for (;;) {
if (entry) {
entry = array[riter];
}
}
+ HvARRAY(hv) = array;
(void)hv_iterinit(hv);
}
register XPVHV* xhv;
if (!hv)
return;
+ DEBUG_A(Perl_hv_assert(aTHX_ hv));
xhv = (XPVHV*)SvANY(hv);
hfreeentries(hv);
Safefree(xhv->xhv_array /* HvARRAY(hv) */);
if (HvNAME(hv)) {
+ if(PL_stashcache)
+ hv_delete(PL_stashcache, HvNAME(hv), strlen(HvNAME(hv)), G_DISCARD);
Safefree(HvNAME(hv));
HvNAME(hv) = 0;
}
xhv->xhv_max = 7; /* HvMAX(hv) = 7 (it's a normal hash) */
xhv->xhv_array = 0; /* HvARRAY(hv) = 0 */
- xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */
- xhv->xhv_keys = 0; /* HvKEYS(hv) = 0 */
xhv->xhv_placeholders = 0; /* HvPLACEHOLDERS(hv) = 0 */
if (SvRMAGICAL(hv))
The C<flags> value will normally be zero; if HV_ITERNEXT_WANTPLACEHOLDERS is
set the placeholders keys (for restricted hashes) will be returned in addition
to normal keys. By default placeholders are automatically skipped over.
-Currently a placeholder is implemented with a value that is literally
-<&Perl_sv_undef> (a regular C<undef> value is a normal read-write SV for which
-C<!SvOK> is false). Note that the implementation of placeholders and
+Currently a placeholder is implemented with a value that is
+C<&Perl_sv_placeholder>. Note that the implementation of placeholders and
restricted hashes may change, and the implementation currently is
insufficiently abstracted for any change to be tidy.
Newz(506, xhv->xhv_array /* HvARRAY(hv) */,
PERL_HV_ARRAY_ALLOC_BYTES(xhv->xhv_max+1 /* HvMAX(hv)+1 */),
char);
+ /* At start of hash, entry is NULL. */
if (entry)
{
entry = HeNEXT(entry);
* Skip past any placeholders -- don't want to include them in
* any iteration.
*/
- while (entry && HeVAL(entry) == &PL_sv_undef) {
+ while (entry && HeVAL(entry) == &PL_sv_placeholder) {
entry = HeNEXT(entry);
}
}
}
while (!entry) {
+ /* OK. Come to the end of the current list. Grab the next one. */
+
xhv->xhv_riter++; /* HvRITER(hv)++ */
if (xhv->xhv_riter > (I32)xhv->xhv_max /* HvRITER(hv) > HvMAX(hv) */) {
+ /* There is no next one. End of the hash. */
xhv->xhv_riter = -1; /* HvRITER(hv) = -1 */
break;
}
entry = ((HE**)xhv->xhv_array)[xhv->xhv_riter];
if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
- /* if we have an entry, but it's a placeholder, don't count it */
- if (entry && HeVAL(entry) == &PL_sv_undef)
- entry = 0;
- }
+ /* If we have an entry, but it's a placeholder, don't count it.
+ Try the next. */
+ while (entry && HeVAL(entry) == &PL_sv_placeholder)
+ entry = HeNEXT(entry);
+ }
+ /* Will loop again if this linked list starts NULL
+ (for HV_ITERNEXT_WANTPLACEHOLDERS)
+ or if we run through it and find only placeholders. */
}
if (oldentry && HvLAZYDEL(hv)) { /* was deleted earlier? */
hv_free_ent(hv, oldentry);
}
+ /*if (HvREHASH(hv) && entry && !HeKREHASH(entry))
+ PerlIO_printf(PerlIO_stderr(), "Awooga %p %p\n", hv, entry);*/
+
xhv->xhv_eiter = entry; /* HvEITER(hv) = entry */
return entry;
}
sv = newSVpvn ((char*)as_utf8, utf8_len);
SvUTF8_on (sv);
- } else {
+ Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */
+ } else if (flags & HVhek_REHASH) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK. This hv is using custom a hasing
+ algorithm. Hence we can't return a shared string scalar, as
+ that would contain the (wrong) hash value, and might get passed
+ into an hv routine with a regular hash */
+
+ sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek));
+ if (HEK_UTF8(hek))
+ SvUTF8_on (sv);
+ } else {
sv = newSVpvn_share(HEK_KEY(hek),
(HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)),
HEK_HASH(hek));
if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE)))
hv_store(PL_strtab, str, len, Nullsv, hash);
+
+ Can't rehash the shared string table, so not sure if it's worth
+ counting the number of entries in the linked list
*/
xhv = (XPVHV*)SvANY(PL_strtab);
/* assert(xhv_array != 0) */
xhv->xhv_keys++; /* HvKEYS(hv)++ */
if (i) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
- if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */)
+ } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
hsplit(PL_strtab);
}
}
return HeKEY_hek(entry);
}
+
+
+/*
+=for apidoc hv_assert
+
+Check that a hash is in an internally consistent state.
+
+=cut
+*/
+
+void
+Perl_hv_assert(pTHX_ HV *hv)
+{
+ HE* entry;
+ int withflags = 0;
+ int placeholders = 0;
+ int real = 0;
+ int bad = 0;
+ I32 riter = HvRITER(hv);
+ HE *eiter = HvEITER(hv);
+
+ (void)hv_iterinit(hv);
+
+ while ((entry = hv_iternext_flags(hv, HV_ITERNEXT_WANTPLACEHOLDERS))) {
+ /* sanity check the values */
+ if (HeVAL(entry) == &PL_sv_placeholder) {
+ placeholders++;
+ } else {
+ real++;
+ }
+ /* sanity check the keys */
+ if (HeSVKEY(entry)) {
+ /* Don't know what to check on SV keys. */
+ } else if (HeKUTF8(entry)) {
+ withflags++;
+ if (HeKWASUTF8(entry)) {
+ PerlIO_printf(Perl_debug_log,
+ "hash key has both WASUFT8 and UTF8: '%.*s'\n",
+ (int) HeKLEN(entry), HeKEY(entry));
+ bad = 1;
+ }
+ } else if (HeKWASUTF8(entry)) {
+ withflags++;
+ }
+ }
+ if (!SvTIED_mg((SV*)hv, PERL_MAGIC_tied)) {
+ if (HvUSEDKEYS(hv) != real) {
+ PerlIO_printf(Perl_debug_log, "Count %d key(s), but hash reports %d\n",
+ (int) real, (int) HvUSEDKEYS(hv));
+ bad = 1;
+ }
+ if (HvPLACEHOLDERS(hv) != placeholders) {
+ PerlIO_printf(Perl_debug_log,
+ "Count %d placeholder(s), but hash reports %d\n",
+ (int) placeholders, (int) HvPLACEHOLDERS(hv));
+ bad = 1;
+ }
+ }
+ if (withflags && ! HvHASKFLAGS(hv)) {
+ PerlIO_printf(Perl_debug_log,
+ "Hash has HASKFLAGS off but I count %d key(s) with flags\n",
+ withflags);
+ bad = 1;
+ }
+ if (bad) {
+ sv_dump((SV *)hv);
+ }
+ HvRITER(hv) = riter; /* Restore hash iterator state */
+ HvEITER(hv) = eiter;
+}