Additional patch for UTF8-keys (Re: perl@8016)
Inaba Hiroto [Sat, 9 Dec 2000 18:02:00 +0000 (03:02 +0900)]
Message-ID: <3A31F508.34F4BB23@st.rim.or.jp>

exists() didn't work for UTF-8 keys, and neither did shared hash keys.

p4raw-id: //depot/perl@8056

embed.pl
hv.c
op.c
proto.h
sv.c

index 35b714f..d441c4b 100755 (executable)
--- a/embed.pl
+++ b/embed.pl
@@ -1801,7 +1801,7 @@ Apd       |SV*    |newSVuv        |UV u
 Apd    |SV*    |newSVnv        |NV n
 Apd    |SV*    |newSVpv        |const char* s|STRLEN len
 Apd    |SV*    |newSVpvn       |const char* s|STRLEN len
-Apd    |SV*    |newSVpvn_share |const char* s|STRLEN len|U32 hash
+Apd    |SV*    |newSVpvn_share |const char* s|I32 len|U32 hash
 Afpd   |SV*    |newSVpvf       |const char* pat|...
 Ap     |SV*    |vnewSVpvf      |const char* pat|va_list* args
 Apd    |SV*    |newSVrv        |SV* rv|const char* classname
diff --git a/hv.c b/hv.c
index 334f7ad..f5aa4a8 100644 (file)
--- a/hv.c
+++ b/hv.c
@@ -94,7 +94,8 @@ S_save_hek(pTHX_ const char *str, I32 len, U32 hash)
 void
 Perl_unshare_hek(pTHX_ HEK *hek)
 {
-    unsharepvn(HEK_KEY(hek),HEK_LEN(hek),HEK_HASH(hek));
+    unsharepvn(HEK_KEY(hek),HEK_UTF8(hek)?-HEK_LEN(hek):HEK_LEN(hek),
+               HEK_HASH(hek));
 }
 
 #if defined(USE_ITHREADS)
@@ -118,9 +119,9 @@ Perl_he_dup(pTHX_ HE *e, bool shared)
     if (HeKLEN(e) == HEf_SVKEY)
        HeKEY_sv(ret) = SvREFCNT_inc(sv_dup(HeKEY_sv(e)));
     else if (shared)
- HeKEY_hek(ret) = share_hek(HeKEY(e), HeKLEN_UTF8(e), HeHASH(e));
+       HeKEY_hek(ret) = share_hek(HeKEY(e), HeKLEN_UTF8(e), HeHASH(e));
     else
- HeKEY_hek(ret) = save_hek(HeKEY(e), HeKLEN_UTF8(e), HeHASH(e));
+       HeKEY_hek(ret) = save_hek(HeKEY(e), HeKLEN_UTF8(e), HeHASH(e));
     HeVAL(ret) = SvREFCNT_inc(sv_dup(HeVAL(e)));
     return ret;
 }
@@ -205,8 +206,8 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
            continue;
        if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is this it? */
            continue;
- if (HeKUTF8(entry) != (char)is_utf8)
-     continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        return &HeVAL(entry);
     }
 #ifdef DYNAMIC_ENV_FETCH  /* %ENV lookup?  If so, try to fetch the value now */
@@ -222,7 +223,7 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
 #endif
     if (lval) {                /* gonna assign to this, so it better be there */
        sv = NEWSV(61,0);
- return hv_store(hv,key,is_utf8?-klen:klen,sv,hash);
+       return hv_store(hv,key,is_utf8?-klen:klen,sv,hash);
     }
     return 0;
 }
@@ -317,8 +318,8 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
            continue;
        if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is this it? */
            continue;
- if (HeKUTF8(entry) != (char)is_utf8)
-     continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        return entry;
     }
 #ifdef DYNAMIC_ENV_FETCH  /* %ENV lookup?  If so, try to fetch the value now */
@@ -428,8 +429,8 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
            continue;
        if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is this it? */
            continue;
- if (HeKUTF8(entry) != (char)is_utf8)
-     continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        SvREFCNT_dec(HeVAL(entry));
        HeVAL(entry) = val;
        return &HeVAL(entry);
@@ -437,9 +438,9 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
 
     entry = new_HE();
     if (HvSHAREKEYS(hv))
- HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
+       HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
     else                                       /* gotta do the real thing */
- HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
+       HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
     HeVAL(entry) = val;
     HeNEXT(entry) = *oentry;
     *oentry = entry;
@@ -532,8 +533,8 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
            continue;
        if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is this it? */
            continue;
- if (HeKUTF8(entry) != (char)is_utf8)
-     continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        SvREFCNT_dec(HeVAL(entry));
        HeVAL(entry) = val;
        return entry;
@@ -541,9 +542,9 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
 
     entry = new_HE();
     if (HvSHAREKEYS(hv))
- HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
+       HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
     else                                       /* gotta do the real thing */
- HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
+       HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
     HeVAL(entry) = val;
     HeNEXT(entry) = *oentry;
     *oentry = entry;
@@ -626,8 +627,8 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
            continue;
        if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is this it? */
            continue;
- if (HeKUTF8(entry) != (char)is_utf8)
-     continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        *oentry = HeNEXT(entry);
        if (i && !*oentry)
            xhv->xhv_fill--;
@@ -717,8 +718,8 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
            continue;
        if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is this it? */
            continue;
- if (HeKUTF8(entry) != (char)is_utf8)
-     continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        *oentry = HeNEXT(entry);
        if (i && !*oentry)
            xhv->xhv_fill--;
@@ -799,8 +800,8 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
            continue;
        if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is this it? */
            continue;
- if (HeKUTF8(entry) != (char)is_utf8)
-     continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        return TRUE;
     }
 #ifdef DYNAMIC_ENV_FETCH  /* is it out there? */
@@ -837,6 +838,7 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
     STRLEN klen;
     register HE *entry;
     SV *sv;
+    bool is_utf8;
 
     if (!hv)
        return 0;
@@ -866,6 +868,7 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
 #endif
 
     key = SvPV(keysv, klen);
+    is_utf8 = (SvUTF8(keysv) != 0);
     if (!hash)
        PERL_HASH(hash, key, klen);
 
@@ -881,6 +884,8 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
            continue;
        if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is this it? */
            continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        return TRUE;
     }
 #ifdef DYNAMIC_ENV_FETCH  /* is it out there? */
@@ -1095,7 +1100,7 @@ Perl_newHVhv(pTHX_ HV *ohv)
        /* Slow way */
        hv_iterinit(ohv);
        while ((entry = hv_iternext(ohv))) {
-     hv_store(hv, HeKEY(entry), HeKLEN_UTF8(entry),
+           hv_store(hv, HeKEY(entry), HeKLEN_UTF8(entry),
                     SvREFCNT_inc(HeVAL(entry)), HeHASH(entry));
        }
        HvRITER(ohv) = hv_riter;
@@ -1386,13 +1391,9 @@ Perl_hv_iterkeysv(pTHX_ register HE *entry)
 {
     if (HeKLEN(entry) == HEf_SVKEY)
        return sv_mortalcopy(HeKEY_sv(entry));
-    else {
-        SV *sv = newSVpvn_share((HeKLEN(entry) ? HeKEY(entry) : ""),
-    HeKLEN(entry), HeHASH(entry));
- if (HeKUTF8(entry))
-     SvUTF8_on(sv);
- return sv_2mortal(sv);
-    }
+    else
+       return sv_2mortal(newSVpvn_share((HeKLEN(entry) ? HeKEY(entry) : ""),
+                                        HeKLEN_UTF8(entry), HeHASH(entry)));
 }
 
 /*
@@ -1469,6 +1470,12 @@ Perl_unsharepvn(pTHX_ const char *str, I32 len, U32 hash)
     register HE **oentry;
     register I32 i = 1;
     I32 found = 0;
+    bool is_utf8 = FALSE;
+
+    if (len < 0) {
+      len = -len;
+      is_utf8 = TRUE;
+    }
 
     /* what follows is the moral equivalent of:
     if ((Svp = hv_fetch(PL_strtab, tmpsv, FALSE, hash))) {
@@ -1486,6 +1493,8 @@ Perl_unsharepvn(pTHX_ const char *str, I32 len, U32 hash)
            continue;
        if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */
            continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        found = 1;
        if (--HeVAL(entry) == Nullsv) {
            *oentry = HeNEXT(entry);
@@ -1538,14 +1547,14 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
            continue;
        if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */
            continue;
- if (HeKUTF8(entry) != (char)is_utf8)
-     continue;
+       if (HeKUTF8(entry) != (char)is_utf8)
+           continue;
        found = 1;
        break;
     }
     if (!found) {
        entry = new_HE();
- HeKEY_hek(entry) = save_hek(str, is_utf8?-len:len, hash);
+       HeKEY_hek(entry) = save_hek(str, is_utf8?-len:len, hash);
        HeVAL(entry) = Nullsv;
        HeNEXT(entry) = *oentry;
        *oentry = entry;
diff --git a/op.c b/op.c
index c530e5f..b6a9c7c 100644 (file)
--- a/op.c
+++ b/op.c
@@ -6721,9 +6721,9 @@ Perl_peep(pTHX_ register OP *o)
            svp = cSVOPx_svp(((BINOP*)o)->op_last);
            if ((!SvFAKE(sv = *svp) || !SvREADONLY(sv)) && !IS_PADCONST(sv)) {
                key = SvPV(sv, keylen);
-               lexname = newSVpvn_share(key, keylen, 0);
                if (SvUTF8(sv))
-                   SvUTF8_on(lexname);
+                 keylen = -keylen;
+               lexname = newSVpvn_share(key, keylen, 0);
                SvREFCNT_dec(sv);
                *svp = lexname;
            }
diff --git a/proto.h b/proto.h
index 1a3802a..ef1e8a5 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -543,7 +543,7 @@ PERL_CALLCONV SV*   Perl_newSVuv(pTHX_ UV u);
 PERL_CALLCONV SV*      Perl_newSVnv(pTHX_ NV n);
 PERL_CALLCONV SV*      Perl_newSVpv(pTHX_ const char* s, STRLEN len);
 PERL_CALLCONV SV*      Perl_newSVpvn(pTHX_ const char* s, STRLEN len);
-PERL_CALLCONV SV*      Perl_newSVpvn_share(pTHX_ const char* s, STRLEN len, U32 hash);
+PERL_CALLCONV SV*      Perl_newSVpvn_share(pTHX_ const char* s, I32 len, U32 hash);
 PERL_CALLCONV SV*      Perl_newSVpvf(pTHX_ const char* pat, ...)
 #ifdef CHECK_FORMAT
  __attribute__((format(printf,pTHX_1,pTHX_2)))
diff --git a/sv.c b/sv.c
index 87da8f7..56e9f8d 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -3080,7 +3080,7 @@ Perl_sv_force_normal_flags(pTHX_ register SV *sv, U32 flags)
            *SvEND(sv) = '\0';
            SvFAKE_off(sv);
            SvREADONLY_off(sv);
-           unsharepvn(pvx,len,hash);
+           unsharepvn(pvx,SvUTF8(sv)?-len:len,hash);
        }
        else if (PL_curcop != &PL_compiling)
            Perl_croak(aTHX_ PL_no_modify);
@@ -3806,7 +3806,7 @@ Perl_sv_clear(pTHX_ register SV *sv)
        else if (SvPVX(sv) && SvLEN(sv))
            Safefree(SvPVX(sv));
        else if (SvPVX(sv) && SvREADONLY(sv) && SvFAKE(sv)) {
-           unsharepvn(SvPVX(sv),SvCUR(sv),SvUVX(sv));
+           unsharepvn(SvPVX(sv),SvUTF8(sv)?-SvCUR(sv):SvCUR(sv),SvUVX(sv));
            SvFAKE_off(sv);
        }
        break;
@@ -4862,20 +4862,27 @@ will avoid string compare.
 */
 
 SV *
-Perl_newSVpvn_share(pTHX_ const char *src, STRLEN len, U32 hash)
+Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash)
 {
     register SV *sv;
+    bool is_utf8 = FALSE;
+    if (len < 0) {
+        len = -len;
+        is_utf8 = TRUE;
+    }
     if (!hash)
        PERL_HASH(hash, src, len);
     new_SV(sv);
     sv_upgrade(sv, SVt_PVIV);
-    SvPVX(sv) = sharepvn(src, len, hash);
+    SvPVX(sv) = sharepvn(src, is_utf8?-len:len, hash);
     SvCUR(sv) = len;
     SvUVX(sv) = hash;
     SvLEN(sv) = 0;
     SvREADONLY_on(sv);
     SvFAKE_on(sv);
     SvPOK_on(sv);
+    if (is_utf8)
+        SvUTF8_on(sv);
     return sv;
 }