From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Sun, 28 Jan 2001 19:28:40 +0000 (+0000)
Subject: Patch from Inaba Hiroto:
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f9a6324217cffea75ff769ddd313748c0613a128;p=p5sagit%2Fp5-mst-13.2.git

Patch from Inaba Hiroto:
- canonical UTF-8 hash keys: if a key string for a hash is
  UTF8-on, try downgrade the string and use it if
  unicode::distinct is not in effect.
  For the task, I added a function bytes_from_utf8() to utf8.c.
  It might resemble utf8_to_bytes() but it is not convenient
  to the task.
  Made a test for it and added to t/op/each.t
- Changed do_print in doio.c to apply sv_utf8_(downgrade|upgrade) to
  the mortal copy of the argument SV.
  And changed t/io/utf8.t test 18 which expects print() to
  upgrade its argument.
- re-implement sv_eq with bytes_from_utf8()
- some bug fixes
  - tr/// does not handle UTF8 range (\x{}-\x{})
  - \ before raw UTF8 character produced
    "Malformed UTF-8 character" warning.
  - "\x{100}\N{CENT SIGN}" is Malformed.
    Added tests for these 3.
  - and one silly bug (by me) with qu operator.

p4raw-id: //depot/perl@8583
---

diff --git a/doio.c b/doio.c
index 6056ea7..a1d0e46 100644
--- a/doio.c
+++ b/doio.c
@@ -1169,13 +1169,12 @@ Perl_do_print(pTHX_ register SV *sv, PerlIO *fp)
 	/* FALL THROUGH */
     default:
 	if (PerlIO_isutf8(fp)) {
-	    tmps = SvPVutf8(sv, len);
-	}
-	else {
-	    if (DO_UTF8(sv))
-		sv_utf8_downgrade(sv, FALSE);
-	    tmps = SvPV(sv, len);
+	    if (!SvUTF8(sv))
+		sv_utf8_upgrade(sv = sv_mortalcopy(sv));
 	}
+	else if (DO_UTF8(sv))
+	    sv_utf8_downgrade((sv = sv_mortalcopy(sv)), FALSE);
+	tmps = SvPV(sv, len);
 	break;
     }
     /* To detect whether the process is about to overstep its
diff --git a/embed.h b/embed.h
index f2a05c7..fe0b6b3 100644
--- a/embed.h
+++ b/embed.h
@@ -737,6 +737,7 @@
 #define utf8_distance		Perl_utf8_distance
 #define utf8_hop		Perl_utf8_hop
 #define utf8_to_bytes		Perl_utf8_to_bytes
+#define bytes_from_utf8		Perl_bytes_from_utf8
 #define bytes_to_utf8		Perl_bytes_to_utf8
 #define utf8_to_uv_simple	Perl_utf8_to_uv_simple
 #define utf8_to_uv		Perl_utf8_to_uv
@@ -2214,6 +2215,7 @@
 #define utf8_distance(a,b)	Perl_utf8_distance(aTHX_ a,b)
 #define utf8_hop(a,b)		Perl_utf8_hop(aTHX_ a,b)
 #define utf8_to_bytes(a,b)	Perl_utf8_to_bytes(aTHX_ a,b)
+#define bytes_from_utf8(a,b,c)	Perl_bytes_from_utf8(aTHX_ a,b,c)
 #define bytes_to_utf8(a,b)	Perl_bytes_to_utf8(aTHX_ a,b)
 #define utf8_to_uv_simple(a,b)	Perl_utf8_to_uv_simple(aTHX_ a,b)
 #define utf8_to_uv(a,b,c,d)	Perl_utf8_to_uv(aTHX_ a,b,c,d)
@@ -4342,6 +4344,8 @@
 #define utf8_hop		Perl_utf8_hop
 #define Perl_utf8_to_bytes	CPerlObj::Perl_utf8_to_bytes
 #define utf8_to_bytes		Perl_utf8_to_bytes
+#define Perl_bytes_from_utf8	CPerlObj::Perl_bytes_from_utf8
+#define bytes_from_utf8		Perl_bytes_from_utf8
 #define Perl_bytes_to_utf8	CPerlObj::Perl_bytes_to_utf8
 #define bytes_to_utf8		Perl_bytes_to_utf8
 #define Perl_utf8_to_uv_simple	CPerlObj::Perl_utf8_to_uv_simple
diff --git a/embed.pl b/embed.pl
index e71337b..9732773 100755
--- a/embed.pl
+++ b/embed.pl
@@ -2085,6 +2085,7 @@ Adp	|STRLEN	|utf8_length	|U8* s|U8 *e
 Apd	|IV	|utf8_distance	|U8 *a|U8 *b
 Apd	|U8*	|utf8_hop	|U8 *s|I32 off
 ApMd	|U8*	|utf8_to_bytes	|U8 *s|STRLEN *len
+ApMd	|U8*	|bytes_from_utf8|U8 *s|STRLEN *len|bool *is_utf8
 ApMd	|U8*	|bytes_to_utf8	|U8 *s|STRLEN *len
 Apd	|UV	|utf8_to_uv_simple|U8 *s|STRLEN* retlen
 Adp	|UV	|utf8_to_uv	|U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
diff --git a/global.sym b/global.sym
index 48128c9..dab2a7c 100644
--- a/global.sym
+++ b/global.sym
@@ -470,6 +470,7 @@ Perl_utf8_length
 Perl_utf8_distance
 Perl_utf8_hop
 Perl_utf8_to_bytes
+Perl_bytes_from_utf8
 Perl_bytes_to_utf8
 Perl_utf8_to_uv_simple
 Perl_utf8_to_uv
diff --git a/hv.c b/hv.c
index 0e50523..c999488 100644
--- a/hv.c
+++ b/hv.c
@@ -152,6 +152,7 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
     register HE *entry;
     SV *sv;
     bool is_utf8 = FALSE;
+    const char *keysave = key;
 
     if (!hv)
 	return 0;
@@ -196,6 +197,9 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
 	    return 0;
     }
 
+    if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+	key = (char*)bytes_from_utf8((U8*)key, (STRLEN*)&klen, &is_utf8);
+
     PERL_HASH(hash, key, klen);
 
     entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -208,6 +212,8 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
 	    continue;
 	if (HeKUTF8(entry) != (char)is_utf8)
 	    continue;
+	if (key != keysave)
+	    Safefree(key);
 	return &HeVAL(entry);
     }
 #ifdef DYNAMIC_ENV_FETCH  /* %ENV lookup?  If so, try to fetch the value now */
@@ -217,14 +223,24 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
 	if (env) {
 	    sv = newSVpvn(env,len);
 	    SvTAINTED_on(sv);
+	    if (key != keysave)
+		Safefree(key);
 	    return hv_store(hv,key,klen,sv,hash);
 	}
     }
 #endif
     if (lval) {		/* gonna assign to this, so it better be there */
 	sv = NEWSV(61,0);
-	return hv_store(hv,key,is_utf8?-klen:klen,sv,hash);
+	if (key != keysave) { /* must be is_utf8 == 0 */
+	    SV **ret = hv_store(hv,key,klen,sv,hash);
+	    Safefree(key);
+	    return ret;
+	}
+	else
+	    return hv_store(hv,key,is_utf8?-klen:klen,sv,hash);
     }
+    if (key != keysave)
+	Safefree(key);
     return 0;
 }
 
@@ -256,6 +272,7 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
     register HE *entry;
     SV *sv;
     bool is_utf8;
+    char *keysave;
 
     if (!hv)
 	return 0;
@@ -304,9 +321,12 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
 	    return 0;
     }
 
-    key = SvPV(keysv, klen);
+    keysave = key = SvPV(keysv, klen);
     is_utf8 = (SvUTF8(keysv)!=0);
 
+    if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+	key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
+
     if (!hash)
 	PERL_HASH(hash, key, klen);
 
@@ -320,6 +340,8 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
 	    continue;
 	if (HeKUTF8(entry) != (char)is_utf8)
 	    continue;
+	if (key != keysave)
+	    Safefree(key);
 	return entry;
     }
 #ifdef DYNAMIC_ENV_FETCH  /* %ENV lookup?  If so, try to fetch the value now */
@@ -333,6 +355,8 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
 	}
     }
 #endif
+    if (key != keysave)
+	Safefree(key);
     if (lval) {		/* gonna assign to this, so it better be there */
 	sv = NEWSV(61,0);
 	return hv_store_ent(hv,keysv,sv,hash);
@@ -385,6 +409,7 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
     register HE *entry;
     register HE **oentry;
     bool is_utf8 = FALSE;
+    const char *keysave = key;
 
     if (!hv)
 	return 0;
@@ -412,6 +437,9 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
 #endif
 	}
     }
+    if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+	key = (char*)bytes_from_utf8((U8*)key, (STRLEN*)&klen, &is_utf8);
+
     if (!hash)
 	PERL_HASH(hash, key, klen);
 
@@ -433,6 +461,8 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
 	    continue;
 	SvREFCNT_dec(HeVAL(entry));
 	HeVAL(entry) = val;
+	if (key != keysave)
+	    Safefree(key);
 	return &HeVAL(entry);
     }
 
@@ -441,6 +471,8 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
 	HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
     else                                       /* gotta do the real thing */
 	HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
+    if (key != keysave)
+	Safefree(key);
     HeVAL(entry) = val;
     HeNEXT(entry) = *oentry;
     *oentry = entry;
@@ -484,6 +516,7 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
     register HE *entry;
     register HE **oentry;
     bool is_utf8;
+    char *keysave;
 
     if (!hv)
 	return 0;
@@ -513,9 +546,12 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
 	}
     }
 
-    key = SvPV(keysv, klen);
+    keysave = key = SvPV(keysv, klen);
     is_utf8 = (SvUTF8(keysv) != 0);
 
+    if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+	key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
+
     if (!hash)
 	PERL_HASH(hash, key, klen);
 
@@ -537,6 +573,8 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
 	    continue;
 	SvREFCNT_dec(HeVAL(entry));
 	HeVAL(entry) = val;
+	if (key != keysave)
+	    Safefree(key);
 	return entry;
     }
 
@@ -545,6 +583,8 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
 	HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
     else                                       /* gotta do the real thing */
 	HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
+    if (key != keysave)
+	Safefree(key);
     HeVAL(entry) = val;
     HeNEXT(entry) = *oentry;
     *oentry = entry;
@@ -581,6 +621,7 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
     SV **svp;
     SV *sv;
     bool is_utf8 = FALSE;
+    const char *keysave = key;
 
     if (!hv)
 	return Nullsv;
@@ -615,6 +656,9 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
     if (!xhv->xhv_array)
 	return Nullsv;
 
+    if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+	key = (char*)bytes_from_utf8((U8*)key, (STRLEN*)&klen, &is_utf8);
+
     PERL_HASH(hash, key, klen);
 
     oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -629,6 +673,8 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
 	    continue;
 	if (HeKUTF8(entry) != (char)is_utf8)
 	    continue;
+	if (key != keysave)
+	    Safefree(key);
 	*oentry = HeNEXT(entry);
 	if (i && !*oentry)
 	    xhv->xhv_fill--;
@@ -645,6 +691,8 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
 	--xhv->xhv_keys;
 	return sv;
     }
+    if (key != keysave)
+	Safefree(key);
     return Nullsv;
 }
 
@@ -670,6 +718,7 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
     register HE **oentry;
     SV *sv;
     bool is_utf8;
+    char *keysave;
 
     if (!hv)
 	return Nullsv;
@@ -702,9 +751,12 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
     if (!xhv->xhv_array)
 	return Nullsv;
 
-    key = SvPV(keysv, klen);
+    keysave = key = SvPV(keysv, klen);
     is_utf8 = (SvUTF8(keysv) != 0);
 
+    if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+	key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
+
     if (!hash)
 	PERL_HASH(hash, key, klen);
 
@@ -720,6 +772,8 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
 	    continue;
 	if (HeKUTF8(entry) != (char)is_utf8)
 	    continue;
+	if (key != keysave)
+	    Safefree(key);
 	*oentry = HeNEXT(entry);
 	if (i && !*oentry)
 	    xhv->xhv_fill--;
@@ -736,6 +790,8 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
 	--xhv->xhv_keys;
 	return sv;
     }
+    if (key != keysave)
+	Safefree(key);
     return Nullsv;
 }
 
@@ -756,6 +812,7 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
     register HE *entry;
     SV *sv;
     bool is_utf8 = FALSE;
+    const char *keysave = key;
 
     if (!hv)
 	return 0;
@@ -786,6 +843,9 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
 	return 0;
 #endif
 
+    if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+	key = (char*)bytes_from_utf8((U8*)key, (STRLEN*)&klen, &is_utf8);
+
     PERL_HASH(hash, key, klen);
 
 #ifdef DYNAMIC_ENV_FETCH
@@ -802,6 +862,8 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
 	    continue;
 	if (HeKUTF8(entry) != (char)is_utf8)
 	    continue;
+	if (key != keysave)
+	    Safefree(key);
 	return TRUE;
     }
 #ifdef DYNAMIC_ENV_FETCH  /* is it out there? */
@@ -816,6 +878,8 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
 	}
     }
 #endif
+    if (key != keysave)
+	Safefree(key);
     return FALSE;
 }
 
@@ -839,6 +903,7 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
     register HE *entry;
     SV *sv;
     bool is_utf8;
+    char *keysave;
 
     if (!hv)
 	return 0;
@@ -867,8 +932,10 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
 	return 0;
 #endif
 
-    key = SvPV(keysv, klen);
+    keysave = key = SvPV(keysv, klen);
     is_utf8 = (SvUTF8(keysv) != 0);
+    if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+	key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
     if (!hash)
 	PERL_HASH(hash, key, klen);
 
@@ -886,6 +953,8 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
 	    continue;
 	if (HeKUTF8(entry) != (char)is_utf8)
 	    continue;
+	if (key != keysave)
+	    Safefree(key);
 	return TRUE;
     }
 #ifdef DYNAMIC_ENV_FETCH  /* is it out there? */
@@ -900,6 +969,8 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
 	}
     }
 #endif
+    if (key != keysave)
+	Safefree(key);
     return FALSE;
 }
 
@@ -1471,10 +1542,13 @@ Perl_unsharepvn(pTHX_ const char *str, I32 len, U32 hash)
     register I32 i = 1;
     I32 found = 0;
     bool is_utf8 = FALSE;
+    const char *save = str;
 
     if (len < 0) {
       len = -len;
       is_utf8 = TRUE;
+      if (!(PL_hints & HINT_UTF8_DISTINCT))
+	  str = (char*)bytes_from_utf8((U8*)str, (STRLEN*)&len, &is_utf8);
     }
 
     /* what follows is the moral equivalent of:
@@ -1507,7 +1581,8 @@ Perl_unsharepvn(pTHX_ const char *str, I32 len, U32 hash)
 	break;
     }
     UNLOCK_STRTAB_MUTEX;
-
+    if (str != save)
+	Safefree(str);
     if (!found && ckWARN_d(WARN_INTERNAL))
 	Perl_warner(aTHX_ WARN_INTERNAL, "Attempt to free non-existent shared string '%s'",str);
 }
@@ -1525,10 +1600,13 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
     register I32 i = 1;
     I32 found = 0;
     bool is_utf8 = FALSE;
+    const char *save = str;
 
     if (len < 0) {
       len = -len;
       is_utf8 = TRUE;
+      if (!(PL_hints & HINT_UTF8_DISTINCT))
+	  str = (char*)bytes_from_utf8((U8*)str, (STRLEN*)&len, &is_utf8);
     }
 
     /* what follows is the moral equivalent of:
@@ -1568,8 +1646,7 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
 
     ++HeVAL(entry);				/* use value slot as REFCNT */
     UNLOCK_STRTAB_MUTEX;
+    if (str != save)
+	Safefree(str);
     return HeKEY_hek(entry);
 }
-
-
-
diff --git a/objXSUB.h b/objXSUB.h
index 8cdfec0..51e4eb2 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -1889,6 +1889,10 @@
 #define Perl_utf8_to_bytes	pPerl->Perl_utf8_to_bytes
 #undef  utf8_to_bytes
 #define utf8_to_bytes		Perl_utf8_to_bytes
+#undef  Perl_bytes_from_utf8
+#define Perl_bytes_from_utf8	pPerl->Perl_bytes_from_utf8
+#undef  bytes_from_utf8
+#define bytes_from_utf8		Perl_bytes_from_utf8
 #undef  Perl_bytes_to_utf8
 #define Perl_bytes_to_utf8	pPerl->Perl_bytes_to_utf8
 #undef  bytes_to_utf8
diff --git a/perlapi.c b/perlapi.c
index f0016d5..04d48b9 100644
--- a/perlapi.c
+++ b/perlapi.c
@@ -3413,6 +3413,13 @@ Perl_utf8_to_bytes(pTHXo_ U8 *s, STRLEN *len)
     return ((CPerlObj*)pPerl)->Perl_utf8_to_bytes(s, len);
 }
 
+#undef  Perl_bytes_from_utf8
+U8*
+Perl_bytes_from_utf8(pTHXo_ U8 *s, STRLEN *len, bool *is_utf8)
+{
+    return ((CPerlObj*)pPerl)->Perl_bytes_from_utf8(s, len, is_utf8);
+}
+
 #undef  Perl_bytes_to_utf8
 U8*
 Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN *len)
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index 25fe18a..1828b55 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -182,6 +182,23 @@ must then use C<av_store> to assign values to these new elements.
 =for hackers
 Found in file av.c
 
+=item bytes_from_utf8
+
+Converts a string C<s> of length C<len> from UTF8 into byte encoding.
+Unlike <utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to
+the newly-created string, and updates C<len> to contain the new length.
+Returns the original string if no conversion occurs, C<len> and
+C<is_utf8> are unchanged. Do nothing if C<is_utf8> points to 0. Sets
+C<is_utf8> to 0 if C<s> is converted or malformed .
+
+NOTE: this function is experimental and may change or be
+removed without notice.
+
+	U8*	bytes_from_utf8(U8 *s, STRLEN *len, bool *is_utf8)
+
+=for hackers
+Found in file utf8.c
+
 =item bytes_to_utf8
 
 Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.
@@ -2420,19 +2437,19 @@ false, defined or undefined.  Does not handle 'get' magic.
 =for hackers
 Found in file sv.h
 
-=item svtype
+=item SvTYPE
 
-An enum of flags for Perl types.  These are found in the file B<sv.h> 
-in the C<svtype> enum.  Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV.  See C<svtype>.
+
+	svtype	SvTYPE(SV* sv)
 
 =for hackers
 Found in file sv.h
 
-=item SvTYPE
-
-Returns the type of the SV.  See C<svtype>.
+=item svtype
 
-	svtype	SvTYPE(SV* sv)
+An enum of flags for Perl types.  These are found in the file B<sv.h> 
+in the C<svtype> enum.  Test these flags with the C<SvTYPE> macro.
 
 =for hackers
 Found in file sv.h
diff --git a/proto.h b/proto.h
index a1f0fee..e39d33e 100644
--- a/proto.h
+++ b/proto.h
@@ -818,6 +818,7 @@ PERL_CALLCONV STRLEN	Perl_utf8_length(pTHX_ U8* s, U8 *e);
 PERL_CALLCONV IV	Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
 PERL_CALLCONV U8*	Perl_utf8_hop(pTHX_ U8 *s, I32 off);
 PERL_CALLCONV U8*	Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
+PERL_CALLCONV U8*	Perl_bytes_from_utf8(pTHX_ U8 *s, STRLEN *len, bool *is_utf8);
 PERL_CALLCONV U8*	Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len);
 PERL_CALLCONV UV	Perl_utf8_to_uv_simple(pTHX_ U8 *s, STRLEN* retlen);
 PERL_CALLCONV UV	Perl_utf8_to_uv(pTHX_ U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags);
diff --git a/sv.c b/sv.c
index ed7ebdc..c53486a 100644
--- a/sv.c
+++ b/sv.c
@@ -4690,30 +4690,24 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
 
     /* do not utf8ize the comparands as a side-effect */
     if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) {
+	bool is_utf8 = TRUE;
+
 	if (PL_hints & HINT_UTF8_DISTINCT)
 	    return FALSE;
 
 	if (SvUTF8(sv1)) {
-	    (void)utf8_to_bytes((U8*)(pv1 = savepvn(pv1, cur1)), &cur1);
-	    {
-		IV scur1 = cur1;
-		if (scur1 < 0) {
-		    Safefree(pv1);
-		    return 0;
-		}
-	    }
-	    pv1tmp = TRUE;
+	    char *pv = bytes_from_utf8((U8*)pv1, &cur1, &is_utf8);
+	    if (is_utf8)
+		return 0;
+	    pv1tmp = (pv != pv1);
+	    pv1 = pv;
 	}
 	else {
-	    (void)utf8_to_bytes((U8*)(pv2 = savepvn(pv2, cur2)), &cur2);
-	    {
-		IV scur2 = cur2;
-		if (scur2 < 0) {
-		    Safefree(pv2);
-		    return 0;
-		}
-	    }
-	    pv2tmp = TRUE;
+	    char *pv = bytes_from_utf8((U8*)pv2, &cur2, &is_utf8);
+	    if (is_utf8)
+		return 0;
+	    pv2tmp = (pv != pv2);
+	    pv2 = pv;
 	}
     }
 
@@ -5601,6 +5595,8 @@ Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash)
         len = -len;
         is_utf8 = TRUE;
     }
+    if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+	src = (char*)bytes_from_utf8((U8*)src, (STRLEN*)&len, &is_utf8);
     if (!hash)
 	PERL_HASH(hash, src, len);
     new_SV(sv);
diff --git a/t/io/utf8.t b/t/io/utf8.t
index 04554e7..d0201aa 100755
--- a/t/io/utf8.t
+++ b/t/io/utf8.t
@@ -91,17 +91,17 @@ print "not " unless $y == 1;
 print "ok 17\n";
 }
 
-print F $b,"\n"; # This upgrades $b!
+print F $b,"\n"; # Don't upgrades $b
 
 { # Check byte length of $b
 use bytes; my $y = length($b);
-print "not " unless $y == 2;
+print "not ($y) " unless $y == 1;
 print "ok 18\n";
 }
 
 { my $x = tell(F); 
     { use bytes; $y += 3;}
-    print "not " unless $x == $y;
+    print "not ($x,$y) " unless $x == $y;
     print "ok 19\n";
 }
 
@@ -110,14 +110,14 @@ close F;
 open F, "a" or die $!; # Not UTF
 $x = <F>;
 chomp($x);
-print "not " unless $x eq v196.172.194.130;
+printf "not (%vd) ", $x unless $x eq v196.172.194.130;
 print "ok 20\n";
 
 open F, "<:utf8", "a" or die $!;
 $x = <F>;
 chomp($x);
 close F;
-print "not " unless $x eq chr(300).chr(130);
+printf "not (%vd) ", $x unless $x eq chr(300).chr(130);
 print "ok 21\n";
 
 # Now let's make it suffer.
diff --git a/t/lib/charnames.t b/t/lib/charnames.t
index 14da2e0..d00396f 100644
--- a/t/lib/charnames.t
+++ b/t/lib/charnames.t
@@ -8,7 +8,7 @@ BEGIN {
 }
 
 $| = 1;
-print "1..13\n";
+print "1..14\n";
 
 use charnames ':full';
 
@@ -44,8 +44,7 @@ $encoded_alpha = "\316\261";
 $encoded_bet = "\327\221";
 
 sub to_bytes {
-    use bytes;
-    "".shift;
+    pack"a*", shift;
 }
 
 {
@@ -90,3 +89,10 @@ sub to_bytes {
     print "ok 13\n";
 }
 
+{
+   use charnames qw(:full);
+   use utf8;
+   print "not " unless "\x{100}\N{CENT SIGN}" eq "\x{100}"."\N{CENT SIGN}";
+   print "ok 14\n";
+}
+
diff --git a/t/op/each.t b/t/op/each.t
index 35792ab..f1012c6 100755
--- a/t/op/each.t
+++ b/t/op/each.t
@@ -1,6 +1,12 @@
 #!./perl
 
-print "1..24\n";
+BEGIN {
+    chdir 't' if -d 't';
+    @INC = '.'; 
+    push @INC, '../lib';
+}    
+
+print "1..25\n";
 
 $h{'abc'} = 'ABC';
 $h{'def'} = 'DEF';
@@ -156,3 +162,10 @@ print "#$b{$_}\n" for keys %b; # Used to core dump before change #8056.
 print "ok 23\n";
 print "#$u{$_}\n" for keys %u; # Used to core dump before change #8056.
 print "ok 24\n";
+
+%u = (qu"\xe3\x81\x82" => "downglade");
+for (keys %u) {
+    use bytes;
+    print "not " if length ne 3 or $_ ne "\xe3\x81\x82";
+    print "ok 25\n";
+}
diff --git a/t/op/tr.t b/t/op/tr.t
index 6a4e1aa..75887ab 100755
--- a/t/op/tr.t
+++ b/t/op/tr.t
@@ -5,7 +5,7 @@ BEGIN {
     @INC = '../lib';
 }
 
-print "1..49\n";
+print "1..51\n";
 
 $_ = "abcdefghijklmnopqrstuvwxyz";
 
@@ -287,3 +287,12 @@ print "ok 48\n";
 print "not " unless sprintf("%vd", $a) eq '196.172.200';
 print "ok 49\n";
 
+# UTF8 range
+
+($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/;
+print "not " unless $a eq v192.196.172.194.197.172;
+print "ok 50\n";
+
+($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
+print "not " unless $a eq v300.300.172.302.301.172;
+print "ok 51\n";
diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t
index 546b217..577e6b4 100755
--- a/t/pragma/utf8.t
+++ b/t/pragma/utf8.t
@@ -10,7 +10,7 @@ BEGIN {
     }
 }
 
-print "1..106\n";
+print "1..107\n";
 
 my $test = 1;
 
@@ -564,3 +564,16 @@ sub nok_bytes {
     print "ok $test\n";
     $test++;					# 106
 }
+
+{
+    use utf8;
+
+    my $w = 0;
+    local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ };
+    my $x = eval q/"\\/ . "\x{100}" . q/"/;;
+   
+    print "not " unless $w == 0 && $x eq "\x{100}";
+    print "ok $test\n";
+    $test++;					# 107
+}
+
diff --git a/toke.c b/toke.c
index 1b41dcc..a85a725 100644
--- a/toke.c
+++ b/toke.c
@@ -1388,8 +1388,7 @@ S_scan_const(pTHX_ char *start)
 			       "Unrecognized escape \\%c passed through",
 			       *s);
 		    /* default action is to copy the quoted character */
-		    *d++ = *s++;
-		    continue;
+		    goto default_action;
 		}
 
 	    /* \132 indicates an octal constant */
@@ -1479,6 +1478,13 @@ S_scan_const(pTHX_ char *start)
                     if (has_utf8 || uv > 255) {
 		        d = (char*)uv_to_utf8((U8*)d, uv);
 			has_utf8 = TRUE;
+			if (PL_lex_inwhat == OP_TRANS &&
+			    PL_sublex_info.sub_op) {
+			    PL_sublex_info.sub_op->op_private |=
+				(PL_lex_repl ? OPpTRANS_FROM_UTF
+					     : OPpTRANS_TO_UTF);
+			    utf = TRUE;
+			}
                     }
 		    else {
 		        *d++ = (char)uv;
@@ -1506,6 +1512,8 @@ S_scan_const(pTHX_ char *start)
 		    res = newSVpvn(s + 1, e - s - 1);
 		    res = new_constant( Nullch, 0, "charnames",
 					res, Nullsv, "\\N{...}" );
+		    if (has_utf8)
+			sv_utf8_upgrade(res);
 		    str = SvPV(res,len);
 		    if (!has_utf8 && SvUTF8(res)) {
 			char *ostart = SvPVX(sv);
@@ -1588,8 +1596,7 @@ S_scan_const(pTHX_ char *start)
 	    continue;
 	} /* end if (backslash) */
 
-       /* (now in tr/// code again) */
-
+    default_action:
        if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
            STRLEN len = (STRLEN) -1;
            UV uv;
@@ -1608,10 +1615,15 @@ S_scan_const(pTHX_ char *start)
                    *d++ = *s++;
            }
            has_utf8 = TRUE;
+	   if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
+	       PL_sublex_info.sub_op->op_private |=
+		   (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+	       utf = TRUE;
+	   }
            continue;
        }
 
-	*d++ = *s++;
+       *d++ = *s++;
     } /* while loop to process each character */
 
     /* terminate the string and set up the sv */
@@ -4742,7 +4754,8 @@ Perl_yylex(pTHX)
 	case KEY_qq:
 	case KEY_qu:
 	    s = scan_str(s,FALSE,FALSE);
-	    if (tmp == KEY_qu && is_utf8_string((U8*)s, SvCUR(PL_lex_stuff)))
+	    if (tmp == KEY_qu &&
+		is_utf8_string((U8*)SvPVX(PL_lex_stuff), SvCUR(PL_lex_stuff)))
 		SvUTF8_on(PL_lex_stuff);
 	    if (!s)
 		missingterm((char*)0);
diff --git a/utf8.c b/utf8.c
index 156e63f..046df74 100644
--- a/utf8.c
+++ b/utf8.c
@@ -583,6 +583,63 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
 }
 
 /*
+=for apidoc A|U8 *|bytes_from_utf8|U8 *s|STRLEN *len|bool *is_utf8
+
+Converts a string C<s> of length C<len> from UTF8 into byte encoding.
+Unlike <utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to
+the newly-created string, and updates C<len> to contain the new length.
+Returns the original string if no conversion occurs, C<len> and
+C<is_utf8> are unchanged. Do nothing if C<is_utf8> points to 0. Sets
+C<is_utf8> to 0 if C<s> is converted or malformed .
+
+=cut */
+
+U8 *
+Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8)
+{
+    U8 *send;
+    U8 *d;
+    U8 *start = s;
+    I32 count = 0;
+
+    if (!*is_utf8)
+	return start;
+
+    /* ensure valid UTF8 and chars < 256 before updating string */
+    for (send = s + *len; s < send;) {
+	U8 c = *s++;
+        if (!UTF8_IS_ASCII(c)) {
+	    if (UTF8_IS_CONTINUATION(c) || s >= send ||
+		!UTF8_IS_CONTINUATION(*s)) {
+		*is_utf8 = 0;		
+		return start;
+	    }
+	    if ((c & 0xfc) != 0xc0)
+		return start;
+	    s++, count++;
+        }
+    }
+
+    *is_utf8 = 0;		
+
+    if (!count)
+	return start;
+
+    Newz(801, d, (*len) - count + 1, U8);
+    d = s = start;
+    while (s < send) {
+	U8 c = *s++;
+	if (UTF8_IS_ASCII(c))
+	    *d++ = c;
+	else
+	    *d++ = UTF8_ACCUMULATE(c&3, *s++);
+    }
+    *d = '\0';
+    *len = d - start;
+    return start;
+}
+
+/*
 =for apidoc A|U8 *|bytes_to_utf8|U8 *s|STRLEN *len
 
 Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.