From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Wed, 25 Oct 2000 20:00:48 +0000 (+0000)
Subject: Continue the internal UTF-8 API tweaking.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=dcad28805702d580064bc39a267d63c58bbb3b3f;p=p5sagit%2Fp5-mst-13.2.git

Continue the internal UTF-8 API tweaking.
Rename utf8_to_uv_chk() back to utf8_to_uv() because it's
used much more than the simpler API, now called utf8_to_uv_simple().
Still not quite happy with API, too much partial duplication
of functionality.

p4raw-id: //depot/perl@7439
---

diff --git a/doop.c b/doop.c
index 3cd8f07..fa927c2 100644
--- a/doop.c
+++ b/doop.c
@@ -77,7 +77,7 @@ S_do_trans_simple(pTHX_ SV *sv)
 
         ulen = 1;
         /* Need to check this, otherwise 128..255 won't match */
-	c = utf8_to_uv_chk(s, send - s, &ulen, 0);
+	c = utf8_to_uv(s, send - s, &ulen, 0);
         if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
             matches++;
             if (ch < 0x80)
@@ -125,7 +125,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
             STRLEN ulen;
             ulen = 1;
             if (hasutf)
-                c = utf8_to_uv_chk(s, send - s, &ulen, 0);
+                c = utf8_to_uv(s, send - s, &ulen, 0);
             else
                 c = *s;
             if (c < 0x100 && tbl[c] >= 0)
@@ -364,7 +364,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
 	    }
 	    else if (uv == none) {	/* "none" is unmapped character */
 		STRLEN ulen;
-		*d++ = (U8)utf8_to_uv_chk(s, send - s, &ulen, 0);
+		*d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
 		s += ulen;
 		puv = 0xfeedface;
 		continue;
@@ -405,7 +405,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
 	    }
 	    else if (uv == none) {	/* "none" is unmapped character */
 		STRLEN ulen;
-		*d++ = (U8)utf8_to_uv_chk(s, send - s, &ulen, 0);
+		*d++ = (U8)utf8_to_uv(s, send - s, &ulen, 0);
 		s += ulen;
 		continue;
 	    }
@@ -550,9 +550,8 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
     if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
 	Perl_croak(aTHX_ "Illegal number of bits in vec");
 
-    if (SvUTF8(sv)) {
+    if (SvUTF8(sv))
 	(void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
-    }
 
     offset *= size;	/* turn into bit offset */
     len = (offset + size + 7) / 8;	/* required number of bytes */
@@ -969,10 +968,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
 	switch (optype) {
 	case OP_BIT_AND:
 	    while (lulen && rulen) {
-		luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
+		luc = utf8_to_uv((U8*)lc, lulen, &ulen, 0);
 		lc += ulen;
 		lulen -= ulen;
-		ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
+		ruc = utf8_to_uv((U8*)rc, rulen, &ulen, 0);
 		rc += ulen;
 		rulen -= ulen;
 		duc = luc & ruc;
@@ -984,10 +983,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
 	    break;
 	case OP_BIT_XOR:
 	    while (lulen && rulen) {
-		luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
+		luc = utf8_to_uv((U8*)lc, lulen, &ulen, 0);
 		lc += ulen;
 		lulen -= ulen;
-		ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
+		ruc = utf8_to_uv((U8*)rc, rulen, &ulen, 0);
 		rc += ulen;
 		rulen -= ulen;
 		duc = luc ^ ruc;
@@ -996,10 +995,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
 	    goto mop_up_utf;
 	case OP_BIT_OR:
 	    while (lulen && rulen) {
-		luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
+		luc = utf8_to_uv((U8*)lc, lulen, &ulen, 0);
 		lc += ulen;
 		lulen -= ulen;
-		ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
+		ruc = utf8_to_uv((U8*)rc, rulen, &ulen, 0);
 		rc += ulen;
 		rulen -= ulen;
 		duc = luc | ruc;
diff --git a/embed.h b/embed.h
index eab037f..a588819 100644
--- a/embed.h
+++ b/embed.h
@@ -729,8 +729,8 @@
 #define utf8_hop		Perl_utf8_hop
 #define utf8_to_bytes		Perl_utf8_to_bytes
 #define bytes_to_utf8		Perl_bytes_to_utf8
+#define utf8_to_uv_simple	Perl_utf8_to_uv_simple
 #define utf8_to_uv		Perl_utf8_to_uv
-#define utf8_to_uv_chk		Perl_utf8_to_uv_chk
 #define uv_to_utf8		Perl_uv_to_utf8
 #define vivify_defelem		Perl_vivify_defelem
 #define vivify_ref		Perl_vivify_ref
@@ -2189,8 +2189,8 @@
 #define utf8_hop(a,b)		Perl_utf8_hop(aTHX_ a,b)
 #define utf8_to_bytes(a,b)	Perl_utf8_to_bytes(aTHX_ a,b)
 #define bytes_to_utf8(a,b)	Perl_bytes_to_utf8(aTHX_ a,b)
-#define utf8_to_uv(a,b)		Perl_utf8_to_uv(aTHX_ a,b)
-#define utf8_to_uv_chk(a,b,c,d)	Perl_utf8_to_uv_chk(aTHX_ a,b,c,d)
+#define utf8_to_uv_simple(a,b)	Perl_utf8_to_uv_simple(aTHX_ a,b)
+#define utf8_to_uv(a,b,c,d)	Perl_utf8_to_uv(aTHX_ a,b,c,d)
 #define uv_to_utf8(a,b)		Perl_uv_to_utf8(aTHX_ a,b)
 #define vivify_defelem(a)	Perl_vivify_defelem(aTHX_ a)
 #define vivify_ref(a,b)		Perl_vivify_ref(aTHX_ a,b)
@@ -4290,10 +4290,10 @@
 #define utf8_to_bytes		Perl_utf8_to_bytes
 #define Perl_bytes_to_utf8	CPerlObj::Perl_bytes_to_utf8
 #define bytes_to_utf8		Perl_bytes_to_utf8
+#define Perl_utf8_to_uv_simple	CPerlObj::Perl_utf8_to_uv_simple
+#define utf8_to_uv_simple	Perl_utf8_to_uv_simple
 #define Perl_utf8_to_uv		CPerlObj::Perl_utf8_to_uv
 #define utf8_to_uv		Perl_utf8_to_uv
-#define Perl_utf8_to_uv_chk	CPerlObj::Perl_utf8_to_uv_chk
-#define utf8_to_uv_chk		Perl_utf8_to_uv_chk
 #define Perl_uv_to_utf8		CPerlObj::Perl_uv_to_utf8
 #define uv_to_utf8		Perl_uv_to_utf8
 #define Perl_vivify_defelem	CPerlObj::Perl_vivify_defelem
diff --git a/embed.pl b/embed.pl
index 8f80bbf..62135fc 100755
--- a/embed.pl
+++ b/embed.pl
@@ -2074,9 +2074,9 @@ Ap	|I32	|utf8_distance	|U8 *a|U8 *b
 Ap	|U8*	|utf8_hop	|U8 *s|I32 off
 ApM	|U8*	|utf8_to_bytes	|U8 *s|STRLEN *len
 ApM	|U8*	|bytes_to_utf8	|U8 *s|STRLEN *len
-Ap	|UV	|utf8_to_uv	|U8 *s|STRLEN* retlen
-Ap	|UV	|utf8_to_uv_chk	|U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
-Ap	|U8*	|uv_to_utf8	|U8 *d|UV uv
+Ap	|UV	|utf8_to_uv_simple|U8 *s|STRLEN* retlen
+Ap	|UV	|utf8_to_uv	|U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
+Ap	|U8*	|uv_to_utf8|U8 *d|UV uv
 p	|void	|vivify_defelem	|SV* sv
 p	|void	|vivify_ref	|SV* sv|U32 to_what
 p	|I32	|wait4pid	|Pid_t pid|int* statusp|int flags
diff --git a/objXSUB.h b/objXSUB.h
index bc04f03..4d5ff6b 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -1869,14 +1869,14 @@
 #define Perl_bytes_to_utf8	pPerl->Perl_bytes_to_utf8
 #undef  bytes_to_utf8
 #define bytes_to_utf8		Perl_bytes_to_utf8
+#undef  Perl_utf8_to_uv_simple
+#define Perl_utf8_to_uv_simple	pPerl->Perl_utf8_to_uv_simple
+#undef  utf8_to_uv_simple
+#define utf8_to_uv_simple	Perl_utf8_to_uv_simple
 #undef  Perl_utf8_to_uv
 #define Perl_utf8_to_uv		pPerl->Perl_utf8_to_uv
 #undef  utf8_to_uv
 #define utf8_to_uv		Perl_utf8_to_uv
-#undef  Perl_utf8_to_uv_chk
-#define Perl_utf8_to_uv_chk	pPerl->Perl_utf8_to_uv_chk
-#undef  utf8_to_uv_chk
-#define utf8_to_uv_chk		Perl_utf8_to_uv_chk
 #undef  Perl_uv_to_utf8
 #define Perl_uv_to_utf8		pPerl->Perl_uv_to_utf8
 #undef  uv_to_utf8
diff --git a/op.c b/op.c
index 9e256a3..1aa558e 100644
--- a/op.c
+++ b/op.c
@@ -2658,7 +2658,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
 	    for (j = 0; j < i; j++) {
 		U8 *s = cp[j];
 		I32 cur = j < i ? cp[j+1] - s : tend - s;
-		UV  val = utf8_to_uv_chk(s, cur, &ulen, 0);
+		UV  val = utf8_to_uv(s, cur, &ulen, 0);
 		s += ulen;
 		diff = val - nextmin;
 		if (diff > 0) {
@@ -2671,7 +2671,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
 		    }
 	        }
 		if (*s == 0xff)
-		    val = utf8_to_uv_chk(s+1, cur - 1, &ulen, 0);
+		    val = utf8_to_uv(s+1, cur - 1, &ulen, 0);
 		if (val >= nextmin)
 		    nextmin = val + 1;
 	    }
@@ -2698,11 +2698,11 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
 	while (t < tend || tfirst <= tlast) {
 	    /* see if we need more "t" chars */
 	    if (tfirst > tlast) {
-		tfirst = (I32)utf8_to_uv_chk(t, tend - t, &ulen, 0);
+		tfirst = (I32)utf8_to_uv(t, tend - t, &ulen, 0);
 		t += ulen;
 		if (t < tend && *t == 0xff) {	/* illegal utf8 val indicates range */
 		    t++;
-		    tlast = (I32)utf8_to_uv_chk(t, tend - t, &ulen, 0);
+		    tlast = (I32)utf8_to_uv(t, tend - t, &ulen, 0);
 		    t += ulen;
 		}
 		else
@@ -2712,11 +2712,11 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
 	    /* now see if we need more "r" chars */
 	    if (rfirst > rlast) {
 		if (r < rend) {
-		    rfirst = (I32)utf8_to_uv_chk(r, rend - r, &ulen, 0);
+		    rfirst = (I32)utf8_to_uv(r, rend - r, &ulen, 0);
 		    r += ulen;
 		    if (r < rend && *r == 0xff) {	/* illegal utf8 val indicates range */
 			r++;
-			rlast = (I32)utf8_to_uv_chk(r, rend - r, &ulen, 0);
+			rlast = (I32)utf8_to_uv(r, rend - r, &ulen, 0);
 			r += ulen;
 		    }
 		    else
diff --git a/perlapi.c b/perlapi.c
index 1f1343d..efa7164 100644
--- a/perlapi.c
+++ b/perlapi.c
@@ -1327,7 +1327,7 @@ Perl_to_uni_lower_lc(pTHXo_ U32 c)
 }
 
 #undef  Perl_is_utf8_char
-int
+STRLEN
 Perl_is_utf8_char(pTHXo_ U8 *p)
 {
     return ((CPerlObj*)pPerl)->Perl_is_utf8_char(p);
@@ -3378,18 +3378,18 @@ Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN *len)
     return ((CPerlObj*)pPerl)->Perl_bytes_to_utf8(s, len);
 }
 
-#undef  Perl_utf8_to_uv
+#undef  Perl_utf8_to_uv_simple
 UV
-Perl_utf8_to_uv(pTHXo_ U8 *s, STRLEN* retlen)
+Perl_utf8_to_uv_simple(pTHXo_ U8 *s, STRLEN* retlen)
 {
-    return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, retlen);
+    return ((CPerlObj*)pPerl)->Perl_utf8_to_uv_simple(s, retlen);
 }
 
-#undef  Perl_utf8_to_uv_chk
+#undef  Perl_utf8_to_uv
 UV
-Perl_utf8_to_uv_chk(pTHXo_ U8 *s, STRLEN curlen, STRLEN* retlen, bool checking)
+Perl_utf8_to_uv(pTHXo_ U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags)
 {
-    return ((CPerlObj*)pPerl)->Perl_utf8_to_uv_chk(s, curlen, retlen, checking);
+    return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, curlen, retlen, flags);
 }
 
 #undef  Perl_uv_to_utf8
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index 730d89f..634180f 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -2368,19 +2368,19 @@ false, defined or undefined.  Does not handle 'get' magic.
 =for hackers
 Found in file sv.h
 
-=item svtype
+=item SvTYPE
 
-An enum of flags for Perl types.  These are found in the file B<sv.h> 
-in the C<svtype> enum.  Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV.  See C<svtype>.
+
+	svtype	SvTYPE(SV* sv)
 
 =for hackers
 Found in file sv.h
 
-=item SvTYPE
-
-Returns the type of the SV.  See C<svtype>.
+=item svtype
 
-	svtype	SvTYPE(SV* sv)
+An enum of flags for Perl types.  These are found in the file B<sv.h> 
+in the C<svtype> enum.  Test these flags with the C<SvTYPE> macro.
 
 =for hackers
 Found in file sv.h
@@ -3218,32 +3218,32 @@ Found in file utf8.c
 =item utf8_to_uv
 
 Returns the character value of the first character in the string C<s>
-which is assumed to be in UTF8 encoding; C<retlen> will be set to the
-length, in bytes, of that character, and the pointer C<s> will be
-advanced to the end of the character.
+which is assumed to be in UTF8 encoding and no longer than C<curlen>;
+C<retlen> will be set to the length, in bytes, of that character,
+and the pointer C<s> will be advanced to the end of the character.
 
-If C<s> does not point to a well-formed UTF8 character, an optional UTF8
+If C<s> does not point to a well-formed UTF8 character, the behaviour
+is dependent on the value of C<checking>: if this is true, it is
+assumed that the caller will raise a warning, and this function will
+set C<retlen> to C<-1> and return. If C<checking> is not true, an optional UTF8
 warning is produced.
 
-	U8* s	utf8_to_uv(STRLEN *retlen)
+	U8* s	utf8_to_uv(STRLEN curlen, I32 *retlen, U32 flags)
 
 =for hackers
 Found in file utf8.c
 
-=item utf8_to_uv_chk
+=item utf8_to_uv_simple
 
 Returns the character value of the first character in the string C<s>
-which is assumed to be in UTF8 encoding and no longer than C<curlen>;
-C<retlen> will be set to the length, in bytes, of that character,
-and the pointer C<s> will be advanced to the end of the character.
+which is assumed to be in UTF8 encoding; C<retlen> will be set to the
+length, in bytes, of that character, and the pointer C<s> will be
+advanced to the end of the character.
 
-If C<s> does not point to a well-formed UTF8 character, the behaviour
-is dependent on the value of C<checking>: if this is true, it is
-assumed that the caller will raise a warning, and this function will
-set C<retlen> to C<-1> and return. If C<checking> is not true, an optional UTF8
-warning is produced.
+If C<s> does not point to a well-formed UTF8 character, zero is
+returned and retlen is set, if possible, to -1.
 
-	U8* s	utf8_to_uv_chk(STRLEN curlen, I32 *retlen, I32 checking)
+	U8* s	utf8_to_uv_simple(STRLEN *retlen)
 
 =for hackers
 Found in file utf8.c
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index 145c953..c9954d8 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -71,11 +71,6 @@ on Windows.
 Regardless of the above, the C<bytes> pragma can always be used to force
 byte semantics in a particular lexical scope.  See L<bytes>.
 
-One effect of the C<utf8> pragma is that the internal UTF-8 decoding
-becomes stricter so that the character 0xFFFF (UTF-8 bytes 0xEF 0xBF
-0xBF), and the bytes 0xFE and 0xFF, start to cause warnings if they
-appear in the data.
-
 The C<utf8> pragma is primarily a compatibility device that enables
 recognition of UTF-8 in literals encountered by the parser.  It may also
 be used for enabling some of the more experimental Unicode support features.
diff --git a/pp.c b/pp.c
index ba50627..6d77ca1 100644
--- a/pp.c
+++ b/pp.c
@@ -1484,7 +1484,7 @@ PP(pp_complement)
 
 	  send = tmps + len;
 	  while (tmps < send) {
-	    UV c = utf8_to_uv_chk(tmps, 0, &l, UTF8_ALLOW_ANY);
+	    UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
 	    tmps += UTF8SKIP(tmps);
 	    targlen += UNISKIP(~c);
 	  }
@@ -1493,7 +1493,7 @@ PP(pp_complement)
 	  tmps -= len;
 	  Newz(0, result, targlen + 1, U8);
 	  while (tmps < send) {
-	    UV c = utf8_to_uv_chk(tmps, 0, &l, UTF8_ALLOW_ANY);
+	    UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
 	    tmps += UTF8SKIP(tmps);
 	    result = uv_to_utf8(result,(UV)~c);
 	  }
@@ -2240,7 +2240,7 @@ PP(pp_ord)
     STRLEN retlen;
 
     if ((*tmps & 0x80) && DO_UTF8(tmpsv))
-	value = utf8_to_uv_chk(tmps, len, &retlen, 0);
+	value = utf8_to_uv(tmps, len, &retlen, 0);
     else
 	value = (UV)(*tmps & 255);
     XPUSHu(value);
@@ -2307,7 +2307,7 @@ PP(pp_ucfirst)
 	STRLEN ulen;
 	U8 tmpbuf[UTF8_MAXLEN];
 	U8 *tend;
-	UV uv = utf8_to_uv_chk(s, slen, &ulen, 0);
+	UV uv = utf8_to_uv(s, slen, &ulen, 0);
 
 	if (PL_op->op_private & OPpLOCALE) {
 	    TAINT;
@@ -2366,7 +2366,7 @@ PP(pp_lcfirst)
 	STRLEN ulen;
 	U8 tmpbuf[UTF8_MAXLEN];
 	U8 *tend;
-	UV uv = utf8_to_uv_chk(s, slen, &ulen, 0);
+	UV uv = utf8_to_uv(s, slen, &ulen, 0);
 
 	if (PL_op->op_private & OPpLOCALE) {
 	    TAINT;
@@ -2443,7 +2443,7 @@ PP(pp_uc)
 		TAINT;
 		SvTAINTED_on(TARG);
 		while (s < send) {
-		    d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv_chk(s, len, &ulen, 0)));
+		    d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
 		    s += ulen;
 		}
 	    }
@@ -2517,7 +2517,7 @@ PP(pp_lc)
 		TAINT;
 		SvTAINTED_on(TARG);
 		while (s < send) {
-		    d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv_chk(s, len, &ulen, 0)));
+		    d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
 		    s += ulen;
 		}
 	    }
@@ -3660,7 +3660,7 @@ PP(pp_unpack)
 	    if (checksum) {
 		while (len-- > 0 && s < strend) {
 		    STRLEN alen;
-		    auint = utf8_to_uv_chk((U8*)s, strend - s, &alen, 0);
+		    auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
 		    along = alen;
 		    s += along;
 		    if (checksum > 32)
@@ -3674,7 +3674,7 @@ PP(pp_unpack)
 		EXTEND_MORTAL(len);
 		while (len-- > 0 && s < strend) {
 		    STRLEN alen;
-		    auint = utf8_to_uv_chk((U8*)s, strend - s, &alen, 0);
+		    auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
 		    along = alen;
 		    s += along;
 		    sv = NEWSV(37, 0);
diff --git a/pp_ctl.c b/pp_ctl.c
index 33f91ee..a65cb1b 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -2975,13 +2975,13 @@ PP(pp_require)
 	    U8 *s = (U8*)SvPVX(sv);
 	    U8 *end = (U8*)SvPVX(sv) + SvCUR(sv);
 	    if (s < end) {
-		rev = utf8_to_uv_chk(s, end - s, &len, 0);
+		rev = utf8_to_uv(s, end - s, &len, 0);
 		s += len;
 		if (s < end) {
-		    ver = utf8_to_uv_chk(s, end - s, &len, 0);
+		    ver = utf8_to_uv(s, end - s, &len, 0);
 		    s += len;
 		    if (s < end)
-			sver = utf8_to_uv_chk(s, end - s, &len, 0);
+			sver = utf8_to_uv(s, end - s, &len, 0);
 		}
 	    }
 	    if (PERL_REVISION < rev
diff --git a/proto.h b/proto.h
index 14a6e47..06ef1b1 100644
--- a/proto.h
+++ b/proto.h
@@ -809,8 +809,8 @@ PERL_CALLCONV I32	Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
 PERL_CALLCONV U8*	Perl_utf8_hop(pTHX_ U8 *s, I32 off);
 PERL_CALLCONV U8*	Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
 PERL_CALLCONV U8*	Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len);
-PERL_CALLCONV UV	Perl_utf8_to_uv(pTHX_ U8 *s, STRLEN* retlen);
-PERL_CALLCONV UV	Perl_utf8_to_uv_chk(pTHX_ U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags);
+PERL_CALLCONV UV	Perl_utf8_to_uv_simple(pTHX_ U8 *s, STRLEN* retlen);
+PERL_CALLCONV UV	Perl_utf8_to_uv(pTHX_ U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags);
 PERL_CALLCONV U8*	Perl_uv_to_utf8(pTHX_ U8 *d, UV uv);
 PERL_CALLCONV void	Perl_vivify_defelem(pTHX_ SV* sv);
 PERL_CALLCONV void	Perl_vivify_ref(pTHX_ SV* sv, U32 to_what);
diff --git a/regcomp.c b/regcomp.c
index 3f2b10c..1946720 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2884,7 +2884,7 @@ tryagain:
 		default:
 		  normal_default:
 		    if ((*p & 0xc0) == 0xc0 && UTF) {
-			ender = utf8_to_uv_chk((U8*)p, PL_regxend - p,
+			ender = utf8_to_uv((U8*)p, PL_regxend - p,
 					       &numlen, 0);
 			p += numlen;
 		    }
@@ -3639,14 +3639,14 @@ S_regclassutf8(pTHX)
 	namedclass = OOB_NAMEDCLASS;
 	if (!range)
 	    rangebegin = PL_regcomp_parse;
-	value = utf8_to_uv_chk((U8*)PL_regcomp_parse,
+	value = utf8_to_uv((U8*)PL_regcomp_parse,
 			       PL_regxend - PL_regcomp_parse,
 			       &numlen, 0);
 	PL_regcomp_parse += numlen;
 	if (value == '[')
 	    namedclass = regpposixcc(value);
 	else if (value == '\\') {
-	    value = (U32)utf8_to_uv_chk((U8*)PL_regcomp_parse,
+	    value = (U32)utf8_to_uv((U8*)PL_regcomp_parse,
 					PL_regxend - PL_regcomp_parse,
 					&numlen, 0);
 	    PL_regcomp_parse += numlen;
diff --git a/regexec.c b/regexec.c
index 350f432..a71f1d8 100644
--- a/regexec.c
+++ b/regexec.c
@@ -917,7 +917,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 	    PL_reg_flags |= RF_tainted;
 	    /* FALL THROUGH */
 	case BOUNDUTF8:
-	    tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1),
+	    tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1),
 							strend - s,
 							0, 0) : '\n';
 	    tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
@@ -955,7 +955,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 	    PL_reg_flags |= RF_tainted;
 	    /* FALL THROUGH */
 	case NBOUNDUTF8:
-	    tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1),
+	    tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1),
 							strend - s,
 							0, 0) : '\n';
 	    tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
@@ -2002,7 +2002,7 @@ S_regmatch(pTHX_ regnode *prog)
 		while (s < e) {
 		    if (l >= PL_regeol)
 			sayNO;
-		    if (utf8_to_uv_chk((U8*)s, e - s, 0, 0) != (c1 ?
+		    if (utf8_to_uv((U8*)s, e - s, 0, 0) != (c1 ?
 						  toLOWER_utf8((U8*)l) :
 						  toLOWER_LC_utf8((U8*)l)))
 		    {
@@ -2140,7 +2140,7 @@ S_regmatch(pTHX_ regnode *prog)
 	case NBOUNDUTF8:
 	    /* was last char in word? */
 	    ln = (locinput != PL_regbol)
-		? utf8_to_uv_chk(reghop((U8*)locinput, -1),
+		? utf8_to_uv(reghop((U8*)locinput, -1),
 				 PL_regeol - locinput, 0, 0) : PL_regprev;
 	    if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
 		ln = isALNUM_uni(ln);
diff --git a/sv.c b/sv.c
index 2790cfd..726f853 100644
--- a/sv.c
+++ b/sv.c
@@ -6364,7 +6364,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
 		    break;
 		}
 		if (utf)
-		    iv = (IV)utf8_to_uv_chk(vecstr, veclen, &ulen, 0);
+		    iv = (IV)utf8_to_uv(vecstr, veclen, &ulen, 0);
 		else {
 		    iv = *vecstr;
 		    ulen = 1;
@@ -6447,7 +6447,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
 		    break;
 		}
 		if (utf)
-		    uv = utf8_to_uv_chk(vecstr, veclen, &ulen, 0);
+		    uv = utf8_to_uv(vecstr, veclen, &ulen, 0);
 		else {
 		    uv = *vecstr;
 		    ulen = 1;
diff --git a/toke.c b/toke.c
index 78ed359..3572b0e 100644
--- a/toke.c
+++ b/toke.c
@@ -816,7 +816,7 @@ Perl_str_to_version(pTHX_ SV *sv)
 	STRLEN skip;
 	UV n;
 	if (utf)
-	    n = utf8_to_uv_chk((U8*)start, len, &skip, 0);
+	    n = utf8_to_uv((U8*)start, len, &skip, 0);
 	else {
 	    n = *(U8*)start;
 	    skip = 1;
@@ -1331,7 +1331,7 @@ S_scan_const(pTHX_ char *start)
 	    STRLEN len;
 	    UV uv;
 
-	    uv = utf8_to_uv_chk((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
+	    uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
 	    if (len == 1) {
 		/* Illegal UTF8 (a high-bit byte), make it valid. */
 		char *old_pvx = SvPVX(sv);
diff --git a/utf8.c b/utf8.c
index 7bb34b7..80f8846 100644
--- a/utf8.c
+++ b/utf8.c
@@ -171,7 +171,7 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
 }
 
 /*
-=for apidoc Am|U8* s|utf8_to_uv_chk|STRLEN curlen|I32 *retlen|U32 flags
+=for apidoc Am|U8* s|utf8_to_uv|STRLEN curlen|I32 *retlen|U32 flags
 
 Returns the character value of the first character in the string C<s>
 which is assumed to be in UTF8 encoding and no longer than C<curlen>;
@@ -179,16 +179,15 @@ C<retlen> will be set to the length, in bytes, of that character,
 and the pointer C<s> will be advanced to the end of the character.
 
 If C<s> does not point to a well-formed UTF8 character, the behaviour
-is dependent on the value of C<checking>: if this is true, it is
-assumed that the caller will raise a warning, and this function will
-set C<retlen> to C<-1> and return. If C<checking> is not true, an optional UTF8
-warning is produced.
+is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
+it is assumed that the caller will raise a warning, and this function
+will set C<retlen> to C<-1> and return.  The C<flags> can also contain
+various flags to allow deviations from the strict UTF-8 encoding.
 
-=cut
-*/
+=cut */
 
 UV
-Perl_utf8_to_uv_chk(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
+Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 {
     dTHR;
     UV uv = *s, ouv;
@@ -324,7 +323,7 @@ malformed:
 }
 
 /*
-=for apidoc Am|U8* s|utf8_to_uv|STRLEN *retlen
+=for apidoc Am|U8* s|utf8_to_uv_simple|STRLEN *retlen
 
 Returns the character value of the first character in the string C<s>
 which is assumed to be in UTF8 encoding; C<retlen> will be set to the
@@ -338,9 +337,9 @@ returned and retlen is set, if possible, to -1.
 */
 
 UV
-Perl_utf8_to_uv(pTHX_ U8* s, STRLEN* retlen)
+Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen)
 {
-    return Perl_utf8_to_uv_chk(aTHX_ s, (STRLEN)-1, retlen, 0);
+    return Perl_utf8_to_uv(aTHX_ s, (STRLEN)-1, retlen, 0);
 }
 
 /* utf8_distance(a,b) returns the number of UTF8 characters between
@@ -400,30 +399,30 @@ Returns zero on failure, setting C<len> to -1.
 U8 *
 Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
 {
-    dTHR;
     U8 *send;
     U8 *d;
-    U8 *save;
-
-    send = s + *len;
-    d = save = s;
+    U8 *save = s;
 
     /* ensure valid UTF8 and chars < 256 before updating string */
-    while (s < send) {
-	U8 c = *s++;
+    for (send = s + *len; s < send; ) {
+        U8 c = *s++;
+
         if (c >= 0x80 &&
-	    ( (s >= send) || ((*s++ & 0xc0) != 0x80) || ((c & 0xfe) != 0xc2))) {
-	    *len = -1;
-	    return 0;
-	}
+            ((s >= send) ||
+	     ((*s++ & 0xc0) != 0x80) || ((c & 0xfe) != 0xc2))) {
+            *len = -1;
+            return 0;
+        }
     }
-    s = save;
+
+    d = s = save;
     while (s < send) {
-        if (*s < 0x80)
-            *d++ = *s++;
+        if (*s < 0x80) {
+	    *d++ = *s++;
+	}
         else {
             STRLEN ulen;
-            *d++ = (U8)utf8_to_uv(s, &ulen);
+            *d++ = (U8)utf8_to_uv_simple(s, &ulen);
             s += ulen;
         }
     }
@@ -951,7 +950,7 @@ Perl_to_utf8_upper(pTHX_ U8 *p)
     if (!PL_utf8_toupper)
 	PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0);
     uv = swash_fetch(PL_utf8_toupper, p);
-    return uv ? uv : utf8_to_uv_chk(p,STRLEN_MAX,0,0);
+    return uv ? uv : utf8_to_uv(p,STRLEN_MAX,0,0);
 }
 
 UV
@@ -962,7 +961,7 @@ Perl_to_utf8_title(pTHX_ U8 *p)
     if (!PL_utf8_totitle)
 	PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0);
     uv = swash_fetch(PL_utf8_totitle, p);
-    return uv ? uv : utf8_to_uv_chk(p,STRLEN_MAX,0,0);
+    return uv ? uv : utf8_to_uv(p,STRLEN_MAX,0,0);
 }
 
 UV
@@ -973,7 +972,7 @@ Perl_to_utf8_lower(pTHX_ U8 *p)
     if (!PL_utf8_tolower)
 	PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0);
     uv = swash_fetch(PL_utf8_tolower, p);
-    return uv ? uv : utf8_to_uv_chk(p,STRLEN_MAX,0,0);
+    return uv ? uv : utf8_to_uv(p,STRLEN_MAX,0,0);
 }
 
 /* a "swash" is a swatch hash */
@@ -1063,7 +1062,7 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
 	    PUSHMARK(SP);
 	    EXTEND(SP,3);
 	    PUSHs((SV*)sv);
-	    PUSHs(sv_2mortal(newSViv(utf8_to_uv_chk(ptr, STRLEN_MAX, 0, 0) & ~(needents - 1))));
+	    PUSHs(sv_2mortal(newSViv(utf8_to_uv(ptr, STRLEN_MAX, 0, 0) & ~(needents - 1))));
 	    PUSHs(sv_2mortal(newSViv(needents)));
 	    PUTBACK;
 	    if (call_method("SWASHGET", G_SCALAR))