Re-instate Perl_utf8_to_uv without checking parameter - added in change 7075.

diff --git a/doop.c b/doop.c

index 80cc0f6..b75ffaa 100644 (file)
--- a/doop.c
+++ b/doop.c
@@ -77,12 +77,12 @@ S_do_trans_simple(pTHX_ SV *sv)
 
         ulen = 1;
         /* Need to check this, otherwise 128..255 won't match */
-       c = utf8_to_uv(s, &ulen, 0);
+       c = utf8_to_uv_chk(s, &ulen, 0);
         if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
             matches++;
-            if (ch < 0x80) 
+            if (ch < 0x80)
                 *d++ = ch;
-            else         
+            else
                 d = uv_to_utf8(d,ch);
             s += ulen;
         }
@@ -125,7 +125,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
             I32 ulen;
             ulen = 1;
             if (hasutf)
-                c = utf8_to_uv(s,&ulen, 0);
+                c = utf8_to_uv_chk(s,&ulen, 0);
             else
                 c = *s;
             if (c < 0x100 && tbl[c] >= 0)
@@ -222,7 +222,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
     UV extra = none + 1;
     UV final;
     UV uv;
-    I32 isutf; 
+    I32 isutf;
     I32 howmany;
 
     isutf = SvUTF8(sv);
@@ -258,7 +258,7 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
            i = UTF8SKIP(s);
            s += i;
            matches++;
-            if (i > 1 && !isutf++) 
+            if (i > 1 && !isutf++)
                 HALF_UTF8_UPGRADE(dstart,d);
            d = uv_to_utf8(d, final);
        }
@@ -337,7 +337,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
     if (squash) {
        UV puv = 0xfeedface;
        while (s < send) {
-            if (SvUTF8(sv)) 
+            if (SvUTF8(sv))
                uv = swash_fetch(rv, s);
            else {
                U8 tmpbuf[2];
@@ -354,7 +354,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
            if (uv < none) {
                matches++;
                if (uv != puv) {
-                    if ((uv & 0x80) && !isutf++) 
+                    if ((uv & 0x80) && !isutf++)
                         HALF_UTF8_UPGRADE(dst,d);
                    d = uv_to_utf8(d, uv);
                    puv = uv;
@@ -364,7 +364,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
            }
            else if (uv == none) {      /* "none" is unmapped character */
                I32 ulen;
-               *d++ = (U8)utf8_to_uv(s, &ulen, 0);
+               *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
                s += ulen;
                puv = 0xfeedface;
                continue;
@@ -384,7 +384,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
     }
     else {
        while (s < send) {
-            if (SvUTF8(sv)) 
+            if (SvUTF8(sv))
                uv = swash_fetch(rv, s);
            else {
                U8 tmpbuf[2];
@@ -405,7 +405,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
            }
            else if (uv == none) {      /* "none" is unmapped character */
                I32 ulen;
-               *d++ = (U8)utf8_to_uv(s, &ulen, 0);
+               *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
                s += ulen;
                continue;
            }
@@ -435,7 +435,7 @@ Perl_do_trans(pTHX_ SV *sv)
 {
     dTHR;
     STRLEN len;
-    I32 hasutf = (PL_op->op_private & 
+    I32 hasutf = (PL_op->op_private &
                     (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
 
     if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
@@ -547,7 +547,7 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
 
     if (offset < 0)
        return retnum;
-    if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 
+    if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
        Perl_croak(aTHX_ "Illegal number of bits in vec");
 
     if (SvUTF8(sv)) {
@@ -625,7 +625,7 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
                        ((UV) s[offset + 4] << 24) +
                        ((UV) s[offset + 5] << 16);
                else
-                   retnum = 
+                   retnum =
                        ((UV) s[offset    ] << 56) +
                        ((UV) s[offset + 1] << 48) +
                        ((UV) s[offset + 2] << 40) +
@@ -708,9 +708,9 @@ Perl_do_vecset(pTHX_ SV *sv)
     if (offset < 0)
        Perl_croak(aTHX_ "Assigning to negative offset in vec");
     size = LvTARGLEN(sv);
-    if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 
+    if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */
        Perl_croak(aTHX_ "Illegal number of bits in vec");
-    
+
     offset *= size;                    /* turn into bit offset */
     len = (offset + size + 7) / 8;     /* required number of bytes */
     if (len > targlen) {
@@ -718,7 +718,7 @@ Perl_do_vecset(pTHX_ SV *sv)
        (void)memzero((char *)(s + targlen), len - targlen + 1);
        SvCUR_set(targ, len);
     }
-    
+
     if (size < 8) {
        mask = (1 << size) - 1;
        size = offset & 7;
@@ -767,7 +767,7 @@ Perl_do_chop(pTHX_ register SV *astr, register SV *sv)
     STRLEN len;
     char *s;
     dTHR;
-    
+
     if (SvTYPE(sv) == SVt_PVAV) {
        register I32 i;
         I32 max;
@@ -901,7 +901,7 @@ Perl_do_chomp(pTHX_ register SV *sv)
   nope:
     SvSETMAGIC(sv);
     return count;
-} 
+}
 
 void
 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
@@ -969,10 +969,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
        switch (optype) {
        case OP_BIT_AND:
            while (lulen && rulen) {
-               luc = utf8_to_uv((U8*)lc, &ulen, 0);
+               luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
                lc += ulen;
                lulen -= ulen;
-               ruc = utf8_to_uv((U8*)rc, &ulen, 0);
+               ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
                rc += ulen;
                rulen -= ulen;
                duc = luc & ruc;
@@ -984,10 +984,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
            break;
        case OP_BIT_XOR:
            while (lulen && rulen) {
-               luc = utf8_to_uv((U8*)lc, &ulen, 0);
+               luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
                lc += ulen;
                lulen -= ulen;
-               ruc = utf8_to_uv((U8*)rc, &ulen, 0);
+               ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
                rc += ulen;
                rulen -= ulen;
                duc = luc ^ ruc;
@@ -996,10 +996,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
            goto mop_up_utf;
        case OP_BIT_OR:
            while (lulen && rulen) {
-               luc = utf8_to_uv((U8*)lc, &ulen, 0);
+               luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
                lc += ulen;
                lulen -= ulen;
-               ruc = utf8_to_uv((U8*)rc, &ulen, 0);
+               ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
                rc += ulen;
                rulen -= ulen;
                duc = luc | ruc;
@@ -1107,8 +1107,8 @@ Perl_do_kv(pTHX)
     I32 dokeys =   (PL_op->op_type == OP_KEYS);
     I32 dovalues = (PL_op->op_type == OP_VALUES);
     I32 realhv = (SvTYPE(hv) == SVt_PVHV);
-    
-    if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV) 
+
+    if (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV)
        dokeys = dovalues = TRUE;
 
     if (!hv) {
diff --git a/embed.h b/embed.h

index 7e030a9..404acfa 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -730,6 +730,7 @@
 #define utf8_to_bytes          Perl_utf8_to_bytes
 #define bytes_to_utf8          Perl_bytes_to_utf8
 #define utf8_to_uv             Perl_utf8_to_uv
+#define utf8_to_uv_chk         Perl_utf8_to_uv_chk
 #define uv_to_utf8             Perl_uv_to_utf8
 #define vivify_defelem         Perl_vivify_defelem
 #define vivify_ref             Perl_vivify_ref
@@ -2186,7 +2187,8 @@
 #define utf8_hop(a,b)          Perl_utf8_hop(aTHX_ a,b)
 #define utf8_to_bytes(a,b)     Perl_utf8_to_bytes(aTHX_ a,b)
 #define bytes_to_utf8(a,b)     Perl_bytes_to_utf8(aTHX_ a,b)
-#define utf8_to_uv(a,b,c)      Perl_utf8_to_uv(aTHX_ a,b,c)
+#define utf8_to_uv(a,b)                Perl_utf8_to_uv(aTHX_ a,b)
+#define utf8_to_uv_chk(a,b,c)  Perl_utf8_to_uv_chk(aTHX_ a,b,c)
 #define uv_to_utf8(a,b)                Perl_uv_to_utf8(aTHX_ a,b)
 #define vivify_defelem(a)      Perl_vivify_defelem(aTHX_ a)
 #define vivify_ref(a,b)                Perl_vivify_ref(aTHX_ a,b)
@@ -4286,6 +4288,8 @@
 #define bytes_to_utf8          Perl_bytes_to_utf8
 #define Perl_utf8_to_uv                CPerlObj::Perl_utf8_to_uv
 #define utf8_to_uv             Perl_utf8_to_uv
+#define Perl_utf8_to_uv_chk    CPerlObj::Perl_utf8_to_uv_chk
+#define utf8_to_uv_chk         Perl_utf8_to_uv_chk
 #define Perl_uv_to_utf8                CPerlObj::Perl_uv_to_utf8
 #define uv_to_utf8             Perl_uv_to_utf8
 #define Perl_vivify_defelem    CPerlObj::Perl_vivify_defelem
diff --git a/embed.pl b/embed.pl

index f84be4a..5ae80eb 100755 (executable)
--- a/embed.pl
+++ b/embed.pl
@@ -2074,7 +2074,8 @@ Ap        |I32    |utf8_distance  |U8 *a|U8 *b
 Ap     |U8*    |utf8_hop       |U8 *s|I32 off
 ApM    |U8*    |utf8_to_bytes  |U8 *s|STRLEN *len
 ApM    |U8*    |bytes_to_utf8  |U8 *s|STRLEN *len
-Ap     |UV     |utf8_to_uv     |U8 *s|I32* retlen|bool checking
+Ap     |UV     |utf8_to_uv     |U8 *s|I32* retlen
+Ap     |UV     |utf8_to_uv_chk |U8 *s|I32* retlen|bool checking
 Ap     |U8*    |uv_to_utf8     |U8 *d|UV uv
 p      |void   |vivify_defelem |SV* sv
 p      |void   |vivify_ref     |SV* sv|U32 to_what
diff --git a/global.sym b/global.sym

index 080d78c..0dea03e 100644 (file)
--- a/global.sym
+++ b/global.sym
@@ -466,6 +466,7 @@ Perl_utf8_hop
 Perl_utf8_to_bytes
 Perl_bytes_to_utf8
 Perl_utf8_to_uv
+Perl_utf8_to_uv_chk
 Perl_uv_to_utf8
 Perl_warn
 Perl_vwarn
diff --git a/handy.h b/handy.h

index c240c42..f0e39af 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -48,10 +48,10 @@ Null SV pointer.
    just figure out all the headers such a test needs.
    Andy Dougherty      August 1996
 */
-/* bool is built-in for g++-2.6.3 and later, which might be used 
+/* bool is built-in for g++-2.6.3 and later, which might be used
    for extensions.  <_G_config.h> defines _G_HAVE_BOOL, but we can't
    be sure _G_config.h will be included before this file.  _G_config.h
-   also defines _G_HAVE_BOOL for both gcc and g++, but only g++ 
+   also defines _G_HAVE_BOOL for both gcc and g++, but only g++
    actually has bool.  Hence, _G_HAVE_BOOL is pretty useless for us.
    g++ can be identified by __GNUG__.
    Andy Dougherty      February 2000
@@ -101,8 +101,8 @@ Null SV pointer.
    Similarly, there is no guarantee that I16 and U16 have exactly 16
    bits.
 
-   For dealing with issues that may arise from various 32/64-bit 
-   systems, we will ask Configure to check out 
+   For dealing with issues that may arise from various 32/64-bit
+   systems, we will ask Configure to check out
 
        SHORTSIZE == sizeof(short)
        INTSIZE == sizeof(int)
@@ -448,21 +448,21 @@ Converts the specified character to lowercase.
 #define isPSXSPC_utf8(c)       (isSPACE_utf8(c) ||(c) == '\f')
 #define isBLANK_utf8(c)                isBLANK(c) /* could be wrong */
 
-#define isALNUM_LC_utf8(p)     isALNUM_LC_uni(utf8_to_uv(p, 0, 0))
-#define isIDFIRST_LC_utf8(p)   isIDFIRST_LC_uni(utf8_to_uv(p, 0, 0))
-#define isALPHA_LC_utf8(p)     isALPHA_LC_uni(utf8_to_uv(p, 0, 0))
-#define isSPACE_LC_utf8(p)     isSPACE_LC_uni(utf8_to_uv(p, 0, 0))
-#define isDIGIT_LC_utf8(p)     isDIGIT_LC_uni(utf8_to_uv(p, 0, 0))
-#define isUPPER_LC_utf8(p)     isUPPER_LC_uni(utf8_to_uv(p, 0, 0))
-#define isLOWER_LC_utf8(p)     isLOWER_LC_uni(utf8_to_uv(p, 0, 0))
-#define isALNUMC_LC_utf8(p)    isALNUMC_LC_uni(utf8_to_uv(p, 0, 0))
-#define isCNTRL_LC_utf8(p)     isCNTRL_LC_uni(utf8_to_uv(p, 0, 0))
-#define isGRAPH_LC_utf8(p)     isGRAPH_LC_uni(utf8_to_uv(p, 0, 0))
-#define isPRINT_LC_utf8(p)     isPRINT_LC_uni(utf8_to_uv(p, 0, 0))
-#define isPUNCT_LC_utf8(p)     isPUNCT_LC_uni(utf8_to_uv(p, 0, 0))
-#define toUPPER_LC_utf8(p)     toUPPER_LC_uni(utf8_to_uv(p, 0, 0))
-#define toTITLE_LC_utf8(p)     toTITLE_LC_uni(utf8_to_uv(p, 0, 0))
-#define toLOWER_LC_utf8(p)     toLOWER_LC_uni(utf8_to_uv(p, 0, 0))
+#define isALNUM_LC_utf8(p)     isALNUM_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isIDFIRST_LC_utf8(p)   isIDFIRST_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isALPHA_LC_utf8(p)     isALPHA_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isSPACE_LC_utf8(p)     isSPACE_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isDIGIT_LC_utf8(p)     isDIGIT_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isUPPER_LC_utf8(p)     isUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isLOWER_LC_utf8(p)     isLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isALNUMC_LC_utf8(p)    isALNUMC_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isCNTRL_LC_utf8(p)     isCNTRL_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isGRAPH_LC_utf8(p)     isGRAPH_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isPRINT_LC_utf8(p)     isPRINT_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define isPUNCT_LC_utf8(p)     isPUNCT_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define toUPPER_LC_utf8(p)     toUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define toTITLE_LC_utf8(p)     toTITLE_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define toLOWER_LC_utf8(p)     toLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0))
 
 #define isPSXSPC_LC_utf8(c)    (isSPACE_LC_utf8(c) ||(c) == '\f')
 #define isBLANK_LC_utf8(c)     isBLANK(c) /* could be wrong */
@@ -484,7 +484,7 @@ typedef U16 line_t;
 #endif
 
 
-/* 
+/*
    XXX LEAKTEST doesn't really work in perl5.  There are direct calls to
    safemalloc() in the source, so LEAKTEST won't pick them up.
    (The main "offenders" are extensions.)
@@ -501,7 +501,7 @@ typedef U16 line_t;
 Creates a new SV.  A non-zero C<len> parameter indicates the number of
 bytes of preallocated string space the SV should have.  An extra byte for a
 tailing NUL is also reserved.  (SvPOK is not set for the SV even if string
-space is allocated.)  The reference count for the new SV is set to 1. 
+space is allocated.)  The reference count for the new SV is set to 1.
 C<id> is an integer id between 0 and 1299 (used to identify leaks).
 
 =for apidoc Am|void|New|int id|void* ptr|int nitems|type
diff --git a/objXSUB.h b/objXSUB.h

index 00184c9..bc04f03 100644 (file)
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -1873,6 +1873,10 @@
 #define Perl_utf8_to_uv                pPerl->Perl_utf8_to_uv
 #undef  utf8_to_uv
 #define utf8_to_uv             Perl_utf8_to_uv
+#undef  Perl_utf8_to_uv_chk
+#define Perl_utf8_to_uv_chk    pPerl->Perl_utf8_to_uv_chk
+#undef  utf8_to_uv_chk
+#define utf8_to_uv_chk         Perl_utf8_to_uv_chk
 #undef  Perl_uv_to_utf8
 #define Perl_uv_to_utf8                pPerl->Perl_uv_to_utf8
 #undef  uv_to_utf8
diff --git a/op.c b/op.c

index d24396a..4856d98 100644 (file)
--- a/op.c
+++ b/op.c
@@ -2656,7 +2656,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
            qsort(cp, i, sizeof(U8*), utf8compare);
            for (j = 0; j < i; j++) {
                U8 *s = cp[j];
-               UV val = utf8_to_uv(s, &ulen, 0);
+               UV val = utf8_to_uv_chk(s, &ulen, 0);
                s += ulen;
                diff = val - nextmin;
                if (diff > 0) {
@@ -2669,7 +2669,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                    }
                }
                if (*s == 0xff)
-                   val = utf8_to_uv(s+1, &ulen, 0);
+                   val = utf8_to_uv_chk(s+1, &ulen, 0);
                if (val >= nextmin)
                    nextmin = val + 1;
            }
@@ -2696,10 +2696,10 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
        while (t < tend || tfirst <= tlast) {
            /* see if we need more "t" chars */
            if (tfirst > tlast) {
-               tfirst = (I32)utf8_to_uv(t, &ulen, 0);
+               tfirst = (I32)utf8_to_uv_chk(t, &ulen, 0);
                t += ulen;
                if (t < tend && *t == 0xff) {   /* illegal utf8 val indicates range */
-                   tlast = (I32)utf8_to_uv(++t, &ulen, 0);
+                   tlast = (I32)utf8_to_uv_chk(++t, &ulen, 0);
                    t += ulen;
                }
                else
@@ -2709,10 +2709,10 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
            /* now see if we need more "r" chars */
            if (rfirst > rlast) {
                if (r < rend) {
-                   rfirst = (I32)utf8_to_uv(r, &ulen, 0);
+                   rfirst = (I32)utf8_to_uv_chk(r, &ulen, 0);
                    r += ulen;
                    if (r < rend && *r == 0xff) {       /* illegal utf8 val indicates range */
-                       rlast = (I32)utf8_to_uv(++r, &ulen, 0);
+                       rlast = (I32)utf8_to_uv_chk(++r, &ulen, 0);
                        r += ulen;
                    }
                    else
diff --git a/perlapi.c b/perlapi.c

index 614f94f..39a13ba 100644 (file)
--- a/perlapi.c
+++ b/perlapi.c
@@ -3380,9 +3380,16 @@ Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN *len)
 
 #undef  Perl_utf8_to_uv
 UV
-Perl_utf8_to_uv(pTHXo_ U8 *s, I32* retlen, bool checking)
+Perl_utf8_to_uv(pTHXo_ U8 *s, I32* retlen)
 {
-    return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, retlen, checking);
+    return ((CPerlObj*)pPerl)->Perl_utf8_to_uv(s, retlen);
+}
+
+#undef  Perl_utf8_to_uv_chk
+UV
+Perl_utf8_to_uv_chk(pTHXo_ U8 *s, I32* retlen, bool checking)
+{
+    return ((CPerlObj*)pPerl)->Perl_utf8_to_uv_chk(s, retlen, checking);
 }
 
 #undef  Perl_uv_to_utf8
diff --git a/pod/perlapi.pod b/pod/perlapi.pod

index 5d5bc5f..78d6fa4 100644 (file)
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -2355,19 +2355,19 @@ false, defined or undefined.  Does not handle 'get' magic.
 =for hackers
 Found in file sv.h
 
-=item svtype
+=item SvTYPE
 
-An enum of flags for Perl types.  These are found in the file B<sv.h> 
-in the C<svtype> enum.  Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV.  See C<svtype>.
+
+       svtype  SvTYPE(SV* sv)
 
 =for hackers
 Found in file sv.h
 
-=item SvTYPE
-
-Returns the type of the SV.  See C<svtype>.
+=item svtype
 
-       svtype  SvTYPE(SV* sv)
+An enum of flags for Perl types.  These are found in the file B<sv.h> 
+in the C<svtype> enum.  Test these flags with the C<SvTYPE> macro.
 
 =for hackers
 Found in file sv.h
diff --git a/pp.c b/pp.c

index d4a1df0..01cb070 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -2195,7 +2195,7 @@ PP(pp_ord)
     I32 retlen;
 
     if ((*tmps & 0x80) && DO_UTF8(tmpsv))
-       value = utf8_to_uv(tmps, &retlen, 0);
+       value = utf8_to_uv_chk(tmps, &retlen, 0);
     else
        value = (UV)(*tmps & 255);
     XPUSHu(value);
@@ -2262,7 +2262,7 @@ PP(pp_ucfirst)
        I32 ulen;
        U8 tmpbuf[UTF8_MAXLEN];
        U8 *tend;
-       UV uv = utf8_to_uv(s, &ulen, 0);
+       UV uv = utf8_to_uv_chk(s, &ulen, 0);
 
        if (PL_op->op_private & OPpLOCALE) {
            TAINT;
@@ -2321,7 +2321,7 @@ PP(pp_lcfirst)
        I32 ulen;
        U8 tmpbuf[UTF8_MAXLEN];
        U8 *tend;
-       UV uv = utf8_to_uv(s, &ulen, 0);
+       UV uv = utf8_to_uv_chk(s, &ulen, 0);
 
        if (PL_op->op_private & OPpLOCALE) {
            TAINT;
@@ -2398,7 +2398,7 @@ PP(pp_uc)
                TAINT;
                SvTAINTED_on(TARG);
                while (s < send) {
-                   d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, &ulen, 0)));
+                   d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv_chk(s, &ulen, 0)));
                    s += ulen;
                }
            }
@@ -2472,7 +2472,7 @@ PP(pp_lc)
                TAINT;
                SvTAINTED_on(TARG);
                while (s < send) {
-                   d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, &ulen, 0)));
+                   d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv_chk(s, &ulen, 0)));
                    s += ulen;
                }
            }
@@ -3614,7 +3614,7 @@ PP(pp_unpack)
                len = strend - s;
            if (checksum) {
                while (len-- > 0 && s < strend) {
-                   auint = utf8_to_uv((U8*)s, &along, 0);
+                   auint = utf8_to_uv_chk((U8*)s, &along, 0);
                    s += along;
                    if (checksum > 32)
                        cdouble += (NV)auint;
@@ -3626,7 +3626,7 @@ PP(pp_unpack)
                EXTEND(SP, len);
                EXTEND_MORTAL(len);
                while (len-- > 0 && s < strend) {
-                   auint = utf8_to_uv((U8*)s, &along, 0);
+                   auint = utf8_to_uv_chk((U8*)s, &along, 0);
                    s += along;
                    sv = NEWSV(37, 0);
                    sv_setuv(sv, (UV)auint);
diff --git a/pp_ctl.c b/pp_ctl.c

index 3cc74e5..254cce8 100644 (file)
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -2959,13 +2959,13 @@ PP(pp_require)
            U8 *s = (U8*)SvPVX(sv);
            U8 *end = (U8*)SvPVX(sv) + SvCUR(sv);
            if (s < end) {
-               rev = utf8_to_uv(s, &len, 0);
+               rev = utf8_to_uv_chk(s, &len, 0);
                s += len;
                if (s < end) {
-                   ver = utf8_to_uv(s, &len, 0);
+                   ver = utf8_to_uv_chk(s, &len, 0);
                    s += len;
                    if (s < end)
-                       sver = utf8_to_uv(s, &len, 0);
+                       sver = utf8_to_uv_chk(s, &len, 0);
                }
            }
            if (PERL_REVISION < rev
diff --git a/proto.h b/proto.h

index 79b584d..604a664 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -809,7 +809,8 @@ PERL_CALLCONV I32   Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
 PERL_CALLCONV U8*      Perl_utf8_hop(pTHX_ U8 *s, I32 off);
 PERL_CALLCONV U8*      Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
 PERL_CALLCONV U8*      Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len);
-PERL_CALLCONV UV       Perl_utf8_to_uv(pTHX_ U8 *s, I32* retlen, bool checking);
+PERL_CALLCONV UV       Perl_utf8_to_uv(pTHX_ U8 *s, I32* retlen);
+PERL_CALLCONV UV       Perl_utf8_to_uv_chk(pTHX_ U8 *s, I32* retlen, bool checking);
 PERL_CALLCONV U8*      Perl_uv_to_utf8(pTHX_ U8 *d, UV uv);
 PERL_CALLCONV void     Perl_vivify_defelem(pTHX_ SV* sv);
 PERL_CALLCONV void     Perl_vivify_ref(pTHX_ SV* sv, U32 to_what);
diff --git a/regcomp.c b/regcomp.c

index 766b84c..e7042ea 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -2884,7 +2884,7 @@ tryagain:
                default:
                  normal_default:
                    if ((*p & 0xc0) == 0xc0 && UTF) {
-                       ender = utf8_to_uv((U8*)p, &numlen, 0);
+                       ender = utf8_to_uv_chk((U8*)p, &numlen, 0);
                        p += numlen;
                    }
                    else
@@ -3638,12 +3638,12 @@ S_regclassutf8(pTHX)
        namedclass = OOB_NAMEDCLASS;
        if (!range)
            rangebegin = PL_regcomp_parse;
-       value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen, 0);
+       value = utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0);
        PL_regcomp_parse += numlen;
        if (value == '[')
            namedclass = regpposixcc(value);
        else if (value == '\\') {
-           value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, &numlen, 0);
+           value = (U32)utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0);
            PL_regcomp_parse += numlen;
            /* Some compilers cannot handle switching on 64-bit integer
             * values, therefore value cannot be an UV.  Yes, this will
diff --git a/regexec.c b/regexec.c

index 990791b..ea52383 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -914,7 +914,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
        case BOUNDUTF8:
-           tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0, 0) : '\n';
+           tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n';
            tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
            while (s < strend) {
                if (tmp == !(OP(c) == BOUNDUTF8 ?
@@ -950,7 +950,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
        case NBOUNDUTF8:
-           tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0, 0) : '\n';
+           tmp = (I32)(s != startpos) ? utf8_to_uv_chk(reghop((U8*)s, -1), 0, 0) : '\n';
            tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
            while (s < strend) {
                if (tmp == !(OP(c) == NBOUNDUTF8 ?
@@ -1995,7 +1995,7 @@ S_regmatch(pTHX_ regnode *prog)
                while (s < e) {
                    if (l >= PL_regeol)
                        sayNO;
-                   if (utf8_to_uv((U8*)s, 0, 0) != (c1 ?
+                   if (utf8_to_uv_chk((U8*)s, 0, 0) != (c1 ?
                                                  toLOWER_utf8((U8*)l) :
                                                  toLOWER_LC_utf8((U8*)l)))
                    {
@@ -2133,7 +2133,7 @@ S_regmatch(pTHX_ regnode *prog)
        case NBOUNDUTF8:
            /* was last char in word? */
            ln = (locinput != PL_regbol)
-               ? utf8_to_uv(reghop((U8*)locinput, -1), 0, 0) : PL_regprev;
+               ? utf8_to_uv_chk(reghop((U8*)locinput, -1), 0, 0) : PL_regprev;
            if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
                ln = isALNUM_uni(ln);
                n = swash_fetch(PL_utf8_alnum, (U8*)locinput);
diff --git a/sv.c b/sv.c

index 561d9d9..d584c54 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -6318,7 +6318,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
                    break;
                }
                if (utf)
-                   iv = (IV)utf8_to_uv(vecstr, &ulen, 0);
+                   iv = (IV)utf8_to_uv_chk(vecstr, &ulen, 0);
                else {
                    iv = *vecstr;
                    ulen = 1;
@@ -6400,7 +6400,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
                    break;
                }
                if (utf)
-                   uv = utf8_to_uv(vecstr, &ulen, 0);
+                   uv = utf8_to_uv_chk(vecstr, &ulen, 0);
                else {
                    uv = *vecstr;
                    ulen = 1;
diff --git a/toke.c b/toke.c

index 783f282..9834d90 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -812,7 +812,7 @@ Perl_str_to_version(pTHX_ SV *sv)
        I32 skip;
        UV n;
        if (utf)
-           n = utf8_to_uv((U8*)start, &skip, 0);
+           n = utf8_to_uv_chk((U8*)start, &skip, 0);
        else {
            n = *(U8*)start;
            skip = 1;
@@ -1323,7 +1323,7 @@ S_scan_const(pTHX_ char *start)
        /* (now in tr/// code again) */
 
        if (*s & 0x80 && thisutf) {
-          (void)utf8_to_uv((U8*)s, &len, 0);
+          (void)utf8_to_uv_chk((U8*)s, &len, 0);
           if (len == 1) {
               /* illegal UTF8, make it valid */
               char *old_pvx = SvPVX(sv);
diff --git a/utf8.c b/utf8.c

index d23c9f7..3ab402c 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -143,7 +143,7 @@ string, false otherwise.
 =cut
 */
 
-bool 
+bool
 Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
 {
     U8* x=s;
@@ -159,7 +159,7 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
 }
 
 /*
-=for apidoc Am|U8* s|utf8_to_uv|I32 *retlen|I32 checking
+=for apidoc Am|U8* s|utf8_to_uv_chk|I32 *retlen|I32 checking
 
 Returns the character value of the first character in the string C<s>
 which is assumed to be in UTF8 encoding; C<retlen> will be set to the
@@ -176,7 +176,7 @@ warning is produced.
 */
 
 UV
-Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking)
+Perl_utf8_to_uv_chk(pTHX_ U8* s, I32* retlen, bool checking)
 {
     UV uv = *s;
     int len;
@@ -192,7 +192,7 @@ Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking)
            return 0;
        }
 
-       if (ckWARN_d(WARN_UTF8))     
+       if (ckWARN_d(WARN_UTF8))
            Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character");
        if (retlen)
            *retlen = 1;
@@ -219,7 +219,7 @@ Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking)
                return 0;
             }
 
-           if (ckWARN_d(WARN_UTF8))     
+           if (ckWARN_d(WARN_UTF8))
                Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character");
            if (retlen)
                *retlen -= len + 1;
@@ -231,6 +231,26 @@ Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen, bool checking)
     return uv;
 }
 
+/*
+=for apidoc Am|U8* s|utf8_to_uv|I32 *retlen
+
+Returns the character value of the first character in the string C<s>
+which is assumed to be in UTF8 encoding; C<retlen> will be set to the
+length, in bytes, of that character, and the pointer C<s> will be
+advanced to the end of the character.
+
+If C<s> does not point to a well-formed UTF8 character, an optional UTF8
+warning is produced.
+
+=cut
+*/
+
+UV
+Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen)
+{
+ return Perl_utf8_to_uv_chk(aTHX_ s, retlen, 0);
+}
+
 /* utf8_distance(a,b) returns the number of UTF8 characters between
    the pointers a and b                                                        */
 
@@ -302,7 +322,7 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
         if (c >= 0x80 &&
            ( (s >= send) || ((*s++ & 0xc0) != 0x80) || ((c & 0xfe) != 0xc2))) {
            *len = -1;
-           return 0;    
+           return 0;
        }
     }
     s = save;
@@ -311,7 +331,7 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
             *d++ = *s++;
         else {
             I32 ulen;
-            *d++ = (U8)utf8_to_uv(s, &ulen, 0);
+            *d++ = (U8)utf8_to_uv(s, &ulen);
             s += ulen;
         }
     }
@@ -839,7 +859,7 @@ Perl_to_utf8_upper(pTHX_ U8 *p)
     if (!PL_utf8_toupper)
        PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0);
     uv = swash_fetch(PL_utf8_toupper, p);
-    return uv ? uv : utf8_to_uv(p,0,0);
+    return uv ? uv : utf8_to_uv_chk(p,0,0);
 }
 
 UV
@@ -850,7 +870,7 @@ Perl_to_utf8_title(pTHX_ U8 *p)
     if (!PL_utf8_totitle)
        PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0);
     uv = swash_fetch(PL_utf8_totitle, p);
-    return uv ? uv : utf8_to_uv(p,0,0);
+    return uv ? uv : utf8_to_uv_chk(p,0,0);
 }
 
 UV
@@ -861,7 +881,7 @@ Perl_to_utf8_lower(pTHX_ U8 *p)
     if (!PL_utf8_tolower)
        PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0);
     uv = swash_fetch(PL_utf8_tolower, p);
-    return uv ? uv : utf8_to_uv(p,0,0);
+    return uv ? uv : utf8_to_uv_chk(p,0,0);
 }
 
 /* a "swash" is a swatch hash */
@@ -871,7 +891,7 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none)
 {
     SV* retval;
     char tmpbuf[256];
-    dSP;    
+    dSP;
 
     if (!gv_stashpv(pkg, 0)) { /* demand load utf8 */
        ENTER;
@@ -895,7 +915,7 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none)
     if (PL_curcop == &PL_compiling)    /* XXX ought to be handled by lex_start */
        strncpy(tmpbuf, PL_tokenbuf, sizeof tmpbuf);
     if (call_method("SWASHNEW", G_SCALAR))
-       retval = newSVsv(*PL_stack_sp--);    
+       retval = newSVsv(*PL_stack_sp--);
     else
        retval = &PL_sv_undef;
     LEAVE;
@@ -951,11 +971,11 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr)
            PUSHMARK(SP);
            EXTEND(SP,3);
            PUSHs((SV*)sv);
-           PUSHs(sv_2mortal(newSViv(utf8_to_uv(ptr, 0, 0) & ~(needents - 1))));
+           PUSHs(sv_2mortal(newSViv(utf8_to_uv_chk(ptr, 0, 0) & ~(needents - 1))));
            PUSHs(sv_2mortal(newSViv(needents)));
            PUTBACK;
            if (call_method("SWASHGET", G_SCALAR))
-               retval = newSVsv(*PL_stack_sp--);    
+               retval = newSVsv(*PL_stack_sp--);
            else
                retval = &PL_sv_undef;
            POPSTACK;
doop.c		patch \| blob \| blame \| history
embed.h		patch \| blob \| blame \| history
embed.pl		patch \| blob \| blame \| history
global.sym		patch \| blob \| blame \| history
handy.h		patch \| blob \| blame \| history
objXSUB.h		patch \| blob \| blame \| history
op.c		patch \| blob \| blame \| history
perlapi.c		patch \| blob \| blame \| history
pod/perlapi.pod		patch \| blob \| blame \| history
pp.c		patch \| blob \| blame \| history
pp_ctl.c		patch \| blob \| blame \| history
proto.h		patch \| blob \| blame \| history
regcomp.c		patch \| blob \| blame \| history
regexec.c		patch \| blob \| blame \| history
sv.c		patch \| blob \| blame \| history
toke.c		patch \| blob \| blame \| history
utf8.c		patch \| blob \| blame \| history