Make uv_to_utf8() to zero-terminate its output buffer,
Jarkko Hietaniemi [Sun, 3 Dec 2000 20:57:19 +0000 (20:57 +0000)]
always use (at least) UTF8_MAXLEN + 1 U8s deep buffer.

p4raw-id: //depot/perl@7967

op.c
pp.c
regcomp.c
regexec.c
sv.c
toke.c
utf8.c

diff --git a/op.c b/op.c
index 9d00b7b..50db696 100644 (file)
--- a/op.c
+++ b/op.c
@@ -2646,7 +2646,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
        I32 to_utf      = o->op_private & OPpTRANS_TO_UTF;
 
        if (complement) {
-           U8 tmpbuf[UTF8_MAXLEN];
+           U8 tmpbuf[UTF8_MAXLEN+1];
            U8** cp;
            I32* cl;
            UV nextmin = 0;
diff --git a/pp.c b/pp.c
index 17beb6c..10e6c6a 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -2321,7 +2321,7 @@ PP(pp_ucfirst)
 
     if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
        STRLEN ulen;
-       U8 tmpbuf[UTF8_MAXLEN];
+       U8 tmpbuf[UTF8_MAXLEN+1];
        U8 *tend;
        UV uv = utf8_to_uv(s, slen, &ulen, 0);
 
@@ -2380,7 +2380,7 @@ PP(pp_lcfirst)
 
     if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
        STRLEN ulen;
-       U8 tmpbuf[UTF8_MAXLEN];
+       U8 tmpbuf[UTF8_MAXLEN+1];
        U8 *tend;
        UV uv = utf8_to_uv(s, slen, &ulen, 0);
 
@@ -4727,7 +4727,7 @@ PP(pp_pack)
            while (len-- > 0) {
                fromstr = NEXTFROM;
                auint = SvUV(fromstr);
-               SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN);
+               SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1);
                SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint)
                               - SvPVX(cat));
            }
index 3b4f481..cf100d7 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4029,13 +4029,7 @@ STATIC void
 S_reguni(pTHX_ RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp)
 {
     dTHR;
-    if (SIZE_ONLY) {
-       U8 tmpbuf[UTF8_MAXLEN];
-       *lenp = uv_to_utf8(tmpbuf, uv) - tmpbuf;
-    }
-    else
-       *lenp = uv_to_utf8((U8*)s, uv) - (U8*)s;
-
+    *lenp = SIZE_ONLY ? UNISKIP(uv) : (uv_to_utf8((U8*)s, uv) - (U8*)s);
 }
 
 /*
index 18c06d5..1f79f30 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -3795,7 +3795,7 @@ S_reginclassutf8(pTHX_ regnode *f, U8 *p)
     if (swash_fetch(sv, p))
        match = TRUE;
     else if (flags & ANYOF_FOLD) {
-       U8 tmpbuf[UTF8_MAXLEN];
+       U8 tmpbuf[UTF8_MAXLEN+1];
        if (flags & ANYOF_LOCALE) {
            PL_reg_flags |= RF_tainted;
            uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p));
diff --git a/sv.c b/sv.c
index 01076cb..912d694 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -6067,7 +6067,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
        bool is_utf = FALSE;
        
        char esignbuf[4];
-       U8 utf8buf[UTF8_MAXLEN];
+       U8 utf8buf[UTF8_MAXLEN+1];
        STRLEN esignlen = 0;
 
        char *eptr = Nullch;
diff --git a/toke.c b/toke.c
index 0c803d4..7b68091 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -7183,7 +7183,7 @@ vstring:
                pos++;
            if (!isALPHA(*pos)) {
                UV rev;
-               U8 tmpbuf[UTF8_MAXLEN];
+               U8 tmpbuf[UTF8_MAXLEN+1];
                U8 *tmpend;
                bool utf8 = FALSE;
                s++;                            /* get past 'v' */
diff --git a/utf8.c b/utf8.c
index 9e943ac..5713d65 100644 (file)
--- a/utf8.c
+++ b/utf8.c
 /* Unicode support */
 
 U8 *
-Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
+Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */
 {
     if (uv < 0x80) {
        *d++ = uv;
+       *d   = 0;
        return d;
     }
     if (uv < 0x800) {
        *d++ = (( uv >>  6)         | 0xc0);
        *d++ = (( uv        & 0x3f) | 0x80);
+       *d   = 0;
        return d;
     }
     if (uv < 0x10000) {
        *d++ = (( uv >> 12)         | 0xe0);
        *d++ = (((uv >>  6) & 0x3f) | 0x80);
        *d++ = (( uv        & 0x3f) | 0x80);
+       *d   = 0;
        return d;
     }
     if (uv < 0x200000) {
@@ -49,6 +52,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
        *d++ = (((uv >> 12) & 0x3f) | 0x80);
        *d++ = (((uv >>  6) & 0x3f) | 0x80);
        *d++ = (( uv        & 0x3f) | 0x80);
+       *d   = 0;
        return d;
     }
     if (uv < 0x4000000) {
@@ -57,6 +61,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
        *d++ = (((uv >> 12) & 0x3f) | 0x80);
        *d++ = (((uv >>  6) & 0x3f) | 0x80);
        *d++ = (( uv        & 0x3f) | 0x80);
+       *d   = 0;
        return d;
     }
     if (uv < 0x80000000) {
@@ -66,6 +71,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
        *d++ = (((uv >> 12) & 0x3f) | 0x80);
        *d++ = (((uv >>  6) & 0x3f) | 0x80);
        *d++ = (( uv        & 0x3f) | 0x80);
+       *d   = 0;
        return d;
     }
 #ifdef HAS_QUAD
@@ -79,6 +85,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
        *d++ = (((uv >> 12) & 0x3f) | 0x80);
        *d++ = (((uv >>  6) & 0x3f) | 0x80);
        *d++ = (( uv        & 0x3f) | 0x80);
+       *d   = 0;
        return d;
     }
 #ifdef HAS_QUAD
@@ -96,6 +103,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
        *d++ = (((uv >> 12) & 0x3f) | 0x80);
        *d++ = (((uv >>  6) & 0x3f) | 0x80);
        *d++ = (( uv        & 0x3f) | 0x80);
+       *d   = 0;
        return d;
     }
 #endif
@@ -593,7 +601,7 @@ Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
 bool
 Perl_is_uni_alnum(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_alnum(tmpbuf);
 }
@@ -601,7 +609,7 @@ Perl_is_uni_alnum(pTHX_ U32 c)
 bool
 Perl_is_uni_alnumc(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_alnumc(tmpbuf);
 }
@@ -609,7 +617,7 @@ Perl_is_uni_alnumc(pTHX_ U32 c)
 bool
 Perl_is_uni_idfirst(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_idfirst(tmpbuf);
 }
@@ -617,7 +625,7 @@ Perl_is_uni_idfirst(pTHX_ U32 c)
 bool
 Perl_is_uni_alpha(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_alpha(tmpbuf);
 }
@@ -625,7 +633,7 @@ Perl_is_uni_alpha(pTHX_ U32 c)
 bool
 Perl_is_uni_ascii(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_ascii(tmpbuf);
 }
@@ -633,7 +641,7 @@ Perl_is_uni_ascii(pTHX_ U32 c)
 bool
 Perl_is_uni_space(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_space(tmpbuf);
 }
@@ -641,7 +649,7 @@ Perl_is_uni_space(pTHX_ U32 c)
 bool
 Perl_is_uni_digit(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_digit(tmpbuf);
 }
@@ -649,7 +657,7 @@ Perl_is_uni_digit(pTHX_ U32 c)
 bool
 Perl_is_uni_upper(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_upper(tmpbuf);
 }
@@ -657,7 +665,7 @@ Perl_is_uni_upper(pTHX_ U32 c)
 bool
 Perl_is_uni_lower(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_lower(tmpbuf);
 }
@@ -665,7 +673,7 @@ Perl_is_uni_lower(pTHX_ U32 c)
 bool
 Perl_is_uni_cntrl(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_cntrl(tmpbuf);
 }
@@ -673,7 +681,7 @@ Perl_is_uni_cntrl(pTHX_ U32 c)
 bool
 Perl_is_uni_graph(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_graph(tmpbuf);
 }
@@ -681,7 +689,7 @@ Perl_is_uni_graph(pTHX_ U32 c)
 bool
 Perl_is_uni_print(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_print(tmpbuf);
 }
@@ -689,7 +697,7 @@ Perl_is_uni_print(pTHX_ U32 c)
 bool
 Perl_is_uni_punct(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_punct(tmpbuf);
 }
@@ -697,7 +705,7 @@ Perl_is_uni_punct(pTHX_ U32 c)
 bool
 Perl_is_uni_xdigit(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return is_utf8_xdigit(tmpbuf);
 }
@@ -705,7 +713,7 @@ Perl_is_uni_xdigit(pTHX_ U32 c)
 U32
 Perl_to_uni_upper(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return to_utf8_upper(tmpbuf);
 }
@@ -713,7 +721,7 @@ Perl_to_uni_upper(pTHX_ U32 c)
 U32
 Perl_to_uni_title(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return to_utf8_title(tmpbuf);
 }
@@ -721,7 +729,7 @@ Perl_to_uni_title(pTHX_ U32 c)
 U32
 Perl_to_uni_lower(pTHX_ U32 c)
 {
-    U8 tmpbuf[UTF8_MAXLEN];
+    U8 tmpbuf[UTF8_MAXLEN+1];
     uv_to_utf8(tmpbuf, (UV)c);
     return to_utf8_lower(tmpbuf);
 }