From: Jarkko Hietaniemi Date: Sun, 3 Dec 2000 20:57:19 +0000 (+0000) Subject: Make uv_to_utf8() zero-terminate its output buffer, X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ad391ad9bbfeaf73d3944b50240313a5677bcc60;p=p5sagit%2Fp5-mst-13.2.git Make uv_to_utf8() zero-terminate its output buffer, and always use a buffer at least UTF8_MAXLEN + 1 U8s deep. p4raw-id: //depot/perl@7967 --- diff --git a/op.c b/op.c index 9d00b7b..50db696 100644 --- a/op.c +++ b/op.c @@ -2646,7 +2646,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) I32 to_utf = o->op_private & OPpTRANS_TO_UTF; if (complement) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; U8** cp; I32* cl; UV nextmin = 0; diff --git a/pp.c b/pp.c index 17beb6c..10e6c6a 100644 --- a/pp.c +++ b/pp.c @@ -2321,7 +2321,7 @@ PP(pp_ucfirst) if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { STRLEN ulen; - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; UV uv = utf8_to_uv(s, slen, &ulen, 0); @@ -2380,7 +2380,7 @@ PP(pp_lcfirst) if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { STRLEN ulen; - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tend; UV uv = utf8_to_uv(s, slen, &ulen, 0); @@ -4727,7 +4727,7 @@ PP(pp_pack) while (len-- > 0) { fromstr = NEXTFROM; auint = SvUV(fromstr); - SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN); + SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1); SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint) - SvPVX(cat)); } diff --git a/regcomp.c b/regcomp.c index 3b4f481..cf100d7 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4029,13 +4029,7 @@ STATIC void S_reguni(pTHX_ RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp) { dTHR; - if (SIZE_ONLY) { - U8 tmpbuf[UTF8_MAXLEN]; - *lenp = uv_to_utf8(tmpbuf, uv) - tmpbuf; - } - else - *lenp = uv_to_utf8((U8*)s, uv) - (U8*)s; - + *lenp = SIZE_ONLY ? 
UNISKIP(uv) : (uv_to_utf8((U8*)s, uv) - (U8*)s); } /* diff --git a/regexec.c b/regexec.c index 18c06d5..1f79f30 100644 --- a/regexec.c +++ b/regexec.c @@ -3795,7 +3795,7 @@ S_reginclassutf8(pTHX_ regnode *f, U8 *p) if (swash_fetch(sv, p)) match = TRUE; else if (flags & ANYOF_FOLD) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; if (flags & ANYOF_LOCALE) { PL_reg_flags |= RF_tainted; uv_to_utf8(tmpbuf, toLOWER_LC_utf8(p)); diff --git a/sv.c b/sv.c index 01076cb..912d694 100644 --- a/sv.c +++ b/sv.c @@ -6067,7 +6067,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV bool is_utf = FALSE; char esignbuf[4]; - U8 utf8buf[UTF8_MAXLEN]; + U8 utf8buf[UTF8_MAXLEN+1]; STRLEN esignlen = 0; char *eptr = Nullch; diff --git a/toke.c b/toke.c index 0c803d4..7b68091 100644 --- a/toke.c +++ b/toke.c @@ -7183,7 +7183,7 @@ vstring: pos++; if (!isALPHA(*pos)) { UV rev; - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tmpend; bool utf8 = FALSE; s++; /* get past 'v' */ diff --git a/utf8.c b/utf8.c index 9e943ac..5713d65 100644 --- a/utf8.c +++ b/utf8.c @@ -27,21 +27,24 @@ /* Unicode support */ U8 * -Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) +Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */ { if (uv < 0x80) { *d++ = uv; + *d = 0; return d; } if (uv < 0x800) { *d++ = (( uv >> 6) | 0xc0); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } if (uv < 0x10000) { *d++ = (( uv >> 12) | 0xe0); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } if (uv < 0x200000) { @@ -49,6 +52,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } if (uv < 0x4000000) { @@ -57,6 +61,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } if (uv < 0x80000000) { @@ -66,6 +71,7 @@ 
Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } #ifdef HAS_QUAD @@ -79,6 +85,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } #ifdef HAS_QUAD @@ -96,6 +103,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } #endif @@ -593,7 +601,7 @@ Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen) bool Perl_is_uni_alnum(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_alnum(tmpbuf); } @@ -601,7 +609,7 @@ Perl_is_uni_alnum(pTHX_ U32 c) bool Perl_is_uni_alnumc(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_alnumc(tmpbuf); } @@ -609,7 +617,7 @@ Perl_is_uni_alnumc(pTHX_ U32 c) bool Perl_is_uni_idfirst(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_idfirst(tmpbuf); } @@ -617,7 +625,7 @@ Perl_is_uni_idfirst(pTHX_ U32 c) bool Perl_is_uni_alpha(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_alpha(tmpbuf); } @@ -625,7 +633,7 @@ Perl_is_uni_alpha(pTHX_ U32 c) bool Perl_is_uni_ascii(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_ascii(tmpbuf); } @@ -633,7 +641,7 @@ Perl_is_uni_ascii(pTHX_ U32 c) bool Perl_is_uni_space(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_space(tmpbuf); } @@ -641,7 +649,7 @@ Perl_is_uni_space(pTHX_ U32 c) bool Perl_is_uni_digit(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return 
is_utf8_digit(tmpbuf); } @@ -649,7 +657,7 @@ Perl_is_uni_digit(pTHX_ U32 c) bool Perl_is_uni_upper(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_upper(tmpbuf); } @@ -657,7 +665,7 @@ Perl_is_uni_upper(pTHX_ U32 c) bool Perl_is_uni_lower(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_lower(tmpbuf); } @@ -665,7 +673,7 @@ Perl_is_uni_lower(pTHX_ U32 c) bool Perl_is_uni_cntrl(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_cntrl(tmpbuf); } @@ -673,7 +681,7 @@ Perl_is_uni_cntrl(pTHX_ U32 c) bool Perl_is_uni_graph(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_graph(tmpbuf); } @@ -681,7 +689,7 @@ Perl_is_uni_graph(pTHX_ U32 c) bool Perl_is_uni_print(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_print(tmpbuf); } @@ -689,7 +697,7 @@ Perl_is_uni_print(pTHX_ U32 c) bool Perl_is_uni_punct(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_punct(tmpbuf); } @@ -697,7 +705,7 @@ Perl_is_uni_punct(pTHX_ U32 c) bool Perl_is_uni_xdigit(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_xdigit(tmpbuf); } @@ -705,7 +713,7 @@ Perl_is_uni_xdigit(pTHX_ U32 c) U32 Perl_to_uni_upper(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return to_utf8_upper(tmpbuf); } @@ -713,7 +721,7 @@ Perl_to_uni_upper(pTHX_ U32 c) U32 Perl_to_uni_title(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return to_utf8_title(tmpbuf); } @@ -721,7 +729,7 @@ Perl_to_uni_title(pTHX_ U32 c) U32 Perl_to_uni_lower(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; 
uv_to_utf8(tmpbuf, (UV)c); return to_utf8_lower(tmpbuf); }