From: Andreas König Date: Fri, 11 Jan 2002 15:47:15 +0000 (+0100) Subject: Fix for X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=2cf2cfc6ec6384231f5e996b330e88a28ca597d7;p=p5sagit%2Fp5-mst-13.2.git Fix for Subject: UTF-8 sprintf bug in bleadperl Message-ID: I have tests, have just to figure out where to put them since op/sprintf looks a bit unfriendly for tests of somewhat freer form. p4raw-id: //depot/perl@14194 --- diff --git a/doop.c b/doop.c index 9f0fa64..0c7abf4 100644 --- a/doop.c +++ b/doop.c @@ -697,6 +697,8 @@ Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) char *pat = SvPV(*sarg, patlen); bool do_taint = FALSE; + if (DO_UTF8(*sarg)) + SvUTF8_on(sv); sv_vsetpvfn(sv, pat, patlen, Null(va_list*), sarg + 1, len - 1, &do_taint); SvSETMAGIC(sv); if (do_taint) diff --git a/sv.c b/sv.c index bc2782d..d5dffef 100644 --- a/sv.c +++ b/sv.c @@ -7655,6 +7655,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV I32 svix = 0; static char nullstr[] = "(null)"; SV *argsv = Nullsv; + bool has_utf8 = FALSE; /* has the result utf8? */ /* no matter what, this is a string now */ (void)SvPV_force(sv, origlen); @@ -7688,13 +7689,16 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } } + if (!args && svix < svmax && DO_UTF8(*svargs)) + has_utf8 = TRUE; + patend = (char*)pat + patlen; for (p = (char*)pat; p < patend; p = q) { bool alt = FALSE; bool left = FALSE; bool vectorize = FALSE; bool vectorarg = FALSE; - bool vec_utf = FALSE; + bool vec_utf8 = FALSE; char fill = ' '; char plus = 0; char intsize = 0; @@ -7702,7 +7706,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN zeros = 0; bool has_precis = FALSE; STRLEN precis = 0; - bool is_utf = FALSE; + bool is_utf8 = FALSE; /* is this item utf8? */ char esignbuf[4]; U8 utf8buf[UTF8_MAXLEN+1]; @@ -7827,17 +7831,17 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV svargs[ewix ? ewix-1 : svix++] : &PL_sv_undef; dotstr = SvPVx(vecsv, dotstrlen); if (DO_UTF8(vecsv)) - is_utf = TRUE; + is_utf8 = TRUE; } if (args) { vecsv = va_arg(*args, SV*); vecstr = (U8*)SvPVx(vecsv,veclen); - vec_utf = DO_UTF8(vecsv); + vec_utf8 = DO_UTF8(vecsv); } else if (efix ? efix <= svmax : svix < svmax) { vecsv = svargs[efix ? efix-1 : svix++]; vecstr = (U8*)SvPVx(vecsv,veclen); - vec_utf = DO_UTF8(vecsv); + vec_utf8 = DO_UTF8(vecsv); } else { vecstr = (U8*)""; @@ -7931,7 +7935,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV && !IN_BYTES) { eptr = (char*)utf8buf; elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf; - is_utf = TRUE; + is_utf8 = TRUE; } else { c = (char)uv; @@ -7967,7 +7971,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV if (width) { /* fudge width (can't fudge elen) */ width += elen - sv_len_utf8(argsv); } - is_utf = TRUE; + is_utf8 = TRUE; } } goto string; @@ -7983,7 +7987,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV argsv = va_arg(*args, SV*); eptr = SvPVx(argsv, elen); if (DO_UTF8(argsv)) - is_utf = TRUE; + is_utf8 = TRUE; string: vectorize = FALSE; @@ -8013,8 +8017,9 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN ulen; if (!veclen) continue; - if (vec_utf) - uv = utf8n_to_uvchr(vecstr, veclen, &ulen, UTF8_ALLOW_ANYUV); + if (vec_utf8) + uv = utf8n_to_uvchr(vecstr, veclen, &ulen, + UTF8_ALLOW_ANYUV); else { uv = *vecstr; ulen = 1; @@ -8098,8 +8103,9 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV vector: if (!veclen) continue; - if (vec_utf) - uv = utf8n_to_uvchr(vecstr, veclen, &ulen, UTF8_ALLOW_ANYUV); + if (vec_utf8) + uv = utf8n_to_uvchr(vecstr, veclen, &ulen, + UTF8_ALLOW_ANYUV); else { uv = *vecstr; ulen = 1; @@ -8354,6 +8360,20 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV *p++ = '0'; } if (elen) { + if (is_utf8 != has_utf8) { + if (is_utf8) { + if (SvCUR(sv)) { + sv_utf8_upgrade(sv); + p = SvEND(sv); + } + } + else { + SV *nsv = sv_2mortal(newSVpvn(eptr, elen)); + sv_utf8_upgrade(nsv); + eptr = SvPVX(nsv); + elen = SvCUR(nsv); + } + } Copy(eptr, p, elen, char); p += elen; } @@ -8369,7 +8389,9 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV else vectorize = FALSE; /* done iterating over vecstr */ } - if (is_utf) + if (is_utf8) + has_utf8 = TRUE; + if (has_utf8) SvUTF8_on(sv); *p = '\0'; SvCUR(sv) = p - SvPVX(sv);