X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=sv.c;h=704718e23ba4dabeba889b3255ee686da72c8f75;hb=c7997937a3d3c62c11d46b84cf8e39979d05cd2e;hp=6d6d39e9f5be3c368d5787ebb8178be0aed54540;hpb=f2f6ab5ed2d2f824b4f6c3085a4a2275c2f8500a;p=p5sagit%2Fp5-mst-13.2.git diff --git a/sv.c b/sv.c index 6d6d39e..704718e 100644 --- a/sv.c +++ b/sv.c @@ -2966,8 +2966,12 @@ Perl_sv_utf8_upgrade(pTHX_ register SV *sv) if (!sv) return 0; - if (!SvPOK(sv)) - (void) SvPV_nolen(sv); + if (!SvPOK(sv)) { + STRLEN len = 0; + (void) sv_2pv(sv,&len); + if (!SvPOK(sv)) + return len; + } if (SvUTF8(sv)) return SvCUR(sv); @@ -3817,8 +3821,9 @@ Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) /* like set but assuming =for apidoc sv_catpvn Concatenates the string onto the end of the string which is in the SV. The -C indicates number of bytes to copy. Handles 'get' magic, but not -'set' magic. See C. +C indicates number of bytes to copy. If the SV has the UTF8 +status set, then the bytes appended should be valid UTF8. +Handles 'get' magic, but not 'set' magic. See C. =cut */ @@ -3916,10 +3921,10 @@ Perl_sv_catsv_mg(pTHX_ SV *dsv, register SV *ssv) =for apidoc sv_catpv Concatenates the string onto the end of the string which is in the SV. -Handles 'get' magic, but not 'set' magic. See C. +If the SV has the UTF8 status set, then the bytes appended should be +valid UTF8. Handles 'get' magic, but not 'set' magic. See C. -=cut -*/ +=cut */ void Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr) @@ -4733,8 +4738,9 @@ Perl_sv_pos_b2u(pTHX_ register SV *sv, I32* offsetp) len = 0; while (s < send) { STRLEN n; - /* We can use low level directly here as we are not looking at the values */ - if (utf8n_to_uvuni(s, UTF8SKIP(s), &n, 0)) { + /* Call utf8n_to_uvchr() to validate the sequence */ + utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0); + if (n > 0) { s += n; len++; } @@ -6759,12 +6765,15 @@ Perl_sv_catpvf_mg_nocontext(SV *sv, const char* pat, ...) /* =for apidoc sv_catpvf -Processes its arguments like C and appends the formatted output -to an SV. Handles 'get' magic, but not 'set' magic. C must -typically be called after calling this function to handle 'set' magic. +Processes its arguments like C and appends the formatted +output to an SV. If the appended data contains "wide" characters +(including, but not limited to, SVs with a UTF-8 PV formatted with %s, +and characters >255 formatted with %c), the original SV might get +upgraded to UTF-8. Handles 'get' magic, but not 'set' magic. +C must typically be called after calling this function +to handle 'set' magic. -=cut -*/ +=cut */ void Perl_sv_catpvf(pTHX_ SV *sv, const char* pat, ...) @@ -7129,7 +7138,9 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV case 'c': uv = args ? va_arg(*args, int) : SvIVx(argsv); - if ((uv > 255 || (!UNI_IS_INVARIANT(uv) || SvUTF8(sv))) && !IN_BYTE) { + if ((uv > 255 || + (!UNI_IS_INVARIANT(uv) && SvUTF8(sv))) + && !IN_BYTE) { eptr = (char*)utf8buf; elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf; is_utf = TRUE;