if (!sv)
return 0;
- if (!SvPOK(sv))
- (void) SvPV_nolen(sv);
+ if (!SvPOK(sv)) {
+ STRLEN len = 0;
+ (void) sv_2pv(sv,&len);
+ if (!SvPOK(sv))
+ return len;
+ }
if (SvUTF8(sv))
return SvCUR(sv);
=for apidoc sv_catpvn
Concatenates the string onto the end of the string which is in the SV. The
-C<len> indicates number of bytes to copy. Handles 'get' magic, but not
-'set' magic. See C<sv_catpvn_mg>.
+C<len> indicates number of bytes to copy. If the SV has the UTF8
+status set, then the bytes appended should be valid UTF-8.
+Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>.
=cut
*/
=for apidoc sv_catpv
Concatenates the string onto the end of the string which is in the SV.
-Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
+If the SV has the UTF8 status set, then the bytes appended should be
+valid UTF-8. Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
-=cut
-*/
+=cut */
void
Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr)
len = 0;
while (s < send) {
STRLEN n;
- /* We can use low level directly here as we are not looking at the values */
- if (utf8n_to_uvuni(s, UTF8SKIP(s), &n, 0)) {
+ /* Call utf8n_to_uvchr() to validate the sequence */
+ utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0);
+ if (n > 0) {
s += n;
len++;
}
/*
=for apidoc sv_catpvf
-Processes its arguments like C<sprintf> and appends the formatted output
-to an SV. Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must
-typically be called after calling this function to handle 'set' magic.
+Processes its arguments like C<sprintf> and appends the formatted
+output to an SV. If the appended data contains "wide" characters
+(including, but not limited to, SVs with a UTF-8 PV formatted with %s,
+and characters >255 formatted with %c), the original SV might get
+upgraded to UTF-8. Handles 'get' magic, but not 'set' magic.
+C<SvSETMAGIC()> must typically be called after calling this function
+to handle 'set' magic.
-=cut
-*/
+=cut */
void
Perl_sv_catpvf(pTHX_ SV *sv, const char* pat, ...)
case 'c':
uv = args ? va_arg(*args, int) : SvIVx(argsv);
- if ((uv > 255 || (!UNI_IS_INVARIANT(uv) || SvUTF8(sv))) && !IN_BYTE) {
+ if ((uv > 255 ||
+ (!UNI_IS_INVARIANT(uv) && SvUTF8(sv)))
+ && !IN_BYTE) {
eptr = (char*)utf8buf;
elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf;
is_utf = TRUE;