From: Gisle Aas Date: Tue, 27 Mar 2001 11:30:24 +0000 (-0800) Subject: Re: perl@9359 breaks HTML::Parser X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d5ce4a7cd9d16a6d235e1b45a0768034bbdac1f0;p=p5sagit%2Fp5-mst-13.2.git Re: perl@9359 breaks HTML::Parser Message-ID: Clarify the UTF-8 issues of the API docs. (Slightly reworded and expanded.) p4raw-id: //depot/perl@9386 --- diff --git a/pod/perlapi.pod b/pod/perlapi.pod index aaa9e90..711db4d 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -2179,6 +2179,7 @@ Found in file sv.h =item SvPOK_only Tells an SV that it is a string and disables all other OK bits. +Will also turn off the UTF8 status. void SvPOK_only(SV* sv) @@ -2187,8 +2188,8 @@ Found in file sv.h =item SvPOK_only_UTF8 -Tells an SV that it is a UTF8 string (do not use frivolously) -and disables all other OK bits. +Tells an SV that it is a string and disables all other OK bits, +and leaves the UTF8 status as it was. void SvPOK_only_UTF8(SV* sv) @@ -2494,7 +2495,8 @@ Found in file sv.h =item SvUTF8_on -Tells an SV that it is a string and encoded in UTF8. Do not use frivolously. +Turn on the UTF8 status of an SV (the data is not changed, just the flag). +Do not use frivolously. void SvUTF8_on(SV *sv) @@ -2544,7 +2546,8 @@ Found in file sv.c =item sv_catpv Concatenates the string onto the end of the string which is in the SV. -Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>. +If the SV has the UTF8 status set, then the bytes appended should be +valid UTF8. Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>. void sv_catpv(SV* sv, const char* ptr) @@ -2553,9 +2556,13 @@ Found in file sv.c =item sv_catpvf -Processes its arguments like C<sprintf> and appends the formatted output -to an SV. Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must -typically be called after calling this function to handle 'set' magic. +Processes its arguments like C<sprintf> and appends the formatted +output to an SV.
If the appended data contains "wide" characters +(including, but not limited to, SVs with a UTF-8 PV formatted with %s, +and characters >255 formatted with %c), the original SV might get +upgraded to UTF-8. Handles 'get' magic, but not 'set' magic. +C<SvSETMAGIC()> must typically be called after calling this function +to handle 'set' magic. void sv_catpvf(SV* sv, const char* pat, ...) @@ -2574,8 +2581,9 @@ Found in file sv.c =item sv_catpvn Concatenates the string onto the end of the string which is in the SV. The -C<len> indicates number of bytes to copy. Handles 'get' magic, but not -'set' magic. See C<sv_catpvn_mg>. +C<len> indicates number of bytes to copy. If the SV has the UTF8 +status set, then the bytes appended should be valid UTF8. +Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>. void sv_catpvn(SV* sv, const char* ptr, STRLEN len) diff --git a/sv.c b/sv.c index 75b35a8..7e98be6 100644 --- a/sv.c +++ b/sv.c @@ -3817,8 +3817,9 @@ Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) /* like set but assuming =for apidoc sv_catpvn Concatenates the string onto the end of the string which is in the SV. The -C<len> indicates number of bytes to copy. Handles 'get' magic, but not -'set' magic. See C<sv_catpvn_mg>. +C<len> indicates number of bytes to copy. If the SV has the UTF8 +status set, then the bytes appended should be valid UTF8. +Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>. =cut */ @@ -3916,10 +3917,10 @@ Perl_sv_catsv_mg(pTHX_ SV *dsv, register SV *ssv) =for apidoc sv_catpv Concatenates the string onto the end of the string which is in the SV. -Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>. +If the SV has the UTF8 status set, then the bytes appended should be +valid UTF8. Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>. -=cut -*/ +=cut */ void Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr) @@ -6760,12 +6761,15 @@ Perl_sv_catpvf_mg_nocontext(SV *sv, const char* pat, ...) /* =for apidoc sv_catpvf -Processes its arguments like C<sprintf> and appends the formatted output -to an SV.
Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must -typically be called after calling this function to handle 'set' magic. +Processes its arguments like C<sprintf> and appends the formatted +output to an SV. If the appended data contains "wide" characters +(including, but not limited to, SVs with a UTF-8 PV formatted with %s, +and characters >255 formatted with %c), the original SV might get +upgraded to UTF-8. Handles 'get' magic, but not 'set' magic. +C<SvSETMAGIC()> must typically be called after calling this function +to handle 'set' magic. -=cut -*/ +=cut */ void Perl_sv_catpvf(pTHX_ SV *sv, const char* pat, ...) diff --git a/sv.h b/sv.h index 2785f14..ab04b05 100644 --- a/sv.h +++ b/sv.h @@ -478,6 +478,7 @@ Unsets the PV status of an SV. =for apidoc Am|void|SvPOK_only|SV* sv Tells an SV that it is a string and disables all other OK bits. +Will also turn off the UTF8 status. =for apidoc Am|bool|SvOOK|SV* sv Returns a boolean indicating whether the SvIVX is a valid offset value for @@ -584,14 +585,15 @@ Set the length of the string which is in the SV. See C<SvCUR>. Returns a boolean indicating whether the SV contains UTF-8 encoded data. =for apidoc Am|void|SvUTF8_on|SV *sv -Tells an SV that it is a string and encoded in UTF8. Do not use frivolously. +Turn on the UTF8 status of an SV (the data is not changed, just the flag). +Do not use frivolously. =for apidoc Am|void|SvUTF8_off|SV *sv Unsets the UTF8 status of an SV. =for apidoc Am|void|SvPOK_only_UTF8|SV* sv -Tells an SV that it is a UTF8 string (do not use frivolously) -and disables all other OK bits. +Tells an SV that it is a string and disables all other OK bits, +and leaves the UTF8 status as it was. =cut */