From: Simon Cozens Date: Mon, 26 Jun 2000 04:55:45 +0000 (+0000) Subject: bytes<->utf8 fixes X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=1e72252ad7b8e23d1a1142285b8aa82986bd2491;p=p5sagit%2Fp5-mst-13.2.git bytes<->utf8 fixes Message-ID: p4raw-id: //depot/cfgperl@6242 --- diff --git a/embed.pl b/embed.pl index 419ae4e..5b63a35 100755 --- a/embed.pl +++ b/embed.pl @@ -2063,7 +2063,7 @@ Ap |U8* |utf16_to_utf8_reversed|U16* p|U8 *d|I32 bytelen Ap |I32 |utf8_distance |U8 *a|U8 *b Ap |U8* |utf8_hop |U8 *s|I32 off Ap |U8* |utf8_to_bytes |U8 *s|STRLEN len -Ap |U8* |bytes_to_utf8 |U8 *s|STRLEN len +Ap |U8* |bytes_to_utf8 |U8 *s|STRLEN *len Ap |UV |utf8_to_uv |U8 *s|I32* retlen Ap |U8* |uv_to_utf8 |U8 *d|UV uv p |void |vivify_defelem |SV* sv diff --git a/perlapi.c b/perlapi.c index a911be2..ccb7c8f 100755 --- a/perlapi.c +++ b/perlapi.c @@ -3352,7 +3352,7 @@ Perl_utf8_to_bytes(pTHXo_ U8 *s, STRLEN len) #undef Perl_bytes_to_utf8 U8* -Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN len) +Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN *len) { return ((CPerlObj*)pPerl)->Perl_bytes_to_utf8(s, len); } diff --git a/pod/perlapi.pod b/pod/perlapi.pod index f274641..1e48809 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -153,9 +153,10 @@ Found in file av.c =item bytes_to_utf8 Converts a string C<s> of length C<len> from ASCII into UTF8 encoding. -Returns a pointer to the newly-created string. +Returns a pointer to the newly-created string, and sets C<len> to +reflect the new length. - U8 * bytes_to_utf8(U8 *s, STRLEN len) + U8 * bytes_to_utf8(U8 *s, STRLEN *len) =for hackers Found in file utf8.c @@ -2942,6 +2943,7 @@ Found in file handy.h Converts a string C<s> of length C<len> from UTF8 into ASCII encoding. Unlike C<bytes_to_utf8>, this over-writes the original string. +Returns zero on failure after converting as much as possible. 
U8 * utf8_to_bytes(U8 *s, STRLEN len) diff --git a/proto.h b/proto.h index 2171aa1..c1d9a66 100644 --- a/proto.h +++ b/proto.h @@ -810,7 +810,7 @@ PERL_CALLCONV U8* Perl_utf16_to_utf8_reversed(pTHX_ U16* p, U8 *d, I32 bytelen); PERL_CALLCONV I32 Perl_utf8_distance(pTHX_ U8 *a, U8 *b); PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off); PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN len); -PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN len); +PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len); PERL_CALLCONV UV Perl_utf8_to_uv(pTHX_ U8 *s, I32* retlen); PERL_CALLCONV U8* Perl_uv_to_utf8(pTHX_ U8 *d, UV uv); PERL_CALLCONV void Perl_vivify_defelem(pTHX_ SV* sv); diff --git a/utf8.c b/utf8.c index b77cfdc..9bb89a4 100644 --- a/utf8.c +++ b/utf8.c @@ -227,6 +227,7 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off) Converts a string C<s> of length C<len> from UTF8 into ASCII encoding. Unlike C<bytes_to_utf8>, this over-writes the original string. +Returns zero on failure after converting as much as possible. =cut */ @@ -247,6 +248,10 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len) else { I32 ulen; UV uv = utf8_to_uv(s, &ulen); + if (uv > 255) { + *d = '\0'; + return 0; + } s += ulen; *d++ = (U8)uv; } @@ -256,24 +261,25 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len) } /* -=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN len +=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN *len Converts a string C<s> of length C<len> from ASCII into UTF8 encoding. -Returns a pointer to the newly-created string. +Returns a pointer to the newly-created string, and sets C<len> to +reflect the new length. =cut */ U8* -Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len) +Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len) { dTHR; U8 *send; U8 *d; U8 *dst; - send = s + len; + send = s + (*len); - Newz(801, d, len * 2 + 1, U8); + Newz(801, d, (*len) * 2 + 1, U8); dst = d; while (s < send) { @@ -286,6 +292,7 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len) } } *d = '\0'; + *len = d-dst; return dst; }