X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=utf8.c;h=666ec3476ae272f463d1a2d20a42383eca15db73;hb=6662521eef19f96de52b97fb5fa07a85826679ee;hp=b77cfdcd7502bcca249a8379783e0dcd3ead9327;hpb=7fdcfa2fc732bf742f6e1c2b723ab188bf8dc010;p=p5sagit%2Fp5-mst-13.2.git diff --git a/utf8.c b/utf8.c index b77cfdc..666ec34 100644 --- a/utf8.c +++ b/utf8.c @@ -134,6 +134,30 @@ Perl_is_utf8_char(pTHX_ U8 *s) return len; } +/* +=for apidoc Am|bool|is_utf8_string|U8 *s|STRLEN len + +Returns true if the first C<len> bytes of the given string form a valid UTF8 +string, false otherwise. A zero-length string is considered valid. + +=cut +*/ + +bool +Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) +{ + U8* x=s; + U8* send=s+len; + int c; + while (x < send) { + c = is_utf8_char(x); + x += c; + if (!c || x > send) + return 0; + } + return 1; +} + UV Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen) { @@ -227,6 +251,7 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off) Converts a string C of length C from UTF8 into ASCII encoding. Unlike C, this over-writes the original string. +Returns zero on failure after converting as much as possible. =cut */ @@ -247,6 +272,10 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len) else { I32 ulen; UV uv = utf8_to_uv(s, &ulen); + if (uv > 255) { + *d = '\0'; + return 0; + } s += ulen; *d++ = (U8)uv; } @@ -256,24 +285,25 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len) } /* -=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN len +=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN *len Converts a string C of length C from ASCII into UTF8 encoding. -Returns a pointer to the newly-created string. +Returns a pointer to the newly-created string, and sets C<len> to +reflect the new length in bytes, not counting the trailing NUL. 
=cut */ U8* -Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len) +Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len) { dTHR; U8 *send; U8 *d; U8 *dst; - send = s + len; + send = s + (*len); - Newz(801, d, len * 2 + 1, U8); + Newz(801, d, (*len) * 2 + 1, U8); dst = d; while (s < send) { @@ -286,6 +316,7 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len) } } *d = '\0'; + *len = d-dst; return dst; }