X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=utf8.c;h=156e63f7174990cf0255cb6b6cf5008eceeffc12;hb=eebe148573e5a07582cda46391148ca89c563ade;hp=65f109618357e6d0b671ac2bc701acba59f76d6b;hpb=77003bb1f5b79e478d4e39dbf22a7d33aacd2fd5;p=p5sagit%2Fp5-mst-13.2.git diff --git a/utf8.c b/utf8.c index 65f1096..156e63f 100644 --- a/utf8.c +++ b/utf8.c @@ -26,8 +26,25 @@ /* Unicode support */ +/* +=for apidoc A|U8*|uv_to_utf8|U8 *d|UV uv + +Adds the UTF8 representation of the Unicode codepoint C<uv> to the end +of the string C<d>; C<d> should have at least C<UTF8_MAXLEN+1> free +bytes available. The return value is the pointer to the byte after the +end of the new character. In other words, + + d = uv_to_utf8(d, uv); + +is the recommended Unicode-aware way of saying + + *(d++) = uv; + +=cut +*/ + U8 * -Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */ +Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) { if (uv < 0x80) { *d++ = uv; @@ -101,9 +118,15 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */ #endif } -/* Tests if some arbitrary number of bytes begins in a valid UTF-8 character. - * The actual number of bytes in the UTF-8 character will be returned if it - * is valid, otherwise 0. */ +/* +=for apidoc A|STRLEN|is_utf8_char|U8 *s + +Tests if some arbitrary number of bytes begins in a valid UTF-8 character. +The actual number of bytes in the UTF-8 character will be returned if it +is valid, otherwise 0. + +=cut +*/ STRLEN Perl_is_utf8_char(pTHX_ U8 *s) { @@ -143,7 +166,7 @@ Perl_is_utf8_char(pTHX_ U8 *s) } /* -=for apidoc Am|is_utf8_string|U8 *s|STRLEN len +=for apidoc A|bool|is_utf8_string|U8 *s|STRLEN len Returns true if first C<len> bytes of the given string form valid a UTF8 string, false otherwise. 
@@ -175,7 +198,7 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) } /* -=for apidoc Am|U8* s|utf8_to_uv|STRLEN curlen|STRLEN *retlen|U32 flags +=for apidoc A|U8* s|utf8_to_uv|STRLEN curlen|STRLEN *retlen|U32 flags Returns the character value of the first character in the string C<s> which is assumed to be in UTF8 encoding and no longer than C<curlen>; @@ -390,7 +413,7 @@ malformed: } /* -=for apidoc Am|U8* s|utf8_to_uv_simple|STRLEN *retlen +=for apidoc A|U8* s|utf8_to_uv_simple|STRLEN *retlen Returns the character value of the first character in the string C<s> which is assumed to be in UTF8 encoding; C<retlen> will be set to the @@ -409,7 +432,7 @@ Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen) } /* -=for apidoc Am|STRLEN|utf8_length|U8* s|U8 *e +=for apidoc A|STRLEN|utf8_length|U8* s|U8 *e Return the length of the UTF-8 char encoded string C<s> in characters. Stops at C<e> (inclusive). If C<e E<lt> s> or if the scan would end @@ -442,7 +465,7 @@ Perl_utf8_length(pTHX_ U8* s, U8* e) } /* -=for apidoc Am|IV|utf8_distance|U8 *a|U8 *b +=for apidoc A|IV|utf8_distance|U8 *a|U8 *b Returns the number of UTF8 characters between the UTF-8 pointers C<a> and C<b>. @@ -486,7 +509,7 @@ Perl_utf8_distance(pTHX_ U8 *a, U8 *b) } /* -=for apidoc Am|U8*|utf8_hop|U8 *s|I32 off +=for apidoc A|U8*|utf8_hop|U8 *s|I32 off Return the UTF-8 pointer C<s> displaced by C<off> characters, either forward or backward. @@ -519,7 +542,7 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off) } /* -=for apidoc Am|U8 *|utf8_to_bytes|U8 *s|STRLEN *len +=for apidoc A|U8 *|utf8_to_bytes|U8 *s|STRLEN *len Converts a string C<s> of length C<len> from UTF8 into byte encoding. Unlike C<bytes_to_utf8>, this over-writes the original string, and @@ -560,7 +583,7 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len) } /* -=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN *len +=for apidoc A|U8 *|bytes_to_utf8|U8 *s|STRLEN *len Converts a string C<s> of length C<len> from ASCII into UTF8 encoding. Returns a pointer to the newly-created string, and sets C<len> to