Ap |U32 |to_uni_upper_lc|U32 c
Ap |U32 |to_uni_title_lc|U32 c
Ap |U32 |to_uni_lower_lc|U32 c
-Ap |STRLEN |is_utf8_char |U8 *p
-Ap |bool |is_utf8_string |U8 *s|STRLEN len
+Apd |STRLEN |is_utf8_char |U8 *p
+Apd |bool |is_utf8_string |U8 *s|STRLEN len
Ap |bool |is_utf8_alnum |U8 *p
Ap |bool |is_utf8_alnumc |U8 *p
Ap |bool |is_utf8_idfirst|U8 *p
p |void |utilize |int aver|I32 floor|OP* version|OP* id|OP* arg
Ap |U8* |utf16_to_utf8 |U8* p|U8 *d|I32 bytelen|I32 *newlen
Ap |U8* |utf16_to_utf8_reversed|U8* p|U8 *d|I32 bytelen|I32 *newlen
-Ap |STRLEN |utf8_length |U8* s|U8 *e
-Ap |IV |utf8_distance |U8 *a|U8 *b
-Ap |U8* |utf8_hop |U8 *s|I32 off
-ApM |U8* |utf8_to_bytes |U8 *s|STRLEN *len
-ApM |U8* |bytes_to_utf8 |U8 *s|STRLEN *len
-Ap |UV |utf8_to_uv_simple|U8 *s|STRLEN* retlen
-Ap |UV |utf8_to_uv |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
-Ap |U8* |uv_to_utf8 |U8 *d|UV uv
+Adp |STRLEN |utf8_length |U8* s|U8 *e
+Apd |IV |utf8_distance |U8 *a|U8 *b
+Apd |U8* |utf8_hop |U8 *s|I32 off
+ApMd |U8* |utf8_to_bytes |U8 *s|STRLEN *len
+ApMd |U8* |bytes_to_utf8 |U8 *s|STRLEN *len
+Apd |UV |utf8_to_uv_simple|U8 *s|STRLEN* retlen
+Adp |UV |utf8_to_uv |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags
+Apd |U8* |uv_to_utf8 |U8 *d|UV uv
p |void |vivify_defelem |SV* sv
p |void |vivify_ref |SV* sv|U32 to_what
p |I32 |wait4pid |Pid_t pid|int* statusp|int flags
Returns a pointer to the newly-created string, and sets C<len> to
reflect the new length.
- U8 * bytes_to_utf8(U8 *s, STRLEN *len)
+NOTE: this function is experimental and may change or be
+removed without notice.
+
+ U8* bytes_to_utf8(U8 *s, STRLEN *len)
=for hackers
Found in file utf8.c
=for hackers
Found in file handy.h
+=item is_utf8_char
+
+Tests if some arbitrary number of bytes begins in a valid UTF-8 character.
+The actual number of bytes in the UTF-8 character will be returned if it
+is valid, otherwise 0.
+
+ STRLEN is_utf8_char(U8 *p)
+
+=for hackers
+Found in file utf8.c
+
+=item is_utf8_string
+
+Returns true if first C<len> bytes of the given string form a valid UTF8
+string, false otherwise.
+
+ bool is_utf8_string(U8 *s, STRLEN len)
+
+=for hackers
+Found in file utf8.c
+
=item items
Variable which is setup by C<xsubpp> to indicate the number of
=for hackers
Found in file sv.h
-=item SvTYPE
-
-Returns the type of the SV. See C<svtype>.
+=item svtype
- svtype SvTYPE(SV* sv)
+An enum of flags for Perl types. These are found in the file B<sv.h>
+in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
=for hackers
Found in file sv.h
-=item svtype
+=item SvTYPE
-An enum of flags for Perl types. These are found in the file B<sv.h>
-in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV. See C<svtype>.
+
+ svtype SvTYPE(SV* sv)
=for hackers
Found in file sv.h
=for hackers
Found in file handy.h
-=item U8 *s
-
-Returns true if first C<len> bytes of the given string form valid a UTF8
-string, false otherwise.
-
- is_utf8_string U8 *s(STRLEN len)
-
-=for hackers
-Found in file utf8.c
-
=item utf8_distance
Returns the number of UTF8 characters between the UTF-8 pointers C<a>
updates len to contain the new length.
Returns zero on failure, setting C<len> to -1.
- U8 * utf8_to_bytes(U8 *s, STRLEN *len)
+NOTE: this function is experimental and may change or be
+removed without notice.
+
+ U8* utf8_to_bytes(U8 *s, STRLEN *len)
=for hackers
Found in file utf8.c
The C<flags> can also contain various flags to allow deviations from
the strict UTF-8 encoding (see F<utf8.h>).
- U8* s utf8_to_uv(STRLEN curlen, STRLEN *retlen, U32 flags)
+ UV utf8_to_uv(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags)
=for hackers
Found in file utf8.c
If C<s> does not point to a well-formed UTF8 character, zero is
returned and retlen is set, if possible, to -1.
- U8* s utf8_to_uv_simple(STRLEN *retlen)
+ UV utf8_to_uv_simple(U8 *s, STRLEN* retlen)
+
+=for hackers
+Found in file utf8.c
+
+=item uv_to_utf8
+
+Adds the UTF8 representation of the Unicode codepoint C<uv> to the end
+of the string C<d>; C<d> should have at least C<UTF8_MAXLEN+1> free
+bytes available. The return value is the pointer to the byte after the
+end of the new character. In other words,
+
+ d = uv_to_utf8(d, uv);
+
+is the recommended Unicode-aware way of saying
+
+ *(d++) = uv;
+
+ U8* uv_to_utf8(U8 *d, UV uv)
=for hackers
Found in file utf8.c
/* Unicode support */
+/*
+=for apidoc A|U8*|uv_to_utf8|U8 *d|UV uv
+
+Adds the UTF8 representation of the Unicode codepoint C<uv> to the end
+of the string C<d>; C<d> should have at least C<UTF8_MAXLEN+1> free
+bytes available. The return value is the pointer to the byte after the
+end of the new character. In other words,
+
+ d = uv_to_utf8(d, uv);
+
+is the recommended Unicode-aware way of saying
+
+ *(d++) = uv;
+
+=cut
+*/
+
U8 *
-Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */
+Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
{
if (uv < 0x80) {
*d++ = uv;
#endif
}
-/* Tests if some arbitrary number of bytes begins in a valid UTF-8 character.
- * The actual number of bytes in the UTF-8 character will be returned if it
- * is valid, otherwise 0. */
+/*
+=for apidoc A|STRLEN|is_utf8_char|U8 *s
+
+Tests if some arbitrary number of bytes begins in a valid UTF-8 character.
+The actual number of bytes in the UTF-8 character will be returned if it
+is valid, otherwise 0.
+
+=cut
+*/
STRLEN
Perl_is_utf8_char(pTHX_ U8 *s)
{
}
/*
-=for apidoc Am|is_utf8_string|U8 *s|STRLEN len
+=for apidoc A|bool|is_utf8_string|U8 *s|STRLEN len
Returns true if first C<len> bytes of the given string form valid a UTF8
string, false otherwise.
}
/*
-=for apidoc Am|U8* s|utf8_to_uv|STRLEN curlen|STRLEN *retlen|U32 flags
+=for apidoc A|UV|utf8_to_uv|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding and no longer than C<curlen>;
}
/*
-=for apidoc Am|U8* s|utf8_to_uv_simple|STRLEN *retlen
+=for apidoc A|UV|utf8_to_uv_simple|U8 *s|STRLEN *retlen
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding; C<retlen> will be set to the
}
/*
-=for apidoc Am|STRLEN|utf8_length|U8* s|U8 *e
+=for apidoc A|STRLEN|utf8_length|U8* s|U8 *e
Return the length of the UTF-8 char encoded string C<s> in characters.
Stops at C<e> (inclusive). If C<e E<lt> s> or if the scan would end
}
/*
-=for apidoc Am|IV|utf8_distance|U8 *a|U8 *b
+=for apidoc A|IV|utf8_distance|U8 *a|U8 *b
Returns the number of UTF8 characters between the UTF-8 pointers C<a>
and C<b>.
}
/*
-=for apidoc Am|U8*|utf8_hop|U8 *s|I32 off
+=for apidoc A|U8*|utf8_hop|U8 *s|I32 off
Return the UTF-8 pointer C<s> displaced by C<off> characters, either
forward or backward.
}
/*
-=for apidoc Am|U8 *|utf8_to_bytes|U8 *s|STRLEN *len
+=for apidoc A|U8 *|utf8_to_bytes|U8 *s|STRLEN *len
Converts a string C<s> of length C<len> from UTF8 into byte encoding.
Unlike C<bytes_to_utf8>, this over-writes the original string, and
}
/*
-=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN *len
+=for apidoc A|U8 *|bytes_to_utf8|U8 *s|STRLEN *len
Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.
Returns a pointer to the newly-created string, and sets C<len> to