X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pod%2Fperlapi.pod;h=ef3a260ae87bf25485334ce31c7e861609a939a4;hb=438cc6089d922e42fd6635c1b8212ad034402745;hp=f7ad2d38c0167ba8353de7e4457c8e0b16458600;hpb=7ea3cd407b6ec2a3e424bdfbc486b6e01d6d28bd;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pod/perlapi.pod b/pod/perlapi.pod index f7ad2d3..ef3a260 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -182,13 +182,33 @@ must then use C to assign values to these new elements. =for hackers Found in file av.c +=item bytes_from_utf8 + +Converts a string C of length C from UTF8 into byte encoding. +Unlike but like C, returns a pointer to +the newly-created string, and updates C to contain the new +length. Returns the original string if no conversion occurs, C +is unchanged. Do nothing if C points to 0. Sets C to +0 if C is converted or contains all 7bit characters. + +NOTE: this function is experimental and may change or be +removed without notice. + + U8* bytes_from_utf8(U8 *s, STRLEN *len, bool *is_utf8) + +=for hackers +Found in file utf8.c + =item bytes_to_utf8 Converts a string C of length C from ASCII into UTF8 encoding. Returns a pointer to the newly-created string, and sets C to reflect the new length. - U8 * bytes_to_utf8(U8 *s, STRLEN *len) +NOTE: this function is experimental and may change or be +removed without notice. + + U8* bytes_to_utf8(U8 *s, STRLEN *len) =for hackers Found in file utf8.c @@ -1013,6 +1033,30 @@ character. =for hackers Found in file handy.h +=item is_utf8_char + +Tests if some arbitrary number of bytes begins in a valid UTF-8 +character. Note that an ASCII character is a valid UTF-8 character. +The actual number of bytes in the UTF-8 character will be returned if +it is valid, otherwise 0. + + STRLEN is_utf8_char(U8 *p) + +=for hackers +Found in file utf8.c + +=item is_utf8_string + +Returns true if first C bytes of the given string form a valid UTF8 +string, false otherwise. 
Note that 'a valid UTF8 string' does not mean +'a string that contains UTF8' because a valid ASCII string is a valid +UTF8 string. + + bool is_utf8_string(U8 *s, STRLEN len) + +=for hackers +Found in file utf8.c + =item items Variable which is setup by C to indicate the number of @@ -1441,51 +1485,6 @@ Tells a Perl interpreter to run. See L. =for hackers Found in file perl.c -=item PL_DBsingle - -When Perl is run in debugging mode, with the B<-d> switch, this SV is a -boolean which indicates whether subs are being single-stepped. -Single-stepping is automatically turned on after every step. This is the C -variable which corresponds to Perl's $DB::single variable. See -C. - - SV * PL_DBsingle - -=for hackers -Found in file intrpvar.h - -=item PL_DBsub - -When Perl is run in debugging mode, with the B<-d> switch, this GV contains -the SV which holds the name of the sub being debugged. This is the C -variable which corresponds to Perl's $DB::sub variable. See -C. - - GV * PL_DBsub - -=for hackers -Found in file intrpvar.h - -=item PL_DBtrace - -Trace variable used when Perl is run in debugging mode, with the B<-d> -switch. This is the C variable which corresponds to Perl's $DB::trace -variable. See C. - - SV * PL_DBtrace - -=for hackers -Found in file intrpvar.h - -=item PL_dowarn - -The C variable which corresponds to Perl's $^W warning variable. - - bool PL_dowarn - -=for hackers -Found in file intrpvar.h - =item PL_modglobal C is a general purpose, interpreter global HV for use by @@ -2369,19 +2368,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B +in the C enum. Test these flags with the C macro. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B -in the C enum. Test these flags with the C macro. 
+Returns the type of the SV. See C. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h @@ -2582,8 +2581,9 @@ Found in file sv.c =item sv_catsv -Concatenates the string from SV C onto the end of the string in SV -C. Handles 'get' magic, but not 'set' magic. See C. +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. Handles 'get' magic, but +not 'set' magic. See C. void sv_catsv(SV* dsv, SV* ssv) @@ -3219,16 +3219,6 @@ Converts the specified character to uppercase. =for hackers Found in file handy.h -=item U8 *s - -Returns true if first C bytes of the given string form valid a UTF8 -string, false otherwise. - - is_utf8_string U8 *s(STRLEN len) - -=for hackers -Found in file utf8.c - =item utf8_distance Returns the number of UTF8 characters between the UTF-8 pointers C @@ -3274,7 +3264,10 @@ Unlike C, this over-writes the original string, and updates len to contain the new length. Returns zero on failure, setting C to -1. - U8 * utf8_to_bytes(U8 *s, STRLEN *len) +NOTE: this function is experimental and may change or be +removed without notice. + + U8* utf8_to_bytes(U8 *s, STRLEN *len) =for hackers Found in file utf8.c @@ -3283,8 +3276,7 @@ Found in file utf8.c Returns the character value of the first character in the string C which is assumed to be in UTF8 encoding and no longer than C; -C will be set to the length, in bytes, of that character, -and the pointer C will be advanced to the end of the character. +C will be set to the length, in bytes, of that character. If C does not point to a well-formed UTF8 character, the behaviour is dependent on the value of C: if it contains UTF8_CHECK_ONLY, @@ -3297,7 +3289,7 @@ length of the UTF-8 character in bytes, and zero will be returned. The C can also contain various flags to allow deviations from the strict UTF-8 encoding (see F). 
- U8* s utf8_to_uv(STRLEN curlen, STRLEN *retlen, U32 flags) + UV utf8_to_uv(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags) =for hackers Found in file utf8.c @@ -3306,13 +3298,30 @@ Found in file utf8.c Returns the character value of the first character in the string C<s> which is assumed to be in UTF8 encoding; C<retlen> will be set to the -length, in bytes, of that character, and the pointer C<s> will be -advanced to the end of the character. +length, in bytes, of that character. If C<s> does not point to a well-formed UTF8 character, zero is returned and retlen is set, if possible, to -1. - U8* s utf8_to_uv_simple(STRLEN *retlen) + UV utf8_to_uv_simple(U8 *s, STRLEN* retlen) + +=for hackers +Found in file utf8.c + +=item uv_to_utf8 + +Adds the UTF8 representation of the Unicode codepoint C<uv> to the end +of the string C<d>; C<d> should have at least C<UTF8_MAXLEN+1> free +bytes available. The return value is the pointer to the byte after the +end of the new character. In other words, + + d = uv_to_utf8(d, uv); + +is the recommended Unicode-aware way of saying + + *(d++) = uv; + + U8* uv_to_utf8(U8 *d, UV uv) =for hackers Found in file utf8.c