X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pod%2Fperlapi.pod;h=ef3a260ae87bf25485334ce31c7e861609a939a4;hb=438cc6089d922e42fd6635c1b8212ad034402745;hp=9974edcfb38d6f59441bea836b8f0936aaf20176;hpb=b2a2e44b450818f2f7253c8bae310ef6d04df564;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 9974edc..ef3a260 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -182,13 +182,33 @@ must then use C to assign values to these new elements. =for hackers Found in file av.c +=item bytes_from_utf8 + +Converts a string C<s> of length C<len> from UTF8 into byte encoding. +Unlike C<utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to +the newly-created string, and updates C<len> to contain the new +length. Returns the original string if no conversion occurs, in which +case C<len> is unchanged. Does nothing if C<is_utf8> points to 0. Sets +C<is_utf8> to 0 if C<s> is converted or contains all 7bit characters. + +NOTE: this function is experimental and may change or be +removed without notice. + + U8* bytes_from_utf8(U8 *s, STRLEN *len, bool *is_utf8) + +=for hackers +Found in file utf8.c + =item bytes_to_utf8 Converts a string C<s> of length C<len> from ASCII into UTF8 encoding. Returns a pointer to the newly-created string, and sets C<len> to reflect the new length. - U8 * bytes_to_utf8(U8 *s, STRLEN *len) +NOTE: this function is experimental and may change or be +removed without notice. + + U8* bytes_to_utf8(U8 *s, STRLEN *len) =for hackers Found in file utf8.c @@ -287,6 +307,19 @@ Returns the stash of the CV. =for hackers Found in file cv.h +=item cv_const_sv + +If C<cv> is a constant sub eligible for inlining, returns the constant +value returned by the sub. Otherwise, returns NULL. + +Constant subs can be created with C<newCONSTSUB> or as described in +L<perlsub/"Constant Functions">. + + SV* cv_const_sv(CV* cv) + +=for hackers +Found in file op.c + =item dMARK Declare a stack marker variable, C, for the XSUB. See C and @@ -748,7 +781,7 @@ hash and returned to the caller. The C is the length of the key. 
The C value will normally be zero; if set to G_DISCARD then NULL will be returned. - SV* hv_delete(HV* tb, const char* key, U32 klen, I32 flags) + SV* hv_delete(HV* tb, const char* key, I32 klen, I32 flags) =for hackers Found in file hv.c @@ -770,7 +803,7 @@ Found in file hv.c Returns a boolean indicating whether the specified hash key exists. The C is the length of the key. - bool hv_exists(HV* tb, const char* key, U32 klen) + bool hv_exists(HV* tb, const char* key, I32 klen) =for hackers Found in file hv.c @@ -796,7 +829,7 @@ dereferencing it to a C. See L for more information on how to use this function on tied hashes. - SV** hv_fetch(HV* tb, const char* key, U32 klen, I32 lval) + SV** hv_fetch(HV* tb, const char* key, I32 klen, I32 lval) =for hackers Found in file hv.c @@ -907,7 +940,7 @@ the call, and decrementing it if the function returned NULL. See L for more information on how to use this function on tied hashes. - SV** hv_store(HV* tb, const char* key, U32 klen, SV* val, U32 hash) + SV** hv_store(HV* tb, const char* key, I32 klen, SV* val, U32 hash) =for hackers Found in file hv.c @@ -1000,6 +1033,30 @@ character. =for hackers Found in file handy.h +=item is_utf8_char + +Tests if some arbitrary number of bytes begins in a valid UTF-8 +character. Note that an ASCII character is a valid UTF-8 character. +The actual number of bytes in the UTF-8 character will be returned if +it is valid, otherwise 0. + + STRLEN is_utf8_char(U8 *p) + +=for hackers +Found in file utf8.c + +=item is_utf8_string + +Returns true if first C bytes of the given string form a valid UTF8 +string, false otherwise. Note that 'a valid UTF8 string' does not mean +'a string that contains UTF8' because a valid ASCII string is a valid +UTF8 string. 
+ + bool is_utf8_string(U8 *s, STRLEN len) + +=for hackers +Found in file utf8.c + =item items Variable which is setup by C to indicate the number of @@ -1032,7 +1089,8 @@ Found in file scope.h =item looks_like_number Test if the content of an SV looks like a number (or is a -number). +number). C<Inf> and C<Infinity> are treated as numbers (so will not +issue a non-numeric warning), even if your atof() doesn't grok them. I32 looks_like_number(SV* sv) @@ -1162,7 +1220,7 @@ Found in file handy.h Creates a constant sub equivalent to Perl C which is eligible for inlining at compile-time. - void newCONSTSUB(HV* stash, char* name, SV* sv) + CV* newCONSTSUB(HV* stash, char* name, SV* sv) =for hackers Found in file op.c @@ -1201,7 +1259,7 @@ Found in file sv.c Creates a new SV. A non-zero C<len> parameter indicates the number of bytes of preallocated string space the SV should have. An extra byte for a trailing NUL is also reserved. (SvPOK is not set for the SV even if string -space is allocated.) The reference count for the new SV is set to 1. +space is allocated.) The reference count for the new SV is set to 1. C<id> is an integer id between 0 and 1299 (used to identify leaks). SV* NEWSV(int id, STRLEN len) @@ -1270,7 +1328,7 @@ The idea here is that as string table is used for shared hash keys these strings will have SvPVX == HeKEY and hash lookup will avoid string compare. - SV* newSVpvn_share(const char* s, STRLEN len, U32 hash) + SV* newSVpvn_share(const char* s, I32 len, U32 hash) =for hackers Found in file sv.c @@ -1427,57 +1485,12 @@ Tells a Perl interpreter to run. See L. =for hackers Found in file perl.c -=item PL_DBsingle - -When Perl is run in debugging mode, with the B<-d> switch, this SV is a -boolean which indicates whether subs are being single-stepped. -Single-stepping is automatically turned on after every step. This is the C -variable which corresponds to Perl's $DB::single variable. See -C. 
- - SV * PL_DBsingle - -=for hackers -Found in file intrpvar.h - -=item PL_DBsub - -When Perl is run in debugging mode, with the B<-d> switch, this GV contains -the SV which holds the name of the sub being debugged. This is the C -variable which corresponds to Perl's $DB::sub variable. See -C. - - GV * PL_DBsub - -=for hackers -Found in file intrpvar.h - -=item PL_DBtrace - -Trace variable used when Perl is run in debugging mode, with the B<-d> -switch. This is the C variable which corresponds to Perl's $DB::trace -variable. See C. - - SV * PL_DBtrace - -=for hackers -Found in file intrpvar.h - -=item PL_dowarn - -The C variable which corresponds to Perl's $^W warning variable. - - bool PL_dowarn - -=for hackers -Found in file intrpvar.h - =item PL_modglobal -C is a general purpose, interpreter global HV for use by +C is a general purpose, interpreter global HV for use by extensions that need to keep information on a per-interpreter basis. -In a pinch, it can also be used as a symbol table for extensions -to share data among each other. It is a good idea to use keys +In a pinch, it can also be used as a symbol table for extensions +to share data among each other. It is a good idea to use keys prefixed by the package name of the extension that owns the data. HV* PL_modglobal @@ -2151,6 +2164,16 @@ Tells an SV that it is a string and disables all other OK bits. =for hackers Found in file sv.h +=item SvPOK_only_UTF8 + +Tells an SV that it is a UTF8 string (do not use frivolously) +and disables all other OK bits. + + void SvPOK_only_UTF8(SV* sv) + +=for hackers +Found in file sv.h + =item SvPV Returns a pointer to the string in the SV, or a stringified form of the SV @@ -2345,19 +2368,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B +in the C enum. 
Test these flags with the C macro. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B -in the C enum. Test these flags with the C macro. +Returns the type of the SV. See C. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h @@ -2411,6 +2434,15 @@ Type flag for blessed scalars. See C. =for hackers Found in file sv.h +=item SvUOK + +Returns a boolean indicating whether the SV contains an unsigned integer. + + void SvUOK(SV* sv) + +=for hackers +Found in file sv.h + =item SvUPGRADE Used to upgrade an SV to a more complex form. Uses C to @@ -2421,6 +2453,33 @@ perform the upgrade if necessary. See C. =for hackers Found in file sv.h +=item SvUTF8 + +Returns a boolean indicating whether the SV contains UTF-8 encoded data. + + void SvUTF8(SV* sv) + +=for hackers +Found in file sv.h + +=item SvUTF8_off + +Unsets the UTF8 status of an SV. + + void SvUTF8_off(SV *sv) + +=for hackers +Found in file sv.h + +=item SvUTF8_on + +Tells an SV that it is a string and encoded in UTF8. Do not use frivolously. + + void SvUTF8_on(SV *sv) + +=for hackers +Found in file sv.h + =item SvUV Coerces the given SV to an unsigned integer and returns it. @@ -2522,8 +2581,9 @@ Found in file sv.c =item sv_catsv -Concatenates the string from SV C onto the end of the string in SV -C. Handles 'get' magic, but not 'set' magic. See C. +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. Handles 'get' magic, but +not 'set' magic. See C. void sv_catsv(SV* dsv, SV* ssv) @@ -3013,13 +3073,29 @@ Found in file sv.c Unsets the RV status of the SV, and decrements the reference count of whatever was being referenced by the RV. This can almost be thought of -as a reversal of C. See C. +as a reversal of C. This is C with the C +being zero. See C. 
void sv_unref(SV* sv) =for hackers Found in file sv.c +=item sv_unref_flags + +Unsets the RV status of the SV, and decrements the reference count of +whatever was being referenced by the RV. This can almost be thought of +as a reversal of C<newSVrv>. The C<flags> argument can contain +C<SV_IMMEDIATE_UNREF> to force the reference count to be decremented +(otherwise the decrementing is conditional on the reference count being +different from one or the reference being a readonly SV). +See C<SvROK_off>. + + void sv_unref_flags(SV* sv, U32 flags) + +=for hackers +Found in file sv.c + =item sv_upgrade Upgrade an SV to a more complex form. Use C<SvUPGRADE>. See @@ -3143,12 +3219,40 @@ Converts the specified character to uppercase. =for hackers Found in file handy.h -=item U8 *s +=item utf8_distance + +Returns the number of UTF8 characters between the UTF-8 pointers C<a> +and C<b>. + +WARNING: use only if you *know* that the pointers point inside the +same UTF-8 buffer. + + IV utf8_distance(U8 *a, U8 *b) + +=for hackers +Found in file utf8.c + +=item utf8_hop + +Return the UTF-8 pointer C<s> displaced by C<off> characters, either +forward or backward. + +WARNING: do not use the following unless you *know* C<off> is within +the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned +on the first byte of a character or just after the last byte of a character. + + U8* utf8_hop(U8 *s, I32 off) + +=for hackers +Found in file utf8.c + +=item utf8_length -Returns true if first C bytes of the given string form valid a UTF8 -string, false otherwise. +Return the length of the UTF-8 char encoded string C<s> in characters. +Stops at C<e> (inclusive). If C<e E<lt> s> or if the scan would end +up past C<e>, croaks. - is_utf8_string U8 *s(STRLEN len) + STRLEN utf8_length(U8* s, U8 *e) =for hackers Found in file utf8.c @@ -3158,9 +3262,66 @@ Found in file utf8.c Converts a string C<s> of length C<len> from UTF8 into byte encoding. Unlike C<bytes_to_utf8>, this over-writes the original string, and updates len to contain the new length. 
-Returns zero on failure leaving the string and len unchanged +Returns zero on failure, setting C<len> to -1. + +NOTE: this function is experimental and may change or be +removed without notice. + + U8* utf8_to_bytes(U8 *s, STRLEN *len) + +=for hackers +Found in file utf8.c + +=item utf8_to_uv + +Returns the character value of the first character in the string C<s> +which is assumed to be in UTF8 encoding and no longer than C<curlen>; +C<retlen> will be set to the length, in bytes, of that character. + +If C<s> does not point to a well-formed UTF8 character, the behaviour +is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY, +it is assumed that the caller will raise a warning, and this function +will silently just set C<retlen> to C<-1> and return zero. If the +C<flags> does not contain UTF8_CHECK_ONLY, warnings about +malformations will be given, C<retlen> will be set to the expected +length of the UTF-8 character in bytes, and zero will be returned. + +The C<flags> can also contain various flags to allow deviations from +the strict UTF-8 encoding (see F<utf8.h>). + + UV utf8_to_uv(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags) + +=for hackers +Found in file utf8.c + +=item utf8_to_uv_simple + +Returns the character value of the first character in the string C<s> +which is assumed to be in UTF8 encoding; C<retlen> will be set to the +length, in bytes, of that character. + +If C<s> does not point to a well-formed UTF8 character, zero is +returned and retlen is set, if possible, to -1. + + UV utf8_to_uv_simple(U8 *s, STRLEN* retlen) + +=for hackers +Found in file utf8.c + +=item uv_to_utf8 + +Adds the UTF8 representation of the Unicode codepoint C<uv> to the end +of the string C<d>; C<d> should have at least C<UTF8_MAXLEN+1> free +bytes available. The return value is the pointer to the byte after the +end of the new character. In other words, + + d = uv_to_utf8(d, uv); + +is the recommended Unicode-aware way of saying + + *(d++) = uv; - U8 * utf8_to_bytes(U8 *s, STRLEN *len) + U8* uv_to_utf8(U8 *d, UV uv) =for hackers Found in file utf8.c