X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pod%2Fperlapi.pod;h=5fbc20162c78a304dc9c2621538bf62b5524acdf;hb=ec38d36ccc38aad33e07cbbbf1e8816341de661d;hp=308508414859ee4dadaaaef42eee2f804c3d9898;hpb=5689539bfd8a909e5d9e6f7106c2ee986b0fc447;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 3085084..5fbc201 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -182,13 +182,33 @@ must then use C to assign values to these new elements. =for hackers Found in file av.c +=item bytes_from_utf8 + +Converts a string C of length C from UTF8 into byte encoding. +Unlike but like C, returns a pointer to +the newly-created string, and updates C to contain the new +length. Returns the original string if no conversion occurs, C +is unchanged. Do nothing if C points to 0. Sets C to +0 if C is converted or contains all 7bit characters. + +NOTE: this function is experimental and may change or be +removed without notice. + + U8* bytes_from_utf8(U8 *s, STRLEN *len, bool *is_utf8) + +=for hackers +Found in file utf8.c + =item bytes_to_utf8 Converts a string C of length C from ASCII into UTF8 encoding. Returns a pointer to the newly-created string, and sets C to reflect the new length. - U8 * bytes_to_utf8(U8 *s, STRLEN *len) +NOTE: this function is experimental and may change or be +removed without notice. + + U8* bytes_to_utf8(U8 *s, STRLEN *len) =for hackers Found in file utf8.c @@ -1013,6 +1033,30 @@ character. =for hackers Found in file handy.h +=item is_utf8_char + +Tests if some arbitrary number of bytes begins in a valid UTF-8 +character. Note that an INVARIANT (i.e. ASCII) character is a valid UTF-8 character. +The actual number of bytes in the UTF-8 character will be returned if +it is valid, otherwise 0. + + STRLEN is_utf8_char(U8 *p) + +=for hackers +Found in file utf8.c + +=item is_utf8_string + +Returns true if first C bytes of the given string form a valid UTF8 +string, false otherwise. 
Note that 'a valid UTF8 string' does not mean +'a string that contains UTF8' because a valid ASCII string is a valid +UTF8 string. + + bool is_utf8_string(U8 *s, STRLEN len) + +=for hackers +Found in file utf8.c + =item items Variable which is setup by C to indicate the number of @@ -1042,10 +1086,27 @@ Closing bracket on a callback. See C and L. =for hackers Found in file scope.h +=item load_module + +Loads the module whose name is pointed to by the string part of name. +Note that the actual module name, not its filename, should be given. +E.g., "Foo::Bar" instead of "Foo/Bar.pm". flags can be any of +PERL_LOADMOD_DENY, PERL_LOADMOD_NOIMPORT, or PERL_LOADMOD_IMPORT_OPS +(or 0 for no flags). ver, if specified, provides version semantics +similar to C. The optional trailing SV* +arguments can be used to specify arguments to the module's import() +method, similar to C. + + void load_module(U32 flags, SV* name, SV* ver, ...) + +=for hackers +Found in file op.c + =item looks_like_number Test if the content of an SV looks like a number (or is a -number). +number). C and C are treated as numbers (so will not +issue a non-numeric warning), even if your atof() doesn't grok them. I32 looks_like_number(SV* sv) @@ -1440,51 +1501,6 @@ Tells a Perl interpreter to run. See L. =for hackers Found in file perl.c -=item PL_DBsingle - -When Perl is run in debugging mode, with the B<-d> switch, this SV is a -boolean which indicates whether subs are being single-stepped. -Single-stepping is automatically turned on after every step. This is the C -variable which corresponds to Perl's $DB::single variable. See -C. - - SV * PL_DBsingle - -=for hackers -Found in file intrpvar.h - -=item PL_DBsub - -When Perl is run in debugging mode, with the B<-d> switch, this GV contains -the SV which holds the name of the sub being debugged. This is the C -variable which corresponds to Perl's $DB::sub variable. See -C. 
- - GV * PL_DBsub - -=for hackers -Found in file intrpvar.h - -=item PL_DBtrace - -Trace variable used when Perl is run in debugging mode, with the B<-d> -switch. This is the C variable which corresponds to Perl's $DB::trace -variable. See C. - - SV * PL_DBtrace - -=for hackers -Found in file intrpvar.h - -=item PL_dowarn - -The C variable which corresponds to Perl's $^W warning variable. - - bool PL_dowarn - -=for hackers -Found in file intrpvar.h - =item PL_modglobal C is a general purpose, interpreter global HV for use by @@ -1568,13 +1584,34 @@ Found in file pp.h =item POPp -Pops a string off the stack. +Pops a string off the stack. Deprecated. New code should provide +a STRLEN n_a and use POPpx. char* POPp =for hackers Found in file pp.h +=item POPpbytex + +Pops a string off the stack which must consist of bytes i.e. characters < 256. +Requires a variable STRLEN n_a in scope. + + char* POPpbytex + +=for hackers +Found in file pp.h + +=item POPpx + +Pops a string off the stack. +Requires a variable STRLEN n_a in scope. + + char* POPpx + +=for hackers +Found in file pp.h + =item POPs Pops an SV off the stack. @@ -1676,7 +1713,9 @@ Found in file handy.h =item require_pv -Tells Perl to C a module. +Tells Perl to C the file named by the string argument. It is +analogous to the Perl code C. It's even +implemented that way; consider using Perl_load_module instead. NOTE: the perl_ form of this function is deprecated. @@ -1895,7 +1934,7 @@ indicated number of bytes (remember to reserve space for an extra trailing NUL character). Calls C to perform the expansion if necessary. Returns a pointer to the character buffer. - void SvGROW(SV* sv, STRLEN len) + char * SvGROW(SV* sv, STRLEN len) =for hackers Found in file sv.h @@ -2158,6 +2197,7 @@ Found in file sv.h =item SvPOK_only Tells an SV that it is a string and disables all other OK bits. +Will also turn off the UTF8 status. 
void SvPOK_only(SV* sv) @@ -2166,8 +2206,8 @@ Found in file sv.h =item SvPOK_only_UTF8 -Tells an SV that it is a UTF8 string (do not use frivolously) -and disables all other OK bits. +Tells an SV that it is a string and disables all other OK bits, +and leaves the UTF8 status as it was. void SvPOK_only_UTF8(SV* sv) @@ -2434,6 +2474,15 @@ Type flag for blessed scalars. See C. =for hackers Found in file sv.h +=item SvUOK + +Returns a boolean indicating whether the SV contains an unsigned integer. + + void SvUOK(SV* sv) + +=for hackers +Found in file sv.h + =item SvUPGRADE Used to upgrade an SV to a more complex form. Uses C to @@ -2464,7 +2513,8 @@ Found in file sv.h =item SvUTF8_on -Tells an SV that it is a string and encoded in UTF8. Do not use frivolously. +Turn on the UTF8 status of an SV (the data is not changed, just the flag). +Do not use frivolously. void SvUTF8_on(SV *sv) @@ -2514,7 +2564,8 @@ Found in file sv.c =item sv_catpv Concatenates the string onto the end of the string which is in the SV. -Handles 'get' magic, but not 'set' magic. See C. +If the SV has the UTF8 status set, then the bytes appended should be +valid UTF8. Handles 'get' magic, but not 'set' magic. See C. void sv_catpv(SV* sv, const char* ptr) @@ -2523,9 +2574,13 @@ Found in file sv.c =item sv_catpvf -Processes its arguments like C and appends the formatted output -to an SV. Handles 'get' magic, but not 'set' magic. C must -typically be called after calling this function to handle 'set' magic. +Processes its arguments like C and appends the formatted +output to an SV. If the appended data contains "wide" characters +(including, but not limited to, SVs with a UTF-8 PV formatted with %s, +and characters >255 formatted with %c), the original SV might get +upgraded to UTF-8. Handles 'get' magic, but not 'set' magic. +C must typically be called after calling this function +to handle 'set' magic. void sv_catpvf(SV* sv, const char* pat, ...) 
@@ -2544,8 +2599,9 @@ Found in file sv.c =item sv_catpvn Concatenates the string onto the end of the string which is in the SV. The -C indicates number of bytes to copy. Handles 'get' magic, but not -'set' magic. See C. +C indicates number of bytes to copy. If the SV has the UTF8 +status set, then the bytes appended should be valid UTF8. +Handles 'get' magic, but not 'set' magic. See C. void sv_catpvn(SV* sv, const char* ptr, STRLEN len) @@ -2572,8 +2628,9 @@ Found in file sv.c =item sv_catsv -Concatenates the string from SV C onto the end of the string in SV -C. Handles 'get' magic, but not 'set' magic. See C. +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. Handles 'get' magic, but +not 'set' magic. See C. void sv_catsv(SV* dsv, SV* ssv) @@ -3001,6 +3058,19 @@ Note that C copies the pointer while this copies the string. =for hackers Found in file sv.c +=item sv_setref_uv + +Copies an unsigned integer into a new SV, optionally blessing the SV. The C +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C argument indicates the package for the +blessing. Set C to C to avoid the blessing. The new SV +will be returned and will have a reference count of 1. + + SV* sv_setref_uv(SV* rv, const char* classname, UV uv) + +=for hackers +Found in file sv.c + =item sv_setsv Copies the contents of the source SV C into the destination SV C. @@ -3120,6 +3190,20 @@ Like C, but also handles 'set' magic. =for hackers Found in file sv.c +=item sv_utf8_decode + +Convert the octets in the PV from UTF-8 to chars. Scan for validity and then +turn off SvUTF8 if needed so that we see characters. Used as a building block +for decode_utf8 in Encode.xs + +NOTE: this function is experimental and may change or be +removed without notice. + + bool sv_utf8_decode(SV *sv) + +=for hackers +Found in file sv.c + =item sv_utf8_downgrade Attempt to convert the PV of an SV from UTF8-encoded to byte encoding. 
@@ -3138,10 +3222,8 @@ Found in file sv.c =item sv_utf8_encode Convert the PV of an SV to UTF8-encoded, but then turn off the C -flag so that it looks like bytes again. Nothing calls this. - -NOTE: this function is experimental and may change or be -removed without notice. +flag so that it looks like octets again. Used as a building block +for encode_utf8 in Encode.xs void sv_utf8_encode(SV *sv) @@ -3151,8 +3233,11 @@ Found in file sv.c =item sv_utf8_upgrade Convert the PV of an SV to its UTF8-encoded form. +Forces the SV to string form if it is not already. +Always sets the SvUTF8 flag to avoid future validity checks even +if all the bytes have hibit clear. - void sv_utf8_upgrade(SV *sv) + STRLEN sv_utf8_upgrade(SV *sv) =for hackers Found in file sv.c @@ -3209,12 +3294,40 @@ Converts the specified character to uppercase. =for hackers Found in file handy.h -=item U8 *s +=item utf8n_to_uvchr -Returns true if first C bytes of the given string form valid a UTF8 -string, false otherwise. +Returns the native character value of the first character in the string C +which is assumed to be in UTF8 encoding; C will be set to the +length, in bytes, of that character. + +Allows length and flags to be passed to low level routine. - is_utf8_string U8 *s(STRLEN len) + UV utf8n_to_uvchr(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags) + +=for hackers +Found in file utf8.c + +=item utf8n_to_uvuni + +Bottom level UTF-8 decode routine. +Returns the Unicode code point value of the first character in the string C +which is assumed to be in UTF8 encoding and no longer than C; +C will be set to the length, in bytes, of that character. + +If C does not point to a well-formed UTF8 character, the behaviour +is dependent on the value of C: if it contains UTF8_CHECK_ONLY, +it is assumed that the caller will raise a warning, and this function +will silently just set C to C<-1> and return zero. 
If the +C does not contain UTF8_CHECK_ONLY, warnings about +malformations will be given, C will be set to the expected +length of the UTF-8 character in bytes, and zero will be returned. + +The C can also contain various flags to allow deviations from +the strict UTF-8 encoding (see F). + +Most code should use utf8_to_uvchr() rather than call this directly. + + UV utf8n_to_uvuni(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags) =for hackers Found in file utf8.c @@ -3264,45 +3377,77 @@ Unlike C, this over-writes the original string, and updates len to contain the new length. Returns zero on failure, setting C to -1. - U8 * utf8_to_bytes(U8 *s, STRLEN *len) +NOTE: this function is experimental and may change or be +removed without notice. + + U8* utf8_to_bytes(U8 *s, STRLEN *len) =for hackers Found in file utf8.c -=item utf8_to_uv +=item utf8_to_uvchr -Returns the character value of the first character in the string C -which is assumed to be in UTF8 encoding and no longer than C; -C will be set to the length, in bytes, of that character, -and the pointer C will be advanced to the end of the character. - -If C does not point to a well-formed UTF8 character, the behaviour -is dependent on the value of C: if it contains UTF8_CHECK_ONLY, -it is assumed that the caller will raise a warning, and this function -will silently just set C to C<-1> and return zero. If the -C does not contain UTF8_CHECK_ONLY, warnings about -malformations will be given, C will be set to the expected -length of the UTF-8 character in bytes, and zero will be returned. +Returns the native character value of the first character in the string C +which is assumed to be in UTF8 encoding; C will be set to the +length, in bytes, of that character. -The C can also contain various flags to allow deviations from -the strict UTF-8 encoding (see F). +If C does not point to a well-formed UTF8 character, zero is +returned and retlen is set, if possible, to -1. 
- - U8* s utf8_to_uv(STRLEN curlen, STRLEN *retlen, U32 flags) + UV utf8_to_uvchr(U8 *s, STRLEN* retlen) =for hackers Found in file utf8.c -=item utf8_to_uv_simple +=item utf8_to_uvuni -Returns the character value of the first character in the string C +Returns the Unicode code point of the first character in the string C which is assumed to be in UTF8 encoding; C will be set to the -length, in bytes, of that character, and the pointer C will be -advanced to the end of the character. +length, in bytes, of that character. + +This function should only be used when the returned UV is considered +an index into the Unicode semantic tables (e.g. swashes). If C does not point to a well-formed UTF8 character, zero is returned and retlen is set, if possible, to -1. - U8* s utf8_to_uv_simple(STRLEN *retlen) + UV utf8_to_uvuni(U8 *s, STRLEN* retlen) + +=for hackers +Found in file utf8.c + +=item uvchr_to_utf8 + +Adds the UTF8 representation of the Native codepoint C to the end +of the string C; C should have at least C free +bytes available. The return value is the pointer to the byte after the +end of the new character. In other words, + + d = uvchr_to_utf8(d, uv); + +is the recommended wide native character-aware way of saying + + *(d++) = uv; + + U8* uvchr_to_utf8(U8 *d, UV uv) + +=for hackers +Found in file utf8.c + +=item uvuni_to_utf8 + +Adds the UTF8 representation of the Unicode codepoint C to the end +of the string C; C should have at least C free +bytes available. The return value is the pointer to the byte after the +end of the new character. In other words, + + d = uvuni_to_utf8(d, uv); + +is the recommended Unicode-aware way of saying + + *(d++) = uv; + + U8* uvuni_to_utf8(U8 *d, UV uv) =for hackers Found in file utf8.c