X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=pod%2Fperlapi.pod;h=91880e498f2528266b709d660b2c50bb753744f9;hb=038fcae385eee134ba22a23fbbf09eafafbe7927;hp=dc52664ef7b8752830e8f4b0e64c86f4057ba6f0;hpb=a4f1a0295dfc0733a51ca0623d486d082d04773a;p=p5sagit%2Fp5-mst-13.2.git diff --git a/pod/perlapi.pod b/pod/perlapi.pod index dc52664..91880e4 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -297,6 +297,8 @@ Sort an array. Here is an example: sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale); +See lib/sort.pm for details about controlling the sorting algorithm. + void sortsv(SV ** array, size_t num_elts, SVCOMPARE_t cmp) =for hackers @@ -510,6 +512,35 @@ Found in file handy.h Create and return a new interpreter by cloning the current one. +perl_clone takes these flags as parameters: + +CLONEf_COPY_STACKS - is used to, well, copy the stacks also, +without it we only clone the data and zero the stacks, +with it we copy the stacks and the new perl interpreter is +ready to run at the exact same point as the previous one. +The pseudo-fork code uses COPY_STACKS while the +threads->new doesn't. + +CLONEf_KEEP_PTR_TABLE +perl_clone keeps a ptr_table with the pointer of the old +variable as a key and the new variable as a value, +this allows it to check if something has been cloned and not +clone it again but rather just use the value and increase the +refcount. 
If KEEP_PTR_TABLE is not set then perl_clone will kill +the ptr_table using the function +C, +reason to keep it around is if you want to dup some of your own +variables which are outside the graph perl scans, example of this +code is in threads.xs create + +CLONEf_CLONE_HOST +This is a win32 thing, it is ignored on unix, it tells perls +win32host code (which is c++) to clone itself, this is needed on +win32 if you want to run two threads at the same time, +if you just want to do some stuff in a separate perl interpreter +and then throw it away and return to the original one, +you don't need to do anything. + PerlInterpreter* perl_clone(PerlInterpreter* interp, UV flags) =for hackers @@ -560,6 +591,18 @@ Found in file cv.h =over 8 +=item cv_undef + +Clear out all the active components of a CV. This can happen either +by an explicit C, or by the reference count going to zero. +In the former case, we keep the CvOUTSIDE pointer, so that any anonymous +children can still follow the full lexical scope chain. + + void cv_undef(CV* cv) + +=for hackers +Found in file op.c + =item load_module Loads the module whose name is pointed to by the string part of name. @@ -576,6 +619,16 @@ method, similar to C. =for hackers Found in file op.c +=item nothreadhook + +Stub that provides thread hook for perl_destruct when there are +no threads. + + int nothreadhook() + +=for hackers +Found in file perl.c + =item perl_alloc Allocates a new Perl interpreter. See L. @@ -634,7 +687,7 @@ Found in file perl.c Tells Perl to C the file named by the string argument. It is analogous to the Perl code C. It's even -implemented that way; consider using Perl_load_module instead. +implemented that way; consider using load_module instead. NOTE: the perl_ form of this function is deprecated. @@ -646,6 +699,54 @@ Found in file perl.c =back +=head1 Functions in file pp_pack.c + + +=over 8 + +=item packlist + +The engine implementing pack() Perl function. 
+ + void packlist(SV *cat, char *pat, char *patend, SV **beglist, SV **endlist) + +=for hackers +Found in file pp_pack.c + +=item pack_cat + +The engine implementing pack() Perl function. Note: parameters next_in_list and +flags are not used. This call should not be used; use packlist instead. + + void pack_cat(SV *cat, char *pat, char *patend, SV **beglist, SV **endlist, SV ***next_in_list, U32 flags) + +=for hackers +Found in file pp_pack.c + +=item unpackstring + +The engine implementing unpack() Perl function. C puts the +extracted list items on the stack and returns the number of elements. +Issue C before and C after the call to this function. + + I32 unpackstring(char *pat, char *patend, char *s, char *strend, U32 flags) + +=for hackers +Found in file pp_pack.c + +=item unpack_str + +The engine implementing unpack() Perl function. Note: parameters strbeg, new_s +and ocnt are not used. This call should not be used, use unpackstring instead. + + I32 unpack_str(char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags) + +=for hackers +Found in file pp_pack.c + + +=back + =head1 Global Variables =over 8 @@ -780,6 +881,20 @@ C apply equally to these functions. =for hackers Found in file gv.c +=item gv_fetchmeth_autoload + +Same as gv_fetchmeth(), but looks for autoloaded subroutines too. +Returns a glob for the subroutine. + +For an autoloaded subroutine without a GV, will create a GV even +if C. For an autoloaded subroutine without a stub, GvCV() +of the result may be zero. + + GV* gv_fetchmeth_autoload(HV* stash, const char* name, STRLEN len, I32 level) + +=for hackers +Found in file gv.c + =item gv_stashpv Returns a pointer to the stash for a specified package. C should @@ -819,9 +934,10 @@ is to be expected. (For information only--not to be used). =for hackers Found in file hv.h -=item Nullch +=item Nullch Null character pointer. 
+ =for hackers Found in file handy.h @@ -951,6 +1067,15 @@ Returns the package name of a stash. See C, C. =for hackers Found in file hv.h +=item hv_assert + +Check that a hash is in an internally consistent state. + + void hv_assert(HV* tb) + +=for hackers +Found in file hv.c + =item hv_clear Clears a hash, making it empty. @@ -960,6 +1085,21 @@ Clears a hash, making it empty. =for hackers Found in file hv.c +=item hv_clear_placeholders + +Clears any placeholders from a hash. If a restricted hash has any of its keys +marked as readonly and the key is subsequently deleted, the key is not actually +deleted but is marked by assigning it a value of &PL_sv_placeholder. This tags +it so it will be ignored by future operations such as iterating over the hash, +but will still allow the hash to have a value reassigned to the key at some +future point. This function clears any such placeholder keys from the hash. +See Hash::Util::lock_keys() for an example of its use. + + void hv_clear_placeholders(HV* hb) + +=for hackers +Found in file hv.c + =item hv_delete Deletes a key/value pair in the hash. The value SV is removed from the @@ -1048,6 +1188,7 @@ NOTE: Before version 5.004_65, C used to return the number of hash buckets that happen to be in use. If you still need that esoteric value, you can get it through the macro C. + I32 hv_iterinit(HV* tb) =for hackers @@ -1078,6 +1219,14 @@ Found in file hv.c Returns entries from a hash iterator. See C. +You may call C or C on the hash entry that the +iterator currently points to, without losing your place or invalidating your +iterator. Note that in this case the current entry is deleted from the hash +with your iterator holding the last reference to it. Your iterator is flagged +to free the entry on the next call to C, so you must not discard +your iterator immediately else the entry will leak - call C to +trigger the resource deallocation. + HE* hv_iternext(HV* tb) =for hackers @@ -1093,6 +1242,25 @@ operation. 
=for hackers Found in file hv.c +=item hv_iternext_flags + +Returns entries from a hash iterator. See C and C. +The C value will normally be zero; if HV_ITERNEXT_WANTPLACEHOLDERS is +set the placeholders keys (for restricted hashes) will be returned in addition +to normal keys. By default placeholders are automatically skipped over. +Currently a placeholder is implemented with a value that is +C<&Perl_sv_placeholder>. Note that the implementation of placeholders and +restricted hashes may change, and the implementation currently is +insufficiently abstracted for any change to be tidy. + +NOTE: this function is experimental and may change or be +removed without notice. + + HE* hv_iternext_flags(HV* tb, I32 flags) + +=for hackers +Found in file hv.c + =item hv_iterval Returns the value from the current position of the hash iterator. See @@ -1112,6 +1280,15 @@ Adds magic to a hash. See C. =for hackers Found in file hv.c +=item hv_scalar + +Evaluates the hash in scalar context and returns the result. Handles magic when the hash is tied. + + SV* hv_scalar(HV* hv) + +=for hackers +Found in file hv.c + =item hv_store Stores an SV in a hash. The hash key is specified as C and C is @@ -1121,7 +1298,15 @@ NULL if the operation failed or if the value did not need to be actually stored within the hash (as in the case of tied hashes). Otherwise it can be dereferenced to get the original C. Note that the caller is responsible for suitably incrementing the reference count of C before -the call, and decrementing it if the function returned NULL. +the call, and decrementing it if the function returned NULL. Effectively +a successful hv_store takes ownership of one reference to C. This is +usually what you want; a newly created SV has a reference count of one, so +if all your code does is create SVs then store them in a hash, hv_store +will own the only reference to the new SV, and your code doesn't need to do +anything further to tidy up. 
hv_store is not implemented as a call to +hv_store_ent, and does not create a temporary SV for the key, so if your +key data is not already in SV form then use hv_store in preference to +hv_store_ent. See L for more information on how to use this function on tied hashes. @@ -1141,7 +1326,17 @@ stored within the hash (as in the case of tied hashes). Otherwise the contents of the return value can be accessed using the C macros described here. Note that the caller is responsible for suitably incrementing the reference count of C before the call, and -decrementing it if the function returned NULL. +decrementing it if the function returned NULL. Effectively a successful +hv_store_ent takes ownership of one reference to C. This is +usually what you want; a newly created SV has a reference count of one, so +if all your code does is create SVs then store them in a hash, hv_store_ent +will own the only reference to the new SV, and your code doesn't need to do +anything further to tidy up. Note that hv_store_ent only reads the C; +unlike C it does not take ownership of it, so maintaining the correct +reference count on C is entirely the caller's responsibility. hv_store +is not implemented as a call to hv_store_ent, and does not create a temporary +SV for the key, so if your key data is not already in SV form then use +hv_store in preference to hv_store_ent. See L for more information on how to use this function on tied hashes. @@ -1406,6 +1601,16 @@ memory is zeroed with C. =for hackers Found in file handy.h +=item Poison + +Fill up memory with a pattern (byte 0xAB over and over again) that +hopefully catches attempts to access uninitialized memory. + + void Poison(void* dest, int nitems, type) + +=for hackers +Found in file handy.h + =item Renew The XSUB-writer's interface to the C C function. @@ -1436,30 +1641,34 @@ Found in file handy.h =item savepv -Copy a string to a safe spot. This does not use an SV. +Perl's version of C. 
Returns a pointer to a newly allocated +string which is a duplicate of C. The size of the string is +determined by C. The memory allocated for the new string can +be freed with the C function. - char* savepv(const char* sv) + char* savepv(const char* pv) =for hackers Found in file util.c =item savepvn -Copy a string to a safe spot. The C indicates number of bytes to -copy. If pointer is NULL allocate space for a string of size specified. -This does not use an SV. +Perl's version of what C would be if it existed. Returns a +pointer to a newly allocated string which is a duplicate of the first +C bytes from C. The memory allocated for the new string can be +freed with the C function. - char* savepvn(const char* sv, I32 len) + char* savepvn(const char* pv, I32 len) =for hackers Found in file util.c =item savesharedpv -Copy a string to a safe spot in memory shared between threads. -This does not use an SV. +A version of C which allocates the duplicate string in memory +which is shared between threads. - char* savesharedpv(const char* sv) + char* savesharedpv(const char* pv) =for hackers Found in file util.c @@ -1541,6 +1750,43 @@ Fill the sv with current working directory =for hackers Found in file util.c +=item new_version + +Returns a new version object based on the passed in SV: + + SV *sv = new_version(SV *ver); + +Does not alter the passed in ver SV. See "upg_version" if you +want to upgrade the SV. + + SV* new_version(SV *ver) + +=for hackers +Found in file util.c + +=item scan_version + +Returns a pointer to the next character after the parsed +version string, as well as upgrading the passed in SV to +an RV. + +Function must be called with an already existing SV like + + sv = newSV(0); + s = scan_version(s,SV *sv, bool qv); + +Performs some preprocessing to the string to ensure that +it has the correct characteristics of a version. Flags the +object if it contains an underscore (which denotes this +is an alpha version). 
The boolean qv denotes that the version +should be interpreted as if it had multiple decimals, even if +it doesn't. + + char* scan_version(char *vstr, SV *sv, bool qv) + +=for hackers +Found in file util.c + =item strEQ Test two strings to see if they are equal. Returns true or false. @@ -1622,6 +1868,92 @@ wrapper for C). =for hackers Found in file handy.h +=item sv_nolocking + +Dummy routine which "locks" an SV when there is no locking module present. +Exists to avoid test for a NULL function pointer and because it could potentially warn under +some level of strict-ness. + + void sv_nolocking(SV *) + +=for hackers +Found in file util.c + +=item sv_nosharing + +Dummy routine which "shares" an SV when there is no sharing module present. +Exists to avoid test for a NULL function pointer and because it could potentially warn under +some level of strict-ness. + + void sv_nosharing(SV *) + +=for hackers +Found in file util.c + +=item sv_nounlocking + +Dummy routine which "unlocks" an SV when there is no locking module present. +Exists to avoid test for a NULL function pointer and because it could potentially warn under +some level of strict-ness. + + void sv_nounlocking(SV *) + +=for hackers +Found in file util.c + +=item upg_version + +In-place upgrade of the supplied SV to a version object. + + SV *sv = upg_version(SV *sv); + +Returns a pointer to the upgraded SV. + + SV* upg_version(SV *ver) + +=for hackers +Found in file util.c + +=item vcmp + +Version object aware cmp. Both operands must already have been +converted into version objects. + + int vcmp(SV *lvs, SV *rvs) + +=for hackers +Found in file util.c + +=item vnumify + +Accepts a version object and returns the normalized floating +point representation. Call like: + + sv = vnumify(rv); + +NOTE: you can pass either the object directly or the SV +contained within the RV. 
+ + SV* vnumify(SV *vs) + +=for hackers +Found in file util.c + +=item vstringify + +Accepts a version object and returns the normalized string +representation. Call like: + + sv = vstringify(rv); + +NOTE: you can pass either the object directly or the SV +contained within the RV. + + SV* vstringify(SV *vs) + +=for hackers +Found in file util.c + =back @@ -1790,6 +2122,23 @@ Found in file op.c =back +=head1 Pad Data Structures + +=over 8 + +=item pad_sv + +Get the value at offset po in the current pad. +Use macro PAD_SV instead of calling this function directly. + + SV* pad_sv(PADOFFSET po) + +=for hackers +Found in file pad.c + + +=back + =head1 Stack Manipulation Macros =over 8 @@ -2108,6 +2457,15 @@ Return C<&PL_sv_undef> from an XSUB immediately. Uses C. =for hackers Found in file XSUB.h +=item XSRETURN_UV + +Return an integer from an XSUB immediately. Uses C. + + void XSRETURN_UV(IV uv) + +=for hackers +Found in file XSUB.h + =item XSRETURN_YES Return C<&PL_sv_yes> from an XSUB immediately. Uses C. @@ -2403,24 +2761,6 @@ The reference count for the SV is set to 1. =for hackers Found in file sv.c -=item new_vstring - -Returns a pointer to the next character after the parsed -vstring, as well as updating the passed in sv. - -Function must be called like - - sv = NEWSV(92,5); - s = new_vstring(s,sv); - -The sv must already be large enough to store the vstring -passed in. - - char* new_vstring(char *vstr, SV *sv) - -=for hackers -Found in file util.c - =item SvCUR Returns the length of the string which is in the SV. See C. @@ -2484,7 +2824,7 @@ Found in file sv.h Returns a boolean indicating whether the SV contains a signed integer. - void SvIOK_notUV(SV* sv) + bool SvIOK_notUV(SV* sv) =for hackers Found in file sv.h @@ -2529,7 +2869,28 @@ Found in file sv.h Returns a boolean indicating whether the SV contains an unsigned integer. 
- void SvIOK_UV(SV* sv) + bool SvIOK_UV(SV* sv) + +=for hackers +Found in file sv.h + +=item SvIsCOW + +Returns a boolean indicating whether the SV is Copy-On-Write. (either shared +hash key scalars, or full Copy On Write scalars if 5.9.0 is configured for +COW) + + bool SvIsCOW(SV* sv) + +=for hackers +Found in file sv.h + +=item SvIsCOW_shared_hash + +Returns a boolean indicating whether the SV is Copy-On-Write shared hash key +scalar. + + bool SvIsCOW_shared_hash(SV* sv) =for hackers Found in file sv.h @@ -2564,6 +2925,15 @@ Only use when you are sure SvIOK is true. See also C. =for hackers Found in file sv.h +=item SvIV_nomg + +Like C but doesn't process magic. + + IV SvIV_nomg(SV* sv) + +=for hackers +Found in file sv.h + =item SvLEN Returns the size of the string buffer in the SV, not including any part @@ -2741,7 +3111,7 @@ Found in file sv.h =item SvPOK_only Tells an SV that it is a string and disables all other OK bits. -Will also turn off the UTF8 status. +Will also turn off the UTF-8 status. void SvPOK_only(SV* sv) @@ -2751,7 +3121,7 @@ Found in file sv.h =item SvPOK_only_UTF8 Tells an SV that it is a string and disables all other OK bits, -and leaves the UTF8 status as it was. +and leaves the UTF-8 status as it was. void SvPOK_only_UTF8(SV* sv) @@ -2760,8 +3130,9 @@ Found in file sv.h =item SvPV -Returns a pointer to the string in the SV, or a stringified form of the SV -if the SV does not contain a string. Handles 'get' magic. See also +Returns a pointer to the string in the SV, or a stringified form of +the SV if the SV does not contain a string. The SV may cache the +stringified version becoming C. Handles 'get' magic. See also C for a version which guarantees to evaluate sv only once. char* SvPV(SV* sv, STRLEN len) @@ -2784,7 +3155,6 @@ Like C, but converts sv to byte representation first if necessary. Guarantees to evaluate sv only once; use the more efficient C otherwise. 
- char* SvPVbytex(SV* sv, STRLEN len) =for hackers @@ -2889,8 +3259,9 @@ Found in file sv.h =item SvPV_force -Like but will force the SV into becoming a string (SvPOK). You want -force if you are going to update the SvPVX directly. +Like C but will force the SV into containing just a string +(C). You want force if you are going to update the C +directly. char* SvPV_force(SV* sv, STRLEN len) @@ -2899,8 +3270,9 @@ Found in file sv.h =item SvPV_force_nomg -Like but will force the SV into becoming a string (SvPOK). You want -force if you are going to update the SvPVX directly. Doesn't process magic. +Like C but will force the SV into containing just a string +(C). You want force if you are going to update the C +directly. Doesn't process magic. char* SvPV_force_nomg(SV* sv, STRLEN len) @@ -2909,14 +3281,24 @@ Found in file sv.h =item SvPV_nolen -Returns a pointer to the string in the SV, or a stringified form of the SV -if the SV does not contain a string. Handles 'get' magic. +Returns a pointer to the string in the SV, or a stringified form of +the SV if the SV does not contain a string. The SV may cache the +stringified form becoming C. Handles 'get' magic. char* SvPV_nolen(SV* sv) =for hackers Found in file sv.h +=item SvPV_nomg + +Like C but doesn't process magic. + + char* SvPV_nomg(SV* sv, STRLEN len) + +=for hackers +Found in file sv.h + =item SvREFCNT Returns the value of the object's reference count. @@ -2991,7 +3373,7 @@ Found in file sv.h =item SvTAINT -Taints an SV if tainting is enabled +Taints an SV if tainting is enabled. void SvTAINT(SV* sv) @@ -3024,7 +3406,7 @@ Found in file sv.h =item SvTAINTED_on -Marks an SV as tainted. +Marks an SV as tainted if tainting is enabled. void SvTAINTED_on(SV* sv) @@ -3084,14 +3466,14 @@ Found in file sv.h Returns a boolean indicating whether the SV contains UTF-8 encoded data. - void SvUTF8(SV* sv) + bool SvUTF8(SV* sv) =for hackers Found in file sv.h =item SvUTF8_off -Unsets the UTF8 status of an SV. 
+Unsets the UTF-8 status of an SV. void SvUTF8_off(SV *sv) @@ -3100,7 +3482,7 @@ Found in file sv.h =item SvUTF8_on -Turn on the UTF8 status of an SV (the data is not changed, just the flag). +Turn on the UTF-8 status of an SV (the data is not changed, just the flag). Do not use frivolously. void SvUTF8_on(SV *sv) @@ -3138,6 +3520,24 @@ evaluate sv only once. Use the more efficient C otherwise. =for hackers Found in file sv.h +=item SvUV_nomg + +Like C but doesn't process magic. + + UV SvUV_nomg(SV* sv) + +=for hackers +Found in file sv.h + +=item SvVOK + +Returns a boolean indicating whether the SV contains a v-string. + + bool SvVOK(SV* sv) + +=for hackers +Found in file sv.h + =item sv_2bool This function is only called on magical items, and is only used by @@ -3169,12 +3569,13 @@ named after the PV if we're a string. =for hackers Found in file sv.c -=item sv_2iv +=item sv_2iv_flags -Return the integer value of an SV, doing any necessary string conversion, -magic etc. Normally used via the C and C macros. +Return the integer value of an SV, doing any necessary string +conversion. If flags includes SV_GMAGIC, does an mg_get() first. +Normally used via the C and C macros. - IV sv_2iv(SV* sv) + IV sv_2iv_flags(SV* sv, I32 flags) =for hackers Found in file sv.c @@ -3204,7 +3605,7 @@ Found in file sv.c =item sv_2pvbyte Return a pointer to the byte-encoded representation of the SV, and set *lp -to its length. May cause the SV to be downgraded from UTF8 as a +to its length. May cause the SV to be downgraded from UTF-8 as a side-effect. Usually accessed via the C macro. @@ -3217,7 +3618,7 @@ Found in file sv.c =item sv_2pvbyte_nolen Return a pointer to the byte-encoded representation of the SV. -May cause the SV to be downgraded from UTF8 as a side-effect. +May cause the SV to be downgraded from UTF-8 as a side-effect. Usually accessed via the C macro. 
@@ -3228,8 +3629,8 @@ Found in file sv.c =item sv_2pvutf8 -Return a pointer to the UTF8-encoded representation of the SV, and set *lp -to its length. May cause the SV to be upgraded to UTF8 as a side-effect. +Return a pointer to the UTF-8-encoded representation of the SV, and set *lp +to its length. May cause the SV to be upgraded to UTF-8 as a side-effect. Usually accessed via the C macro. @@ -3240,8 +3641,8 @@ Found in file sv.c =item sv_2pvutf8_nolen -Return a pointer to the UTF8-encoded representation of the SV. -May cause the SV to be upgraded to UTF8 as a side-effect. +Return a pointer to the UTF-8-encoded representation of the SV. +May cause the SV to be upgraded to UTF-8 as a side-effect. Usually accessed via the C macro. @@ -3272,13 +3673,13 @@ use the macro wrapper C instead. =for hackers Found in file sv.c -=item sv_2uv +=item sv_2uv_flags Return the unsigned integer value of an SV, doing any necessary string -conversion, magic etc. Normally used via the C and C -macros. +conversion. If flags includes SV_GMAGIC, does an mg_get() first. +Normally used via the C and C macros. - UV sv_2uv(SV* sv) + UV sv_2uv_flags(SV* sv, I32 flags) =for hackers Found in file sv.c @@ -3307,8 +3708,8 @@ Found in file sv.c =item sv_catpv Concatenates the string onto the end of the string which is in the SV. -If the SV has the UTF8 status set, then the bytes appended should be -valid UTF8. Handles 'get' magic, but not 'set' magic. See C. +If the SV has the UTF-8 status set, then the bytes appended should be +valid UTF-8. Handles 'get' magic, but not 'set' magic. See C. void sv_catpv(SV* sv, const char* ptr) @@ -3342,8 +3743,8 @@ Found in file sv.c =item sv_catpvn Concatenates the string onto the end of the string which is in the SV. The -C indicates number of bytes to copy. If the SV has the UTF8 -status set, then the bytes appended should be valid UTF8. +C indicates number of bytes to copy. If the SV has the UTF-8 +status set, then the bytes appended should be valid UTF-8. 
Handles 'get' magic, but not 'set' magic. See C. void sv_catpvn(SV* sv, const char* ptr, STRLEN len) @@ -3354,8 +3755,8 @@ Found in file sv.c =item sv_catpvn_flags Concatenates the string onto the end of the string which is in the SV. The -C indicates number of bytes to copy. If the SV has the UTF8 -status set, then the bytes appended should be valid UTF8. +C indicates number of bytes to copy. If the SV has the UTF-8 +status set, then the bytes appended should be valid UTF-8. If C has C bit set, will C on C if appropriate, else not. C and C are implemented in terms of this function. @@ -3421,6 +3822,8 @@ Efficient removal of characters from the beginning of the string buffer. SvPOK(sv) must be true and the C must be a pointer to somewhere inside the string buffer. The C becomes the first character of the adjusted string. Uses the "OOK hack". +Beware: after this function returns, C and SvPVX(sv) may no longer +refer to the same chunk of data. void sv_chop(SV* sv, char* ptr) @@ -3479,6 +3882,21 @@ settings. =for hackers Found in file sv.c +=item sv_copypv + +Copies a stringified representation of the source SV into the +destination SV. Automatically performs any necessary mg_get and +coercion of numeric values into strings. Guaranteed to preserve +UTF-8 flag even from overloaded objects. Similar in nature to +sv_2pv[_flags] but operates directly on an SV instead of just the +string. Mostly uses sv_2pv_flags to do its work, except when that +would lose the UTF-8'ness of the PV. + + void sv_copypv(SV* dsv, SV* ssv) + +=for hackers +Found in file sv.c + =item sv_dec Auto-decrement of the value in the SV, doing string to numeric conversion @@ -3526,8 +3944,13 @@ Found in file sv.c Undo various types of fakery on an SV: if the PV is a shared string, make a private copy; if we're a ref, stop refing; if we're a glob, downgrade to -an xpvmg. The C parameter gets passed to C -when unrefing. C calls this function with flags set to 0. 
+an xpvmg; if we're a copy-on-write scalar, this is the on-write time when +we do the copy, and is also used locally. If C is set +then a copy-on-write scalar drops its PV buffer (if any) and becomes +SvPOK_off rather than making a copy. (Used where this scalar is about to be +set to some other value.) In addition, the C parameter gets passed to +C when unrefing. C calls this function +with flags set to 0. void sv_force_normal_flags(SV *sv, U32 flags) @@ -3632,7 +4055,7 @@ Found in file sv.c =item sv_len_utf8 Returns the number of characters in the string in an SV, counting wide -UTF8 bytes as a single character. Handles magic and type coercion. +UTF-8 bytes as a single character. Handles magic and type coercion. STRLEN sv_len_utf8(SV* sv) @@ -3704,39 +4127,6 @@ instead. =for hackers Found in file sv.c -=item sv_nolocking - -Dummy routine which "locks" an SV when there is no locking module present. -Exists to avoid test for a NULL function pointer and because it could potentially warn under -some level of strict-ness. - - void sv_nolocking(SV *) - -=for hackers -Found in file util.c - -=item sv_nosharing - -Dummy routine which "shares" an SV when there is no sharing module present. -Exists to avoid test for a NULL function pointer and because it could potentially warn under -some level of strict-ness. - - void sv_nosharing(SV *) - -=for hackers -Found in file util.c - -=item sv_nounlocking - -Dummy routine which "unlocks" an SV when there is no locking module present. -Exists to avoid test for a NULL function pointer and because it could potentially warn under -some level of strict-ness. - - void sv_nounlocking(SV *) - -=for hackers -Found in file util.c - =item sv_nv A private implementation of the C macro for compilers which can't @@ -3750,7 +4140,7 @@ Found in file sv.c =item sv_pos_b2u Converts the value pointed to by offsetp from a count of bytes from the -start of the string, to a count of the equivalent number of UTF8 chars. 
+start of the string, to a count of the equivalent number of UTF-8 chars. Handles magic and type coercion. void sv_pos_b2u(SV* sv, I32* offsetp) @@ -3760,7 +4150,7 @@ Found in file sv.c =item sv_pos_u2b -Converts the value pointed to by offsetp from a count of UTF8 chars from +Converts the value pointed to by offsetp from a count of UTF-8 chars from the start of the string, to a count of the equivalent number of bytes; if lenp is non-zero, it does the same to lenp, but this time starting from the offset, rather than from the start of the string. Handles magic and @@ -3773,8 +4163,7 @@ Found in file sv.c =item sv_pv -A private implementation of the C macro for compilers which can't -cope with complex macro expressions. Always use the macro instead. +Use the C macro instead char* sv_pv(SV *sv) @@ -3783,9 +4172,7 @@ Found in file sv.c =item sv_pvbyte -A private implementation of the C macro for compilers -which can't cope with complex macro expressions. Always use the macro -instead. +Use C instead. char* sv_pvbyte(SV *sv) @@ -3851,9 +4238,7 @@ Found in file sv.c =item sv_pvutf8 -A private implementation of the C macro for compilers -which can't cope with complex macro expressions. Always use the macro -instead. +Use the C macro instead char* sv_pvutf8(SV *sv) @@ -4056,7 +4441,7 @@ Copies an integer into a new SV, optionally blessing the SV. The C argument will be upgraded to an RV. That RV will be modified to point to the new SV. The C argument indicates the package for the blessing. Set C to C to avoid the blessing. The new SV -will be returned and will have a reference count of 1. +will have a reference count of 1, and the RV will be returned. SV* sv_setref_iv(SV* rv, const char* classname, IV iv) @@ -4069,7 +4454,7 @@ Copies a double into a new SV, optionally blessing the SV. The C argument will be upgraded to an RV. That RV will be modified to point to the new SV. The C argument indicates the package for the blessing. Set C to C to avoid the blessing. 
The new SV -will be returned and will have a reference count of 1. +will have a reference count of 1, and the RV will be returned. SV* sv_setref_nv(SV* rv, const char* classname, NV nv) @@ -4083,7 +4468,7 @@ argument will be upgraded to an RV. That RV will be modified to point to the new SV. If the C argument is NULL then C will be placed into the SV. The C argument indicates the package for the blessing. Set C to C to avoid the blessing. The new SV -will be returned and will have a reference count of 1. +will have a reference count of 1, and the RV will be returned. Do not use with other Perl types such as HV, AV, SV, CV, because those objects will become corrupted by the pointer copy process. @@ -4101,8 +4486,8 @@ Copies a string into a new SV, optionally blessing the SV. The length of the string must be specified with C. The C argument will be upgraded to an RV. That RV will be modified to point to the new SV. The C argument indicates the package for the blessing. Set C to -C to avoid the blessing. The new SV will be returned and will have -a reference count of 1. +C to avoid the blessing. The new SV will have a reference count +of 1, and the RV will be returned. Note that C copies the pointer while this copies the string. @@ -4117,7 +4502,7 @@ Copies an unsigned integer into a new SV, optionally blessing the SV. The C argument will be upgraded to an RV. That RV will be modified to point to the new SV. The C argument indicates the package for the blessing. Set C to C to avoid the blessing. The new SV -will be returned and will have a reference count of 1. +will have a reference count of 1, and the RV will be returned. SV* sv_setref_uv(SV* rv, const char* classname, UV uv) @@ -4136,7 +4521,6 @@ You probably want to use one of the assortment of wrappers, such as C, C, C and C. - void sv_setsv(SV* dsv, SV* ssv) =for hackers @@ -4315,11 +4699,14 @@ Found in file sv.c =item sv_utf8_downgrade -Attempt to convert the PV of an SV from UTF8-encoded to byte encoding. 
+Attempt to convert the PV of an SV from UTF-8-encoded to byte encoding. This may not be possible if the PV contains non-byte encoding characters; if this is the case, either returns false or, if C is not true, croaks. +This is not a general purpose Unicode to byte encoding interface: +use the Encode extension for that. + NOTE: this function is experimental and may change or be removed without notice. @@ -4330,7 +4717,7 @@ Found in file sv.c =item sv_utf8_encode -Convert the PV of an SV to UTF8-encoded, but then turn off the C +Convert the PV of an SV to UTF-8-encoded, but then turn off the C flag so that it looks like octets again. Used as a building block for encode_utf8 in Encode.xs @@ -4341,11 +4728,14 @@ Found in file sv.c =item sv_utf8_upgrade -Convert the PV of an SV to its UTF8-encoded form. +Convert the PV of an SV to its UTF-8-encoded form. Forces the SV to string form if it is not already. Always sets the SvUTF8 flag to avoid future validity checks even if all the bytes have hibit clear. +This is not a general purpose byte encoding to Unicode interface: +use the Encode extension for that. + STRLEN sv_utf8_upgrade(SV *sv) =for hackers @@ -4353,13 +4743,16 @@ Found in file sv.c =item sv_utf8_upgrade_flags -Convert the PV of an SV to its UTF8-encoded form. +Convert the PV of an SV to its UTF-8-encoded form. Forces the SV to string form if it is not already. Always sets the SvUTF8 flag to avoid future validity checks even if all the bytes have hibit clear. If C has C bit set, will C on C if appropriate, else not. C and C are implemented in terms of this function. +This is not a general purpose byte encoding to Unicode interface: +use the Encode extension for that. + STRLEN sv_utf8_upgrade_flags(SV *sv, I32 flags) =for hackers @@ -4411,7 +4804,7 @@ Found in file sv.c =item bytes_from_utf8 -Converts a string C of length C from UTF8 into byte encoding. +Converts a string C of length C from UTF-8 into byte encoding.
Unlike but like C, returns a pointer to the newly-created string, and updates C to contain the new length. Returns the original string if no conversion occurs, C @@ -4428,10 +4821,13 @@ Found in file utf8.c =item bytes_to_utf8 -Converts a string C of length C from ASCII into UTF8 encoding. +Converts a string C of length C from ASCII into UTF-8 encoding. Returns a pointer to the newly-created string, and sets C to reflect the new length. +If you want to convert to UTF-8 from other encodings than ASCII, +see sv_recode_to_utf8(). + NOTE: this function is experimental and may change or be removed without notice. @@ -4471,9 +4867,9 @@ Found in file utf8.c =item is_utf8_char Tests if some arbitrary number of bytes begins in a valid UTF-8 -character. Note that an INVARIANT (i.e. ASCII) character is a valid UTF-8 character. -The actual number of bytes in the UTF-8 character will be returned if -it is valid, otherwise 0. +character. Note that an INVARIANT (i.e. ASCII) character is a valid +UTF-8 character. The actual number of bytes in the UTF-8 character +will be returned if it is valid, otherwise 0. STRLEN is_utf8_char(U8 *p) @@ -4482,16 +4878,26 @@ Found in file utf8.c =item is_utf8_string -Returns true if first C bytes of the given string form a valid UTF8 -string, false otherwise. Note that 'a valid UTF8 string' does not mean -'a string that contains UTF8' because a valid ASCII string is a valid -UTF8 string. +Returns true if first C bytes of the given string form a valid +UTF-8 string, false otherwise. Note that 'a valid UTF-8 string' does +not mean 'a string that contains code points above 0x7F encoded in UTF-8' +because a valid ASCII string is a valid UTF-8 string. bool is_utf8_string(U8 *s, STRLEN len) =for hackers Found in file utf8.c +=item is_utf8_string_loc + +Like is_utf8_string but stores the location of the failure in
+ + bool is_utf8_string_loc(U8 *s, STRLEN len, U8 **p) + +=for hackers +Found in file utf8.c + =item pv_uni_display Build to the scalar dsv a displayable version of the string spv, @@ -4512,6 +4918,23 @@ The pointer to the PV of the dsv is returned. =for hackers Found in file utf8.c +=item sv_cat_decode + +The encoding is assumed to be an Encode object, the PV of the ssv is +assumed to be octets in that encoding and decoding the input starts +from the position which (PV + *offset) pointed to. The dsv will be +concatenated the decoded UTF-8 string from ssv. Decoding will terminate +when the string tstr appears in decoding output or the input ends on +the PV of the ssv. The value which the offset points will be modified +to the last input position on the ssv. + +Returns TRUE if the terminator was found, else returns FALSE. + + bool sv_cat_decode(SV* dsv, SV *encoding, SV *ssv, int *offset, char* tstr, int tlen) + +=for hackers +Found in file sv.c + =item sv_recode_to_utf8 The encoding is assumed to be an Encode object, on entry the PV @@ -4554,16 +4977,18 @@ The "ustrp" is a pointer to the character buffer to put the conversion result to. The "lenp" is a pointer to the length of the result. -The "swash" is a pointer to the swash to use. +The "swashp" is a pointer to the swash to use. -The "normal" is a string like "ToLower" which means the swash -$utf8::ToLower, which is stored in lib/unicore/To/Lower.pl, -and loaded by SWASHGET, using lib/utf8_heavy.pl. +Both the special and normal mappings are stored lib/unicore/To/Foo.pl, +and loaded by SWASHGET, using lib/utf8_heavy.pl. The special (usually, +but not always, a multicharacter mapping), is tried first. -The "special" is a string like "utf8::ToSpecLower", which means -the hash %utf8::ToSpecLower, which is stored in the same file, -lib/unicore/To/Lower.pl, and also loaded by SWASHGET. The access -to the hash is by Perl_to_utf8_case(). 
+The "special" is a string like "utf8::ToSpecLower", which means the +hash %utf8::ToSpecLower. The access to the hash is through +Perl_to_utf8_case(). + +The "normal" is a string like "ToLower" which means the swash +%utf8::ToLower. UV to_utf8_case(U8 *p, U8* ustrp, STRLEN *lenp, SV **swash, char *normal, char *special) @@ -4637,7 +5062,7 @@ Found in file utf8.c =item utf8n_to_uvchr Returns the native character value of the first character in the string C -which is assumed to be in UTF8 encoding; C will be set to the +which is assumed to be in UTF-8 encoding; C will be set to the length, in bytes, of that character. Allows length and flags to be passed to low level routine. @@ -4651,10 +5076,10 @@ Found in file utf8.c Bottom level UTF-8 decode routine. Returns the unicode code point value of the first character in the string C -which is assumed to be in UTF8 encoding and no longer than C; +which is assumed to be in UTF-8 encoding and no longer than C; C will be set to the length, in bytes, of that character. -If C does not point to a well-formed UTF8 character, the behaviour +If C does not point to a well-formed UTF-8 character, the behaviour is dependent on the value of C: if it contains UTF8_CHECK_ONLY, it is assumed that the caller will raise a warning, and this function will silently just set C to C<-1> and return zero. If the @@ -4674,7 +5099,7 @@ Found in file utf8.c =item utf8_distance -Returns the number of UTF8 characters between the UTF-8 pointers C +Returns the number of UTF-8 characters between the UTF-8 pointers C and C. WARNING: use only if you *know* that the pointers point inside the @@ -4712,7 +5137,7 @@ Found in file utf8.c =item utf8_to_bytes -Converts a string C of length C from UTF8 into byte encoding. +Converts a string C of length C from UTF-8 into byte encoding. Unlike C, this over-writes the original string, and updates len to contain the new length. Returns zero on failure, setting C to -1. 
@@ -4728,10 +5153,10 @@ Found in file utf8.c =item utf8_to_uvchr Returns the native character value of the first character in the string C -which is assumed to be in UTF8 encoding; C will be set to the +which is assumed to be in UTF-8 encoding; C will be set to the length, in bytes, of that character. -If C does not point to a well-formed UTF8 character, zero is +If C does not point to a well-formed UTF-8 character, zero is returned and retlen is set, if possible, to -1. UV utf8_to_uvchr(U8 *s, STRLEN* retlen) @@ -4742,13 +5167,13 @@ Found in file utf8.c =item utf8_to_uvuni Returns the Unicode code point of the first character in the string C -which is assumed to be in UTF8 encoding; C will be set to the +which is assumed to be in UTF-8 encoding; C will be set to the length, in bytes, of that character. This function should only be used when returned UV is considered an index into the Unicode semantic tables (e.g. swashes). -If C does not point to a well-formed UTF8 character, zero is +If C does not point to a well-formed UTF-8 character, zero is returned and retlen is set, if possible, to -1. UV utf8_to_uvuni(U8 *s, STRLEN* retlen) @@ -4758,7 +5183,7 @@ Found in file utf8.c =item uvchr_to_utf8 -Adds the UTF8 representation of the Native codepoint C to the end +Adds the UTF-8 representation of the Native codepoint C to the end of the string C; C should be have at least C free bytes available. The return value is the pointer to the byte after the end of the new character. In other words, @@ -4776,7 +5201,7 @@ Found in file utf8.c =item uvuni_to_utf8_flags -Adds the UTF8 representation of the Unicode codepoint C to the end +Adds the UTF-8 representation of the Unicode codepoint C to the end of the string C; C should be have at least C free bytes available. The return value is the pointer to the byte after the end of the new character. In other words,