X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=sv.c;h=1418cf78e99c2d4e34dc359caa9e4ed31ff08c9c;hb=0bfa2a8afc04ca8a47987d5890bbbe751faf4444;hp=148dcec2164de8d525779a285dacdc7eb74c0f08;hpb=a2fd015ef0ad9c0113b835ff60d62684050408f4;p=p5sagit%2Fp5-mst-13.2.git diff --git a/sv.c b/sv.c index 148dcec..1418cf7 100644 --- a/sv.c +++ b/sv.c @@ -112,8 +112,7 @@ list, and call more_xiv() etc to add a new arena if the list is empty. At the time of very final cleanup, sv_free_arenas() is called from perl_destruct() to physically free all the arenas allocated since the -start of the interpreter. Note that this also clears PL_he_arenaroot, -which is otherwise dealt with in hv.c. +start of the interpreter. Manipulation of any of the PL_*root pointers is protected by enclosing LOCK_SV_MUTEX; ... UNLOCK_SV_MUTEX calls which should Do the Right Thing @@ -358,7 +357,7 @@ and split it into a list of free SVs. void Perl_sv_add_arena(pTHX_ char *ptr, U32 size, U32 flags) { - SV* sva = (SV*)ptr; + SV* const sva = (SV*)ptr; register SV* sv; register SV* svend; @@ -521,7 +520,7 @@ do_clean_all(pTHX_ SV *sv) DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%"UVxf"\n", PTR2UV(sv)) )); SvFLAGS(sv) |= SVf_BREAK; if (PL_comppad == (AV*)sv) { - PL_comppad = Nullav; + PL_comppad = NULL; PL_curpad = Null(SV**); } SvREFCNT_dec(sv); @@ -596,8 +595,6 @@ Perl_sv_free_arenas(pTHX) PL_body_roots[i] = 0; } - free_arena(he); - Safefree(PL_nice_chunk); PL_nice_chunk = Nullch; PL_nice_chunk_size = 0; @@ -605,9340 +602,8546 @@ Perl_sv_free_arenas(pTHX) PL_sv_root = 0; } -/* --------------------------------------------------------------------- - * - * support functions for report_uninit() - */ +/* + Here are mid-level routines that manage the allocation of bodies out + of the various arenas. There are 5 kinds of arenas: -/* the maxiumum size of array or hash where we will scan looking - * for the undefined element that triggered the warning */ + 1. SV-head arenas, which are discussed and handled above + 2. regular body arenas + 3. arenas for reduced-size bodies + 4. Hash-Entry arenas + 5. pte arenas (thread related) -#define FUV_MAX_SEARCH_SIZE 1000 + Arena types 2 & 3 are chained by body-type off an array of + arena-root pointers, which is indexed by svtype. Some of the + larger/less used body types are malloced singly, since a large + unused block of them is wasteful. Also, several svtypes dont have + bodies; the data fits into the sv-head itself. The arena-root + pointer thus has a few unused root-pointers (which may be hijacked + later for arena types 4,5) -/* Look for an entry in the hash whose value has the same SV as val; - * If so, return a mortal copy of the key. */ + 3 differs from 2 as an optimization; some body types have several + unused fields in the front of the structure (which are kept in-place + for consistency). These bodies can be allocated in smaller chunks, + because the leading fields arent accessed. Pointers to such bodies + are decremented to point at the unused 'ghost' memory, knowing that + the pointers are used with offsets to the real memory. -STATIC SV* -S_find_hash_subscript(pTHX_ HV *hv, SV* val) + HE, HEK arenas are managed separately, with separate code, but may + be merge-able later.. + + PTE arenas are not sv-bodies, but they share these mid-level + mechanics, so are considered here. The new mid-level mechanics rely + on the sv_type of the body being allocated, so we just reserve one + of the unused body-slots for PTEs, then use it in those (2) PTE + contexts below (line ~10k) +*/ + +STATIC void * +S_more_bodies (pTHX_ size_t size, svtype sv_type) { - dVAR; - register HE **array; - I32 i; + void ** const arena_root = &PL_body_arenaroots[sv_type]; + void ** const root = &PL_body_roots[sv_type]; + char *start; + const char *end; + const size_t count = PERL_ARENA_SIZE / size; - if (!hv || SvMAGICAL(hv) || !HvARRAY(hv) || - (HvTOTALKEYS(hv) > FUV_MAX_SEARCH_SIZE)) - return Nullsv; + Newx(start, count*size, char); + *((void **) start) = *arena_root; + *arena_root = (void *)start; - array = HvARRAY(hv); + end = start + (count-1) * size; - for (i=HvMAX(hv); i>0; i--) { - register HE *entry; - for (entry = array[i]; entry; entry = HeNEXT(entry)) { - if (HeVAL(entry) != val) - continue; - if ( HeVAL(entry) == &PL_sv_undef || - HeVAL(entry) == &PL_sv_placeholder) - continue; - if (!HeKEY(entry)) - return Nullsv; - if (HeKLEN(entry) == HEf_SVKEY) - return sv_mortalcopy(HeKEY_sv(entry)); - return sv_2mortal(newSVpvn(HeKEY(entry), HeKLEN(entry))); - } - } - return Nullsv; -} + /* The initial slot is used to link the arenas together, so it isn't to be + linked into the list of ready-to-use bodies. */ -/* Look for an entry in the array whose value has the same SV as val; - * If so, return the index, otherwise return -1. */ + start += size; -STATIC I32 -S_find_array_subscript(pTHX_ AV *av, SV* val) -{ - SV** svp; - I32 i; - if (!av || SvMAGICAL(av) || !AvARRAY(av) || - (AvFILLp(av) > FUV_MAX_SEARCH_SIZE)) - return -1; + *root = (void *)start; - svp = AvARRAY(av); - for (i=AvFILLp(av); i>=0; i--) { - if (svp[i] == val && svp[i] != &PL_sv_undef) - return i; + while (start < end) { + char * const next = start + size; + *(void**) start = (void *)next; + start = next; } - return -1; + *(void **)start = 0; + + return *root; } -/* S_varname(): return the name of a variable, optionally with a subscript. - * If gv is non-zero, use the name of that global, along with gvtype (one - * of "$", "@", "%"); otherwise use the name of the lexical at pad offset - * targ. Depending on the value of the subscript_type flag, return: - */ +/* grab a new thing from the free list, allocating more if necessary */ -#define FUV_SUBSCRIPT_NONE 1 /* "@foo" */ -#define FUV_SUBSCRIPT_ARRAY 2 /* "$foo[aindex]" */ -#define FUV_SUBSCRIPT_HASH 3 /* "$foo{keyname}" */ -#define FUV_SUBSCRIPT_WITHIN 4 /* "within @foo" */ +/* 1st, the inline version */ -STATIC SV* -S_varname(pTHX_ GV *gv, const char gvtype, PADOFFSET targ, - SV* keyname, I32 aindex, int subscript_type) -{ +#define new_body_inline(xpv, size, sv_type) \ + STMT_START { \ + void ** const r3wt = &PL_body_roots[sv_type]; \ + LOCK_SV_MUTEX; \ + xpv = *((void **)(r3wt)) \ + ? *((void **)(r3wt)) : S_more_bodies(aTHX_ size, sv_type); \ + *(r3wt) = *(void**)(xpv); \ + UNLOCK_SV_MUTEX; \ + } STMT_END - SV * const name = sv_newmortal(); - if (gv) { - char buffer[2]; - buffer[0] = gvtype; - buffer[1] = 0; +/* now use the inline version in the proper function */ - /* as gv_fullname4(), but add literal '^' for $^FOO names */ +#ifndef PURIFY - gv_fullname4(name, gv, buffer, 0); +/* This isn't being used with -DPURIFY, so don't declare it. Otherwise + compilers issue warnings. */ - if ((unsigned int)SvPVX(name)[1] <= 26) { - buffer[0] = '^'; - buffer[1] = SvPVX(name)[1] + 'A' - 1; +STATIC void * +S_new_body(pTHX_ size_t size, svtype sv_type) +{ + void *xpv; + new_body_inline(xpv, size, sv_type); + return xpv; +} - /* Swap the 1 unprintable control character for the 2 byte pretty - version - ie substr($name, 1, 1) = $buffer; */ - sv_insert(name, 1, 1, buffer, 2); - } - } - else { - U32 unused; - CV * const cv = find_runcv(&unused); - SV *sv; - AV *av; +#endif - if (!cv || !CvPADLIST(cv)) - return Nullsv; - av = (AV*)(*av_fetch(CvPADLIST(cv), 0, FALSE)); - sv = *av_fetch(av, targ, FALSE); - /* SvLEN in a pad name is not to be trusted */ - sv_setpv(name, SvPV_nolen_const(sv)); - } +/* return a thing to the free list */ - if (subscript_type == FUV_SUBSCRIPT_HASH) { - SV * const sv = NEWSV(0,0); - *SvPVX(name) = '$'; - Perl_sv_catpvf(aTHX_ name, "{%s}", - pv_display(sv,SvPVX_const(keyname), SvCUR(keyname), 0, 32)); - SvREFCNT_dec(sv); - } - else if (subscript_type == FUV_SUBSCRIPT_ARRAY) { - *SvPVX(name) = '$'; - Perl_sv_catpvf(aTHX_ name, "[%"IVdf"]", (IV)aindex); - } - else if (subscript_type == FUV_SUBSCRIPT_WITHIN) - sv_insert(name, 0, 0, "within ", 7); +#define del_body(thing, root) \ + STMT_START { \ + void ** const thing_copy = (void **)thing;\ + LOCK_SV_MUTEX; \ + *thing_copy = *root; \ + *root = (void*)thing_copy; \ + UNLOCK_SV_MUTEX; \ + } STMT_END - return name; -} +/* + Revisiting type 3 arenas, there are 4 body-types which have some + members that are never accessed. They are XPV, XPVIV, XPVAV, + XPVHV, which have corresponding types: xpv_allocated, + xpviv_allocated, xpvav_allocated, xpvhv_allocated, + For these types, the arenas are carved up into *_allocated size + chunks, we thus avoid wasted memory for those unaccessed members. + When bodies are allocated, we adjust the pointer back in memory by + the size of the bit not allocated, so it's as if we allocated the + full structure. (But things will all go boom if you write to the + part that is "not there", because you'll be overwriting the last + members of the preceding structure in memory.) -/* -=for apidoc find_uninit_var + We calculate the correction using the STRUCT_OFFSET macro. For example, if + xpv_allocated is the same structure as XPV then the two OFFSETs sum to zero, + and the pointer is unchanged. If the allocated structure is smaller (no + initial NV actually allocated) then the net effect is to subtract the size + of the NV from the pointer, to return a new pointer as if an initial NV were + actually allocated. -Find the name of the undefined variable (if any) that caused the operator o -to issue a "Use of uninitialized value" warning. -If match is true, only return a name if it's value matches uninit_sv. -So roughly speaking, if a unary operator (such as OP_COS) generates a -warning, then following the direct child of the op may yield an -OP_PADSV or OP_GV that gives the name of the undefined variable. On the -other hand, with OP_ADD there are two branches to follow, so we only print -the variable name if we get an exact match. + This is the same trick as was used for NV and IV bodies. Ironically it + doesn't need to be used for NV bodies any more, because NV is now at the + start of the structure. IV bodies don't need it either, because they are + no longer allocated. */ -The name is returned as a mortal SV. +/* The following 2 arrays hide the above details in a pair of + lookup-tables, allowing us to be body-type agnostic. -Assumes that PL_op is the op that originally triggered the error, and that -PL_comppad/PL_curpad points to the currently executing pad. + size maps svtype to its body's allocated size. + offset maps svtype to the body-pointer adjustment needed -=cut + NB: elements in latter are 0 or <0, and are added during + allocation, and subtracted during deallocation. It may be clearer + to invert the values, and call it shrinkage_by_svtype. */ -STATIC SV * -S_find_uninit_var(pTHX_ OP* obase, SV* uninit_sv, bool match) -{ - dVAR; - SV *sv; - AV *av; - GV *gv; - OP *o, *o2, *kid; +struct body_details { + size_t size; /* Size to allocate */ + size_t copy; /* Size of structure to copy (may be shorter) */ + size_t offset; + bool cant_upgrade; /* Can upgrade this type */ + bool zero_nv; /* zero the NV when upgrading from this */ + bool arena; /* Allocated from an arena */ +}; - if (!obase || (match && (!uninit_sv || uninit_sv == &PL_sv_undef || - uninit_sv == &PL_sv_placeholder))) - return Nullsv; +#define HADNV FALSE +#define NONV TRUE - switch (obase->op_type) { +#ifdef PURIFY +/* With -DPURFIY we allocate everything directly, and don't use arenas. + This seems a rather elegant way to simplify some of the code below. */ +#define HASARENA FALSE +#else +#define HASARENA TRUE +#endif +#define NOARENA FALSE - case OP_RV2AV: - case OP_RV2HV: - case OP_PADAV: - case OP_PADHV: - { - const bool pad = (obase->op_type == OP_PADAV || obase->op_type == OP_PADHV); - const bool hash = (obase->op_type == OP_PADHV || obase->op_type == OP_RV2HV); - I32 index = 0; - SV *keysv = Nullsv; - int subscript_type = FUV_SUBSCRIPT_WITHIN; +/* A macro to work out the offset needed to subtract from a pointer to (say) - if (pad) { /* @lex, %lex */ - sv = PAD_SVl(obase->op_targ); - gv = Nullgv; - } - else { - if (cUNOPx(obase)->op_first->op_type == OP_GV) { - /* @global, %global */ - gv = cGVOPx_gv(cUNOPx(obase)->op_first); - if (!gv) - break; - sv = hash ? (SV*)GvHV(gv): (SV*)GvAV(gv); - } - else /* @{expr}, %{expr} */ - return find_uninit_var(cUNOPx(obase)->op_first, - uninit_sv, match); - } +typedef struct { + STRLEN xpv_cur; + STRLEN xpv_len; +} xpv_allocated; - /* attempt to find a match within the aggregate */ - if (hash) { - keysv = S_find_hash_subscript(aTHX_ (HV*)sv, uninit_sv); - if (keysv) - subscript_type = FUV_SUBSCRIPT_HASH; - } - else { - index = S_find_array_subscript(aTHX_ (AV*)sv, uninit_sv); - if (index >= 0) - subscript_type = FUV_SUBSCRIPT_ARRAY; - } +to make its members accessible via a pointer to (say) - if (match && subscript_type == FUV_SUBSCRIPT_WITHIN) - break; +struct xpv { + NV xnv_nv; + STRLEN xpv_cur; + STRLEN xpv_len; +}; - return varname(gv, hash ? '%' : '@', obase->op_targ, - keysv, index, subscript_type); - } +*/ - case OP_PADSV: - if (match && PAD_SVl(obase->op_targ) != uninit_sv) - break; - return varname(Nullgv, '$', obase->op_targ, - Nullsv, 0, FUV_SUBSCRIPT_NONE); +#define relative_STRUCT_OFFSET(longer, shorter, member) \ + (STRUCT_OFFSET(shorter, member) - STRUCT_OFFSET(longer, member)) - case OP_GVSV: - gv = cGVOPx_gv(obase); - if (!gv || (match && GvSV(gv) != uninit_sv)) - break; - return varname(gv, '$', 0, Nullsv, 0, FUV_SUBSCRIPT_NONE); +/* Calculate the length to copy. Specifically work out the length less any + final padding the compiler needed to add. See the comment in sv_upgrade + for why copying the padding proved to be a bug. */ - case OP_AELEMFAST: - if (obase->op_flags & OPf_SPECIAL) { /* lexical array */ - if (match) { - SV **svp; - av = (AV*)PAD_SV(obase->op_targ); - if (!av || SvRMAGICAL(av)) - break; - svp = av_fetch(av, (I32)obase->op_private, FALSE); - if (!svp || *svp != uninit_sv) - break; - } - return varname(Nullgv, '$', obase->op_targ, - Nullsv, (I32)obase->op_private, FUV_SUBSCRIPT_ARRAY); - } - else { - gv = cGVOPx_gv(obase); - if (!gv) - break; - if (match) { - SV **svp; - av = GvAV(gv); - if (!av || SvRMAGICAL(av)) - break; - svp = av_fetch(av, (I32)obase->op_private, FALSE); - if (!svp || *svp != uninit_sv) - break; - } - return varname(gv, '$', 0, - Nullsv, (I32)obase->op_private, FUV_SUBSCRIPT_ARRAY); - } - break; +#define copy_length(type, last_member) \ + STRUCT_OFFSET(type, last_member) \ + + sizeof (((type*)SvANY((SV*)0))->last_member) - case OP_EXISTS: - o = cUNOPx(obase)->op_first; - if (!o || o->op_type != OP_NULL || - ! (o->op_targ == OP_AELEM || o->op_targ == OP_HELEM)) - break; - return find_uninit_var(cBINOPo->op_last, uninit_sv, match); +static const struct body_details bodies_by_type[] = { + {0, 0, 0, FALSE, NONV, NOARENA}, + /* IVs are in the head, so the allocation size is 0 */ + {0, sizeof(IV), STRUCT_OFFSET(XPVIV, xiv_iv), FALSE, NONV, NOARENA}, + /* 8 bytes on most ILP32 with IEEE doubles */ + {sizeof(NV), sizeof(NV), 0, FALSE, HADNV, HASARENA}, + /* RVs are in the head now */ + /* However, this slot is overloaded and used by the pte */ + {0, 0, 0, FALSE, NONV, NOARENA}, + /* 8 bytes on most ILP32 with IEEE doubles */ + {sizeof(xpv_allocated), + copy_length(XPV, xpv_len) + + relative_STRUCT_OFFSET(XPV, xpv_allocated, xpv_cur), + - relative_STRUCT_OFFSET(XPV, xpv_allocated, xpv_cur), + FALSE, NONV, HASARENA}, + /* 12 */ + {sizeof(xpviv_allocated), + copy_length(XPVIV, xiv_u) + + relative_STRUCT_OFFSET(XPVIV, xpviv_allocated, xpv_cur), + - relative_STRUCT_OFFSET(XPVIV, xpviv_allocated, xpv_cur), + FALSE, NONV, HASARENA}, + /* 20 */ + {sizeof(XPVNV), copy_length(XPVNV, xiv_u), 0, FALSE, HADNV, HASARENA}, + /* 28 */ + {sizeof(XPVMG), copy_length(XPVMG, xmg_stash), 0, FALSE, HADNV, HASARENA}, + /* 36 */ + {sizeof(XPVBM), sizeof(XPVBM), 0, TRUE, HADNV, HASARENA}, + /* 48 */ + {sizeof(XPVGV), sizeof(XPVGV), 0, TRUE, HADNV, HASARENA}, + /* 64 */ + {sizeof(XPVLV), sizeof(XPVLV), 0, TRUE, HADNV, HASARENA}, + /* 20 */ + {sizeof(xpvav_allocated), + copy_length(XPVAV, xmg_stash) + + relative_STRUCT_OFFSET(XPVAV, xpvav_allocated, xav_fill), + - relative_STRUCT_OFFSET(XPVAV, xpvav_allocated, xav_fill), + TRUE, HADNV, HASARENA}, + /* 20 */ + {sizeof(xpvhv_allocated), + copy_length(XPVHV, xmg_stash) + + relative_STRUCT_OFFSET(XPVHV, xpvhv_allocated, xhv_fill), + - relative_STRUCT_OFFSET(XPVHV, xpvhv_allocated, xhv_fill), + TRUE, HADNV, HASARENA}, + /* 76 */ + {sizeof(XPVCV), sizeof(XPVCV), 0, TRUE, HADNV, HASARENA}, + /* 80 */ + {sizeof(XPVFM), sizeof(XPVFM), 0, TRUE, HADNV, NOARENA}, + /* 84 */ + {sizeof(XPVIO), sizeof(XPVIO), 0, TRUE, HADNV, NOARENA} +}; - case OP_AELEM: - case OP_HELEM: - if (PL_op == obase) - /* $a[uninit_expr] or $h{uninit_expr} */ - return find_uninit_var(cBINOPx(obase)->op_last, uninit_sv, match); +#define new_body_type(sv_type) \ + (void *)((char *)S_new_body(aTHX_ bodies_by_type[sv_type].size, sv_type)\ + - bodies_by_type[sv_type].offset) - gv = Nullgv; - o = cBINOPx(obase)->op_first; - kid = cBINOPx(obase)->op_last; +#define del_body_type(p, sv_type) \ + del_body(p, &PL_body_roots[sv_type]) - /* get the av or hv, and optionally the gv */ - sv = Nullsv; - if (o->op_type == OP_PADAV || o->op_type == OP_PADHV) { - sv = PAD_SV(o->op_targ); - } - else if ((o->op_type == OP_RV2AV || o->op_type == OP_RV2HV) - && cUNOPo->op_first->op_type == OP_GV) - { - gv = cGVOPx_gv(cUNOPo->op_first); - if (!gv) - break; - sv = o->op_type == OP_RV2HV ? (SV*)GvHV(gv) : (SV*)GvAV(gv); - } - if (!sv) - break; - if (kid && kid->op_type == OP_CONST && SvOK(cSVOPx_sv(kid))) { - /* index is constant */ - if (match) { - if (SvMAGICAL(sv)) - break; - if (obase->op_type == OP_HELEM) { - HE* he = hv_fetch_ent((HV*)sv, cSVOPx_sv(kid), 0, 0); - if (!he || HeVAL(he) != uninit_sv) - break; - } - else { - SV ** const svp = av_fetch((AV*)sv, SvIV(cSVOPx_sv(kid)), FALSE); - if (!svp || *svp != uninit_sv) - break; - } - } - if (obase->op_type == OP_HELEM) - return varname(gv, '%', o->op_targ, - cSVOPx_sv(kid), 0, FUV_SUBSCRIPT_HASH); - else - return varname(gv, '@', o->op_targ, Nullsv, - SvIV(cSVOPx_sv(kid)), FUV_SUBSCRIPT_ARRAY); - ; - } - else { - /* index is an expression; - * attempt to find a match within the aggregate */ - if (obase->op_type == OP_HELEM) { - SV * const keysv = S_find_hash_subscript(aTHX_ (HV*)sv, uninit_sv); - if (keysv) - return varname(gv, '%', o->op_targ, - keysv, 0, FUV_SUBSCRIPT_HASH); - } - else { - const I32 index = S_find_array_subscript(aTHX_ (AV*)sv, uninit_sv); - if (index >= 0) - return varname(gv, '@', o->op_targ, - Nullsv, index, FUV_SUBSCRIPT_ARRAY); - } - if (match) - break; - return varname(gv, - (o->op_type == OP_PADAV || o->op_type == OP_RV2AV) - ? '@' : '%', - o->op_targ, Nullsv, 0, FUV_SUBSCRIPT_WITHIN); - } +#define new_body_allocated(sv_type) \ + (void *)((char *)S_new_body(aTHX_ bodies_by_type[sv_type].size, sv_type)\ + - bodies_by_type[sv_type].offset) - break; +#define del_body_allocated(p, sv_type) \ + del_body(p + bodies_by_type[sv_type].offset, &PL_body_roots[sv_type]) - case OP_AASSIGN: - /* only examine RHS */ - return find_uninit_var(cBINOPx(obase)->op_first, uninit_sv, match); - case OP_OPEN: - o = cUNOPx(obase)->op_first; - if (o->op_type == OP_PUSHMARK) - o = o->op_sibling; +#define my_safemalloc(s) (void*)safemalloc(s) +#define my_safecalloc(s) (void*)safecalloc(s, 1) +#define my_safefree(p) safefree((char*)p) - if (!o->op_sibling) { - /* one-arg version of open is highly magical */ +#ifdef PURIFY - if (o->op_type == OP_GV) { /* open FOO; */ - gv = cGVOPx_gv(o); - if (match && GvSV(gv) != uninit_sv) - break; - return varname(gv, '$', 0, - Nullsv, 0, FUV_SUBSCRIPT_NONE); - } - /* other possibilities not handled are: - * open $x; or open my $x; should return '${*$x}' - * open expr; should return '$'.expr ideally - */ - break; - } - goto do_op; +#define new_XNV() my_safemalloc(sizeof(XPVNV)) +#define del_XNV(p) my_safefree(p) - /* ops where $_ may be an implicit arg */ - case OP_TRANS: - case OP_SUBST: - case OP_MATCH: - if ( !(obase->op_flags & OPf_STACKED)) { - if (uninit_sv == ((obase->op_private & OPpTARGET_MY) - ? PAD_SVl(obase->op_targ) - : DEFSV)) - { - sv = sv_newmortal(); - sv_setpvn(sv, "$_", 2); - return sv; - } - } - goto do_op; +#define new_XPVNV() my_safemalloc(sizeof(XPVNV)) +#define del_XPVNV(p) my_safefree(p) - case OP_PRTF: - case OP_PRINT: - /* skip filehandle as it can't produce 'undef' warning */ - o = cUNOPx(obase)->op_first; - if ((obase->op_flags & OPf_STACKED) && o->op_type == OP_PUSHMARK) - o = o->op_sibling->op_sibling; - goto do_op2; +#define new_XPVAV() my_safemalloc(sizeof(XPVAV)) +#define del_XPVAV(p) my_safefree(p) +#define new_XPVHV() my_safemalloc(sizeof(XPVHV)) +#define del_XPVHV(p) my_safefree(p) - case OP_RV2SV: - case OP_CUSTOM: - case OP_ENTERSUB: - match = 1; /* XS or custom code could trigger random warnings */ - goto do_op; +#define new_XPVMG() my_safemalloc(sizeof(XPVMG)) +#define del_XPVMG(p) my_safefree(p) - case OP_SCHOMP: - case OP_CHOMP: - if (SvROK(PL_rs) && uninit_sv == SvRV(PL_rs)) - return sv_2mortal(newSVpvn("${$/}", 5)); - /* FALL THROUGH */ +#define new_XPVGV() my_safemalloc(sizeof(XPVGV)) +#define del_XPVGV(p) my_safefree(p) - default: - do_op: - if (!(obase->op_flags & OPf_KIDS)) - break; - o = cUNOPx(obase)->op_first; - - do_op2: - if (!o) - break; +#else /* !PURIFY */ - /* if all except one arg are constant, or have no side-effects, - * or are optimized away, then it's unambiguous */ - o2 = Nullop; - for (kid=o; kid; kid = kid->op_sibling) { - if (kid && - ( (kid->op_type == OP_CONST && SvOK(cSVOPx_sv(kid))) - || (kid->op_type == OP_NULL && ! (kid->op_flags & OPf_KIDS)) - || (kid->op_type == OP_PUSHMARK) - ) - ) - continue; - if (o2) { /* more than one found */ - o2 = Nullop; - break; - } - o2 = kid; - } - if (o2) - return find_uninit_var(o2, uninit_sv, match); +#define new_XNV() new_body_type(SVt_NV) +#define del_XNV(p) del_body_type(p, SVt_NV) - /* scan all args */ - while (o) { - sv = find_uninit_var(o, uninit_sv, 1); - if (sv) - return sv; - o = o->op_sibling; - } - break; - } - return Nullsv; -} +#define new_XPVNV() new_body_type(SVt_PVNV) +#define del_XPVNV(p) del_body_type(p, SVt_PVNV) +#define new_XPVAV() new_body_allocated(SVt_PVAV) +#define del_XPVAV(p) del_body_allocated(p, SVt_PVAV) + +#define new_XPVHV() new_body_allocated(SVt_PVHV) +#define del_XPVHV(p) del_body_allocated(p, SVt_PVHV) + +#define new_XPVMG() new_body_type(SVt_PVMG) +#define del_XPVMG(p) del_body_type(p, SVt_PVMG) + +#define new_XPVGV() new_body_type(SVt_PVGV) +#define del_XPVGV(p) del_body_type(p, SVt_PVGV) + +#endif /* PURIFY */ + +/* no arena for you! */ + +#define new_NOARENA(details) \ + my_safemalloc((details)->size + (details)->offset) +#define new_NOARENAZ(details) \ + my_safecalloc((details)->size + (details)->offset) /* -=for apidoc report_uninit +=for apidoc sv_upgrade -Print appropriate "Use of uninitialized variable" warning +Upgrade an SV to a more complex form. Generally adds a new body type to the +SV, then copies across as much information as possible from the old body. +You generally want to use the C macro wrapper. See also C. =cut */ void -Perl_report_uninit(pTHX_ SV* uninit_sv) +Perl_sv_upgrade(pTHX_ register SV *sv, U32 new_type) { - if (PL_op) { - SV* varname = Nullsv; - if (uninit_sv) { - varname = find_uninit_var(PL_op, uninit_sv,0); - if (varname) - sv_insert(varname, 0, 0, " ", 1); - } - Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit, - varname ? SvPV_nolen_const(varname) : "", - " in ", OP_DESC(PL_op)); - } - else - Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit, - "", "", ""); -} + void* old_body; + void* new_body; + const U32 old_type = SvTYPE(sv); + const struct body_details *const old_type_details + = bodies_by_type + old_type; + const struct body_details *new_type_details = bodies_by_type + new_type; -/* - Here are mid-level routines that manage the allocation of bodies out - of the various arenas. There are 5 kinds of arenas: + if (new_type != SVt_PV && SvIsCOW(sv)) { + sv_force_normal_flags(sv, 0); + } - 1. SV-head arenas, which are discussed and handled above - 2. regular body arenas - 3. arenas for reduced-size bodies - 4. Hash-Entry arenas - 5. pte arenas (thread related) + if (old_type == new_type) + return; - Arena types 2 & 3 are chained by body-type off an array of - arena-root pointers, which is indexed by svtype. Some of the - larger/less used body types are malloced singly, since a large - unused block of them is wasteful. Also, several svtypes dont have - bodies; the data fits into the sv-head itself. The arena-root - pointer thus has a few unused root-pointers (which may be hijacked - later for arena types 4,5) + if (old_type > new_type) + Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d", + (int)old_type, (int)new_type); - 3 differs from 2 as an optimization; some body types have several - unused fields in the front of the structure (which are kept in-place - for consistency). These bodies can be allocated in smaller chunks, - because the leading fields arent accessed. Pointers to such bodies - are decremented to point at the unused 'ghost' memory, knowing that - the pointers are used with offsets to the real memory. - HE, HEK arenas are managed separately, with separate code, but may - be merge-able later.. + old_body = SvANY(sv); - PTE arenas are not sv-bodies, but they share these mid-level - mechanics, so are considered here. The new mid-level mechanics rely - on the sv_type of the body being allocated, so we just reserve one - of the unused body-slots for PTEs, then use it in those (2) PTE - contexts below (line ~10k) -*/ + /* Copying structures onto other structures that have been neatly zeroed + has a subtle gotcha. Consider XPVMG -STATIC void * -S_more_bodies (pTHX_ size_t size, svtype sv_type) -{ - void **arena_root = &PL_body_arenaroots[sv_type]; - void **root = &PL_body_roots[sv_type]; - char *start; - const char *end; - const size_t count = PERL_ARENA_SIZE / size; + +------+------+------+------+------+-------+-------+ + | NV | CUR | LEN | IV | MAGIC | STASH | + +------+------+------+------+------+-------+-------+ + 0 4 8 12 16 20 24 28 - Newx(start, count*size, char); - *((void **) start) = *arena_root; - *arena_root = (void *)start; + where NVs are aligned to 8 bytes, so that sizeof that structure is + actually 32 bytes long, with 4 bytes of padding at the end: - end = start + (count-1) * size; + +------+------+------+------+------+-------+-------+------+ + | NV | CUR | LEN | IV | MAGIC | STASH | ??? | + +------+------+------+------+------+-------+-------+------+ + 0 4 8 12 16 20 24 28 32 - /* The initial slot is used to link the arenas together, so it isn't to be - linked into the list of ready-to-use bodies. */ + so what happens if you allocate memory for this structure: - start += size; + +------+------+------+------+------+-------+-------+------+------+... + | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME | + +------+------+------+------+------+-------+-------+------+------+... + 0 4 8 12 16 20 24 28 32 36 - *root = (void *)start; + zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you + expect, because you copy the area marked ??? onto GP. Now, ??? may have + started out as zero once, but it's quite possible that it isn't. So now, + rather than a nicely zeroed GP, you have it pointing somewhere random. + Bugs ensue. - while (start < end) { - char * const next = start + size; - *(void**) start = (void *)next; - start = next; - } - *(void **)start = 0; + (In fact, GP ends up pointing at a previous GP structure, because the + principle cause of the padding in XPVMG getting garbage is a copy of + sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob) - return *root; -} + So we are careful and work out the size of used parts of all the + structures. */ -/* grab a new thing from the free list, allocating more if necessary */ + switch (old_type) { + case SVt_NULL: + break; + case SVt_IV: + if (new_type < SVt_PVIV) { + new_type = (new_type == SVt_NV) + ? SVt_PVNV : SVt_PVIV; + new_type_details = bodies_by_type + new_type; + } + break; + case SVt_NV: + if (new_type < SVt_PVNV) { + new_type = SVt_PVNV; + new_type_details = bodies_by_type + new_type; + } + break; + case SVt_RV: + break; + case SVt_PV: + assert(new_type > SVt_PV); + assert(SVt_IV < SVt_PV); + assert(SVt_NV < SVt_PV); + break; + case SVt_PVIV: + break; + case SVt_PVNV: + break; + case SVt_PVMG: + /* Because the XPVMG of PL_mess_sv isn't allocated from the arena, + there's no way that it can be safely upgraded, because perl.c + expects to Safefree(SvANY(PL_mess_sv)) */ + assert(sv != PL_mess_sv); + /* This flag bit is used to mean other things in other scalar types. + Given that it only has meaning inside the pad, it shouldn't be set + on anything that can get upgraded. */ + assert((SvFLAGS(sv) & SVpad_TYPED) == 0); + break; + default: + if (old_type_details->cant_upgrade) + Perl_croak(aTHX_ "Can't upgrade that kind of scalar"); + } -/* 1st, the inline version */ + SvFLAGS(sv) &= ~SVTYPEMASK; + SvFLAGS(sv) |= new_type; -#define new_body_inline(xpv, root, size, sv_type) \ - STMT_START { \ - LOCK_SV_MUTEX; \ - xpv = *((void **)(root)) \ - ? *((void **)(root)) : S_more_bodies(aTHX_ size, sv_type); \ - *(root) = *(void**)(xpv); \ - UNLOCK_SV_MUTEX; \ - } STMT_END + switch (new_type) { + case SVt_NULL: + Perl_croak(aTHX_ "Can't upgrade to undef"); + case SVt_IV: + assert(old_type == SVt_NULL); + SvANY(sv) = (XPVIV*)((char*)&(sv->sv_u.svu_iv) - STRUCT_OFFSET(XPVIV, xiv_iv)); + SvIV_set(sv, 0); + return; + case SVt_NV: + assert(old_type == SVt_NULL); + SvANY(sv) = new_XNV(); + SvNV_set(sv, 0); + return; + case SVt_RV: + assert(old_type == SVt_NULL); + SvANY(sv) = &sv->sv_u.svu_rv; + SvRV_set(sv, 0); + return; + case SVt_PVHV: + SvANY(sv) = new_XPVHV(); + HvFILL(sv) = 0; + HvMAX(sv) = 0; + HvTOTALKEYS(sv) = 0; -/* now use the inline version in the proper function */ + goto hv_av_common; -#ifndef PURIFY + case SVt_PVAV: + SvANY(sv) = new_XPVAV(); + AvMAX(sv) = -1; + AvFILLp(sv) = -1; + AvALLOC(sv) = 0; + AvREAL_only(sv); -/* This isn't being used with -DPURIFY, so don't declare it. Otherwise - compilers issue warnings. */ + hv_av_common: + /* SVt_NULL isn't the only thing upgraded to AV or HV. + The target created by newSVrv also is, and it can have magic. + However, it never has SvPVX set. + */ + if (old_type >= SVt_RV) { + assert(SvPVX_const(sv) == 0); + } -STATIC void * -S_new_body(pTHX_ size_t size, svtype sv_type) -{ - void *xpv; - new_body_inline(xpv, &PL_body_roots[sv_type], size, sv_type); - return xpv; -} + /* Could put this in the else clause below, as PVMG must have SvPVX + 0 already (the assertion above) */ + SvPV_set(sv, (char*)0); -#endif + if (old_type >= SVt_PVMG) { + SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_magic); + SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash); + } else { + SvMAGIC_set(sv, 0); + SvSTASH_set(sv, 0); + } + break; -/* return a thing to the free list */ -#define del_body(thing, root) \ - STMT_START { \ - void **thing_copy = (void **)thing; \ - LOCK_SV_MUTEX; \ - *thing_copy = *root; \ - *root = (void*)thing_copy; \ - UNLOCK_SV_MUTEX; \ - } STMT_END + case SVt_PVIV: + /* XXX Is this still needed? Was it ever needed? Surely as there is + no route from NV to PVIV, NOK can never be true */ + assert(!SvNOKp(sv)); + assert(!SvNOK(sv)); + case SVt_PVIO: + case SVt_PVFM: + case SVt_PVBM: + case SVt_PVGV: + case SVt_PVCV: + case SVt_PVLV: + case SVt_PVMG: + case SVt_PVNV: + case SVt_PV: -/* - Revisiting type 3 arenas, there are 4 body-types which have some - members that are never accessed. They are XPV, XPVIV, XPVAV, - XPVHV, which have corresponding types: xpv_allocated, - xpviv_allocated, xpvav_allocated, xpvhv_allocated, + assert(new_type_details->size); + /* We always allocated the full length item with PURIFY. To do this + we fake things so that arena is false for all 16 types.. */ + if(new_type_details->arena) { + /* This points to the start of the allocated area. */ + new_body_inline(new_body, new_type_details->size, new_type); + Zero(new_body, new_type_details->size, char); + new_body = ((char *)new_body) - new_type_details->offset; + } else { + new_body = new_NOARENAZ(new_type_details); + } + SvANY(sv) = new_body; - For these types, the arenas are carved up into *_allocated size - chunks, we thus avoid wasted memory for those unaccessed members. - When bodies are allocated, we adjust the pointer back in memory by - the size of the bit not allocated, so it's as if we allocated the - full structure. (But things will all go boom if you write to the - part that is "not there", because you'll be overwriting the last - members of the preceding structure in memory.) + if (old_type_details->copy) { + Copy((char *)old_body + old_type_details->offset, + (char *)new_body + old_type_details->offset, + old_type_details->copy, char); + } - We calculate the correction using the STRUCT_OFFSET macro. For example, if - xpv_allocated is the same structure as XPV then the two OFFSETs sum to zero, - and the pointer is unchanged. If the allocated structure is smaller (no - initial NV actually allocated) then the net effect is to subtract the size - of the NV from the pointer, to return a new pointer as if an initial NV were - actually allocated. +#ifndef NV_ZERO_IS_ALLBITS_ZERO + /* If NV 0.0 is store as all bits 0 then Zero() already creates a correct + 0.0 for us. */ + if (old_type_details->zero_nv) + SvNV_set(sv, 0); +#endif - This is the same trick as was used for NV and IV bodies. Ironically it - doesn't need to be used for NV bodies any more, because NV is now at the - start of the structure. IV bodies don't need it either, because they are - no longer allocated. */ + if (new_type == SVt_PVIO) + IoPAGE_LEN(sv) = 60; + if (old_type < SVt_RV) + SvPV_set(sv, 0); + break; + default: + Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu", new_type); + } -/* The following 2 arrays hide the above details in a pair of - lookup-tables, allowing us to be body-type agnostic. + if (old_type_details->size) { + /* If the old body had an allocated size, then we need to free it. */ +#ifdef PURIFY + my_safefree(old_body); +#else + del_body((void*)((char*)old_body + old_type_details->offset), + &PL_body_roots[old_type]); +#endif + } +} - size maps svtype to its body's allocated size. - offset maps svtype to the body-pointer adjustment needed +/* +=for apidoc sv_backoff - NB: elements in latter are 0 or <0, and are added during - allocation, and subtracted during deallocation. It may be clearer - to invert the values, and call it shrinkage_by_svtype. +Remove any string offset. You should normally use the C macro +wrapper instead. + +=cut */ -struct body_details { - size_t size; /* Size to allocate */ - size_t copy; /* Size of structure to copy (may be shorter) */ - int offset; - bool cant_upgrade; /* Can upgrade this type */ - bool zero_nv; /* zero the NV when upgrading from this */ -}; +int +Perl_sv_backoff(pTHX_ register SV *sv) +{ + assert(SvOOK(sv)); + assert(SvTYPE(sv) != SVt_PVHV); + assert(SvTYPE(sv) != SVt_PVAV); + if (SvIVX(sv)) { + const char * const s = SvPVX_const(sv); + SvLEN_set(sv, SvLEN(sv) + SvIVX(sv)); + SvPV_set(sv, SvPVX(sv) - SvIVX(sv)); + SvIV_set(sv, 0); + Move(s, SvPVX(sv), SvCUR(sv)+1, char); + } + SvFLAGS(sv) &= ~SVf_OOK; + return 0; +} -struct body_details bodies_by_type[] = { - {0, 0, 0, FALSE, TRUE}, - /* IVs are in the head, so the allocation size is 0 */ - {0, sizeof(IV), STRUCT_OFFSET(XPVIV, xiv_iv), FALSE, TRUE}, - /* 8 bytes on most ILP32 with IEEE doubles */ - {sizeof(NV), sizeof(NV), 0, FALSE, FALSE}, - /* RVs are in the head now */ - {0, 0, 0, FALSE, TRUE}, - /* 8 bytes on most ILP32 with IEEE doubles */ - {sizeof(xpv_allocated), - STRUCT_OFFSET(XPV, xpv_len) + sizeof (((XPV*)SvANY((SV*)0))->xpv_len) - + STRUCT_OFFSET(xpv_allocated, xpv_cur) - STRUCT_OFFSET(XPV, xpv_cur), - + STRUCT_OFFSET(xpv_allocated, xpv_cur) - STRUCT_OFFSET(XPV, xpv_cur) - , FALSE, TRUE}, - /* 12 */ - {sizeof(xpviv_allocated), - STRUCT_OFFSET(XPVIV, xiv_u) + sizeof (((XPVIV*)SvANY((SV*)0))->xiv_u) - + STRUCT_OFFSET(xpviv_allocated, xpv_cur) - STRUCT_OFFSET(XPVIV, xpv_cur), - + STRUCT_OFFSET(xpviv_allocated, xpv_cur) - STRUCT_OFFSET(XPVIV, xpv_cur) - , FALSE, TRUE}, - /* 20 */ - {sizeof(XPVNV), - STRUCT_OFFSET(XPVNV, xiv_u) + sizeof (((XPVNV*)SvANY((SV*)0))->xiv_u), - 0, FALSE, FALSE}, - /* 28 */ - {sizeof(XPVMG), - STRUCT_OFFSET(XPVMG, xmg_stash) + sizeof (((XPVMG*)SvANY((SV*)0))->xmg_stash), - 0, FALSE, FALSE}, - /* 36 */ - {sizeof(XPVBM), 0, 0, TRUE, FALSE}, - /* 48 */ - {sizeof(XPVGV), 0, 0, TRUE, FALSE}, - /* 64 */ - {sizeof(XPVLV), 0, 0, TRUE, FALSE}, - /* 20 */ - {sizeof(xpvav_allocated), 0, - STRUCT_OFFSET(xpvav_allocated, xav_fill) - - STRUCT_OFFSET(XPVAV, xav_fill), TRUE, FALSE}, - /* 20 */ - {sizeof(xpvhv_allocated), 0, - STRUCT_OFFSET(xpvhv_allocated, xhv_fill) - - STRUCT_OFFSET(XPVHV, xhv_fill), TRUE, FALSE}, - /* 76 */ - {sizeof(XPVCV), 0, 0, TRUE, FALSE}, - /* 80 */ - {sizeof(XPVFM), 0, 0, TRUE, FALSE}, - /* 84 */ - {sizeof(XPVIO), 0, 0, TRUE, FALSE} -}; +/* +=for apidoc sv_grow -#define new_body_type(sv_type) \ - (void *)((char *)S_new_body(aTHX_ bodies_by_type[sv_type].size, sv_type)\ - + bodies_by_type[sv_type].offset) +Expands the character buffer in the SV. If necessary, uses C and +upgrades the SV to C. Returns a pointer to the character buffer. +Use the C wrapper instead. -#define del_body_type(p, sv_type) \ - del_body(p, &PL_body_roots[sv_type]) +=cut +*/ +char * +Perl_sv_grow(pTHX_ register SV *sv, register STRLEN newlen) +{ + register char *s; -#define new_body_allocated(sv_type) \ - (void *)((char *)S_new_body(aTHX_ bodies_by_type[sv_type].size, sv_type)\ - + bodies_by_type[sv_type].offset) +#ifdef HAS_64K_LIMIT + if (newlen >= 0x10000) { + PerlIO_printf(Perl_debug_log, + "Allocation too large: %"UVxf"\n", (UV)newlen); + my_exit(1); + } +#endif /* HAS_64K_LIMIT */ + if (SvROK(sv)) + sv_unref(sv); + if (SvTYPE(sv) < SVt_PV) { + sv_upgrade(sv, SVt_PV); + s = SvPVX_mutable(sv); + } + else if (SvOOK(sv)) { /* pv is offset? */ + sv_backoff(sv); + s = SvPVX_mutable(sv); + if (newlen > SvLEN(sv)) + newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */ +#ifdef HAS_64K_LIMIT + if (newlen >= 0x10000) + newlen = 0xFFFF; +#endif + } + else + s = SvPVX_mutable(sv); -#define del_body_allocated(p, sv_type) \ - del_body(p - bodies_by_type[sv_type].offset, &PL_body_roots[sv_type]) + if (newlen > SvLEN(sv)) { /* need more room? */ + newlen = PERL_STRLEN_ROUNDUP(newlen); + if (SvLEN(sv) && s) { +#ifdef MYMALLOC + const STRLEN l = malloced_size((void*)SvPVX_const(sv)); + if (newlen <= l) { + SvLEN_set(sv, l); + return s; + } else +#endif + s = saferealloc(s, newlen); + } + else { + s = safemalloc(newlen); + if (SvPVX_const(sv) && SvCUR(sv)) { + Move(SvPVX_const(sv), s, (newlen < SvCUR(sv)) ? newlen : SvCUR(sv), char); + } + } + SvPV_set(sv, s); + SvLEN_set(sv, newlen); + } + return s; +} +/* +=for apidoc sv_setiv -#define my_safemalloc(s) (void*)safemalloc(s) -#define my_safefree(p) safefree((char*)p) +Copies an integer into the given SV, upgrading first if necessary. +Does not handle 'set' magic. See also C. -#ifdef PURIFY +=cut +*/ -#define new_XNV() my_safemalloc(sizeof(XPVNV)) -#define del_XNV(p) my_safefree(p) +void +Perl_sv_setiv(pTHX_ register SV *sv, IV i) +{ + SV_CHECK_THINKFIRST_COW_DROP(sv); + switch (SvTYPE(sv)) { + case SVt_NULL: + sv_upgrade(sv, SVt_IV); + break; + case SVt_NV: + sv_upgrade(sv, SVt_PVNV); + break; + case SVt_RV: + case SVt_PV: + sv_upgrade(sv, SVt_PVIV); + break; -#define new_XPV() my_safemalloc(sizeof(XPV)) -#define del_XPV(p) my_safefree(p) + case SVt_PVGV: + case SVt_PVAV: + case SVt_PVHV: + case SVt_PVCV: + case SVt_PVFM: + case SVt_PVIO: + Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0), + OP_DESC(PL_op)); + } + (void)SvIOK_only(sv); /* validate number */ + SvIV_set(sv, i); + SvTAINT(sv); +} -#define new_XPVIV() my_safemalloc(sizeof(XPVIV)) -#define del_XPVIV(p) my_safefree(p) +/* +=for apidoc sv_setiv_mg -#define new_XPVNV() my_safemalloc(sizeof(XPVNV)) -#define del_XPVNV(p) my_safefree(p) +Like C, but also handles 'set' magic. -#define new_XPVCV() my_safemalloc(sizeof(XPVCV)) -#define del_XPVCV(p) my_safefree(p) +=cut +*/ -#define new_XPVAV() my_safemalloc(sizeof(XPVAV)) -#define del_XPVAV(p) my_safefree(p) +void +Perl_sv_setiv_mg(pTHX_ register SV *sv, IV i) +{ + sv_setiv(sv,i); + SvSETMAGIC(sv); +} -#define new_XPVHV() my_safemalloc(sizeof(XPVHV)) -#define del_XPVHV(p) my_safefree(p) +/* +=for apidoc sv_setuv -#define new_XPVMG() my_safemalloc(sizeof(XPVMG)) -#define del_XPVMG(p) my_safefree(p) +Copies an unsigned integer into the given SV, upgrading first if necessary. +Does not handle 'set' magic. See also C. -#define new_XPVGV() my_safemalloc(sizeof(XPVGV)) -#define del_XPVGV(p) my_safefree(p) +=cut +*/ -#define new_XPVLV() my_safemalloc(sizeof(XPVLV)) -#define del_XPVLV(p) my_safefree(p) +void +Perl_sv_setuv(pTHX_ register SV *sv, UV u) +{ + /* With these two if statements: + u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 -#define new_XPVBM() my_safemalloc(sizeof(XPVBM)) -#define del_XPVBM(p) my_safefree(p) + without + u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 -#else /* !PURIFY */ + If you wish to remove them, please benchmark to see what the effect is + */ + if (u <= (UV)IV_MAX) { + sv_setiv(sv, (IV)u); + return; + } + sv_setiv(sv, 0); + SvIsUV_on(sv); + SvUV_set(sv, u); +} -#define new_XNV() new_body_type(SVt_NV) -#define del_XNV(p) del_body_type(p, SVt_NV) +/* +=for apidoc sv_setuv_mg -#define new_XPV() new_body_allocated(SVt_PV) -#define del_XPV(p) del_body_allocated(p, SVt_PV) +Like C, but also handles 'set' magic. -#define new_XPVIV() new_body_allocated(SVt_PVIV) -#define del_XPVIV(p) del_body_allocated(p, SVt_PVIV) +=cut +*/ -#define new_XPVNV() new_body_type(SVt_PVNV) -#define del_XPVNV(p) del_body_type(p, SVt_PVNV) +void +Perl_sv_setuv_mg(pTHX_ register SV *sv, UV u) +{ + sv_setiv(sv, 0); + SvIsUV_on(sv); + sv_setuv(sv,u); + SvSETMAGIC(sv); +} -#define new_XPVCV() new_body_type(SVt_PVCV) -#define del_XPVCV(p) del_body_type(p, SVt_PVCV) +/* +=for apidoc sv_setnv -#define new_XPVAV() new_body_allocated(SVt_PVAV) -#define del_XPVAV(p) del_body_allocated(p, SVt_PVAV) +Copies a double into the given SV, upgrading first if necessary. +Does not handle 'set' magic. See also C. -#define new_XPVHV() new_body_allocated(SVt_PVHV) -#define del_XPVHV(p) del_body_allocated(p, SVt_PVHV) +=cut +*/ -#define new_XPVMG() new_body_type(SVt_PVMG) -#define del_XPVMG(p) del_body_type(p, SVt_PVMG) +void +Perl_sv_setnv(pTHX_ register SV *sv, NV num) +{ + SV_CHECK_THINKFIRST_COW_DROP(sv); + switch (SvTYPE(sv)) { + case SVt_NULL: + case SVt_IV: + sv_upgrade(sv, SVt_NV); + break; + case SVt_RV: + case SVt_PV: + case SVt_PVIV: + sv_upgrade(sv, SVt_PVNV); + break; -#define new_XPVGV() new_body_type(SVt_PVGV) -#define del_XPVGV(p) del_body_type(p, SVt_PVGV) + case SVt_PVGV: + case SVt_PVAV: + case SVt_PVHV: + case SVt_PVCV: + case SVt_PVFM: + case SVt_PVIO: + Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0), + OP_NAME(PL_op)); + } + SvNV_set(sv, num); + (void)SvNOK_only(sv); /* validate number */ + SvTAINT(sv); +} -#define new_XPVLV() new_body_type(SVt_PVLV) -#define del_XPVLV(p) del_body_type(p, SVt_PVLV) +/* +=for apidoc sv_setnv_mg -#define new_XPVBM() new_body_type(SVt_PVBM) -#define del_XPVBM(p) del_body_type(p, SVt_PVBM) +Like C, but also handles 'set' magic. -#endif /* PURIFY */ +=cut +*/ -/* no arena for you! */ -#define new_XPVFM() my_safemalloc(sizeof(XPVFM)) -#define del_XPVFM(p) my_safefree(p) +void +Perl_sv_setnv_mg(pTHX_ register SV *sv, NV num) +{ + sv_setnv(sv,num); + SvSETMAGIC(sv); +} + +/* Print an "isn't numeric" warning, using a cleaned-up, + * printable version of the offending string + */ -#define new_XPVIO() my_safemalloc(sizeof(XPVIO)) -#define del_XPVIO(p) my_safefree(p) +STATIC void +S_not_a_number(pTHX_ SV *sv) +{ + SV *dsv; + char tmpbuf[64]; + const char *pv; + if (DO_UTF8(sv)) { + dsv = sv_2mortal(newSVpvn("", 0)); + pv = sv_uni_display(dsv, sv, 10, 0); + } else { + char *d = tmpbuf; + const char * const limit = tmpbuf + sizeof(tmpbuf) - 8; + /* each *s can expand to 4 chars + "...\0", + i.e. need room for 8 chars */ + + const char *s = SvPVX_const(sv); + const char * const end = s + SvCUR(sv); + for ( ; s < end && d < limit; s++ ) { + int ch = *s & 0xFF; + if (ch & 128 && !isPRINT_LC(ch)) { + *d++ = 'M'; + *d++ = '-'; + ch &= 127; + } + if (ch == '\n') { + *d++ = '\\'; + *d++ = 'n'; + } + else if (ch == '\r') { + *d++ = '\\'; + *d++ = 'r'; + } + else if (ch == '\f') { + *d++ = '\\'; + *d++ = 'f'; + } + else if (ch == '\\') { + *d++ = '\\'; + *d++ = '\\'; + } + else if (ch == '\0') { + *d++ = '\\'; + *d++ = '0'; + } + else if (isPRINT_LC(ch)) + *d++ = ch; + else { + *d++ = '^'; + *d++ = toCTRL(ch); + } + } + if (s < end) { + *d++ = '.'; + *d++ = '.'; + *d++ = '.'; + } + *d = '\0'; + pv = tmpbuf; + } + if (PL_op) + Perl_warner(aTHX_ packWARN(WARN_NUMERIC), + "Argument \"%s\" isn't numeric in %s", pv, + OP_DESC(PL_op)); + else + Perl_warner(aTHX_ packWARN(WARN_NUMERIC), + "Argument \"%s\" isn't numeric", pv); +} /* -=for apidoc sv_upgrade +=for apidoc looks_like_number -Upgrade an SV to a more complex form. Generally adds a new body type to the -SV, then copies across as much information as possible from the old body. -You generally want to use the C macro wrapper. See also C. +Test if the content of an SV looks like a number (or is a number). +C and C are treated as numbers (so will not issue a +non-numeric warning), even if your atof() doesn't grok them. =cut */ -void -Perl_sv_upgrade(pTHX_ register SV *sv, U32 mt) +I32 +Perl_looks_like_number(pTHX_ SV *sv) { - void** old_body_arena; - size_t old_body_offset; - size_t old_body_length; /* Well, the length to copy. */ - void* old_body; - void* new_body; - size_t new_body_length; - size_t new_body_offset; - void** new_body_arena; - void** new_body_arenaroot; - const U32 old_type = SvTYPE(sv); - const struct body_details *const old_type_details - = bodies_by_type + old_type; + register const char *sbegin; + STRLEN len; - if (mt != SVt_PV && SvIsCOW(sv)) { - sv_force_normal_flags(sv, 0); + if (SvPOK(sv)) { + sbegin = SvPVX_const(sv); + len = SvCUR(sv); } + else if (SvPOKp(sv)) + sbegin = SvPV_const(sv, len); + else + return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK); + return grok_number(sbegin, len, NULL); +} - if (old_type == mt) - return; +/* Actually, ISO C leaves conversion of UV to IV undefined, but + until proven guilty, assume that things are not that bad... */ - if (old_type > mt) - Perl_croak(aTHX_ "sv_upgrade from type %d down to type %d", - (int)old_type, (int)mt); +/* + NV_PRESERVES_UV: + As 64 bit platforms often have an NV that doesn't preserve all bits of + an IV (an assumption perl has been based on to date) it becomes necessary + to remove the assumption that the NV always carries enough precision to + recreate the IV whenever needed, and that the NV is the canonical form. + Instead, IV/UV and NV need to be given equal rights. So as to not lose + precision as a side effect of conversion (which would lead to insanity + and the dragon(s) in t/op/numconvert.t getting very angry) the intent is + 1) to distinguish between IV/UV/NV slots that have cached a valid + conversion where precision was lost and IV/UV/NV slots that have a + valid conversion which has lost no precision + 2) to ensure that if a numeric conversion to one form is requested that + would lose precision, the precise conversion (or differently + imprecise conversion) is also performed and cached, to prevent + requests for different numeric formats on the same SV causing + lossy conversion chains. (lossless conversion chains are perfectly + acceptable (still)) - old_body = SvANY(sv); - old_body_arena = 0; - old_body_offset = 0; - old_body_length = 0; - new_body_offset = 0; - new_body_length = ~0; - /* Copying structures onto other structures that have been neatly zeroed - has a subtle gotcha. Consider XPVMG + flags are used: + SvIOKp is true if the IV slot contains a valid value + SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true) + SvNOKp is true if the NV slot contains a valid value + SvNOK is true only if the NV value is accurate - +------+------+------+------+------+-------+-------+ - | NV | CUR | LEN | IV | MAGIC | STASH | - +------+------+------+------+------+-------+-------+ - 0 4 8 12 16 20 24 28 + so + while converting from PV to NV, check to see if converting that NV to an + IV(or UV) would lose accuracy over a direct conversion from PV to + IV(or UV). If it would, cache both conversions, return NV, but mark + SV as IOK NOKp (ie not NOK). - where NVs are aligned to 8 bytes, so that sizeof that structure is - actually 32 bytes long, with 4 bytes of padding at the end: + While converting from PV to IV, check to see if converting that IV to an + NV would lose accuracy over a direct conversion from PV to NV. If it + would, cache both conversions, flag similarly. - +------+------+------+------+------+-------+-------+------+ - | NV | CUR | LEN | IV | MAGIC | STASH | ??? | - +------+------+------+------+------+-------+-------+------+ - 0 4 8 12 16 20 24 28 32 + Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite + correctly because if IV & NV were set NV *always* overruled. + Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning + changes - now IV and NV together means that the two are interchangeable: + SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX; - so what happens if you allocate memory for this structure: + The benefit of this is that operations such as pp_add know that if + SvIOK is true for both left and right operands, then integer addition + can be used instead of floating point (for cases where the result won't + overflow). Before, floating point was always used, which could lead to + loss of precision compared with integer addition. - +------+------+------+------+------+-------+-------+------+------+... - | NV | CUR | LEN | IV | MAGIC | STASH | GP | NAME | - +------+------+------+------+------+-------+-------+------+------+... - 0 4 8 12 16 20 24 28 32 36 + * making IV and NV equal status should make maths accurate on 64 bit + platforms + * may speed up maths somewhat if pp_add and friends start to use + integers when possible instead of fp. (Hopefully the overhead in + looking for SvIOK and checking for overflow will not outweigh the + fp to integer speedup) + * will slow down integer operations (callers of SvIV) on "inaccurate" + values, as the change from SvIOK to SvIOKp will cause a call into + sv_2iv each time rather than a macro access direct to the IV slot + * should speed up number->string conversion on integers as IV is + favoured when IV and NV are equally accurate - zero it, then copy sizeof(XPVMG) bytes on top of it? Not quite what you - expect, because you copy the area marked ??? onto GP. Now, ??? may have - started out as zero once, but it's quite possible that it isn't. So now, - rather than a nicely zeroed GP, you have it pointing somewhere random. - Bugs ensue. + #################################################################### + You had better be using SvIOK_notUV if you want an IV for arithmetic: + SvIOK is true if (IV or UV), so you might be getting (IV)SvUV. + On the other hand, SvUOK is true iff UV. + #################################################################### - (In fact, GP ends up pointing at a previous GP structure, because the - principle cause of the padding in XPVMG getting garbage is a copy of - sizeof(XPVMG) bytes from a XPVGV structure in sv_unglob) + Your mileage will vary depending your CPU's relative fp to integer + performance ratio. +*/ - So we are careful and work out the size of used parts of all the - structures. */ +#ifndef NV_PRESERVES_UV +# define IS_NUMBER_UNDERFLOW_IV 1 +# define IS_NUMBER_UNDERFLOW_UV 2 +# define IS_NUMBER_IV_AND_UV 2 +# define IS_NUMBER_OVERFLOW_IV 4 +# define IS_NUMBER_OVERFLOW_UV 5 - switch (old_type) { - case SVt_NULL: - break; - case SVt_IV: - if (mt == SVt_NV) - mt = SVt_PVNV; - else if (mt < SVt_PVIV) - mt = SVt_PVIV; - old_body_offset = old_type_details->offset; - old_body_length = old_type_details->copy; - break; - case SVt_NV: - old_body_arena = &PL_body_roots[old_type]; - old_body_length = old_type_details->copy; - if (mt < SVt_PVNV) - mt = SVt_PVNV; - break; - case SVt_RV: - break; - case SVt_PV: - old_body_arena = &PL_body_roots[SVt_PV]; - old_body_offset = - bodies_by_type[SVt_PV].offset; - old_body_length = bodies_by_type[SVt_PV].copy; - if (mt <= SVt_IV) - mt = SVt_PVIV; - else if (mt == SVt_NV) - mt = SVt_PVNV; - break; - case SVt_PVIV: - old_body_arena = &PL_body_roots[SVt_PVIV]; - old_body_offset = - bodies_by_type[SVt_PVIV].offset; - old_body_length = bodies_by_type[SVt_PVIV].copy; - break; - case SVt_PVNV: - old_body_arena = &PL_body_roots[SVt_PVNV]; - old_body_length = bodies_by_type[SVt_PVNV].copy; - break; - case SVt_PVMG: - /* Because the XPVMG of PL_mess_sv isn't allocated from the arena, - there's no way that it can be safely upgraded, because perl.c - expects to Safefree(SvANY(PL_mess_sv)) */ - assert(sv != PL_mess_sv); - /* This flag bit is used to mean other things in other scalar types. - Given that it only has meaning inside the pad, it shouldn't be set - on anything that can get upgraded. */ - assert((SvFLAGS(sv) & SVpad_TYPED) == 0); - old_body_arena = &PL_body_roots[SVt_PVMG]; - old_body_length = bodies_by_type[SVt_PVMG].copy; - break; - default: - if (old_type_details->cant_upgrade) - Perl_croak(aTHX_ "Can't upgrade that kind of scalar"); - } +/* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */ - SvFLAGS(sv) &= ~SVTYPEMASK; - SvFLAGS(sv) |= mt; +/* For sv_2nv these three cases are "SvNOK and don't bother casting" */ +STATIC int +S_sv_2iuv_non_preserve(pTHX_ register SV *sv, I32 numtype) +{ + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%"NVgf" inttype=%"UVXf"\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); + if (SvNVX(sv) < (NV)IV_MIN) { + (void)SvIOKp_on(sv); + (void)SvNOK_on(sv); + SvIV_set(sv, IV_MIN); + return IS_NUMBER_UNDERFLOW_IV; + } + if (SvNVX(sv) > (NV)UV_MAX) { + (void)SvIOKp_on(sv); + (void)SvNOK_on(sv); + SvIsUV_on(sv); + SvUV_set(sv, UV_MAX); + return IS_NUMBER_OVERFLOW_UV; + } + (void)SvIOKp_on(sv); + (void)SvNOK_on(sv); + /* Can't use strtol etc to convert this string. (See truth table in + sv_2iv */ + if (SvNVX(sv) <= (UV)IV_MAX) { + SvIV_set(sv, I_V(SvNVX(sv))); + if ((NV)(SvIVX(sv)) == SvNVX(sv)) { + SvIOK_on(sv); /* Integer is precise. NOK, IOK */ + } else { + /* Integer is imprecise. NOK, IOKp */ + } + return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV; + } + SvIsUV_on(sv); + SvUV_set(sv, U_V(SvNVX(sv))); + if ((NV)(SvUVX(sv)) == SvNVX(sv)) { + if (SvUVX(sv) == UV_MAX) { + /* As we know that NVs don't preserve UVs, UV_MAX cannot + possibly be preserved by NV. Hence, it must be overflow. + NOK, IOKp */ + return IS_NUMBER_OVERFLOW_UV; + } + SvIOK_on(sv); /* Integer is precise. NOK, UOK */ + } else { + /* Integer is imprecise. NOK, IOKp */ + } + return IS_NUMBER_OVERFLOW_IV; +} +#endif /* !NV_PRESERVES_UV*/ - switch (mt) { - case SVt_NULL: - Perl_croak(aTHX_ "Can't upgrade to undef"); - case SVt_IV: - assert(old_type == SVt_NULL); - SvANY(sv) = (XPVIV*)((char*)&(sv->sv_u.svu_iv) - STRUCT_OFFSET(XPVIV, xiv_iv)); - SvIV_set(sv, 0); - return; - case SVt_NV: - assert(old_type == SVt_NULL); - SvANY(sv) = new_XNV(); - SvNV_set(sv, 0); - return; - case SVt_RV: - assert(old_type == SVt_NULL); - SvANY(sv) = &sv->sv_u.svu_rv; - SvRV_set(sv, 0); - return; - case SVt_PVHV: - SvANY(sv) = new_XPVHV(); - HvFILL(sv) = 0; - HvMAX(sv) = 0; - HvTOTALKEYS(sv) = 0; +STATIC bool +S_sv_2iuv_common(pTHX_ SV *sv) { + if (SvNOKp(sv)) { + /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv + * without also getting a cached IV/UV from it at the same time + * (ie PV->NV conversion should detect loss of accuracy and cache + * IV or UV at same time to avoid this. */ + /* IV-over-UV optimisation - choose to cache IV if possible */ - goto hv_av_common; + if (SvTYPE(sv) == SVt_NV) + sv_upgrade(sv, SVt_PVNV); - case SVt_PVAV: - SvANY(sv) = new_XPVAV(); - AvMAX(sv) = -1; - AvFILLp(sv) = -1; - AvALLOC(sv) = 0; - AvREAL_only(sv); + (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */ + /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost + certainly cast into the IV range at IV_MAX, whereas the correct + answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary + cases go to UV */ + if (SvNVX(sv) < (NV)IV_MAX + 0.5) { + SvIV_set(sv, I_V(SvNVX(sv))); + if (SvNVX(sv) == (NV) SvIVX(sv) +#ifndef NV_PRESERVES_UV + && (((UV)1 << NV_PRESERVES_UV_BITS) > + (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv))) + /* Don't flag it as "accurately an integer" if the number + came from a (by definition imprecise) NV operation, and + we're outside the range of NV integer precision */ +#endif + ) { + SvIOK_on(sv); /* Can this go wrong with rounding? NWC */ + DEBUG_c(PerlIO_printf(Perl_debug_log, + "0x%"UVxf" iv(%"NVgf" => %"IVdf") (precise)\n", + PTR2UV(sv), + SvNVX(sv), + SvIVX(sv))); - hv_av_common: - /* SVt_NULL isn't the only thing upgraded to AV or HV. - The target created by newSVrv also is, and it can have magic. - However, it never has SvPVX set. - */ - if (old_type >= SVt_RV) { - assert(SvPVX_const(sv) == 0); + } else { + /* IV not precise. No need to convert from PV, as NV + conversion would already have cached IV if it detected + that PV->IV would be better than PV->NV->IV + flags already correct - don't set public IOK. */ + DEBUG_c(PerlIO_printf(Perl_debug_log, + "0x%"UVxf" iv(%"NVgf" => %"IVdf") (imprecise)\n", + PTR2UV(sv), + SvNVX(sv), + SvIVX(sv))); + } + /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN, + but the cast (NV)IV_MIN rounds to a the value less (more + negative) than IV_MIN which happens to be equal to SvNVX ?? + Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and + NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and + (NV)UVX == NVX are both true, but the values differ. :-( + Hopefully for 2s complement IV_MIN is something like + 0x8000000000000000 which will be exact. NWC */ } - - /* Could put this in the else clause below, as PVMG must have SvPVX - 0 already (the assertion above) */ - SvPV_set(sv, (char*)0); - - if (old_type >= SVt_PVMG) { - SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_magic); - SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash); - } else { - SvMAGIC_set(sv, 0); - SvSTASH_set(sv, 0); + else { + SvUV_set(sv, U_V(SvNVX(sv))); + if ( + (SvNVX(sv) == (NV) SvUVX(sv)) +#ifndef NV_PRESERVES_UV + /* Make sure it's not 0xFFFFFFFFFFFFFFFF */ + /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */ + && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv)) + /* Don't flag it as "accurately an integer" if the number + came from a (by definition imprecise) NV operation, and + we're outside the range of NV integer precision */ +#endif + ) + SvIOK_on(sv); + SvIsUV_on(sv); + DEBUG_c(PerlIO_printf(Perl_debug_log, + "0x%"UVxf" 2iv(%"UVuf" => %"IVdf") (as unsigned)\n", + PTR2UV(sv), + SvUVX(sv), + SvUVX(sv))); } - break; - - case SVt_PVIO: - new_body = new_XPVIO(); - new_body_length = sizeof(XPVIO); - goto zero; - case SVt_PVFM: - new_body = new_XPVFM(); - new_body_length = sizeof(XPVFM); - goto zero; - - case SVt_PVBM: - case SVt_PVGV: - case SVt_PVCV: - case SVt_PVLV: - case SVt_PVMG: - case SVt_PVNV: - new_body_length = bodies_by_type[mt].size; - new_body_arena = &PL_body_roots[mt]; - new_body_arenaroot = &PL_body_arenaroots[mt]; - goto new_body; + } + else if (SvPOKp(sv) && SvLEN(sv)) { + UV value; + const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); + /* We want to avoid a possible problem when we cache an IV/ a UV which + may be later translated to an NV, and the resulting NV is not + the same as the direct translation of the initial string + (eg 123.456 can shortcut to the IV 123 with atol(), but we must + be careful to ensure that the value with the .456 is around if the + NV value is requested in the future). + + This means that if we cache such an IV/a UV, we need to cache the + NV as well. Moreover, we trade speed for space, and do not + cache the NV if we are sure it's not needed. + */ - case SVt_PVIV: - new_body_offset = - bodies_by_type[SVt_PVIV].offset; - new_body_length = sizeof(XPVIV) - new_body_offset; - new_body_arena = &PL_body_roots[SVt_PVIV]; - new_body_arenaroot = &PL_body_arenaroots[SVt_PVIV]; - /* XXX Is this still needed? Was it ever needed? Surely as there is - no route from NV to PVIV, NOK can never be true */ - if (SvNIOK(sv)) + /* SVt_PVNV is one higher than SVt_PVIV, hence this order */ + if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) + == IS_NUMBER_IN_UV) { + /* It's definitely an integer, only upgrade to PVIV */ + if (SvTYPE(sv) < SVt_PVIV) + sv_upgrade(sv, SVt_PVIV); (void)SvIOK_on(sv); - SvNOK_off(sv); - goto new_body_no_NV; - case SVt_PV: - new_body_offset = - bodies_by_type[SVt_PV].offset; - new_body_length = sizeof(XPV) - new_body_offset; - new_body_arena = &PL_body_roots[SVt_PV]; - new_body_arenaroot = &PL_body_arenaroots[SVt_PV]; - new_body_no_NV: - /* PV and PVIV don't have an NV slot. */ - - new_body: - assert(new_body_length); -#ifndef PURIFY - /* This points to the start of the allocated area. */ - new_body_inline(new_body, new_body_arena, new_body_length, mt); -#else - /* We always allocated the full length item with PURIFY */ - new_body_length += new_body_offset; - new_body_offset = 0; - new_body = my_safemalloc(new_body_length); - -#endif - zero: - Zero(new_body, new_body_length, char); - new_body = ((char *)new_body) - new_body_offset; - SvANY(sv) = new_body; + } else if (SvTYPE(sv) < SVt_PVNV) + sv_upgrade(sv, SVt_PVNV); - if (old_body_length) { - Copy((char *)old_body + old_body_offset, - (char *)new_body + old_body_offset, - old_body_length, char); - } - -#ifndef NV_ZERO_IS_ALLBITS_ZERO - /* If NV 0.0 is store as all bits 0 then Zero() already creates a correct - 0.0 for us. */ - if (old_type_details->zero_nv) - SvNV_set(sv, 0); + /* If NV preserves UV then we only use the UV value if we know that + we aren't going to call atof() below. If NVs don't preserve UVs + then the value returned may have more precision than atof() will + return, even though value isn't perfectly accurate. */ + if ((numtype & (IS_NUMBER_IN_UV +#ifdef NV_PRESERVES_UV + | IS_NUMBER_NOT_INT #endif + )) == IS_NUMBER_IN_UV) { + /* This won't turn off the public IOK flag if it was set above */ + (void)SvIOKp_on(sv); - if (mt == SVt_PVIO) - IoPAGE_LEN(sv) = 60; - if (old_type < SVt_RV) - SvPV_set(sv, 0); - break; - default: - Perl_croak(aTHX_ "panic: sv_upgrade to unknown type %lu", mt); - } + if (!(numtype & IS_NUMBER_NEG)) { + /* positive */; + if (value <= (UV)IV_MAX) { + SvIV_set(sv, (IV)value); + } else { + /* it didn't overflow, and it was positive. */ + SvUV_set(sv, value); + SvIsUV_on(sv); + } + } else { + /* 2s complement assumption */ + if (value <= (UV)IV_MIN) { + SvIV_set(sv, -(IV)value); + } else { + /* Too negative for an IV. This is a double upgrade, but + I'm assuming it will be rare. */ + if (SvTYPE(sv) < SVt_PVNV) + sv_upgrade(sv, SVt_PVNV); + SvNOK_on(sv); + SvIOK_off(sv); + SvIOKp_on(sv); + SvNV_set(sv, -(NV)value); + SvIV_set(sv, IV_MIN); + } + } + } + /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we + will be in the previous block to set the IV slot, and the next + block to set the NV slot. So no else here. */ + + if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) + != IS_NUMBER_IN_UV) { + /* It wasn't an (integer that doesn't overflow the UV). */ + SvNV_set(sv, Atof(SvPVX_const(sv))); + if (! numtype && ckWARN(WARN_NUMERIC)) + not_a_number(sv); - if (old_body_arena) { -#ifdef PURIFY - my_safefree(old_body); +#if defined(USE_LONG_DOUBLE) + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" PERL_PRIgldbl ")\n", + PTR2UV(sv), SvNVX(sv))); #else - del_body((void*)((char*)old_body + old_body_offset), - old_body_arena); + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"NVgf")\n", + PTR2UV(sv), SvNVX(sv))); #endif + +#ifdef NV_PRESERVES_UV + (void)SvIOKp_on(sv); + (void)SvNOK_on(sv); + if (SvNVX(sv) < (NV)IV_MAX + 0.5) { + SvIV_set(sv, I_V(SvNVX(sv))); + if ((NV)(SvIVX(sv)) == SvNVX(sv)) { + SvIOK_on(sv); + } else { + /* Integer is imprecise. NOK, IOKp */ + } + /* UV will not work better than IV */ + } else { + if (SvNVX(sv) > (NV)UV_MAX) { + SvIsUV_on(sv); + /* Integer is inaccurate. NOK, IOKp, is UV */ + SvUV_set(sv, UV_MAX); + } else { + SvUV_set(sv, U_V(SvNVX(sv))); + /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs + NV preservse UV so can do correct comparison. */ + if ((NV)(SvUVX(sv)) == SvNVX(sv)) { + SvIOK_on(sv); + } else { + /* Integer is imprecise. NOK, IOKp, is UV */ + } + } + SvIsUV_on(sv); + } +#else /* NV_PRESERVES_UV */ + if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) + == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) { + /* The IV/UV slot will have been set from value returned by + grok_number above. The NV slot has just been set using + Atof. */ + SvNOK_on(sv); + assert (SvIOKp(sv)); + } else { + if (((UV)1 << NV_PRESERVES_UV_BITS) > + U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) { + /* Small enough to preserve all bits. */ + (void)SvIOKp_on(sv); + SvNOK_on(sv); + SvIV_set(sv, I_V(SvNVX(sv))); + if ((NV)(SvIVX(sv)) == SvNVX(sv)) + SvIOK_on(sv); + /* Assumption: first non-preserved integer is < IV_MAX, + this NV is in the preserved range, therefore: */ + if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv)) + < (UV)IV_MAX)) { + Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); + } + } else { + /* IN_UV NOT_INT + 0 0 already failed to read UV. + 0 1 already failed to read UV. + 1 0 you won't get here in this case. IV/UV + slot set, public IOK, Atof() unneeded. + 1 1 already read UV. + so there's no point in sv_2iuv_non_preserve() attempting + to use atol, strtol, strtoul etc. */ + sv_2iuv_non_preserve (sv, numtype); + } + } +#endif /* NV_PRESERVES_UV */ + } + } + else { + if (!(SvFLAGS(sv) & SVs_PADTMP)) { + if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); + } + if (SvTYPE(sv) < SVt_IV) + /* Typically the caller expects that sv_any is not NULL now. */ + sv_upgrade(sv, SVt_IV); + /* Return 0 from the caller. */ + return TRUE; } + return FALSE; } /* -=for apidoc sv_backoff +=for apidoc sv_2iv_flags -Remove any string offset. You should normally use the C macro -wrapper instead. +Return the integer value of an SV, doing any necessary string +conversion. If flags includes SV_GMAGIC, does an mg_get() first. +Normally used via the C and C macros. =cut */ -int -Perl_sv_backoff(pTHX_ register SV *sv) +IV +Perl_sv_2iv_flags(pTHX_ register SV *sv, I32 flags) { - assert(SvOOK(sv)); - assert(SvTYPE(sv) != SVt_PVHV); - assert(SvTYPE(sv) != SVt_PVAV); - if (SvIVX(sv)) { - const char * const s = SvPVX_const(sv); - SvLEN_set(sv, SvLEN(sv) + SvIVX(sv)); - SvPV_set(sv, SvPVX(sv) - SvIVX(sv)); - SvIV_set(sv, 0); - Move(s, SvPVX(sv), SvCUR(sv)+1, char); + if (!sv) + return 0; + if (SvGMAGICAL(sv)) { + if (flags & SV_GMAGIC) + mg_get(sv); + if (SvIOKp(sv)) + return SvIVX(sv); + if (SvNOKp(sv)) { + return I_V(SvNVX(sv)); + } + if (SvPOKp(sv) && SvLEN(sv)) { + UV value; + const int numtype + = grok_number(SvPVX_const(sv), SvCUR(sv), &value); + + if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) + == IS_NUMBER_IN_UV) { + /* It's definitely an integer */ + if (numtype & IS_NUMBER_NEG) { + if (value < (UV)IV_MIN) + return -(IV)value; + } else { + if (value < (UV)IV_MAX) + return (IV)value; + } + } + if (!numtype) { + if (ckWARN(WARN_NUMERIC)) + not_a_number(sv); + } + return I_V(Atof(SvPVX_const(sv))); + } + if (SvROK(sv)) { + goto return_rok; + } + assert(SvTYPE(sv) >= SVt_PVMG); + /* This falls through to the report_uninit inside S_sv_2iuv_common. */ + } else if (SvTHINKFIRST(sv)) { + if (SvROK(sv)) { + return_rok: + if (SvAMAGIC(sv)) { + SV * const tmpstr=AMG_CALLun(sv,numer); + if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { + return SvIV(tmpstr); + } + } + return PTR2IV(SvRV(sv)); + } + if (SvIsCOW(sv)) { + sv_force_normal_flags(sv, 0); + } + if (SvREADONLY(sv) && !SvOK(sv)) { + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); + return 0; + } } - SvFLAGS(sv) &= ~SVf_OOK; - return 0; + if (!SvIOKp(sv)) { + if (S_sv_2iuv_common(aTHX_ sv)) + return 0; + } + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"IVdf")\n", + PTR2UV(sv),SvIVX(sv))); + return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv); } /* -=for apidoc sv_grow +=for apidoc sv_2uv_flags -Expands the character buffer in the SV. If necessary, uses C and -upgrades the SV to C. Returns a pointer to the character buffer. -Use the C wrapper instead. +Return the unsigned integer value of an SV, doing any necessary string +conversion. If flags includes SV_GMAGIC, does an mg_get() first. +Normally used via the C and C macros. =cut */ -char * -Perl_sv_grow(pTHX_ register SV *sv, register STRLEN newlen) +UV +Perl_sv_2uv_flags(pTHX_ register SV *sv, I32 flags) { - register char *s; - -#ifdef HAS_64K_LIMIT - if (newlen >= 0x10000) { - PerlIO_printf(Perl_debug_log, - "Allocation too large: %"UVxf"\n", (UV)newlen); - my_exit(1); + if (!sv) + return 0; + if (SvGMAGICAL(sv)) { + if (flags & SV_GMAGIC) + mg_get(sv); + if (SvIOKp(sv)) + return SvUVX(sv); + if (SvNOKp(sv)) + return U_V(SvNVX(sv)); + if (SvPOKp(sv) && SvLEN(sv)) { + UV value; + const int numtype + = grok_number(SvPVX_const(sv), SvCUR(sv), &value); + + if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) + == IS_NUMBER_IN_UV) { + /* It's definitely an integer */ + if (!(numtype & IS_NUMBER_NEG)) + return value; + } + if (!numtype) { + if (ckWARN(WARN_NUMERIC)) + not_a_number(sv); + } + return U_V(Atof(SvPVX_const(sv))); + } + if (SvROK(sv)) { + goto return_rok; + } + assert(SvTYPE(sv) >= SVt_PVMG); + /* This falls through to the report_uninit inside S_sv_2iuv_common. */ + } else if (SvTHINKFIRST(sv)) { + if (SvROK(sv)) { + return_rok: + if (SvAMAGIC(sv)) { + SV *const tmpstr = AMG_CALLun(sv,numer); + if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { + return SvUV(tmpstr); + } + } + return PTR2UV(SvRV(sv)); + } + if (SvIsCOW(sv)) { + sv_force_normal_flags(sv, 0); + } + if (SvREADONLY(sv) && !SvOK(sv)) { + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); + return 0; + } } -#endif /* HAS_64K_LIMIT */ - if (SvROK(sv)) - sv_unref(sv); - if (SvTYPE(sv) < SVt_PV) { - sv_upgrade(sv, SVt_PV); - s = SvPVX_mutable(sv); + if (!SvIOKp(sv)) { + if (S_sv_2iuv_common(aTHX_ sv)) + return 0; } - else if (SvOOK(sv)) { /* pv is offset? */ - sv_backoff(sv); - s = SvPVX_mutable(sv); - if (newlen > SvLEN(sv)) - newlen += 10 * (newlen - SvCUR(sv)); /* avoid copy each time */ -#ifdef HAS_64K_LIMIT - if (newlen >= 0x10000) - newlen = 0xFFFF; -#endif - } - else - s = SvPVX_mutable(sv); - if (newlen > SvLEN(sv)) { /* need more room? */ - newlen = PERL_STRLEN_ROUNDUP(newlen); - if (SvLEN(sv) && s) { -#ifdef MYMALLOC - const STRLEN l = malloced_size((void*)SvPVX_const(sv)); - if (newlen <= l) { - SvLEN_set(sv, l); - return s; - } else -#endif - s = saferealloc(s, newlen); - } - else { - s = safemalloc(newlen); - if (SvPVX_const(sv) && SvCUR(sv)) { - Move(SvPVX_const(sv), s, (newlen < SvCUR(sv)) ? newlen : SvCUR(sv), char); - } - } - SvPV_set(sv, s); - SvLEN_set(sv, newlen); - } - return s; + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%"UVuf")\n", + PTR2UV(sv),SvUVX(sv))); + return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv); } /* -=for apidoc sv_setiv +=for apidoc sv_2nv -Copies an integer into the given SV, upgrading first if necessary. -Does not handle 'set' magic. See also C. +Return the num value of an SV, doing any necessary string or integer +conversion, magic etc. Normally used via the C and C +macros. =cut */ -void -Perl_sv_setiv(pTHX_ register SV *sv, IV i) +NV +Perl_sv_2nv(pTHX_ register SV *sv) { - SV_CHECK_THINKFIRST_COW_DROP(sv); - switch (SvTYPE(sv)) { - case SVt_NULL: - sv_upgrade(sv, SVt_IV); - break; - case SVt_NV: + if (!sv) + return 0.0; + if (SvGMAGICAL(sv)) { + mg_get(sv); + if (SvNOKp(sv)) + return SvNVX(sv); + if (SvPOKp(sv) && SvLEN(sv)) { + if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) && + !grok_number(SvPVX_const(sv), SvCUR(sv), NULL)) + not_a_number(sv); + return Atof(SvPVX_const(sv)); + } + if (SvIOKp(sv)) { + if (SvIsUV(sv)) + return (NV)SvUVX(sv); + else + return (NV)SvIVX(sv); + } + if (SvROK(sv)) { + goto return_rok; + } + assert(SvTYPE(sv) >= SVt_PVMG); + /* This falls through to the report_uninit near the end of the + function. */ + } else if (SvTHINKFIRST(sv)) { + if (SvROK(sv)) { + return_rok: + if (SvAMAGIC(sv)) { + SV *const tmpstr = AMG_CALLun(sv,numer); + if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { + return SvNV(tmpstr); + } + } + return PTR2NV(SvRV(sv)); + } + if (SvIsCOW(sv)) { + sv_force_normal_flags(sv, 0); + } + if (SvREADONLY(sv) && !SvOK(sv)) { + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); + return 0.0; + } + } + if (SvTYPE(sv) < SVt_NV) { + /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */ + sv_upgrade(sv, SVt_NV); +#ifdef USE_LONG_DOUBLE + DEBUG_c({ + STORE_NUMERIC_LOCAL_SET_STANDARD(); + PerlIO_printf(Perl_debug_log, + "0x%"UVxf" num(%" PERL_PRIgldbl ")\n", + PTR2UV(sv), SvNVX(sv)); + RESTORE_NUMERIC_LOCAL(); + }); +#else + DEBUG_c({ + STORE_NUMERIC_LOCAL_SET_STANDARD(); + PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%"NVgf")\n", + PTR2UV(sv), SvNVX(sv)); + RESTORE_NUMERIC_LOCAL(); + }); +#endif + } + else if (SvTYPE(sv) < SVt_PVNV) sv_upgrade(sv, SVt_PVNV); - break; - case SVt_RV: - case SVt_PV: - sv_upgrade(sv, SVt_PVIV); - break; - - case SVt_PVGV: - case SVt_PVAV: - case SVt_PVHV: - case SVt_PVCV: - case SVt_PVFM: - case SVt_PVIO: - Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0), - OP_DESC(PL_op)); + if (SvNOKp(sv)) { + return SvNVX(sv); } - (void)SvIOK_only(sv); /* validate number */ - SvIV_set(sv, i); - SvTAINT(sv); -} + if (SvIOKp(sv)) { + SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv)); +#ifdef NV_PRESERVES_UV + SvNOK_on(sv); +#else + /* Only set the public NV OK flag if this NV preserves the IV */ + /* Check it's not 0xFFFFFFFFFFFFFFFF */ + if (SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv)))) + : (SvIVX(sv) == I_V(SvNVX(sv)))) + SvNOK_on(sv); + else + SvNOKp_on(sv); +#endif + } + else if (SvPOKp(sv) && SvLEN(sv)) { + UV value; + const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); + if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC)) + not_a_number(sv); +#ifdef NV_PRESERVES_UV + if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) + == IS_NUMBER_IN_UV) { + /* It's definitely an integer */ + SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value); + } else + SvNV_set(sv, Atof(SvPVX_const(sv))); + SvNOK_on(sv); +#else + SvNV_set(sv, Atof(SvPVX_const(sv))); + /* Only set the public NV OK flag if this NV preserves the value in + the PV at least as well as an IV/UV would. + Not sure how to do this 100% reliably. */ + /* if that shift count is out of range then Configure's test is + wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS == + UV_BITS */ + if (((UV)1 << NV_PRESERVES_UV_BITS) > + U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) { + SvNOK_on(sv); /* Definitely small enough to preserve all bits */ + } else if (!(numtype & IS_NUMBER_IN_UV)) { + /* Can't use strtol etc to convert this string, so don't try. + sv_2iv and sv_2uv will use the NV to convert, not the PV. */ + SvNOK_on(sv); + } else { + /* value has been set. It may not be precise. */ + if ((numtype & IS_NUMBER_NEG) && (value > (UV)IV_MIN)) { + /* 2s complement assumption for (UV)IV_MIN */ + SvNOK_on(sv); /* Integer is too negative. */ + } else { + SvNOKp_on(sv); + SvIOKp_on(sv); -/* -=for apidoc sv_setiv_mg + if (numtype & IS_NUMBER_NEG) { + SvIV_set(sv, -(IV)value); + } else if (value <= (UV)IV_MAX) { + SvIV_set(sv, (IV)value); + } else { + SvUV_set(sv, value); + SvIsUV_on(sv); + } -Like C, but also handles 'set' magic. + if (numtype & IS_NUMBER_NOT_INT) { + /* I believe that even if the original PV had decimals, + they are lost beyond the limit of the FP precision. + However, neither is canonical, so both only get p + flags. NWC, 2000/11/25 */ + /* Both already have p flags, so do nothing */ + } else { + const NV nv = SvNVX(sv); + if (SvNVX(sv) < (NV)IV_MAX + 0.5) { + if (SvIVX(sv) == I_V(nv)) { + SvNOK_on(sv); + } else { + /* It had no "." so it must be integer. */ + } + SvIOK_on(sv); + } else { + /* between IV_MAX and NV(UV_MAX). + Could be slightly > UV_MAX */ -=cut -*/ + if (numtype & IS_NUMBER_NOT_INT) { + /* UV and NV both imprecise. */ + } else { + const UV nv_as_uv = U_V(nv); -void -Perl_sv_setiv_mg(pTHX_ register SV *sv, IV i) -{ - sv_setiv(sv,i); - SvSETMAGIC(sv); + if (value == nv_as_uv && SvUVX(sv) != UV_MAX) { + SvNOK_on(sv); + } + SvIOK_on(sv); + } + } + } + } + } +#endif /* NV_PRESERVES_UV */ + } + else { + if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); + assert (SvTYPE(sv) >= SVt_NV); + /* Typically the caller expects that sv_any is not NULL now. */ + /* XXX Ilya implies that this is a bug in callers that assume this + and ideally should be fixed. */ + return 0.0; + } +#if defined(USE_LONG_DOUBLE) + DEBUG_c({ + STORE_NUMERIC_LOCAL_SET_STANDARD(); + PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n", + PTR2UV(sv), SvNVX(sv)); + RESTORE_NUMERIC_LOCAL(); + }); +#else + DEBUG_c({ + STORE_NUMERIC_LOCAL_SET_STANDARD(); + PerlIO_printf(Perl_debug_log, "0x%"UVxf" 1nv(%"NVgf")\n", + PTR2UV(sv), SvNVX(sv)); + RESTORE_NUMERIC_LOCAL(); + }); +#endif + return SvNVX(sv); } -/* -=for apidoc sv_setuv +/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or + * UV as a string towards the end of buf, and return pointers to start and + * end of it. + * + * We assume that buf is at least TYPE_CHARS(UV) long. + */ -Copies an unsigned integer into the given SV, upgrading first if necessary. -Does not handle 'set' magic. See also C. +static char * +S_uiv_2buf(char *buf, IV iv, UV uv, int is_uv, char **peob) +{ + char *ptr = buf + TYPE_CHARS(UV); + char * const ebuf = ptr; + int sign; -=cut -*/ + if (is_uv) + sign = 0; + else if (iv >= 0) { + uv = iv; + sign = 0; + } else { + uv = -iv; + sign = 1; + } + do { + *--ptr = '0' + (char)(uv % 10); + } while (uv /= 10); + if (sign) + *--ptr = '-'; + *peob = ebuf; + return ptr; +} -void -Perl_sv_setuv(pTHX_ register SV *sv, UV u) -{ - /* With these two if statements: - u=1.49 s=0.52 cu=72.49 cs=10.64 scripts=270 tests=20865 +/* stringify_regexp(): private routine for use by sv_2pv_flags(): converts + * a regexp to its stringified form. + */ - without - u=1.35 s=0.47 cu=73.45 cs=11.43 scripts=270 tests=20865 +static char * +S_stringify_regexp(pTHX_ SV *sv, MAGIC *mg, STRLEN *lp) { + const regexp * const re = (regexp *)mg->mg_obj; + + if (!mg->mg_ptr) { + const char *fptr = "msix"; + char reflags[6]; + char ch; + int left = 0; + int right = 4; + bool need_newline = 0; + U16 reganch = (U16)((re->reganch & PMf_COMPILETIME) >> 12); + + while((ch = *fptr++)) { + if(reganch & 1) { + reflags[left++] = ch; + } + else { + reflags[right--] = ch; + } + reganch >>= 1; + } + if(left != 4) { + reflags[left] = '-'; + left = 5; + } - If you wish to remove them, please benchmark to see what the effect is - */ - if (u <= (UV)IV_MAX) { - sv_setiv(sv, (IV)u); - return; + mg->mg_len = re->prelen + 4 + left; + /* + * If /x was used, we have to worry about a regex ending with a + * comment later being embedded within another regex. If so, we don't + * want this regex's "commentization" to leak out to the right part of + * the enclosing regex, we must cap it with a newline. + * + * So, if /x was used, we scan backwards from the end of the regex. If + * we find a '#' before we find a newline, we need to add a newline + * ourself. If we find a '\n' first (or if we don't find '#' or '\n'), + * we don't need to add anything. -jfriedl + */ + if (PMf_EXTENDED & re->reganch) { + const char *endptr = re->precomp + re->prelen; + while (endptr >= re->precomp) { + const char c = *(endptr--); + if (c == '\n') + break; /* don't need another */ + if (c == '#') { + /* we end while in a comment, so we need a newline */ + mg->mg_len++; /* save space for it */ + need_newline = 1; /* note to add it */ + break; + } + } + } + + Newx(mg->mg_ptr, mg->mg_len + 1 + left, char); + mg->mg_ptr[0] = '('; + mg->mg_ptr[1] = '?'; + Copy(reflags, mg->mg_ptr+2, left, char); + *(mg->mg_ptr+left+2) = ':'; + Copy(re->precomp, mg->mg_ptr+3+left, re->prelen, char); + if (need_newline) + mg->mg_ptr[mg->mg_len - 2] = '\n'; + mg->mg_ptr[mg->mg_len - 1] = ')'; + mg->mg_ptr[mg->mg_len] = 0; } - sv_setiv(sv, 0); - SvIsUV_on(sv); - SvUV_set(sv, u); + PL_reginterp_cnt += re->program[0].next_off; + + if (re->reganch & ROPT_UTF8) + SvUTF8_on(sv); + else + SvUTF8_off(sv); + if (lp) + *lp = mg->mg_len; + return mg->mg_ptr; } /* -=for apidoc sv_setuv_mg +=for apidoc sv_2pv_flags -Like C, but also handles 'set' magic. +Returns a pointer to the string value of an SV, and sets *lp to its length. +If flags includes SV_GMAGIC, does an mg_get() first. Coerces sv to a string +if necessary. +Normally invoked via the C macro. C and C +usually end up here too. =cut */ -void -Perl_sv_setuv_mg(pTHX_ register SV *sv, UV u) +char * +Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) { - sv_setiv(sv, 0); - SvIsUV_on(sv); - sv_setuv(sv,u); - SvSETMAGIC(sv); + register char *s; + + if (!sv) { + if (lp) + *lp = 0; + return (char *)""; + } + if (SvGMAGICAL(sv)) { + if (flags & SV_GMAGIC) + mg_get(sv); + if (SvPOKp(sv)) { + if (lp) + *lp = SvCUR(sv); + if (flags & SV_MUTABLE_RETURN) + return SvPVX_mutable(sv); + if (flags & SV_CONST_RETURN) + return (char *)SvPVX_const(sv); + return SvPVX(sv); + } + if (SvIOKp(sv) || SvNOKp(sv)) { + char tbuf[64]; /* Must fit sprintf/Gconvert of longest IV/NV */ + STRLEN len; + + if (SvIOKp(sv)) { + len = SvIsUV(sv) ? my_sprintf(tbuf,"%"UVuf, (UV)SvUVX(sv)) + : my_sprintf(tbuf,"%"IVdf, (IV)SvIVX(sv)); + } else { + Gconvert(SvNVX(sv), NV_DIG, 0, tbuf); + len = strlen(tbuf); + } + if (SvROK(sv)) { /* XXX Skip this when sv_pvn_force calls */ + /* Sneaky stuff here */ + SV * const tsv = newSVpvn(tbuf, len); + + sv_2mortal(tsv); + if (lp) + *lp = SvCUR(tsv); + return SvPVX(tsv); + } + else { + dVAR; + +#ifdef FIXNEGATIVEZERO + if (len == 2 && tbuf[0] == '-' && tbuf[1] == '0') { + tbuf[0] = '0'; + tbuf[1] = 0; + len = 1; + } +#endif + SvUPGRADE(sv, SVt_PV); + if (lp) + *lp = len; + s = SvGROW_mutable(sv, len + 1); + SvCUR_set(sv, len); + SvPOKp_on(sv); + return memcpy(s, tbuf, len + 1); + } + } + if (SvROK(sv)) { + goto return_rok; + } + assert(SvTYPE(sv) >= SVt_PVMG); + /* This falls through to the report_uninit near the end of the + function. */ + } else if (SvTHINKFIRST(sv)) { + if (SvROK(sv)) { + return_rok: + if (SvAMAGIC(sv)) { + SV *const tmpstr = AMG_CALLun(sv,string); + if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { + /* Unwrap this: */ + /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr); + */ + + char *pv; + if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) { + if (flags & SV_CONST_RETURN) { + pv = (char *) SvPVX_const(tmpstr); + } else { + pv = (flags & SV_MUTABLE_RETURN) + ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr); + } + if (lp) + *lp = SvCUR(tmpstr); + } else { + pv = sv_2pv_flags(tmpstr, lp, flags); + } + if (SvUTF8(tmpstr)) + SvUTF8_on(sv); + else + SvUTF8_off(sv); + return pv; + } + } + { + SV *tsv; + MAGIC *mg; + const SV *const referent = (SV*)SvRV(sv); + + if (!referent) { + tsv = sv_2mortal(newSVpvn("NULLREF", 7)); + } else if (SvTYPE(referent) == SVt_PVMG + && ((SvFLAGS(referent) & + (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG)) + == (SVs_OBJECT|SVs_SMG)) + && (mg = mg_find(referent, PERL_MAGIC_qr))) { + return S_stringify_regexp(aTHX_ sv, mg, lp); + } else { + const char *const typestr = sv_reftype(referent, 0); + + tsv = sv_newmortal(); + if (SvOBJECT(referent)) { + const char *const name = HvNAME_get(SvSTASH(referent)); + Perl_sv_setpvf(aTHX_ tsv, "%s=%s(0x%"UVxf")", + name ? name : "__ANON__" , typestr, + PTR2UV(referent)); + } + else + Perl_sv_setpvf(aTHX_ tsv, "%s(0x%"UVxf")", typestr, + PTR2UV(referent)); + } + if (lp) + *lp = SvCUR(tsv); + return SvPVX(tsv); + } + } + if (SvREADONLY(sv) && !SvOK(sv)) { + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); + if (lp) + *lp = 0; + return (char *)""; + } + } + if (SvIOK(sv) || ((SvIOKp(sv) && !SvNOKp(sv)))) { + /* I'm assuming that if both IV and NV are equally valid then + converting the IV is going to be more efficient */ + const U32 isIOK = SvIOK(sv); + const U32 isUIOK = SvIsUV(sv); + char buf[TYPE_CHARS(UV)]; + char *ebuf, *ptr; + + if (SvTYPE(sv) < SVt_PVIV) + sv_upgrade(sv, SVt_PVIV); + ptr = uiv_2buf(buf, SvIVX(sv), SvUVX(sv), isUIOK, &ebuf); + /* inlined from sv_setpvn */ + SvGROW_mutable(sv, (STRLEN)(ebuf - ptr + 1)); + Move(ptr,SvPVX_mutable(sv),ebuf - ptr,char); + SvCUR_set(sv, ebuf - ptr); + s = SvEND(sv); + *s = '\0'; + if (isIOK) + SvIOK_on(sv); + else + SvIOKp_on(sv); + if (isUIOK) + SvIsUV_on(sv); + } + else if (SvNOKp(sv)) { + const int olderrno = errno; + if (SvTYPE(sv) < SVt_PVNV) + sv_upgrade(sv, SVt_PVNV); + /* The +20 is pure guesswork. Configure test needed. --jhi */ + s = SvGROW_mutable(sv, NV_DIG + 20); + /* some Xenix systems wipe out errno here */ +#ifdef apollo + if (SvNVX(sv) == 0.0) + (void)strcpy(s,"0"); + else +#endif /*apollo*/ + { + Gconvert(SvNVX(sv), NV_DIG, 0, s); + } + errno = olderrno; +#ifdef FIXNEGATIVEZERO + if (*s == '-' && s[1] == '0' && !s[2]) + strcpy(s,"0"); +#endif + while (*s) s++; +#ifdef hcx + if (s[-1] == '.') + *--s = '\0'; +#endif + } + else { + if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED)) + report_uninit(sv); + if (lp) + *lp = 0; + if (SvTYPE(sv) < SVt_PV) + /* Typically the caller expects that sv_any is not NULL now. */ + sv_upgrade(sv, SVt_PV); + return (char *)""; + } + { + const STRLEN len = s - SvPVX_const(sv); + if (lp) + *lp = len; + SvCUR_set(sv, len); + } + SvPOK_on(sv); + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n", + PTR2UV(sv),SvPVX_const(sv))); + if (flags & SV_CONST_RETURN) + return (char *)SvPVX_const(sv); + if (flags & SV_MUTABLE_RETURN) + return SvPVX_mutable(sv); + return SvPVX(sv); } /* -=for apidoc sv_setnv +=for apidoc sv_copypv -Copies a double into the given SV, upgrading first if necessary. -Does not handle 'set' magic. See also C. +Copies a stringified representation of the source SV into the +destination SV. Automatically performs any necessary mg_get and +coercion of numeric values into strings. Guaranteed to preserve +UTF-8 flag even from overloaded objects. Similar in nature to +sv_2pv[_flags] but operates directly on an SV instead of just the +string. Mostly uses sv_2pv_flags to do its work, except when that +would lose the UTF-8'ness of the PV. =cut */ void -Perl_sv_setnv(pTHX_ register SV *sv, NV num) +Perl_sv_copypv(pTHX_ SV *dsv, register SV *ssv) { - SV_CHECK_THINKFIRST_COW_DROP(sv); - switch (SvTYPE(sv)) { - case SVt_NULL: - case SVt_IV: - sv_upgrade(sv, SVt_NV); - break; - case SVt_RV: - case SVt_PV: - case SVt_PVIV: - sv_upgrade(sv, SVt_PVNV); - break; - - case SVt_PVGV: - case SVt_PVAV: - case SVt_PVHV: - case SVt_PVCV: - case SVt_PVFM: - case SVt_PVIO: - Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0), - OP_NAME(PL_op)); - } - SvNV_set(sv, num); - (void)SvNOK_only(sv); /* validate number */ - SvTAINT(sv); + STRLEN len; + const char * const s = SvPV_const(ssv,len); + sv_setpvn(dsv,s,len); + if (SvUTF8(ssv)) + SvUTF8_on(dsv); + else + SvUTF8_off(dsv); } /* -=for apidoc sv_setnv_mg +=for apidoc sv_2pvbyte -Like C, but also handles 'set' magic. +Return a pointer to the byte-encoded representation of the SV, and set *lp +to its length. May cause the SV to be downgraded from UTF-8 as a +side-effect. + +Usually accessed via the C macro. =cut */ -void -Perl_sv_setnv_mg(pTHX_ register SV *sv, NV num) +char * +Perl_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp) { - sv_setnv(sv,num); - SvSETMAGIC(sv); + sv_utf8_downgrade(sv,0); + return lp ? SvPV(sv,*lp) : SvPV_nolen(sv); } -/* Print an "isn't numeric" warning, using a cleaned-up, - * printable version of the offending string - */ +/* +=for apidoc sv_2pvutf8 -STATIC void -S_not_a_number(pTHX_ SV *sv) -{ - SV *dsv; - char tmpbuf[64]; - const char *pv; +Return a pointer to the UTF-8-encoded representation of the SV, and set *lp +to its length. May cause the SV to be upgraded to UTF-8 as a side-effect. - if (DO_UTF8(sv)) { - dsv = sv_2mortal(newSVpvn("", 0)); - pv = sv_uni_display(dsv, sv, 10, 0); - } else { - char *d = tmpbuf; - const char * const limit = tmpbuf + sizeof(tmpbuf) - 8; - /* each *s can expand to 4 chars + "...\0", - i.e. need room for 8 chars */ - - const char *s, *end; - for (s = SvPVX_const(sv), end = s + SvCUR(sv); s < end && d < limit; - s++) { - int ch = *s & 0xFF; - if (ch & 128 && !isPRINT_LC(ch)) { - *d++ = 'M'; - *d++ = '-'; - ch &= 127; - } - if (ch == '\n') { - *d++ = '\\'; - *d++ = 'n'; - } - else if (ch == '\r') { - *d++ = '\\'; - *d++ = 'r'; - } - else if (ch == '\f') { - *d++ = '\\'; - *d++ = 'f'; - } - else if (ch == '\\') { - *d++ = '\\'; - *d++ = '\\'; - } - else if (ch == '\0') { - *d++ = '\\'; - *d++ = '0'; - } - else if (isPRINT_LC(ch)) - *d++ = ch; - else { - *d++ = '^'; - *d++ = toCTRL(ch); - } - } - if (s < end) { - *d++ = '.'; - *d++ = '.'; - *d++ = '.'; - } - *d = '\0'; - pv = tmpbuf; - } +Usually accessed via the C macro. - if (PL_op) - Perl_warner(aTHX_ packWARN(WARN_NUMERIC), - "Argument \"%s\" isn't numeric in %s", pv, - OP_DESC(PL_op)); - else - Perl_warner(aTHX_ packWARN(WARN_NUMERIC), - "Argument \"%s\" isn't numeric", pv); +=cut +*/ + +char * +Perl_sv_2pvutf8(pTHX_ register SV *sv, STRLEN *lp) +{ + sv_utf8_upgrade(sv); + return lp ? SvPV(sv,*lp) : SvPV_nolen(sv); } + /* -=for apidoc looks_like_number +=for apidoc sv_2bool -Test if the content of an SV looks like a number (or is a number). -C and C are treated as numbers (so will not issue a -non-numeric warning), even if your atof() doesn't grok them. +This function is only called on magical items, and is only used by +sv_true() or its macro equivalent. =cut */ -I32 -Perl_looks_like_number(pTHX_ SV *sv) +bool +Perl_sv_2bool(pTHX_ register SV *sv) { - register const char *sbegin; - STRLEN len; + SvGETMAGIC(sv); - if (SvPOK(sv)) { - sbegin = SvPVX_const(sv); - len = SvCUR(sv); + if (!SvOK(sv)) + return 0; + if (SvROK(sv)) { + SV* tmpsv; + if (SvAMAGIC(sv) && (tmpsv=AMG_CALLun(sv,bool_)) && + (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) + return (bool)SvTRUE(tmpsv); + return SvRV(sv) != 0; + } + if (SvPOKp(sv)) { + register XPV* const Xpvtmp = (XPV*)SvANY(sv); + if (Xpvtmp && + (*sv->sv_u.svu_pv > '0' || + Xpvtmp->xpv_cur > 1 || + (Xpvtmp->xpv_cur && *sv->sv_u.svu_pv != '0'))) + return 1; + else + return 0; + } + else { + if (SvIOKp(sv)) + return SvIVX(sv) != 0; + else { + if (SvNOKp(sv)) + return SvNVX(sv) != 0.0; + else + return FALSE; + } } - else if (SvPOKp(sv)) - sbegin = SvPV_const(sv, len); - else - return SvFLAGS(sv) & (SVf_NOK|SVp_NOK|SVf_IOK|SVp_IOK); - return grok_number(sbegin, len, NULL); } -/* Actually, ISO C leaves conversion of UV to IV undefined, but - until proven guilty, assume that things are not that bad... */ - /* - NV_PRESERVES_UV: - - As 64 bit platforms often have an NV that doesn't preserve all bits of - an IV (an assumption perl has been based on to date) it becomes necessary - to remove the assumption that the NV always carries enough precision to - recreate the IV whenever needed, and that the NV is the canonical form. - Instead, IV/UV and NV need to be given equal rights. So as to not lose - precision as a side effect of conversion (which would lead to insanity - and the dragon(s) in t/op/numconvert.t getting very angry) the intent is - 1) to distinguish between IV/UV/NV slots that have cached a valid - conversion where precision was lost and IV/UV/NV slots that have a - valid conversion which has lost no precision - 2) to ensure that if a numeric conversion to one form is requested that - would lose precision, the precise conversion (or differently - imprecise conversion) is also performed and cached, to prevent - requests for different numeric formats on the same SV causing - lossy conversion chains. (lossless conversion chains are perfectly - acceptable (still)) - - - flags are used: - SvIOKp is true if the IV slot contains a valid value - SvIOK is true only if the IV value is accurate (UV if SvIOK_UV true) - SvNOKp is true if the NV slot contains a valid value - SvNOK is true only if the NV value is accurate - - so - while converting from PV to NV, check to see if converting that NV to an - IV(or UV) would lose accuracy over a direct conversion from PV to - IV(or UV). If it would, cache both conversions, return NV, but mark - SV as IOK NOKp (ie not NOK). +=for apidoc sv_utf8_upgrade - While converting from PV to IV, check to see if converting that IV to an - NV would lose accuracy over a direct conversion from PV to NV. If it - would, cache both conversions, flag similarly. +Converts the PV of an SV to its UTF-8-encoded form. +Forces the SV to string form if it is not already. +Always sets the SvUTF8 flag to avoid future validity checks even +if all the bytes have hibit clear. - Before, the SV value "3.2" could become NV=3.2 IV=3 NOK, IOK quite - correctly because if IV & NV were set NV *always* overruled. - Now, "3.2" will become NV=3.2 IV=3 NOK, IOKp, because the flag's meaning - changes - now IV and NV together means that the two are interchangeable: - SvIVX == (IV) SvNVX && SvNVX == (NV) SvIVX; +This is not as a general purpose byte encoding to Unicode interface: +use the Encode extension for that. - The benefit of this is that operations such as pp_add know that if - SvIOK is true for both left and right operands, then integer addition - can be used instead of floating point (for cases where the result won't - overflow). Before, floating point was always used, which could lead to - loss of precision compared with integer addition. +=for apidoc sv_utf8_upgrade_flags - * making IV and NV equal status should make maths accurate on 64 bit - platforms - * may speed up maths somewhat if pp_add and friends start to use - integers when possible instead of fp. (Hopefully the overhead in - looking for SvIOK and checking for overflow will not outweigh the - fp to integer speedup) - * will slow down integer operations (callers of SvIV) on "inaccurate" - values, as the change from SvIOK to SvIOKp will cause a call into - sv_2iv each time rather than a macro access direct to the IV slot - * should speed up number->string conversion on integers as IV is - favoured when IV and NV are equally accurate +Converts the PV of an SV to its UTF-8-encoded form. +Forces the SV to string form if it is not already. +Always sets the SvUTF8 flag to avoid future validity checks even +if all the bytes have hibit clear. If C has C bit set, +will C on C if appropriate, else not. C and +C are implemented in terms of this function. - #################################################################### - You had better be using SvIOK_notUV if you want an IV for arithmetic: - SvIOK is true if (IV or UV), so you might be getting (IV)SvUV. - On the other hand, SvUOK is true iff UV. - #################################################################### +This is not as a general purpose byte encoding to Unicode interface: +use the Encode extension for that. - Your mileage will vary depending your CPU's relative fp to integer - performance ratio. +=cut */ -#ifndef NV_PRESERVES_UV -# define IS_NUMBER_UNDERFLOW_IV 1 -# define IS_NUMBER_UNDERFLOW_UV 2 -# define IS_NUMBER_IV_AND_UV 2 -# define IS_NUMBER_OVERFLOW_IV 4 -# define IS_NUMBER_OVERFLOW_UV 5 +STRLEN +Perl_sv_utf8_upgrade_flags(pTHX_ register SV *sv, I32 flags) +{ + if (sv == &PL_sv_undef) + return 0; + if (!SvPOK(sv)) { + STRLEN len = 0; + if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) { + (void) sv_2pv_flags(sv,&len, flags); + if (SvUTF8(sv)) + return len; + } else { + (void) SvPV_force(sv,len); + } + } -/* sv_2iuv_non_preserve(): private routine for use by sv_2iv() and sv_2uv() */ + if (SvUTF8(sv)) { + return SvCUR(sv); + } -/* For sv_2nv these three cases are "SvNOK and don't bother casting" */ -STATIC int -S_sv_2iuv_non_preserve(pTHX_ register SV *sv, I32 numtype) -{ - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%"NVgf" inttype=%"UVXf"\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); - if (SvNVX(sv) < (NV)IV_MIN) { - (void)SvIOKp_on(sv); - (void)SvNOK_on(sv); - SvIV_set(sv, IV_MIN); - return IS_NUMBER_UNDERFLOW_IV; + if (SvIsCOW(sv)) { + sv_force_normal_flags(sv, 0); } - if (SvNVX(sv) > (NV)UV_MAX) { - (void)SvIOKp_on(sv); - (void)SvNOK_on(sv); - SvIsUV_on(sv); - SvUV_set(sv, UV_MAX); - return IS_NUMBER_OVERFLOW_UV; - } - (void)SvIOKp_on(sv); - (void)SvNOK_on(sv); - /* Can't use strtol etc to convert this string. (See truth table in - sv_2iv */ - if (SvNVX(sv) <= (UV)IV_MAX) { - SvIV_set(sv, I_V(SvNVX(sv))); - if ((NV)(SvIVX(sv)) == SvNVX(sv)) { - SvIOK_on(sv); /* Integer is precise. NOK, IOK */ - } else { - /* Integer is imprecise. NOK, IOKp */ - } - return SvNVX(sv) < 0 ? IS_NUMBER_UNDERFLOW_UV : IS_NUMBER_IV_AND_UV; - } - SvIsUV_on(sv); - SvUV_set(sv, U_V(SvNVX(sv))); - if ((NV)(SvUVX(sv)) == SvNVX(sv)) { - if (SvUVX(sv) == UV_MAX) { - /* As we know that NVs don't preserve UVs, UV_MAX cannot - possibly be preserved by NV. Hence, it must be overflow. - NOK, IOKp */ - return IS_NUMBER_OVERFLOW_UV; - } - SvIOK_on(sv); /* Integer is precise. NOK, UOK */ - } else { - /* Integer is imprecise. NOK, IOKp */ + + if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING)) + sv_recode_to_utf8(sv, PL_encoding); + else { /* Assume Latin-1/EBCDIC */ + /* This function could be much more efficient if we + * had a FLAG in SVs to signal if there are any hibit + * chars in the PV. Given that there isn't such a flag + * make the loop as fast as possible. */ + const U8 * const s = (U8 *) SvPVX_const(sv); + const U8 * const e = (U8 *) SvEND(sv); + const U8 *t = s; + + while (t < e) { + const U8 ch = *t++; + /* Check for hi bit */ + if (!NATIVE_IS_INVARIANT(ch)) { + STRLEN len = SvCUR(sv) + 1; /* Plus the \0 */ + U8 * const recoded = bytes_to_utf8((U8*)s, &len); + + SvPV_free(sv); /* No longer using what was there before. */ + SvPV_set(sv, (char*)recoded); + SvCUR_set(sv, len - 1); + SvLEN_set(sv, len); /* No longer know the real size. */ + break; + } + } + /* Mark as UTF-8 even if no hibit - saves scanning loop */ + SvUTF8_on(sv); } - return IS_NUMBER_OVERFLOW_IV; + return SvCUR(sv); } -#endif /* !NV_PRESERVES_UV*/ /* -=for apidoc sv_2iv_flags +=for apidoc sv_utf8_downgrade -Return the integer value of an SV, doing any necessary string -conversion. If flags includes SV_GMAGIC, does an mg_get() first. -Normally used via the C and C macros. +Attempts to convert the PV of an SV from characters to bytes. +If the PV contains a character beyond byte, this conversion will fail; +in this case, either returns false or, if C is not +true, croaks. + +This is not as a general purpose Unicode to byte encoding interface: +use the Encode extension for that. =cut */ -IV -Perl_sv_2iv_flags(pTHX_ register SV *sv, I32 flags) +bool +Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok) { - if (!sv) - return 0; - if (SvGMAGICAL(sv)) { - if (flags & SV_GMAGIC) - mg_get(sv); - if (SvIOKp(sv)) - return SvIVX(sv); - if (SvNOKp(sv)) { - return I_V(SvNVX(sv)); - } - if (SvPOKp(sv) && SvLEN(sv)) - return asIV(sv); - if (!SvROK(sv)) { - if (!(SvFLAGS(sv) & SVs_PADTMP)) { - if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - } - return 0; - } - } - if (SvTHINKFIRST(sv)) { - if (SvROK(sv)) { - if (SvAMAGIC(sv)) { - SV * const tmpstr=AMG_CALLun(sv,numer); - if (tmpstr && (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { - return SvIV(tmpstr); + if (SvPOKp(sv) && SvUTF8(sv)) { + if (SvCUR(sv)) { + U8 *s; + STRLEN len; + + if (SvIsCOW(sv)) { + sv_force_normal_flags(sv, 0); + } + s = (U8 *) SvPV(sv, len); + if (!utf8_to_bytes(s, &len)) { + if (fail_ok) + return FALSE; + else { + if (PL_op) + Perl_croak(aTHX_ "Wide character in %s", + OP_DESC(PL_op)); + else + Perl_croak(aTHX_ "Wide character"); } } - return PTR2IV(SvRV(sv)); - } - if (SvIsCOW(sv)) { - sv_force_normal_flags(sv, 0); - } - if (SvREADONLY(sv) && !SvOK(sv)) { - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - return 0; - } - } - if (SvIOKp(sv)) { - if (SvIsUV(sv)) { - return (IV)(SvUVX(sv)); - } - else { - return SvIVX(sv); + SvCUR_set(sv, len); } } - if (SvNOKp(sv)) { - /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv - * without also getting a cached IV/UV from it at the same time - * (ie PV->NV conversion should detect loss of accuracy and cache - * IV or UV at same time to avoid this. NWC */ + SvUTF8_off(sv); + return TRUE; +} - if (SvTYPE(sv) == SVt_NV) - sv_upgrade(sv, SVt_PVNV); +/* +=for apidoc sv_utf8_encode - (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */ - /* < not <= as for NV doesn't preserve UV, ((NV)IV_MAX+1) will almost - certainly cast into the IV range at IV_MAX, whereas the correct - answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary - cases go to UV */ - if (SvNVX(sv) < (NV)IV_MAX + 0.5) { - SvIV_set(sv, I_V(SvNVX(sv))); - if (SvNVX(sv) == (NV) SvIVX(sv) -#ifndef NV_PRESERVES_UV - && (((UV)1 << NV_PRESERVES_UV_BITS) > - (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv))) - /* Don't flag it as "accurately an integer" if the number - came from a (by definition imprecise) NV operation, and - we're outside the range of NV integer precision */ -#endif - ) { - SvIOK_on(sv); /* Can this go wrong with rounding? NWC */ - DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" iv(%"NVgf" => %"IVdf") (precise)\n", - PTR2UV(sv), - SvNVX(sv), - SvIVX(sv))); +Converts the PV of an SV to UTF-8, but then turns the C +flag off so that it looks like octets again. - } else { - /* IV not precise. No need to convert from PV, as NV - conversion would already have cached IV if it detected - that PV->IV would be better than PV->NV->IV - flags already correct - don't set public IOK. */ - DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" iv(%"NVgf" => %"IVdf") (imprecise)\n", - PTR2UV(sv), - SvNVX(sv), - SvIVX(sv))); - } - /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN, - but the cast (NV)IV_MIN rounds to a the value less (more - negative) than IV_MIN which happens to be equal to SvNVX ?? - Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and - NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and - (NV)UVX == NVX are both true, but the values differ. :-( - Hopefully for 2s complement IV_MIN is something like - 0x8000000000000000 which will be exact. NWC */ - } - else { - SvUV_set(sv, U_V(SvNVX(sv))); - if ( - (SvNVX(sv) == (NV) SvUVX(sv)) -#ifndef NV_PRESERVES_UV - /* Make sure it's not 0xFFFFFFFFFFFFFFFF */ - /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */ - && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv)) - /* Don't flag it as "accurately an integer" if the number - came from a (by definition imprecise) NV operation, and - we're outside the range of NV integer precision */ -#endif - ) - SvIOK_on(sv); - SvIsUV_on(sv); - ret_iv_max: - DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" 2iv(%"UVuf" => %"IVdf") (as unsigned)\n", - PTR2UV(sv), - SvUVX(sv), - SvUVX(sv))); - return (IV)SvUVX(sv); - } +=cut +*/ + +void +Perl_sv_utf8_encode(pTHX_ register SV *sv) +{ + (void) sv_utf8_upgrade(sv); + if (SvIsCOW(sv)) { + sv_force_normal_flags(sv, 0); } - else if (SvPOKp(sv) && SvLEN(sv)) { - UV value; - const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); - /* We want to avoid a possible problem when we cache an IV which - may be later translated to an NV, and the resulting NV is not - the same as the direct translation of the initial string - (eg 123.456 can shortcut to the IV 123 with atol(), but we must - be careful to ensure that the value with the .456 is around if the - NV value is requested in the future). - - This means that if we cache such an IV, we need to cache the - NV as well. Moreover, we trade speed for space, and do not - cache the NV if we are sure it's not needed. - */ + if (SvREADONLY(sv)) { + Perl_croak(aTHX_ PL_no_modify); + } + SvUTF8_off(sv); +} - /* SVt_PVNV is one higher than SVt_PVIV, hence this order */ - if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) - == IS_NUMBER_IN_UV) { - /* It's definitely an integer, only upgrade to PVIV */ - if (SvTYPE(sv) < SVt_PVIV) - sv_upgrade(sv, SVt_PVIV); - (void)SvIOK_on(sv); - } else if (SvTYPE(sv) < SVt_PVNV) - sv_upgrade(sv, SVt_PVNV); +/* +=for apidoc sv_utf8_decode - /* If NV preserves UV then we only use the UV value if we know that - we aren't going to call atof() below. If NVs don't preserve UVs - then the value returned may have more precision than atof() will - return, even though value isn't perfectly accurate. */ - if ((numtype & (IS_NUMBER_IN_UV -#ifdef NV_PRESERVES_UV - | IS_NUMBER_NOT_INT -#endif - )) == IS_NUMBER_IN_UV) { - /* This won't turn off the public IOK flag if it was set above */ - (void)SvIOKp_on(sv); +If the PV of the SV is an octet sequence in UTF-8 +and contains a multiple-byte character, the C flag is turned on +so that it looks like a character. If the PV contains only single-byte +characters, the C flag stays being off. +Scans PV for validity and returns false if the PV is invalid UTF-8. - if (!(numtype & IS_NUMBER_NEG)) { - /* positive */; - if (value <= (UV)IV_MAX) { - SvIV_set(sv, (IV)value); - } else { - SvUV_set(sv, value); - SvIsUV_on(sv); - } - } else { - /* 2s complement assumption */ - if (value <= (UV)IV_MIN) { - SvIV_set(sv, -(IV)value); - } else { - /* Too negative for an IV. This is a double upgrade, but - I'm assuming it will be rare. */ - if (SvTYPE(sv) < SVt_PVNV) - sv_upgrade(sv, SVt_PVNV); - SvNOK_on(sv); - SvIOK_off(sv); - SvIOKp_on(sv); - SvNV_set(sv, -(NV)value); - SvIV_set(sv, IV_MIN); - } - } - } - /* For !NV_PRESERVES_UV and IS_NUMBER_IN_UV and IS_NUMBER_NOT_INT we - will be in the previous block to set the IV slot, and the next - block to set the NV slot. So no else here. */ - - if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) - != IS_NUMBER_IN_UV) { - /* It wasn't an (integer that doesn't overflow the UV). */ - SvNV_set(sv, Atof(SvPVX_const(sv))); - - if (! numtype && ckWARN(WARN_NUMERIC)) - not_a_number(sv); +=cut +*/ -#if defined(USE_LONG_DOUBLE) - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" PERL_PRIgldbl ")\n", - PTR2UV(sv), SvNVX(sv))); -#else - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"NVgf")\n", - PTR2UV(sv), SvNVX(sv))); -#endif +bool +Perl_sv_utf8_decode(pTHX_ register SV *sv) +{ + if (SvPOKp(sv)) { + const U8 *c; + const U8 *e; + /* The octets may have got themselves encoded - get them back as + * bytes + */ + if (!sv_utf8_downgrade(sv, TRUE)) + return FALSE; -#ifdef NV_PRESERVES_UV - (void)SvIOKp_on(sv); - (void)SvNOK_on(sv); - if (SvNVX(sv) < (NV)IV_MAX + 0.5) { - SvIV_set(sv, I_V(SvNVX(sv))); - if ((NV)(SvIVX(sv)) == SvNVX(sv)) { - SvIOK_on(sv); - } else { - /* Integer is imprecise. NOK, IOKp */ - } - /* UV will not work better than IV */ - } else { - if (SvNVX(sv) > (NV)UV_MAX) { - SvIsUV_on(sv); - /* Integer is inaccurate. NOK, IOKp, is UV */ - SvUV_set(sv, UV_MAX); - SvIsUV_on(sv); - } else { - SvUV_set(sv, U_V(SvNVX(sv))); - /* 0xFFFFFFFFFFFFFFFF not an issue in here */ - if ((NV)(SvUVX(sv)) == SvNVX(sv)) { - SvIOK_on(sv); - SvIsUV_on(sv); - } else { - /* Integer is imprecise. NOK, IOKp, is UV */ - SvIsUV_on(sv); - } - } - goto ret_iv_max; + /* it is actually just a matter of turning the utf8 flag on, but + * we want to make sure everything inside is valid utf8 first. + */ + c = (const U8 *) SvPVX_const(sv); + if (!is_utf8_string(c, SvCUR(sv)+1)) + return FALSE; + e = (const U8 *) SvEND(sv); + while (c < e) { + const U8 ch = *c++; + if (!UTF8_IS_INVARIANT(ch)) { + SvUTF8_on(sv); + break; } -#else /* NV_PRESERVES_UV */ - if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) - == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) { - /* The IV slot will have been set from value returned by - grok_number above. The NV slot has just been set using - Atof. */ - SvNOK_on(sv); - assert (SvIOKp(sv)); - } else { - if (((UV)1 << NV_PRESERVES_UV_BITS) > - U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) { - /* Small enough to preserve all bits. */ - (void)SvIOKp_on(sv); - SvNOK_on(sv); - SvIV_set(sv, I_V(SvNVX(sv))); - if ((NV)(SvIVX(sv)) == SvNVX(sv)) - SvIOK_on(sv); - /* Assumption: first non-preserved integer is < IV_MAX, - this NV is in the preserved range, therefore: */ - if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv)) - < (UV)IV_MAX)) { - Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); - } - } else { - /* IN_UV NOT_INT - 0 0 already failed to read UV. - 0 1 already failed to read UV. - 1 0 you won't get here in this case. IV/UV - slot set, public IOK, Atof() unneeded. - 1 1 already read UV. - so there's no point in sv_2iuv_non_preserve() attempting - to use atol, strtol, strtoul etc. */ - if (sv_2iuv_non_preserve (sv, numtype) - >= IS_NUMBER_OVERFLOW_IV) - goto ret_iv_max; - } - } -#endif /* NV_PRESERVES_UV */ - } - } else { - if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - if (SvTYPE(sv) < SVt_IV) - /* Typically the caller expects that sv_any is not NULL now. */ - sv_upgrade(sv, SVt_IV); - return 0; + } } - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"IVdf")\n", - PTR2UV(sv),SvIVX(sv))); - return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv); + return TRUE; } /* -=for apidoc sv_2uv_flags +=for apidoc sv_setsv -Return the unsigned integer value of an SV, doing any necessary string -conversion. If flags includes SV_GMAGIC, does an mg_get() first. -Normally used via the C and C macros. +Copies the contents of the source SV C into the destination SV +C. The source SV may be destroyed if it is mortal, so don't use this +function if the source SV needs to be reused. Does not handle 'set' magic. +Loosely speaking, it performs a copy-by-value, obliterating any previous +content of the destination. + +You probably want to use one of the assortment of wrappers, such as +C, C, C and +C. + +=for apidoc sv_setsv_flags + +Copies the contents of the source SV C into the destination SV +C. The source SV may be destroyed if it is mortal, so don't use this +function if the source SV needs to be reused. Does not handle 'set' magic. +Loosely speaking, it performs a copy-by-value, obliterating any previous +content of the destination. +If the C parameter has the C bit set, will C on +C if appropriate, else not. If the C parameter has the +C bit set then the buffers of temps will not be stolen. +and C are implemented in terms of this function. + +You probably want to use one of the assortment of wrappers, such as +C, C, C and +C. + +This is the primary function for copying scalars, and most other +copy-ish functions and macros use this underneath. =cut */ -UV -Perl_sv_2uv_flags(pTHX_ register SV *sv, I32 flags) +void +Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags) { - if (!sv) - return 0; - if (SvGMAGICAL(sv)) { - if (flags & SV_GMAGIC) - mg_get(sv); - if (SvIOKp(sv)) - return SvUVX(sv); - if (SvNOKp(sv)) - return U_V(SvNVX(sv)); - if (SvPOKp(sv) && SvLEN(sv)) - return asUV(sv); - if (!SvROK(sv)) { - if (!(SvFLAGS(sv) & SVs_PADTMP)) { - if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - } - return 0; - } - } - if (SvTHINKFIRST(sv)) { - if (SvROK(sv)) { - SV* tmpstr; - if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,numer)) && - (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) - return SvUV(tmpstr); - return PTR2UV(SvRV(sv)); - } - if (SvIsCOW(sv)) { - sv_force_normal_flags(sv, 0); - } - if (SvREADONLY(sv) && !SvOK(sv)) { - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - return 0; - } + register U32 sflags; + register int dtype; + register int stype; + + if (sstr == dstr) + return; + SV_CHECK_THINKFIRST_COW_DROP(dstr); + if (!sstr) + sstr = &PL_sv_undef; + stype = SvTYPE(sstr); + dtype = SvTYPE(dstr); + + SvAMAGIC_off(dstr); + if ( SvVOK(dstr) ) + { + /* need to nuke the magic */ + mg_free(dstr); + SvRMAGICAL_off(dstr); } - if (SvIOKp(sv)) { - if (SvIsUV(sv)) { - return SvUVX(sv); + + /* There's a lot of redundancy below but we're going for speed here */ + + switch (stype) { + case SVt_NULL: + undef_sstr: + if (dtype != SVt_PVGV) { + (void)SvOK_off(dstr); + return; } - else { - return (UV)SvIVX(sv); + break; + case SVt_IV: + if (SvIOK(sstr)) { + switch (dtype) { + case SVt_NULL: + sv_upgrade(dstr, SVt_IV); + break; + case SVt_NV: + sv_upgrade(dstr, SVt_PVNV); + break; + case SVt_RV: + case SVt_PV: + sv_upgrade(dstr, SVt_PVIV); + break; + } + (void)SvIOK_only(dstr); + SvIV_set(dstr, SvIVX(sstr)); + if (SvIsUV(sstr)) + SvIsUV_on(dstr); + if (SvTAINTED(sstr)) + SvTAINT(dstr); + return; } - } - if (SvNOKp(sv)) { - /* erm. not sure. *should* never get NOKp (without NOK) from sv_2nv - * without also getting a cached IV/UV from it at the same time - * (ie PV->NV conversion should detect loss of accuracy and cache - * IV or UV at same time to avoid this. */ - /* IV-over-UV optimisation - choose to cache IV if possible */ - - if (SvTYPE(sv) == SVt_NV) - sv_upgrade(sv, SVt_PVNV); + goto undef_sstr; - (void)SvIOKp_on(sv); /* Must do this first, to clear any SvOOK */ - if (SvNVX(sv) < (NV)IV_MAX + 0.5) { - SvIV_set(sv, I_V(SvNVX(sv))); - if (SvNVX(sv) == (NV) SvIVX(sv) -#ifndef NV_PRESERVES_UV - && (((UV)1 << NV_PRESERVES_UV_BITS) > - (UV)(SvIVX(sv) > 0 ? SvIVX(sv) : -SvIVX(sv))) - /* Don't flag it as "accurately an integer" if the number - came from a (by definition imprecise) NV operation, and - we're outside the range of NV integer precision */ -#endif - ) { - SvIOK_on(sv); /* Can this go wrong with rounding? NWC */ - DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" uv(%"NVgf" => %"IVdf") (precise)\n", - PTR2UV(sv), - SvNVX(sv), - SvIVX(sv))); + case SVt_NV: + if (SvNOK(sstr)) { + switch (dtype) { + case SVt_NULL: + case SVt_IV: + sv_upgrade(dstr, SVt_NV); + break; + case SVt_RV: + case SVt_PV: + case SVt_PVIV: + sv_upgrade(dstr, SVt_PVNV); + break; + } + SvNV_set(dstr, SvNVX(sstr)); + (void)SvNOK_only(dstr); + if (SvTAINTED(sstr)) + SvTAINT(dstr); + return; + } + goto undef_sstr; - } else { - /* IV not precise. No need to convert from PV, as NV - conversion would already have cached IV if it detected - that PV->IV would be better than PV->NV->IV - flags already correct - don't set public IOK. */ - DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" uv(%"NVgf" => %"IVdf") (imprecise)\n", - PTR2UV(sv), - SvNVX(sv), - SvIVX(sv))); + case SVt_RV: + if (dtype < SVt_RV) + sv_upgrade(dstr, SVt_RV); + else if (dtype == SVt_PVGV && + SvROK(sstr) && SvTYPE(SvRV(sstr)) == SVt_PVGV) { + sstr = SvRV(sstr); + if (sstr == dstr) { + if (GvIMPORTED(dstr) != GVf_IMPORTED + && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) + { + GvIMPORTED_on(dstr); + } + GvMULTI_on(dstr); + return; } - /* Can the above go wrong if SvIVX == IV_MIN and SvNVX < IV_MIN, - but the cast (NV)IV_MIN rounds to a the value less (more - negative) than IV_MIN which happens to be equal to SvNVX ?? - Analogous to 0xFFFFFFFFFFFFFFFF rounding up to NV (2**64) and - NV rounding back to 0xFFFFFFFFFFFFFFFF, so UVX == UV(NVX) and - (NV)UVX == NVX are both true, but the values differ. :-( - Hopefully for 2s complement IV_MIN is something like - 0x8000000000000000 which will be exact. NWC */ + goto glob_assign; } - else { - SvUV_set(sv, U_V(SvNVX(sv))); - if ( - (SvNVX(sv) == (NV) SvUVX(sv)) -#ifndef NV_PRESERVES_UV - /* Make sure it's not 0xFFFFFFFFFFFFFFFF */ - /*&& (SvUVX(sv) != UV_MAX) irrelevant with code below */ - && (((UV)1 << NV_PRESERVES_UV_BITS) > SvUVX(sv)) - /* Don't flag it as "accurately an integer" if the number - came from a (by definition imprecise) NV operation, and - we're outside the range of NV integer precision */ + break; + case SVt_PVFM: +#ifdef PERL_OLD_COPY_ON_WRITE + if ((SvFLAGS(sstr) & CAN_COW_MASK) == CAN_COW_FLAGS) { + if (dtype < SVt_PVIV) + sv_upgrade(dstr, SVt_PVIV); + break; + } + /* Fall through */ #endif - ) - SvIOK_on(sv); - SvIsUV_on(sv); - DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" 2uv(%"UVuf" => %"IVdf") (as unsigned)\n", - PTR2UV(sv), - SvUVX(sv), - SvUVX(sv))); + case SVt_PV: + if (dtype < SVt_PV) + sv_upgrade(dstr, SVt_PV); + break; + case SVt_PVIV: + if (dtype < SVt_PVIV) + sv_upgrade(dstr, SVt_PVIV); + break; + case SVt_PVNV: + if (dtype < SVt_PVNV) + sv_upgrade(dstr, SVt_PVNV); + break; + case SVt_PVAV: + case SVt_PVHV: + case SVt_PVCV: + case SVt_PVIO: + { + const char * const type = sv_reftype(sstr,0); + if (PL_op) + Perl_croak(aTHX_ "Bizarre copy of %s in %s", type, OP_NAME(PL_op)); + else + Perl_croak(aTHX_ "Bizarre copy of %s", type); } - } - else if (SvPOKp(sv) && SvLEN(sv)) { - UV value; - const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); - - /* We want to avoid a possible problem when we cache a UV which - may be later translated to an NV, and the resulting NV is not - the translation of the initial data. - - This means that if we cache such a UV, we need to cache the - NV as well. Moreover, we trade speed for space, and do not - cache the NV if not needed. - */ + break; - /* SVt_PVNV is one higher than SVt_PVIV, hence this order */ - if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) - == IS_NUMBER_IN_UV) { - /* It's definitely an integer, only upgrade to PVIV */ - if (SvTYPE(sv) < SVt_PVIV) - sv_upgrade(sv, SVt_PVIV); - (void)SvIOK_on(sv); - } else if (SvTYPE(sv) < SVt_PVNV) - sv_upgrade(sv, SVt_PVNV); + case SVt_PVGV: + if (dtype <= SVt_PVGV) { + glob_assign: + if (dtype != SVt_PVGV) { + const char * const name = GvNAME(sstr); + const STRLEN len = GvNAMELEN(sstr); + /* don't upgrade SVt_PVLV: it can hold a glob */ + if (dtype != SVt_PVLV) + sv_upgrade(dstr, SVt_PVGV); + sv_magic(dstr, dstr, PERL_MAGIC_glob, Nullch, 0); + GvSTASH(dstr) = GvSTASH(sstr); + if (GvSTASH(dstr)) + Perl_sv_add_backref(aTHX_ (SV*)GvSTASH(dstr), dstr); + GvNAME(dstr) = savepvn(name, len); + GvNAMELEN(dstr) = len; + SvFAKE_on(dstr); /* can coerce to non-glob */ + } - /* If NV preserves UV then we only use the UV value if we know that - we aren't going to call atof() below. If NVs don't preserve UVs - then the value returned may have more precision than atof() will - return, even though it isn't accurate. */ - if ((numtype & (IS_NUMBER_IN_UV -#ifdef NV_PRESERVES_UV - | IS_NUMBER_NOT_INT +#ifdef GV_UNIQUE_CHECK + if (GvUNIQUE((GV*)dstr)) { + Perl_croak(aTHX_ PL_no_modify); + } #endif - )) == IS_NUMBER_IN_UV) { - /* This won't turn off the public IOK flag if it was set above */ - (void)SvIOKp_on(sv); - if (!(numtype & IS_NUMBER_NEG)) { - /* positive */; - if (value <= (UV)IV_MAX) { - SvIV_set(sv, (IV)value); - } else { - /* it didn't overflow, and it was positive. */ - SvUV_set(sv, value); - SvIsUV_on(sv); - } - } else { - /* 2s complement assumption */ - if (value <= (UV)IV_MIN) { - SvIV_set(sv, -(IV)value); - } else { - /* Too negative for an IV. This is a double upgrade, but - I'm assuming it will be rare. */ - if (SvTYPE(sv) < SVt_PVNV) - sv_upgrade(sv, SVt_PVNV); - SvNOK_on(sv); - SvIOK_off(sv); - SvIOKp_on(sv); - SvNV_set(sv, -(NV)value); - SvIV_set(sv, IV_MIN); - } + (void)SvOK_off(dstr); + GvINTRO_off(dstr); /* one-shot flag */ + gp_free((GV*)dstr); + GvGP(dstr) = gp_ref(GvGP(sstr)); + if (SvTAINTED(sstr)) + SvTAINT(dstr); + if (GvIMPORTED(dstr) != GVf_IMPORTED + && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) + { + GvIMPORTED_on(dstr); } + GvMULTI_on(dstr); + return; } - - if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) - != IS_NUMBER_IN_UV) { - /* It wasn't an integer, or it overflowed the UV. */ - SvNV_set(sv, Atof(SvPVX_const(sv))); + /* FALL THROUGH */ - if (! numtype && ckWARN(WARN_NUMERIC)) - not_a_number(sv); + default: + if (SvGMAGICAL(sstr) && (flags & SV_GMAGIC)) { + mg_get(sstr); + if ((int)SvTYPE(sstr) != stype) { + stype = SvTYPE(sstr); + if (stype == SVt_PVGV && dtype <= SVt_PVGV) + goto glob_assign; + } + } + if (stype == SVt_PVLV) + SvUPGRADE(dstr, SVt_PVNV); + else + SvUPGRADE(dstr, (U32)stype); + } -#if defined(USE_LONG_DOUBLE) - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%" PERL_PRIgldbl ")\n", - PTR2UV(sv), SvNVX(sv))); -#else - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%"NVgf")\n", - PTR2UV(sv), SvNVX(sv))); -#endif + sflags = SvFLAGS(sstr); -#ifdef NV_PRESERVES_UV - (void)SvIOKp_on(sv); - (void)SvNOK_on(sv); - if (SvNVX(sv) < (NV)IV_MAX + 0.5) { - SvIV_set(sv, I_V(SvNVX(sv))); - if ((NV)(SvIVX(sv)) == SvNVX(sv)) { - SvIOK_on(sv); - } else { - /* Integer is imprecise. NOK, IOKp */ - } - /* UV will not work better than IV */ - } else { - if (SvNVX(sv) > (NV)UV_MAX) { - SvIsUV_on(sv); - /* Integer is inaccurate. NOK, IOKp, is UV */ - SvUV_set(sv, UV_MAX); - SvIsUV_on(sv); - } else { - SvUV_set(sv, U_V(SvNVX(sv))); - /* 0xFFFFFFFFFFFFFFFF not an issue in here, NVs - NV preservse UV so can do correct comparison. */ - if ((NV)(SvUVX(sv)) == SvNVX(sv)) { - SvIOK_on(sv); - SvIsUV_on(sv); - } else { - /* Integer is imprecise. NOK, IOKp, is UV */ - SvIsUV_on(sv); - } + if (sflags & SVf_ROK) { + if (dtype >= SVt_PV) { + if (dtype == SVt_PVGV) { + SV * const sref = SvREFCNT_inc(SvRV(sstr)); + SV *dref = 0; + const int intro = GvINTRO(dstr); + +#ifdef GV_UNIQUE_CHECK + if (GvUNIQUE((GV*)dstr)) { + Perl_croak(aTHX_ PL_no_modify); } - } -#else /* NV_PRESERVES_UV */ - if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) - == (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) { - /* The UV slot will have been set from value returned by - grok_number above. The NV slot has just been set using - Atof. */ - SvNOK_on(sv); - assert (SvIOKp(sv)); - } else { - if (((UV)1 << NV_PRESERVES_UV_BITS) > - U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) { - /* Small enough to preserve all bits. */ - (void)SvIOKp_on(sv); - SvNOK_on(sv); - SvIV_set(sv, I_V(SvNVX(sv))); - if ((NV)(SvIVX(sv)) == SvNVX(sv)) - SvIOK_on(sv); - /* Assumption: first non-preserved integer is < IV_MAX, - this NV is in the preserved range, therefore: */ - if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv)) - < (UV)IV_MAX)) { - Perl_croak(aTHX_ "sv_2uv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); - } - } else - sv_2iuv_non_preserve (sv, numtype); - } -#endif /* NV_PRESERVES_UV */ - } - } - else { - if (!(SvFLAGS(sv) & SVs_PADTMP)) { - if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - } - if (SvTYPE(sv) < SVt_IV) - /* Typically the caller expects that sv_any is not NULL now. */ - sv_upgrade(sv, SVt_IV); - return 0; - } - - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%"UVuf")\n", - PTR2UV(sv),SvUVX(sv))); - return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv); -} - -/* -=for apidoc sv_2nv - -Return the num value of an SV, doing any necessary string or integer -conversion, magic etc. Normally used via the C and C -macros. - -=cut -*/ +#endif -NV -Perl_sv_2nv(pTHX_ register SV *sv) -{ - if (!sv) - return 0.0; - if (SvGMAGICAL(sv)) { - mg_get(sv); - if (SvNOKp(sv)) - return SvNVX(sv); - if (SvPOKp(sv) && SvLEN(sv)) { - if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) && - !grok_number(SvPVX_const(sv), SvCUR(sv), NULL)) - not_a_number(sv); - return Atof(SvPVX_const(sv)); - } - if (SvIOKp(sv)) { - if (SvIsUV(sv)) - return (NV)SvUVX(sv); - else - return (NV)SvIVX(sv); - } - if (!SvROK(sv)) { - if (!(SvFLAGS(sv) & SVs_PADTMP)) { - if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); + if (intro) { + GvINTRO_off(dstr); /* one-shot flag */ + GvLINE(dstr) = CopLINE(PL_curcop); + GvEGV(dstr) = (GV*)dstr; + } + GvMULTI_on(dstr); + switch (SvTYPE(sref)) { + case SVt_PVAV: + if (intro) + SAVEGENERICSV(GvAV(dstr)); + else + dref = (SV*)GvAV(dstr); + GvAV(dstr) = (AV*)sref; + if (!GvIMPORTED_AV(dstr) + && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) + { + GvIMPORTED_AV_on(dstr); + } + break; + case SVt_PVHV: + if (intro) + SAVEGENERICSV(GvHV(dstr)); + else + dref = (SV*)GvHV(dstr); + GvHV(dstr) = (HV*)sref; + if (!GvIMPORTED_HV(dstr) + && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) + { + GvIMPORTED_HV_on(dstr); + } + break; + case SVt_PVCV: + if (intro) { + if (GvCVGEN(dstr) && GvCV(dstr) != (CV*)sref) { + SvREFCNT_dec(GvCV(dstr)); + GvCV(dstr) = Nullcv; + GvCVGEN(dstr) = 0; /* Switch off cacheness. */ + PL_sub_generation++; + } + SAVEGENERICSV(GvCV(dstr)); + } + else + dref = (SV*)GvCV(dstr); + if (GvCV(dstr) != (CV*)sref) { + CV* const cv = GvCV(dstr); + if (cv) { + if (!GvCVGEN((GV*)dstr) && + (CvROOT(cv) || CvXSUB(cv))) + { + /* Redefining a sub - warning is mandatory if + it was a const and its value changed. */ + if (ckWARN(WARN_REDEFINE) + || (CvCONST(cv) + && (!CvCONST((CV*)sref) + || sv_cmp(cv_const_sv(cv), + cv_const_sv((CV*)sref))))) + { + Perl_warner(aTHX_ packWARN(WARN_REDEFINE), + CvCONST(cv) + ? "Constant subroutine %s::%s redefined" + : "Subroutine %s::%s redefined", + HvNAME_get(GvSTASH((GV*)dstr)), + GvENAME((GV*)dstr)); + } + } + if (!intro) + cv_ckproto(cv, (GV*)dstr, + SvPOK(sref) + ? SvPVX_const(sref) : Nullch); + } + GvCV(dstr) = (CV*)sref; + GvCVGEN(dstr) = 0; /* Switch off cacheness. */ + GvASSUMECV_on(dstr); + PL_sub_generation++; + } + if (!GvIMPORTED_CV(dstr) + && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) + { + GvIMPORTED_CV_on(dstr); + } + break; + case SVt_PVIO: + if (intro) + SAVEGENERICSV(GvIOp(dstr)); + else + dref = (SV*)GvIOp(dstr); + GvIOp(dstr) = (IO*)sref; + break; + case SVt_PVFM: + if (intro) + SAVEGENERICSV(GvFORM(dstr)); + else + dref = (SV*)GvFORM(dstr); + GvFORM(dstr) = (CV*)sref; + break; + default: + if (intro) + SAVEGENERICSV(GvSV(dstr)); + else + dref = (SV*)GvSV(dstr); + GvSV(dstr) = sref; + if (!GvIMPORTED_SV(dstr) + && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) + { + GvIMPORTED_SV_on(dstr); + } + break; + } + if (dref) + SvREFCNT_dec(dref); + if (SvTAINTED(sstr)) + SvTAINT(dstr); + return; + } + if (SvPVX_const(dstr)) { + SvPV_free(dstr); + SvLEN_set(dstr, 0); + SvCUR_set(dstr, 0); } - return (NV)0; - } - } - if (SvTHINKFIRST(sv)) { - if (SvROK(sv)) { - SV* tmpstr; - if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,numer)) && - (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) - return SvNV(tmpstr); - return PTR2NV(SvRV(sv)); } - if (SvIsCOW(sv)) { - sv_force_normal_flags(sv, 0); + (void)SvOK_off(dstr); + SvRV_set(dstr, SvREFCNT_inc(SvRV(sstr))); + SvROK_on(dstr); + if (sflags & SVp_NOK) { + SvNOKp_on(dstr); + /* Only set the public OK flag if the source has public OK. */ + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; + SvNV_set(dstr, SvNVX(sstr)); } - if (SvREADONLY(sv) && !SvOK(sv)) { - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - return 0.0; + if (sflags & SVp_IOK) { + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; + if (sflags & SVf_IVisUV) + SvIsUV_on(dstr); + SvIV_set(dstr, SvIVX(sstr)); + } + if (SvAMAGIC(sstr)) { + SvAMAGIC_on(dstr); } } - if (SvTYPE(sv) < SVt_NV) { - if (SvTYPE(sv) == SVt_IV) - sv_upgrade(sv, SVt_PVNV); - else - sv_upgrade(sv, SVt_NV); -#ifdef USE_LONG_DOUBLE - DEBUG_c({ - STORE_NUMERIC_LOCAL_SET_STANDARD(); - PerlIO_printf(Perl_debug_log, - "0x%"UVxf" num(%" PERL_PRIgldbl ")\n", - PTR2UV(sv), SvNVX(sv)); - RESTORE_NUMERIC_LOCAL(); - }); -#else - DEBUG_c({ - STORE_NUMERIC_LOCAL_SET_STANDARD(); - PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%"NVgf")\n", - PTR2UV(sv), SvNVX(sv)); - RESTORE_NUMERIC_LOCAL(); - }); -#endif - } - else if (SvTYPE(sv) < SVt_PVNV) - sv_upgrade(sv, SVt_PVNV); - if (SvNOKp(sv)) { - return SvNVX(sv); - } - if (SvIOKp(sv)) { - SvNV_set(sv, SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv)); -#ifdef NV_PRESERVES_UV - SvNOK_on(sv); -#else - /* Only set the public NV OK flag if this NV preserves the IV */ - /* Check it's not 0xFFFFFFFFFFFFFFFF */ - if (SvIsUV(sv) ? ((SvUVX(sv) != UV_MAX)&&(SvUVX(sv) == U_V(SvNVX(sv)))) - : (SvIVX(sv) == I_V(SvNVX(sv)))) - SvNOK_on(sv); - else - SvNOKp_on(sv); -#endif - } - else if (SvPOKp(sv) && SvLEN(sv)) { - UV value; - const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); - if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC)) - not_a_number(sv); -#ifdef NV_PRESERVES_UV - if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) - == IS_NUMBER_IN_UV) { - /* It's definitely an integer */ - SvNV_set(sv, (numtype & IS_NUMBER_NEG) ? -(NV)value : (NV)value); - } else - SvNV_set(sv, Atof(SvPVX_const(sv))); - SvNOK_on(sv); -#else - SvNV_set(sv, Atof(SvPVX_const(sv))); - /* Only set the public NV OK flag if this NV preserves the value in - the PV at least as well as an IV/UV would. - Not sure how to do this 100% reliably. */ - /* if that shift count is out of range then Configure's test is - wonky. We shouldn't be in here with NV_PRESERVES_UV_BITS == - UV_BITS */ - if (((UV)1 << NV_PRESERVES_UV_BITS) > - U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv))) { - SvNOK_on(sv); /* Definitely small enough to preserve all bits */ - } else if (!(numtype & IS_NUMBER_IN_UV)) { - /* Can't use strtol etc to convert this string, so don't try. - sv_2iv and sv_2uv will use the NV to convert, not the PV. */ - SvNOK_on(sv); - } else { - /* value has been set. It may not be precise. */ - if ((numtype & IS_NUMBER_NEG) && (value > (UV)IV_MIN)) { - /* 2s complement assumption for (UV)IV_MIN */ - SvNOK_on(sv); /* Integer is too negative. */ - } else { - SvNOKp_on(sv); - SvIOKp_on(sv); - - if (numtype & IS_NUMBER_NEG) { - SvIV_set(sv, -(IV)value); - } else if (value <= (UV)IV_MAX) { - SvIV_set(sv, (IV)value); - } else { - SvUV_set(sv, value); - SvIsUV_on(sv); - } + else if (sflags & SVp_POK) { + bool isSwipe = 0; - if (numtype & IS_NUMBER_NOT_INT) { - /* I believe that even if the original PV had decimals, - they are lost beyond the limit of the FP precision. - However, neither is canonical, so both only get p - flags. NWC, 2000/11/25 */ - /* Both already have p flags, so do nothing */ - } else { - const NV nv = SvNVX(sv); - if (SvNVX(sv) < (NV)IV_MAX + 0.5) { - if (SvIVX(sv) == I_V(nv)) { - SvNOK_on(sv); - SvIOK_on(sv); - } else { - SvIOK_on(sv); - /* It had no "." so it must be integer. */ - } - } else { - /* between IV_MAX and NV(UV_MAX). - Could be slightly > UV_MAX */ + /* + * Check to see if we can just swipe the string. If so, it's a + * possible small lose on short strings, but a big win on long ones. + * It might even be a win on short strings if SvPVX_const(dstr) + * has to be allocated and SvPVX_const(sstr) has to be freed. + */ - if (numtype & IS_NUMBER_NOT_INT) { - /* UV and NV both imprecise. */ - } else { - const UV nv_as_uv = U_V(nv); + /* Whichever path we take through the next code, we want this true, + and doing it now facilitates the COW check. */ + (void)SvPOK_only(dstr); - if (value == nv_as_uv && SvUVX(sv) != UV_MAX) { - SvNOK_on(sv); - SvIOK_on(sv); - } else { - SvIOK_on(sv); - } - } - } + if ( + /* We're not already COW */ + ((sflags & (SVf_FAKE | SVf_READONLY)) != (SVf_FAKE | SVf_READONLY) +#ifndef PERL_OLD_COPY_ON_WRITE + /* or we are, but dstr isn't a suitable target. */ + || (SvFLAGS(dstr) & CAN_COW_MASK) != CAN_COW_FLAGS +#endif + ) + && + !(isSwipe = + (sflags & SVs_TEMP) && /* slated for free anyway? */ + !(sflags & SVf_OOK) && /* and not involved in OOK hack? */ + (!(flags & SV_NOSTEAL)) && + /* and we're allowed to steal temps */ + SvREFCNT(sstr) == 1 && /* and no other references to it? */ + SvLEN(sstr) && /* and really is a string */ + /* and won't be needed again, potentially */ + !(PL_op && PL_op->op_type == OP_AASSIGN)) +#ifdef PERL_OLD_COPY_ON_WRITE + && !((sflags & CAN_COW_MASK) == CAN_COW_FLAGS + && (SvFLAGS(dstr) & CAN_COW_MASK) == CAN_COW_FLAGS + && SvTYPE(sstr) >= SVt_PVIV) +#endif + ) { + /* Failed the swipe test, and it's not a shared hash key either. + Have to copy the string. */ + STRLEN len = SvCUR(sstr); + SvGROW(dstr, len + 1); /* inlined from sv_setpvn */ + Move(SvPVX_const(sstr),SvPVX(dstr),len,char); + SvCUR_set(dstr, len); + *SvEND(dstr) = '\0'; + } else { + /* If PERL_OLD_COPY_ON_WRITE is not defined, then isSwipe will always + be true in here. */ + /* Either it's a shared hash key, or it's suitable for + copy-on-write or we can swipe the string. */ + if (DEBUG_C_TEST) { + PerlIO_printf(Perl_debug_log, "Copy on write: sstr --> dstr\n"); + sv_dump(sstr); + sv_dump(dstr); + } +#ifdef PERL_OLD_COPY_ON_WRITE + if (!isSwipe) { + /* I believe I should acquire a global SV mutex if + it's a COW sv (not a shared hash key) to stop + it going un copy-on-write. + If the source SV has gone un copy on write between up there + and down here, then (assert() that) it is of the correct + form to make it copy on write again */ + if ((sflags & (SVf_FAKE | SVf_READONLY)) + != (SVf_FAKE | SVf_READONLY)) { + SvREADONLY_on(sstr); + SvFAKE_on(sstr); + /* Make the source SV into a loop of 1. + (about to become 2) */ + SV_COW_NEXT_SV_SET(sstr, sstr); } } - } -#endif /* NV_PRESERVES_UV */ - } - else { - if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - if (SvTYPE(sv) < SVt_NV) - /* Typically the caller expects that sv_any is not NULL now. */ - /* XXX Ilya implies that this is a bug in callers that assume this - and ideally should be fixed. */ - sv_upgrade(sv, SVt_NV); - return 0.0; - } -#if defined(USE_LONG_DOUBLE) - DEBUG_c({ - STORE_NUMERIC_LOCAL_SET_STANDARD(); - PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n", - PTR2UV(sv), SvNVX(sv)); - RESTORE_NUMERIC_LOCAL(); - }); -#else - DEBUG_c({ - STORE_NUMERIC_LOCAL_SET_STANDARD(); - PerlIO_printf(Perl_debug_log, "0x%"UVxf" 1nv(%"NVgf")\n", - PTR2UV(sv), SvNVX(sv)); - RESTORE_NUMERIC_LOCAL(); - }); #endif - return SvNVX(sv); -} + /* Initial code is common. */ + if (SvPVX_const(dstr)) { /* we know that dtype >= SVt_PV */ + SvPV_free(dstr); + } -/* asIV(): extract an integer from the string value of an SV. - * Caller must validate PVX */ + if (!isSwipe) { + /* making another shared SV. */ + STRLEN cur = SvCUR(sstr); + STRLEN len = SvLEN(sstr); +#ifdef PERL_OLD_COPY_ON_WRITE + if (len) { + assert (SvTYPE(dstr) >= SVt_PVIV); + /* SvIsCOW_normal */ + /* splice us in between source and next-after-source. */ + SV_COW_NEXT_SV_SET(dstr, SV_COW_NEXT_SV(sstr)); + SV_COW_NEXT_SV_SET(sstr, dstr); + SvPV_set(dstr, SvPVX_mutable(sstr)); + } else +#endif + { + /* SvIsCOW_shared_hash */ + DEBUG_C(PerlIO_printf(Perl_debug_log, + "Copy on write: Sharing hash\n")); -STATIC IV -S_asIV(pTHX_ SV *sv) -{ - UV value; - const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); + assert (SvTYPE(dstr) >= SVt_PV); + SvPV_set(dstr, + HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sstr))))); + } + SvLEN_set(dstr, len); + SvCUR_set(dstr, cur); + SvREADONLY_on(dstr); + SvFAKE_on(dstr); + /* Relesase a global SV mutex. */ + } + else + { /* Passes the swipe test. */ + SvPV_set(dstr, SvPVX_mutable(sstr)); + SvLEN_set(dstr, SvLEN(sstr)); + SvCUR_set(dstr, SvCUR(sstr)); - if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) - == IS_NUMBER_IN_UV) { - /* It's definitely an integer */ - if (numtype & IS_NUMBER_NEG) { - if (value < (UV)IV_MIN) - return -(IV)value; - } else { - if (value < (UV)IV_MAX) - return (IV)value; + SvTEMP_off(dstr); + (void)SvOK_off(sstr); /* NOTE: nukes most SvFLAGS on sstr */ + SvPV_set(sstr, Nullch); + SvLEN_set(sstr, 0); + SvCUR_set(sstr, 0); + SvTEMP_off(sstr); + } + } + if (sflags & SVf_UTF8) + SvUTF8_on(dstr); + if (sflags & SVp_NOK) { + SvNOKp_on(dstr); + if (sflags & SVf_NOK) + SvFLAGS(dstr) |= SVf_NOK; + SvNV_set(dstr, SvNVX(sstr)); + } + if (sflags & SVp_IOK) { + (void)SvIOKp_on(dstr); + if (sflags & SVf_IOK) + SvFLAGS(dstr) |= SVf_IOK; + if (sflags & SVf_IVisUV) + SvIsUV_on(dstr); + SvIV_set(dstr, SvIVX(sstr)); + } + if (SvVOK(sstr)) { + const MAGIC * const smg = mg_find(sstr,PERL_MAGIC_vstring); + sv_magic(dstr, NULL, PERL_MAGIC_vstring, + smg->mg_ptr, smg->mg_len); + SvRMAGICAL_on(dstr); } } - if (!numtype) { - if (ckWARN(WARN_NUMERIC)) - not_a_number(sv); + else if (sflags & SVp_IOK) { + if (sflags & SVf_IOK) + (void)SvIOK_only(dstr); + else { + (void)SvOK_off(dstr); + (void)SvIOKp_on(dstr); + } + /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ + if (sflags & SVf_IVisUV) + SvIsUV_on(dstr); + SvIV_set(dstr, SvIVX(sstr)); + if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_on(dstr); + else + (void)SvNOKp_on(dstr); + SvNV_set(dstr, SvNVX(sstr)); + } } - return I_V(Atof(SvPVX_const(sv))); -} - -/* asUV(): extract an unsigned integer from the string value of an SV - * Caller must validate PVX */ - -STATIC UV -S_asUV(pTHX_ SV *sv) -{ - UV value; - const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); - - if ((numtype & (IS_NUMBER_IN_UV | IS_NUMBER_NOT_INT)) - == IS_NUMBER_IN_UV) { - /* It's definitely an integer */ - if (!(numtype & IS_NUMBER_NEG)) - return value; + else if (sflags & SVp_NOK) { + if (sflags & SVf_NOK) + (void)SvNOK_only(dstr); + else { + (void)SvOK_off(dstr); + SvNOKp_on(dstr); + } + SvNV_set(dstr, SvNVX(sstr)); } - if (!numtype) { - if (ckWARN(WARN_NUMERIC)) - not_a_number(sv); + else { + if (dtype == SVt_PVGV) { + if (ckWARN(WARN_MISC)) + Perl_warner(aTHX_ packWARN(WARN_MISC), "Undefined value assigned to typeglob"); + } + else + (void)SvOK_off(dstr); } - return U_V(Atof(SvPVX_const(sv))); + if (SvTAINTED(sstr)) + SvTAINT(dstr); } -/* uiv_2buf(): private routine for use by sv_2pv_flags(): print an IV or - * UV as a string towards the end of buf, and return pointers to start and - * end of it. - * - * We assume that buf is at least TYPE_CHARS(UV) long. - */ +/* +=for apidoc sv_setsv_mg -static char * -S_uiv_2buf(char *buf, IV iv, UV uv, int is_uv, char **peob) +Like C, but also handles 'set' magic. + +=cut +*/ + +void +Perl_sv_setsv_mg(pTHX_ SV *dstr, register SV *sstr) { - char *ptr = buf + TYPE_CHARS(UV); - char * const ebuf = ptr; - int sign; + sv_setsv(dstr,sstr); + SvSETMAGIC(dstr); +} - if (is_uv) - sign = 0; - else if (iv >= 0) { - uv = iv; - sign = 0; +#ifdef PERL_OLD_COPY_ON_WRITE +SV * +Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) +{ + STRLEN cur = SvCUR(sstr); + STRLEN len = SvLEN(sstr); + register char *new_pv; + + if (DEBUG_C_TEST) { + PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n", + sstr, dstr); + sv_dump(sstr); + if (dstr) + sv_dump(dstr); + } + + if (dstr) { + if (SvTHINKFIRST(dstr)) + sv_force_normal_flags(dstr, SV_COW_DROP_PV); + else if (SvPVX_const(dstr)) + Safefree(SvPVX_const(dstr)); + } + else + new_SV(dstr); + SvUPGRADE(dstr, SVt_PVIV); + + assert (SvPOK(sstr)); + assert (SvPOKp(sstr)); + assert (!SvIOK(sstr)); + assert (!SvIOKp(sstr)); + assert (!SvNOK(sstr)); + assert (!SvNOKp(sstr)); + + if (SvIsCOW(sstr)) { + + if (SvLEN(sstr) == 0) { + /* source is a COW shared hash key. */ + DEBUG_C(PerlIO_printf(Perl_debug_log, + "Fast copy on write: Sharing hash\n")); + new_pv = HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sstr)))); + goto common_exit; + } + SV_COW_NEXT_SV_SET(dstr, SV_COW_NEXT_SV(sstr)); } else { - uv = -iv; - sign = 1; + assert ((SvFLAGS(sstr) & CAN_COW_MASK) == CAN_COW_FLAGS); + SvUPGRADE(sstr, SVt_PVIV); + SvREADONLY_on(sstr); + SvFAKE_on(sstr); + DEBUG_C(PerlIO_printf(Perl_debug_log, + "Fast copy on write: Converting sstr to COW\n")); + SV_COW_NEXT_SV_SET(dstr, sstr); } - do { - *--ptr = '0' + (char)(uv % 10); - } while (uv /= 10); - if (sign) - *--ptr = '-'; - *peob = ebuf; - return ptr; + SV_COW_NEXT_SV_SET(sstr, dstr); + new_pv = SvPVX_mutable(sstr); + + common_exit: + SvPV_set(dstr, new_pv); + SvFLAGS(dstr) = (SVt_PVIV|SVf_POK|SVp_POK|SVf_FAKE|SVf_READONLY); + if (SvUTF8(sstr)) + SvUTF8_on(dstr); + SvLEN_set(dstr, len); + SvCUR_set(dstr, cur); + if (DEBUG_C_TEST) { + sv_dump(dstr); + } + return dstr; } +#endif /* -=for apidoc sv_2pv_flags +=for apidoc sv_setpvn -Returns a pointer to the string value of an SV, and sets *lp to its length. -If flags includes SV_GMAGIC, does an mg_get() first. Coerces sv to a string -if necessary. -Normally invoked via the C macro. C and C -usually end up here too. +Copies a string into an SV. The C parameter indicates the number of +bytes to be copied. If the C argument is NULL the SV will become +undefined. Does not handle 'set' magic. See C. =cut */ -char * -Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) +void +Perl_sv_setpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN len) { - register char *s; - int olderrno; - SV *tsv, *origsv; - char tbuf[64]; /* Must fit sprintf/Gconvert of longest IV/NV */ - char *tmpbuf = tbuf; - STRLEN len = 0; /* Hush gcc. len is always initialised before use. */ + register char *dptr; - if (!sv) { - if (lp) - *lp = 0; - return (char *)""; + SV_CHECK_THINKFIRST_COW_DROP(sv); + if (!ptr) { + (void)SvOK_off(sv); + return; } - if (SvGMAGICAL(sv)) { - if (flags & SV_GMAGIC) - mg_get(sv); - if (SvPOKp(sv)) { - if (lp) - *lp = SvCUR(sv); - if (flags & SV_MUTABLE_RETURN) - return SvPVX_mutable(sv); - if (flags & SV_CONST_RETURN) - return (char *)SvPVX_const(sv); - return SvPVX(sv); - } - if (SvIOKp(sv)) { - len = SvIsUV(sv) ? my_sprintf(tmpbuf,"%"UVuf, (UV)SvUVX(sv)) - : my_sprintf(tmpbuf,"%"IVdf, (IV)SvIVX(sv)); - tsv = Nullsv; - goto tokensave_has_len; - } - if (SvNOKp(sv)) { - Gconvert(SvNVX(sv), NV_DIG, 0, tmpbuf); - tsv = Nullsv; - goto tokensave; - } - if (!SvROK(sv)) { - if (!(SvFLAGS(sv) & SVs_PADTMP)) { - if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - } - if (lp) - *lp = 0; - return (char *)""; - } + else { + /* len is STRLEN which is unsigned, need to copy to signed */ + const IV iv = len; + if (iv < 0) + Perl_croak(aTHX_ "panic: sv_setpvn called with negative strlen"); } - if (SvTHINKFIRST(sv)) { - if (SvROK(sv)) { - SV* tmpstr; - register const char *typestr; - if (SvAMAGIC(sv) && (tmpstr=AMG_CALLun(sv,string)) && - (!SvROK(tmpstr) || (SvRV(tmpstr) != SvRV(sv)))) { - /* Unwrap this: */ - /* char *pv = lp ? SvPV(tmpstr, *lp) : SvPV_nolen(tmpstr); */ - - char *pv; - if ((SvFLAGS(tmpstr) & (SVf_POK)) == SVf_POK) { - if (flags & SV_CONST_RETURN) { - pv = (char *) SvPVX_const(tmpstr); - } else { - pv = (flags & SV_MUTABLE_RETURN) - ? SvPVX_mutable(tmpstr) : SvPVX(tmpstr); - } - if (lp) - *lp = SvCUR(tmpstr); - } else { - pv = sv_2pv_flags(tmpstr, lp, flags); - } - if (SvUTF8(tmpstr)) - SvUTF8_on(sv); - else - SvUTF8_off(sv); - return pv; - } - origsv = sv; - sv = (SV*)SvRV(sv); - if (!sv) - typestr = "NULLREF"; - else { - MAGIC *mg; - - switch (SvTYPE(sv)) { - case SVt_PVMG: - if ( ((SvFLAGS(sv) & - (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG)) - == (SVs_OBJECT|SVs_SMG)) - && (mg = mg_find(sv, PERL_MAGIC_qr))) { - const regexp *re = (regexp *)mg->mg_obj; - - if (!mg->mg_ptr) { - const char *fptr = "msix"; - char reflags[6]; - char ch; - int left = 0; - int right = 4; - char need_newline = 0; - U16 reganch = (U16)((re->reganch & PMf_COMPILETIME) >> 12); - - while((ch = *fptr++)) { - if(reganch & 1) { - reflags[left++] = ch; - } - else { - reflags[right--] = ch; - } - reganch >>= 1; - } - if(left != 4) { - reflags[left] = '-'; - left = 5; - } - - mg->mg_len = re->prelen + 4 + left; - /* - * If /x was used, we have to worry about a regex - * ending with a comment later being embedded - * within another regex. If so, we don't want this - * regex's "commentization" to leak out to the - * right part of the enclosing regex, we must cap - * it with a newline. - * - * So, if /x was used, we scan backwards from the - * end of the regex. If we find a '#' before we - * find a newline, we need to add a newline - * ourself. If we find a '\n' first (or if we - * don't find '#' or '\n'), we don't need to add - * anything. -jfriedl - */ - if (PMf_EXTENDED & re->reganch) - { - const char *endptr = re->precomp + re->prelen; - while (endptr >= re->precomp) - { - const char c = *(endptr--); - if (c == '\n') - break; /* don't need another */ - if (c == '#') { - /* we end while in a comment, so we - need a newline */ - mg->mg_len++; /* save space for it */ - need_newline = 1; /* note to add it */ - break; - } - } - } + SvUPGRADE(sv, SVt_PV); - Newx(mg->mg_ptr, mg->mg_len + 1 + left, char); - Copy("(?", mg->mg_ptr, 2, char); - Copy(reflags, mg->mg_ptr+2, left, char); - Copy(":", mg->mg_ptr+left+2, 1, char); - Copy(re->precomp, mg->mg_ptr+3+left, re->prelen, char); - if (need_newline) - mg->mg_ptr[mg->mg_len - 2] = '\n'; - mg->mg_ptr[mg->mg_len - 1] = ')'; - mg->mg_ptr[mg->mg_len] = 0; - } - PL_reginterp_cnt += re->program[0].next_off; + dptr = SvGROW(sv, len + 1); + Move(ptr,dptr,len,char); + dptr[len] = '\0'; + SvCUR_set(sv, len); + (void)SvPOK_only_UTF8(sv); /* validate pointer */ + SvTAINT(sv); +} - if (re->reganch & ROPT_UTF8) - SvUTF8_on(origsv); - else - SvUTF8_off(origsv); - if (lp) - *lp = mg->mg_len; - return mg->mg_ptr; - } - /* Fall through */ - case SVt_NULL: - case SVt_IV: - case SVt_NV: - case SVt_RV: - case SVt_PV: - case SVt_PVIV: - case SVt_PVNV: - case SVt_PVBM: typestr = SvROK(sv) ? "REF" : "SCALAR"; break; - case SVt_PVLV: typestr = SvROK(sv) ? "REF" - /* tied lvalues should appear to be - * scalars for backwards compatitbility */ - : (LvTYPE(sv) == 't' || LvTYPE(sv) == 'T') - ? "SCALAR" : "LVALUE"; break; - case SVt_PVAV: typestr = "ARRAY"; break; - case SVt_PVHV: typestr = "HASH"; break; - case SVt_PVCV: typestr = "CODE"; break; - case SVt_PVGV: typestr = "GLOB"; break; - case SVt_PVFM: typestr = "FORMAT"; break; - case SVt_PVIO: typestr = "IO"; break; - default: typestr = "UNKNOWN"; break; - } - tsv = NEWSV(0,0); - if (SvOBJECT(sv)) { - const char * const name = HvNAME_get(SvSTASH(sv)); - Perl_sv_setpvf(aTHX_ tsv, "%s=%s(0x%"UVxf")", - name ? name : "__ANON__" , typestr, PTR2UV(sv)); - } - else - Perl_sv_setpvf(aTHX_ tsv, "%s(0x%"UVxf")", typestr, PTR2UV(sv)); - goto tokensaveref; - } - if (lp) - *lp = strlen(typestr); - return (char *)typestr; - } - if (SvREADONLY(sv) && !SvOK(sv)) { - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - if (lp) - *lp = 0; - return (char *)""; - } +/* +=for apidoc sv_setpvn_mg + +Like C, but also handles 'set' magic. + +=cut +*/ + +void +Perl_sv_setpvn_mg(pTHX_ register SV *sv, register const char *ptr, register STRLEN len) +{ + sv_setpvn(sv,ptr,len); + SvSETMAGIC(sv); +} + +/* +=for apidoc sv_setpv + +Copies a string into an SV. The string must be null-terminated. Does not +handle 'set' magic. See C. + +=cut +*/ + +void +Perl_sv_setpv(pTHX_ register SV *sv, register const char *ptr) +{ + register STRLEN len; + + SV_CHECK_THINKFIRST_COW_DROP(sv); + if (!ptr) { + (void)SvOK_off(sv); + return; } - if (SvIOK(sv) || ((SvIOKp(sv) && !SvNOKp(sv)))) { - /* I'm assuming that if both IV and NV are equally valid then - converting the IV is going to be more efficient */ - const U32 isIOK = SvIOK(sv); - const U32 isUIOK = SvIsUV(sv); - char buf[TYPE_CHARS(UV)]; - char *ebuf, *ptr; + len = strlen(ptr); + SvUPGRADE(sv, SVt_PV); - if (SvTYPE(sv) < SVt_PVIV) - sv_upgrade(sv, SVt_PVIV); - if (isUIOK) - ptr = uiv_2buf(buf, 0, SvUVX(sv), 1, &ebuf); - else - ptr = uiv_2buf(buf, SvIVX(sv), 0, 0, &ebuf); - /* inlined from sv_setpvn */ - SvGROW_mutable(sv, (STRLEN)(ebuf - ptr + 1)); - Move(ptr,SvPVX_mutable(sv),ebuf - ptr,char); - SvCUR_set(sv, ebuf - ptr); - s = SvEND(sv); - *s = '\0'; - if (isIOK) - SvIOK_on(sv); - else - SvIOKp_on(sv); - if (isUIOK) - SvIsUV_on(sv); - } - else if (SvNOKp(sv)) { - if (SvTYPE(sv) < SVt_PVNV) - sv_upgrade(sv, SVt_PVNV); - /* The +20 is pure guesswork. Configure test needed. --jhi */ - s = SvGROW_mutable(sv, NV_DIG + 20); - olderrno = errno; /* some Xenix systems wipe out errno here */ -#ifdef apollo - if (SvNVX(sv) == 0.0) - (void)strcpy(s,"0"); - else -#endif /*apollo*/ - { - Gconvert(SvNVX(sv), NV_DIG, 0, s); - } - errno = olderrno; -#ifdef FIXNEGATIVEZERO - if (*s == '-' && s[1] == '0' && !s[2]) - strcpy(s,"0"); -#endif - while (*s) s++; -#ifdef hcx - if (s[-1] == '.') - *--s = '\0'; -#endif - } - else { - if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED)) - report_uninit(sv); - if (lp) - *lp = 0; - if (SvTYPE(sv) < SVt_PV) - /* Typically the caller expects that sv_any is not NULL now. */ - sv_upgrade(sv, SVt_PV); - return (char *)""; - } - { - const STRLEN len = s - SvPVX_const(sv); - if (lp) - *lp = len; - SvCUR_set(sv, len); - } - SvPOK_on(sv); - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n", - PTR2UV(sv),SvPVX_const(sv))); - if (flags & SV_CONST_RETURN) - return (char *)SvPVX_const(sv); - if (flags & SV_MUTABLE_RETURN) - return SvPVX_mutable(sv); - return SvPVX(sv); - - tokensave: - len = strlen(tmpbuf); - tokensave_has_len: - assert (!tsv); - if (SvROK(sv)) { /* XXX Skip this when sv_pvn_force calls */ - /* Sneaky stuff here */ - - tokensaveref: - if (!tsv) - tsv = newSVpvn(tmpbuf, len); - sv_2mortal(tsv); - if (lp) - *lp = SvCUR(tsv); - return SvPVX(tsv); - } - else { - dVAR; - -#ifdef FIXNEGATIVEZERO - if (len == 2 && tmpbuf[0] == '-' && tmpbuf[1] == '0') { - tmpbuf[0] = '0'; - tmpbuf[1] = 0; - len = 1; - } -#endif - SvUPGRADE(sv, SVt_PV); - if (lp) - *lp = len; - s = SvGROW_mutable(sv, len + 1); - SvCUR_set(sv, len); - SvPOKp_on(sv); - return memcpy(s, tmpbuf, len + 1); - } + SvGROW(sv, len + 1); + Move(ptr,SvPVX(sv),len+1,char); + SvCUR_set(sv, len); + (void)SvPOK_only_UTF8(sv); /* validate pointer */ + SvTAINT(sv); } /* -=for apidoc sv_copypv +=for apidoc sv_setpv_mg -Copies a stringified representation of the source SV into the -destination SV. Automatically performs any necessary mg_get and -coercion of numeric values into strings. Guaranteed to preserve -UTF-8 flag even from overloaded objects. Similar in nature to -sv_2pv[_flags] but operates directly on an SV instead of just the -string. Mostly uses sv_2pv_flags to do its work, except when that -would lose the UTF-8'ness of the PV. +Like C, but also handles 'set' magic. =cut */ void -Perl_sv_copypv(pTHX_ SV *dsv, register SV *ssv) +Perl_sv_setpv_mg(pTHX_ register SV *sv, register const char *ptr) { - STRLEN len; - const char * const s = SvPV_const(ssv,len); - sv_setpvn(dsv,s,len); - if (SvUTF8(ssv)) - SvUTF8_on(dsv); - else - SvUTF8_off(dsv); + sv_setpv(sv,ptr); + SvSETMAGIC(sv); } /* -=for apidoc sv_2pvbyte - -Return a pointer to the byte-encoded representation of the SV, and set *lp -to its length. May cause the SV to be downgraded from UTF-8 as a -side-effect. +=for apidoc sv_usepvn -Usually accessed via the C macro. +Tells an SV to use C to find its string value. Normally the string is +stored inside the SV but sv_usepvn allows the SV to use an outside string. +The C should point to memory that was allocated by C. The +string length, C, must be supplied. This function will realloc the +memory pointed to by C, so that pointer should not be freed or used by +the programmer after giving it to sv_usepvn. Does not handle 'set' magic. +See C. =cut */ -char * -Perl_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp) +void +Perl_sv_usepvn(pTHX_ register SV *sv, register char *ptr, register STRLEN len) { - sv_utf8_downgrade(sv,0); - return lp ? SvPV(sv,*lp) : SvPV_nolen(sv); + STRLEN allocate; + SV_CHECK_THINKFIRST_COW_DROP(sv); + SvUPGRADE(sv, SVt_PV); + if (!ptr) { + (void)SvOK_off(sv); + return; + } + if (SvPVX_const(sv)) + SvPV_free(sv); + + allocate = PERL_STRLEN_ROUNDUP(len + 1); + ptr = saferealloc (ptr, allocate); + SvPV_set(sv, ptr); + SvCUR_set(sv, len); + SvLEN_set(sv, allocate); + *SvEND(sv) = '\0'; + (void)SvPOK_only_UTF8(sv); /* validate pointer */ + SvTAINT(sv); } /* -=for apidoc sv_2pvutf8 - -Return a pointer to the UTF-8-encoded representation of the SV, and set *lp -to its length. May cause the SV to be upgraded to UTF-8 as a side-effect. +=for apidoc sv_usepvn_mg -Usually accessed via the C macro. +Like C, but also handles 'set' magic. =cut */ -char * -Perl_sv_2pvutf8(pTHX_ register SV *sv, STRLEN *lp) +void +Perl_sv_usepvn_mg(pTHX_ register SV *sv, register char *ptr, register STRLEN len) { - sv_utf8_upgrade(sv); - return lp ? SvPV(sv,*lp) : SvPV_nolen(sv); + sv_usepvn(sv,ptr,len); + SvSETMAGIC(sv); } - -/* -=for apidoc sv_2bool - -This function is only called on magical items, and is only used by -sv_true() or its macro equivalent. - -=cut -*/ - -bool -Perl_sv_2bool(pTHX_ register SV *sv) +#ifdef PERL_OLD_COPY_ON_WRITE +/* Need to do this *after* making the SV normal, as we need the buffer + pointer to remain valid until after we've copied it. If we let go too early, + another thread could invalidate it by unsharing last of the same hash key + (which it can do by means other than releasing copy-on-write Svs) + or by changing the other copy-on-write SVs in the loop. */ +STATIC void +S_sv_release_COW(pTHX_ register SV *sv, const char *pvx, STRLEN len, SV *after) { - SvGETMAGIC(sv); + if (len) { /* this SV was SvIsCOW_normal(sv) */ + /* we need to find the SV pointing to us. */ + SV * const current = SV_COW_NEXT_SV(after); - if (!SvOK(sv)) - return 0; - if (SvROK(sv)) { - SV* tmpsv; - if (SvAMAGIC(sv) && (tmpsv=AMG_CALLun(sv,bool_)) && - (!SvROK(tmpsv) || (SvRV(tmpsv) != SvRV(sv)))) - return (bool)SvTRUE(tmpsv); - return SvRV(sv) != 0; - } - if (SvPOKp(sv)) { - register XPV* const Xpvtmp = (XPV*)SvANY(sv); - if (Xpvtmp && - (*sv->sv_u.svu_pv > '0' || - Xpvtmp->xpv_cur > 1 || - (Xpvtmp->xpv_cur && *sv->sv_u.svu_pv != '0'))) - return 1; - else - return 0; - } - else { - if (SvIOKp(sv)) - return SvIVX(sv) != 0; - else { - if (SvNOKp(sv)) - return SvNVX(sv) != 0.0; - else - return FALSE; - } + if (current == sv) { + /* The SV we point to points back to us (there were only two of us + in the loop.) + Hence other SV is no longer copy on write either. */ + SvFAKE_off(after); + SvREADONLY_off(after); + } else { + /* We need to follow the pointers around the loop. */ + SV *next; + while ((next = SV_COW_NEXT_SV(current)) != sv) { + assert (next); + current = next; + /* don't loop forever if the structure is bust, and we have + a pointer into a closed loop. */ + assert (current != after); + assert (SvPVX_const(current) == pvx); + } + /* Make the SV before us point to the SV after us. */ + SV_COW_NEXT_SV_SET(current, after); + } + } else { + unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); } } +int +Perl_sv_release_IVX(pTHX_ register SV *sv) +{ + if (SvIsCOW(sv)) + sv_force_normal_flags(sv, 0); + SvOOK_off(sv); + return 0; +} +#endif /* -=for apidoc sv_utf8_upgrade - -Converts the PV of an SV to its UTF-8-encoded form. -Forces the SV to string form if it is not already. -Always sets the SvUTF8 flag to avoid future validity checks even -if all the bytes have hibit clear. +=for apidoc sv_force_normal_flags -This is not as a general purpose byte encoding to Unicode interface: -use the Encode extension for that. +Undo various types of fakery on an SV: if the PV is a shared string, make +a private copy; if we're a ref, stop refing; if we're a glob, downgrade to +an xpvmg; if we're a copy-on-write scalar, this is the on-write time when +we do the copy, and is also used locally. If C is set +then a copy-on-write scalar drops its PV buffer (if any) and becomes +SvPOK_off rather than making a copy. (Used where this scalar is about to be +set to some other value.) In addition, the C parameter gets passed to +C when unrefing. C calls this function +with flags set to 0. -=for apidoc sv_utf8_upgrade_flags +=cut +*/ -Converts the PV of an SV to its UTF-8-encoded form. -Forces the SV to string form if it is not already. -Always sets the SvUTF8 flag to avoid future validity checks even -if all the bytes have hibit clear. If C has C bit set, -will C on C if appropriate, else not. C and -C are implemented in terms of this function. - -This is not as a general purpose byte encoding to Unicode interface: -use the Encode extension for that. - -=cut -*/ - -STRLEN -Perl_sv_utf8_upgrade_flags(pTHX_ register SV *sv, I32 flags) +void +Perl_sv_force_normal_flags(pTHX_ register SV *sv, U32 flags) { - if (sv == &PL_sv_undef) - return 0; - if (!SvPOK(sv)) { - STRLEN len = 0; - if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) { - (void) sv_2pv_flags(sv,&len, flags); - if (SvUTF8(sv)) - return len; - } else { - (void) SvPV_force(sv,len); +#ifdef PERL_OLD_COPY_ON_WRITE + if (SvREADONLY(sv)) { + /* At this point I believe I should acquire a global SV mutex. */ + if (SvFAKE(sv)) { + const char * const pvx = SvPVX_const(sv); + const STRLEN len = SvLEN(sv); + const STRLEN cur = SvCUR(sv); + SV * const next = SV_COW_NEXT_SV(sv); /* next COW sv in the loop. */ + if (DEBUG_C_TEST) { + PerlIO_printf(Perl_debug_log, + "Copy on write: Force normal %ld\n", + (long) flags); + sv_dump(sv); + } + SvFAKE_off(sv); + SvREADONLY_off(sv); + /* This SV doesn't own the buffer, so need to Newx() a new one: */ + SvPV_set(sv, (char*)0); + SvLEN_set(sv, 0); + if (flags & SV_COW_DROP_PV) { + /* OK, so we don't need to copy our buffer. */ + SvPOK_off(sv); + } else { + SvGROW(sv, cur + 1); + Move(pvx,SvPVX(sv),cur,char); + SvCUR_set(sv, cur); + *SvEND(sv) = '\0'; + } + sv_release_COW(sv, pvx, len, next); + if (DEBUG_C_TEST) { + sv_dump(sv); + } } + else if (IN_PERL_RUNTIME) + Perl_croak(aTHX_ PL_no_modify); + /* At this point I believe that I can drop the global SV mutex. */ } - - if (SvUTF8(sv)) { - return SvCUR(sv); - } - - if (SvIsCOW(sv)) { - sv_force_normal_flags(sv, 0); - } - - if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING)) - sv_recode_to_utf8(sv, PL_encoding); - else { /* Assume Latin-1/EBCDIC */ - /* This function could be much more efficient if we - * had a FLAG in SVs to signal if there are any hibit - * chars in the PV. Given that there isn't such a flag - * make the loop as fast as possible. */ - const U8 *s = (U8 *) SvPVX_const(sv); - const U8 * const e = (U8 *) SvEND(sv); - const U8 *t = s; - int hibit = 0; - - while (t < e) { - const U8 ch = *t++; - if ((hibit = !NATIVE_IS_INVARIANT(ch))) - break; - } - if (hibit) { - STRLEN len = SvCUR(sv) + 1; /* Plus the \0 */ - U8 * const recoded = bytes_to_utf8((U8*)s, &len); - - SvPV_free(sv); /* No longer using what was there before. */ - - SvPV_set(sv, (char*)recoded); - SvCUR_set(sv, len - 1); - SvLEN_set(sv, len); /* No longer know the real size. */ +#else + if (SvREADONLY(sv)) { + if (SvFAKE(sv)) { + const char * const pvx = SvPVX_const(sv); + const STRLEN len = SvCUR(sv); + SvFAKE_off(sv); + SvREADONLY_off(sv); + SvPV_set(sv, Nullch); + SvLEN_set(sv, 0); + SvGROW(sv, len + 1); + Move(pvx,SvPVX(sv),len,char); + *SvEND(sv) = '\0'; + unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); } - /* Mark as UTF-8 even if no hibit - saves scanning loop */ - SvUTF8_on(sv); + else if (IN_PERL_RUNTIME) + Perl_croak(aTHX_ PL_no_modify); } - return SvCUR(sv); +#endif + if (SvROK(sv)) + sv_unref_flags(sv, flags); + else if (SvFAKE(sv) && SvTYPE(sv) == SVt_PVGV) + sv_unglob(sv); } /* -=for apidoc sv_utf8_downgrade - -Attempts to convert the PV of an SV from characters to bytes. -If the PV contains a character beyond byte, this conversion will fail; -in this case, either returns false or, if C is not -true, croaks. +=for apidoc sv_chop -This is not as a general purpose Unicode to byte encoding interface: -use the Encode extension for that. +Efficient removal of characters from the beginning of the string buffer. +SvPOK(sv) must be true and the C must be a pointer to somewhere inside +the string buffer. The C becomes the first character of the adjusted +string. Uses the "OOK hack". +Beware: after this function returns, C and SvPVX_const(sv) may no longer +refer to the same chunk of data. =cut */ -bool -Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok) +void +Perl_sv_chop(pTHX_ register SV *sv, register const char *ptr) { - if (SvPOKp(sv) && SvUTF8(sv)) { - if (SvCUR(sv)) { - U8 *s; - STRLEN len; + register STRLEN delta; + if (!ptr || !SvPOKp(sv)) + return; + delta = ptr - SvPVX_const(sv); + SV_CHECK_THINKFIRST(sv); + if (SvTYPE(sv) < SVt_PVIV) + sv_upgrade(sv,SVt_PVIV); - if (SvIsCOW(sv)) { - sv_force_normal_flags(sv, 0); - } - s = (U8 *) SvPV(sv, len); - if (!utf8_to_bytes(s, &len)) { - if (fail_ok) - return FALSE; - else { - if (PL_op) - Perl_croak(aTHX_ "Wide character in %s", - OP_DESC(PL_op)); - else - Perl_croak(aTHX_ "Wide character"); - } - } - SvCUR_set(sv, len); + if (!SvOOK(sv)) { + if (!SvLEN(sv)) { /* make copy of shared string */ + const char *pvx = SvPVX_const(sv); + const STRLEN len = SvCUR(sv); + SvGROW(sv, len + 1); + Move(pvx,SvPVX(sv),len,char); + *SvEND(sv) = '\0'; } + SvIV_set(sv, 0); + /* Same SvOOK_on but SvOOK_on does a SvIOK_off + and we do that anyway inside the SvNIOK_off + */ + SvFLAGS(sv) |= SVf_OOK; } - SvUTF8_off(sv); - return TRUE; + SvNIOK_off(sv); + SvLEN_set(sv, SvLEN(sv) - delta); + SvCUR_set(sv, SvCUR(sv) - delta); + SvPV_set(sv, SvPVX(sv) + delta); + SvIV_set(sv, SvIVX(sv) + delta); } /* -=for apidoc sv_utf8_encode +=for apidoc sv_catpvn -Converts the PV of an SV to UTF-8, but then turns the C -flag off so that it looks like octets again. +Concatenates the string onto the end of the string which is in the SV. The +C indicates number of bytes to copy. If the SV has the UTF-8 +status set, then the bytes appended should be valid UTF-8. +Handles 'get' magic, but not 'set' magic. See C. + +=for apidoc sv_catpvn_flags + +Concatenates the string onto the end of the string which is in the SV. The +C indicates number of bytes to copy. If the SV has the UTF-8 +status set, then the bytes appended should be valid UTF-8. +If C has C bit set, will C on C if +appropriate, else not. C and C are implemented +in terms of this function. =cut */ void -Perl_sv_utf8_encode(pTHX_ register SV *sv) +Perl_sv_catpvn_flags(pTHX_ register SV *dsv, register const char *sstr, register STRLEN slen, I32 flags) { - (void) sv_utf8_upgrade(sv); - if (SvIsCOW(sv)) { - sv_force_normal_flags(sv, 0); - } - if (SvREADONLY(sv)) { - Perl_croak(aTHX_ PL_no_modify); - } - SvUTF8_off(sv); + STRLEN dlen; + const char *dstr = SvPV_force_flags(dsv, dlen, flags); + + SvGROW(dsv, dlen + slen + 1); + if (sstr == dstr) + sstr = SvPVX_const(dsv); + Move(sstr, SvPVX(dsv) + dlen, slen, char); + SvCUR_set(dsv, SvCUR(dsv) + slen); + *SvEND(dsv) = '\0'; + (void)SvPOK_only_UTF8(dsv); /* validate pointer */ + SvTAINT(dsv); + if (flags & SV_SMAGIC) + SvSETMAGIC(dsv); } /* -=for apidoc sv_utf8_decode +=for apidoc sv_catsv -If the PV of the SV is an octet sequence in UTF-8 -and contains a multiple-byte character, the C flag is turned on -so that it looks like a character. If the PV contains only single-byte -characters, the C flag stays being off. -Scans PV for validity and returns false if the PV is invalid UTF-8. +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. Handles 'get' magic, but +not 'set' magic. See C. -=cut -*/ +=for apidoc sv_catsv_flags -bool -Perl_sv_utf8_decode(pTHX_ register SV *sv) -{ - if (SvPOKp(sv)) { - const U8 *c; - const U8 *e; +Concatenates the string from SV C onto the end of the string in +SV C. Modifies C but not C. If C has C +bit set, will C on the SVs if appropriate, else not. C +and C are implemented in terms of this function. - /* The octets may have got themselves encoded - get them back as - * bytes - */ - if (!sv_utf8_downgrade(sv, TRUE)) - return FALSE; +=cut */ - /* it is actually just a matter of turning the utf8 flag on, but - * we want to make sure everything inside is valid utf8 first. - */ - c = (const U8 *) SvPVX_const(sv); - if (!is_utf8_string(c, SvCUR(sv)+1)) - return FALSE; - e = (const U8 *) SvEND(sv); - while (c < e) { - const U8 ch = *c++; - if (!UTF8_IS_INVARIANT(ch)) { - SvUTF8_on(sv); - break; - } - } +void +Perl_sv_catsv_flags(pTHX_ SV *dsv, register SV *ssv, I32 flags) +{ + if (ssv) { + STRLEN slen; + const char *spv = SvPV_const(ssv, slen); + if (spv) { + /* sutf8 and dutf8 were type bool, but under USE_ITHREADS, + gcc version 2.95.2 20000220 (Debian GNU/Linux) for + Linux xxx 2.2.17 on sparc64 with gcc -O2, we erroneously + get dutf8 = 0x20000000, (i.e. SVf_UTF8) even though + dsv->sv_flags doesn't have that bit set. + Andy Dougherty 12 Oct 2001 + */ + const I32 sutf8 = DO_UTF8(ssv); + I32 dutf8; + + if (SvGMAGICAL(dsv) && (flags & SV_GMAGIC)) + mg_get(dsv); + dutf8 = DO_UTF8(dsv); + + if (dutf8 != sutf8) { + if (dutf8) { + /* Not modifying source SV, so taking a temporary copy. */ + SV* const csv = sv_2mortal(newSVpvn(spv, slen)); + + sv_utf8_upgrade(csv); + spv = SvPV_const(csv, slen); + } + else + sv_utf8_upgrade_nomg(dsv); + } + sv_catpvn_nomg(dsv, spv, slen); + } } - return TRUE; + if (flags & SV_SMAGIC) + SvSETMAGIC(dsv); } /* -=for apidoc sv_setsv +=for apidoc sv_catpv -Copies the contents of the source SV C into the destination SV -C. The source SV may be destroyed if it is mortal, so don't use this -function if the source SV needs to be reused. Does not handle 'set' magic. -Loosely speaking, it performs a copy-by-value, obliterating any previous -content of the destination. +Concatenates the string onto the end of the string which is in the SV. +If the SV has the UTF-8 status set, then the bytes appended should be +valid UTF-8. Handles 'get' magic, but not 'set' magic. See C. -You probably want to use one of the assortment of wrappers, such as -C, C, C and -C. +=cut */ -=for apidoc sv_setsv_flags +void +Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr) +{ + register STRLEN len; + STRLEN tlen; + char *junk; -Copies the contents of the source SV C into the destination SV -C. The source SV may be destroyed if it is mortal, so don't use this -function if the source SV needs to be reused. Does not handle 'set' magic. -Loosely speaking, it performs a copy-by-value, obliterating any previous -content of the destination. -If the C parameter has the C bit set, will C on -C if appropriate, else not. If the C parameter has the -C bit set then the buffers of temps will not be stolen. -and C are implemented in terms of this function. + if (!ptr) + return; + junk = SvPV_force(sv, tlen); + len = strlen(ptr); + SvGROW(sv, tlen + len + 1); + if (ptr == junk) + ptr = SvPVX_const(sv); + Move(ptr,SvPVX(sv)+tlen,len+1,char); + SvCUR_set(sv, SvCUR(sv) + len); + (void)SvPOK_only_UTF8(sv); /* validate pointer */ + SvTAINT(sv); +} -You probably want to use one of the assortment of wrappers, such as -C, C, C and -C. +/* +=for apidoc sv_catpv_mg -This is the primary function for copying scalars, and most other -copy-ish functions and macros use this underneath. +Like C, but also handles 'set' magic. =cut */ void -Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags) +Perl_sv_catpv_mg(pTHX_ register SV *sv, register const char *ptr) { - register U32 sflags; - register int dtype; - register int stype; - - if (sstr == dstr) - return; - SV_CHECK_THINKFIRST_COW_DROP(dstr); - if (!sstr) - sstr = &PL_sv_undef; - stype = SvTYPE(sstr); - dtype = SvTYPE(dstr); + sv_catpv(sv,ptr); + SvSETMAGIC(sv); +} - SvAMAGIC_off(dstr); - if ( SvVOK(dstr) ) - { - /* need to nuke the magic */ - mg_free(dstr); - SvRMAGICAL_off(dstr); - } +/* +=for apidoc newSV - /* There's a lot of redundancy below but we're going for speed here */ +Create a new null SV, or if len > 0, create a new empty SVt_PV type SV +with an initial PV allocation of len+1. Normally accessed via the C +macro. - switch (stype) { - case SVt_NULL: - undef_sstr: - if (dtype != SVt_PVGV) { - (void)SvOK_off(dstr); - return; - } - break; - case SVt_IV: - if (SvIOK(sstr)) { - switch (dtype) { - case SVt_NULL: - sv_upgrade(dstr, SVt_IV); - break; - case SVt_NV: - sv_upgrade(dstr, SVt_PVNV); - break; - case SVt_RV: - case SVt_PV: - sv_upgrade(dstr, SVt_PVIV); - break; - } - (void)SvIOK_only(dstr); - SvIV_set(dstr, SvIVX(sstr)); - if (SvIsUV(sstr)) - SvIsUV_on(dstr); - if (SvTAINTED(sstr)) - SvTAINT(dstr); - return; - } - goto undef_sstr; +=cut +*/ - case SVt_NV: - if (SvNOK(sstr)) { - switch (dtype) { - case SVt_NULL: - case SVt_IV: - sv_upgrade(dstr, SVt_NV); - break; - case SVt_RV: - case SVt_PV: - case SVt_PVIV: - sv_upgrade(dstr, SVt_PVNV); - break; - } - SvNV_set(dstr, SvNVX(sstr)); - (void)SvNOK_only(dstr); - if (SvTAINTED(sstr)) - SvTAINT(dstr); - return; - } - goto undef_sstr; +SV * +Perl_newSV(pTHX_ STRLEN len) +{ + register SV *sv; - case SVt_RV: - if (dtype < SVt_RV) - sv_upgrade(dstr, SVt_RV); - else if (dtype == SVt_PVGV && - SvROK(sstr) && SvTYPE(SvRV(sstr)) == SVt_PVGV) { - sstr = SvRV(sstr); - if (sstr == dstr) { - if (GvIMPORTED(dstr) != GVf_IMPORTED - && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) - { - GvIMPORTED_on(dstr); - } - GvMULTI_on(dstr); - return; - } - goto glob_assign; - } - break; - case SVt_PVFM: -#ifdef PERL_OLD_COPY_ON_WRITE - if ((SvFLAGS(sstr) & CAN_COW_MASK) == CAN_COW_FLAGS) { - if (dtype < SVt_PVIV) - sv_upgrade(dstr, SVt_PVIV); - break; - } - /* Fall through */ -#endif - case SVt_PV: - if (dtype < SVt_PV) - sv_upgrade(dstr, SVt_PV); - break; - case SVt_PVIV: - if (dtype < SVt_PVIV) - sv_upgrade(dstr, SVt_PVIV); - break; - case SVt_PVNV: - if (dtype < SVt_PVNV) - sv_upgrade(dstr, SVt_PVNV); - break; - case SVt_PVAV: - case SVt_PVHV: - case SVt_PVCV: - case SVt_PVIO: - { - const char * const type = sv_reftype(sstr,0); - if (PL_op) - Perl_croak(aTHX_ "Bizarre copy of %s in %s", type, OP_NAME(PL_op)); - else - Perl_croak(aTHX_ "Bizarre copy of %s", type); - } - break; + new_SV(sv); + if (len) { + sv_upgrade(sv, SVt_PV); + SvGROW(sv, len + 1); + } + return sv; +} +/* +=for apidoc sv_magicext - case SVt_PVGV: - if (dtype <= SVt_PVGV) { - glob_assign: - if (dtype != SVt_PVGV) { - const char * const name = GvNAME(sstr); - const STRLEN len = GvNAMELEN(sstr); - /* don't upgrade SVt_PVLV: it can hold a glob */ - if (dtype != SVt_PVLV) - sv_upgrade(dstr, SVt_PVGV); - sv_magic(dstr, dstr, PERL_MAGIC_glob, Nullch, 0); - GvSTASH(dstr) = GvSTASH(sstr); - if (GvSTASH(dstr)) - Perl_sv_add_backref(aTHX_ (SV*)GvSTASH(dstr), dstr); - GvNAME(dstr) = savepvn(name, len); - GvNAMELEN(dstr) = len; - SvFAKE_on(dstr); /* can coerce to non-glob */ - } +Adds magic to an SV, upgrading it if necessary. Applies the +supplied vtable and returns a pointer to the magic added. -#ifdef GV_UNIQUE_CHECK - if (GvUNIQUE((GV*)dstr)) { - Perl_croak(aTHX_ PL_no_modify); - } -#endif +Note that C will allow things that C will not. +In particular, you can add magic to SvREADONLY SVs, and add more than +one instance of the same 'how'. - (void)SvOK_off(dstr); - GvINTRO_off(dstr); /* one-shot flag */ - gp_free((GV*)dstr); - GvGP(dstr) = gp_ref(GvGP(sstr)); - if (SvTAINTED(sstr)) - SvTAINT(dstr); - if (GvIMPORTED(dstr) != GVf_IMPORTED - && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) - { - GvIMPORTED_on(dstr); - } - GvMULTI_on(dstr); - return; - } - /* FALL THROUGH */ +If C is greater than zero then a C I of C is +stored, if C is zero then C is stored as-is and - as another +special case - if C<(name && namlen == HEf_SVKEY)> then C is assumed +to contain an C and is stored as-is with its REFCNT incremented. - default: - if (SvGMAGICAL(sstr) && (flags & SV_GMAGIC)) { - mg_get(sstr); - if ((int)SvTYPE(sstr) != stype) { - stype = SvTYPE(sstr); - if (stype == SVt_PVGV && dtype <= SVt_PVGV) - goto glob_assign; - } - } - if (stype == SVt_PVLV) - SvUPGRADE(dstr, SVt_PVNV); - else - SvUPGRADE(dstr, (U32)stype); - } +(This is now used as a subroutine by C.) - sflags = SvFLAGS(sstr); +=cut +*/ +MAGIC * +Perl_sv_magicext(pTHX_ SV* sv, SV* obj, int how, const MGVTBL *vtable, + const char* name, I32 namlen) +{ + MAGIC* mg; - if (sflags & SVf_ROK) { - if (dtype >= SVt_PV) { - if (dtype == SVt_PVGV) { - SV * const sref = SvREFCNT_inc(SvRV(sstr)); - SV *dref = 0; - const int intro = GvINTRO(dstr); + if (SvTYPE(sv) < SVt_PVMG) { + SvUPGRADE(sv, SVt_PVMG); + } + Newxz(mg, 1, MAGIC); + mg->mg_moremagic = SvMAGIC(sv); + SvMAGIC_set(sv, mg); -#ifdef GV_UNIQUE_CHECK - if (GvUNIQUE((GV*)dstr)) { - Perl_croak(aTHX_ PL_no_modify); - } -#endif + /* Sometimes a magic contains a reference loop, where the sv and + object refer to each other. To prevent a reference loop that + would prevent such objects being freed, we look for such loops + and if we find one we avoid incrementing the object refcount. - if (intro) { - GvINTRO_off(dstr); /* one-shot flag */ - GvLINE(dstr) = CopLINE(PL_curcop); - GvEGV(dstr) = (GV*)dstr; - } - GvMULTI_on(dstr); - switch (SvTYPE(sref)) { - case SVt_PVAV: - if (intro) - SAVEGENERICSV(GvAV(dstr)); - else - dref = (SV*)GvAV(dstr); - GvAV(dstr) = (AV*)sref; - if (!GvIMPORTED_AV(dstr) - && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) - { - GvIMPORTED_AV_on(dstr); - } - break; - case SVt_PVHV: - if (intro) - SAVEGENERICSV(GvHV(dstr)); - else - dref = (SV*)GvHV(dstr); - GvHV(dstr) = (HV*)sref; - if (!GvIMPORTED_HV(dstr) - && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) - { - GvIMPORTED_HV_on(dstr); - } - break; - case SVt_PVCV: - if (intro) { - if (GvCVGEN(dstr) && GvCV(dstr) != (CV*)sref) { - SvREFCNT_dec(GvCV(dstr)); - GvCV(dstr) = Nullcv; - GvCVGEN(dstr) = 0; /* Switch off cacheness. */ - PL_sub_generation++; - } - SAVEGENERICSV(GvCV(dstr)); - } - else - dref = (SV*)GvCV(dstr); - if (GvCV(dstr) != (CV*)sref) { - CV* const cv = GvCV(dstr); - if (cv) { - if (!GvCVGEN((GV*)dstr) && - (CvROOT(cv) || CvXSUB(cv))) - { - /* Redefining a sub - warning is mandatory if - it was a const and its value changed. */ - if (ckWARN(WARN_REDEFINE) - || (CvCONST(cv) - && (!CvCONST((CV*)sref) - || sv_cmp(cv_const_sv(cv), - cv_const_sv((CV*)sref))))) - { - Perl_warner(aTHX_ packWARN(WARN_REDEFINE), - CvCONST(cv) - ? "Constant subroutine %s::%s redefined" - : "Subroutine %s::%s redefined", - HvNAME_get(GvSTASH((GV*)dstr)), - GvENAME((GV*)dstr)); - } - } - if (!intro) - cv_ckproto(cv, (GV*)dstr, - SvPOK(sref) - ? SvPVX_const(sref) : Nullch); - } - GvCV(dstr) = (CV*)sref; - GvCVGEN(dstr) = 0; /* Switch off cacheness. */ - GvASSUMECV_on(dstr); - PL_sub_generation++; - } - if (!GvIMPORTED_CV(dstr) - && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) - { - GvIMPORTED_CV_on(dstr); - } - break; - case SVt_PVIO: - if (intro) - SAVEGENERICSV(GvIOp(dstr)); - else - dref = (SV*)GvIOp(dstr); - GvIOp(dstr) = (IO*)sref; - break; - case SVt_PVFM: - if (intro) - SAVEGENERICSV(GvFORM(dstr)); - else - dref = (SV*)GvFORM(dstr); - GvFORM(dstr) = (CV*)sref; - break; - default: - if (intro) - SAVEGENERICSV(GvSV(dstr)); - else - dref = (SV*)GvSV(dstr); - GvSV(dstr) = sref; - if (!GvIMPORTED_SV(dstr) - && CopSTASH_ne(PL_curcop, GvSTASH(dstr))) - { - GvIMPORTED_SV_on(dstr); - } - break; - } - if (dref) - SvREFCNT_dec(dref); - if (SvTAINTED(sstr)) - SvTAINT(dstr); - return; - } - if (SvPVX_const(dstr)) { - SvPV_free(dstr); - SvLEN_set(dstr, 0); - SvCUR_set(dstr, 0); - } - } - (void)SvOK_off(dstr); - SvRV_set(dstr, SvREFCNT_inc(SvRV(sstr))); - SvROK_on(dstr); - if (sflags & SVp_NOK) { - SvNOKp_on(dstr); - /* Only set the public OK flag if the source has public OK. */ - if (sflags & SVf_NOK) - SvFLAGS(dstr) |= SVf_NOK; - SvNV_set(dstr, SvNVX(sstr)); - } - if (sflags & SVp_IOK) { - (void)SvIOKp_on(dstr); - if (sflags & SVf_IOK) - SvFLAGS(dstr) |= SVf_IOK; - if (sflags & SVf_IVisUV) - SvIsUV_on(dstr); - SvIV_set(dstr, SvIVX(sstr)); - } - if (SvAMAGIC(sstr)) { - SvAMAGIC_on(dstr); - } + Note we cannot do this to avoid self-tie loops as intervening RV must + have its REFCNT incremented to keep it in existence. + + */ + if (!obj || obj == sv || + how == PERL_MAGIC_arylen || + how == PERL_MAGIC_qr || + how == PERL_MAGIC_symtab || + (SvTYPE(obj) == SVt_PVGV && + (GvSV(obj) == sv || GvHV(obj) == (HV*)sv || GvAV(obj) == (AV*)sv || + GvCV(obj) == (CV*)sv || GvIOp(obj) == (IO*)sv || + GvFORM(obj) == (CV*)sv))) + { + mg->mg_obj = obj; + } + else { + mg->mg_obj = SvREFCNT_inc(obj); + mg->mg_flags |= MGf_REFCOUNTED; } - else if (sflags & SVp_POK) { - bool isSwipe = 0; - /* - * Check to see if we can just swipe the string. If so, it's a - * possible small lose on short strings, but a big win on long ones. - * It might even be a win on short strings if SvPVX_const(dstr) - * has to be allocated and SvPVX_const(sstr) has to be freed. - */ + /* Normal self-ties simply pass a null object, and instead of + using mg_obj directly, use the SvTIED_obj macro to produce a + new RV as needed. For glob "self-ties", we are tieing the PVIO + with an RV obj pointing to the glob containing the PVIO. In + this case, to avoid a reference loop, we need to weaken the + reference. + */ - /* Whichever path we take through the next code, we want this true, - and doing it now facilitates the COW check. */ - (void)SvPOK_only(dstr); + if (how == PERL_MAGIC_tiedscalar && SvTYPE(sv) == SVt_PVIO && + obj && SvROK(obj) && GvIO(SvRV(obj)) == (IO*)sv) + { + sv_rvweaken(obj); + } - if ( - /* We're not already COW */ - ((sflags & (SVf_FAKE | SVf_READONLY)) != (SVf_FAKE | SVf_READONLY) -#ifndef PERL_OLD_COPY_ON_WRITE - /* or we are, but dstr isn't a suitable target. */ - || (SvFLAGS(dstr) & CAN_COW_MASK) != CAN_COW_FLAGS -#endif - ) - && - !(isSwipe = - (sflags & SVs_TEMP) && /* slated for free anyway? */ - !(sflags & SVf_OOK) && /* and not involved in OOK hack? */ - (!(flags & SV_NOSTEAL)) && - /* and we're allowed to steal temps */ - SvREFCNT(sstr) == 1 && /* and no other references to it? */ - SvLEN(sstr) && /* and really is a string */ - /* and won't be needed again, potentially */ - !(PL_op && PL_op->op_type == OP_AASSIGN)) -#ifdef PERL_OLD_COPY_ON_WRITE - && !((sflags & CAN_COW_MASK) == CAN_COW_FLAGS - && (SvFLAGS(dstr) & CAN_COW_MASK) == CAN_COW_FLAGS - && SvTYPE(sstr) >= SVt_PVIV) -#endif - ) { - /* Failed the swipe test, and it's not a shared hash key either. - Have to copy the string. */ - STRLEN len = SvCUR(sstr); - SvGROW(dstr, len + 1); /* inlined from sv_setpvn */ - Move(SvPVX_const(sstr),SvPVX(dstr),len,char); - SvCUR_set(dstr, len); - *SvEND(dstr) = '\0'; - } else { - /* If PERL_OLD_COPY_ON_WRITE is not defined, then isSwipe will always - be true in here. */ - /* Either it's a shared hash key, or it's suitable for - copy-on-write or we can swipe the string. */ - if (DEBUG_C_TEST) { - PerlIO_printf(Perl_debug_log, "Copy on write: sstr --> dstr\n"); - sv_dump(sstr); - sv_dump(dstr); - } -#ifdef PERL_OLD_COPY_ON_WRITE - if (!isSwipe) { - /* I believe I should acquire a global SV mutex if - it's a COW sv (not a shared hash key) to stop - it going un copy-on-write. - If the source SV has gone un copy on write between up there - and down here, then (assert() that) it is of the correct - form to make it copy on write again */ - if ((sflags & (SVf_FAKE | SVf_READONLY)) - != (SVf_FAKE | SVf_READONLY)) { - SvREADONLY_on(sstr); - SvFAKE_on(sstr); - /* Make the source SV into a loop of 1. - (about to become 2) */ - SV_COW_NEXT_SV_SET(sstr, sstr); - } - } -#endif - /* Initial code is common. */ - if (SvPVX_const(dstr)) { /* we know that dtype >= SVt_PV */ - SvPV_free(dstr); - } - - if (!isSwipe) { - /* making another shared SV. */ - STRLEN cur = SvCUR(sstr); - STRLEN len = SvLEN(sstr); -#ifdef PERL_OLD_COPY_ON_WRITE - if (len) { - assert (SvTYPE(dstr) >= SVt_PVIV); - /* SvIsCOW_normal */ - /* splice us in between source and next-after-source. */ - SV_COW_NEXT_SV_SET(dstr, SV_COW_NEXT_SV(sstr)); - SV_COW_NEXT_SV_SET(sstr, dstr); - SvPV_set(dstr, SvPVX_mutable(sstr)); - } else -#endif - { - /* SvIsCOW_shared_hash */ - DEBUG_C(PerlIO_printf(Perl_debug_log, - "Copy on write: Sharing hash\n")); - - assert (SvTYPE(dstr) >= SVt_PV); - SvPV_set(dstr, - HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sstr))))); - } - SvLEN_set(dstr, len); - SvCUR_set(dstr, cur); - SvREADONLY_on(dstr); - SvFAKE_on(dstr); - /* Relesase a global SV mutex. */ - } - else - { /* Passes the swipe test. */ - SvPV_set(dstr, SvPVX_mutable(sstr)); - SvLEN_set(dstr, SvLEN(sstr)); - SvCUR_set(dstr, SvCUR(sstr)); - - SvTEMP_off(dstr); - (void)SvOK_off(sstr); /* NOTE: nukes most SvFLAGS on sstr */ - SvPV_set(sstr, Nullch); - SvLEN_set(sstr, 0); - SvCUR_set(sstr, 0); - SvTEMP_off(sstr); - } - } - if (sflags & SVf_UTF8) - SvUTF8_on(dstr); - if (sflags & SVp_NOK) { - SvNOKp_on(dstr); - if (sflags & SVf_NOK) - SvFLAGS(dstr) |= SVf_NOK; - SvNV_set(dstr, SvNVX(sstr)); - } - if (sflags & SVp_IOK) { - (void)SvIOKp_on(dstr); - if (sflags & SVf_IOK) - SvFLAGS(dstr) |= SVf_IOK; - if (sflags & SVf_IVisUV) - SvIsUV_on(dstr); - SvIV_set(dstr, SvIVX(sstr)); - } - if (SvVOK(sstr)) { - MAGIC *smg = mg_find(sstr,PERL_MAGIC_vstring); - sv_magic(dstr, NULL, PERL_MAGIC_vstring, - smg->mg_ptr, smg->mg_len); - SvRMAGICAL_on(dstr); - } - } - else if (sflags & SVp_IOK) { - if (sflags & SVf_IOK) - (void)SvIOK_only(dstr); - else { - (void)SvOK_off(dstr); - (void)SvIOKp_on(dstr); - } - /* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */ - if (sflags & SVf_IVisUV) - SvIsUV_on(dstr); - SvIV_set(dstr, SvIVX(sstr)); - if (sflags & SVp_NOK) { - if (sflags & SVf_NOK) - (void)SvNOK_on(dstr); - else - (void)SvNOKp_on(dstr); - SvNV_set(dstr, SvNVX(sstr)); - } - } - else if (sflags & SVp_NOK) { - if (sflags & SVf_NOK) - (void)SvNOK_only(dstr); - else { - (void)SvOK_off(dstr); - SvNOKp_on(dstr); - } - SvNV_set(dstr, SvNVX(sstr)); - } - else { - if (dtype == SVt_PVGV) { - if (ckWARN(WARN_MISC)) - Perl_warner(aTHX_ packWARN(WARN_MISC), "Undefined value assigned to typeglob"); - } + mg->mg_type = how; + mg->mg_len = namlen; + if (name) { + if (namlen > 0) + mg->mg_ptr = savepvn(name, namlen); + else if (namlen == HEf_SVKEY) + mg->mg_ptr = (char*)SvREFCNT_inc((SV*)name); else - (void)SvOK_off(dstr); + mg->mg_ptr = (char *) name; } - if (SvTAINTED(sstr)) - SvTAINT(dstr); + mg->mg_virtual = vtable; + + mg_magical(sv); + if (SvGMAGICAL(sv)) + SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK|SVf_POK); + return mg; } /* -=for apidoc sv_setsv_mg +=for apidoc sv_magic -Like C, but also handles 'set' magic. +Adds magic to an SV. First upgrades C to type C if necessary, +then adds a new magic item of type C to the head of the magic list. + +See C (which C now calls) for a description of the +handling of the C and C arguments. + +You need to use C to add magic to SvREADONLY SVs and also +to add more than one instance of the same 'how'. =cut */ void -Perl_sv_setsv_mg(pTHX_ SV *dstr, register SV *sstr) +Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 namlen) { - sv_setsv(dstr,sstr); - SvSETMAGIC(dstr); -} + const MGVTBL *vtable; + MAGIC* mg; #ifdef PERL_OLD_COPY_ON_WRITE -SV * -Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) -{ - STRLEN cur = SvCUR(sstr); - STRLEN len = SvLEN(sstr); - register char *new_pv; + if (SvIsCOW(sv)) + sv_force_normal_flags(sv, 0); +#endif + if (SvREADONLY(sv)) { + if ( + /* its okay to attach magic to shared strings; the subsequent + * upgrade to PVMG will unshare the string */ + !(SvFAKE(sv) && SvTYPE(sv) < SVt_PVMG) - if (DEBUG_C_TEST) { - PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n", - sstr, dstr); - sv_dump(sstr); - if (dstr) - sv_dump(dstr); + && IN_PERL_RUNTIME + && how != PERL_MAGIC_regex_global + && how != PERL_MAGIC_bm + && how != PERL_MAGIC_fm + && how != PERL_MAGIC_sv + && how != PERL_MAGIC_backref + ) + { + Perl_croak(aTHX_ PL_no_modify); + } } - - if (dstr) { - if (SvTHINKFIRST(dstr)) - sv_force_normal_flags(dstr, SV_COW_DROP_PV); - else if (SvPVX_const(dstr)) - Safefree(SvPVX_const(dstr)); + if (SvMAGICAL(sv) || (how == PERL_MAGIC_taint && SvTYPE(sv) >= SVt_PVMG)) { + if (SvMAGIC(sv) && (mg = mg_find(sv, how))) { + /* sv_magic() refuses to add a magic of the same 'how' as an + existing one + */ + if (how == PERL_MAGIC_taint) + mg->mg_len |= 1; + return; + } } - else - new_SV(dstr); - SvUPGRADE(dstr, SVt_PVIV); - - assert (SvPOK(sstr)); - assert (SvPOKp(sstr)); - assert (!SvIOK(sstr)); - assert (!SvIOKp(sstr)); - assert (!SvNOK(sstr)); - assert (!SvNOKp(sstr)); - - if (SvIsCOW(sstr)) { - if (SvLEN(sstr) == 0) { - /* source is a COW shared hash key. */ - DEBUG_C(PerlIO_printf(Perl_debug_log, - "Fast copy on write: Sharing hash\n")); - new_pv = HEK_KEY(share_hek_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sstr)))); - goto common_exit; - } - SV_COW_NEXT_SV_SET(dstr, SV_COW_NEXT_SV(sstr)); - } else { - assert ((SvFLAGS(sstr) & CAN_COW_MASK) == CAN_COW_FLAGS); - SvUPGRADE(sstr, SVt_PVIV); - SvREADONLY_on(sstr); - SvFAKE_on(sstr); - DEBUG_C(PerlIO_printf(Perl_debug_log, - "Fast copy on write: Converting sstr to COW\n")); - SV_COW_NEXT_SV_SET(dstr, sstr); + switch (how) { + case PERL_MAGIC_sv: + vtable = &PL_vtbl_sv; + break; + case PERL_MAGIC_overload: + vtable = &PL_vtbl_amagic; + break; + case PERL_MAGIC_overload_elem: + vtable = &PL_vtbl_amagicelem; + break; + case PERL_MAGIC_overload_table: + vtable = &PL_vtbl_ovrld; + break; + case PERL_MAGIC_bm: + vtable = &PL_vtbl_bm; + break; + case PERL_MAGIC_regdata: + vtable = &PL_vtbl_regdata; + break; + case PERL_MAGIC_regdatum: + vtable = &PL_vtbl_regdatum; + break; + case PERL_MAGIC_env: + vtable = &PL_vtbl_env; + break; + case PERL_MAGIC_fm: + vtable = &PL_vtbl_fm; + break; + case PERL_MAGIC_envelem: + vtable = &PL_vtbl_envelem; + break; + case PERL_MAGIC_regex_global: + vtable = &PL_vtbl_mglob; + break; + case PERL_MAGIC_isa: + vtable = &PL_vtbl_isa; + break; + case PERL_MAGIC_isaelem: + vtable = &PL_vtbl_isaelem; + break; + case PERL_MAGIC_nkeys: + vtable = &PL_vtbl_nkeys; + break; + case PERL_MAGIC_dbfile: + vtable = NULL; + break; + case PERL_MAGIC_dbline: + vtable = &PL_vtbl_dbline; + break; +#ifdef USE_LOCALE_COLLATE + case PERL_MAGIC_collxfrm: + vtable = &PL_vtbl_collxfrm; + break; +#endif /* USE_LOCALE_COLLATE */ + case PERL_MAGIC_tied: + vtable = &PL_vtbl_pack; + break; + case PERL_MAGIC_tiedelem: + case PERL_MAGIC_tiedscalar: + vtable = &PL_vtbl_packelem; + break; + case PERL_MAGIC_qr: + vtable = &PL_vtbl_regexp; + break; + case PERL_MAGIC_sig: + vtable = &PL_vtbl_sig; + break; + case PERL_MAGIC_sigelem: + vtable = &PL_vtbl_sigelem; + break; + case PERL_MAGIC_taint: + vtable = &PL_vtbl_taint; + break; + case PERL_MAGIC_uvar: + vtable = &PL_vtbl_uvar; + break; + case PERL_MAGIC_vec: + vtable = &PL_vtbl_vec; + break; + case PERL_MAGIC_arylen_p: + case PERL_MAGIC_rhash: + case PERL_MAGIC_symtab: + case PERL_MAGIC_vstring: + vtable = NULL; + break; + case PERL_MAGIC_utf8: + vtable = &PL_vtbl_utf8; + break; + case PERL_MAGIC_substr: + vtable = &PL_vtbl_substr; + break; + case PERL_MAGIC_defelem: + vtable = &PL_vtbl_defelem; + break; + case PERL_MAGIC_glob: + vtable = &PL_vtbl_glob; + break; + case PERL_MAGIC_arylen: + vtable = &PL_vtbl_arylen; + break; + case PERL_MAGIC_pos: + vtable = &PL_vtbl_pos; + break; + case PERL_MAGIC_backref: + vtable = &PL_vtbl_backref; + break; + case PERL_MAGIC_ext: + /* Reserved for use by extensions not perl internals. */ + /* Useful for attaching extension internal data to perl vars. */ + /* Note that multiple extensions may clash if magical scalars */ + /* etc holding private data from one are passed to another. */ + vtable = NULL; + break; + default: + Perl_croak(aTHX_ "Don't know how to handle magic of type \\%o", how); } - SV_COW_NEXT_SV_SET(sstr, dstr); - new_pv = SvPVX_mutable(sstr); - common_exit: - SvPV_set(dstr, new_pv); - SvFLAGS(dstr) = (SVt_PVIV|SVf_POK|SVp_POK|SVf_FAKE|SVf_READONLY); - if (SvUTF8(sstr)) - SvUTF8_on(dstr); - SvLEN_set(dstr, len); - SvCUR_set(dstr, cur); - if (DEBUG_C_TEST) { - sv_dump(dstr); + /* Rest of work is done else where */ + mg = sv_magicext(sv,obj,how,vtable,name,namlen); + + switch (how) { + case PERL_MAGIC_taint: + mg->mg_len = 1; + break; + case PERL_MAGIC_ext: + case PERL_MAGIC_dbfile: + SvRMAGICAL_on(sv); + break; } - return dstr; } -#endif /* -=for apidoc sv_setpvn +=for apidoc sv_unmagic -Copies a string into an SV. The C parameter indicates the number of -bytes to be copied. If the C argument is NULL the SV will become -undefined. Does not handle 'set' magic. See C. +Removes all magic of type C from an SV. =cut */ -void -Perl_sv_setpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN len) +int +Perl_sv_unmagic(pTHX_ SV *sv, int type) { - register char *dptr; - - SV_CHECK_THINKFIRST_COW_DROP(sv); - if (!ptr) { - (void)SvOK_off(sv); - return; + MAGIC* mg; + MAGIC** mgp; + if (SvTYPE(sv) < SVt_PVMG || !SvMAGIC(sv)) + return 0; + mgp = &SvMAGIC(sv); + for (mg = *mgp; mg; mg = *mgp) { + if (mg->mg_type == type) { + const MGVTBL* const vtbl = mg->mg_virtual; + *mgp = mg->mg_moremagic; + if (vtbl && vtbl->svt_free) + CALL_FPTR(vtbl->svt_free)(aTHX_ sv, mg); + if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) { + if (mg->mg_len > 0) + Safefree(mg->mg_ptr); + else if (mg->mg_len == HEf_SVKEY) + SvREFCNT_dec((SV*)mg->mg_ptr); + else if (mg->mg_type == PERL_MAGIC_utf8 && mg->mg_ptr) + Safefree(mg->mg_ptr); + } + if (mg->mg_flags & MGf_REFCOUNTED) + SvREFCNT_dec(mg->mg_obj); + Safefree(mg); + } + else + mgp = &mg->mg_moremagic; } - else { - /* len is STRLEN which is unsigned, need to copy to signed */ - const IV iv = len; - if (iv < 0) - Perl_croak(aTHX_ "panic: sv_setpvn called with negative strlen"); + if (!SvMAGIC(sv)) { + SvMAGICAL_off(sv); + SvFLAGS(sv) |= (SvFLAGS(sv) & (SVp_NOK|SVp_POK)) >> PRIVSHIFT; } - SvUPGRADE(sv, SVt_PV); - dptr = SvGROW(sv, len + 1); - Move(ptr,dptr,len,char); - dptr[len] = '\0'; - SvCUR_set(sv, len); - (void)SvPOK_only_UTF8(sv); /* validate pointer */ - SvTAINT(sv); + return 0; } /* -=for apidoc sv_setpvn_mg +=for apidoc sv_rvweaken -Like C, but also handles 'set' magic. +Weaken a reference: set the C flag on this RV; give the +referred-to SV C magic if it hasn't already; and +push a back-reference to this RV onto the array of backreferences +associated with that magic. =cut */ -void -Perl_sv_setpvn_mg(pTHX_ register SV *sv, register const char *ptr, register STRLEN len) +SV * +Perl_sv_rvweaken(pTHX_ SV *sv) { - sv_setpvn(sv,ptr,len); - SvSETMAGIC(sv); + SV *tsv; + if (!SvOK(sv)) /* let undefs pass */ + return sv; + if (!SvROK(sv)) + Perl_croak(aTHX_ "Can't weaken a nonreference"); + else if (SvWEAKREF(sv)) { + if (ckWARN(WARN_MISC)) + Perl_warner(aTHX_ packWARN(WARN_MISC), "Reference is already weak"); + return sv; + } + tsv = SvRV(sv); + Perl_sv_add_backref(aTHX_ tsv, sv); + SvWEAKREF_on(sv); + SvREFCNT_dec(tsv); + return sv; } -/* -=for apidoc sv_setpv - -Copies a string into an SV. The string must be null-terminated. Does not -handle 'set' magic. See C. - -=cut -*/ +/* Give tsv backref magic if it hasn't already got it, then push a + * back-reference to sv onto the array associated with the backref magic. + */ void -Perl_sv_setpv(pTHX_ register SV *sv, register const char *ptr) +Perl_sv_add_backref(pTHX_ SV *tsv, SV *sv) { - register STRLEN len; - - SV_CHECK_THINKFIRST_COW_DROP(sv); - if (!ptr) { - (void)SvOK_off(sv); - return; + AV *av; + MAGIC *mg; + if (SvMAGICAL(tsv) && (mg = mg_find(tsv, PERL_MAGIC_backref))) + av = (AV*)mg->mg_obj; + else { + av = newAV(); + sv_magic(tsv, (SV*)av, PERL_MAGIC_backref, NULL, 0); + /* av now has a refcnt of 2, which avoids it getting freed + * before us during global cleanup. The extra ref is removed + * by magic_killbackrefs() when tsv is being freed */ } - len = strlen(ptr); - SvUPGRADE(sv, SVt_PV); + if (AvFILLp(av) >= AvMAX(av)) { + av_extend(av, AvFILLp(av)+1); + } + AvARRAY(av)[++AvFILLp(av)] = sv; /* av_push() */ +} - SvGROW(sv, len + 1); - Move(ptr,SvPVX(sv),len+1,char); - SvCUR_set(sv, len); - (void)SvPOK_only_UTF8(sv); /* validate pointer */ - SvTAINT(sv); +/* delete a back-reference to ourselves from the backref magic associated + * with the SV we point to. + */ + +STATIC void +S_sv_del_backref(pTHX_ SV *tsv, SV *sv) +{ + AV *av; + SV **svp; + I32 i; + MAGIC *mg = NULL; + if (!SvMAGICAL(tsv) || !(mg = mg_find(tsv, PERL_MAGIC_backref))) { + if (PL_in_clean_all) + return; + } + if (!SvMAGICAL(tsv) || !(mg = mg_find(tsv, PERL_MAGIC_backref))) + Perl_croak(aTHX_ "panic: del_backref"); + av = (AV *)mg->mg_obj; + svp = AvARRAY(av); + /* We shouldn't be in here more than once, but for paranoia reasons lets + not assume this. */ + for (i = AvFILLp(av); i >= 0; i--) { + if (svp[i] == sv) { + const SSize_t fill = AvFILLp(av); + if (i != fill) { + /* We weren't the last entry. + An unordered list has this property that you can take the + last element off the end to fill the hole, and it's still + an unordered list :-) + */ + svp[i] = svp[fill]; + } + svp[fill] = Nullsv; + AvFILLp(av) = fill - 1; + } + } } /* -=for apidoc sv_setpv_mg +=for apidoc sv_insert -Like C, but also handles 'set' magic. +Inserts a string at the specified offset/length within the SV. Similar to +the Perl substr() function. =cut */ void -Perl_sv_setpv_mg(pTHX_ register SV *sv, register const char *ptr) +Perl_sv_insert(pTHX_ SV *bigstr, STRLEN offset, STRLEN len, const char *little, STRLEN littlelen) { - sv_setpv(sv,ptr); - SvSETMAGIC(sv); + register char *big; + register char *mid; + register char *midend; + register char *bigend; + register I32 i; + STRLEN curlen; + + + if (!bigstr) + Perl_croak(aTHX_ "Can't modify non-existent substring"); + SvPV_force(bigstr, curlen); + (void)SvPOK_only_UTF8(bigstr); + if (offset + len > curlen) { + SvGROW(bigstr, offset+len+1); + Zero(SvPVX(bigstr)+curlen, offset+len-curlen, char); + SvCUR_set(bigstr, offset+len); + } + + SvTAINT(bigstr); + i = littlelen - len; + if (i > 0) { /* string might grow */ + big = SvGROW(bigstr, SvCUR(bigstr) + i + 1); + mid = big + offset + len; + midend = bigend = big + SvCUR(bigstr); + bigend += i; + *bigend = '\0'; + while (midend > mid) /* shove everything down */ + *--bigend = *--midend; + Move(little,big+offset,littlelen,char); + SvCUR_set(bigstr, SvCUR(bigstr) + i); + SvSETMAGIC(bigstr); + return; + } + else if (i == 0) { + Move(little,SvPVX(bigstr)+offset,len,char); + SvSETMAGIC(bigstr); + return; + } + + big = SvPVX(bigstr); + mid = big + offset; + midend = mid + len; + bigend = big + SvCUR(bigstr); + + if (midend > bigend) + Perl_croak(aTHX_ "panic: sv_insert"); + + if (mid - big > bigend - midend) { /* faster to shorten from end */ + if (littlelen) { + Move(little, mid, littlelen,char); + mid += littlelen; + } + i = bigend - midend; + if (i > 0) { + Move(midend, mid, i,char); + mid += i; + } + *mid = '\0'; + SvCUR_set(bigstr, mid - big); + } + else if ((i = mid - big)) { /* faster from front */ + midend -= littlelen; + mid = midend; + sv_chop(bigstr,midend-i); + big += i; + while (i--) + *--midend = *--big; + if (littlelen) + Move(little, mid, littlelen,char); + } + else if (littlelen) { + midend -= littlelen; + sv_chop(bigstr,midend); + Move(little,midend,littlelen,char); + } + else { + sv_chop(bigstr,midend); + } + SvSETMAGIC(bigstr); } /* -=for apidoc sv_usepvn +=for apidoc sv_replace -Tells an SV to use C to find its string value. Normally the string is -stored inside the SV but sv_usepvn allows the SV to use an outside string. -The C should point to memory that was allocated by C. The -string length, C, must be supplied. This function will realloc the -memory pointed to by C, so that pointer should not be freed or used by -the programmer after giving it to sv_usepvn. Does not handle 'set' magic. -See C. +Make the first argument a copy of the second, then delete the original. +The target SV physically takes over ownership of the body of the source SV +and inherits its flags; however, the target keeps any magic it owns, +and any magic in the source is discarded. +Note that this is a rather specialist SV copying operation; most of the +time you'll want to use C or one of its many macro front-ends. =cut */ void -Perl_sv_usepvn(pTHX_ register SV *sv, register char *ptr, register STRLEN len) +Perl_sv_replace(pTHX_ register SV *sv, register SV *nsv) { - STRLEN allocate; + const U32 refcnt = SvREFCNT(sv); SV_CHECK_THINKFIRST_COW_DROP(sv); - SvUPGRADE(sv, SVt_PV); - if (!ptr) { - (void)SvOK_off(sv); - return; + if (SvREFCNT(nsv) != 1) { + Perl_croak(aTHX_ "panic: reference miscount on nsv in sv_replace() (%" + UVuf " != 1)", (UV) SvREFCNT(nsv)); } - if (SvPVX_const(sv)) - SvPV_free(sv); + if (SvMAGICAL(sv)) { + if (SvMAGICAL(nsv)) + mg_free(nsv); + else + sv_upgrade(nsv, SVt_PVMG); + SvMAGIC_set(nsv, SvMAGIC(sv)); + SvFLAGS(nsv) |= SvMAGICAL(sv); + SvMAGICAL_off(sv); + SvMAGIC_set(sv, NULL); + } + SvREFCNT(sv) = 0; + sv_clear(sv); + assert(!SvREFCNT(sv)); +#ifdef DEBUG_LEAKING_SCALARS + sv->sv_flags = nsv->sv_flags; + sv->sv_any = nsv->sv_any; + sv->sv_refcnt = nsv->sv_refcnt; + sv->sv_u = nsv->sv_u; +#else + StructCopy(nsv,sv,SV); +#endif + /* Currently could join these into one piece of pointer arithmetic, but + it would be unclear. */ + if(SvTYPE(sv) == SVt_IV) + SvANY(sv) + = (XPVIV*)((char*)&(sv->sv_u.svu_iv) - STRUCT_OFFSET(XPVIV, xiv_iv)); + else if (SvTYPE(sv) == SVt_RV) { + SvANY(sv) = &sv->sv_u.svu_rv; + } + - allocate = PERL_STRLEN_ROUNDUP(len + 1); - ptr = saferealloc (ptr, allocate); - SvPV_set(sv, ptr); - SvCUR_set(sv, len); - SvLEN_set(sv, allocate); - *SvEND(sv) = '\0'; - (void)SvPOK_only_UTF8(sv); /* validate pointer */ - SvTAINT(sv); +#ifdef PERL_OLD_COPY_ON_WRITE + if (SvIsCOW_normal(nsv)) { + /* We need to follow the pointers around the loop to make the + previous SV point to sv, rather than nsv. */ + SV *next; + SV *current = nsv; + while ((next = SV_COW_NEXT_SV(current)) != nsv) { + assert(next); + current = next; + assert(SvPVX_const(current) == SvPVX_const(nsv)); + } + /* Make the SV before us point to the SV after us. */ + if (DEBUG_C_TEST) { + PerlIO_printf(Perl_debug_log, "previous is\n"); + sv_dump(current); + PerlIO_printf(Perl_debug_log, + "move it from 0x%"UVxf" to 0x%"UVxf"\n", + (UV) SV_COW_NEXT_SV(current), (UV) sv); + } + SV_COW_NEXT_SV_SET(current, sv); + } +#endif + SvREFCNT(sv) = refcnt; + SvFLAGS(nsv) |= SVTYPEMASK; /* Mark as freed */ + SvREFCNT(nsv) = 0; + del_SV(nsv); } /* -=for apidoc sv_usepvn_mg +=for apidoc sv_clear -Like C, but also handles 'set' magic. +Clear an SV: call any destructors, free up any memory used by the body, +and free the body itself. The SV's head is I freed, although +its type is set to all 1's so that it won't inadvertently be assumed +to be live during global destruction etc. +This function should only be called when REFCNT is zero. Most of the time +you'll want to call C (or its macro wrapper C) +instead. =cut */ void -Perl_sv_usepvn_mg(pTHX_ register SV *sv, register char *ptr, register STRLEN len) +Perl_sv_clear(pTHX_ register SV *sv) { - sv_usepvn(sv,ptr,len); - SvSETMAGIC(sv); -} + dVAR; + const U32 type = SvTYPE(sv); + const struct body_details *const sv_type_details + = bodies_by_type + type; -#ifdef PERL_OLD_COPY_ON_WRITE -/* Need to do this *after* making the SV normal, as we need the buffer - pointer to remain valid until after we've copied it. If we let go too early, - another thread could invalidate it by unsharing last of the same hash key - (which it can do by means other than releasing copy-on-write Svs) - or by changing the other copy-on-write SVs in the loop. */ -STATIC void -S_sv_release_COW(pTHX_ register SV *sv, const char *pvx, STRLEN len, SV *after) -{ - if (len) { /* this SV was SvIsCOW_normal(sv) */ - /* we need to find the SV pointing to us. */ - SV * const current = SV_COW_NEXT_SV(after); + assert(sv); + assert(SvREFCNT(sv) == 0); - if (current == sv) { - /* The SV we point to points back to us (there were only two of us - in the loop.) - Hence other SV is no longer copy on write either. */ - SvFAKE_off(after); - SvREADONLY_off(after); - } else { - /* We need to follow the pointers around the loop. */ - SV *next; - while ((next = SV_COW_NEXT_SV(current)) != sv) { - assert (next); - current = next; - /* don't loop forever if the structure is bust, and we have - a pointer into a closed loop. */ - assert (current != after); - assert (SvPVX_const(current) == pvx); - } - /* Make the SV before us point to the SV after us. */ - SV_COW_NEXT_SV_SET(current, after); - } - } else { - unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); - } -} + if (type <= SVt_IV) + return; -int -Perl_sv_release_IVX(pTHX_ register SV *sv) -{ - if (SvIsCOW(sv)) - sv_force_normal_flags(sv, 0); - SvOOK_off(sv); - return 0; -} -#endif -/* -=for apidoc sv_force_normal_flags + if (SvOBJECT(sv)) { + if (PL_defstash) { /* Still have a symbol table? */ + dSP; + HV* stash; + do { + CV* destructor; + stash = SvSTASH(sv); + destructor = StashHANDLER(stash,DESTROY); + if (destructor) { + SV* const tmpref = newRV(sv); + SvREADONLY_on(tmpref); /* DESTROY() could be naughty */ + ENTER; + PUSHSTACKi(PERLSI_DESTROY); + EXTEND(SP, 2); + PUSHMARK(SP); + PUSHs(tmpref); + PUTBACK; + call_sv((SV*)destructor, G_DISCARD|G_EVAL|G_KEEPERR|G_VOID); + + + POPSTACK; + SPAGAIN; + LEAVE; + if(SvREFCNT(tmpref) < 2) { + /* tmpref is not kept alive! */ + SvREFCNT(sv)--; + SvRV_set(tmpref, NULL); + SvROK_off(tmpref); + } + SvREFCNT_dec(tmpref); + } + } while (SvOBJECT(sv) && SvSTASH(sv) != stash); -Undo various types of fakery on an SV: if the PV is a shared string, make -a private copy; if we're a ref, stop refing; if we're a glob, downgrade to -an xpvmg; if we're a copy-on-write scalar, this is the on-write time when -we do the copy, and is also used locally. If C is set -then a copy-on-write scalar drops its PV buffer (if any) and becomes -SvPOK_off rather than making a copy. (Used where this scalar is about to be -set to some other value.) In addition, the C parameter gets passed to -C when unrefing. C calls this function -with flags set to 0. -=cut -*/ + if (SvREFCNT(sv)) { + if (PL_in_clean_objs) + Perl_croak(aTHX_ "DESTROY created new reference to dead object '%s'", + HvNAME_get(stash)); + /* DESTROY gave object new lease on life */ + return; + } + } -void -Perl_sv_force_normal_flags(pTHX_ register SV *sv, U32 flags) -{ + if (SvOBJECT(sv)) { + SvREFCNT_dec(SvSTASH(sv)); /* possibly of changed persuasion */ + SvOBJECT_off(sv); /* Curse the object. */ + if (type != SVt_PVIO) + --PL_sv_objcount; /* XXX Might want something more general */ + } + } + if (type >= SVt_PVMG) { + if (SvMAGIC(sv)) + mg_free(sv); + if (type == SVt_PVMG && SvFLAGS(sv) & SVpad_TYPED) + SvREFCNT_dec(SvSTASH(sv)); + } + switch (type) { + case SVt_PVIO: + if (IoIFP(sv) && + IoIFP(sv) != PerlIO_stdin() && + IoIFP(sv) != PerlIO_stdout() && + IoIFP(sv) != PerlIO_stderr()) + { + io_close((IO*)sv, FALSE); + } + if (IoDIRP(sv) && !(IoFLAGS(sv) & IOf_FAKE_DIRP)) + PerlDir_close(IoDIRP(sv)); + IoDIRP(sv) = (DIR*)NULL; + Safefree(IoTOP_NAME(sv)); + Safefree(IoFMT_NAME(sv)); + Safefree(IoBOTTOM_NAME(sv)); + goto freescalar; + case SVt_PVBM: + goto freescalar; + case SVt_PVCV: + case SVt_PVFM: + cv_undef((CV*)sv); + goto freescalar; + case SVt_PVHV: + hv_undef((HV*)sv); + break; + case SVt_PVAV: + av_undef((AV*)sv); + break; + case SVt_PVLV: + if (LvTYPE(sv) == 'T') { /* for tie: return HE to pool */ + SvREFCNT_dec(HeKEY_sv((HE*)LvTARG(sv))); + HeNEXT((HE*)LvTARG(sv)) = PL_hv_fetch_ent_mh; + PL_hv_fetch_ent_mh = (HE*)LvTARG(sv); + } + else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV** */ + SvREFCNT_dec(LvTARG(sv)); + goto freescalar; + case SVt_PVGV: + gp_free((GV*)sv); + Safefree(GvNAME(sv)); + /* If we're in a stash, we don't own a reference to it. However it does + have a back reference to us, which needs to be cleared. */ + if (GvSTASH(sv)) + sv_del_backref((SV*)GvSTASH(sv), sv); + case SVt_PVMG: + case SVt_PVNV: + case SVt_PVIV: + freescalar: + /* Don't bother with SvOOK_off(sv); as we're only going to free it. */ + if (SvOOK(sv)) { + SvPV_set(sv, SvPVX_mutable(sv) - SvIVX(sv)); + /* Don't even bother with turning off the OOK flag. */ + } + case SVt_PV: + case SVt_RV: + if (SvROK(sv)) { + SV *target = SvRV(sv); + if (SvWEAKREF(sv)) + sv_del_backref(target, sv); + else + SvREFCNT_dec(target); + } #ifdef PERL_OLD_COPY_ON_WRITE - if (SvREADONLY(sv)) { - /* At this point I believe I should acquire a global SV mutex. */ - if (SvFAKE(sv)) { - const char * const pvx = SvPVX_const(sv); - const STRLEN len = SvLEN(sv); - const STRLEN cur = SvCUR(sv); - SV * const next = SV_COW_NEXT_SV(sv); /* next COW sv in the loop. */ - if (DEBUG_C_TEST) { - PerlIO_printf(Perl_debug_log, - "Copy on write: Force normal %ld\n", - (long) flags); - sv_dump(sv); - } - SvFAKE_off(sv); - SvREADONLY_off(sv); - /* This SV doesn't own the buffer, so need to Newx() a new one: */ - SvPV_set(sv, (char*)0); - SvLEN_set(sv, 0); - if (flags & SV_COW_DROP_PV) { - /* OK, so we don't need to copy our buffer. */ - SvPOK_off(sv); - } else { - SvGROW(sv, cur + 1); - Move(pvx,SvPVX(sv),cur,char); - SvCUR_set(sv, cur); - *SvEND(sv) = '\0'; - } - sv_release_COW(sv, pvx, len, next); - if (DEBUG_C_TEST) { - sv_dump(sv); + else if (SvPVX_const(sv)) { + if (SvIsCOW(sv)) { + /* I believe I need to grab the global SV mutex here and + then recheck the COW status. */ + if (DEBUG_C_TEST) { + PerlIO_printf(Perl_debug_log, "Copy on write: clear\n"); + sv_dump(sv); + } + sv_release_COW(sv, SvPVX_const(sv), SvLEN(sv), + SV_COW_NEXT_SV(sv)); + /* And drop it here. */ + SvFAKE_off(sv); + } else if (SvLEN(sv)) { + Safefree(SvPVX_const(sv)); } } - else if (IN_PERL_RUNTIME) - Perl_croak(aTHX_ PL_no_modify); - /* At this point I believe that I can drop the global SV mutex. */ - } #else - if (SvREADONLY(sv)) { - if (SvFAKE(sv)) { - const char * const pvx = SvPVX_const(sv); - const STRLEN len = SvCUR(sv); + else if (SvPVX_const(sv) && SvLEN(sv)) + Safefree(SvPVX_mutable(sv)); + else if (SvPVX_const(sv) && SvREADONLY(sv) && SvFAKE(sv)) { + unshare_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sv))); SvFAKE_off(sv); - SvREADONLY_off(sv); - SvPV_set(sv, Nullch); - SvLEN_set(sv, 0); - SvGROW(sv, len + 1); - Move(pvx,SvPVX(sv),len,char); - *SvEND(sv) = '\0'; - unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); } - else if (IN_PERL_RUNTIME) - Perl_croak(aTHX_ PL_no_modify); - } #endif - if (SvROK(sv)) - sv_unref_flags(sv, flags); - else if (SvFAKE(sv) && SvTYPE(sv) == SVt_PVGV) - sv_unglob(sv); + break; + case SVt_NV: + break; + } + + SvFLAGS(sv) &= SVf_BREAK; + SvFLAGS(sv) |= SVTYPEMASK; + + if (sv_type_details->arena) { + del_body(((char *)SvANY(sv) + sv_type_details->offset), + &PL_body_roots[type]); + } + else if (sv_type_details->size) { + my_safefree(SvANY(sv)); + } } /* -=for apidoc sv_chop +=for apidoc sv_newref -Efficient removal of characters from the beginning of the string buffer. -SvPOK(sv) must be true and the C must be a pointer to somewhere inside -the string buffer. The C becomes the first character of the adjusted -string. Uses the "OOK hack". -Beware: after this function returns, C and SvPVX_const(sv) may no longer -refer to the same chunk of data. +Increment an SV's reference count. Use the C wrapper +instead. =cut */ -void -Perl_sv_chop(pTHX_ register SV *sv, register const char *ptr) +SV * +Perl_sv_newref(pTHX_ SV *sv) { - register STRLEN delta; - if (!ptr || !SvPOKp(sv)) - return; - delta = ptr - SvPVX_const(sv); - SV_CHECK_THINKFIRST(sv); - if (SvTYPE(sv) < SVt_PVIV) - sv_upgrade(sv,SVt_PVIV); - - if (!SvOOK(sv)) { - if (!SvLEN(sv)) { /* make copy of shared string */ - const char *pvx = SvPVX_const(sv); - const STRLEN len = SvCUR(sv); - SvGROW(sv, len + 1); - Move(pvx,SvPVX(sv),len,char); - *SvEND(sv) = '\0'; - } - SvIV_set(sv, 0); - /* Same SvOOK_on but SvOOK_on does a SvIOK_off - and we do that anyway inside the SvNIOK_off - */ - SvFLAGS(sv) |= SVf_OOK; - } - SvNIOK_off(sv); - SvLEN_set(sv, SvLEN(sv) - delta); - SvCUR_set(sv, SvCUR(sv) - delta); - SvPV_set(sv, SvPVX(sv) + delta); - SvIV_set(sv, SvIVX(sv) + delta); + if (sv) + (SvREFCNT(sv))++; + return sv; } /* -=for apidoc sv_catpvn - -Concatenates the string onto the end of the string which is in the SV. The -C indicates number of bytes to copy. If the SV has the UTF-8 -status set, then the bytes appended should be valid UTF-8. -Handles 'get' magic, but not 'set' magic. See C. - -=for apidoc sv_catpvn_flags +=for apidoc sv_free -Concatenates the string onto the end of the string which is in the SV. The -C indicates number of bytes to copy. If the SV has the UTF-8 -status set, then the bytes appended should be valid UTF-8. -If C has C bit set, will C on C if -appropriate, else not. C and C are implemented -in terms of this function. +Decrement an SV's reference count, and if it drops to zero, call +C to invoke destructors and free up any memory used by +the body; finally, deallocate the SV's head itself. +Normally called via a wrapper macro C. =cut */ void -Perl_sv_catpvn_flags(pTHX_ register SV *dsv, register const char *sstr, register STRLEN slen, I32 flags) -{ - STRLEN dlen; - const char *dstr = SvPV_force_flags(dsv, dlen, flags); - - SvGROW(dsv, dlen + slen + 1); - if (sstr == dstr) - sstr = SvPVX_const(dsv); - Move(sstr, SvPVX(dsv) + dlen, slen, char); - SvCUR_set(dsv, SvCUR(dsv) + slen); - *SvEND(dsv) = '\0'; - (void)SvPOK_only_UTF8(dsv); /* validate pointer */ - SvTAINT(dsv); - if (flags & SV_SMAGIC) - SvSETMAGIC(dsv); -} - -/* -=for apidoc sv_catsv - -Concatenates the string from SV C onto the end of the string in -SV C. Modifies C but not C. Handles 'get' magic, but -not 'set' magic. See C. - -=for apidoc sv_catsv_flags - -Concatenates the string from SV C onto the end of the string in -SV C. Modifies C but not C. If C has C -bit set, will C on the SVs if appropriate, else not. C -and C are implemented in terms of this function. - -=cut */ - -void -Perl_sv_catsv_flags(pTHX_ SV *dsv, register SV *ssv, I32 flags) +Perl_sv_free(pTHX_ SV *sv) { - const char *spv; - STRLEN slen; - if (ssv) { - if ((spv = SvPV_const(ssv, slen))) { - /* sutf8 and dutf8 were type bool, but under USE_ITHREADS, - gcc version 2.95.2 20000220 (Debian GNU/Linux) for - Linux xxx 2.2.17 on sparc64 with gcc -O2, we erroneously - get dutf8 = 0x20000000, (i.e. SVf_UTF8) even though - dsv->sv_flags doesn't have that bit set. - Andy Dougherty 12 Oct 2001 - */ - const I32 sutf8 = DO_UTF8(ssv); - I32 dutf8; - - if (SvGMAGICAL(dsv) && (flags & SV_GMAGIC)) - mg_get(dsv); - dutf8 = DO_UTF8(dsv); - - if (dutf8 != sutf8) { - if (dutf8) { - /* Not modifying source SV, so taking a temporary copy. */ - SV* csv = sv_2mortal(newSVpvn(spv, slen)); - - sv_utf8_upgrade(csv); - spv = SvPV_const(csv, slen); - } - else - sv_utf8_upgrade_nomg(dsv); - } - sv_catpvn_nomg(dsv, spv, slen); + dVAR; + if (!sv) + return; + if (SvREFCNT(sv) == 0) { + if (SvFLAGS(sv) & SVf_BREAK) + /* this SV's refcnt has been artificially decremented to + * trigger cleanup */ + return; + if (PL_in_clean_all) /* All is fair */ + return; + if (SvREADONLY(sv) && SvIMMORTAL(sv)) { + /* make sure SvREFCNT(sv)==0 happens very seldom */ + SvREFCNT(sv) = (~(U32)0)/2; + return; + } + if (ckWARN_d(WARN_INTERNAL)) { + Perl_warner(aTHX_ packWARN(WARN_INTERNAL), + "Attempt to free unreferenced scalar: SV 0x%"UVxf + pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); +#ifdef DEBUG_LEAKING_SCALARS_FORK_DUMP + Perl_dump_sv_child(aTHX_ sv); +#endif } + return; } - if (flags & SV_SMAGIC) - SvSETMAGIC(dsv); + if (--(SvREFCNT(sv)) > 0) + return; + Perl_sv_free2(aTHX_ sv); } -/* -=for apidoc sv_catpv - -Concatenates the string onto the end of the string which is in the SV. -If the SV has the UTF-8 status set, then the bytes appended should be -valid UTF-8. Handles 'get' magic, but not 'set' magic. See C. - -=cut */ - void -Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr) +Perl_sv_free2(pTHX_ SV *sv) { - register STRLEN len; - STRLEN tlen; - char *junk; - - if (!ptr) + dVAR; +#ifdef DEBUGGING + if (SvTEMP(sv)) { + if (ckWARN_d(WARN_DEBUGGING)) + Perl_warner(aTHX_ packWARN(WARN_DEBUGGING), + "Attempt to free temp prematurely: SV 0x%"UVxf + pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); return; - junk = SvPV_force(sv, tlen); - len = strlen(ptr); - SvGROW(sv, tlen + len + 1); - if (ptr == junk) - ptr = SvPVX_const(sv); - Move(ptr,SvPVX(sv)+tlen,len+1,char); - SvCUR_set(sv, SvCUR(sv) + len); - (void)SvPOK_only_UTF8(sv); /* validate pointer */ - SvTAINT(sv); + } +#endif + if (SvREADONLY(sv) && SvIMMORTAL(sv)) { + /* make sure SvREFCNT(sv)==0 happens very seldom */ + SvREFCNT(sv) = (~(U32)0)/2; + return; + } + sv_clear(sv); + if (! SvREFCNT(sv)) + del_SV(sv); } /* -=for apidoc sv_catpv_mg +=for apidoc sv_len -Like C, but also handles 'set' magic. +Returns the length of the string in the SV. Handles magic and type +coercion. See also C, which gives raw access to the xpv_cur slot. =cut */ -void -Perl_sv_catpv_mg(pTHX_ register SV *sv, register const char *ptr) +STRLEN +Perl_sv_len(pTHX_ register SV *sv) { - sv_catpv(sv,ptr); - SvSETMAGIC(sv); + STRLEN len; + + if (!sv) + return 0; + + if (SvGMAGICAL(sv)) + len = mg_length(sv); + else + (void)SvPV_const(sv, len); + return len; } /* -=for apidoc newSV +=for apidoc sv_len_utf8 -Create a new null SV, or if len > 0, create a new empty SVt_PV type SV -with an initial PV allocation of len+1. Normally accessed via the C -macro. +Returns the number of characters in the string in an SV, counting wide +UTF-8 bytes as a single character. Handles magic and type coercion. =cut */ -SV * -Perl_newSV(pTHX_ STRLEN len) -{ - register SV *sv; - - new_SV(sv); - if (len) { - sv_upgrade(sv, SVt_PV); - SvGROW(sv, len + 1); - } - return sv; -} /* -=for apidoc sv_magicext - -Adds magic to an SV, upgrading it if necessary. Applies the -supplied vtable and returns a pointer to the magic added. + * The length is cached in PERL_UTF8_magic, in the mg_len field. Also the + * mg_ptr is used, by sv_pos_u2b(), see the comments of S_utf8_mg_pos_init(). + * (Note that the mg_len is not the length of the mg_ptr field.) + * + */ -Note that C will allow things that C will not. -In particular, you can add magic to SvREADONLY SVs, and add more than -one instance of the same 'how'. +STRLEN +Perl_sv_len_utf8(pTHX_ register SV *sv) +{ + if (!sv) + return 0; -If C is greater than zero then a C I of C is -stored, if C is zero then C is stored as-is and - as another -special case - if C<(name && namlen == HEf_SVKEY)> then C is assumed -to contain an C and is stored as-is with its REFCNT incremented. + if (SvGMAGICAL(sv)) + return mg_length(sv); + else + { + STRLEN len, ulen; + const U8 *s = (U8*)SvPV_const(sv, len); + MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0; -(This is now used as a subroutine by C.) + if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0)) { + ulen = mg->mg_len; +#ifdef PERL_UTF8_CACHE_ASSERT + assert(ulen == Perl_utf8_length(aTHX_ s, s + len)); +#endif + } + else { + ulen = Perl_utf8_length(aTHX_ s, s + len); + if (!mg && !SvREADONLY(sv)) { + sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0); + mg = mg_find(sv, PERL_MAGIC_utf8); + assert(mg); + } + if (mg) + mg->mg_len = ulen; + } + return ulen; + } +} -=cut -*/ -MAGIC * -Perl_sv_magicext(pTHX_ SV* sv, SV* obj, int how, const MGVTBL *vtable, - const char* name, I32 namlen) +/* S_utf8_mg_pos_init() is used to initialize the mg_ptr field of + * a PERL_UTF8_magic. The mg_ptr is used to store the mapping + * between UTF-8 and byte offsets. There are two (substr offset and substr + * length, the i offset, PERL_MAGIC_UTF8_CACHESIZE) times two (UTF-8 offset + * and byte offset) cache positions. + * + * The mg_len field is used by sv_len_utf8(), see its comments. + * Note that the mg_len is not the length of the mg_ptr field. + * + */ +STATIC bool +S_utf8_mg_pos_init(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, + I32 offsetp, const U8 *s, const U8 *start) { - MAGIC* mg; + bool found = FALSE; - if (SvTYPE(sv) < SVt_PVMG) { - SvUPGRADE(sv, SVt_PVMG); + if (SvMAGICAL(sv) && !SvREADONLY(sv)) { + if (!*mgp) + *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0, 0); + assert(*mgp); + + if ((*mgp)->mg_ptr) + *cachep = (STRLEN *) (*mgp)->mg_ptr; + else { + Newxz(*cachep, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN); + (*mgp)->mg_ptr = (char *) *cachep; + } + assert(*cachep); + + (*cachep)[i] = offsetp; + (*cachep)[i+1] = s - start; + found = TRUE; } - Newxz(mg, 1, MAGIC); - mg->mg_moremagic = SvMAGIC(sv); - SvMAGIC_set(sv, mg); - /* Sometimes a magic contains a reference loop, where the sv and - object refer to each other. To prevent a reference loop that - would prevent such objects being freed, we look for such loops - and if we find one we avoid incrementing the object refcount. + return found; +} - Note we cannot do this to avoid self-tie loops as intervening RV must - have its REFCNT incremented to keep it in existence. +/* + * S_utf8_mg_pos() is used to query and update mg_ptr field of + * a PERL_UTF8_magic. The mg_ptr is used to store the mapping + * between UTF-8 and byte offsets. See also the comments of + * S_utf8_mg_pos_init(). + * + */ +STATIC bool +S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I32 uoff, const U8 **sp, const U8 *start, const U8 *send) +{ + bool found = FALSE; - */ - if (!obj || obj == sv || - how == PERL_MAGIC_arylen || - how == PERL_MAGIC_qr || - how == PERL_MAGIC_symtab || - (SvTYPE(obj) == SVt_PVGV && - (GvSV(obj) == sv || GvHV(obj) == (HV*)sv || GvAV(obj) == (AV*)sv || - GvCV(obj) == (CV*)sv || GvIOp(obj) == (IO*)sv || - GvFORM(obj) == (CV*)sv))) - { - mg->mg_obj = obj; - } - else { - mg->mg_obj = SvREFCNT_inc(obj); - mg->mg_flags |= MGf_REFCOUNTED; - } + if (SvMAGICAL(sv) && !SvREADONLY(sv)) { + if (!*mgp) + *mgp = mg_find(sv, PERL_MAGIC_utf8); + if (*mgp && (*mgp)->mg_ptr) { + *cachep = (STRLEN *) (*mgp)->mg_ptr; + ASSERT_UTF8_CACHE(*cachep); + if ((*cachep)[i] == (STRLEN)uoff) /* An exact match. */ + found = TRUE; + else { /* We will skip to the right spot. */ + STRLEN forw = 0; + STRLEN backw = 0; + const U8* p = NULL; - /* Normal self-ties simply pass a null object, and instead of - using mg_obj directly, use the SvTIED_obj macro to produce a - new RV as needed. For glob "self-ties", we are tieing the PVIO - with an RV obj pointing to the glob containing the PVIO. In - this case, to avoid a reference loop, we need to weaken the - reference. - */ + /* The assumption is that going backward is half + * the speed of going forward (that's where the + * 2 * backw in the below comes from). (The real + * figure of course depends on the UTF-8 data.) */ - if (how == PERL_MAGIC_tiedscalar && SvTYPE(sv) == SVt_PVIO && - obj && SvROK(obj) && GvIO(SvRV(obj)) == (IO*)sv) - { - sv_rvweaken(obj); - } + if ((*cachep)[i] > (STRLEN)uoff) { + forw = uoff; + backw = (*cachep)[i] - (STRLEN)uoff; - mg->mg_type = how; - mg->mg_len = namlen; - if (name) { - if (namlen > 0) - mg->mg_ptr = savepvn(name, namlen); - else if (namlen == HEf_SVKEY) - mg->mg_ptr = (char*)SvREFCNT_inc((SV*)name); - else - mg->mg_ptr = (char *) name; + if (forw < 2 * backw) + p = start; + else + p = start + (*cachep)[i+1]; + } + /* Try this only for the substr offset (i == 0), + * not for the substr length (i == 2). */ + else if (i == 0) { /* (*cachep)[i] < uoff */ + const STRLEN ulen = sv_len_utf8(sv); + + if ((STRLEN)uoff < ulen) { + forw = (STRLEN)uoff - (*cachep)[i]; + backw = ulen - (STRLEN)uoff; + + if (forw < 2 * backw) + p = start + (*cachep)[i+1]; + else + p = send; + } + + /* If the string is not long enough for uoff, + * we could extend it, but not at this low a level. */ + } + + if (p) { + if (forw < 2 * backw) { + while (forw--) + p += UTF8SKIP(p); + } + else { + while (backw--) { + p--; + while (UTF8_IS_CONTINUATION(*p)) + p--; + } + } + + /* Update the cache. */ + (*cachep)[i] = (STRLEN)uoff; + (*cachep)[i+1] = p - start; + + /* Drop the stale "length" cache */ + if (i == 0) { + (*cachep)[2] = 0; + (*cachep)[3] = 0; + } + + found = TRUE; + } + } + if (found) { /* Setup the return values. */ + *offsetp = (*cachep)[i+1]; + *sp = start + *offsetp; + if (*sp >= send) { + *sp = send; + *offsetp = send - start; + } + else if (*sp < start) { + *sp = start; + *offsetp = 0; + } + } + } +#ifdef PERL_UTF8_CACHE_ASSERT + if (found) { + U8 *s = start; + I32 n = uoff; + + while (n-- && s < send) + s += UTF8SKIP(s); + + if (i == 0) { + assert(*offsetp == s - start); + assert((*cachep)[0] == (STRLEN)uoff); + assert((*cachep)[1] == *offsetp); + } + ASSERT_UTF8_CACHE(*cachep); + } +#endif } - mg->mg_virtual = vtable; - mg_magical(sv); - if (SvGMAGICAL(sv)) - SvFLAGS(sv) &= ~(SVf_IOK|SVf_NOK|SVf_POK); - return mg; + return found; } /* -=for apidoc sv_magic - -Adds magic to an SV. First upgrades C to type C if necessary, -then adds a new magic item of type C to the head of the magic list. - -See C (which C now calls) for a description of the -handling of the C and C arguments. +=for apidoc sv_pos_u2b -You need to use C to add magic to SvREADONLY SVs and also -to add more than one instance of the same 'how'. +Converts the value pointed to by offsetp from a count of UTF-8 chars from +the start of the string, to a count of the equivalent number of bytes; if +lenp is non-zero, it does the same to lenp, but this time starting from +the offset, rather than from the start of the string. Handles magic and +type coercion. =cut */ +/* + * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential + * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and + * byte offsets. See also the comments of S_utf8_mg_pos(). + * + */ + void -Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 namlen) +Perl_sv_pos_u2b(pTHX_ register SV *sv, I32* offsetp, I32* lenp) { - const MGVTBL *vtable; - MAGIC* mg; + const U8 *start; + STRLEN len; -#ifdef PERL_OLD_COPY_ON_WRITE - if (SvIsCOW(sv)) - sv_force_normal_flags(sv, 0); -#endif - if (SvREADONLY(sv)) { - if ( - /* its okay to attach magic to shared strings; the subsequent - * upgrade to PVMG will unshare the string */ - !(SvFAKE(sv) && SvTYPE(sv) < SVt_PVMG) + if (!sv) + return; - && IN_PERL_RUNTIME - && how != PERL_MAGIC_regex_global - && how != PERL_MAGIC_bm - && how != PERL_MAGIC_fm - && how != PERL_MAGIC_sv - && how != PERL_MAGIC_backref - ) - { - Perl_croak(aTHX_ PL_no_modify); - } - } - if (SvMAGICAL(sv) || (how == PERL_MAGIC_taint && SvTYPE(sv) >= SVt_PVMG)) { - if (SvMAGIC(sv) && (mg = mg_find(sv, how))) { - /* sv_magic() refuses to add a magic of the same 'how' as an - existing one - */ - if (how == PERL_MAGIC_taint) - mg->mg_len |= 1; - return; - } - } + start = (U8*)SvPV_const(sv, len); + if (len) { + STRLEN boffset = 0; + STRLEN *cache = 0; + const U8 *s = start; + I32 uoffset = *offsetp; + const U8 * const send = s + len; + MAGIC *mg = 0; + bool found = FALSE; - switch (how) { - case PERL_MAGIC_sv: - vtable = &PL_vtbl_sv; - break; - case PERL_MAGIC_overload: - vtable = &PL_vtbl_amagic; - break; - case PERL_MAGIC_overload_elem: - vtable = &PL_vtbl_amagicelem; - break; - case PERL_MAGIC_overload_table: - vtable = &PL_vtbl_ovrld; - break; - case PERL_MAGIC_bm: - vtable = &PL_vtbl_bm; - break; - case PERL_MAGIC_regdata: - vtable = &PL_vtbl_regdata; - break; - case PERL_MAGIC_regdatum: - vtable = &PL_vtbl_regdatum; - break; - case PERL_MAGIC_env: - vtable = &PL_vtbl_env; - break; - case PERL_MAGIC_fm: - vtable = &PL_vtbl_fm; - break; - case PERL_MAGIC_envelem: - vtable = &PL_vtbl_envelem; - break; - case PERL_MAGIC_regex_global: - vtable = &PL_vtbl_mglob; - break; - case PERL_MAGIC_isa: - vtable = &PL_vtbl_isa; - break; - case PERL_MAGIC_isaelem: - vtable = &PL_vtbl_isaelem; - break; - case PERL_MAGIC_nkeys: - vtable = &PL_vtbl_nkeys; - break; - case PERL_MAGIC_dbfile: - vtable = NULL; - break; - case PERL_MAGIC_dbline: - vtable = &PL_vtbl_dbline; - break; -#ifdef USE_LOCALE_COLLATE - case PERL_MAGIC_collxfrm: - vtable = &PL_vtbl_collxfrm; - break; -#endif /* USE_LOCALE_COLLATE */ - case PERL_MAGIC_tied: - vtable = &PL_vtbl_pack; - break; - case PERL_MAGIC_tiedelem: - case PERL_MAGIC_tiedscalar: - vtable = &PL_vtbl_packelem; - break; - case PERL_MAGIC_qr: - vtable = &PL_vtbl_regexp; - break; - case PERL_MAGIC_sig: - vtable = &PL_vtbl_sig; - break; - case PERL_MAGIC_sigelem: - vtable = &PL_vtbl_sigelem; - break; - case PERL_MAGIC_taint: - vtable = &PL_vtbl_taint; - break; - case PERL_MAGIC_uvar: - vtable = &PL_vtbl_uvar; - break; - case PERL_MAGIC_vec: - vtable = &PL_vtbl_vec; - break; - case PERL_MAGIC_arylen_p: - case PERL_MAGIC_rhash: - case PERL_MAGIC_symtab: - case PERL_MAGIC_vstring: - vtable = NULL; - break; - case PERL_MAGIC_utf8: - vtable = &PL_vtbl_utf8; - break; - case PERL_MAGIC_substr: - vtable = &PL_vtbl_substr; - break; - case PERL_MAGIC_defelem: - vtable = &PL_vtbl_defelem; - break; - case PERL_MAGIC_glob: - vtable = &PL_vtbl_glob; - break; - case PERL_MAGIC_arylen: - vtable = &PL_vtbl_arylen; - break; - case PERL_MAGIC_pos: - vtable = &PL_vtbl_pos; - break; - case PERL_MAGIC_backref: - vtable = &PL_vtbl_backref; - break; - case PERL_MAGIC_ext: - /* Reserved for use by extensions not perl internals. */ - /* Useful for attaching extension internal data to perl vars. */ - /* Note that multiple extensions may clash if magical scalars */ - /* etc holding private data from one are passed to another. */ - vtable = NULL; - break; - default: - Perl_croak(aTHX_ "Don't know how to handle magic of type \\%o", how); + if (utf8_mg_pos(sv, &mg, &cache, 0, offsetp, *offsetp, &s, start, send)) + found = TRUE; + if (!found && uoffset > 0) { + while (s < send && uoffset--) + s += UTF8SKIP(s); + if (s >= send) + s = send; + if (utf8_mg_pos_init(sv, &mg, &cache, 0, *offsetp, s, start)) + boffset = cache[1]; + *offsetp = s - start; + } + if (lenp) { + found = FALSE; + start = s; + if (utf8_mg_pos(sv, &mg, &cache, 2, lenp, *lenp, &s, start, send)) { + *lenp -= boffset; + found = TRUE; + } + if (!found && *lenp > 0) { + I32 ulen = *lenp; + if (ulen > 0) + while (s < send && ulen--) + s += UTF8SKIP(s); + if (s >= send) + s = send; + utf8_mg_pos_init(sv, &mg, &cache, 2, *lenp, s, start); + } + *lenp = s - start; + } + ASSERT_UTF8_CACHE(cache); } - - /* Rest of work is done else where */ - mg = sv_magicext(sv,obj,how,vtable,name,namlen); - - switch (how) { - case PERL_MAGIC_taint: - mg->mg_len = 1; - break; - case PERL_MAGIC_ext: - case PERL_MAGIC_dbfile: - SvRMAGICAL_on(sv); - break; + else { + *offsetp = 0; + if (lenp) + *lenp = 0; } + + return; } /* -=for apidoc sv_unmagic +=for apidoc sv_pos_b2u -Removes all magic of type C from an SV. +Converts the value pointed to by offsetp from a count of bytes from the +start of the string, to a count of the equivalent number of UTF-8 chars. +Handles magic and type coercion. =cut */ -int -Perl_sv_unmagic(pTHX_ SV *sv, int type) -{ - MAGIC* mg; - MAGIC** mgp; - if (SvTYPE(sv) < SVt_PVMG || !SvMAGIC(sv)) - return 0; - mgp = &SvMAGIC(sv); - for (mg = *mgp; mg; mg = *mgp) { - if (mg->mg_type == type) { - const MGVTBL* const vtbl = mg->mg_virtual; - *mgp = mg->mg_moremagic; - if (vtbl && vtbl->svt_free) - CALL_FPTR(vtbl->svt_free)(aTHX_ sv, mg); - if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) { - if (mg->mg_len > 0) - Safefree(mg->mg_ptr); - else if (mg->mg_len == HEf_SVKEY) - SvREFCNT_dec((SV*)mg->mg_ptr); - else if (mg->mg_type == PERL_MAGIC_utf8 && mg->mg_ptr) - Safefree(mg->mg_ptr); - } - if (mg->mg_flags & MGf_REFCOUNTED) - SvREFCNT_dec(mg->mg_obj); - Safefree(mg); - } - else - mgp = &mg->mg_moremagic; - } - if (!SvMAGIC(sv)) { - SvMAGICAL_off(sv); - SvFLAGS(sv) |= (SvFLAGS(sv) & (SVp_NOK|SVp_POK)) >> PRIVSHIFT; - } - - return 0; -} - /* -=for apidoc sv_rvweaken + * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential + * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and + * byte offsets. See also the comments of S_utf8_mg_pos(). + * + */ -Weaken a reference: set the C flag on this RV; give the -referred-to SV C magic if it hasn't already; and -push a back-reference to this RV onto the array of backreferences -associated with that magic. +void +Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp) +{ + const U8* s; + STRLEN len; -=cut -*/ + if (!sv) + return; -SV * -Perl_sv_rvweaken(pTHX_ SV *sv) -{ - SV *tsv; - if (!SvOK(sv)) /* let undefs pass */ - return sv; - if (!SvROK(sv)) - Perl_croak(aTHX_ "Can't weaken a nonreference"); - else if (SvWEAKREF(sv)) { - if (ckWARN(WARN_MISC)) - Perl_warner(aTHX_ packWARN(WARN_MISC), "Reference is already weak"); - return sv; - } - tsv = SvRV(sv); - Perl_sv_add_backref(aTHX_ tsv, sv); - SvWEAKREF_on(sv); - SvREFCNT_dec(tsv); - return sv; -} + s = (const U8*)SvPV_const(sv, len); + if ((I32)len < *offsetp) + Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset"); + else { + const U8* send = s + *offsetp; + MAGIC* mg = NULL; + STRLEN *cache = NULL; -/* Give tsv backref magic if it hasn't already got it, then push a - * back-reference to sv onto the array associated with the backref magic. - */ + len = 0; -void -Perl_sv_add_backref(pTHX_ SV *tsv, SV *sv) -{ - AV *av; - MAGIC *mg; - if (SvMAGICAL(tsv) && (mg = mg_find(tsv, PERL_MAGIC_backref))) - av = (AV*)mg->mg_obj; - else { - av = newAV(); - sv_magic(tsv, (SV*)av, PERL_MAGIC_backref, NULL, 0); - /* av now has a refcnt of 2, which avoids it getting freed - * before us during global cleanup. The extra ref is removed - * by magic_killbackrefs() when tsv is being freed */ - } - if (AvFILLp(av) >= AvMAX(av)) { - av_extend(av, AvFILLp(av)+1); - } - AvARRAY(av)[++AvFILLp(av)] = sv; /* av_push() */ -} + if (SvMAGICAL(sv) && !SvREADONLY(sv)) { + mg = mg_find(sv, PERL_MAGIC_utf8); + if (mg && mg->mg_ptr) { + cache = (STRLEN *) mg->mg_ptr; + if (cache[1] == (STRLEN)*offsetp) { + /* An exact match. */ + *offsetp = cache[0]; -/* delete a back-reference to ourselves from the backref magic associated - * with the SV we point to. - */ + return; + } + else if (cache[1] < (STRLEN)*offsetp) { + /* We already know part of the way. */ + len = cache[0]; + s += cache[1]; + /* Let the below loop do the rest. */ + } + else { /* cache[1] > *offsetp */ + /* We already know all of the way, now we may + * be able to walk back. The same assumption + * is made as in S_utf8_mg_pos(), namely that + * walking backward is twice slower than + * walking forward. */ + const STRLEN forw = *offsetp; + STRLEN backw = cache[1] - *offsetp; -STATIC void -S_sv_del_backref(pTHX_ SV *tsv, SV *sv) -{ - AV *av; - SV **svp; - I32 i; - MAGIC *mg = NULL; - if (!SvMAGICAL(tsv) || !(mg = mg_find(tsv, PERL_MAGIC_backref))) { - if (PL_in_clean_all) - return; - } - if (!SvMAGICAL(tsv) || !(mg = mg_find(tsv, PERL_MAGIC_backref))) - Perl_croak(aTHX_ "panic: del_backref"); - av = (AV *)mg->mg_obj; - svp = AvARRAY(av); - /* We shouldn't be in here more than once, but for paranoia reasons lets - not assume this. */ - for (i = AvFILLp(av); i >= 0; i--) { - if (svp[i] == sv) { - const SSize_t fill = AvFILLp(av); - if (i != fill) { - /* We weren't the last entry. - An unordered list has this property that you can take the - last element off the end to fill the hole, and it's still - an unordered list :-) - */ - svp[i] = svp[fill]; + if (!(forw < 2 * backw)) { + const U8 *p = s + cache[1]; + STRLEN ubackw = 0; + + cache[1] -= backw; + + while (backw--) { + p--; + while (UTF8_IS_CONTINUATION(*p)) { + p--; + backw--; + } + ubackw++; + } + + cache[0] -= ubackw; + *offsetp = cache[0]; + + /* Drop the stale "length" cache */ + cache[2] = 0; + cache[3] = 0; + + return; + } + } } - svp[fill] = Nullsv; - AvFILLp(av) = fill - 1; + ASSERT_UTF8_CACHE(cache); + } + + while (s < send) { + STRLEN n = 1; + + /* Call utf8n_to_uvchr() to validate the sequence + * (unless a simple non-UTF character) */ + if (!UTF8_IS_INVARIANT(*s)) + utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0); + if (n > 0) { + s += n; + len++; + } + else + break; + } + + if (!SvREADONLY(sv)) { + if (!mg) { + sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0); + mg = mg_find(sv, PERL_MAGIC_utf8); + } + assert(mg); + + if (!mg->mg_ptr) { + Newxz(cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN); + mg->mg_ptr = (char *) cache; + } + assert(cache); + + cache[0] = len; + cache[1] = *offsetp; + /* Drop the stale "length" cache */ + cache[2] = 0; + cache[3] = 0; } + + *offsetp = len; } + return; } /* -=for apidoc sv_insert +=for apidoc sv_eq -Inserts a string at the specified offset/length within the SV. Similar to -the Perl substr() function. +Returns a boolean indicating whether the strings in the two SVs are +identical. Is UTF-8 and 'use bytes' aware, handles get magic, and will +coerce its args to strings if necessary. =cut */ -void -Perl_sv_insert(pTHX_ SV *bigstr, STRLEN offset, STRLEN len, const char *little, STRLEN littlelen) +I32 +Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) { - register char *big; - register char *mid; - register char *midend; - register char *bigend; - register I32 i; - STRLEN curlen; - + const char *pv1; + STRLEN cur1; + const char *pv2; + STRLEN cur2; + I32 eq = 0; + char *tpv = Nullch; + SV* svrecode = Nullsv; - if (!bigstr) - Perl_croak(aTHX_ "Can't modify non-existent substring"); - SvPV_force(bigstr, curlen); - (void)SvPOK_only_UTF8(bigstr); - if (offset + len > curlen) { - SvGROW(bigstr, offset+len+1); - Zero(SvPVX(bigstr)+curlen, offset+len-curlen, char); - SvCUR_set(bigstr, offset+len); + if (!sv1) { + pv1 = ""; + cur1 = 0; } + else + pv1 = SvPV_const(sv1, cur1); - SvTAINT(bigstr); - i = littlelen - len; - if (i > 0) { /* string might grow */ - big = SvGROW(bigstr, SvCUR(bigstr) + i + 1); - mid = big + offset + len; - midend = bigend = big + SvCUR(bigstr); - bigend += i; - *bigend = '\0'; - while (midend > mid) /* shove everything down */ - *--bigend = *--midend; - Move(little,big+offset,littlelen,char); - SvCUR_set(bigstr, SvCUR(bigstr) + i); - SvSETMAGIC(bigstr); - return; - } - else if (i == 0) { - Move(little,SvPVX(bigstr)+offset,len,char); - SvSETMAGIC(bigstr); - return; + if (!sv2){ + pv2 = ""; + cur2 = 0; } + else + pv2 = SvPV_const(sv2, cur2); - big = SvPVX(bigstr); - mid = big + offset; - midend = mid + len; - bigend = big + SvCUR(bigstr); - - if (midend > bigend) - Perl_croak(aTHX_ "panic: sv_insert"); - - if (mid - big > bigend - midend) { /* faster to shorten from end */ - if (littlelen) { - Move(little, mid, littlelen,char); - mid += littlelen; - } - i = bigend - midend; - if (i > 0) { - Move(midend, mid, i,char); - mid += i; - } - *mid = '\0'; - SvCUR_set(bigstr, mid - big); - } - else if ((i = mid - big)) { /* faster from front */ - midend -= littlelen; - mid = midend; - sv_chop(bigstr,midend-i); - big += i; - while (i--) - *--midend = *--big; - if (littlelen) - Move(little, mid, littlelen,char); - } - else if (littlelen) { - midend -= littlelen; - sv_chop(bigstr,midend); - Move(little,midend,littlelen,char); - } - else { - sv_chop(bigstr,midend); + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { + /* Differing utf8ness. + * Do not UTF8size the comparands as a side-effect. */ + if (PL_encoding) { + if (SvUTF8(sv1)) { + svrecode = newSVpvn(pv2, cur2); + sv_recode_to_utf8(svrecode, PL_encoding); + pv2 = SvPV_const(svrecode, cur2); + } + else { + svrecode = newSVpvn(pv1, cur1); + sv_recode_to_utf8(svrecode, PL_encoding); + pv1 = SvPV_const(svrecode, cur1); + } + /* Now both are in UTF-8. */ + if (cur1 != cur2) { + SvREFCNT_dec(svrecode); + return FALSE; + } + } + else { + bool is_utf8 = TRUE; + + if (SvUTF8(sv1)) { + /* sv1 is the UTF-8 one, + * if is equal it must be downgrade-able */ + char * const pv = (char*)bytes_from_utf8((const U8*)pv1, + &cur1, &is_utf8); + if (pv != pv1) + pv1 = tpv = pv; + } + else { + /* sv2 is the UTF-8 one, + * if is equal it must be downgrade-able */ + char * const pv = (char *)bytes_from_utf8((const U8*)pv2, + &cur2, &is_utf8); + if (pv != pv2) + pv2 = tpv = pv; + } + if (is_utf8) { + /* Downgrade not possible - cannot be eq */ + assert (tpv == 0); + return FALSE; + } + } } - SvSETMAGIC(bigstr); + + if (cur1 == cur2) + eq = (pv1 == pv2) || memEQ(pv1, pv2, cur1); + + if (svrecode) + SvREFCNT_dec(svrecode); + + if (tpv) + Safefree(tpv); + + return eq; } /* -=for apidoc sv_replace +=for apidoc sv_cmp -Make the first argument a copy of the second, then delete the original. -The target SV physically takes over ownership of the body of the source SV -and inherits its flags; however, the target keeps any magic it owns, -and any magic in the source is discarded. -Note that this is a rather specialist SV copying operation; most of the -time you'll want to use C or one of its many macro front-ends. +Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the +string in C is less than, equal to, or greater than the string in +C. Is UTF-8 and 'use bytes' aware, handles get magic, and will +coerce its args to strings if necessary. See also C. =cut */ -void -Perl_sv_replace(pTHX_ register SV *sv, register SV *nsv) +I32 +Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) { - const U32 refcnt = SvREFCNT(sv); - SV_CHECK_THINKFIRST_COW_DROP(sv); - if (SvREFCNT(nsv) != 1) { - Perl_croak(aTHX_ "panic: reference miscount on nsv in sv_replace() (%" - UVuf " != 1)", (UV) SvREFCNT(nsv)); - } - if (SvMAGICAL(sv)) { - if (SvMAGICAL(nsv)) - mg_free(nsv); - else - sv_upgrade(nsv, SVt_PVMG); - SvMAGIC_set(nsv, SvMAGIC(sv)); - SvFLAGS(nsv) |= SvMAGICAL(sv); - SvMAGICAL_off(sv); - SvMAGIC_set(sv, NULL); + STRLEN cur1, cur2; + const char *pv1, *pv2; + char *tpv = Nullch; + I32 cmp; + SV *svrecode = Nullsv; + + if (!sv1) { + pv1 = ""; + cur1 = 0; } - SvREFCNT(sv) = 0; - sv_clear(sv); - assert(!SvREFCNT(sv)); -#ifdef DEBUG_LEAKING_SCALARS - sv->sv_flags = nsv->sv_flags; - sv->sv_any = nsv->sv_any; - sv->sv_refcnt = nsv->sv_refcnt; - sv->sv_u = nsv->sv_u; -#else - StructCopy(nsv,sv,SV); -#endif - /* Currently could join these into one piece of pointer arithmetic, but - it would be unclear. */ - if(SvTYPE(sv) == SVt_IV) - SvANY(sv) - = (XPVIV*)((char*)&(sv->sv_u.svu_iv) - STRUCT_OFFSET(XPVIV, xiv_iv)); - else if (SvTYPE(sv) == SVt_RV) { - SvANY(sv) = &sv->sv_u.svu_rv; + else + pv1 = SvPV_const(sv1, cur1); + + if (!sv2) { + pv2 = ""; + cur2 = 0; } - + else + pv2 = SvPV_const(sv2, cur2); -#ifdef PERL_OLD_COPY_ON_WRITE - if (SvIsCOW_normal(nsv)) { - /* We need to follow the pointers around the loop to make the - previous SV point to sv, rather than nsv. */ - SV *next; - SV *current = nsv; - while ((next = SV_COW_NEXT_SV(current)) != nsv) { - assert(next); - current = next; - assert(SvPVX_const(current) == SvPVX_const(nsv)); + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { + /* Differing utf8ness. + * Do not UTF8size the comparands as a side-effect. */ + if (SvUTF8(sv1)) { + if (PL_encoding) { + svrecode = newSVpvn(pv2, cur2); + sv_recode_to_utf8(svrecode, PL_encoding); + pv2 = SvPV_const(svrecode, cur2); + } + else { + pv2 = tpv = (char*)bytes_to_utf8((const U8*)pv2, &cur2); + } } - /* Make the SV before us point to the SV after us. */ - if (DEBUG_C_TEST) { - PerlIO_printf(Perl_debug_log, "previous is\n"); - sv_dump(current); - PerlIO_printf(Perl_debug_log, - "move it from 0x%"UVxf" to 0x%"UVxf"\n", - (UV) SV_COW_NEXT_SV(current), (UV) sv); + else { + if (PL_encoding) { + svrecode = newSVpvn(pv1, cur1); + sv_recode_to_utf8(svrecode, PL_encoding); + pv1 = SvPV_const(svrecode, cur1); + } + else { + pv1 = tpv = (char*)bytes_to_utf8((const U8*)pv1, &cur1); + } } - SV_COW_NEXT_SV_SET(current, sv); } -#endif - SvREFCNT(sv) = refcnt; - SvFLAGS(nsv) |= SVTYPEMASK; /* Mark as freed */ - SvREFCNT(nsv) = 0; - del_SV(nsv); + + if (!cur1) { + cmp = cur2 ? -1 : 0; + } else if (!cur2) { + cmp = 1; + } else { + const I32 retval = memcmp((const void*)pv1, (const void*)pv2, cur1 < cur2 ? cur1 : cur2); + + if (retval) { + cmp = retval < 0 ? -1 : 1; + } else if (cur1 == cur2) { + cmp = 0; + } else { + cmp = cur1 < cur2 ? -1 : 1; + } + } + + if (svrecode) + SvREFCNT_dec(svrecode); + + if (tpv) + Safefree(tpv); + + return cmp; } /* -=for apidoc sv_clear +=for apidoc sv_cmp_locale -Clear an SV: call any destructors, free up any memory used by the body, -and free the body itself. The SV's head is I freed, although -its type is set to all 1's so that it won't inadvertently be assumed -to be live during global destruction etc. -This function should only be called when REFCNT is zero. Most of the time -you'll want to call C (or its macro wrapper C) -instead. +Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and +'use bytes' aware, handles get magic, and will coerce its args to strings +if necessary. See also C. See also C. =cut */ -void -Perl_sv_clear(pTHX_ register SV *sv) +I32 +Perl_sv_cmp_locale(pTHX_ register SV *sv1, register SV *sv2) { - dVAR; - void** old_body_arena; - size_t old_body_offset; - const U32 type = SvTYPE(sv); +#ifdef USE_LOCALE_COLLATE - assert(sv); - assert(SvREFCNT(sv) == 0); + char *pv1, *pv2; + STRLEN len1, len2; + I32 retval; - if (type <= SVt_IV) - return; + if (PL_collation_standard) + goto raw_compare; - old_body_arena = 0; - old_body_offset = 0; + len1 = 0; + pv1 = sv1 ? sv_collxfrm(sv1, &len1) : (char *) NULL; + len2 = 0; + pv2 = sv2 ? sv_collxfrm(sv2, &len2) : (char *) NULL; - if (SvOBJECT(sv)) { - if (PL_defstash) { /* Still have a symbol table? */ - dSP; - HV* stash; - do { - CV* destructor; - stash = SvSTASH(sv); - destructor = StashHANDLER(stash,DESTROY); - if (destructor) { - SV* const tmpref = newRV(sv); - SvREADONLY_on(tmpref); /* DESTROY() could be naughty */ - ENTER; - PUSHSTACKi(PERLSI_DESTROY); - EXTEND(SP, 2); - PUSHMARK(SP); - PUSHs(tmpref); - PUTBACK; - call_sv((SV*)destructor, G_DISCARD|G_EVAL|G_KEEPERR|G_VOID); - - - POPSTACK; - SPAGAIN; - LEAVE; - if(SvREFCNT(tmpref) < 2) { - /* tmpref is not kept alive! */ - SvREFCNT(sv)--; - SvRV_set(tmpref, NULL); - SvROK_off(tmpref); - } - SvREFCNT_dec(tmpref); - } - } while (SvOBJECT(sv) && SvSTASH(sv) != stash); + if (!pv1 || !len1) { + if (pv2 && len2) + return -1; + else + goto raw_compare; + } + else { + if (!pv2 || !len2) + return 1; + } + retval = memcmp((void*)pv1, (void*)pv2, len1 < len2 ? len1 : len2); - if (SvREFCNT(sv)) { - if (PL_in_clean_objs) - Perl_croak(aTHX_ "DESTROY created new reference to dead object '%s'", - HvNAME_get(stash)); - /* DESTROY gave object new lease on life */ - return; - } - } + if (retval) + return retval < 0 ? -1 : 1; - if (SvOBJECT(sv)) { - SvREFCNT_dec(SvSTASH(sv)); /* possibly of changed persuasion */ - SvOBJECT_off(sv); /* Curse the object. */ - if (type != SVt_PVIO) - --PL_sv_objcount; /* XXX Might want something more general */ - } - } - if (type >= SVt_PVMG) { - if (SvMAGIC(sv)) - mg_free(sv); - if (type == SVt_PVMG && SvFLAGS(sv) & SVpad_TYPED) - SvREFCNT_dec(SvSTASH(sv)); - } - switch (type) { - case SVt_PVIO: - if (IoIFP(sv) && - IoIFP(sv) != PerlIO_stdin() && - IoIFP(sv) != PerlIO_stdout() && - IoIFP(sv) != PerlIO_stderr()) - { - io_close((IO*)sv, FALSE); - } - if (IoDIRP(sv) && !(IoFLAGS(sv) & IOf_FAKE_DIRP)) - PerlDir_close(IoDIRP(sv)); - IoDIRP(sv) = (DIR*)NULL; - Safefree(IoTOP_NAME(sv)); - Safefree(IoFMT_NAME(sv)); - Safefree(IoBOTTOM_NAME(sv)); - /* PVIOs aren't from arenas */ - goto freescalar; - case SVt_PVBM: - old_body_arena = &PL_body_roots[SVt_PVBM]; - goto freescalar; - case SVt_PVCV: - old_body_arena = &PL_body_roots[SVt_PVCV]; - case SVt_PVFM: - /* PVFMs aren't from arenas */ - cv_undef((CV*)sv); - goto freescalar; - case SVt_PVHV: - hv_undef((HV*)sv); - old_body_arena = &PL_body_roots[SVt_PVHV]; - old_body_offset = STRUCT_OFFSET(XPVHV, xhv_fill); - break; - case SVt_PVAV: - av_undef((AV*)sv); - old_body_arena = &PL_body_roots[SVt_PVAV]; - old_body_offset = STRUCT_OFFSET(XPVAV, xav_fill); - break; - case SVt_PVLV: - if (LvTYPE(sv) == 'T') { /* for tie: return HE to pool */ - SvREFCNT_dec(HeKEY_sv((HE*)LvTARG(sv))); - HeNEXT((HE*)LvTARG(sv)) = PL_hv_fetch_ent_mh; - PL_hv_fetch_ent_mh = (HE*)LvTARG(sv); - } - else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV** */ - SvREFCNT_dec(LvTARG(sv)); - old_body_arena = &PL_body_roots[SVt_PVLV]; - goto freescalar; - case SVt_PVGV: - gp_free((GV*)sv); - Safefree(GvNAME(sv)); - /* If we're in a stash, we don't own a reference to it. However it does - have a back reference to us, which needs to be cleared. */ - if (GvSTASH(sv)) - sv_del_backref((SV*)GvSTASH(sv), sv); - old_body_arena = &PL_body_roots[SVt_PVGV]; - goto freescalar; - case SVt_PVMG: - old_body_arena = &PL_body_roots[SVt_PVMG]; - goto freescalar; - case SVt_PVNV: - old_body_arena = &PL_body_roots[SVt_PVNV]; - goto freescalar; - case SVt_PVIV: - old_body_arena = &PL_body_roots[SVt_PVIV]; - old_body_offset = STRUCT_OFFSET(XPVIV, xpv_cur); - freescalar: - /* Don't bother with SvOOK_off(sv); as we're only going to free it. */ - if (SvOOK(sv)) { - SvPV_set(sv, SvPVX_mutable(sv) - SvIVX(sv)); - /* Don't even bother with turning off the OOK flag. */ - } - goto pvrv_common; - case SVt_PV: - old_body_arena = &PL_body_roots[SVt_PV]; - old_body_offset = STRUCT_OFFSET(XPV, xpv_cur); - case SVt_RV: - pvrv_common: - if (SvROK(sv)) { - SV *target = SvRV(sv); - if (SvWEAKREF(sv)) - sv_del_backref(target, sv); - else - SvREFCNT_dec(target); - } -#ifdef PERL_OLD_COPY_ON_WRITE - else if (SvPVX_const(sv)) { - if (SvIsCOW(sv)) { - /* I believe I need to grab the global SV mutex here and - then recheck the COW status. */ - if (DEBUG_C_TEST) { - PerlIO_printf(Perl_debug_log, "Copy on write: clear\n"); - sv_dump(sv); - } - sv_release_COW(sv, SvPVX_const(sv), SvLEN(sv), - SV_COW_NEXT_SV(sv)); - /* And drop it here. */ - SvFAKE_off(sv); - } else if (SvLEN(sv)) { - Safefree(SvPVX_const(sv)); - } - } -#else - else if (SvPVX_const(sv) && SvLEN(sv)) - Safefree(SvPVX_mutable(sv)); - else if (SvPVX_const(sv) && SvREADONLY(sv) && SvFAKE(sv)) { - unshare_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sv))); - SvFAKE_off(sv); - } -#endif - break; - case SVt_NV: - old_body_arena = PL_body_roots[SVt_NV]; - break; - } + /* + * When the result of collation is equality, that doesn't mean + * that there are no differences -- some locales exclude some + * characters from consideration. So to avoid false equalities, + * we use the raw string as a tiebreaker. + */ - SvFLAGS(sv) &= SVf_BREAK; - SvFLAGS(sv) |= SVTYPEMASK; + raw_compare: + /* FALL THROUGH */ -#ifndef PURIFY - if (old_body_arena) { - del_body(((char *)SvANY(sv) + old_body_offset), old_body_arena); - } - else -#endif - if (type > SVt_RV) { - my_safefree(SvANY(sv)); - } +#endif /* USE_LOCALE_COLLATE */ + + return sv_cmp(sv1, sv2); } + +#ifdef USE_LOCALE_COLLATE + /* -=for apidoc sv_newref +=for apidoc sv_collxfrm -Increment an SV's reference count. Use the C wrapper -instead. +Add Collate Transform magic to an SV if it doesn't already have it. + +Any scalar variable may carry PERL_MAGIC_collxfrm magic that contains the +scalar data of the variable, but transformed to such a format that a normal +memory comparison can be used to compare the data according to the locale +settings. =cut */ -SV * -Perl_sv_newref(pTHX_ SV *sv) +char * +Perl_sv_collxfrm(pTHX_ SV *sv, STRLEN *nxp) { - if (sv) - (SvREFCNT(sv))++; - return sv; + MAGIC *mg; + + mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_collxfrm) : (MAGIC *) NULL; + if (!mg || !mg->mg_ptr || *(U32*)mg->mg_ptr != PL_collation_ix) { + const char *s; + char *xf; + STRLEN len, xlen; + + if (mg) + Safefree(mg->mg_ptr); + s = SvPV_const(sv, len); + if ((xf = mem_collxfrm(s, len, &xlen))) { + if (SvREADONLY(sv)) { + SAVEFREEPV(xf); + *nxp = xlen; + return xf + sizeof(PL_collation_ix); + } + if (! mg) { + sv_magic(sv, 0, PERL_MAGIC_collxfrm, 0, 0); + mg = mg_find(sv, PERL_MAGIC_collxfrm); + assert(mg); + } + mg->mg_ptr = xf; + mg->mg_len = xlen; + } + else { + if (mg) { + mg->mg_ptr = NULL; + mg->mg_len = -1; + } + } + } + if (mg && mg->mg_ptr) { + *nxp = mg->mg_len; + return mg->mg_ptr + sizeof(PL_collation_ix); + } + else { + *nxp = 0; + return NULL; + } } +#endif /* USE_LOCALE_COLLATE */ + /* -=for apidoc sv_free +=for apidoc sv_gets -Decrement an SV's reference count, and if it drops to zero, call -C to invoke destructors and free up any memory used by -the body; finally, deallocate the SV's head itself. -Normally called via a wrapper macro C. +Get a line from the filehandle and store it into the SV, optionally +appending to the currently-stored string. =cut */ -void -Perl_sv_free(pTHX_ SV *sv) +char * +Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) { - dVAR; - if (!sv) - return; - if (SvREFCNT(sv) == 0) { - if (SvFLAGS(sv) & SVf_BREAK) - /* this SV's refcnt has been artificially decremented to - * trigger cleanup */ - return; - if (PL_in_clean_all) /* All is fair */ - return; - if (SvREADONLY(sv) && SvIMMORTAL(sv)) { - /* make sure SvREFCNT(sv)==0 happens very seldom */ - SvREFCNT(sv) = (~(U32)0)/2; - return; - } - if (ckWARN_d(WARN_INTERNAL)) { - Perl_warner(aTHX_ packWARN(WARN_INTERNAL), - "Attempt to free unreferenced scalar: SV 0x%"UVxf - pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); -#ifdef DEBUG_LEAKING_SCALARS_FORK_DUMP - Perl_dump_sv_child(aTHX_ sv); -#endif + const char *rsptr; + STRLEN rslen; + register STDCHAR rslast; + register STDCHAR *bp; + register I32 cnt; + I32 i = 0; + I32 rspara = 0; + I32 recsize; + + if (SvTHINKFIRST(sv)) + sv_force_normal_flags(sv, append ? 0 : SV_COW_DROP_PV); + /* XXX. If you make this PVIV, then copy on write can copy scalars read + from <>. + However, perlbench says it's slower, because the existing swipe code + is faster than copy on write. + Swings and roundabouts. */ + SvUPGRADE(sv, SVt_PV); + + SvSCREAM_off(sv); + + if (append) { + if (PerlIO_isutf8(fp)) { + if (!SvUTF8(sv)) { + sv_utf8_upgrade_nomg(sv); + sv_pos_u2b(sv,&append,0); + } + } else if (SvUTF8(sv)) { + SV * const tsv = NEWSV(0,0); + sv_gets(tsv, fp, 0); + sv_utf8_upgrade_nomg(tsv); + SvCUR_set(sv,append); + sv_catsv(sv,tsv); + sv_free(tsv); + goto return_string_or_null; } - return; } - if (--(SvREFCNT(sv)) > 0) - return; - Perl_sv_free2(aTHX_ sv); -} -void -Perl_sv_free2(pTHX_ SV *sv) -{ - dVAR; -#ifdef DEBUGGING - if (SvTEMP(sv)) { - if (ckWARN_d(WARN_DEBUGGING)) - Perl_warner(aTHX_ packWARN(WARN_DEBUGGING), - "Attempt to free temp prematurely: SV 0x%"UVxf - pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); - return; + SvPOK_only(sv); + if (PerlIO_isutf8(fp)) + SvUTF8_on(sv); + + if (IN_PERL_COMPILETIME) { + /* we always read code in line mode */ + rsptr = "\n"; + rslen = 1; } -#endif - if (SvREADONLY(sv) && SvIMMORTAL(sv)) { - /* make sure SvREFCNT(sv)==0 happens very seldom */ - SvREFCNT(sv) = (~(U32)0)/2; - return; + else if (RsSNARF(PL_rs)) { + /* If it is a regular disk file use size from stat() as estimate + of amount we are going to read - may result in malloc-ing + more memory than we realy need if layers bellow reduce + size we read (e.g. CRLF or a gzip layer) + */ + Stat_t st; + if (!PerlLIO_fstat(PerlIO_fileno(fp), &st) && S_ISREG(st.st_mode)) { + const Off_t offset = PerlIO_tell(fp); + if (offset != (Off_t) -1 && st.st_size + append > offset) { + (void) SvGROW(sv, (STRLEN)((st.st_size - offset) + append + 1)); + } + } + rsptr = NULL; + rslen = 0; } - sv_clear(sv); - if (! SvREFCNT(sv)) - del_SV(sv); -} - -/* -=for apidoc sv_len - -Returns the length of the string in the SV. Handles magic and type -coercion. See also C, which gives raw access to the xpv_cur slot. - -=cut -*/ + else if (RsRECORD(PL_rs)) { + I32 bytesread; + char *buffer; -STRLEN -Perl_sv_len(pTHX_ register SV *sv) -{ - STRLEN len; + /* Grab the size of the record we're getting */ + recsize = SvIV(SvRV(PL_rs)); + buffer = SvGROW(sv, (STRLEN)(recsize + append + 1)) + append; + /* Go yank in */ +#ifdef VMS + /* VMS wants read instead of fread, because fread doesn't respect */ + /* RMS record boundaries. This is not necessarily a good thing to be */ + /* doing, but we've got no other real choice - except avoid stdio + as implementation - perhaps write a :vms layer ? + */ + bytesread = PerlLIO_read(PerlIO_fileno(fp), buffer, recsize); +#else + bytesread = PerlIO_read(fp, buffer, recsize); +#endif + if (bytesread < 0) + bytesread = 0; + SvCUR_set(sv, bytesread += append); + buffer[bytesread] = '\0'; + goto return_string_or_null; + } + else if (RsPARA(PL_rs)) { + rsptr = "\n\n"; + rslen = 2; + rspara = 1; + } + else { + /* Get $/ i.e. PL_rs into same encoding as stream wants */ + if (PerlIO_isutf8(fp)) { + rsptr = SvPVutf8(PL_rs, rslen); + } + else { + if (SvUTF8(PL_rs)) { + if (!sv_utf8_downgrade(PL_rs, TRUE)) { + Perl_croak(aTHX_ "Wide character in $/"); + } + } + rsptr = SvPV_const(PL_rs, rslen); + } + } - if (!sv) - return 0; + rslast = rslen ? rsptr[rslen - 1] : '\0'; - if (SvGMAGICAL(sv)) - len = mg_length(sv); - else - (void)SvPV_const(sv, len); - return len; -} + if (rspara) { /* have to do this both before and after */ + do { /* to make sure file boundaries work right */ + if (PerlIO_eof(fp)) + return 0; + i = PerlIO_getc(fp); + if (i != '\n') { + if (i == -1) + return 0; + PerlIO_ungetc(fp,i); + break; + } + } while (i != EOF); + } -/* -=for apidoc sv_len_utf8 + /* See if we know enough about I/O mechanism to cheat it ! */ -Returns the number of characters in the string in an SV, counting wide -UTF-8 bytes as a single character. Handles magic and type coercion. + /* This used to be #ifdef test - it is made run-time test for ease + of abstracting out stdio interface. One call should be cheap + enough here - and may even be a macro allowing compile + time optimization. + */ -=cut -*/ + if (PerlIO_fast_gets(fp)) { -/* - * The length is cached in PERL_UTF8_magic, in the mg_len field. Also the - * mg_ptr is used, by sv_pos_u2b(), see the comments of S_utf8_mg_pos_init(). - * (Note that the mg_len is not the length of the mg_ptr field.) - * - */ + /* + * We're going to steal some values from the stdio struct + * and put EVERYTHING in the innermost loop into registers. + */ + register STDCHAR *ptr; + STRLEN bpx; + I32 shortbuffered; -STRLEN -Perl_sv_len_utf8(pTHX_ register SV *sv) -{ - if (!sv) - return 0; +#if defined(VMS) && defined(PERLIO_IS_STDIO) + /* An ungetc()d char is handled separately from the regular + * buffer, so we getc() it back out and stuff it in the buffer. + */ + i = PerlIO_getc(fp); + if (i == EOF) return 0; + *(--((*fp)->_ptr)) = (unsigned char) i; + (*fp)->_cnt++; +#endif - if (SvGMAGICAL(sv)) - return mg_length(sv); - else - { - STRLEN len, ulen; - const U8 *s = (U8*)SvPV_const(sv, len); - MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0; + /* Here is some breathtakingly efficient cheating */ - if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0)) { - ulen = mg->mg_len; -#ifdef PERL_UTF8_CACHE_ASSERT - assert(ulen == Perl_utf8_length(aTHX_ s, s + len)); -#endif + cnt = PerlIO_get_cnt(fp); /* get count into register */ + /* make sure we have the room */ + if ((I32)(SvLEN(sv) - append) <= cnt + 1) { + /* Not room for all of it + if we are looking for a separator and room for some + */ + if (rslen && cnt > 80 && (I32)SvLEN(sv) > append) { + /* just process what we have room for */ + shortbuffered = cnt - SvLEN(sv) + append + 1; + cnt -= shortbuffered; } else { - ulen = Perl_utf8_length(aTHX_ s, s + len); - if (!mg && !SvREADONLY(sv)) { - sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0); - mg = mg_find(sv, PERL_MAGIC_utf8); - assert(mg); - } - if (mg) - mg->mg_len = ulen; + shortbuffered = 0; + /* remember that cnt can be negative */ + SvGROW(sv, (STRLEN)(append + (cnt <= 0 ? 2 : (cnt + 1)))); } - return ulen; } -} - -/* S_utf8_mg_pos_init() is used to initialize the mg_ptr field of - * a PERL_UTF8_magic. The mg_ptr is used to store the mapping - * between UTF-8 and byte offsets. There are two (substr offset and substr - * length, the i offset, PERL_MAGIC_UTF8_CACHESIZE) times two (UTF-8 offset - * and byte offset) cache positions. - * - * The mg_len field is used by sv_len_utf8(), see its comments. - * Note that the mg_len is not the length of the mg_ptr field. - * - */ -STATIC bool -S_utf8_mg_pos_init(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, - I32 offsetp, const U8 *s, const U8 *start) -{ - bool found = FALSE; - - if (SvMAGICAL(sv) && !SvREADONLY(sv)) { - if (!*mgp) - *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0, 0); - assert(*mgp); - - if ((*mgp)->mg_ptr) - *cachep = (STRLEN *) (*mgp)->mg_ptr; - else { - Newxz(*cachep, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN); - (*mgp)->mg_ptr = (char *) *cachep; + else + shortbuffered = 0; + bp = (STDCHAR*)SvPVX_const(sv) + append; /* move these two too to registers */ + ptr = (STDCHAR*)PerlIO_get_ptr(fp); + DEBUG_P(PerlIO_printf(Perl_debug_log, + "Screamer: entering, ptr=%"UVuf", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); + DEBUG_P(PerlIO_printf(Perl_debug_log, + "Screamer: entering: PerlIO * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", + PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), + PTR2UV(PerlIO_has_base(fp) ? PerlIO_get_base(fp) : 0))); + for (;;) { + screamer: + if (cnt > 0) { + if (rslen) { + while (cnt > 0) { /* this | eat */ + cnt--; + if ((*bp++ = *ptr++) == rslast) /* really | dust */ + goto thats_all_folks; /* screams | sed :-) */ + } + } + else { + Copy(ptr, bp, cnt, char); /* this | eat */ + bp += cnt; /* screams | dust */ + ptr += cnt; /* louder | sed :-) */ + cnt = 0; + } + } + + if (shortbuffered) { /* oh well, must extend */ + cnt = shortbuffered; + shortbuffered = 0; + bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */ + SvCUR_set(sv, bpx); + SvGROW(sv, SvLEN(sv) + append + cnt + 2); + bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */ + continue; } - assert(*cachep); - (*cachep)[i] = offsetp; - (*cachep)[i+1] = s - start; - found = TRUE; - } + DEBUG_P(PerlIO_printf(Perl_debug_log, + "Screamer: going to getc, ptr=%"UVuf", cnt=%ld\n", + PTR2UV(ptr),(long)cnt)); + PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* deregisterize cnt and ptr */ +#if 0 + DEBUG_P(PerlIO_printf(Perl_debug_log, + "Screamer: pre: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", + PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), + PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); +#endif + /* This used to call 'filbuf' in stdio form, but as that behaves like + getc when cnt <= 0 we use PerlIO_getc here to avoid introducing + another abstraction. */ + i = PerlIO_getc(fp); /* get more characters */ +#if 0 + DEBUG_P(PerlIO_printf(Perl_debug_log, + "Screamer: post: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", + PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), + PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); +#endif + cnt = PerlIO_get_cnt(fp); + ptr = (STDCHAR*)PerlIO_get_ptr(fp); /* reregisterize cnt and ptr */ + DEBUG_P(PerlIO_printf(Perl_debug_log, + "Screamer: after getc, ptr=%"UVuf", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); - return found; -} + if (i == EOF) /* all done for ever? */ + goto thats_really_all_folks; -/* - * S_utf8_mg_pos() is used to query and update mg_ptr field of - * a PERL_UTF8_magic. The mg_ptr is used to store the mapping - * between UTF-8 and byte offsets. See also the comments of - * S_utf8_mg_pos_init(). - * - */ -STATIC bool -S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I32 uoff, const U8 **sp, const U8 *start, const U8 *send) -{ - bool found = FALSE; + bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */ + SvCUR_set(sv, bpx); + SvGROW(sv, bpx + cnt + 2); + bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */ - if (SvMAGICAL(sv) && !SvREADONLY(sv)) { - if (!*mgp) - *mgp = mg_find(sv, PERL_MAGIC_utf8); - if (*mgp && (*mgp)->mg_ptr) { - *cachep = (STRLEN *) (*mgp)->mg_ptr; - ASSERT_UTF8_CACHE(*cachep); - if ((*cachep)[i] == (STRLEN)uoff) /* An exact match. */ - found = TRUE; - else { /* We will skip to the right spot. */ - STRLEN forw = 0; - STRLEN backw = 0; - const U8* p = NULL; + *bp++ = (STDCHAR)i; /* store character from PerlIO_getc */ - /* The assumption is that going backward is half - * the speed of going forward (that's where the - * 2 * backw in the below comes from). (The real - * figure of course depends on the UTF-8 data.) */ + if (rslen && (STDCHAR)i == rslast) /* all done for now? */ + goto thats_all_folks; + } - if ((*cachep)[i] > (STRLEN)uoff) { - forw = uoff; - backw = (*cachep)[i] - (STRLEN)uoff; +thats_all_folks: + if ((rslen > 1 && (STRLEN)(bp - (STDCHAR*)SvPVX_const(sv)) < rslen) || + memNE((char*)bp - rslen, rsptr, rslen)) + goto screamer; /* go back to the fray */ +thats_really_all_folks: + if (shortbuffered) + cnt += shortbuffered; + DEBUG_P(PerlIO_printf(Perl_debug_log, + "Screamer: quitting, ptr=%"UVuf", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); + PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* put these back or we're in trouble */ + DEBUG_P(PerlIO_printf(Perl_debug_log, + "Screamer: end: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", + PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), + PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); + *bp = '\0'; + SvCUR_set(sv, bp - (STDCHAR*)SvPVX_const(sv)); /* set length */ + DEBUG_P(PerlIO_printf(Perl_debug_log, + "Screamer: done, len=%ld, string=|%.*s|\n", + (long)SvCUR(sv),(int)SvCUR(sv),SvPVX_const(sv))); + } + else + { + /*The big, slow, and stupid way. */ +#ifdef USE_HEAP_INSTEAD_OF_STACK /* Even slower way. */ + STDCHAR *buf = 0; + Newx(buf, 8192, STDCHAR); + assert(buf); +#else + STDCHAR buf[8192]; +#endif - if (forw < 2 * backw) - p = start; - else - p = start + (*cachep)[i+1]; - } - /* Try this only for the substr offset (i == 0), - * not for the substr length (i == 2). */ - else if (i == 0) { /* (*cachep)[i] < uoff */ - const STRLEN ulen = sv_len_utf8(sv); +screamer2: + if (rslen) { + register const STDCHAR * const bpe = buf + sizeof(buf); + bp = buf; + while ((i = PerlIO_getc(fp)) != EOF && (*bp++ = (STDCHAR)i) != rslast && bp < bpe) + ; /* keep reading */ + cnt = bp - buf; + } + else { + cnt = PerlIO_read(fp,(char*)buf, sizeof(buf)); + /* Accomodate broken VAXC compiler, which applies U8 cast to + * both args of ?: operator, causing EOF to change into 255 + */ + if (cnt > 0) + i = (U8)buf[cnt - 1]; + else + i = EOF; + } - if ((STRLEN)uoff < ulen) { - forw = (STRLEN)uoff - (*cachep)[i]; - backw = ulen - (STRLEN)uoff; + if (cnt < 0) + cnt = 0; /* we do need to re-set the sv even when cnt <= 0 */ + if (append) + sv_catpvn(sv, (char *) buf, cnt); + else + sv_setpvn(sv, (char *) buf, cnt); - if (forw < 2 * backw) - p = start + (*cachep)[i+1]; - else - p = send; - } + if (i != EOF && /* joy */ + (!rslen || + SvCUR(sv) < rslen || + memNE(SvPVX_const(sv) + SvCUR(sv) - rslen, rsptr, rslen))) + { + append = -1; + /* + * If we're reading from a TTY and we get a short read, + * indicating that the user hit his EOF character, we need + * to notice it now, because if we try to read from the TTY + * again, the EOF condition will disappear. + * + * The comparison of cnt to sizeof(buf) is an optimization + * that prevents unnecessary calls to feof(). + * + * - jik 9/25/96 + */ + if (!(cnt < sizeof(buf) && PerlIO_eof(fp))) + goto screamer2; + } - /* If the string is not long enough for uoff, - * we could extend it, but not at this low a level. */ - } +#ifdef USE_HEAP_INSTEAD_OF_STACK + Safefree(buf); +#endif + } - if (p) { - if (forw < 2 * backw) { - while (forw--) - p += UTF8SKIP(p); - } - else { - while (backw--) { - p--; - while (UTF8_IS_CONTINUATION(*p)) - p--; - } - } + if (rspara) { /* have to do this both before and after */ + while (i != EOF) { /* to make sure file boundaries work right */ + i = PerlIO_getc(fp); + if (i != '\n') { + PerlIO_ungetc(fp,i); + break; + } + } + } - /* Update the cache. */ - (*cachep)[i] = (STRLEN)uoff; - (*cachep)[i+1] = p - start; +return_string_or_null: + return (SvCUR(sv) - append) ? SvPVX(sv) : Nullch; +} - /* Drop the stale "length" cache */ - if (i == 0) { - (*cachep)[2] = 0; - (*cachep)[3] = 0; - } +/* +=for apidoc sv_inc - found = TRUE; - } - } - if (found) { /* Setup the return values. */ - *offsetp = (*cachep)[i+1]; - *sp = start + *offsetp; - if (*sp >= send) { - *sp = send; - *offsetp = send - start; - } - else if (*sp < start) { - *sp = start; - *offsetp = 0; - } - } - } -#ifdef PERL_UTF8_CACHE_ASSERT - if (found) { - U8 *s = start; - I32 n = uoff; - - while (n-- && s < send) - s += UTF8SKIP(s); - - if (i == 0) { - assert(*offsetp == s - start); - assert((*cachep)[0] == (STRLEN)uoff); - assert((*cachep)[1] == *offsetp); - } - ASSERT_UTF8_CACHE(*cachep); - } -#endif - } - - return found; -} - -/* -=for apidoc sv_pos_u2b - -Converts the value pointed to by offsetp from a count of UTF-8 chars from -the start of the string, to a count of the equivalent number of bytes; if -lenp is non-zero, it does the same to lenp, but this time starting from -the offset, rather than from the start of the string. Handles magic and -type coercion. +Auto-increment of the value in the SV, doing string to numeric conversion +if necessary. Handles 'get' magic. =cut */ -/* - * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential - * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and - * byte offsets. See also the comments of S_utf8_mg_pos(). - * - */ - void -Perl_sv_pos_u2b(pTHX_ register SV *sv, I32* offsetp, I32* lenp) +Perl_sv_inc(pTHX_ register SV *sv) { - const U8 *start; - STRLEN len; + register char *d; + int flags; if (!sv) return; + SvGETMAGIC(sv); + if (SvTHINKFIRST(sv)) { + if (SvIsCOW(sv)) + sv_force_normal_flags(sv, 0); + if (SvREADONLY(sv)) { + if (IN_PERL_RUNTIME) + Perl_croak(aTHX_ PL_no_modify); + } + if (SvROK(sv)) { + IV i; + if (SvAMAGIC(sv) && AMG_CALLun(sv,inc)) + return; + i = PTR2IV(SvRV(sv)); + sv_unref(sv); + sv_setiv(sv, i); + } + } + flags = SvFLAGS(sv); + if ((flags & (SVp_NOK|SVp_IOK)) == SVp_NOK) { + /* It's (privately or publicly) a float, but not tested as an + integer, so test it to see. */ + (void) SvIV(sv); + flags = SvFLAGS(sv); + } + if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { + /* It's publicly an integer, or privately an integer-not-float */ +#ifdef PERL_PRESERVE_IVUV + oops_its_int: +#endif + if (SvIsUV(sv)) { + if (SvUVX(sv) == UV_MAX) + sv_setnv(sv, UV_MAX_P1); + else + (void)SvIOK_only_UV(sv); + SvUV_set(sv, SvUVX(sv) + 1); + } else { + if (SvIVX(sv) == IV_MAX) + sv_setuv(sv, (UV)IV_MAX + 1); + else { + (void)SvIOK_only(sv); + SvIV_set(sv, SvIVX(sv) + 1); + } + } + return; + } + if (flags & SVp_NOK) { + (void)SvNOK_only(sv); + SvNV_set(sv, SvNVX(sv) + 1.0); + return; + } - start = (U8*)SvPV_const(sv, len); - if (len) { - STRLEN boffset = 0; - STRLEN *cache = 0; - const U8 *s = start; - I32 uoffset = *offsetp; - const U8 * const send = s + len; - MAGIC *mg = 0; - bool found = FALSE; + if (!(flags & SVp_POK) || !*SvPVX_const(sv)) { + if ((flags & SVTYPEMASK) < SVt_PVIV) + sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV ? SVt_PVIV : SVt_IV)); + (void)SvIOK_only(sv); + SvIV_set(sv, 1); + return; + } + d = SvPVX(sv); + while (isALPHA(*d)) d++; + while (isDIGIT(*d)) d++; + if (*d) { +#ifdef PERL_PRESERVE_IVUV + /* Got to punt this as an integer if needs be, but we don't issue + warnings. Probably ought to make the sv_iv_please() that does + the conversion if possible, and silently. */ + const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), NULL); + if (numtype && !(numtype & IS_NUMBER_INFINITY)) { + /* Need to try really hard to see if it's an integer. + 9.22337203685478e+18 is an integer. + but "9.22337203685478e+18" + 0 is UV=9223372036854779904 + so $a="9.22337203685478e+18"; $a+0; $a++ + needs to be the same as $a="9.22337203685478e+18"; $a++ + or we go insane. */ + + (void) sv_2iv(sv); + if (SvIOK(sv)) + goto oops_its_int; - if (utf8_mg_pos(sv, &mg, &cache, 0, offsetp, *offsetp, &s, start, send)) - found = TRUE; - if (!found && uoffset > 0) { - while (s < send && uoffset--) - s += UTF8SKIP(s); - if (s >= send) - s = send; - if (utf8_mg_pos_init(sv, &mg, &cache, 0, *offsetp, s, start)) - boffset = cache[1]; - *offsetp = s - start; - } - if (lenp) { - found = FALSE; - start = s; - if (utf8_mg_pos(sv, &mg, &cache, 2, lenp, *lenp, &s, start, send)) { - *lenp -= boffset; - found = TRUE; - } - if (!found && *lenp > 0) { - I32 ulen = *lenp; - if (ulen > 0) - while (s < send && ulen--) - s += UTF8SKIP(s); - if (s >= send) - s = send; - utf8_mg_pos_init(sv, &mg, &cache, 2, *lenp, s, start); - } - *lenp = s - start; - } - ASSERT_UTF8_CACHE(cache); + /* sv_2iv *should* have made this an NV */ + if (flags & SVp_NOK) { + (void)SvNOK_only(sv); + SvNV_set(sv, SvNVX(sv) + 1.0); + return; + } + /* I don't think we can get here. Maybe I should assert this + And if we do get here I suspect that sv_setnv will croak. NWC + Fall through. */ +#if defined(USE_LONG_DOUBLE) + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n", + SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); +#else + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n", + SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); +#endif + } +#endif /* PERL_PRESERVE_IVUV */ + sv_setnv(sv,Atof(SvPVX_const(sv)) + 1.0); + return; } - else { - *offsetp = 0; - if (lenp) - *lenp = 0; + d--; + while (d >= SvPVX_const(sv)) { + if (isDIGIT(*d)) { + if (++*d <= '9') + return; + *(d--) = '0'; + } + else { +#ifdef EBCDIC + /* MKS: The original code here died if letters weren't consecutive. + * at least it didn't have to worry about non-C locales. The + * new code assumes that ('z'-'a')==('Z'-'A'), letters are + * arranged in order (although not consecutively) and that only + * [A-Za-z] are accepted by isALPHA in the C locale. + */ + if (*d != 'z' && *d != 'Z') { + do { ++*d; } while (!isALPHA(*d)); + return; + } + *(d--) -= 'z' - 'a'; +#else + ++*d; + if (isALPHA(*d)) + return; + *(d--) -= 'z' - 'a' + 1; +#endif + } } - - return; + /* oh,oh, the number grew */ + SvGROW(sv, SvCUR(sv) + 2); + SvCUR_set(sv, SvCUR(sv) + 1); + for (d = SvPVX(sv) + SvCUR(sv); d > SvPVX_const(sv); d--) + *d = d[-1]; + if (isDIGIT(d[1])) + *d = '1'; + else + *d = d[1]; } /* -=for apidoc sv_pos_b2u +=for apidoc sv_dec -Converts the value pointed to by offsetp from a count of bytes from the -start of the string, to a count of the equivalent number of UTF-8 chars. -Handles magic and type coercion. +Auto-decrement of the value in the SV, doing string to numeric conversion +if necessary. Handles 'get' magic. =cut */ -/* - * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential - * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and - * byte offsets. See also the comments of S_utf8_mg_pos(). - * - */ - void -Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp) +Perl_sv_dec(pTHX_ register SV *sv) { - const U8* s; - STRLEN len; + int flags; if (!sv) return; + SvGETMAGIC(sv); + if (SvTHINKFIRST(sv)) { + if (SvIsCOW(sv)) + sv_force_normal_flags(sv, 0); + if (SvREADONLY(sv)) { + if (IN_PERL_RUNTIME) + Perl_croak(aTHX_ PL_no_modify); + } + if (SvROK(sv)) { + IV i; + if (SvAMAGIC(sv) && AMG_CALLun(sv,dec)) + return; + i = PTR2IV(SvRV(sv)); + sv_unref(sv); + sv_setiv(sv, i); + } + } + /* Unlike sv_inc we don't have to worry about string-never-numbers + and keeping them magic. But we mustn't warn on punting */ + flags = SvFLAGS(sv); + if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { + /* It's publicly an integer, or privately an integer-not-float */ +#ifdef PERL_PRESERVE_IVUV + oops_its_int: +#endif + if (SvIsUV(sv)) { + if (SvUVX(sv) == 0) { + (void)SvIOK_only(sv); + SvIV_set(sv, -1); + } + else { + (void)SvIOK_only_UV(sv); + SvUV_set(sv, SvUVX(sv) - 1); + } + } else { + if (SvIVX(sv) == IV_MIN) + sv_setnv(sv, (NV)IV_MIN - 1.0); + else { + (void)SvIOK_only(sv); + SvIV_set(sv, SvIVX(sv) - 1); + } + } + return; + } + if (flags & SVp_NOK) { + SvNV_set(sv, SvNVX(sv) - 1.0); + (void)SvNOK_only(sv); + return; + } + if (!(flags & SVp_POK)) { + if ((flags & SVTYPEMASK) < SVt_PVIV) + sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV) ? SVt_PVIV : SVt_IV); + SvIV_set(sv, -1); + (void)SvIOK_only(sv); + return; + } +#ifdef PERL_PRESERVE_IVUV + { + const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), NULL); + if (numtype && !(numtype & IS_NUMBER_INFINITY)) { + /* Need to try really hard to see if it's an integer. + 9.22337203685478e+18 is an integer. + but "9.22337203685478e+18" + 0 is UV=9223372036854779904 + so $a="9.22337203685478e+18"; $a+0; $a-- + needs to be the same as $a="9.22337203685478e+18"; $a-- + or we go insane. */ + + (void) sv_2iv(sv); + if (SvIOK(sv)) + goto oops_its_int; - s = (const U8*)SvPV_const(sv, len); - if ((I32)len < *offsetp) - Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset"); - else { - const U8* send = s + *offsetp; - MAGIC* mg = NULL; - STRLEN *cache = NULL; - - len = 0; - - if (SvMAGICAL(sv) && !SvREADONLY(sv)) { - mg = mg_find(sv, PERL_MAGIC_utf8); - if (mg && mg->mg_ptr) { - cache = (STRLEN *) mg->mg_ptr; - if (cache[1] == (STRLEN)*offsetp) { - /* An exact match. */ - *offsetp = cache[0]; - - return; - } - else if (cache[1] < (STRLEN)*offsetp) { - /* We already know part of the way. */ - len = cache[0]; - s += cache[1]; - /* Let the below loop do the rest. */ - } - else { /* cache[1] > *offsetp */ - /* We already know all of the way, now we may - * be able to walk back. The same assumption - * is made as in S_utf8_mg_pos(), namely that - * walking backward is twice slower than - * walking forward. */ - const STRLEN forw = *offsetp; - STRLEN backw = cache[1] - *offsetp; + /* sv_2iv *should* have made this an NV */ + if (flags & SVp_NOK) { + (void)SvNOK_only(sv); + SvNV_set(sv, SvNVX(sv) - 1.0); + return; + } + /* I don't think we can get here. Maybe I should assert this + And if we do get here I suspect that sv_setnv will croak. NWC + Fall through. */ +#if defined(USE_LONG_DOUBLE) + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n", + SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); +#else + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n", + SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); +#endif + } + } +#endif /* PERL_PRESERVE_IVUV */ + sv_setnv(sv,Atof(SvPVX_const(sv)) - 1.0); /* punt */ +} - if (!(forw < 2 * backw)) { - const U8 *p = s + cache[1]; - STRLEN ubackw = 0; - - cache[1] -= backw; +/* +=for apidoc sv_mortalcopy - while (backw--) { - p--; - while (UTF8_IS_CONTINUATION(*p)) { - p--; - backw--; - } - ubackw++; - } +Creates a new SV which is a copy of the original SV (using C). +The new SV is marked as mortal. It will be destroyed "soon", either by an +explicit call to FREETMPS, or by an implicit call at places such as +statement boundaries. See also C and C. - cache[0] -= ubackw; - *offsetp = cache[0]; +=cut +*/ - /* Drop the stale "length" cache */ - cache[2] = 0; - cache[3] = 0; +/* Make a string that will exist for the duration of the expression + * evaluation. Actually, it may have to last longer than that, but + * hopefully we won't free it until it has been assigned to a + * permanent location. */ - return; - } - } - } - ASSERT_UTF8_CACHE(cache); - } +SV * +Perl_sv_mortalcopy(pTHX_ SV *oldstr) +{ + register SV *sv; - while (s < send) { - STRLEN n = 1; + new_SV(sv); + sv_setsv(sv,oldstr); + EXTEND_MORTAL(1); + PL_tmps_stack[++PL_tmps_ix] = sv; + SvTEMP_on(sv); + return sv; +} - /* Call utf8n_to_uvchr() to validate the sequence - * (unless a simple non-UTF character) */ - if (!UTF8_IS_INVARIANT(*s)) - utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0); - if (n > 0) { - s += n; - len++; - } - else - break; - } +/* +=for apidoc sv_newmortal - if (!SvREADONLY(sv)) { - if (!mg) { - sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0); - mg = mg_find(sv, PERL_MAGIC_utf8); - } - assert(mg); +Creates a new null SV which is mortal. The reference count of the SV is +set to 1. It will be destroyed "soon", either by an explicit call to +FREETMPS, or by an implicit call at places such as statement boundaries. +See also C and C. - if (!mg->mg_ptr) { - Newxz(cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN); - mg->mg_ptr = (char *) cache; - } - assert(cache); +=cut +*/ - cache[0] = len; - cache[1] = *offsetp; - /* Drop the stale "length" cache */ - cache[2] = 0; - cache[3] = 0; - } +SV * +Perl_sv_newmortal(pTHX) +{ + register SV *sv; - *offsetp = len; - } - return; + new_SV(sv); + SvFLAGS(sv) = SVs_TEMP; + EXTEND_MORTAL(1); + PL_tmps_stack[++PL_tmps_ix] = sv; + return sv; } /* -=for apidoc sv_eq +=for apidoc sv_2mortal -Returns a boolean indicating whether the strings in the two SVs are -identical. Is UTF-8 and 'use bytes' aware, handles get magic, and will -coerce its args to strings if necessary. +Marks an existing SV as mortal. The SV will be destroyed "soon", either +by an explicit call to FREETMPS, or by an implicit call at places such as +statement boundaries. SvTEMP() is turned on which means that the SV's +string buffer can be "stolen" if this SV is copied. See also C +and C. =cut */ -I32 -Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) +SV * +Perl_sv_2mortal(pTHX_ register SV *sv) { - const char *pv1; - STRLEN cur1; - const char *pv2; - STRLEN cur2; - I32 eq = 0; - char *tpv = Nullch; - SV* svrecode = Nullsv; + dVAR; + if (!sv) + return sv; + if (SvREADONLY(sv) && SvIMMORTAL(sv)) + return sv; + EXTEND_MORTAL(1); + PL_tmps_stack[++PL_tmps_ix] = sv; + SvTEMP_on(sv); + return sv; +} - if (!sv1) { - pv1 = ""; - cur1 = 0; - } - else - pv1 = SvPV_const(sv1, cur1); +/* +=for apidoc newSVpv - if (!sv2){ - pv2 = ""; - cur2 = 0; - } - else - pv2 = SvPV_const(sv2, cur2); - - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { - /* Differing utf8ness. - * Do not UTF8size the comparands as a side-effect. */ - if (PL_encoding) { - if (SvUTF8(sv1)) { - svrecode = newSVpvn(pv2, cur2); - sv_recode_to_utf8(svrecode, PL_encoding); - pv2 = SvPV_const(svrecode, cur2); - } - else { - svrecode = newSVpvn(pv1, cur1); - sv_recode_to_utf8(svrecode, PL_encoding); - pv1 = SvPV_const(svrecode, cur1); - } - /* Now both are in UTF-8. */ - if (cur1 != cur2) { - SvREFCNT_dec(svrecode); - return FALSE; - } - } - else { - bool is_utf8 = TRUE; - - if (SvUTF8(sv1)) { - /* sv1 is the UTF-8 one, - * if is equal it must be downgrade-able */ - char * const pv = (char*)bytes_from_utf8((const U8*)pv1, - &cur1, &is_utf8); - if (pv != pv1) - pv1 = tpv = pv; - } - else { - /* sv2 is the UTF-8 one, - * if is equal it must be downgrade-able */ - char * const pv = (char *)bytes_from_utf8((const U8*)pv2, - &cur2, &is_utf8); - if (pv != pv2) - pv2 = tpv = pv; - } - if (is_utf8) { - /* Downgrade not possible - cannot be eq */ - assert (tpv == 0); - return FALSE; - } - } - } +Creates a new SV and copies a string into it. The reference count for the +SV is set to 1. If C is zero, Perl will compute the length using +strlen(). For efficiency, consider using C instead. - if (cur1 == cur2) - eq = (pv1 == pv2) || memEQ(pv1, pv2, cur1); - - if (svrecode) - SvREFCNT_dec(svrecode); +=cut +*/ - if (tpv) - Safefree(tpv); +SV * +Perl_newSVpv(pTHX_ const char *s, STRLEN len) +{ + register SV *sv; - return eq; + new_SV(sv); + sv_setpvn(sv,s,len ? len : strlen(s)); + return sv; } /* -=for apidoc sv_cmp +=for apidoc newSVpvn -Compares the strings in two SVs. Returns -1, 0, or 1 indicating whether the -string in C is less than, equal to, or greater than the string in -C. Is UTF-8 and 'use bytes' aware, handles get magic, and will -coerce its args to strings if necessary. See also C. +Creates a new SV and copies a string into it. The reference count for the +SV is set to 1. Note that if C is zero, Perl will create a zero length +string. You are responsible for ensuring that the source string is at least +C bytes long. If the C argument is NULL the new SV will be undefined. =cut */ -I32 -Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) +SV * +Perl_newSVpvn(pTHX_ const char *s, STRLEN len) { - STRLEN cur1, cur2; - const char *pv1, *pv2; - char *tpv = Nullch; - I32 cmp; - SV *svrecode = Nullsv; - - if (!sv1) { - pv1 = ""; - cur1 = 0; - } - else - pv1 = SvPV_const(sv1, cur1); - - if (!sv2) { - pv2 = ""; - cur2 = 0; - } - else - pv2 = SvPV_const(sv2, cur2); - - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { - /* Differing utf8ness. - * Do not UTF8size the comparands as a side-effect. */ - if (SvUTF8(sv1)) { - if (PL_encoding) { - svrecode = newSVpvn(pv2, cur2); - sv_recode_to_utf8(svrecode, PL_encoding); - pv2 = SvPV_const(svrecode, cur2); - } - else { - pv2 = tpv = (char*)bytes_to_utf8((const U8*)pv2, &cur2); - } - } - else { - if (PL_encoding) { - svrecode = newSVpvn(pv1, cur1); - sv_recode_to_utf8(svrecode, PL_encoding); - pv1 = SvPV_const(svrecode, cur1); - } - else { - pv1 = tpv = (char*)bytes_to_utf8((const U8*)pv1, &cur1); - } - } - } - - if (!cur1) { - cmp = cur2 ? -1 : 0; - } else if (!cur2) { - cmp = 1; - } else { - const I32 retval = memcmp((const void*)pv1, (const void*)pv2, cur1 < cur2 ? cur1 : cur2); - - if (retval) { - cmp = retval < 0 ? -1 : 1; - } else if (cur1 == cur2) { - cmp = 0; - } else { - cmp = cur1 < cur2 ? -1 : 1; - } - } - - if (svrecode) - SvREFCNT_dec(svrecode); - - if (tpv) - Safefree(tpv); + register SV *sv; - return cmp; + new_SV(sv); + sv_setpvn(sv,s,len); + return sv; } + /* -=for apidoc sv_cmp_locale +=for apidoc newSVhek -Compares the strings in two SVs in a locale-aware manner. Is UTF-8 and -'use bytes' aware, handles get magic, and will coerce its args to strings -if necessary. See also C. See also C. +Creates a new SV from the hash key structure. It will generate scalars that +point to the shared string table where possible. Returns a new (undefined) +SV if the hek is NULL. =cut */ -I32 -Perl_sv_cmp_locale(pTHX_ register SV *sv1, register SV *sv2) +SV * +Perl_newSVhek(pTHX_ const HEK *hek) { -#ifdef USE_LOCALE_COLLATE - - char *pv1, *pv2; - STRLEN len1, len2; - I32 retval; - - if (PL_collation_standard) - goto raw_compare; - - len1 = 0; - pv1 = sv1 ? sv_collxfrm(sv1, &len1) : (char *) NULL; - len2 = 0; - pv2 = sv2 ? sv_collxfrm(sv2, &len2) : (char *) NULL; + if (!hek) { + SV *sv; - if (!pv1 || !len1) { - if (pv2 && len2) - return -1; - else - goto raw_compare; - } - else { - if (!pv2 || !len2) - return 1; + new_SV(sv); + return sv; } - retval = memcmp((void*)pv1, (void*)pv2, len1 < len2 ? len1 : len2); - - if (retval) - return retval < 0 ? -1 : 1; - - /* - * When the result of collation is equality, that doesn't mean - * that there are no differences -- some locales exclude some - * characters from consideration. So to avoid false equalities, - * we use the raw string as a tiebreaker. - */ - - raw_compare: - /* FALL THROUGH */ + if (HEK_LEN(hek) == HEf_SVKEY) { + return newSVsv(*(SV**)HEK_KEY(hek)); + } else { + const int flags = HEK_FLAGS(hek); + if (flags & HVhek_WASUTF8) { + /* Trouble :-) + Andreas would like keys he put in as utf8 to come back as utf8 + */ + STRLEN utf8_len = HEK_LEN(hek); + const U8 *as_utf8 = bytes_to_utf8 ((U8*)HEK_KEY(hek), &utf8_len); + SV * const sv = newSVpvn ((const char*)as_utf8, utf8_len); -#endif /* USE_LOCALE_COLLATE */ + SvUTF8_on (sv); + Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */ + return sv; + } else if (flags & HVhek_REHASH) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK. This hv is using custom a hasing + algorithm. Hence we can't return a shared string scalar, as + that would contain the (wrong) hash value, and might get passed + into an hv routine with a regular hash */ - return sv_cmp(sv1, sv2); + SV * const sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek)); + if (HEK_UTF8(hek)) + SvUTF8_on (sv); + return sv; + } + /* This will be overwhelminly the most common case. */ + return newSVpvn_share(HEK_KEY(hek), + (HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)), + HEK_HASH(hek)); + } } - -#ifdef USE_LOCALE_COLLATE - /* -=for apidoc sv_collxfrm - -Add Collate Transform magic to an SV if it doesn't already have it. +=for apidoc newSVpvn_share -Any scalar variable may carry PERL_MAGIC_collxfrm magic that contains the -scalar data of the variable, but transformed to such a format that a normal -memory comparison can be used to compare the data according to the locale -settings. +Creates a new SV with its SvPVX_const pointing to a shared string in the string +table. If the string does not already exist in the table, it is created +first. Turns on READONLY and FAKE. The string's hash is stored in the UV +slot of the SV; if the C parameter is non-zero, that value is used; +otherwise the hash is computed. The idea here is that as the string table +is used for shared hash keys these strings will have SvPVX_const == HeKEY and +hash lookup will avoid string compare. =cut */ -char * -Perl_sv_collxfrm(pTHX_ SV *sv, STRLEN *nxp) +SV * +Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash) { - MAGIC *mg; - - mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_collxfrm) : (MAGIC *) NULL; - if (!mg || !mg->mg_ptr || *(U32*)mg->mg_ptr != PL_collation_ix) { - const char *s; - char *xf; - STRLEN len, xlen; - - if (mg) - Safefree(mg->mg_ptr); - s = SvPV_const(sv, len); - if ((xf = mem_collxfrm(s, len, &xlen))) { - if (SvREADONLY(sv)) { - SAVEFREEPV(xf); - *nxp = xlen; - return xf + sizeof(PL_collation_ix); - } - if (! mg) { - sv_magic(sv, 0, PERL_MAGIC_collxfrm, 0, 0); - mg = mg_find(sv, PERL_MAGIC_collxfrm); - assert(mg); - } - mg->mg_ptr = xf; - mg->mg_len = xlen; - } - else { - if (mg) { - mg->mg_ptr = NULL; - mg->mg_len = -1; - } - } - } - if (mg && mg->mg_ptr) { - *nxp = mg->mg_len; - return mg->mg_ptr + sizeof(PL_collation_ix); - } - else { - *nxp = 0; - return NULL; + register SV *sv; + bool is_utf8 = FALSE; + if (len < 0) { + STRLEN tmplen = -len; + is_utf8 = TRUE; + /* See the note in hv.c:hv_fetch() --jhi */ + src = (char*)bytes_from_utf8((const U8*)src, &tmplen, &is_utf8); + len = tmplen; } + if (!hash) + PERL_HASH(hash, src, len); + new_SV(sv); + sv_upgrade(sv, SVt_PV); + SvPV_set(sv, sharepvn(src, is_utf8?-len:len, hash)); + SvCUR_set(sv, len); + SvLEN_set(sv, 0); + SvREADONLY_on(sv); + SvFAKE_on(sv); + SvPOK_on(sv); + if (is_utf8) + SvUTF8_on(sv); + return sv; } -#endif /* USE_LOCALE_COLLATE */ + +#if defined(PERL_IMPLICIT_CONTEXT) + +/* pTHX_ magic can't cope with varargs, so this is a no-context + * version of the main function, (which may itself be aliased to us). + * Don't access this version directly. + */ + +SV * +Perl_newSVpvf_nocontext(const char* pat, ...) +{ + dTHX; + register SV *sv; + va_list args; + va_start(args, pat); + sv = vnewSVpvf(pat, &args); + va_end(args); + return sv; +} +#endif /* -=for apidoc sv_gets +=for apidoc newSVpvf -Get a line from the filehandle and store it into the SV, optionally -appending to the currently-stored string. +Creates a new SV and initializes it with the string formatted like +C. =cut */ -char * -Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append) +SV * +Perl_newSVpvf(pTHX_ const char* pat, ...) { - const char *rsptr; - STRLEN rslen; - register STDCHAR rslast; - register STDCHAR *bp; - register I32 cnt; - I32 i = 0; - I32 rspara = 0; - I32 recsize; + register SV *sv; + va_list args; + va_start(args, pat); + sv = vnewSVpvf(pat, &args); + va_end(args); + return sv; +} - if (SvTHINKFIRST(sv)) - sv_force_normal_flags(sv, append ? 0 : SV_COW_DROP_PV); - /* XXX. If you make this PVIV, then copy on write can copy scalars read - from <>. - However, perlbench says it's slower, because the existing swipe code - is faster than copy on write. - Swings and roundabouts. */ - SvUPGRADE(sv, SVt_PV); +/* backend for newSVpvf() and newSVpvf_nocontext() */ - SvSCREAM_off(sv); +SV * +Perl_vnewSVpvf(pTHX_ const char* pat, va_list* args) +{ + register SV *sv; + new_SV(sv); + sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); + return sv; +} - if (append) { - if (PerlIO_isutf8(fp)) { - if (!SvUTF8(sv)) { - sv_utf8_upgrade_nomg(sv); - sv_pos_u2b(sv,&append,0); - } - } else if (SvUTF8(sv)) { - SV * const tsv = NEWSV(0,0); - sv_gets(tsv, fp, 0); - sv_utf8_upgrade_nomg(tsv); - SvCUR_set(sv,append); - sv_catsv(sv,tsv); - sv_free(tsv); - goto return_string_or_null; - } - } +/* +=for apidoc newSVnv - SvPOK_only(sv); - if (PerlIO_isutf8(fp)) - SvUTF8_on(sv); +Creates a new SV and copies a floating point value into it. +The reference count for the SV is set to 1. - if (IN_PERL_COMPILETIME) { - /* we always read code in line mode */ - rsptr = "\n"; - rslen = 1; - } - else if (RsSNARF(PL_rs)) { - /* If it is a regular disk file use size from stat() as estimate - of amount we are going to read - may result in malloc-ing - more memory than we realy need if layers bellow reduce - size we read (e.g. CRLF or a gzip layer) - */ - Stat_t st; - if (!PerlLIO_fstat(PerlIO_fileno(fp), &st) && S_ISREG(st.st_mode)) { - const Off_t offset = PerlIO_tell(fp); - if (offset != (Off_t) -1 && st.st_size + append > offset) { - (void) SvGROW(sv, (STRLEN)((st.st_size - offset) + append + 1)); - } - } - rsptr = NULL; - rslen = 0; - } - else if (RsRECORD(PL_rs)) { - I32 bytesread; - char *buffer; +=cut +*/ - /* Grab the size of the record we're getting */ - recsize = SvIV(SvRV(PL_rs)); - buffer = SvGROW(sv, (STRLEN)(recsize + append + 1)) + append; - /* Go yank in */ -#ifdef VMS - /* VMS wants read instead of fread, because fread doesn't respect */ - /* RMS record boundaries. This is not necessarily a good thing to be */ - /* doing, but we've got no other real choice - except avoid stdio - as implementation - perhaps write a :vms layer ? - */ - bytesread = PerlLIO_read(PerlIO_fileno(fp), buffer, recsize); -#else - bytesread = PerlIO_read(fp, buffer, recsize); -#endif - if (bytesread < 0) - bytesread = 0; - SvCUR_set(sv, bytesread += append); - buffer[bytesread] = '\0'; - goto return_string_or_null; - } - else if (RsPARA(PL_rs)) { - rsptr = "\n\n"; - rslen = 2; - rspara = 1; - } - else { - /* Get $/ i.e. PL_rs into same encoding as stream wants */ - if (PerlIO_isutf8(fp)) { - rsptr = SvPVutf8(PL_rs, rslen); - } - else { - if (SvUTF8(PL_rs)) { - if (!sv_utf8_downgrade(PL_rs, TRUE)) { - Perl_croak(aTHX_ "Wide character in $/"); - } - } - rsptr = SvPV_const(PL_rs, rslen); - } - } +SV * +Perl_newSVnv(pTHX_ NV n) +{ + register SV *sv; - rslast = rslen ? rsptr[rslen - 1] : '\0'; + new_SV(sv); + sv_setnv(sv,n); + return sv; +} - if (rspara) { /* have to do this both before and after */ - do { /* to make sure file boundaries work right */ - if (PerlIO_eof(fp)) - return 0; - i = PerlIO_getc(fp); - if (i != '\n') { - if (i == -1) - return 0; - PerlIO_ungetc(fp,i); - break; - } - } while (i != EOF); - } +/* +=for apidoc newSViv - /* See if we know enough about I/O mechanism to cheat it ! */ +Creates a new SV and copies an integer into it. The reference count for the +SV is set to 1. - /* This used to be #ifdef test - it is made run-time test for ease - of abstracting out stdio interface. One call should be cheap - enough here - and may even be a macro allowing compile - time optimization. - */ +=cut +*/ - if (PerlIO_fast_gets(fp)) { +SV * +Perl_newSViv(pTHX_ IV i) +{ + register SV *sv; - /* - * We're going to steal some values from the stdio struct - * and put EVERYTHING in the innermost loop into registers. - */ - register STDCHAR *ptr; - STRLEN bpx; - I32 shortbuffered; - -#if defined(VMS) && defined(PERLIO_IS_STDIO) - /* An ungetc()d char is handled separately from the regular - * buffer, so we getc() it back out and stuff it in the buffer. - */ - i = PerlIO_getc(fp); - if (i == EOF) return 0; - *(--((*fp)->_ptr)) = (unsigned char) i; - (*fp)->_cnt++; -#endif - - /* Here is some breathtakingly efficient cheating */ - - cnt = PerlIO_get_cnt(fp); /* get count into register */ - /* make sure we have the room */ - if ((I32)(SvLEN(sv) - append) <= cnt + 1) { - /* Not room for all of it - if we are looking for a separator and room for some - */ - if (rslen && cnt > 80 && (I32)SvLEN(sv) > append) { - /* just process what we have room for */ - shortbuffered = cnt - SvLEN(sv) + append + 1; - cnt -= shortbuffered; - } - else { - shortbuffered = 0; - /* remember that cnt can be negative */ - SvGROW(sv, (STRLEN)(append + (cnt <= 0 ? 2 : (cnt + 1)))); - } - } - else - shortbuffered = 0; - bp = (STDCHAR*)SvPVX_const(sv) + append; /* move these two too to registers */ - ptr = (STDCHAR*)PerlIO_get_ptr(fp); - DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: entering, ptr=%"UVuf", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); - DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: entering: PerlIO * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", - PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), - PTR2UV(PerlIO_has_base(fp) ? PerlIO_get_base(fp) : 0))); - for (;;) { - screamer: - if (cnt > 0) { - if (rslen) { - while (cnt > 0) { /* this | eat */ - cnt--; - if ((*bp++ = *ptr++) == rslast) /* really | dust */ - goto thats_all_folks; /* screams | sed :-) */ - } - } - else { - Copy(ptr, bp, cnt, char); /* this | eat */ - bp += cnt; /* screams | dust */ - ptr += cnt; /* louder | sed :-) */ - cnt = 0; - } - } - - if (shortbuffered) { /* oh well, must extend */ - cnt = shortbuffered; - shortbuffered = 0; - bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */ - SvCUR_set(sv, bpx); - SvGROW(sv, SvLEN(sv) + append + cnt + 2); - bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */ - continue; - } + new_SV(sv); + sv_setiv(sv,i); + return sv; +} - DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: going to getc, ptr=%"UVuf", cnt=%ld\n", - PTR2UV(ptr),(long)cnt)); - PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* deregisterize cnt and ptr */ -#if 0 - DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: pre: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", - PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), - PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); -#endif - /* This used to call 'filbuf' in stdio form, but as that behaves like - getc when cnt <= 0 we use PerlIO_getc here to avoid introducing - another abstraction. */ - i = PerlIO_getc(fp); /* get more characters */ -#if 0 - DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: post: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", - PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), - PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); -#endif - cnt = PerlIO_get_cnt(fp); - ptr = (STDCHAR*)PerlIO_get_ptr(fp); /* reregisterize cnt and ptr */ - DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: after getc, ptr=%"UVuf", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); +/* +=for apidoc newSVuv - if (i == EOF) /* all done for ever? */ - goto thats_really_all_folks; +Creates a new SV and copies an unsigned integer into it. +The reference count for the SV is set to 1. - bpx = bp - (STDCHAR*)SvPVX_const(sv); /* box up before relocation */ - SvCUR_set(sv, bpx); - SvGROW(sv, bpx + cnt + 2); - bp = (STDCHAR*)SvPVX_const(sv) + bpx; /* unbox after relocation */ +=cut +*/ - *bp++ = (STDCHAR)i; /* store character from PerlIO_getc */ +SV * +Perl_newSVuv(pTHX_ UV u) +{ + register SV *sv; - if (rslen && (STDCHAR)i == rslast) /* all done for now? */ - goto thats_all_folks; - } + new_SV(sv); + sv_setuv(sv,u); + return sv; +} -thats_all_folks: - if ((rslen > 1 && (STRLEN)(bp - (STDCHAR*)SvPVX_const(sv)) < rslen) || - memNE((char*)bp - rslen, rsptr, rslen)) - goto screamer; /* go back to the fray */ -thats_really_all_folks: - if (shortbuffered) - cnt += shortbuffered; - DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: quitting, ptr=%"UVuf", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); - PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* put these back or we're in trouble */ - DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: end: FILE * thinks ptr=%"UVuf", cnt=%ld, base=%"UVuf"\n", - PTR2UV(PerlIO_get_ptr(fp)), (long)PerlIO_get_cnt(fp), - PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); - *bp = '\0'; - SvCUR_set(sv, bp - (STDCHAR*)SvPVX_const(sv)); /* set length */ - DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: done, len=%ld, string=|%.*s|\n", - (long)SvCUR(sv),(int)SvCUR(sv),SvPVX_const(sv))); - } - else - { - /*The big, slow, and stupid way. */ -#ifdef USE_HEAP_INSTEAD_OF_STACK /* Even slower way. */ - STDCHAR *buf = 0; - Newx(buf, 8192, STDCHAR); - assert(buf); -#else - STDCHAR buf[8192]; -#endif +/* +=for apidoc newRV_noinc -screamer2: - if (rslen) { - register const STDCHAR *bpe = buf + sizeof(buf); - bp = buf; - while ((i = PerlIO_getc(fp)) != EOF && (*bp++ = (STDCHAR)i) != rslast && bp < bpe) - ; /* keep reading */ - cnt = bp - buf; - } - else { - cnt = PerlIO_read(fp,(char*)buf, sizeof(buf)); - /* Accomodate broken VAXC compiler, which applies U8 cast to - * both args of ?: operator, causing EOF to change into 255 - */ - if (cnt > 0) - i = (U8)buf[cnt - 1]; - else - i = EOF; - } +Creates an RV wrapper for an SV. The reference count for the original +SV is B incremented. - if (cnt < 0) - cnt = 0; /* we do need to re-set the sv even when cnt <= 0 */ - if (append) - sv_catpvn(sv, (char *) buf, cnt); - else - sv_setpvn(sv, (char *) buf, cnt); +=cut +*/ - if (i != EOF && /* joy */ - (!rslen || - SvCUR(sv) < rslen || - memNE(SvPVX_const(sv) + SvCUR(sv) - rslen, rsptr, rslen))) - { - append = -1; - /* - * If we're reading from a TTY and we get a short read, - * indicating that the user hit his EOF character, we need - * to notice it now, because if we try to read from the TTY - * again, the EOF condition will disappear. - * - * The comparison of cnt to sizeof(buf) is an optimization - * that prevents unnecessary calls to feof(). - * - * - jik 9/25/96 - */ - if (!(cnt < sizeof(buf) && PerlIO_eof(fp))) - goto screamer2; - } +SV * +Perl_newRV_noinc(pTHX_ SV *tmpRef) +{ + register SV *sv; -#ifdef USE_HEAP_INSTEAD_OF_STACK - Safefree(buf); -#endif - } + new_SV(sv); + sv_upgrade(sv, SVt_RV); + SvTEMP_off(tmpRef); + SvRV_set(sv, tmpRef); + SvROK_on(sv); + return sv; +} - if (rspara) { /* have to do this both before and after */ - while (i != EOF) { /* to make sure file boundaries work right */ - i = PerlIO_getc(fp); - if (i != '\n') { - PerlIO_ungetc(fp,i); - break; - } - } - } +/* newRV_inc is the official function name to use now. + * newRV_inc is in fact #defined to newRV in sv.h + */ -return_string_or_null: - return (SvCUR(sv) - append) ? SvPVX(sv) : Nullch; +SV * +Perl_newRV(pTHX_ SV *tmpRef) +{ + return newRV_noinc(SvREFCNT_inc(tmpRef)); } /* -=for apidoc sv_inc +=for apidoc newSVsv -Auto-increment of the value in the SV, doing string to numeric conversion -if necessary. Handles 'get' magic. +Creates a new SV which is an exact duplicate of the original SV. +(Uses C). =cut */ -void -Perl_sv_inc(pTHX_ register SV *sv) +SV * +Perl_newSVsv(pTHX_ register SV *old) { - register char *d; - int flags; + register SV *sv; - if (!sv) - return; - SvGETMAGIC(sv); - if (SvTHINKFIRST(sv)) { - if (SvIsCOW(sv)) - sv_force_normal_flags(sv, 0); - if (SvREADONLY(sv)) { - if (IN_PERL_RUNTIME) - Perl_croak(aTHX_ PL_no_modify); - } - if (SvROK(sv)) { - IV i; - if (SvAMAGIC(sv) && AMG_CALLun(sv,inc)) - return; - i = PTR2IV(SvRV(sv)); - sv_unref(sv); - sv_setiv(sv, i); - } - } - flags = SvFLAGS(sv); - if ((flags & (SVp_NOK|SVp_IOK)) == SVp_NOK) { - /* It's (privately or publicly) a float, but not tested as an - integer, so test it to see. */ - (void) SvIV(sv); - flags = SvFLAGS(sv); - } - if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { - /* It's publicly an integer, or privately an integer-not-float */ -#ifdef PERL_PRESERVE_IVUV - oops_its_int: -#endif - if (SvIsUV(sv)) { - if (SvUVX(sv) == UV_MAX) - sv_setnv(sv, UV_MAX_P1); - else - (void)SvIOK_only_UV(sv); - SvUV_set(sv, SvUVX(sv) + 1); - } else { - if (SvIVX(sv) == IV_MAX) - sv_setuv(sv, (UV)IV_MAX + 1); - else { - (void)SvIOK_only(sv); - SvIV_set(sv, SvIVX(sv) + 1); - } - } - return; - } - if (flags & SVp_NOK) { - (void)SvNOK_only(sv); - SvNV_set(sv, SvNVX(sv) + 1.0); - return; + if (!old) + return Nullsv; + if (SvTYPE(old) == SVTYPEMASK) { + if (ckWARN_d(WARN_INTERNAL)) + Perl_warner(aTHX_ packWARN(WARN_INTERNAL), "semi-panic: attempt to dup freed string"); + return Nullsv; } + new_SV(sv); + /* SV_GMAGIC is the default for sv_setv() + SV_NOSTEAL prevents TEMP buffers being, well, stolen, and saves games + with SvTEMP_off and SvTEMP_on round a call to sv_setsv. */ + sv_setsv_flags(sv, old, SV_GMAGIC | SV_NOSTEAL); + return sv; +} - if (!(flags & SVp_POK) || !*SvPVX_const(sv)) { - if ((flags & SVTYPEMASK) < SVt_PVIV) - sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV ? SVt_PVIV : SVt_IV)); - (void)SvIOK_only(sv); - SvIV_set(sv, 1); +/* +=for apidoc sv_reset + +Underlying implementation for the C Perl function. +Note that the perl-level function is vaguely deprecated. + +=cut +*/ + +void +Perl_sv_reset(pTHX_ register const char *s, HV *stash) +{ + dVAR; + char todo[PERL_UCHAR_MAX+1]; + + if (!stash) return; - } - d = SvPVX(sv); - while (isALPHA(*d)) d++; - while (isDIGIT(*d)) d++; - if (*d) { -#ifdef PERL_PRESERVE_IVUV - /* Got to punt this as an integer if needs be, but we don't issue - warnings. Probably ought to make the sv_iv_please() that does - the conversion if possible, and silently. */ - const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), NULL); - if (numtype && !(numtype & IS_NUMBER_INFINITY)) { - /* Need to try really hard to see if it's an integer. - 9.22337203685478e+18 is an integer. - but "9.22337203685478e+18" + 0 is UV=9223372036854779904 - so $a="9.22337203685478e+18"; $a+0; $a++ - needs to be the same as $a="9.22337203685478e+18"; $a++ - or we go insane. */ - - (void) sv_2iv(sv); - if (SvIOK(sv)) - goto oops_its_int; - /* sv_2iv *should* have made this an NV */ - if (flags & SVp_NOK) { - (void)SvNOK_only(sv); - SvNV_set(sv, SvNVX(sv) + 1.0); - return; + if (!*s) { /* reset ?? searches */ + MAGIC * const mg = mg_find((SV *)stash, PERL_MAGIC_symtab); + if (mg) { + PMOP *pm = (PMOP *) mg->mg_obj; + while (pm) { + pm->op_pmdynflags &= ~PMdf_USED; + pm = pm->op_pmnext; } - /* I don't think we can get here. Maybe I should assert this - And if we do get here I suspect that sv_setnv will croak. NWC - Fall through. */ -#if defined(USE_LONG_DOUBLE) - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n", - SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); -#else - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n", - SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); -#endif } -#endif /* PERL_PRESERVE_IVUV */ - sv_setnv(sv,Atof(SvPVX_const(sv)) + 1.0); return; } - d--; - while (d >= SvPVX_const(sv)) { - if (isDIGIT(*d)) { - if (++*d <= '9') - return; - *(d--) = '0'; + + /* reset variables */ + + if (!HvARRAY(stash)) + return; + + Zero(todo, 256, char); + while (*s) { + I32 max; + I32 i = (unsigned char)*s; + if (s[1] == '-') { + s += 2; } - else { -#ifdef EBCDIC - /* MKS: The original code here died if letters weren't consecutive. - * at least it didn't have to worry about non-C locales. The - * new code assumes that ('z'-'a')==('Z'-'A'), letters are - * arranged in order (although not consecutively) and that only - * [A-Za-z] are accepted by isALPHA in the C locale. - */ - if (*d != 'z' && *d != 'Z') { - do { ++*d; } while (!isALPHA(*d)); - return; + max = (unsigned char)*s++; + for ( ; i <= max; i++) { + todo[i] = 1; + } + for (i = 0; i <= (I32) HvMAX(stash); i++) { + HE *entry; + for (entry = HvARRAY(stash)[i]; + entry; + entry = HeNEXT(entry)) + { + register GV *gv; + register SV *sv; + + if (!todo[(U8)*HeKEY(entry)]) + continue; + gv = (GV*)HeVAL(entry); + sv = GvSV(gv); + if (sv) { + if (SvTHINKFIRST(sv)) { + if (!SvREADONLY(sv) && SvROK(sv)) + sv_unref(sv); + /* XXX Is this continue a bug? Why should THINKFIRST + exempt us from resetting arrays and hashes? */ + continue; + } + SvOK_off(sv); + if (SvTYPE(sv) >= SVt_PV) { + SvCUR_set(sv, 0); + if (SvPVX_const(sv) != Nullch) + *SvPVX(sv) = '\0'; + SvTAINT(sv); + } + } + if (GvAV(gv)) { + av_clear(GvAV(gv)); + } + if (GvHV(gv) && !HvNAME_get(GvHV(gv))) { +#if defined(VMS) + Perl_die(aTHX_ "Can't reset %%ENV on this system"); +#else /* ! VMS */ + hv_clear(GvHV(gv)); +# if defined(USE_ENVIRON_ARRAY) + if (gv == PL_envgv) + my_clearenv(); +# endif /* USE_ENVIRON_ARRAY */ +#endif /* VMS */ + } } - *(d--) -= 'z' - 'a'; -#else - ++*d; - if (isALPHA(*d)) - return; - *(d--) -= 'z' - 'a' + 1; -#endif } } - /* oh,oh, the number grew */ - SvGROW(sv, SvCUR(sv) + 2); - SvCUR_set(sv, SvCUR(sv) + 1); - for (d = SvPVX(sv) + SvCUR(sv); d > SvPVX_const(sv); d--) - *d = d[-1]; - if (isDIGIT(d[1])) - *d = '1'; - else - *d = d[1]; } /* -=for apidoc sv_dec +=for apidoc sv_2io -Auto-decrement of the value in the SV, doing string to numeric conversion -if necessary. Handles 'get' magic. +Using various gambits, try to get an IO from an SV: the IO slot if its a +GV; or the recursive result if we're an RV; or the IO slot of the symbol +named after the PV if we're a string. =cut */ -void -Perl_sv_dec(pTHX_ register SV *sv) +IO* +Perl_sv_2io(pTHX_ SV *sv) { - int flags; + IO* io; + GV* gv; - if (!sv) - return; - SvGETMAGIC(sv); - if (SvTHINKFIRST(sv)) { - if (SvIsCOW(sv)) - sv_force_normal_flags(sv, 0); - if (SvREADONLY(sv)) { - if (IN_PERL_RUNTIME) - Perl_croak(aTHX_ PL_no_modify); - } - if (SvROK(sv)) { - IV i; - if (SvAMAGIC(sv) && AMG_CALLun(sv,dec)) - return; - i = PTR2IV(SvRV(sv)); - sv_unref(sv); - sv_setiv(sv, i); - } - } - /* Unlike sv_inc we don't have to worry about string-never-numbers - and keeping them magic. But we mustn't warn on punting */ - flags = SvFLAGS(sv); - if ((flags & SVf_IOK) || ((flags & (SVp_IOK | SVp_NOK)) == SVp_IOK)) { - /* It's publicly an integer, or privately an integer-not-float */ -#ifdef PERL_PRESERVE_IVUV - oops_its_int: -#endif - if (SvIsUV(sv)) { - if (SvUVX(sv) == 0) { - (void)SvIOK_only(sv); - SvIV_set(sv, -1); - } - else { - (void)SvIOK_only_UV(sv); - SvUV_set(sv, SvUVX(sv) - 1); - } - } else { - if (SvIVX(sv) == IV_MIN) - sv_setnv(sv, (NV)IV_MIN - 1.0); - else { - (void)SvIOK_only(sv); - SvIV_set(sv, SvIVX(sv) - 1); - } - } - return; - } - if (flags & SVp_NOK) { - SvNV_set(sv, SvNVX(sv) - 1.0); - (void)SvNOK_only(sv); - return; - } - if (!(flags & SVp_POK)) { - if ((flags & SVTYPEMASK) < SVt_PVIV) - sv_upgrade(sv, ((flags & SVTYPEMASK) > SVt_IV) ? SVt_PVIV : SVt_IV); - SvIV_set(sv, -1); - (void)SvIOK_only(sv); - return; - } -#ifdef PERL_PRESERVE_IVUV - { - const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), NULL); - if (numtype && !(numtype & IS_NUMBER_INFINITY)) { - /* Need to try really hard to see if it's an integer. - 9.22337203685478e+18 is an integer. - but "9.22337203685478e+18" + 0 is UV=9223372036854779904 - so $a="9.22337203685478e+18"; $a+0; $a-- - needs to be the same as $a="9.22337203685478e+18"; $a-- - or we go insane. */ - - (void) sv_2iv(sv); - if (SvIOK(sv)) - goto oops_its_int; - - /* sv_2iv *should* have made this an NV */ - if (flags & SVp_NOK) { - (void)SvNOK_only(sv); - SvNV_set(sv, SvNVX(sv) - 1.0); - return; - } - /* I don't think we can get here. Maybe I should assert this - And if we do get here I suspect that sv_setnv will croak. NWC - Fall through. */ -#if defined(USE_LONG_DOUBLE) - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n", - SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); -#else - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n", - SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); -#endif - } + switch (SvTYPE(sv)) { + case SVt_PVIO: + io = (IO*)sv; + break; + case SVt_PVGV: + gv = (GV*)sv; + io = GvIO(gv); + if (!io) + Perl_croak(aTHX_ "Bad filehandle: %s", GvNAME(gv)); + break; + default: + if (!SvOK(sv)) + Perl_croak(aTHX_ PL_no_usym, "filehandle"); + if (SvROK(sv)) + return sv_2io(SvRV(sv)); + gv = gv_fetchsv(sv, FALSE, SVt_PVIO); + if (gv) + io = GvIO(gv); + else + io = 0; + if (!io) + Perl_croak(aTHX_ "Bad filehandle: %"SVf, sv); + break; } -#endif /* PERL_PRESERVE_IVUV */ - sv_setnv(sv,Atof(SvPVX_const(sv)) - 1.0); /* punt */ + return io; } /* -=for apidoc sv_mortalcopy +=for apidoc sv_2cv -Creates a new SV which is a copy of the original SV (using C). -The new SV is marked as mortal. It will be destroyed "soon", either by an -explicit call to FREETMPS, or by an implicit call at places such as -statement boundaries. See also C and C. +Using various gambits, try to get a CV from an SV; in addition, try if +possible to set C<*st> and C<*gvp> to the stash and GV associated with it. =cut */ -/* Make a string that will exist for the duration of the expression - * evaluation. Actually, it may have to last longer than that, but - * hopefully we won't free it until it has been assigned to a - * permanent location. */ - -SV * -Perl_sv_mortalcopy(pTHX_ SV *oldstr) +CV * +Perl_sv_2cv(pTHX_ SV *sv, HV **st, GV **gvp, I32 lref) { - register SV *sv; + dVAR; + GV *gv = Nullgv; + CV *cv = Nullcv; - new_SV(sv); - sv_setsv(sv,oldstr); - EXTEND_MORTAL(1); - PL_tmps_stack[++PL_tmps_ix] = sv; - SvTEMP_on(sv); - return sv; + if (!sv) + return *gvp = Nullgv, Nullcv; + switch (SvTYPE(sv)) { + case SVt_PVCV: + *st = CvSTASH(sv); + *gvp = Nullgv; + return (CV*)sv; + case SVt_PVHV: + case SVt_PVAV: + *gvp = Nullgv; + return Nullcv; + case SVt_PVGV: + gv = (GV*)sv; + *gvp = gv; + *st = GvESTASH(gv); + goto fix_gv; + + default: + SvGETMAGIC(sv); + if (SvROK(sv)) { + SV * const *sp = &sv; /* Used in tryAMAGICunDEREF macro. */ + tryAMAGICunDEREF(to_cv); + + sv = SvRV(sv); + if (SvTYPE(sv) == SVt_PVCV) { + cv = (CV*)sv; + *gvp = Nullgv; + *st = CvSTASH(cv); + return cv; + } + else if(isGV(sv)) + gv = (GV*)sv; + else + Perl_croak(aTHX_ "Not a subroutine reference"); + } + else if (isGV(sv)) + gv = (GV*)sv; + else + gv = gv_fetchsv(sv, lref, SVt_PVCV); + *gvp = gv; + if (!gv) + return Nullcv; + *st = GvESTASH(gv); + fix_gv: + if (lref && !GvCVu(gv)) { + SV *tmpsv; + ENTER; + tmpsv = NEWSV(704,0); + gv_efullname3(tmpsv, gv, Nullch); + /* XXX this is probably not what they think they're getting. + * It has the same effect as "sub name;", i.e. just a forward + * declaration! */ + newSUB(start_subparse(FALSE, 0), + newSVOP(OP_CONST, 0, tmpsv), + Nullop, + Nullop); + LEAVE; + if (!GvCVu(gv)) + Perl_croak(aTHX_ "Unable to create sub named \"%"SVf"\"", + sv); + } + return GvCVu(gv); + } } /* -=for apidoc sv_newmortal +=for apidoc sv_true -Creates a new null SV which is mortal. The reference count of the SV is -set to 1. It will be destroyed "soon", either by an explicit call to -FREETMPS, or by an implicit call at places such as statement boundaries. -See also C and C. +Returns true if the SV has a true value by Perl's rules. +Use the C macro instead, which may call C or may +instead use an in-line version. =cut */ -SV * -Perl_sv_newmortal(pTHX) +I32 +Perl_sv_true(pTHX_ register SV *sv) { - register SV *sv; - - new_SV(sv); - SvFLAGS(sv) = SVs_TEMP; - EXTEND_MORTAL(1); - PL_tmps_stack[++PL_tmps_ix] = sv; - return sv; + if (!sv) + return 0; + if (SvPOK(sv)) { + register const XPV* const tXpv = (XPV*)SvANY(sv); + if (tXpv && + (tXpv->xpv_cur > 1 || + (tXpv->xpv_cur && *sv->sv_u.svu_pv != '0'))) + return 1; + else + return 0; + } + else { + if (SvIOK(sv)) + return SvIVX(sv) != 0; + else { + if (SvNOK(sv)) + return SvNVX(sv) != 0.0; + else + return sv_2bool(sv); + } + } } /* -=for apidoc sv_2mortal +=for apidoc sv_pvn_force -Marks an existing SV as mortal. The SV will be destroyed "soon", either -by an explicit call to FREETMPS, or by an implicit call at places such as -statement boundaries. SvTEMP() is turned on which means that the SV's -string buffer can be "stolen" if this SV is copied. See also C -and C. +Get a sensible string out of the SV somehow. +A private implementation of the C macro for compilers which +can't cope with complex macro expressions. Always use the macro instead. + +=for apidoc sv_pvn_force_flags + +Get a sensible string out of the SV somehow. +If C has C bit set, will C on C if +appropriate, else not. C and C are +implemented in terms of this function. +You normally want to use the various wrapper macros instead: see +C and C =cut */ -SV * -Perl_sv_2mortal(pTHX_ register SV *sv) +char * +Perl_sv_pvn_force_flags(pTHX_ SV *sv, STRLEN *lp, I32 flags) { - dVAR; - if (!sv) - return sv; - if (SvREADONLY(sv) && SvIMMORTAL(sv)) - return sv; - EXTEND_MORTAL(1); - PL_tmps_stack[++PL_tmps_ix] = sv; - SvTEMP_on(sv); - return sv; -} -/* -=for apidoc newSVpv + if (SvTHINKFIRST(sv) && !SvROK(sv)) + sv_force_normal_flags(sv, 0); -Creates a new SV and copies a string into it. The reference count for the -SV is set to 1. If C is zero, Perl will compute the length using -strlen(). For efficiency, consider using C instead. + if (SvPOK(sv)) { + if (lp) + *lp = SvCUR(sv); + } + else { + char *s; + STRLEN len; + + if (SvREADONLY(sv) && !(flags & SV_MUTABLE_RETURN)) { + const char * const ref = sv_reftype(sv,0); + if (PL_op) + Perl_croak(aTHX_ "Can't coerce readonly %s to string in %s", + ref, OP_NAME(PL_op)); + else + Perl_croak(aTHX_ "Can't coerce readonly %s to string", ref); + } + if (SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM) + Perl_croak(aTHX_ "Can't coerce %s to string in %s", sv_reftype(sv,0), + OP_NAME(PL_op)); + s = sv_2pv_flags(sv, &len, flags); + if (lp) + *lp = len; + + if (s != SvPVX_const(sv)) { /* Almost, but not quite, sv_setpvn() */ + if (SvROK(sv)) + sv_unref(sv); + SvUPGRADE(sv, SVt_PV); /* Never FALSE */ + SvGROW(sv, len + 1); + Move(s,SvPVX(sv),len,char); + SvCUR_set(sv, len); + *SvEND(sv) = '\0'; + } + if (!SvPOK(sv)) { + SvPOK_on(sv); /* validate pointer */ + SvTAINT(sv); + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n", + PTR2UV(sv),SvPVX_const(sv))); + } + } + return SvPVX_mutable(sv); +} + +/* +=for apidoc sv_pvbyten_force + +The backend for the C macro. Always use the macro instead. =cut */ -SV * -Perl_newSVpv(pTHX_ const char *s, STRLEN len) +char * +Perl_sv_pvbyten_force(pTHX_ SV *sv, STRLEN *lp) { - register SV *sv; - - new_SV(sv); - sv_setpvn(sv,s,len ? len : strlen(s)); - return sv; + sv_pvn_force(sv,lp); + sv_utf8_downgrade(sv,0); + *lp = SvCUR(sv); + return SvPVX(sv); } /* -=for apidoc newSVpvn +=for apidoc sv_pvutf8n_force -Creates a new SV and copies a string into it. The reference count for the -SV is set to 1. Note that if C is zero, Perl will create a zero length -string. You are responsible for ensuring that the source string is at least -C bytes long. If the C argument is NULL the new SV will be undefined. +The backend for the C macro. Always use the macro instead. =cut */ -SV * -Perl_newSVpvn(pTHX_ const char *s, STRLEN len) +char * +Perl_sv_pvutf8n_force(pTHX_ SV *sv, STRLEN *lp) { - register SV *sv; - - new_SV(sv); - sv_setpvn(sv,s,len); - return sv; + sv_pvn_force(sv,lp); + sv_utf8_upgrade(sv); + *lp = SvCUR(sv); + return SvPVX(sv); } - /* -=for apidoc newSVhek +=for apidoc sv_reftype -Creates a new SV from the hash key structure. It will generate scalars that -point to the shared string table where possible. Returns a new (undefined) -SV if the hek is NULL. +Returns a string describing what the SV is a reference to. =cut */ -SV * -Perl_newSVhek(pTHX_ const HEK *hek) +char * +Perl_sv_reftype(pTHX_ const SV *sv, int ob) { - if (!hek) { - SV *sv; - - new_SV(sv); - return sv; + /* The fact that I don't need to downcast to char * everywhere, only in ?: + inside return suggests a const propagation bug in g++. */ + if (ob && SvOBJECT(sv)) { + char * const name = HvNAME_get(SvSTASH(sv)); + return name ? name : (char *) "__ANON__"; } + else { + switch (SvTYPE(sv)) { + case SVt_NULL: + case SVt_IV: + case SVt_NV: + case SVt_RV: + case SVt_PV: + case SVt_PVIV: + case SVt_PVNV: + case SVt_PVMG: + case SVt_PVBM: + if (SvVOK(sv)) + return "VSTRING"; + if (SvROK(sv)) + return "REF"; + else + return "SCALAR"; - if (HEK_LEN(hek) == HEf_SVKEY) { - return newSVsv(*(SV**)HEK_KEY(hek)); - } else { - const int flags = HEK_FLAGS(hek); - if (flags & HVhek_WASUTF8) { - /* Trouble :-) - Andreas would like keys he put in as utf8 to come back as utf8 - */ - STRLEN utf8_len = HEK_LEN(hek); - const U8 *as_utf8 = bytes_to_utf8 ((U8*)HEK_KEY(hek), &utf8_len); - SV * const sv = newSVpvn ((const char*)as_utf8, utf8_len); - - SvUTF8_on (sv); - Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */ - return sv; - } else if (flags & HVhek_REHASH) { - /* We don't have a pointer to the hv, so we have to replicate the - flag into every HEK. This hv is using custom a hasing - algorithm. Hence we can't return a shared string scalar, as - that would contain the (wrong) hash value, and might get passed - into an hv routine with a regular hash */ - - SV * const sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek)); - if (HEK_UTF8(hek)) - SvUTF8_on (sv); - return sv; + case SVt_PVLV: return (char *) (SvROK(sv) ? "REF" + /* tied lvalues should appear to be + * scalars for backwards compatitbility */ + : (LvTYPE(sv) == 't' || LvTYPE(sv) == 'T') + ? "SCALAR" : "LVALUE"); + case SVt_PVAV: return "ARRAY"; + case SVt_PVHV: return "HASH"; + case SVt_PVCV: return "CODE"; + case SVt_PVGV: return "GLOB"; + case SVt_PVFM: return "FORMAT"; + case SVt_PVIO: return "IO"; + default: return "UNKNOWN"; } - /* This will be overwhelminly the most common case. */ - return newSVpvn_share(HEK_KEY(hek), - (HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)), - HEK_HASH(hek)); } } /* -=for apidoc newSVpvn_share +=for apidoc sv_isobject -Creates a new SV with its SvPVX_const pointing to a shared string in the string -table. If the string does not already exist in the table, it is created -first. Turns on READONLY and FAKE. The string's hash is stored in the UV -slot of the SV; if the C parameter is non-zero, that value is used; -otherwise the hash is computed. The idea here is that as the string table -is used for shared hash keys these strings will have SvPVX_const == HeKEY and -hash lookup will avoid string compare. +Returns a boolean indicating whether the SV is an RV pointing to a blessed +object. If the SV is not an RV, or if the object is not blessed, then this +will return false. =cut */ -SV * -Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash) +int +Perl_sv_isobject(pTHX_ SV *sv) { - register SV *sv; - bool is_utf8 = FALSE; - if (len < 0) { - STRLEN tmplen = -len; - is_utf8 = TRUE; - /* See the note in hv.c:hv_fetch() --jhi */ - src = (char*)bytes_from_utf8((const U8*)src, &tmplen, &is_utf8); - len = tmplen; - } - if (!hash) - PERL_HASH(hash, src, len); - new_SV(sv); - sv_upgrade(sv, SVt_PV); - SvPV_set(sv, sharepvn(src, is_utf8?-len:len, hash)); - SvCUR_set(sv, len); - SvLEN_set(sv, 0); - SvREADONLY_on(sv); - SvFAKE_on(sv); - SvPOK_on(sv); - if (is_utf8) - SvUTF8_on(sv); - return sv; + if (!sv) + return 0; + SvGETMAGIC(sv); + if (!SvROK(sv)) + return 0; + sv = (SV*)SvRV(sv); + if (!SvOBJECT(sv)) + return 0; + return 1; } +/* +=for apidoc sv_isa -#if defined(PERL_IMPLICIT_CONTEXT) +Returns a boolean indicating whether the SV is blessed into the specified +class. This does not check for subtypes; use C to verify +an inheritance relationship. -/* pTHX_ magic can't cope with varargs, so this is a no-context - * version of the main function, (which may itself be aliased to us). - * Don't access this version directly. - */ +=cut +*/ -SV * -Perl_newSVpvf_nocontext(const char* pat, ...) +int +Perl_sv_isa(pTHX_ SV *sv, const char *name) { - dTHX; - register SV *sv; - va_list args; - va_start(args, pat); - sv = vnewSVpvf(pat, &args); - va_end(args); - return sv; + const char *hvname; + if (!sv) + return 0; + SvGETMAGIC(sv); + if (!SvROK(sv)) + return 0; + sv = (SV*)SvRV(sv); + if (!SvOBJECT(sv)) + return 0; + hvname = HvNAME_get(SvSTASH(sv)); + if (!hvname) + return 0; + + return strEQ(hvname, name); } -#endif /* -=for apidoc newSVpvf - -Creates a new SV and initializes it with the string formatted like -C. +=for apidoc newSVrv -=cut -*/ - -SV * -Perl_newSVpvf(pTHX_ const char* pat, ...) -{ - register SV *sv; - va_list args; - va_start(args, pat); - sv = vnewSVpvf(pat, &args); - va_end(args); - return sv; -} +Creates a new SV for the RV, C, to point to. If C is not an RV then +it will be upgraded to one. If C is non-null then the new SV will +be blessed in the specified package. The new SV is returned and its +reference count is 1. -/* backend for newSVpvf() and newSVpvf_nocontext() */ +=cut +*/ -SV * -Perl_vnewSVpvf(pTHX_ const char* pat, va_list* args) +SV* +Perl_newSVrv(pTHX_ SV *rv, const char *classname) { - register SV *sv; + SV *sv; + new_SV(sv); - sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); - return sv; -} -/* -=for apidoc newSVnv + SV_CHECK_THINKFIRST_COW_DROP(rv); + SvAMAGIC_off(rv); -Creates a new SV and copies a floating point value into it. -The reference count for the SV is set to 1. + if (SvTYPE(rv) >= SVt_PVMG) { + const U32 refcnt = SvREFCNT(rv); + SvREFCNT(rv) = 0; + sv_clear(rv); + SvFLAGS(rv) = 0; + SvREFCNT(rv) = refcnt; + } -=cut -*/ + if (SvTYPE(rv) < SVt_RV) + sv_upgrade(rv, SVt_RV); + else if (SvTYPE(rv) > SVt_RV) { + SvPV_free(rv); + SvCUR_set(rv, 0); + SvLEN_set(rv, 0); + } -SV * -Perl_newSVnv(pTHX_ NV n) -{ - register SV *sv; + SvOK_off(rv); + SvRV_set(rv, sv); + SvROK_on(rv); - new_SV(sv); - sv_setnv(sv,n); + if (classname) { + HV* const stash = gv_stashpv(classname, TRUE); + (void)sv_bless(rv, stash); + } return sv; } /* -=for apidoc newSViv +=for apidoc sv_setref_pv -Creates a new SV and copies an integer into it. The reference count for the -SV is set to 1. +Copies a pointer into a new SV, optionally blessing the SV. The C +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. If the C argument is NULL then C will be placed +into the SV. The C argument indicates the package for the +blessing. Set C to C to avoid the blessing. The new SV +will have a reference count of 1, and the RV will be returned. + +Do not use with other Perl types such as HV, AV, SV, CV, because those +objects will become corrupted by the pointer copy process. + +Note that C copies the string while this copies the pointer. =cut */ -SV * -Perl_newSViv(pTHX_ IV i) +SV* +Perl_sv_setref_pv(pTHX_ SV *rv, const char *classname, void *pv) { - register SV *sv; - - new_SV(sv); - sv_setiv(sv,i); - return sv; + if (!pv) { + sv_setsv(rv, &PL_sv_undef); + SvSETMAGIC(rv); + } + else + sv_setiv(newSVrv(rv,classname), PTR2IV(pv)); + return rv; } /* -=for apidoc newSVuv +=for apidoc sv_setref_iv -Creates a new SV and copies an unsigned integer into it. -The reference count for the SV is set to 1. +Copies an integer into a new SV, optionally blessing the SV. The C +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C argument indicates the package for the +blessing. Set C to C to avoid the blessing. The new SV +will have a reference count of 1, and the RV will be returned. =cut */ -SV * -Perl_newSVuv(pTHX_ UV u) +SV* +Perl_sv_setref_iv(pTHX_ SV *rv, const char *classname, IV iv) { - register SV *sv; - - new_SV(sv); - sv_setuv(sv,u); - return sv; + sv_setiv(newSVrv(rv,classname), iv); + return rv; } /* -=for apidoc newRV_noinc +=for apidoc sv_setref_uv -Creates an RV wrapper for an SV. The reference count for the original -SV is B incremented. +Copies an unsigned integer into a new SV, optionally blessing the SV. The C +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C argument indicates the package for the +blessing. Set C to C to avoid the blessing. The new SV +will have a reference count of 1, and the RV will be returned. =cut */ -SV * -Perl_newRV_noinc(pTHX_ SV *tmpRef) +SV* +Perl_sv_setref_uv(pTHX_ SV *rv, const char *classname, UV uv) { - register SV *sv; - - new_SV(sv); - sv_upgrade(sv, SVt_RV); - SvTEMP_off(tmpRef); - SvRV_set(sv, tmpRef); - SvROK_on(sv); - return sv; + sv_setuv(newSVrv(rv,classname), uv); + return rv; } -/* newRV_inc is the official function name to use now. - * newRV_inc is in fact #defined to newRV in sv.h - */ +/* +=for apidoc sv_setref_nv -SV * -Perl_newRV(pTHX_ SV *tmpRef) +Copies a double into a new SV, optionally blessing the SV. The C +argument will be upgraded to an RV. That RV will be modified to point to +the new SV. The C argument indicates the package for the +blessing. Set C to C to avoid the blessing. The new SV +will have a reference count of 1, and the RV will be returned. + +=cut +*/ + +SV* +Perl_sv_setref_nv(pTHX_ SV *rv, const char *classname, NV nv) { - return newRV_noinc(SvREFCNT_inc(tmpRef)); + sv_setnv(newSVrv(rv,classname), nv); + return rv; } /* -=for apidoc newSVsv +=for apidoc sv_setref_pvn -Creates a new SV which is an exact duplicate of the original SV. -(Uses C). +Copies a string into a new SV, optionally blessing the SV. The length of the +string must be specified with C. The C argument will be upgraded to +an RV. That RV will be modified to point to the new SV. The C +argument indicates the package for the blessing. Set C to +C to avoid the blessing. The new SV will have a reference count +of 1, and the RV will be returned. + +Note that C copies the pointer while this copies the string. =cut */ -SV * -Perl_newSVsv(pTHX_ register SV *old) +SV* +Perl_sv_setref_pvn(pTHX_ SV *rv, const char *classname, const char *pv, STRLEN n) { - register SV *sv; - - if (!old) - return Nullsv; - if (SvTYPE(old) == SVTYPEMASK) { - if (ckWARN_d(WARN_INTERNAL)) - Perl_warner(aTHX_ packWARN(WARN_INTERNAL), "semi-panic: attempt to dup freed string"); - return Nullsv; - } - new_SV(sv); - /* SV_GMAGIC is the default for sv_setv() - SV_NOSTEAL prevents TEMP buffers being, well, stolen, and saves games - with SvTEMP_off and SvTEMP_on round a call to sv_setsv. */ - sv_setsv_flags(sv, old, SV_GMAGIC | SV_NOSTEAL); - return sv; + sv_setpvn(newSVrv(rv,classname), pv, n); + return rv; } /* -=for apidoc sv_reset +=for apidoc sv_bless -Underlying implementation for the C Perl function. -Note that the perl-level function is vaguely deprecated. +Blesses an SV into a specified package. The SV must be an RV. The package +must be designated by its stash (see C). The reference count +of the SV is unaffected. =cut */ -void -Perl_sv_reset(pTHX_ register const char *s, HV *stash) +SV* +Perl_sv_bless(pTHX_ SV *sv, HV *stash) { - dVAR; - char todo[PERL_UCHAR_MAX+1]; - - if (!stash) - return; - - if (!*s) { /* reset ?? searches */ - MAGIC * const mg = mg_find((SV *)stash, PERL_MAGIC_symtab); - if (mg) { - PMOP *pm = (PMOP *) mg->mg_obj; - while (pm) { - pm->op_pmdynflags &= ~PMdf_USED; - pm = pm->op_pmnext; - } + SV *tmpRef; + if (!SvROK(sv)) + Perl_croak(aTHX_ "Can't bless non-reference value"); + tmpRef = SvRV(sv); + if (SvFLAGS(tmpRef) & (SVs_OBJECT|SVf_READONLY)) { + if (SvREADONLY(tmpRef)) + Perl_croak(aTHX_ PL_no_modify); + if (SvOBJECT(tmpRef)) { + if (SvTYPE(tmpRef) != SVt_PVIO) + --PL_sv_objcount; + SvREFCNT_dec(SvSTASH(tmpRef)); } - return; } + SvOBJECT_on(tmpRef); + if (SvTYPE(tmpRef) != SVt_PVIO) + ++PL_sv_objcount; + SvUPGRADE(tmpRef, SVt_PVMG); + SvSTASH_set(tmpRef, (HV*)SvREFCNT_inc(stash)); - /* reset variables */ + if (Gv_AMG(stash)) + SvAMAGIC_on(sv); + else + SvAMAGIC_off(sv); - if (!HvARRAY(stash)) - return; + if(SvSMAGICAL(tmpRef)) + if(mg_find(tmpRef, PERL_MAGIC_ext) || mg_find(tmpRef, PERL_MAGIC_uvar)) + mg_set(tmpRef); - Zero(todo, 256, char); - while (*s) { - I32 max; - I32 i = (unsigned char)*s; - if (s[1] == '-') { - s += 2; - } - max = (unsigned char)*s++; - for ( ; i <= max; i++) { - todo[i] = 1; - } - for (i = 0; i <= (I32) HvMAX(stash); i++) { - HE *entry; - for (entry = HvARRAY(stash)[i]; - entry; - entry = HeNEXT(entry)) - { - register GV *gv; - register SV *sv; - if (!todo[(U8)*HeKEY(entry)]) - continue; - gv = (GV*)HeVAL(entry); - sv = GvSV(gv); - if (sv) { - if (SvTHINKFIRST(sv)) { - if (!SvREADONLY(sv) && SvROK(sv)) - sv_unref(sv); - /* XXX Is this continue a bug? Why should THINKFIRST - exempt us from resetting arrays and hashes? */ - continue; - } - SvOK_off(sv); - if (SvTYPE(sv) >= SVt_PV) { - SvCUR_set(sv, 0); - if (SvPVX_const(sv) != Nullch) - *SvPVX(sv) = '\0'; - SvTAINT(sv); - } - } - if (GvAV(gv)) { - av_clear(GvAV(gv)); - } - if (GvHV(gv) && !HvNAME_get(GvHV(gv))) { -#if defined(VMS) - Perl_die(aTHX_ "Can't reset %%ENV on this system"); -#else /* ! VMS */ - hv_clear(GvHV(gv)); -# if defined(USE_ENVIRON_ARRAY) - if (gv == PL_envgv) - my_clearenv(); -# endif /* USE_ENVIRON_ARRAY */ -#endif /* VMS */ - } - } - } + + return sv; +} + +/* Downgrades a PVGV to a PVMG. + */ + +STATIC void +S_sv_unglob(pTHX_ SV *sv) +{ + void *xpvmg; + + assert(SvTYPE(sv) == SVt_PVGV); + SvFAKE_off(sv); + if (GvGP(sv)) + gp_free((GV*)sv); + if (GvSTASH(sv)) { + sv_del_backref((SV*)GvSTASH(sv), sv); + GvSTASH(sv) = NULL; } + sv_unmagic(sv, PERL_MAGIC_glob); + Safefree(GvNAME(sv)); + GvMULTI_off(sv); + + /* need to keep SvANY(sv) in the right arena */ + xpvmg = new_XPVMG(); + StructCopy(SvANY(sv), xpvmg, XPVMG); + del_XPVGV(SvANY(sv)); + SvANY(sv) = xpvmg; + + SvFLAGS(sv) &= ~SVTYPEMASK; + SvFLAGS(sv) |= SVt_PVMG; } /* -=for apidoc sv_2io +=for apidoc sv_unref_flags -Using various gambits, try to get an IO from an SV: the IO slot if its a -GV; or the recursive result if we're an RV; or the IO slot of the symbol -named after the PV if we're a string. +Unsets the RV status of the SV, and decrements the reference count of +whatever was being referenced by the RV. This can almost be thought of +as a reversal of C. The C argument can contain +C to force the reference count to be decremented +(otherwise the decrementing is conditional on the reference count being +different from one or the reference being a readonly SV). +See C. =cut */ -IO* -Perl_sv_2io(pTHX_ SV *sv) +void +Perl_sv_unref_flags(pTHX_ SV *ref, U32 flags) { - IO* io; - GV* gv; + SV* const target = SvRV(ref); - switch (SvTYPE(sv)) { - case SVt_PVIO: - io = (IO*)sv; - break; - case SVt_PVGV: - gv = (GV*)sv; - io = GvIO(gv); - if (!io) - Perl_croak(aTHX_ "Bad filehandle: %s", GvNAME(gv)); - break; - default: - if (!SvOK(sv)) - Perl_croak(aTHX_ PL_no_usym, "filehandle"); - if (SvROK(sv)) - return sv_2io(SvRV(sv)); - gv = gv_fetchsv(sv, FALSE, SVt_PVIO); - if (gv) - io = GvIO(gv); - else - io = 0; - if (!io) - Perl_croak(aTHX_ "Bad filehandle: %"SVf, sv); - break; + if (SvWEAKREF(ref)) { + sv_del_backref(target, ref); + SvWEAKREF_off(ref); + SvRV_set(ref, NULL); + return; } - return io; + SvRV_set(ref, NULL); + SvROK_off(ref); + /* You can't have a || SvREADONLY(target) here, as $a = $$a, where $a was + assigned to as BEGIN {$a = \"Foo"} will fail. */ + if (SvREFCNT(target) != 1 || (flags & SV_IMMEDIATE_UNREF)) + SvREFCNT_dec(target); + else /* XXX Hack, but hard to make $a=$a->[1] work otherwise */ + sv_2mortal(target); /* Schedule for freeing later */ } /* -=for apidoc sv_2cv - -Using various gambits, try to get a CV from an SV; in addition, try if -possible to set C<*st> and C<*gvp> to the stash and GV associated with it. +=for apidoc sv_untaint +Untaint an SV. Use C instead. =cut */ -CV * -Perl_sv_2cv(pTHX_ SV *sv, HV **st, GV **gvp, I32 lref) +void +Perl_sv_untaint(pTHX_ SV *sv) { - dVAR; - GV *gv = Nullgv; - CV *cv = Nullcv; - - if (!sv) - return *gvp = Nullgv, Nullcv; - switch (SvTYPE(sv)) { - case SVt_PVCV: - *st = CvSTASH(sv); - *gvp = Nullgv; - return (CV*)sv; - case SVt_PVHV: - case SVt_PVAV: - *gvp = Nullgv; - return Nullcv; - case SVt_PVGV: - gv = (GV*)sv; - *gvp = gv; - *st = GvESTASH(gv); - goto fix_gv; - - default: - SvGETMAGIC(sv); - if (SvROK(sv)) { - SV * const *sp = &sv; /* Used in tryAMAGICunDEREF macro. */ - tryAMAGICunDEREF(to_cv); - - sv = SvRV(sv); - if (SvTYPE(sv) == SVt_PVCV) { - cv = (CV*)sv; - *gvp = Nullgv; - *st = CvSTASH(cv); - return cv; - } - else if(isGV(sv)) - gv = (GV*)sv; - else - Perl_croak(aTHX_ "Not a subroutine reference"); - } - else if (isGV(sv)) - gv = (GV*)sv; - else - gv = gv_fetchsv(sv, lref, SVt_PVCV); - *gvp = gv; - if (!gv) - return Nullcv; - *st = GvESTASH(gv); - fix_gv: - if (lref && !GvCVu(gv)) { - SV *tmpsv; - ENTER; - tmpsv = NEWSV(704,0); - gv_efullname3(tmpsv, gv, Nullch); - /* XXX this is probably not what they think they're getting. - * It has the same effect as "sub name;", i.e. just a forward - * declaration! */ - newSUB(start_subparse(FALSE, 0), - newSVOP(OP_CONST, 0, tmpsv), - Nullop, - Nullop); - LEAVE; - if (!GvCVu(gv)) - Perl_croak(aTHX_ "Unable to create sub named \"%"SVf"\"", - sv); - } - return GvCVu(gv); + if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { + MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint); + if (mg) + mg->mg_len &= ~1; } } /* -=for apidoc sv_true - -Returns true if the SV has a true value by Perl's rules. -Use the C macro instead, which may call C or may -instead use an in-line version. +=for apidoc sv_tainted +Test an SV for taintedness. Use C instead. =cut */ -I32 -Perl_sv_true(pTHX_ register SV *sv) +bool +Perl_sv_tainted(pTHX_ SV *sv) { - if (!sv) - return 0; - if (SvPOK(sv)) { - register const XPV* const tXpv = (XPV*)SvANY(sv); - if (tXpv && - (tXpv->xpv_cur > 1 || - (tXpv->xpv_cur && *sv->sv_u.svu_pv != '0'))) - return 1; - else - return 0; - } - else { - if (SvIOK(sv)) - return SvIVX(sv) != 0; - else { - if (SvNOK(sv)) - return SvNVX(sv) != 0.0; - else - return sv_2bool(sv); - } + if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { + const MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint); + if (mg && (mg->mg_len & 1) ) + return TRUE; } + return FALSE; } /* -=for apidoc sv_pvn_force +=for apidoc sv_setpviv -Get a sensible string out of the SV somehow. -A private implementation of the C macro for compilers which -can't cope with complex macro expressions. Always use the macro instead. +Copies an integer into the given SV, also updating its string value. +Does not handle 'set' magic. See C. -=for apidoc sv_pvn_force_flags +=cut +*/ -Get a sensible string out of the SV somehow. -If C has C bit set, will C on C if -appropriate, else not. C and C are -implemented in terms of this function. -You normally want to use the various wrapper macros instead: see -C and C - -=cut -*/ - -char * -Perl_sv_pvn_force_flags(pTHX_ SV *sv, STRLEN *lp, I32 flags) +void +Perl_sv_setpviv(pTHX_ SV *sv, IV iv) { + char buf[TYPE_CHARS(UV)]; + char *ebuf; + char * const ptr = uiv_2buf(buf, iv, 0, 0, &ebuf); - if (SvTHINKFIRST(sv) && !SvROK(sv)) - sv_force_normal_flags(sv, 0); - - if (SvPOK(sv)) { - if (lp) - *lp = SvCUR(sv); - } - else { - char *s; - STRLEN len; - - if (SvREADONLY(sv) && !(flags & SV_MUTABLE_RETURN)) { - const char * const ref = sv_reftype(sv,0); - if (PL_op) - Perl_croak(aTHX_ "Can't coerce readonly %s to string in %s", - ref, OP_NAME(PL_op)); - else - Perl_croak(aTHX_ "Can't coerce readonly %s to string", ref); - } - if (SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM) - Perl_croak(aTHX_ "Can't coerce %s to string in %s", sv_reftype(sv,0), - OP_NAME(PL_op)); - s = sv_2pv_flags(sv, &len, flags); - if (lp) - *lp = len; - - if (s != SvPVX_const(sv)) { /* Almost, but not quite, sv_setpvn() */ - if (SvROK(sv)) - sv_unref(sv); - SvUPGRADE(sv, SVt_PV); /* Never FALSE */ - SvGROW(sv, len + 1); - Move(s,SvPVX(sv),len,char); - SvCUR_set(sv, len); - *SvEND(sv) = '\0'; - } - if (!SvPOK(sv)) { - SvPOK_on(sv); /* validate pointer */ - SvTAINT(sv); - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n", - PTR2UV(sv),SvPVX_const(sv))); - } - } - return SvPVX_mutable(sv); + sv_setpvn(sv, ptr, ebuf - ptr); } /* -=for apidoc sv_pvbyten_force +=for apidoc sv_setpviv_mg -The backend for the C macro. Always use the macro instead. +Like C, but also handles 'set' magic. =cut */ -char * -Perl_sv_pvbyten_force(pTHX_ SV *sv, STRLEN *lp) +void +Perl_sv_setpviv_mg(pTHX_ SV *sv, IV iv) { - sv_pvn_force(sv,lp); - sv_utf8_downgrade(sv,0); - *lp = SvCUR(sv); - return SvPVX(sv); + sv_setpviv(sv, iv); + SvSETMAGIC(sv); } -/* -=for apidoc sv_pvutf8n_force +#if defined(PERL_IMPLICIT_CONTEXT) -The backend for the C macro. Always use the macro instead. +/* pTHX_ magic can't cope with varargs, so this is a no-context + * version of the main function, (which may itself be aliased to us). + * Don't access this version directly. + */ -=cut -*/ +void +Perl_sv_setpvf_nocontext(SV *sv, const char* pat, ...) +{ + dTHX; + va_list args; + va_start(args, pat); + sv_vsetpvf(sv, pat, &args); + va_end(args); +} -char * -Perl_sv_pvutf8n_force(pTHX_ SV *sv, STRLEN *lp) +/* pTHX_ magic can't cope with varargs, so this is a no-context + * version of the main function, (which may itself be aliased to us). + * Don't access this version directly. + */ + +void +Perl_sv_setpvf_mg_nocontext(SV *sv, const char* pat, ...) { - sv_pvn_force(sv,lp); - sv_utf8_upgrade(sv); - *lp = SvCUR(sv); - return SvPVX(sv); + dTHX; + va_list args; + va_start(args, pat); + sv_vsetpvf_mg(sv, pat, &args); + va_end(args); } +#endif /* -=for apidoc sv_reftype +=for apidoc sv_setpvf -Returns a string describing what the SV is a reference to. +Works like C but copies the text into the SV instead of +appending it. Does not handle 'set' magic. See C. =cut */ -char * -Perl_sv_reftype(pTHX_ const SV *sv, int ob) +void +Perl_sv_setpvf(pTHX_ SV *sv, const char* pat, ...) { - /* The fact that I don't need to downcast to char * everywhere, only in ?: - inside return suggests a const propagation bug in g++. */ - if (ob && SvOBJECT(sv)) { - char * const name = HvNAME_get(SvSTASH(sv)); - return name ? name : (char *) "__ANON__"; - } - else { - switch (SvTYPE(sv)) { - case SVt_NULL: - case SVt_IV: - case SVt_NV: - case SVt_RV: - case SVt_PV: - case SVt_PVIV: - case SVt_PVNV: - case SVt_PVMG: - case SVt_PVBM: - if (SvVOK(sv)) - return "VSTRING"; - if (SvROK(sv)) - return "REF"; - else - return "SCALAR"; - - case SVt_PVLV: return (char *) (SvROK(sv) ? "REF" - /* tied lvalues should appear to be - * scalars for backwards compatitbility */ - : (LvTYPE(sv) == 't' || LvTYPE(sv) == 'T') - ? "SCALAR" : "LVALUE"); - case SVt_PVAV: return "ARRAY"; - case SVt_PVHV: return "HASH"; - case SVt_PVCV: return "CODE"; - case SVt_PVGV: return "GLOB"; - case SVt_PVFM: return "FORMAT"; - case SVt_PVIO: return "IO"; - default: return "UNKNOWN"; - } - } + va_list args; + va_start(args, pat); + sv_vsetpvf(sv, pat, &args); + va_end(args); } /* -=for apidoc sv_isobject +=for apidoc sv_vsetpvf -Returns a boolean indicating whether the SV is an RV pointing to a blessed -object. If the SV is not an RV, or if the object is not blessed, then this -will return false. +Works like C but copies the text into the SV instead of +appending it. Does not handle 'set' magic. See C. + +Usually used via its frontend C. =cut */ -int -Perl_sv_isobject(pTHX_ SV *sv) +void +Perl_sv_vsetpvf(pTHX_ SV *sv, const char* pat, va_list* args) { - if (!sv) - return 0; - SvGETMAGIC(sv); - if (!SvROK(sv)) - return 0; - sv = (SV*)SvRV(sv); - if (!SvOBJECT(sv)) - return 0; - return 1; + sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); } /* -=for apidoc sv_isa +=for apidoc sv_setpvf_mg -Returns a boolean indicating whether the SV is blessed into the specified -class. This does not check for subtypes; use C to verify -an inheritance relationship. +Like C, but also handles 'set' magic. =cut */ -int -Perl_sv_isa(pTHX_ SV *sv, const char *name) +void +Perl_sv_setpvf_mg(pTHX_ SV *sv, const char* pat, ...) { - const char *hvname; - if (!sv) - return 0; - SvGETMAGIC(sv); - if (!SvROK(sv)) - return 0; - sv = (SV*)SvRV(sv); - if (!SvOBJECT(sv)) - return 0; - hvname = HvNAME_get(SvSTASH(sv)); - if (!hvname) - return 0; - - return strEQ(hvname, name); + va_list args; + va_start(args, pat); + sv_vsetpvf_mg(sv, pat, &args); + va_end(args); } /* -=for apidoc newSVrv +=for apidoc sv_vsetpvf_mg -Creates a new SV for the RV, C, to point to. If C is not an RV then -it will be upgraded to one. If C is non-null then the new SV will -be blessed in the specified package. The new SV is returned and its -reference count is 1. +Like C, but also handles 'set' magic. + +Usually used via its frontend C. =cut */ -SV* -Perl_newSVrv(pTHX_ SV *rv, const char *classname) +void +Perl_sv_vsetpvf_mg(pTHX_ SV *sv, const char* pat, va_list* args) { - SV *sv; + sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); + SvSETMAGIC(sv); +} - new_SV(sv); +#if defined(PERL_IMPLICIT_CONTEXT) - SV_CHECK_THINKFIRST_COW_DROP(rv); - SvAMAGIC_off(rv); +/* pTHX_ magic can't cope with varargs, so this is a no-context + * version of the main function, (which may itself be aliased to us). + * Don't access this version directly. + */ - if (SvTYPE(rv) >= SVt_PVMG) { - const U32 refcnt = SvREFCNT(rv); - SvREFCNT(rv) = 0; - sv_clear(rv); - SvFLAGS(rv) = 0; - SvREFCNT(rv) = refcnt; - } +void +Perl_sv_catpvf_nocontext(SV *sv, const char* pat, ...) +{ + dTHX; + va_list args; + va_start(args, pat); + sv_vcatpvf(sv, pat, &args); + va_end(args); +} - if (SvTYPE(rv) < SVt_RV) - sv_upgrade(rv, SVt_RV); - else if (SvTYPE(rv) > SVt_RV) { - SvPV_free(rv); - SvCUR_set(rv, 0); - SvLEN_set(rv, 0); - } - - SvOK_off(rv); - SvRV_set(rv, sv); - SvROK_on(rv); +/* pTHX_ magic can't cope with varargs, so this is a no-context + * version of the main function, (which may itself be aliased to us). + * Don't access this version directly. + */ - if (classname) { - HV* const stash = gv_stashpv(classname, TRUE); - (void)sv_bless(rv, stash); - } - return sv; +void +Perl_sv_catpvf_mg_nocontext(SV *sv, const char* pat, ...) +{ + dTHX; + va_list args; + va_start(args, pat); + sv_vcatpvf_mg(sv, pat, &args); + va_end(args); } +#endif /* -=for apidoc sv_setref_pv - -Copies a pointer into a new SV, optionally blessing the SV. The C -argument will be upgraded to an RV. That RV will be modified to point to -the new SV. If the C argument is NULL then C will be placed -into the SV. The C argument indicates the package for the -blessing. Set C to C to avoid the blessing. The new SV -will have a reference count of 1, and the RV will be returned. - -Do not use with other Perl types such as HV, AV, SV, CV, because those -objects will become corrupted by the pointer copy process. +=for apidoc sv_catpvf -Note that C copies the string while this copies the pointer. +Processes its arguments like C and appends the formatted +output to an SV. If the appended data contains "wide" characters +(including, but not limited to, SVs with a UTF-8 PV formatted with %s, +and characters >255 formatted with %c), the original SV might get +upgraded to UTF-8. Handles 'get' magic, but not 'set' magic. See +C. If the original SV was UTF-8, the pattern should be +valid UTF-8; if the original SV was bytes, the pattern should be too. -=cut -*/ +=cut */ -SV* -Perl_sv_setref_pv(pTHX_ SV *rv, const char *classname, void *pv) +void +Perl_sv_catpvf(pTHX_ SV *sv, const char* pat, ...) { - if (!pv) { - sv_setsv(rv, &PL_sv_undef); - SvSETMAGIC(rv); - } - else - sv_setiv(newSVrv(rv,classname), PTR2IV(pv)); - return rv; + va_list args; + va_start(args, pat); + sv_vcatpvf(sv, pat, &args); + va_end(args); } /* -=for apidoc sv_setref_iv +=for apidoc sv_vcatpvf -Copies an integer into a new SV, optionally blessing the SV. The C -argument will be upgraded to an RV. That RV will be modified to point to -the new SV. The C argument indicates the package for the -blessing. Set C to C to avoid the blessing. The new SV -will have a reference count of 1, and the RV will be returned. +Processes its arguments like C and appends the formatted output +to an SV. Does not handle 'set' magic. See C. + +Usually used via its frontend C. =cut */ -SV* -Perl_sv_setref_iv(pTHX_ SV *rv, const char *classname, IV iv) +void +Perl_sv_vcatpvf(pTHX_ SV *sv, const char* pat, va_list* args) { - sv_setiv(newSVrv(rv,classname), iv); - return rv; + sv_vcatpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); } /* -=for apidoc sv_setref_uv +=for apidoc sv_catpvf_mg -Copies an unsigned integer into a new SV, optionally blessing the SV. The C -argument will be upgraded to an RV. That RV will be modified to point to -the new SV. The C argument indicates the package for the -blessing. Set C to C to avoid the blessing. The new SV -will have a reference count of 1, and the RV will be returned. +Like C, but also handles 'set' magic. =cut */ -SV* -Perl_sv_setref_uv(pTHX_ SV *rv, const char *classname, UV uv) +void +Perl_sv_catpvf_mg(pTHX_ SV *sv, const char* pat, ...) { - sv_setuv(newSVrv(rv,classname), uv); - return rv; + va_list args; + va_start(args, pat); + sv_vcatpvf_mg(sv, pat, &args); + va_end(args); } /* -=for apidoc sv_setref_nv +=for apidoc sv_vcatpvf_mg -Copies a double into a new SV, optionally blessing the SV. The C -argument will be upgraded to an RV. That RV will be modified to point to -the new SV. The C argument indicates the package for the -blessing. Set C to C to avoid the blessing. The new SV -will have a reference count of 1, and the RV will be returned. +Like C, but also handles 'set' magic. + +Usually used via its frontend C. =cut */ -SV* -Perl_sv_setref_nv(pTHX_ SV *rv, const char *classname, NV nv) +void +Perl_sv_vcatpvf_mg(pTHX_ SV *sv, const char* pat, va_list* args) { - sv_setnv(newSVrv(rv,classname), nv); - return rv; + sv_vcatpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); + SvSETMAGIC(sv); } /* -=for apidoc sv_setref_pvn +=for apidoc sv_vsetpvfn -Copies a string into a new SV, optionally blessing the SV. The length of the -string must be specified with C. The C argument will be upgraded to -an RV. That RV will be modified to point to the new SV. The C -argument indicates the package for the blessing. Set C to -C to avoid the blessing. The new SV will have a reference count -of 1, and the RV will be returned. +Works like C but copies the text into the SV instead of +appending it. -Note that C copies the pointer while this copies the string. +Usually used via one of its frontends C and C. =cut */ -SV* -Perl_sv_setref_pvn(pTHX_ SV *rv, const char *classname, const char *pv, STRLEN n) +void +Perl_sv_vsetpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV **svargs, I32 svmax, bool *maybe_tainted) { - sv_setpvn(newSVrv(rv,classname), pv, n); - return rv; + sv_setpvn(sv, "", 0); + sv_vcatpvfn(sv, pat, patlen, args, svargs, svmax, maybe_tainted); } -/* -=for apidoc sv_bless - -Blesses an SV into a specified package. The SV must be an RV. The package -must be designated by its stash (see C). The reference count -of the SV is unaffected. - -=cut -*/ +/* private function for use in sv_vcatpvfn via the EXPECT_NUMBER macro */ -SV* -Perl_sv_bless(pTHX_ SV *sv, HV *stash) +STATIC I32 +S_expect_number(pTHX_ char** pattern) { - SV *tmpRef; - if (!SvROK(sv)) - Perl_croak(aTHX_ "Can't bless non-reference value"); - tmpRef = SvRV(sv); - if (SvFLAGS(tmpRef) & (SVs_OBJECT|SVf_READONLY)) { - if (SvREADONLY(tmpRef)) - Perl_croak(aTHX_ PL_no_modify); - if (SvOBJECT(tmpRef)) { - if (SvTYPE(tmpRef) != SVt_PVIO) - --PL_sv_objcount; - SvREFCNT_dec(SvSTASH(tmpRef)); + I32 var = 0; + switch (**pattern) { + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + var = *(*pattern)++ - '0'; + while (isDIGIT(**pattern)) { + I32 tmp = var * 10 + (*(*pattern)++ - '0'); + if (tmp < var) + Perl_croak(aTHX_ "Integer overflow in format string for %s", (PL_op ? OP_NAME(PL_op) : "sv_vcatpvfn")); + var = tmp; } } - SvOBJECT_on(tmpRef); - if (SvTYPE(tmpRef) != SVt_PVIO) - ++PL_sv_objcount; - SvUPGRADE(tmpRef, SVt_PVMG); - SvSTASH_set(tmpRef, (HV*)SvREFCNT_inc(stash)); - - if (Gv_AMG(stash)) - SvAMAGIC_on(sv); - else - SvAMAGIC_off(sv); - - if(SvSMAGICAL(tmpRef)) - if(mg_find(tmpRef, PERL_MAGIC_ext) || mg_find(tmpRef, PERL_MAGIC_uvar)) - mg_set(tmpRef); - - - - return sv; + return var; } +#define EXPECT_NUMBER(pattern, var) (var = S_expect_number(aTHX_ &pattern)) -/* Downgrades a PVGV to a PVMG. - */ - -STATIC void -S_sv_unglob(pTHX_ SV *sv) +static char * +F0convert(NV nv, char *endbuf, STRLEN *len) { - void *xpvmg; + const int neg = nv < 0; + UV uv; - assert(SvTYPE(sv) == SVt_PVGV); - SvFAKE_off(sv); - if (GvGP(sv)) - gp_free((GV*)sv); - if (GvSTASH(sv)) { - sv_del_backref((SV*)GvSTASH(sv), sv); - GvSTASH(sv) = Nullhv; + if (neg) + nv = -nv; + if (nv < UV_MAX) { + char *p = endbuf; + nv += 0.5; + uv = (UV)nv; + if (uv & 1 && uv == nv) + uv--; /* Round to even */ + do { + const unsigned dig = uv % 10; + *--p = '0' + dig; + } while (uv /= 10); + if (neg) + *--p = '-'; + *len = endbuf - p; + return p; } - sv_unmagic(sv, PERL_MAGIC_glob); - Safefree(GvNAME(sv)); - GvMULTI_off(sv); - - /* need to keep SvANY(sv) in the right arena */ - xpvmg = new_XPVMG(); - StructCopy(SvANY(sv), xpvmg, XPVMG); - del_XPVGV(SvANY(sv)); - SvANY(sv) = xpvmg; - - SvFLAGS(sv) &= ~SVTYPEMASK; - SvFLAGS(sv) |= SVt_PVMG; + return Nullch; } + /* -=for apidoc sv_unref_flags +=for apidoc sv_vcatpvfn -Unsets the RV status of the SV, and decrements the reference count of -whatever was being referenced by the RV. This can almost be thought of -as a reversal of C. The C argument can contain -C to force the reference count to be decremented -(otherwise the decrementing is conditional on the reference count being -different from one or the reference being a readonly SV). -See C. +Processes its arguments like C and appends the formatted output +to an SV. Uses an array of SVs if the C style variable argument list is +missing (NULL). When running with taint checks enabled, indicates via +C if results are untrustworthy (often due to the use of +locales). + +Usually used via one of its frontends C and C. =cut */ -void -Perl_sv_unref_flags(pTHX_ SV *ref, U32 flags) -{ - SV* const target = SvRV(ref); - if (SvWEAKREF(ref)) { - sv_del_backref(target, ref); - SvWEAKREF_off(ref); - SvRV_set(ref, NULL); - return; - } - SvRV_set(ref, NULL); - SvROK_off(ref); - /* You can't have a || SvREADONLY(target) here, as $a = $$a, where $a was - assigned to as BEGIN {$a = \"Foo"} will fail. */ - if (SvREFCNT(target) != 1 || (flags & SV_IMMEDIATE_UNREF)) - SvREFCNT_dec(target); - else /* XXX Hack, but hard to make $a=$a->[1] work otherwise */ - sv_2mortal(target); /* Schedule for freeing later */ -} - -/* -=for apidoc sv_untaint +#define VECTORIZE_ARGS vecsv = va_arg(*args, SV*);\ + vecstr = (U8*)SvPV_const(vecsv,veclen);\ + vec_utf8 = DO_UTF8(vecsv); -Untaint an SV. Use C instead. -=cut -*/ +/* XXX maybe_tainted is never assigned to, so the doc above is lying. */ void -Perl_sv_untaint(pTHX_ SV *sv) +Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV **svargs, I32 svmax, bool *maybe_tainted) { - if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { - MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint); - if (mg) - mg->mg_len &= ~1; - } -} + char *p; + char *q; + const char *patend; + STRLEN origlen; + I32 svix = 0; + static const char nullstr[] = "(null)"; + SV *argsv = Nullsv; + bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */ + const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */ + SV *nsv = Nullsv; + /* Times 4: a decimal digit takes more than 3 binary digits. + * NV_DIG: mantissa takes than many decimal digits. + * Plus 32: Playing safe. */ + char ebuf[IV_DIG * 4 + NV_DIG + 32]; + /* large enough for "%#.#f" --chip */ + /* what about long double NVs? --jhi */ -/* -=for apidoc sv_tainted + PERL_UNUSED_ARG(maybe_tainted); -Test an SV for taintedness. Use C instead. -=cut -*/ + /* no matter what, this is a string now */ + (void)SvPV_force(sv, origlen); -bool -Perl_sv_tainted(pTHX_ SV *sv) -{ - if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { - const MAGIC * const mg = mg_find(sv, PERL_MAGIC_taint); - if (mg && (mg->mg_len & 1) ) - return TRUE; + /* special-case "", "%s", and "%-p" (SVf - see below) */ + if (patlen == 0) + return; + if (patlen == 2 && pat[0] == '%' && pat[1] == 's') { + if (args) { + const char * const s = va_arg(*args, char*); + sv_catpv(sv, s ? s : nullstr); + } + else if (svix < svmax) { + sv_catsv(sv, *svargs); + } + return; + } + if (args && patlen == 3 && pat[0] == '%' && + pat[1] == '-' && pat[2] == 'p') { + argsv = va_arg(*args, SV*); + sv_catsv(sv, argsv); + return; } - return FALSE; -} - -/* -=for apidoc sv_setpviv -Copies an integer into the given SV, also updating its string value. -Does not handle 'set' magic. See C. +#ifndef USE_LONG_DOUBLE + /* special-case "%.[gf]" */ + if ( !args && patlen <= 5 && pat[0] == '%' && pat[1] == '.' + && (pat[patlen-1] == 'g' || pat[patlen-1] == 'f') ) { + unsigned digits = 0; + const char *pp; -=cut -*/ + pp = pat + 2; + while (*pp >= '0' && *pp <= '9') + digits = 10 * digits + (*pp++ - '0'); + if (pp - pat == (int)patlen - 1) { + NV nv; -void -Perl_sv_setpviv(pTHX_ SV *sv, IV iv) -{ - char buf[TYPE_CHARS(UV)]; - char *ebuf; - char * const ptr = uiv_2buf(buf, iv, 0, 0, &ebuf); + if (svix < svmax) + nv = SvNV(*svargs); + else + return; + if (*pp == 'g') { + /* Add check for digits != 0 because it seems that some + gconverts are buggy in this case, and we don't yet have + a Configure test for this. */ + if (digits && digits < sizeof(ebuf) - NV_DIG - 10) { + /* 0, point, slack */ + Gconvert(nv, (int)digits, 0, ebuf); + sv_catpv(sv, ebuf); + if (*ebuf) /* May return an empty string for digits==0 */ + return; + } + } else if (!digits) { + STRLEN l; - sv_setpvn(sv, ptr, ebuf - ptr); -} + if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) { + sv_catpvn(sv, p, l); + return; + } + } + } + } +#endif /* !USE_LONG_DOUBLE */ -/* -=for apidoc sv_setpviv_mg + if (!args && svix < svmax && DO_UTF8(*svargs)) + has_utf8 = TRUE; -Like C, but also handles 'set' magic. + patend = (char*)pat + patlen; + for (p = (char*)pat; p < patend; p = q) { + bool alt = FALSE; + bool left = FALSE; + bool vectorize = FALSE; + bool vectorarg = FALSE; + bool vec_utf8 = FALSE; + char fill = ' '; + char plus = 0; + char intsize = 0; + STRLEN width = 0; + STRLEN zeros = 0; + bool has_precis = FALSE; + STRLEN precis = 0; + I32 osvix = svix; + bool is_utf8 = FALSE; /* is this item utf8? */ +#ifdef HAS_LDBL_SPRINTF_BUG + /* This is to try to fix a bug with irix/nonstop-ux/powerux and + with sfio - Allen */ + bool fix_ldbl_sprintf_bug = FALSE; +#endif -=cut -*/ + char esignbuf[4]; + U8 utf8buf[UTF8_MAXBYTES+1]; + STRLEN esignlen = 0; -void -Perl_sv_setpviv_mg(pTHX_ SV *sv, IV iv) -{ - sv_setpviv(sv, iv); - SvSETMAGIC(sv); -} + const char *eptr = Nullch; + STRLEN elen = 0; + SV *vecsv = Nullsv; + const U8 *vecstr = Null(U8*); + STRLEN veclen = 0; + char c = 0; + int i; + unsigned base = 0; + IV iv = 0; + UV uv = 0; + /* we need a long double target in case HAS_LONG_DOUBLE but + not USE_LONG_DOUBLE + */ +#if defined(HAS_LONG_DOUBLE) && LONG_DOUBLESIZE > DOUBLESIZE + long double nv; +#else + NV nv; +#endif + STRLEN have; + STRLEN need; + STRLEN gap; + const char *dotstr = "."; + STRLEN dotstrlen = 1; + I32 efix = 0; /* explicit format parameter index */ + I32 ewix = 0; /* explicit width index */ + I32 epix = 0; /* explicit precision index */ + I32 evix = 0; /* explicit vector index */ + bool asterisk = FALSE; -#if defined(PERL_IMPLICIT_CONTEXT) + /* echo everything up to the next format specification */ + for (q = p; q < patend && *q != '%'; ++q) ; + if (q > p) { + if (has_utf8 && !pat_utf8) + sv_catpvn_utf8_upgrade(sv, p, q - p, nsv); + else + sv_catpvn(sv, p, q - p); + p = q; + } + if (q++ >= patend) + break; -/* pTHX_ magic can't cope with varargs, so this is a no-context - * version of the main function, (which may itself be aliased to us). - * Don't access this version directly. - */ +/* + We allow format specification elements in this order: + \d+\$ explicit format parameter index + [-+ 0#]+ flags + v|\*(\d+\$)?v vector with optional (optionally specified) arg + 0 flag (as above): repeated to allow "v02" + \d+|\*(\d+\$)? width using optional (optionally specified) arg + \.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg + [hlqLV] size + [%bcdefginopsuxDFOUX] format (mandatory) +*/ -void -Perl_sv_setpvf_nocontext(SV *sv, const char* pat, ...) -{ - dTHX; - va_list args; - va_start(args, pat); - sv_vsetpvf(sv, pat, &args); - va_end(args); -} + if (args) { +/* + As of perl5.9.3, printf format checking is on by default. + Internally, perl uses %p formats to provide an escape to + some extended formatting. This block deals with those + extensions: if it does not match, (char*)q is reset and + the normal format processing code is used. -/* pTHX_ magic can't cope with varargs, so this is a no-context - * version of the main function, (which may itself be aliased to us). - * Don't access this version directly. - */ + Currently defined extensions are: + %p include pointer address (standard) + %-p (SVf) include an SV (previously %_) + %-p include an SV with precision + %1p (VDf) include a v-string (as %vd) + %p reserved for future extensions -void -Perl_sv_setpvf_mg_nocontext(SV *sv, const char* pat, ...) -{ - dTHX; - va_list args; - va_start(args, pat); - sv_vsetpvf_mg(sv, pat, &args); - va_end(args); -} + Robin Barker 2005-07-14 +*/ + char* r = q; + bool sv = FALSE; + STRLEN n = 0; + if (*q == '-') + sv = *q++; + EXPECT_NUMBER(q, n); + if (*q++ == 'p') { + if (sv) { /* SVf */ + if (n) { + precis = n; + has_precis = TRUE; + } + argsv = va_arg(*args, SV*); + eptr = SvPVx_const(argsv, elen); + if (DO_UTF8(argsv)) + is_utf8 = TRUE; + goto string; + } +#if vdNUMBER + else if (n == vdNUMBER) { /* VDf */ + vectorize = TRUE; + VECTORIZE_ARGS + goto format_vd; + } #endif + else if (n) { + if (ckWARN_d(WARN_INTERNAL)) + Perl_warner(aTHX_ packWARN(WARN_INTERNAL), + "internal %%p might conflict with future printf extensions"); + } + } + q = r; + } -/* -=for apidoc sv_setpvf - -Works like C but copies the text into the SV instead of -appending it. Does not handle 'set' magic. See C. - -=cut -*/ + if (EXPECT_NUMBER(q, width)) { + if (*q == '$') { + ++q; + efix = width; + } else { + goto gotwidth; + } + } -void -Perl_sv_setpvf(pTHX_ SV *sv, const char* pat, ...) -{ - va_list args; - va_start(args, pat); - sv_vsetpvf(sv, pat, &args); - va_end(args); -} + /* FLAGS */ -/* -=for apidoc sv_vsetpvf + while (*q) { + switch (*q) { + case ' ': + case '+': + plus = *q++; + continue; -Works like C but copies the text into the SV instead of -appending it. Does not handle 'set' magic. See C. + case '-': + left = TRUE; + q++; + continue; -Usually used via its frontend C. + case '0': + fill = *q++; + continue; -=cut -*/ + case '#': + alt = TRUE; + q++; + continue; -void -Perl_sv_vsetpvf(pTHX_ SV *sv, const char* pat, va_list* args) -{ - sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); -} + default: + break; + } + break; + } -/* -=for apidoc sv_setpvf_mg + tryasterisk: + if (*q == '*') { + q++; + if (EXPECT_NUMBER(q, ewix)) + if (*q++ != '$') + goto unknown; + asterisk = TRUE; + } + if (*q == 'v') { + q++; + if (vectorize) + goto unknown; + if ((vectorarg = asterisk)) { + evix = ewix; + ewix = 0; + asterisk = FALSE; + } + vectorize = TRUE; + goto tryasterisk; + } -Like C, but also handles 'set' magic. + if (!asterisk) + { + if( *q == '0' ) + fill = *q++; + EXPECT_NUMBER(q, width); + } -=cut -*/ + if (vectorize) { + if (vectorarg) { + if (args) + vecsv = va_arg(*args, SV*); + else if (evix) { + vecsv = (evix > 0 && evix <= svmax) + ? svargs[evix-1] : &PL_sv_undef; + } else { + vecsv = svix < svmax ? svargs[svix++] : &PL_sv_undef; + } + dotstr = SvPV_const(vecsv, dotstrlen); + /* Keep the DO_UTF8 test *after* the SvPV call, else things go + bad with tied or overloaded values that return UTF8. */ + if (DO_UTF8(vecsv)) + is_utf8 = TRUE; + else if (has_utf8) { + vecsv = sv_mortalcopy(vecsv); + sv_utf8_upgrade(vecsv); + dotstr = SvPV_const(vecsv, dotstrlen); + is_utf8 = TRUE; + } + } + if (args) { + VECTORIZE_ARGS + } + else if (efix ? (efix > 0 && efix <= svmax) : svix < svmax) { + vecsv = svargs[efix ? efix-1 : svix++]; + vecstr = (U8*)SvPV_const(vecsv,veclen); + vec_utf8 = DO_UTF8(vecsv); -void -Perl_sv_setpvf_mg(pTHX_ SV *sv, const char* pat, ...) -{ - va_list args; - va_start(args, pat); - sv_vsetpvf_mg(sv, pat, &args); - va_end(args); -} + /* if this is a version object, we need to convert + * back into v-string notation and then let the + * vectorize happen normally + */ + if (sv_derived_from(vecsv, "version")) { + char *version = savesvpv(vecsv); + vecsv = sv_newmortal(); + /* scan_vstring is expected to be called during + * tokenization, so we need to fake up the end + * of the buffer for it + */ + PL_bufend = version + veclen; + scan_vstring(version, vecsv); + vecstr = (U8*)SvPV_const(vecsv, veclen); + vec_utf8 = DO_UTF8(vecsv); + Safefree(version); + } + } + else { + vecstr = (U8*)""; + veclen = 0; + } + } -/* -=for apidoc sv_vsetpvf_mg + if (asterisk) { + if (args) + i = va_arg(*args, int); + else + i = (ewix ? ewix <= svmax : svix < svmax) ? + SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; + left |= (i < 0); + width = (i < 0) ? -i : i; + } + gotwidth: -Like C, but also handles 'set' magic. + /* PRECISION */ -Usually used via its frontend C. + if (*q == '.') { + q++; + if (*q == '*') { + q++; + if (EXPECT_NUMBER(q, epix) && *q++ != '$') + goto unknown; + /* XXX: todo, support specified precision parameter */ + if (epix) + goto unknown; + if (args) + i = va_arg(*args, int); + else + i = (ewix ? ewix <= svmax : svix < svmax) + ? SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; + precis = (i < 0) ? 0 : i; + } + else { + precis = 0; + while (isDIGIT(*q)) + precis = precis * 10 + (*q++ - '0'); + } + has_precis = TRUE; + } -=cut -*/ + /* SIZE */ -void -Perl_sv_vsetpvf_mg(pTHX_ SV *sv, const char* pat, va_list* args) -{ - sv_vsetpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); - SvSETMAGIC(sv); -} + switch (*q) { +#ifdef WIN32 + case 'I': /* Ix, I32x, and I64x */ +# ifdef WIN64 + if (q[1] == '6' && q[2] == '4') { + q += 3; + intsize = 'q'; + break; + } +# endif + if (q[1] == '3' && q[2] == '2') { + q += 3; + break; + } +# ifdef WIN64 + intsize = 'q'; +# endif + q++; + break; +#endif +#if defined(HAS_QUAD) || defined(HAS_LONG_DOUBLE) + case 'L': /* Ld */ + /* FALL THROUGH */ +#ifdef HAS_QUAD + case 'q': /* qd */ +#endif + intsize = 'q'; + q++; + break; +#endif + case 'l': +#if defined(HAS_QUAD) || defined(HAS_LONG_DOUBLE) + if (*(q + 1) == 'l') { /* lld, llf */ + intsize = 'q'; + q += 2; + break; + } +#endif + /* FALL THROUGH */ + case 'h': + /* FALL THROUGH */ + case 'V': + intsize = *q++; + break; + } -#if defined(PERL_IMPLICIT_CONTEXT) + /* CONVERSION */ -/* pTHX_ magic can't cope with varargs, so this is a no-context - * version of the main function, (which may itself be aliased to us). - * Don't access this version directly. - */ - -void -Perl_sv_catpvf_nocontext(SV *sv, const char* pat, ...) -{ - dTHX; - va_list args; - va_start(args, pat); - sv_vcatpvf(sv, pat, &args); - va_end(args); -} - -/* pTHX_ magic can't cope with varargs, so this is a no-context - * version of the main function, (which may itself be aliased to us). - * Don't access this version directly. - */ - -void -Perl_sv_catpvf_mg_nocontext(SV *sv, const char* pat, ...) -{ - dTHX; - va_list args; - va_start(args, pat); - sv_vcatpvf_mg(sv, pat, &args); - va_end(args); -} -#endif - -/* -=for apidoc sv_catpvf - -Processes its arguments like C and appends the formatted -output to an SV. If the appended data contains "wide" characters -(including, but not limited to, SVs with a UTF-8 PV formatted with %s, -and characters >255 formatted with %c), the original SV might get -upgraded to UTF-8. Handles 'get' magic, but not 'set' magic. See -C. If the original SV was UTF-8, the pattern should be -valid UTF-8; if the original SV was bytes, the pattern should be too. - -=cut */ - -void -Perl_sv_catpvf(pTHX_ SV *sv, const char* pat, ...) -{ - va_list args; - va_start(args, pat); - sv_vcatpvf(sv, pat, &args); - va_end(args); -} - -/* -=for apidoc sv_vcatpvf - -Processes its arguments like C and appends the formatted output -to an SV. Does not handle 'set' magic. See C. + if (*q == '%') { + eptr = q++; + elen = 1; + if (vectorize) { + c = '%'; + goto unknown; + } + goto string; + } -Usually used via its frontend C. + if (!vectorize && !args) { + if (efix) { + const I32 i = efix-1; + argsv = (i >= 0 && i < svmax) ? svargs[i] : &PL_sv_undef; + } else { + argsv = (svix >= 0 && svix < svmax) + ? svargs[svix++] : &PL_sv_undef; + } + } -=cut -*/ + switch (c = *q++) { -void -Perl_sv_vcatpvf(pTHX_ SV *sv, const char* pat, va_list* args) -{ - sv_vcatpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); -} + /* STRINGS */ -/* -=for apidoc sv_catpvf_mg + case 'c': + if (vectorize) + goto unknown; + uv = (args) ? va_arg(*args, int) : SvIVx(argsv); + if ((uv > 255 || + (!UNI_IS_INVARIANT(uv) && SvUTF8(sv))) + && !IN_BYTES) { + eptr = (char*)utf8buf; + elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf; + is_utf8 = TRUE; + } + else { + c = (char)uv; + eptr = &c; + elen = 1; + } + goto string; -Like C, but also handles 'set' magic. + case 's': + if (vectorize) + goto unknown; + if (args) { + eptr = va_arg(*args, char*); + if (eptr) +#ifdef MACOS_TRADITIONAL + /* On MacOS, %#s format is used for Pascal strings */ + if (alt) + elen = *eptr++; + else +#endif + elen = strlen(eptr); + else { + eptr = (char *)nullstr; + elen = sizeof nullstr - 1; + } + } + else { + eptr = SvPVx_const(argsv, elen); + if (DO_UTF8(argsv)) { + if (has_precis && precis < elen) { + I32 p = precis; + sv_pos_u2b(argsv, &p, 0); /* sticks at end */ + precis = p; + } + if (width) { /* fudge width (can't fudge elen) */ + width += elen - sv_len_utf8(argsv); + } + is_utf8 = TRUE; + } + } -=cut -*/ + string: + if (has_precis && elen > precis) + elen = precis; + break; -void -Perl_sv_catpvf_mg(pTHX_ SV *sv, const char* pat, ...) -{ - va_list args; - va_start(args, pat); - sv_vcatpvf_mg(sv, pat, &args); - va_end(args); -} + /* INTEGERS */ -/* -=for apidoc sv_vcatpvf_mg + case 'p': + if (alt || vectorize) + goto unknown; + uv = PTR2UV(args ? va_arg(*args, void*) : argsv); + base = 16; + goto integer; -Like C, but also handles 'set' magic. + case 'D': +#ifdef IV_IS_QUAD + intsize = 'q'; +#else + intsize = 'l'; +#endif + /* FALL THROUGH */ + case 'd': + case 'i': +#if vdNUMBER + format_vd: +#endif + if (vectorize) { + STRLEN ulen; + if (!veclen) + continue; + if (vec_utf8) + uv = utf8n_to_uvchr(vecstr, veclen, &ulen, + UTF8_ALLOW_ANYUV); + else { + uv = *vecstr; + ulen = 1; + } + vecstr += ulen; + veclen -= ulen; + if (plus) + esignbuf[esignlen++] = plus; + } + else if (args) { + switch (intsize) { + case 'h': iv = (short)va_arg(*args, int); break; + case 'l': iv = va_arg(*args, long); break; + case 'V': iv = va_arg(*args, IV); break; + default: iv = va_arg(*args, int); break; +#ifdef HAS_QUAD + case 'q': iv = va_arg(*args, Quad_t); break; +#endif + } + } + else { + IV tiv = SvIVx(argsv); /* work around GCC bug #13488 */ + switch (intsize) { + case 'h': iv = (short)tiv; break; + case 'l': iv = (long)tiv; break; + case 'V': + default: iv = tiv; break; +#ifdef HAS_QUAD + case 'q': iv = (Quad_t)tiv; break; +#endif + } + } + if ( !vectorize ) /* we already set uv above */ + { + if (iv >= 0) { + uv = iv; + if (plus) + esignbuf[esignlen++] = plus; + } + else { + uv = -iv; + esignbuf[esignlen++] = '-'; + } + } + base = 10; + goto integer; -Usually used via its frontend C. + case 'U': +#ifdef IV_IS_QUAD + intsize = 'q'; +#else + intsize = 'l'; +#endif + /* FALL THROUGH */ + case 'u': + base = 10; + goto uns_integer; -=cut -*/ + case 'b': + base = 2; + goto uns_integer; -void -Perl_sv_vcatpvf_mg(pTHX_ SV *sv, const char* pat, va_list* args) -{ - sv_vcatpvfn(sv, pat, strlen(pat), args, Null(SV**), 0, Null(bool*)); - SvSETMAGIC(sv); -} + case 'O': +#ifdef IV_IS_QUAD + intsize = 'q'; +#else + intsize = 'l'; +#endif + /* FALL THROUGH */ + case 'o': + base = 8; + goto uns_integer; -/* -=for apidoc sv_vsetpvfn + case 'X': + case 'x': + base = 16; -Works like C but copies the text into the SV instead of -appending it. - -Usually used via one of its frontends C and C. + uns_integer: + if (vectorize) { + STRLEN ulen; + vector: + if (!veclen) + continue; + if (vec_utf8) + uv = utf8n_to_uvchr(vecstr, veclen, &ulen, + UTF8_ALLOW_ANYUV); + else { + uv = *vecstr; + ulen = 1; + } + vecstr += ulen; + veclen -= ulen; + } + else if (args) { + switch (intsize) { + case 'h': uv = (unsigned short)va_arg(*args, unsigned); break; + case 'l': uv = va_arg(*args, unsigned long); break; + case 'V': uv = va_arg(*args, UV); break; + default: uv = va_arg(*args, unsigned); break; +#ifdef HAS_QUAD + case 'q': uv = va_arg(*args, Uquad_t); break; +#endif + } + } + else { + UV tuv = SvUVx(argsv); /* work around GCC bug #13488 */ + switch (intsize) { + case 'h': uv = (unsigned short)tuv; break; + case 'l': uv = (unsigned long)tuv; break; + case 'V': + default: uv = tuv; break; +#ifdef HAS_QUAD + case 'q': uv = (Uquad_t)tuv; break; +#endif + } + } -=cut -*/ + integer: + { + char *ptr = ebuf + sizeof ebuf; + switch (base) { + unsigned dig; + case 16: + if (!uv) + alt = FALSE; + p = (char*)((c == 'X') + ? "0123456789ABCDEF" : "0123456789abcdef"); + do { + dig = uv & 15; + *--ptr = p[dig]; + } while (uv >>= 4); + if (alt) { + esignbuf[esignlen++] = '0'; + esignbuf[esignlen++] = c; /* 'x' or 'X' */ + } + break; + case 8: + do { + dig = uv & 7; + *--ptr = '0' + dig; + } while (uv >>= 3); + if (alt && *ptr != '0') + *--ptr = '0'; + break; + case 2: + if (!uv) + alt = FALSE; + do { + dig = uv & 1; + *--ptr = '0' + dig; + } while (uv >>= 1); + if (alt) { + esignbuf[esignlen++] = '0'; + esignbuf[esignlen++] = 'b'; + } + break; + default: /* it had better be ten or less */ + do { + dig = uv % base; + *--ptr = '0' + dig; + } while (uv /= base); + break; + } + elen = (ebuf + sizeof ebuf) - ptr; + eptr = ptr; + if (has_precis) { + if (precis > elen) + zeros = precis - elen; + else if (precis == 0 && elen == 1 && *eptr == '0') + elen = 0; + } + } + break; -void -Perl_sv_vsetpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV **svargs, I32 svmax, bool *maybe_tainted) -{ - sv_setpvn(sv, "", 0); - sv_vcatpvfn(sv, pat, patlen, args, svargs, svmax, maybe_tainted); -} + /* FLOATING POINT */ -/* private function for use in sv_vcatpvfn via the EXPECT_NUMBER macro */ + case 'F': + c = 'f'; /* maybe %F isn't supported here */ + /* FALL THROUGH */ + case 'e': case 'E': + case 'f': + case 'g': case 'G': + if (vectorize) + goto unknown; -STATIC I32 -S_expect_number(pTHX_ char** pattern) -{ - I32 var = 0; - switch (**pattern) { - case '1': case '2': case '3': - case '4': case '5': case '6': - case '7': case '8': case '9': - while (isDIGIT(**pattern)) - var = var * 10 + (*(*pattern)++ - '0'); - } - return var; -} -#define EXPECT_NUMBER(pattern, var) (var = S_expect_number(aTHX_ &pattern)) + /* This is evil, but floating point is even more evil */ -static char * -F0convert(NV nv, char *endbuf, STRLEN *len) -{ - const int neg = nv < 0; - UV uv; + /* for SV-style calling, we can only get NV + for C-style calling, we assume %f is double; + for simplicity we allow any of %Lf, %llf, %qf for long double + */ + switch (intsize) { + case 'V': +#if defined(USE_LONG_DOUBLE) + intsize = 'q'; +#endif + break; +/* [perl #20339] - we should accept and ignore %lf rather than die */ + case 'l': + /* FALL THROUGH */ + default: +#if defined(USE_LONG_DOUBLE) + intsize = args ? 0 : 'q'; +#endif + break; + case 'q': +#if defined(HAS_LONG_DOUBLE) + break; +#else + /* FALL THROUGH */ +#endif + case 'h': + goto unknown; + } - if (neg) - nv = -nv; - if (nv < UV_MAX) { - char *p = endbuf; - nv += 0.5; - uv = (UV)nv; - if (uv & 1 && uv == nv) - uv--; /* Round to even */ - do { - const unsigned dig = uv % 10; - *--p = '0' + dig; - } while (uv /= 10); - if (neg) - *--p = '-'; - *len = endbuf - p; - return p; - } - return Nullch; -} + /* now we need (long double) if intsize == 'q', else (double) */ + nv = (args) ? +#if LONG_DOUBLESIZE > DOUBLESIZE + intsize == 'q' ? + va_arg(*args, long double) : + va_arg(*args, double) +#else + va_arg(*args, double) +#endif + : SvNVx(argsv); + need = 0; + if (c != 'e' && c != 'E') { + i = PERL_INT_MIN; + /* FIXME: if HAS_LONG_DOUBLE but not USE_LONG_DOUBLE this + will cast our (long double) to (double) */ + (void)Perl_frexp(nv, &i); + if (i == PERL_INT_MIN) + Perl_die(aTHX_ "panic: frexp"); + if (i > 0) + need = BIT_DIGITS(i); + } + need += has_precis ? precis : 6; /* known default */ -/* -=for apidoc sv_vcatpvfn + if (need < width) + need = width; -Processes its arguments like C and appends the formatted output -to an SV. Uses an array of SVs if the C style variable argument list is -missing (NULL). When running with taint checks enabled, indicates via -C if results are untrustworthy (often due to the use of -locales). +#ifdef HAS_LDBL_SPRINTF_BUG + /* This is to try to fix a bug with irix/nonstop-ux/powerux and + with sfio - Allen */ -Usually used via one of its frontends C and C. +# ifdef DBL_MAX +# define MY_DBL_MAX DBL_MAX +# else /* XXX guessing! HUGE_VAL may be defined as infinity, so not using */ +# if DOUBLESIZE >= 8 +# define MY_DBL_MAX 1.7976931348623157E+308L +# else +# define MY_DBL_MAX 3.40282347E+38L +# endif +# endif -=cut -*/ +# ifdef HAS_LDBL_SPRINTF_BUG_LESS1 /* only between -1L & 1L - Allen */ +# define MY_DBL_MAX_BUG 1L +# else +# define MY_DBL_MAX_BUG MY_DBL_MAX +# endif +# ifdef DBL_MIN +# define MY_DBL_MIN DBL_MIN +# else /* XXX guessing! -Allen */ +# if DOUBLESIZE >= 8 +# define MY_DBL_MIN 2.2250738585072014E-308L +# else +# define MY_DBL_MIN 1.17549435E-38L +# endif +# endif -#define VECTORIZE_ARGS vecsv = va_arg(*args, SV*);\ - vecstr = (U8*)SvPV_const(vecsv,veclen);\ - vec_utf8 = DO_UTF8(vecsv); + if ((intsize == 'q') && (c == 'f') && + ((nv < MY_DBL_MAX_BUG) && (nv > -MY_DBL_MAX_BUG)) && + (need < DBL_DIG)) { + /* it's going to be short enough that + * long double precision is not needed */ -/* XXX maybe_tainted is never assigned to, so the doc above is lying. */ + if ((nv <= 0L) && (nv >= -0L)) + fix_ldbl_sprintf_bug = TRUE; /* 0 is 0 - easiest */ + else { + /* would use Perl_fp_class as a double-check but not + * functional on IRIX - see perl.h comments */ -void -Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV **svargs, I32 svmax, bool *maybe_tainted) -{ - char *p; - char *q; - const char *patend; - STRLEN origlen; - I32 svix = 0; - static const char nullstr[] = "(null)"; - SV *argsv = Nullsv; - bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */ - const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */ - SV *nsv = Nullsv; - /* Times 4: a decimal digit takes more than 3 binary digits. - * NV_DIG: mantissa takes than many decimal digits. - * Plus 32: Playing safe. */ - char ebuf[IV_DIG * 4 + NV_DIG + 32]; - /* large enough for "%#.#f" --chip */ - /* what about long double NVs? --jhi */ - - PERL_UNUSED_ARG(maybe_tainted); - - /* no matter what, this is a string now */ - (void)SvPV_force(sv, origlen); + if ((nv >= MY_DBL_MIN) || (nv <= -MY_DBL_MIN)) { + /* It's within the range that a double can represent */ +#if defined(DBL_MAX) && !defined(DBL_MIN) + if ((nv >= ((long double)1/DBL_MAX)) || + (nv <= (-(long double)1/DBL_MAX))) +#endif + fix_ldbl_sprintf_bug = TRUE; + } + } + if (fix_ldbl_sprintf_bug == TRUE) { + double temp; - /* special-case "", "%s", and "%-p" (SVf - see below) */ - if (patlen == 0) - return; - if (patlen == 2 && pat[0] == '%' && pat[1] == 's') { - if (args) { - const char * const s = va_arg(*args, char*); - sv_catpv(sv, s ? s : nullstr); - } - else if (svix < svmax) { - sv_catsv(sv, *svargs); - if (DO_UTF8(*svargs)) - SvUTF8_on(sv); - } - return; - } - if (args && patlen == 3 && pat[0] == '%' && - pat[1] == '-' && pat[2] == 'p') { - argsv = va_arg(*args, SV*); - sv_catsv(sv, argsv); - if (DO_UTF8(argsv)) - SvUTF8_on(sv); - return; - } + intsize = 0; + temp = (double)nv; + nv = (NV)temp; + } + } -#ifndef USE_LONG_DOUBLE - /* special-case "%.[gf]" */ - if ( !args && patlen <= 5 && pat[0] == '%' && pat[1] == '.' - && (pat[patlen-1] == 'g' || pat[patlen-1] == 'f') ) { - unsigned digits = 0; - const char *pp; +# undef MY_DBL_MAX +# undef MY_DBL_MAX_BUG +# undef MY_DBL_MIN - pp = pat + 2; - while (*pp >= '0' && *pp <= '9') - digits = 10 * digits + (*pp++ - '0'); - if (pp - pat == (int)patlen - 1) { - NV nv; +#endif /* HAS_LDBL_SPRINTF_BUG */ - if (svix < svmax) - nv = SvNV(*svargs); - else - return; - if (*pp == 'g') { - /* Add check for digits != 0 because it seems that some - gconverts are buggy in this case, and we don't yet have - a Configure test for this. */ - if (digits && digits < sizeof(ebuf) - NV_DIG - 10) { - /* 0, point, slack */ - Gconvert(nv, (int)digits, 0, ebuf); - sv_catpv(sv, ebuf); - if (*ebuf) /* May return an empty string for digits==0 */ - return; - } - } else if (!digits) { - STRLEN l; + need += 20; /* fudge factor */ + if (PL_efloatsize < need) { + Safefree(PL_efloatbuf); + PL_efloatsize = need + 20; /* more fudge */ + Newx(PL_efloatbuf, PL_efloatsize, char); + PL_efloatbuf[0] = '\0'; + } - if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) { - sv_catpvn(sv, p, l); - return; + if ( !(width || left || plus || alt) && fill != '0' + && has_precis && intsize != 'q' ) { /* Shortcuts */ + /* See earlier comment about buggy Gconvert when digits, + aka precis is 0 */ + if ( c == 'g' && precis) { + Gconvert((NV)nv, (int)precis, 0, PL_efloatbuf); + /* May return an empty string for digits==0 */ + if (*PL_efloatbuf) { + elen = strlen(PL_efloatbuf); + goto float_converted; + } + } else if ( c == 'f' && !precis) { + if ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen))) + break; } } - } - } -#endif /* !USE_LONG_DOUBLE */ - - if (!args && svix < svmax && DO_UTF8(*svargs)) - has_utf8 = TRUE; - - patend = (char*)pat + patlen; - for (p = (char*)pat; p < patend; p = q) { - bool alt = FALSE; - bool left = FALSE; - bool vectorize = FALSE; - bool vectorarg = FALSE; - bool vec_utf8 = FALSE; - char fill = ' '; - char plus = 0; - char intsize = 0; - STRLEN width = 0; - STRLEN zeros = 0; - bool has_precis = FALSE; - STRLEN precis = 0; - I32 osvix = svix; - bool is_utf8 = FALSE; /* is this item utf8? */ -#ifdef HAS_LDBL_SPRINTF_BUG - /* This is to try to fix a bug with irix/nonstop-ux/powerux and - with sfio - Allen */ - bool fix_ldbl_sprintf_bug = FALSE; + { + char *ptr = ebuf + sizeof ebuf; + *--ptr = '\0'; + *--ptr = c; + /* FIXME: what to do if HAS_LONG_DOUBLE but not PERL_PRIfldbl? */ +#if defined(HAS_LONG_DOUBLE) && defined(PERL_PRIfldbl) + if (intsize == 'q') { + /* Copy the one or more characters in a long double + * format before the 'base' ([efgEFG]) character to + * the format string. */ + static char const prifldbl[] = PERL_PRIfldbl; + char const *p = prifldbl + sizeof(prifldbl) - 3; + while (p >= prifldbl) { *--ptr = *p--; } + } #endif + if (has_precis) { + base = precis; + do { *--ptr = '0' + (base % 10); } while (base /= 10); + *--ptr = '.'; + } + if (width) { + base = width; + do { *--ptr = '0' + (base % 10); } while (base /= 10); + } + if (fill == '0') + *--ptr = fill; + if (left) + *--ptr = '-'; + if (plus) + *--ptr = plus; + if (alt) + *--ptr = '#'; + *--ptr = '%'; - char esignbuf[4]; - U8 utf8buf[UTF8_MAXBYTES+1]; - STRLEN esignlen = 0; - - const char *eptr = Nullch; - STRLEN elen = 0; - SV *vecsv = Nullsv; - const U8 *vecstr = Null(U8*); - STRLEN veclen = 0; - char c = 0; - int i; - unsigned base = 0; - IV iv = 0; - UV uv = 0; - /* we need a long double target in case HAS_LONG_DOUBLE but - not USE_LONG_DOUBLE - */ -#if defined(HAS_LONG_DOUBLE) && LONG_DOUBLESIZE > DOUBLESIZE - long double nv; + /* No taint. Otherwise we are in the strange situation + * where printf() taints but print($float) doesn't. + * --jhi */ +#if defined(HAS_LONG_DOUBLE) + elen = ((intsize == 'q') + ? my_sprintf(PL_efloatbuf, ptr, nv) + : my_sprintf(PL_efloatbuf, ptr, (double)nv)); #else - NV nv; + elen = my_sprintf(PL_efloatbuf, ptr, nv); #endif - STRLEN have; - STRLEN need; - STRLEN gap; - const char *dotstr = "."; - STRLEN dotstrlen = 1; - I32 efix = 0; /* explicit format parameter index */ - I32 ewix = 0; /* explicit width index */ - I32 epix = 0; /* explicit precision index */ - I32 evix = 0; /* explicit vector index */ - bool asterisk = FALSE; - - /* echo everything up to the next format specification */ - for (q = p; q < patend && *q != '%'; ++q) ; - if (q > p) { - if (has_utf8 && !pat_utf8) - sv_catpvn_utf8_upgrade(sv, p, q - p, nsv); - else - sv_catpvn(sv, p, q - p); - p = q; - } - if (q++ >= patend) + } + float_converted: + eptr = PL_efloatbuf; break; -/* - We allow format specification elements in this order: - \d+\$ explicit format parameter index - [-+ 0#]+ flags - v|\*(\d+\$)?v vector with optional (optionally specified) arg - 0 flag (as above): repeated to allow "v02" - \d+|\*(\d+\$)? width using optional (optionally specified) arg - \.(\d*|\*(\d+\$)?) precision using optional (optionally specified) arg - [hlqLV] size - [%bcdefginopsuxDFOUX] format (mandatory) -*/ - - if (args) { -/* - As of perl5.9.3, printf format checking is on by default. - Internally, perl uses %p formats to provide an escape to - some extended formatting. This block deals with those - extensions: if it does not match, (char*)q is reset and - the normal format processing code is used. - - Currently defined extensions are: - %p include pointer address (standard) - %-p (SVf) include an SV (previously %_) - %-p include an SV with precision - %1p (VDf) include a v-string (as %vd) - %p reserved for future extensions + /* SPECIAL */ - Robin Barker 2005-07-14 -*/ - char* r = q; - bool sv = FALSE; - STRLEN n = 0; - if (*q == '-') - sv = *q++; - EXPECT_NUMBER(q, n); - if (*q++ == 'p') { - if (sv) { /* SVf */ - if (n) { - precis = n; - has_precis = TRUE; - } - argsv = va_arg(*args, SV*); - eptr = SvPVx_const(argsv, elen); - if (DO_UTF8(argsv)) - is_utf8 = TRUE; - goto string; - } -#if vdNUMBER - else if (n == vdNUMBER) { /* VDf */ - vectorize = TRUE; - VECTORIZE_ARGS - goto format_vd; - } + case 'n': + if (vectorize) + goto unknown; + i = SvCUR(sv) - origlen; + if (args) { + switch (intsize) { + case 'h': *(va_arg(*args, short*)) = i; break; + default: *(va_arg(*args, int*)) = i; break; + case 'l': *(va_arg(*args, long*)) = i; break; + case 'V': *(va_arg(*args, IV*)) = i; break; +#ifdef HAS_QUAD + case 'q': *(va_arg(*args, Quad_t*)) = i; break; #endif - else if (n) { - if (ckWARN_d(WARN_INTERNAL)) - Perl_warner(aTHX_ packWARN(WARN_INTERNAL), - "internal %%p might conflict with future printf extensions"); } } - q = r; - } + else + sv_setuv_mg(argsv, (UV)i); + continue; /* not "break" */ - if (EXPECT_NUMBER(q, width)) { - if (*q == '$') { - ++q; - efix = width; - } else { - goto gotwidth; + /* UNKNOWN */ + + default: + unknown: + if (!args + && (PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF) + && ckWARN(WARN_PRINTF)) + { + SV * const msg = sv_newmortal(); + Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %sprintf: ", + (PL_op->op_type == OP_PRTF) ? "" : "s"); + if (c) { + if (isPRINT(c)) + Perl_sv_catpvf(aTHX_ msg, + "\"%%%c\"", c & 0xFF); + else + Perl_sv_catpvf(aTHX_ msg, + "\"%%\\%03"UVof"\"", + (UV)c & 0xFF); + } else + sv_catpv(msg, "end of string"); + Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%"SVf, msg); /* yes, this is reentrant */ } - } - /* FLAGS */ + /* output mangled stuff ... */ + if (c == '\0') + --q; + eptr = p; + elen = q - p; - while (*q) { - switch (*q) { - case ' ': - case '+': - plus = *q++; - continue; + /* ... right here, because formatting flags should not apply */ + SvGROW(sv, SvCUR(sv) + elen + 1); + p = SvEND(sv); + Copy(eptr, p, elen, char); + p += elen; + *p = '\0'; + SvCUR_set(sv, p - SvPVX_const(sv)); + svix = osvix; + continue; /* not "break" */ + } - case '-': - left = TRUE; - q++; - continue; + /* calculate width before utf8_upgrade changes it */ + have = esignlen + zeros + elen; + if (have < zeros) + Perl_croak_nocontext(PL_memory_wrap); - case '0': - fill = *q++; - continue; + if (is_utf8 != has_utf8) { + if (is_utf8) { + if (SvCUR(sv)) + sv_utf8_upgrade(sv); + } + else { + SV * const nsv = sv_2mortal(newSVpvn(eptr, elen)); + sv_utf8_upgrade(nsv); + eptr = SvPVX_const(nsv); + elen = SvCUR(nsv); + } + SvGROW(sv, SvCUR(sv) + elen + 1); + p = SvEND(sv); + *p = '\0'; + } - case '#': - alt = TRUE; - q++; - continue; + need = (have > width ? have : width); + gap = need - have; - default: - break; - } - break; + if (need >= (((STRLEN)~0) - SvCUR(sv) - dotstrlen - 1)) + Perl_croak_nocontext(PL_memory_wrap); + SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1); + p = SvEND(sv); + if (esignlen && fill == '0') { + int i; + for (i = 0; i < (int)esignlen; i++) + *p++ = esignbuf[i]; } - - tryasterisk: - if (*q == '*') { - q++; - if (EXPECT_NUMBER(q, ewix)) - if (*q++ != '$') - goto unknown; - asterisk = TRUE; + if (gap && !left) { + memset(p, fill, gap); + p += gap; } - if (*q == 'v') { - q++; - if (vectorize) - goto unknown; - if ((vectorarg = asterisk)) { - evix = ewix; - ewix = 0; - asterisk = FALSE; - } - vectorize = TRUE; - goto tryasterisk; + if (esignlen && fill != '0') { + int i; + for (i = 0; i < (int)esignlen; i++) + *p++ = esignbuf[i]; } - - if (!asterisk) - { - if( *q == '0' ) - fill = *q++; - EXPECT_NUMBER(q, width); + if (zeros) { + int i; + for (i = zeros; i; i--) + *p++ = '0'; + } + if (elen) { + Copy(eptr, p, elen, char); + p += elen; + } + if (gap && left) { + memset(p, ' ', gap); + p += gap; } - if (vectorize) { - if (vectorarg) { - if (args) - vecsv = va_arg(*args, SV*); - else - vecsv = (evix ? evix <= svmax : svix < svmax) ? - svargs[evix ? evix-1 : svix++] : &PL_sv_undef; - dotstr = SvPV_const(vecsv, dotstrlen); - if (DO_UTF8(vecsv)) - is_utf8 = TRUE; - } - if (args) { - VECTORIZE_ARGS - } - else if (efix ? efix <= svmax : svix < svmax) { - vecsv = svargs[efix ? efix-1 : svix++]; - vecstr = (U8*)SvPV_const(vecsv,veclen); - vec_utf8 = DO_UTF8(vecsv); - /* if this is a version object, we need to return the - * stringified representation (which the SvPVX_const has - * already done for us), but not vectorize the args - */ - if ( *q == 'd' && sv_derived_from(vecsv,"version") ) - { - q++; /* skip past the rest of the %vd format */ - eptr = (const char *) vecstr; - elen = veclen; - vectorize=FALSE; - goto string; - } - } - else { - vecstr = (U8*)""; - veclen = 0; + if (veclen) { + Copy(dotstr, p, dotstrlen, char); + p += dotstrlen; } - } - - if (asterisk) { - if (args) - i = va_arg(*args, int); else - i = (ewix ? ewix <= svmax : svix < svmax) ? - SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; - left |= (i < 0); - width = (i < 0) ? -i : i; + vectorize = FALSE; /* done iterating over vecstr */ } - gotwidth: + if (is_utf8) + has_utf8 = TRUE; + if (has_utf8) + SvUTF8_on(sv); + *p = '\0'; + SvCUR_set(sv, p - SvPVX_const(sv)); + if (vectorize) { + esignlen = 0; + goto vector; + } + } +} - /* PRECISION */ +/* ========================================================================= - if (*q == '.') { - q++; - if (*q == '*') { - q++; - if (EXPECT_NUMBER(q, epix) && *q++ != '$') - goto unknown; - /* XXX: todo, support specified precision parameter */ - if (epix) - goto unknown; - if (args) - i = va_arg(*args, int); - else - i = (ewix ? ewix <= svmax : svix < svmax) - ? SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; - precis = (i < 0) ? 0 : i; - } - else { - precis = 0; - while (isDIGIT(*q)) - precis = precis * 10 + (*q++ - '0'); - } - has_precis = TRUE; - } +=head1 Cloning an interpreter - /* SIZE */ +All the macros and functions in this section are for the private use of +the main function, perl_clone(). - switch (*q) { -#ifdef WIN32 - case 'I': /* Ix, I32x, and I64x */ -# ifdef WIN64 - if (q[1] == '6' && q[2] == '4') { - q += 3; - intsize = 'q'; - break; - } -# endif - if (q[1] == '3' && q[2] == '2') { - q += 3; - break; - } -# ifdef WIN64 - intsize = 'q'; -# endif - q++; - break; -#endif -#if defined(HAS_QUAD) || defined(HAS_LONG_DOUBLE) - case 'L': /* Ld */ - /* FALL THROUGH */ -#ifdef HAS_QUAD - case 'q': /* qd */ -#endif - intsize = 'q'; - q++; - break; -#endif - case 'l': -#if defined(HAS_QUAD) || defined(HAS_LONG_DOUBLE) - if (*(q + 1) == 'l') { /* lld, llf */ - intsize = 'q'; - q += 2; - break; - } -#endif - /* FALL THROUGH */ - case 'h': - /* FALL THROUGH */ - case 'V': - intsize = *q++; - break; - } +The foo_dup() functions make an exact copy of an existing foo thinngy. +During the course of a cloning, a hash table is used to map old addresses +to new addresses. The table is created and manipulated with the +ptr_table_* functions. - /* CONVERSION */ +=cut - if (*q == '%') { - eptr = q++; - elen = 1; - goto string; - } +============================================================================*/ - if (vectorize) - argsv = vecsv; - else if (!args) - argsv = (efix ? efix <= svmax : svix < svmax) ? - svargs[efix ? efix-1 : svix++] : &PL_sv_undef; - switch (c = *q++) { +#if defined(USE_ITHREADS) - /* STRINGS */ +#ifndef GpREFCNT_inc +# define GpREFCNT_inc(gp) ((gp) ? (++(gp)->gp_refcnt, (gp)) : (GP*)NULL) +#endif - case 'c': - uv = (args && !vectorize) ? va_arg(*args, int) : SvIVx(argsv); - if ((uv > 255 || - (!UNI_IS_INVARIANT(uv) && SvUTF8(sv))) - && !IN_BYTES) { - eptr = (char*)utf8buf; - elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf; - is_utf8 = TRUE; - } - else { - c = (char)uv; - eptr = &c; - elen = 1; - } - goto string; - case 's': - if (args && !vectorize) { - eptr = va_arg(*args, char*); - if (eptr) -#ifdef MACOS_TRADITIONAL - /* On MacOS, %#s format is used for Pascal strings */ - if (alt) - elen = *eptr++; - else -#endif - elen = strlen(eptr); - else { - eptr = (char *)nullstr; - elen = sizeof nullstr - 1; - } - } - else { - eptr = SvPVx_const(argsv, elen); - if (DO_UTF8(argsv)) { - if (has_precis && precis < elen) { - I32 p = precis; - sv_pos_u2b(argsv, &p, 0); /* sticks at end */ - precis = p; - } - if (width) { /* fudge width (can't fudge elen) */ - width += elen - sv_len_utf8(argsv); - } - is_utf8 = TRUE; - } - } +#define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t)) +#define av_dup(s,t) (AV*)sv_dup((SV*)s,t) +#define av_dup_inc(s,t) (AV*)SvREFCNT_inc(sv_dup((SV*)s,t)) +#define hv_dup(s,t) (HV*)sv_dup((SV*)s,t) +#define hv_dup_inc(s,t) (HV*)SvREFCNT_inc(sv_dup((SV*)s,t)) +#define cv_dup(s,t) (CV*)sv_dup((SV*)s,t) +#define cv_dup_inc(s,t) (CV*)SvREFCNT_inc(sv_dup((SV*)s,t)) +#define io_dup(s,t) (IO*)sv_dup((SV*)s,t) +#define io_dup_inc(s,t) (IO*)SvREFCNT_inc(sv_dup((SV*)s,t)) +#define gv_dup(s,t) (GV*)sv_dup((SV*)s,t) +#define gv_dup_inc(s,t) (GV*)SvREFCNT_inc(sv_dup((SV*)s,t)) +#define SAVEPV(p) (p ? savepv(p) : Nullch) +#define SAVEPVN(p,n) (p ? savepvn(p,n) : Nullch) - string: - vectorize = FALSE; - if (has_precis && elen > precis) - elen = precis; - break; - /* INTEGERS */ +/* Duplicate a regexp. Required reading: pregcomp() and pregfree() in + regcomp.c. AMS 20010712 */ - case 'p': - if (alt || vectorize) - goto unknown; - uv = PTR2UV(args ? va_arg(*args, void*) : argsv); - base = 16; - goto integer; +REGEXP * +Perl_re_dup(pTHX_ const REGEXP *r, CLONE_PARAMS *param) +{ + dVAR; + REGEXP *ret; + int i, len, npar; + struct reg_substr_datum *s; - case 'D': -#ifdef IV_IS_QUAD - intsize = 'q'; -#else - intsize = 'l'; -#endif - /* FALL THROUGH */ - case 'd': - case 'i': -#if vdNUMBER - format_vd: -#endif - if (vectorize) { - STRLEN ulen; - if (!veclen) - continue; - if (vec_utf8) - uv = utf8n_to_uvchr(vecstr, veclen, &ulen, - UTF8_ALLOW_ANYUV); - else { - uv = *vecstr; - ulen = 1; - } - vecstr += ulen; - veclen -= ulen; - if (plus) - esignbuf[esignlen++] = plus; - } - else if (args) { - switch (intsize) { - case 'h': iv = (short)va_arg(*args, int); break; - case 'l': iv = va_arg(*args, long); break; - case 'V': iv = va_arg(*args, IV); break; - default: iv = va_arg(*args, int); break; -#ifdef HAS_QUAD - case 'q': iv = va_arg(*args, Quad_t); break; -#endif - } - } - else { - IV tiv = SvIVx(argsv); /* work around GCC bug #13488 */ - switch (intsize) { - case 'h': iv = (short)tiv; break; - case 'l': iv = (long)tiv; break; - case 'V': - default: iv = tiv; break; -#ifdef HAS_QUAD - case 'q': iv = (Quad_t)tiv; break; -#endif - } - } - if ( !vectorize ) /* we already set uv above */ - { - if (iv >= 0) { - uv = iv; - if (plus) - esignbuf[esignlen++] = plus; - } - else { - uv = -iv; - esignbuf[esignlen++] = '-'; - } - } - base = 10; - goto integer; + if (!r) + return (REGEXP *)NULL; - case 'U': -#ifdef IV_IS_QUAD - intsize = 'q'; -#else - intsize = 'l'; -#endif - /* FALL THROUGH */ - case 'u': - base = 10; - goto uns_integer; + if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r))) + return ret; - case 'b': - base = 2; - goto uns_integer; + len = r->offsets[0]; + npar = r->nparens+1; - case 'O': -#ifdef IV_IS_QUAD - intsize = 'q'; -#else - intsize = 'l'; -#endif - /* FALL THROUGH */ - case 'o': - base = 8; - goto uns_integer; + Newxc(ret, sizeof(regexp) + (len+1)*sizeof(regnode), char, regexp); + Copy(r->program, ret->program, len+1, regnode); - case 'X': - case 'x': - base = 16; + Newx(ret->startp, npar, I32); + Copy(r->startp, ret->startp, npar, I32); + Newx(ret->endp, npar, I32); + Copy(r->startp, ret->startp, npar, I32); - uns_integer: - if (vectorize) { - STRLEN ulen; - vector: - if (!veclen) - continue; - if (vec_utf8) - uv = utf8n_to_uvchr(vecstr, veclen, &ulen, - UTF8_ALLOW_ANYUV); - else { - uv = *vecstr; - ulen = 1; - } - vecstr += ulen; - veclen -= ulen; - } - else if (args) { - switch (intsize) { - case 'h': uv = (unsigned short)va_arg(*args, unsigned); break; - case 'l': uv = va_arg(*args, unsigned long); break; - case 'V': uv = va_arg(*args, UV); break; - default: uv = va_arg(*args, unsigned); break; -#ifdef HAS_QUAD - case 'q': uv = va_arg(*args, Uquad_t); break; -#endif - } - } - else { - UV tuv = SvUVx(argsv); /* work around GCC bug #13488 */ - switch (intsize) { - case 'h': uv = (unsigned short)tuv; break; - case 'l': uv = (unsigned long)tuv; break; - case 'V': - default: uv = tuv; break; -#ifdef HAS_QUAD - case 'q': uv = (Uquad_t)tuv; break; -#endif - } - } - - integer: - { - char *ptr = ebuf + sizeof ebuf; - switch (base) { - unsigned dig; - case 16: - if (!uv) - alt = FALSE; - p = (char*)((c == 'X') - ? "0123456789ABCDEF" : "0123456789abcdef"); - do { - dig = uv & 15; - *--ptr = p[dig]; - } while (uv >>= 4); - if (alt) { - esignbuf[esignlen++] = '0'; - esignbuf[esignlen++] = c; /* 'x' or 'X' */ - } - break; - case 8: - do { - dig = uv & 7; - *--ptr = '0' + dig; - } while (uv >>= 3); - if (alt && *ptr != '0') - *--ptr = '0'; - break; - case 2: - do { - dig = uv & 1; - *--ptr = '0' + dig; - } while (uv >>= 1); - if (alt) { - esignbuf[esignlen++] = '0'; - esignbuf[esignlen++] = 'b'; - } - break; - default: /* it had better be ten or less */ - do { - dig = uv % base; - *--ptr = '0' + dig; - } while (uv /= base); - break; - } - elen = (ebuf + sizeof ebuf) - ptr; - eptr = ptr; - if (has_precis) { - if (precis > elen) - zeros = precis - elen; - else if (precis == 0 && elen == 1 && *eptr == '0') - elen = 0; - } - } - break; - - /* FLOATING POINT */ + Newx(ret->substrs, 1, struct reg_substr_data); + for (s = ret->substrs->data, i = 0; i < 3; i++, s++) { + s->min_offset = r->substrs->data[i].min_offset; + s->max_offset = r->substrs->data[i].max_offset; + s->substr = sv_dup_inc(r->substrs->data[i].substr, param); + s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param); + } - case 'F': - c = 'f'; /* maybe %F isn't supported here */ - /* FALL THROUGH */ - case 'e': case 'E': - case 'f': - case 'g': case 'G': + ret->regstclass = NULL; + if (r->data) { + struct reg_data *d; + const int count = r->data->count; + int i; - /* This is evil, but floating point is even more evil */ + Newxc(d, sizeof(struct reg_data) + count*sizeof(void *), + char, struct reg_data); + Newx(d->what, count, U8); - /* for SV-style calling, we can only get NV - for C-style calling, we assume %f is double; - for simplicity we allow any of %Lf, %llf, %qf for long double - */ - switch (intsize) { - case 'V': -#if defined(USE_LONG_DOUBLE) - intsize = 'q'; -#endif + d->count = count; + for (i = 0; i < count; i++) { + d->what[i] = r->data->what[i]; + switch (d->what[i]) { + /* legal options are one of: sfpont + see also regcomp.h and pregfree() */ + case 's': + d->data[i] = sv_dup_inc((SV *)r->data->data[i], param); break; -/* [perl #20339] - we should accept and ignore %lf rather than die */ - case 'l': - /* FALL THROUGH */ - default: -#if defined(USE_LONG_DOUBLE) - intsize = args ? 0 : 'q'; -#endif + case 'p': + d->data[i] = av_dup_inc((AV *)r->data->data[i], param); break; - case 'q': -#if defined(HAS_LONG_DOUBLE) + case 'f': + /* This is cheating. */ + Newx(d->data[i], 1, struct regnode_charclass_class); + StructCopy(r->data->data[i], d->data[i], + struct regnode_charclass_class); + ret->regstclass = (regnode*)d->data[i]; break; -#else - /* FALL THROUGH */ -#endif - case 'h': - goto unknown; + case 'o': + /* Compiled op trees are readonly, and can thus be + shared without duplication. */ + OP_REFCNT_LOCK; + d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]); + OP_REFCNT_UNLOCK; + break; + case 'n': + d->data[i] = r->data->data[i]; + break; + case 't': + d->data[i] = r->data->data[i]; + OP_REFCNT_LOCK; + ((reg_trie_data*)d->data[i])->refcount++; + OP_REFCNT_UNLOCK; + break; + default: + Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", r->data->what[i]); } + } - /* now we need (long double) if intsize == 'q', else (double) */ - nv = (args && !vectorize) ? -#if LONG_DOUBLESIZE > DOUBLESIZE - intsize == 'q' ? - va_arg(*args, long double) : - va_arg(*args, double) -#else - va_arg(*args, double) -#endif - : SvNVx(argsv); + ret->data = d; + } + else + ret->data = NULL; - need = 0; - vectorize = FALSE; - if (c != 'e' && c != 'E') { - i = PERL_INT_MIN; - /* FIXME: if HAS_LONG_DOUBLE but not USE_LONG_DOUBLE this - will cast our (long double) to (double) */ - (void)Perl_frexp(nv, &i); - if (i == PERL_INT_MIN) - Perl_die(aTHX_ "panic: frexp"); - if (i > 0) - need = BIT_DIGITS(i); - } - need += has_precis ? precis : 6; /* known default */ + Newx(ret->offsets, 2*len+1, U32); + Copy(r->offsets, ret->offsets, 2*len+1, U32); - if (need < width) - need = width; + ret->precomp = SAVEPVN(r->precomp, r->prelen); + ret->refcnt = r->refcnt; + ret->minlen = r->minlen; + ret->prelen = r->prelen; + ret->nparens = r->nparens; + ret->lastparen = r->lastparen; + ret->lastcloseparen = r->lastcloseparen; + ret->reganch = r->reganch; -#ifdef HAS_LDBL_SPRINTF_BUG - /* This is to try to fix a bug with irix/nonstop-ux/powerux and - with sfio - Allen */ + ret->sublen = r->sublen; -# ifdef DBL_MAX -# define MY_DBL_MAX DBL_MAX -# else /* XXX guessing! HUGE_VAL may be defined as infinity, so not using */ -# if DOUBLESIZE >= 8 -# define MY_DBL_MAX 1.7976931348623157E+308L -# else -# define MY_DBL_MAX 3.40282347E+38L -# endif -# endif + if (RX_MATCH_COPIED(ret)) + ret->subbeg = SAVEPVN(r->subbeg, r->sublen); + else + ret->subbeg = Nullch; +#ifdef PERL_OLD_COPY_ON_WRITE + ret->saved_copy = Nullsv; +#endif -# ifdef HAS_LDBL_SPRINTF_BUG_LESS1 /* only between -1L & 1L - Allen */ -# define MY_DBL_MAX_BUG 1L -# else -# define MY_DBL_MAX_BUG MY_DBL_MAX -# endif + ptr_table_store(PL_ptr_table, r, ret); + return ret; +} -# ifdef DBL_MIN -# define MY_DBL_MIN DBL_MIN -# else /* XXX guessing! -Allen */ -# if DOUBLESIZE >= 8 -# define MY_DBL_MIN 2.2250738585072014E-308L -# else -# define MY_DBL_MIN 1.17549435E-38L -# endif -# endif +/* duplicate a file handle */ - if ((intsize == 'q') && (c == 'f') && - ((nv < MY_DBL_MAX_BUG) && (nv > -MY_DBL_MAX_BUG)) && - (need < DBL_DIG)) { - /* it's going to be short enough that - * long double precision is not needed */ +PerlIO * +Perl_fp_dup(pTHX_ PerlIO *fp, char type, CLONE_PARAMS *param) +{ + PerlIO *ret; - if ((nv <= 0L) && (nv >= -0L)) - fix_ldbl_sprintf_bug = TRUE; /* 0 is 0 - easiest */ - else { - /* would use Perl_fp_class as a double-check but not - * functional on IRIX - see perl.h comments */ + PERL_UNUSED_ARG(type); - if ((nv >= MY_DBL_MIN) || (nv <= -MY_DBL_MIN)) { - /* It's within the range that a double can represent */ -#if defined(DBL_MAX) && !defined(DBL_MIN) - if ((nv >= ((long double)1/DBL_MAX)) || - (nv <= (-(long double)1/DBL_MAX))) -#endif - fix_ldbl_sprintf_bug = TRUE; - } - } - if (fix_ldbl_sprintf_bug == TRUE) { - double temp; - - intsize = 0; - temp = (double)nv; - nv = (NV)temp; - } - } - -# undef MY_DBL_MAX -# undef MY_DBL_MAX_BUG -# undef MY_DBL_MIN - -#endif /* HAS_LDBL_SPRINTF_BUG */ + if (!fp) + return (PerlIO*)NULL; - need += 20; /* fudge factor */ - if (PL_efloatsize < need) { - Safefree(PL_efloatbuf); - PL_efloatsize = need + 20; /* more fudge */ - Newx(PL_efloatbuf, PL_efloatsize, char); - PL_efloatbuf[0] = '\0'; - } + /* look for it in the table first */ + ret = (PerlIO*)ptr_table_fetch(PL_ptr_table, fp); + if (ret) + return ret; - if ( !(width || left || plus || alt) && fill != '0' - && has_precis && intsize != 'q' ) { /* Shortcuts */ - /* See earlier comment about buggy Gconvert when digits, - aka precis is 0 */ - if ( c == 'g' && precis) { - Gconvert((NV)nv, (int)precis, 0, PL_efloatbuf); - /* May return an empty string for digits==0 */ - if (*PL_efloatbuf) { - elen = strlen(PL_efloatbuf); - goto float_converted; - } - } else if ( c == 'f' && !precis) { - if ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen))) - break; - } - } - { - char *ptr = ebuf + sizeof ebuf; - *--ptr = '\0'; - *--ptr = c; - /* FIXME: what to do if HAS_LONG_DOUBLE but not PERL_PRIfldbl? */ -#if defined(HAS_LONG_DOUBLE) && defined(PERL_PRIfldbl) - if (intsize == 'q') { - /* Copy the one or more characters in a long double - * format before the 'base' ([efgEFG]) character to - * the format string. */ - static char const prifldbl[] = PERL_PRIfldbl; - char const *p = prifldbl + sizeof(prifldbl) - 3; - while (p >= prifldbl) { *--ptr = *p--; } - } -#endif - if (has_precis) { - base = precis; - do { *--ptr = '0' + (base % 10); } while (base /= 10); - *--ptr = '.'; - } - if (width) { - base = width; - do { *--ptr = '0' + (base % 10); } while (base /= 10); - } - if (fill == '0') - *--ptr = fill; - if (left) - *--ptr = '-'; - if (plus) - *--ptr = plus; - if (alt) - *--ptr = '#'; - *--ptr = '%'; + /* create anew and remember what it is */ + ret = PerlIO_fdupopen(aTHX_ fp, param, PERLIO_DUP_CLONE); + ptr_table_store(PL_ptr_table, fp, ret); + return ret; +} - /* No taint. Otherwise we are in the strange situation - * where printf() taints but print($float) doesn't. - * --jhi */ -#if defined(HAS_LONG_DOUBLE) - elen = ((intsize == 'q') - ? my_sprintf(PL_efloatbuf, ptr, nv) - : my_sprintf(PL_efloatbuf, ptr, (double)nv)); -#else - elen = my_sprintf(PL_efloatbuf, ptr, nv); -#endif - } - float_converted: - eptr = PL_efloatbuf; - break; +/* duplicate a directory handle */ - /* SPECIAL */ +DIR * +Perl_dirp_dup(pTHX_ DIR *dp) +{ + if (!dp) + return (DIR*)NULL; + /* XXX TODO */ + return dp; +} - case 'n': - i = SvCUR(sv) - origlen; - if (args && !vectorize) { - switch (intsize) { - case 'h': *(va_arg(*args, short*)) = i; break; - default: *(va_arg(*args, int*)) = i; break; - case 'l': *(va_arg(*args, long*)) = i; break; - case 'V': *(va_arg(*args, IV*)) = i; break; -#ifdef HAS_QUAD - case 'q': *(va_arg(*args, Quad_t*)) = i; break; -#endif - } - } - else - sv_setuv_mg(argsv, (UV)i); - vectorize = FALSE; - continue; /* not "break" */ +/* duplicate a typeglob */ - /* UNKNOWN */ +GP * +Perl_gp_dup(pTHX_ GP *gp, CLONE_PARAMS* param) +{ + GP *ret; + if (!gp) + return (GP*)NULL; + /* look for it in the table first */ + ret = (GP*)ptr_table_fetch(PL_ptr_table, gp); + if (ret) + return ret; - default: - unknown: - if (!args - && (PL_op->op_type == OP_PRTF || PL_op->op_type == OP_SPRINTF) - && ckWARN(WARN_PRINTF)) - { - SV * const msg = sv_newmortal(); - Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %sprintf: ", - (PL_op->op_type == OP_PRTF) ? "" : "s"); - if (c) { - if (isPRINT(c)) - Perl_sv_catpvf(aTHX_ msg, - "\"%%%c\"", c & 0xFF); - else - Perl_sv_catpvf(aTHX_ msg, - "\"%%\\%03"UVof"\"", - (UV)c & 0xFF); - } else - sv_catpv(msg, "end of string"); - Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%"SVf, msg); /* yes, this is reentrant */ - } + /* create anew and remember what it is */ + Newxz(ret, 1, GP); + ptr_table_store(PL_ptr_table, gp, ret); - /* output mangled stuff ... */ - if (c == '\0') - --q; - eptr = p; - elen = q - p; + /* clone */ + ret->gp_refcnt = 0; /* must be before any other dups! */ + ret->gp_sv = sv_dup_inc(gp->gp_sv, param); + ret->gp_io = io_dup_inc(gp->gp_io, param); + ret->gp_form = cv_dup_inc(gp->gp_form, param); + ret->gp_av = av_dup_inc(gp->gp_av, param); + ret->gp_hv = hv_dup_inc(gp->gp_hv, param); + ret->gp_egv = gv_dup(gp->gp_egv, param);/* GvEGV is not refcounted */ + ret->gp_cv = cv_dup_inc(gp->gp_cv, param); + ret->gp_cvgen = gp->gp_cvgen; + ret->gp_line = gp->gp_line; + ret->gp_file = gp->gp_file; /* points to COP.cop_file */ + return ret; +} - /* ... right here, because formatting flags should not apply */ - SvGROW(sv, SvCUR(sv) + elen + 1); - p = SvEND(sv); - Copy(eptr, p, elen, char); - p += elen; - *p = '\0'; - SvCUR_set(sv, p - SvPVX_const(sv)); - svix = osvix; - continue; /* not "break" */ - } +/* duplicate a chain of magic */ - /* calculate width before utf8_upgrade changes it */ - have = esignlen + zeros + elen; +MAGIC * +Perl_mg_dup(pTHX_ MAGIC *mg, CLONE_PARAMS* param) +{ + MAGIC *mgprev = (MAGIC*)NULL; + MAGIC *mgret; + if (!mg) + return (MAGIC*)NULL; + /* look for it in the table first */ + mgret = (MAGIC*)ptr_table_fetch(PL_ptr_table, mg); + if (mgret) + return mgret; - if (is_utf8 != has_utf8) { - if (is_utf8) { - if (SvCUR(sv)) - sv_utf8_upgrade(sv); - } - else { - SV * const nsv = sv_2mortal(newSVpvn(eptr, elen)); - sv_utf8_upgrade(nsv); - eptr = SvPVX_const(nsv); - elen = SvCUR(nsv); - } - SvGROW(sv, SvCUR(sv) + elen + 1); - p = SvEND(sv); - *p = '\0'; + for (; mg; mg = mg->mg_moremagic) { + MAGIC *nmg; + Newxz(nmg, 1, MAGIC); + if (mgprev) + mgprev->mg_moremagic = nmg; + else + mgret = nmg; + nmg->mg_virtual = mg->mg_virtual; /* XXX copy dynamic vtable? */ + nmg->mg_private = mg->mg_private; + nmg->mg_type = mg->mg_type; + nmg->mg_flags = mg->mg_flags; + if (mg->mg_type == PERL_MAGIC_qr) { + nmg->mg_obj = (SV*)re_dup((REGEXP*)mg->mg_obj, param); } - - need = (have > width ? have : width); - gap = need - have; - - SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1); - p = SvEND(sv); - if (esignlen && fill == '0') { - int i; - for (i = 0; i < (int)esignlen; i++) - *p++ = esignbuf[i]; + else if(mg->mg_type == PERL_MAGIC_backref) { + const AV * const av = (AV*) mg->mg_obj; + SV **svp; + I32 i; + (void)SvREFCNT_inc(nmg->mg_obj = (SV*)newAV()); + svp = AvARRAY(av); + for (i = AvFILLp(av); i >= 0; i--) { + if (!svp[i]) continue; + av_push((AV*)nmg->mg_obj,sv_dup(svp[i],param)); + } } - if (gap && !left) { - memset(p, fill, gap); - p += gap; + else if (mg->mg_type == PERL_MAGIC_symtab) { + nmg->mg_obj = mg->mg_obj; } - if (esignlen && fill != '0') { - int i; - for (i = 0; i < (int)esignlen; i++) - *p++ = esignbuf[i]; - } - if (zeros) { - int i; - for (i = zeros; i; i--) - *p++ = '0'; - } - if (elen) { - Copy(eptr, p, elen, char); - p += elen; - } - if (gap && left) { - memset(p, ' ', gap); - p += gap; + else { + nmg->mg_obj = (mg->mg_flags & MGf_REFCOUNTED) + ? sv_dup_inc(mg->mg_obj, param) + : sv_dup(mg->mg_obj, param); } - if (vectorize) { - if (veclen) { - Copy(dotstr, p, dotstrlen, char); - p += dotstrlen; + nmg->mg_len = mg->mg_len; + nmg->mg_ptr = mg->mg_ptr; /* XXX random ptr? */ + if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) { + if (mg->mg_len > 0) { + nmg->mg_ptr = SAVEPVN(mg->mg_ptr, mg->mg_len); + if (mg->mg_type == PERL_MAGIC_overload_table && + AMT_AMAGIC((AMT*)mg->mg_ptr)) + { + AMT * const amtp = (AMT*)mg->mg_ptr; + AMT * const namtp = (AMT*)nmg->mg_ptr; + I32 i; + for (i = 1; i < NofAMmeth; i++) { + namtp->table[i] = cv_dup_inc(amtp->table[i], param); + } + } } - else - vectorize = FALSE; /* done iterating over vecstr */ + else if (mg->mg_len == HEf_SVKEY) + nmg->mg_ptr = (char*)sv_dup_inc((SV*)mg->mg_ptr, param); } - if (is_utf8) - has_utf8 = TRUE; - if (has_utf8) - SvUTF8_on(sv); - *p = '\0'; - SvCUR_set(sv, p - SvPVX_const(sv)); - if (vectorize) { - esignlen = 0; - goto vector; + if ((mg->mg_flags & MGf_DUP) && mg->mg_virtual && mg->mg_virtual->svt_dup) { + CALL_FPTR(nmg->mg_virtual->svt_dup)(aTHX_ nmg, param); } + mgprev = nmg; } + return mgret; } -/* ========================================================================= - -=head1 Cloning an interpreter - -All the macros and functions in this section are for the private use of -the main function, perl_clone(). - -The foo_dup() functions make an exact copy of an existing foo thinngy. -During the course of a cloning, a hash table is used to map old addresses -to new addresses. The table is created and manipulated with the -ptr_table_* functions. - -=cut - -============================================================================*/ - +/* create a new pointer-mapping table */ -#if defined(USE_ITHREADS) +PTR_TBL_t * +Perl_ptr_table_new(pTHX) +{ + PTR_TBL_t *tbl; + Newxz(tbl, 1, PTR_TBL_t); + tbl->tbl_max = 511; + tbl->tbl_items = 0; + Newxz(tbl->tbl_ary, tbl->tbl_max + 1, PTR_TBL_ENT_t*); + return tbl; +} -#ifndef GpREFCNT_inc -# define GpREFCNT_inc(gp) ((gp) ? (++(gp)->gp_refcnt, (gp)) : (GP*)NULL) -#endif +#define PTR_TABLE_HASH(ptr) \ + ((PTR2UV(ptr) >> 3) ^ (PTR2UV(ptr) >> (3 + 7)) ^ (PTR2UV(ptr) >> (3 + 17))) +/* + we use the PTE_SVSLOT 'reservation' made above, both here (in the + following define) and at call to new_body_inline made below in + Perl_ptr_table_store() + */ -#define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t)) -#define av_dup(s,t) (AV*)sv_dup((SV*)s,t) -#define av_dup_inc(s,t) (AV*)SvREFCNT_inc(sv_dup((SV*)s,t)) -#define hv_dup(s,t) (HV*)sv_dup((SV*)s,t) -#define hv_dup_inc(s,t) (HV*)SvREFCNT_inc(sv_dup((SV*)s,t)) -#define cv_dup(s,t) (CV*)sv_dup((SV*)s,t) -#define cv_dup_inc(s,t) (CV*)SvREFCNT_inc(sv_dup((SV*)s,t)) -#define io_dup(s,t) (IO*)sv_dup((SV*)s,t) -#define io_dup_inc(s,t) (IO*)SvREFCNT_inc(sv_dup((SV*)s,t)) -#define gv_dup(s,t) (GV*)sv_dup((SV*)s,t) -#define gv_dup_inc(s,t) (GV*)SvREFCNT_inc(sv_dup((SV*)s,t)) -#define SAVEPV(p) (p ? savepv(p) : Nullch) -#define SAVEPVN(p,n) (p ? savepvn(p,n) : Nullch) +#define del_pte(p) del_body_type(p, PTE_SVSLOT) +/* map an existing pointer using a table */ -/* Duplicate a regexp. Required reading: pregcomp() and pregfree() in - regcomp.c. AMS 20010712 */ +STATIC PTR_TBL_ENT_t * +S_ptr_table_find(pTHX_ PTR_TBL_t *tbl, const void *sv) { + PTR_TBL_ENT_t *tblent; + const UV hash = PTR_TABLE_HASH(sv); + assert(tbl); + tblent = tbl->tbl_ary[hash & tbl->tbl_max]; + for (; tblent; tblent = tblent->next) { + if (tblent->oldval == sv) + return tblent; + } + return 0; +} -REGEXP * -Perl_re_dup(pTHX_ const REGEXP *r, CLONE_PARAMS *param) +void * +Perl_ptr_table_fetch(pTHX_ PTR_TBL_t *tbl, const void *sv) { - dVAR; - REGEXP *ret; - int i, len, npar; - struct reg_substr_datum *s; - - if (!r) - return (REGEXP *)NULL; - - if ((ret = (REGEXP *)ptr_table_fetch(PL_ptr_table, r))) - return ret; + PTR_TBL_ENT_t const *const tblent = S_ptr_table_find(aTHX_ tbl, sv); + return tblent ? tblent->newval : (void *) 0; +} - len = r->offsets[0]; - npar = r->nparens+1; +/* add a new entry to a pointer-mapping table */ - Newxc(ret, sizeof(regexp) + (len+1)*sizeof(regnode), char, regexp); - Copy(r->program, ret->program, len+1, regnode); +void +Perl_ptr_table_store(pTHX_ PTR_TBL_t *tbl, const void *oldsv, void *newsv) +{ + PTR_TBL_ENT_t *tblent = S_ptr_table_find(aTHX_ tbl, oldsv); - Newx(ret->startp, npar, I32); - Copy(r->startp, ret->startp, npar, I32); - Newx(ret->endp, npar, I32); - Copy(r->startp, ret->startp, npar, I32); + if (tblent) { + tblent->newval = newsv; + } else { + const UV entry = PTR_TABLE_HASH(oldsv) & tbl->tbl_max; - Newx(ret->substrs, 1, struct reg_substr_data); - for (s = ret->substrs->data, i = 0; i < 3; i++, s++) { - s->min_offset = r->substrs->data[i].min_offset; - s->max_offset = r->substrs->data[i].max_offset; - s->substr = sv_dup_inc(r->substrs->data[i].substr, param); - s->utf8_substr = sv_dup_inc(r->substrs->data[i].utf8_substr, param); + new_body_inline(tblent, sizeof(struct ptr_tbl_ent), PTE_SVSLOT); + tblent->oldval = oldsv; + tblent->newval = newsv; + tblent->next = tbl->tbl_ary[entry]; + tbl->tbl_ary[entry] = tblent; + tbl->tbl_items++; + if (tblent->next && tbl->tbl_items > tbl->tbl_max) + ptr_table_split(tbl); } +} - ret->regstclass = NULL; - if (r->data) { - struct reg_data *d; - const int count = r->data->count; - int i; +/* double the hash bucket size of an existing ptr table */ - Newxc(d, sizeof(struct reg_data) + count*sizeof(void *), - char, struct reg_data); - Newx(d->what, count, U8); +void +Perl_ptr_table_split(pTHX_ PTR_TBL_t *tbl) +{ + PTR_TBL_ENT_t **ary = tbl->tbl_ary; + const UV oldsize = tbl->tbl_max + 1; + UV newsize = oldsize * 2; + UV i; - d->count = count; - for (i = 0; i < count; i++) { - d->what[i] = r->data->what[i]; - switch (d->what[i]) { - /* legal options are one of: sfpont - see also regcomp.h and pregfree() */ - case 's': - d->data[i] = sv_dup_inc((SV *)r->data->data[i], param); - break; - case 'p': - d->data[i] = av_dup_inc((AV *)r->data->data[i], param); - break; - case 'f': - /* This is cheating. */ - Newx(d->data[i], 1, struct regnode_charclass_class); - StructCopy(r->data->data[i], d->data[i], - struct regnode_charclass_class); - ret->regstclass = (regnode*)d->data[i]; - break; - case 'o': - /* Compiled op trees are readonly, and can thus be - shared without duplication. */ - OP_REFCNT_LOCK; - d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]); - OP_REFCNT_UNLOCK; - break; - case 'n': - d->data[i] = r->data->data[i]; - break; - case 't': - d->data[i] = r->data->data[i]; - OP_REFCNT_LOCK; - ((reg_trie_data*)d->data[i])->refcount++; - OP_REFCNT_UNLOCK; - break; - default: - Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'", r->data->what[i]); + Renew(ary, newsize, PTR_TBL_ENT_t*); + Zero(&ary[oldsize], newsize-oldsize, PTR_TBL_ENT_t*); + tbl->tbl_max = --newsize; + tbl->tbl_ary = ary; + for (i=0; i < oldsize; i++, ary++) { + PTR_TBL_ENT_t **curentp, **entp, *ent; + if (!*ary) + continue; + curentp = ary + oldsize; + for (entp = ary, ent = *ary; ent; ent = *entp) { + if ((newsize & PTR_TABLE_HASH(ent->oldval)) != i) { + *entp = ent->next; + ent->next = *curentp; + *curentp = ent; + continue; } + else + entp = &ent->next; } - - ret->data = d; } - else - ret->data = NULL; +} - Newx(ret->offsets, 2*len+1, U32); - Copy(r->offsets, ret->offsets, 2*len+1, U32); +/* remove all the entries from a ptr table */ - ret->precomp = SAVEPVN(r->precomp, r->prelen); - ret->refcnt = r->refcnt; - ret->minlen = r->minlen; - ret->prelen = r->prelen; - ret->nparens = r->nparens; - ret->lastparen = r->lastparen; - ret->lastcloseparen = r->lastcloseparen; - ret->reganch = r->reganch; +void +Perl_ptr_table_clear(pTHX_ PTR_TBL_t *tbl) +{ + if (tbl && tbl->tbl_items) { + register PTR_TBL_ENT_t **array = tbl->tbl_ary; + UV riter = tbl->tbl_max; - ret->sublen = r->sublen; + do { + PTR_TBL_ENT_t *entry = array[riter]; - if (RX_MATCH_COPIED(ret)) - ret->subbeg = SAVEPVN(r->subbeg, r->sublen); - else - ret->subbeg = Nullch; -#ifdef PERL_OLD_COPY_ON_WRITE - ret->saved_copy = Nullsv; -#endif + while (entry) { + PTR_TBL_ENT_t * const oentry = entry; + entry = entry->next; + del_pte(oentry); + } + } while (riter--); - ptr_table_store(PL_ptr_table, r, ret); - return ret; -} - -/* duplicate a file handle */ - -PerlIO * -Perl_fp_dup(pTHX_ PerlIO *fp, char type, CLONE_PARAMS *param) -{ - PerlIO *ret; - - PERL_UNUSED_ARG(type); - - if (!fp) - return (PerlIO*)NULL; - - /* look for it in the table first */ - ret = (PerlIO*)ptr_table_fetch(PL_ptr_table, fp); - if (ret) - return ret; - - /* create anew and remember what it is */ - ret = PerlIO_fdupopen(aTHX_ fp, param, PERLIO_DUP_CLONE); - ptr_table_store(PL_ptr_table, fp, ret); - return ret; -} - -/* duplicate a directory handle */ - -DIR * -Perl_dirp_dup(pTHX_ DIR *dp) -{ - if (!dp) - return (DIR*)NULL; - /* XXX TODO */ - return dp; -} - -/* duplicate a typeglob */ - -GP * -Perl_gp_dup(pTHX_ GP *gp, CLONE_PARAMS* param) -{ - GP *ret; - if (!gp) - return (GP*)NULL; - /* look for it in the table first */ - ret = (GP*)ptr_table_fetch(PL_ptr_table, gp); - if (ret) - return ret; - - /* create anew and remember what it is */ - Newxz(ret, 1, GP); - ptr_table_store(PL_ptr_table, gp, ret); - - /* clone */ - ret->gp_refcnt = 0; /* must be before any other dups! */ - ret->gp_sv = sv_dup_inc(gp->gp_sv, param); - ret->gp_io = io_dup_inc(gp->gp_io, param); - ret->gp_form = cv_dup_inc(gp->gp_form, param); - ret->gp_av = av_dup_inc(gp->gp_av, param); - ret->gp_hv = hv_dup_inc(gp->gp_hv, param); - ret->gp_egv = gv_dup(gp->gp_egv, param);/* GvEGV is not refcounted */ - ret->gp_cv = cv_dup_inc(gp->gp_cv, param); - ret->gp_cvgen = gp->gp_cvgen; - ret->gp_line = gp->gp_line; - ret->gp_file = gp->gp_file; /* points to COP.cop_file */ - return ret; -} - -/* duplicate a chain of magic */ - -MAGIC * -Perl_mg_dup(pTHX_ MAGIC *mg, CLONE_PARAMS* param) -{ - MAGIC *mgprev = (MAGIC*)NULL; - MAGIC *mgret; - if (!mg) - return (MAGIC*)NULL; - /* look for it in the table first */ - mgret = (MAGIC*)ptr_table_fetch(PL_ptr_table, mg); - if (mgret) - return mgret; - - for (; mg; mg = mg->mg_moremagic) { - MAGIC *nmg; - Newxz(nmg, 1, MAGIC); - if (mgprev) - mgprev->mg_moremagic = nmg; - else - mgret = nmg; - nmg->mg_virtual = mg->mg_virtual; /* XXX copy dynamic vtable? */ - nmg->mg_private = mg->mg_private; - nmg->mg_type = mg->mg_type; - nmg->mg_flags = mg->mg_flags; - if (mg->mg_type == PERL_MAGIC_qr) { - nmg->mg_obj = (SV*)re_dup((REGEXP*)mg->mg_obj, param); - } - else if(mg->mg_type == PERL_MAGIC_backref) { - const AV * const av = (AV*) mg->mg_obj; - SV **svp; - I32 i; - (void)SvREFCNT_inc(nmg->mg_obj = (SV*)newAV()); - svp = AvARRAY(av); - for (i = AvFILLp(av); i >= 0; i--) { - if (!svp[i]) continue; - av_push((AV*)nmg->mg_obj,sv_dup(svp[i],param)); - } - } - else if (mg->mg_type == PERL_MAGIC_symtab) { - nmg->mg_obj = mg->mg_obj; - } - else { - nmg->mg_obj = (mg->mg_flags & MGf_REFCOUNTED) - ? sv_dup_inc(mg->mg_obj, param) - : sv_dup(mg->mg_obj, param); - } - nmg->mg_len = mg->mg_len; - nmg->mg_ptr = mg->mg_ptr; /* XXX random ptr? */ - if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) { - if (mg->mg_len > 0) { - nmg->mg_ptr = SAVEPVN(mg->mg_ptr, mg->mg_len); - if (mg->mg_type == PERL_MAGIC_overload_table && - AMT_AMAGIC((AMT*)mg->mg_ptr)) - { - AMT * const amtp = (AMT*)mg->mg_ptr; - AMT * const namtp = (AMT*)nmg->mg_ptr; - I32 i; - for (i = 1; i < NofAMmeth; i++) { - namtp->table[i] = cv_dup_inc(amtp->table[i], param); - } - } - } - else if (mg->mg_len == HEf_SVKEY) - nmg->mg_ptr = (char*)sv_dup_inc((SV*)mg->mg_ptr, param); - } - if ((mg->mg_flags & MGf_DUP) && mg->mg_virtual && mg->mg_virtual->svt_dup) { - CALL_FPTR(nmg->mg_virtual->svt_dup)(aTHX_ nmg, param); - } - mgprev = nmg; - } - return mgret; -} - -/* create a new pointer-mapping table */ - -PTR_TBL_t * -Perl_ptr_table_new(pTHX) -{ - PTR_TBL_t *tbl; - Newxz(tbl, 1, PTR_TBL_t); - tbl->tbl_max = 511; - tbl->tbl_items = 0; - Newxz(tbl->tbl_ary, tbl->tbl_max + 1, PTR_TBL_ENT_t*); - return tbl; -} - -#if (PTRSIZE == 8) -# define PTR_TABLE_HASH(ptr) (PTR2UV(ptr) >> 3) -#else -# define PTR_TABLE_HASH(ptr) (PTR2UV(ptr) >> 2) -#endif - -/* - we use the PTE_SVSLOT 'reservation' made above, both here (in the - following define) and at call to new_body_inline made below in - Perl_ptr_table_store() - */ - -#define del_pte(p) del_body_type(p, PTE_SVSLOT) - -/* map an existing pointer using a table */ - -void * -Perl_ptr_table_fetch(pTHX_ PTR_TBL_t *tbl, const void *sv) -{ - PTR_TBL_ENT_t *tblent; - const UV hash = PTR_TABLE_HASH(sv); - assert(tbl); - tblent = tbl->tbl_ary[hash & tbl->tbl_max]; - for (; tblent; tblent = tblent->next) { - if (tblent->oldval == sv) - return tblent->newval; - } - return (void*)NULL; -} - -/* add a new entry to a pointer-mapping table */ - -void -Perl_ptr_table_store(pTHX_ PTR_TBL_t *tbl, const void *oldsv, void *newsv) -{ - PTR_TBL_ENT_t *tblent, **otblent; - /* XXX this may be pessimal on platforms where pointers aren't good - * hash values e.g. if they grow faster in the most significant - * bits */ - const UV hash = PTR_TABLE_HASH(oldsv); - bool empty = 1; - - assert(tbl); - otblent = &tbl->tbl_ary[hash & tbl->tbl_max]; - for (tblent = *otblent; tblent; empty=0, tblent = tblent->next) { - if (tblent->oldval == oldsv) { - tblent->newval = newsv; - return; - } - } - new_body_inline(tblent, &PL_body_roots[PTE_SVSLOT], - sizeof(struct ptr_tbl_ent), PTE_SVSLOT); - tblent->oldval = oldsv; - tblent->newval = newsv; - tblent->next = *otblent; - *otblent = tblent; - tbl->tbl_items++; - if (!empty && tbl->tbl_items > tbl->tbl_max) - ptr_table_split(tbl); -} - -/* double the hash bucket size of an existing ptr table */ - -void -Perl_ptr_table_split(pTHX_ PTR_TBL_t *tbl) -{ - PTR_TBL_ENT_t **ary = tbl->tbl_ary; - const UV oldsize = tbl->tbl_max + 1; - UV newsize = oldsize * 2; - UV i; - - Renew(ary, newsize, PTR_TBL_ENT_t*); - Zero(&ary[oldsize], newsize-oldsize, PTR_TBL_ENT_t*); - tbl->tbl_max = --newsize; - tbl->tbl_ary = ary; - for (i=0; i < oldsize; i++, ary++) { - PTR_TBL_ENT_t **curentp, **entp, *ent; - if (!*ary) - continue; - curentp = ary + oldsize; - for (entp = ary, ent = *ary; ent; ent = *entp) { - if ((newsize & PTR_TABLE_HASH(ent->oldval)) != i) { - *entp = ent->next; - ent->next = *curentp; - *curentp = ent; - continue; - } - else - entp = &ent->next; - } - } -} - -/* remove all the entries from a ptr table */ - -void -Perl_ptr_table_clear(pTHX_ PTR_TBL_t *tbl) -{ - register PTR_TBL_ENT_t **array; - register PTR_TBL_ENT_t *entry; - UV riter = 0; - UV max; - - if (!tbl || !tbl->tbl_items) { - return; - } - - array = tbl->tbl_ary; - entry = array[0]; - max = tbl->tbl_max; - - for (;;) { - if (entry) { - PTR_TBL_ENT_t *oentry = entry; - entry = entry->next; - del_pte(oentry); - } - if (!entry) { - if (++riter > max) { - break; - } - entry = array[riter]; - } - } - - tbl->tbl_items = 0; + tbl->tbl_items = 0; + } } /* clear and free a ptr table */ @@ -10080,12 +9283,10 @@ Perl_sv_dup(pTHX_ SV *sstr, CLONE_PARAMS* param) default: { /* These are all the types that need complex bodies allocating. */ - size_t new_body_length; - size_t new_body_offset = 0; - void **new_body_arena; - void **new_body_arenaroot; void *new_body; - svtype sv_type = SvTYPE(sstr); + const svtype sv_type = SvTYPE(sstr); + const struct body_details *const sv_type_details + = bodies_by_type + sv_type; switch (sv_type) { default: @@ -10093,80 +9294,44 @@ Perl_sv_dup(pTHX_ SV *sstr, CLONE_PARAMS* param) (IV)SvTYPE(sstr)); break; - case SVt_PVIO: - new_body = new_XPVIO(); - new_body_length = sizeof(XPVIO); - break; - case SVt_PVFM: - new_body = new_XPVFM(); - new_body_length = sizeof(XPVFM); - break; - - case SVt_PVHV: - new_body_arena = &PL_body_roots[SVt_PVHV]; - new_body_arenaroot = &PL_body_arenaroots[SVt_PVHV]; - new_body_offset = - bodies_by_type[SVt_PVHV].offset; - - new_body_length = STRUCT_OFFSET(XPVHV, xmg_stash) - + sizeof (((XPVHV*)SvANY(sstr))->xmg_stash) - - new_body_offset; - goto new_body; - case SVt_PVAV: - new_body_arena = &PL_body_roots[SVt_PVAV]; - new_body_arenaroot = &PL_body_arenaroots[SVt_PVAV]; - new_body_offset = - bodies_by_type[SVt_PVAV].offset; - - new_body_length = STRUCT_OFFSET(XPVHV, xmg_stash) - + sizeof (((XPVHV*)SvANY(sstr))->xmg_stash) - - new_body_offset; - goto new_body; case SVt_PVGV: if (GvUNIQUE((GV*)sstr)) { /* Do sharing here, and fall through */ } + case SVt_PVIO: + case SVt_PVFM: + case SVt_PVHV: + case SVt_PVAV: case SVt_PVBM: case SVt_PVCV: case SVt_PVLV: case SVt_PVMG: case SVt_PVNV: - new_body_length = bodies_by_type[sv_type].size; - new_body_arena = &PL_body_roots[sv_type]; - new_body_arenaroot = &PL_body_arenaroots[sv_type]; - goto new_body; - case SVt_PVIV: - new_body_offset = - bodies_by_type[SVt_PVIV].offset; - new_body_length = sizeof(XPVIV) - new_body_offset; - new_body_arena = &PL_body_roots[SVt_PVIV]; - new_body_arenaroot = &PL_body_arenaroots[SVt_PVIV]; - goto new_body; case SVt_PV: - new_body_offset = - bodies_by_type[SVt_PV].offset; - new_body_length = sizeof(XPV) - new_body_offset; - new_body_arena = &PL_body_roots[SVt_PV]; - new_body_arenaroot = &PL_body_arenaroots[SVt_PV]; - new_body: - assert(new_body_length); -#ifndef PURIFY - new_body_inline(new_body, new_body_arena, - new_body_length, SvTYPE(sstr)); - - new_body = (void*)((char*)new_body - new_body_offset); -#else - /* We always allocated the full length item with PURIFY */ - new_body_length += new_body_offset; - new_body_offset = 0; - new_body = my_safemalloc(new_body_length); -#endif + assert(sv_type_details->size); + if (sv_type_details->arena) { + new_body_inline(new_body, sv_type_details->size, sv_type); + new_body + = (void*)((char*)new_body - sv_type_details->offset); + } else { + new_body = new_NOARENA(sv_type_details); + } } assert(new_body); SvANY(dstr) = new_body; - Copy(((char*)SvANY(sstr)) + new_body_offset, - ((char*)SvANY(dstr)) + new_body_offset, - new_body_length, char); +#ifndef PURIFY + Copy(((char*)SvANY(sstr)) + sv_type_details->offset, + ((char*)SvANY(dstr)) + sv_type_details->offset, + sv_type_details->copy, char); +#else + Copy(((char*)SvANY(sstr)), + ((char*)SvANY(dstr)), + sv_type_details->size + sv_type_details->offset, char); +#endif - if (SvTYPE(sstr) != SVt_PVAV && SvTYPE(sstr) != SVt_PVHV) + if (sv_type != SVt_PVAV && sv_type != SVt_PVHV) Perl_rvpv_dup(aTHX_ dstr, sstr, param); /* The Copy above means that all the source (unduplicated) pointers @@ -10174,14 +9339,15 @@ Perl_sv_dup(pTHX_ SV *sstr, CLONE_PARAMS* param) pointers in either, but it's possible that there's less cache missing by always going for the destination. FIXME - instrument and check that assumption */ - if (SvTYPE(sstr) >= SVt_PVMG) { + if (sv_type >= SVt_PVMG) { if (SvMAGIC(dstr)) SvMAGIC_set(dstr, mg_dup(SvMAGIC(dstr), param)); if (SvSTASH(dstr)) SvSTASH_set(dstr, hv_dup_inc(SvSTASH(dstr), param)); } - switch (SvTYPE(sstr)) { + /* The cast silences a GCC warning about unhandled types. */ + switch ((int)sv_type) { case SVt_PV: break; case SVt_PVIV: @@ -10283,8 +9449,8 @@ Perl_sv_dup(pTHX_ SV *sstr, CLONE_PARAMS* param) ++i; } if (SvOOK(sstr)) { - struct xpvhv_aux *saux = HvAUX(sstr); - struct xpvhv_aux *daux = HvAUX(dstr); + struct xpvhv_aux * const saux = HvAUX(sstr); + struct xpvhv_aux * const daux = HvAUX(dstr); /* This flag isn't copied. */ /* SvOOK_on(hv) attacks the IV flags. */ SvFLAGS(dstr) |= SVf_OOK; @@ -10384,7 +9550,7 @@ Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max, CLONE_PARAMS* param) : cv_dup(cx->blk_sub.cv,param)); ncx->blk_sub.argarray = (cx->blk_sub.hasargs ? av_dup_inc(cx->blk_sub.argarray, param) - : Nullav); + : NULL); ncx->blk_sub.savearray = av_dup_inc(cx->blk_sub.savearray, param); ncx->blk_sub.olddepth = cx->blk_sub.olddepth; ncx->blk_sub.hasargs = cx->blk_sub.hasargs; @@ -10782,984 +9948,1454 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) } } - return nss; -} - + return nss; +} + + +/* if sv is a stash, call $class->CLONE_SKIP(), and set the SVphv_CLONEABLE + * flag to the result. This is done for each stash before cloning starts, + * so we know which stashes want their objects cloned */ + +static void +do_mark_cloneable_stash(pTHX_ SV *sv) +{ + const HEK * const hvname = HvNAME_HEK((HV*)sv); + if (hvname) { + GV* const cloner = gv_fetchmethod_autoload((HV*)sv, "CLONE_SKIP", 0); + SvFLAGS(sv) |= SVphv_CLONEABLE; /* clone objects by default */ + if (cloner && GvCV(cloner)) { + dSP; + UV status; + + ENTER; + SAVETMPS; + PUSHMARK(SP); + XPUSHs(sv_2mortal(newSVhek(hvname))); + PUTBACK; + call_sv((SV*)GvCV(cloner), G_SCALAR); + SPAGAIN; + status = POPu; + PUTBACK; + FREETMPS; + LEAVE; + if (status) + SvFLAGS(sv) &= ~SVphv_CLONEABLE; + } + } +} + + + +/* +=for apidoc perl_clone + +Create and return a new interpreter by cloning the current one. + +perl_clone takes these flags as parameters: + +CLONEf_COPY_STACKS - is used to, well, copy the stacks also, +without it we only clone the data and zero the stacks, +with it we copy the stacks and the new perl interpreter is +ready to run at the exact same point as the previous one. +The pseudo-fork code uses COPY_STACKS while the +threads->new doesn't. + +CLONEf_KEEP_PTR_TABLE +perl_clone keeps a ptr_table with the pointer of the old +variable as a key and the new variable as a value, +this allows it to check if something has been cloned and not +clone it again but rather just use the value and increase the +refcount. If KEEP_PTR_TABLE is not set then perl_clone will kill +the ptr_table using the function +C, +reason to keep it around is if you want to dup some of your own +variable who are outside the graph perl scans, example of this +code is in threads.xs create + +CLONEf_CLONE_HOST +This is a win32 thing, it is ignored on unix, it tells perls +win32host code (which is c++) to clone itself, this is needed on +win32 if you want to run two threads at the same time, +if you just want to do some stuff in a separate perl interpreter +and then throw it away and return to the original one, +you don't need to do anything. + +=cut +*/ + +/* XXX the above needs expanding by someone who actually understands it ! */ +EXTERN_C PerlInterpreter * +perl_clone_host(PerlInterpreter* proto_perl, UV flags); + +PerlInterpreter * +perl_clone(PerlInterpreter *proto_perl, UV flags) +{ + dVAR; +#ifdef PERL_IMPLICIT_SYS + + /* perlhost.h so we need to call into it + to clone the host, CPerlHost should have a c interface, sky */ + + if (flags & CLONEf_CLONE_HOST) { + return perl_clone_host(proto_perl,flags); + } + return perl_clone_using(proto_perl, flags, + proto_perl->IMem, + proto_perl->IMemShared, + proto_perl->IMemParse, + proto_perl->IEnv, + proto_perl->IStdIO, + proto_perl->ILIO, + proto_perl->IDir, + proto_perl->ISock, + proto_perl->IProc); +} + +PerlInterpreter * +perl_clone_using(PerlInterpreter *proto_perl, UV flags, + struct IPerlMem* ipM, struct IPerlMem* ipMS, + struct IPerlMem* ipMP, struct IPerlEnv* ipE, + struct IPerlStdIO* ipStd, struct IPerlLIO* ipLIO, + struct IPerlDir* ipD, struct IPerlSock* ipS, + struct IPerlProc* ipP) +{ + /* XXX many of the string copies here can be optimized if they're + * constants; they need to be allocated as common memory and just + * their pointers copied. */ + + IV i; + CLONE_PARAMS clone_params; + CLONE_PARAMS* param = &clone_params; + + PerlInterpreter *my_perl = (PerlInterpreter*)(*ipM->pMalloc)(ipM, sizeof(PerlInterpreter)); + /* for each stash, determine whether its objects should be cloned */ + S_visit(proto_perl, do_mark_cloneable_stash, SVt_PVHV, SVTYPEMASK); + PERL_SET_THX(my_perl); + +# ifdef DEBUGGING + Poison(my_perl, 1, PerlInterpreter); + PL_op = Nullop; + PL_curcop = (COP *)Nullop; + PL_markstack = 0; + PL_scopestack = 0; + PL_savestack = 0; + PL_savestack_ix = 0; + PL_savestack_max = -1; + PL_sig_pending = 0; + Zero(&PL_debug_pad, 1, struct perl_debug_pad); +# else /* !DEBUGGING */ + Zero(my_perl, 1, PerlInterpreter); +# endif /* DEBUGGING */ + + /* host pointers */ + PL_Mem = ipM; + PL_MemShared = ipMS; + PL_MemParse = ipMP; + PL_Env = ipE; + PL_StdIO = ipStd; + PL_LIO = ipLIO; + PL_Dir = ipD; + PL_Sock = ipS; + PL_Proc = ipP; +#else /* !PERL_IMPLICIT_SYS */ + IV i; + CLONE_PARAMS clone_params; + CLONE_PARAMS* param = &clone_params; + PerlInterpreter *my_perl = (PerlInterpreter*)PerlMem_malloc(sizeof(PerlInterpreter)); + /* for each stash, determine whether its objects should be cloned */ + S_visit(proto_perl, do_mark_cloneable_stash, SVt_PVHV, SVTYPEMASK); + PERL_SET_THX(my_perl); + +# ifdef DEBUGGING + Poison(my_perl, 1, PerlInterpreter); + PL_op = Nullop; + PL_curcop = (COP *)Nullop; + PL_markstack = 0; + PL_scopestack = 0; + PL_savestack = 0; + PL_savestack_ix = 0; + PL_savestack_max = -1; + PL_sig_pending = 0; + Zero(&PL_debug_pad, 1, struct perl_debug_pad); +# else /* !DEBUGGING */ + Zero(my_perl, 1, PerlInterpreter); +# endif /* DEBUGGING */ +#endif /* PERL_IMPLICIT_SYS */ + param->flags = flags; + param->proto_perl = proto_perl; + + Zero(&PL_body_arenaroots, 1, PL_body_arenaroots); + Zero(&PL_body_roots, 1, PL_body_roots); + + PL_nice_chunk = NULL; + PL_nice_chunk_size = 0; + PL_sv_count = 0; + PL_sv_objcount = 0; + PL_sv_root = Nullsv; + PL_sv_arenaroot = Nullsv; + + PL_debug = proto_perl->Idebug; + + PL_hash_seed = proto_perl->Ihash_seed; + PL_rehash_seed = proto_perl->Irehash_seed; + +#ifdef USE_REENTRANT_API + /* XXX: things like -Dm will segfault here in perlio, but doing + * PERL_SET_CONTEXT(proto_perl); + * breaks too many other things + */ + Perl_reentrant_init(aTHX); +#endif + + /* create SV map for pointer relocation */ + PL_ptr_table = ptr_table_new(); + + /* initialize these special pointers as early as possible */ + SvANY(&PL_sv_undef) = NULL; + SvREFCNT(&PL_sv_undef) = (~(U32)0)/2; + SvFLAGS(&PL_sv_undef) = SVf_READONLY|SVt_NULL; + ptr_table_store(PL_ptr_table, &proto_perl->Isv_undef, &PL_sv_undef); + + SvANY(&PL_sv_no) = new_XPVNV(); + SvREFCNT(&PL_sv_no) = (~(U32)0)/2; + SvFLAGS(&PL_sv_no) = SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK + |SVp_POK|SVf_POK|SVf_READONLY|SVt_PVNV; + SvPV_set(&PL_sv_no, SAVEPVN(PL_No, 0)); + SvCUR_set(&PL_sv_no, 0); + SvLEN_set(&PL_sv_no, 1); + SvIV_set(&PL_sv_no, 0); + SvNV_set(&PL_sv_no, 0); + ptr_table_store(PL_ptr_table, &proto_perl->Isv_no, &PL_sv_no); + + SvANY(&PL_sv_yes) = new_XPVNV(); + SvREFCNT(&PL_sv_yes) = (~(U32)0)/2; + SvFLAGS(&PL_sv_yes) = SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK + |SVp_POK|SVf_POK|SVf_READONLY|SVt_PVNV; + SvPV_set(&PL_sv_yes, SAVEPVN(PL_Yes, 1)); + SvCUR_set(&PL_sv_yes, 1); + SvLEN_set(&PL_sv_yes, 2); + SvIV_set(&PL_sv_yes, 1); + SvNV_set(&PL_sv_yes, 1); + ptr_table_store(PL_ptr_table, &proto_perl->Isv_yes, &PL_sv_yes); + + /* create (a non-shared!) shared string table */ + PL_strtab = newHV(); + HvSHAREKEYS_off(PL_strtab); + hv_ksplit(PL_strtab, HvTOTALKEYS(proto_perl->Istrtab)); + ptr_table_store(PL_ptr_table, proto_perl->Istrtab, PL_strtab); + + PL_compiling = proto_perl->Icompiling; + + /* These two PVs will be free'd special way so must set them same way op.c does */ + PL_compiling.cop_stashpv = savesharedpv(PL_compiling.cop_stashpv); + ptr_table_store(PL_ptr_table, proto_perl->Icompiling.cop_stashpv, PL_compiling.cop_stashpv); + + PL_compiling.cop_file = savesharedpv(PL_compiling.cop_file); + ptr_table_store(PL_ptr_table, proto_perl->Icompiling.cop_file, PL_compiling.cop_file); + + ptr_table_store(PL_ptr_table, &proto_perl->Icompiling, &PL_compiling); + if (!specialWARN(PL_compiling.cop_warnings)) + PL_compiling.cop_warnings = sv_dup_inc(PL_compiling.cop_warnings, param); + if (!specialCopIO(PL_compiling.cop_io)) + PL_compiling.cop_io = sv_dup_inc(PL_compiling.cop_io, param); + PL_curcop = (COP*)any_dup(proto_perl->Tcurcop, proto_perl); + + /* pseudo environmental stuff */ + PL_origargc = proto_perl->Iorigargc; + PL_origargv = proto_perl->Iorigargv; + + param->stashes = newAV(); /* Setup array of objects to call clone on */ + + /* Set tainting stuff before PerlIO_debug can possibly get called */ + PL_tainting = proto_perl->Itainting; + PL_taint_warn = proto_perl->Itaint_warn; + +#ifdef PERLIO_LAYERS + /* Clone PerlIO tables as soon as we can handle general xx_dup() */ + PerlIO_clone(aTHX_ proto_perl, param); +#endif + + PL_envgv = gv_dup(proto_perl->Ienvgv, param); + PL_incgv = gv_dup(proto_perl->Iincgv, param); + PL_hintgv = gv_dup(proto_perl->Ihintgv, param); + PL_origfilename = SAVEPV(proto_perl->Iorigfilename); + PL_diehook = sv_dup_inc(proto_perl->Idiehook, param); + PL_warnhook = sv_dup_inc(proto_perl->Iwarnhook, param); + + /* switches */ + PL_minus_c = proto_perl->Iminus_c; + PL_patchlevel = sv_dup_inc(proto_perl->Ipatchlevel, param); + PL_localpatches = proto_perl->Ilocalpatches; + PL_splitstr = proto_perl->Isplitstr; + PL_preprocess = proto_perl->Ipreprocess; + PL_minus_n = proto_perl->Iminus_n; + PL_minus_p = proto_perl->Iminus_p; + PL_minus_l = proto_perl->Iminus_l; + PL_minus_a = proto_perl->Iminus_a; + PL_minus_F = proto_perl->Iminus_F; + PL_doswitches = proto_perl->Idoswitches; + PL_dowarn = proto_perl->Idowarn; + PL_doextract = proto_perl->Idoextract; + PL_sawampersand = proto_perl->Isawampersand; + PL_unsafe = proto_perl->Iunsafe; + PL_inplace = SAVEPV(proto_perl->Iinplace); + PL_e_script = sv_dup_inc(proto_perl->Ie_script, param); + PL_perldb = proto_perl->Iperldb; + PL_perl_destruct_level = proto_perl->Iperl_destruct_level; + PL_exit_flags = proto_perl->Iexit_flags; + + /* magical thingies */ + /* XXX time(&PL_basetime) when asked for? */ + PL_basetime = proto_perl->Ibasetime; + PL_formfeed = sv_dup(proto_perl->Iformfeed, param); + + PL_maxsysfd = proto_perl->Imaxsysfd; + PL_multiline = proto_perl->Imultiline; + PL_statusvalue = proto_perl->Istatusvalue; +#ifdef VMS + PL_statusvalue_vms = proto_perl->Istatusvalue_vms; +#else + PL_statusvalue_posix = proto_perl->Istatusvalue_posix; +#endif + PL_encoding = sv_dup(proto_perl->Iencoding, param); + + sv_setpvn(PERL_DEBUG_PAD(0), "", 0); /* For regex debugging. */ + sv_setpvn(PERL_DEBUG_PAD(1), "", 0); /* ext/re needs these */ + sv_setpvn(PERL_DEBUG_PAD(2), "", 0); /* even without DEBUGGING. */ + + /* Clone the regex array */ + PL_regex_padav = newAV(); + { + const I32 len = av_len((AV*)proto_perl->Iregex_padav); + SV** const regexen = AvARRAY((AV*)proto_perl->Iregex_padav); + IV i; + av_push(PL_regex_padav, + sv_dup_inc(regexen[0],param)); + for(i = 1; i <= len; i++) { + if(SvREPADTMP(regexen[i])) { + av_push(PL_regex_padav, sv_dup_inc(regexen[i], param)); + } else { + av_push(PL_regex_padav, + SvREFCNT_inc( + newSViv(PTR2IV(re_dup(INT2PTR(REGEXP *, + SvIVX(regexen[i])), param))) + )); + } + } + } + PL_regex_pad = AvARRAY(PL_regex_padav); + + /* shortcuts to various I/O objects */ + PL_stdingv = gv_dup(proto_perl->Istdingv, param); + PL_stderrgv = gv_dup(proto_perl->Istderrgv, param); + PL_defgv = gv_dup(proto_perl->Idefgv, param); + PL_argvgv = gv_dup(proto_perl->Iargvgv, param); + PL_argvoutgv = gv_dup(proto_perl->Iargvoutgv, param); + PL_argvout_stack = av_dup_inc(proto_perl->Iargvout_stack, param); -/* if sv is a stash, call $class->CLONE_SKIP(), and set the SVphv_CLONEABLE - * flag to the result. This is done for each stash before cloning starts, - * so we know which stashes want their objects cloned */ + /* shortcuts to regexp stuff */ + PL_replgv = gv_dup(proto_perl->Ireplgv, param); -static void -do_mark_cloneable_stash(pTHX_ SV *sv) -{ - const HEK * const hvname = HvNAME_HEK((HV*)sv); - if (hvname) { - GV* const cloner = gv_fetchmethod_autoload((HV*)sv, "CLONE_SKIP", 0); - SvFLAGS(sv) |= SVphv_CLONEABLE; /* clone objects by default */ - if (cloner && GvCV(cloner)) { - dSP; - UV status; + /* shortcuts to misc objects */ + PL_errgv = gv_dup(proto_perl->Ierrgv, param); - ENTER; - SAVETMPS; - PUSHMARK(SP); - XPUSHs(sv_2mortal(newSVhek(hvname))); - PUTBACK; - call_sv((SV*)GvCV(cloner), G_SCALAR); - SPAGAIN; - status = POPu; - PUTBACK; - FREETMPS; - LEAVE; - if (status) - SvFLAGS(sv) &= ~SVphv_CLONEABLE; - } - } -} + /* shortcuts to debugging objects */ + PL_DBgv = gv_dup(proto_perl->IDBgv, param); + PL_DBline = gv_dup(proto_perl->IDBline, param); + PL_DBsub = gv_dup(proto_perl->IDBsub, param); + PL_DBsingle = sv_dup(proto_perl->IDBsingle, param); + PL_DBtrace = sv_dup(proto_perl->IDBtrace, param); + PL_DBsignal = sv_dup(proto_perl->IDBsignal, param); + PL_DBassertion = sv_dup(proto_perl->IDBassertion, param); + PL_lineary = av_dup(proto_perl->Ilineary, param); + PL_dbargs = av_dup(proto_perl->Idbargs, param); + /* symbol tables */ + PL_defstash = hv_dup_inc(proto_perl->Tdefstash, param); + PL_curstash = hv_dup(proto_perl->Tcurstash, param); + PL_debstash = hv_dup(proto_perl->Idebstash, param); + PL_globalstash = hv_dup(proto_perl->Iglobalstash, param); + PL_curstname = sv_dup_inc(proto_perl->Icurstname, param); + PL_beginav = av_dup_inc(proto_perl->Ibeginav, param); + PL_beginav_save = av_dup_inc(proto_perl->Ibeginav_save, param); + PL_checkav_save = av_dup_inc(proto_perl->Icheckav_save, param); + PL_endav = av_dup_inc(proto_perl->Iendav, param); + PL_checkav = av_dup_inc(proto_perl->Icheckav, param); + PL_initav = av_dup_inc(proto_perl->Iinitav, param); -/* -=for apidoc perl_clone + PL_sub_generation = proto_perl->Isub_generation; -Create and return a new interpreter by cloning the current one. + /* funky return mechanisms */ + PL_forkprocess = proto_perl->Iforkprocess; -perl_clone takes these flags as parameters: + /* subprocess state */ + PL_fdpid = av_dup_inc(proto_perl->Ifdpid, param); -CLONEf_COPY_STACKS - is used to, well, copy the stacks also, -without it we only clone the data and zero the stacks, -with it we copy the stacks and the new perl interpreter is -ready to run at the exact same point as the previous one. -The pseudo-fork code uses COPY_STACKS while the -threads->new doesn't. + /* internal state */ + PL_maxo = proto_perl->Imaxo; + if (proto_perl->Iop_mask) + PL_op_mask = SAVEPVN(proto_perl->Iop_mask, PL_maxo); + else + PL_op_mask = Nullch; + /* PL_asserting = proto_perl->Iasserting; */ -CLONEf_KEEP_PTR_TABLE -perl_clone keeps a ptr_table with the pointer of the old -variable as a key and the new variable as a value, -this allows it to check if something has been cloned and not -clone it again but rather just use the value and increase the -refcount. If KEEP_PTR_TABLE is not set then perl_clone will kill -the ptr_table using the function -C, -reason to keep it around is if you want to dup some of your own -variable who are outside the graph perl scans, example of this -code is in threads.xs create + /* current interpreter roots */ + PL_main_cv = cv_dup_inc(proto_perl->Imain_cv, param); + PL_main_root = OpREFCNT_inc(proto_perl->Imain_root); + PL_main_start = proto_perl->Imain_start; + PL_eval_root = proto_perl->Ieval_root; + PL_eval_start = proto_perl->Ieval_start; -CLONEf_CLONE_HOST -This is a win32 thing, it is ignored on unix, it tells perls -win32host code (which is c++) to clone itself, this is needed on -win32 if you want to run two threads at the same time, -if you just want to do some stuff in a separate perl interpreter -and then throw it away and return to the original one, -you don't need to do anything. + /* runtime control stuff */ + PL_curcopdb = (COP*)any_dup(proto_perl->Icurcopdb, proto_perl); + PL_copline = proto_perl->Icopline; -=cut -*/ + PL_filemode = proto_perl->Ifilemode; + PL_lastfd = proto_perl->Ilastfd; + PL_oldname = proto_perl->Ioldname; /* XXX not quite right */ + PL_Argv = NULL; + PL_Cmd = Nullch; + PL_gensym = proto_perl->Igensym; + PL_preambled = proto_perl->Ipreambled; + PL_preambleav = av_dup_inc(proto_perl->Ipreambleav, param); + PL_laststatval = proto_perl->Ilaststatval; + PL_laststype = proto_perl->Ilaststype; + PL_mess_sv = Nullsv; -/* XXX the above needs expanding by someone who actually understands it ! */ -EXTERN_C PerlInterpreter * -perl_clone_host(PerlInterpreter* proto_perl, UV flags); + PL_ors_sv = sv_dup_inc(proto_perl->Iors_sv, param); -PerlInterpreter * -perl_clone(PerlInterpreter *proto_perl, UV flags) -{ - dVAR; -#ifdef PERL_IMPLICIT_SYS + /* interpreter atexit processing */ + PL_exitlistlen = proto_perl->Iexitlistlen; + if (PL_exitlistlen) { + Newx(PL_exitlist, PL_exitlistlen, PerlExitListEntry); + Copy(proto_perl->Iexitlist, PL_exitlist, PL_exitlistlen, PerlExitListEntry); + } + else + PL_exitlist = (PerlExitListEntry*)NULL; + PL_modglobal = hv_dup_inc(proto_perl->Imodglobal, param); + PL_custom_op_names = hv_dup_inc(proto_perl->Icustom_op_names,param); + PL_custom_op_descs = hv_dup_inc(proto_perl->Icustom_op_descs,param); - /* perlhost.h so we need to call into it - to clone the host, CPerlHost should have a c interface, sky */ + PL_profiledata = NULL; + PL_rsfp = fp_dup(proto_perl->Irsfp, '<', param); + /* PL_rsfp_filters entries have fake IoDIRP() */ + PL_rsfp_filters = av_dup_inc(proto_perl->Irsfp_filters, param); - if (flags & CLONEf_CLONE_HOST) { - return perl_clone_host(proto_perl,flags); - } - return perl_clone_using(proto_perl, flags, - proto_perl->IMem, - proto_perl->IMemShared, - proto_perl->IMemParse, - proto_perl->IEnv, - proto_perl->IStdIO, - proto_perl->ILIO, - proto_perl->IDir, - proto_perl->ISock, - proto_perl->IProc); -} + PL_compcv = cv_dup(proto_perl->Icompcv, param); -PerlInterpreter * -perl_clone_using(PerlInterpreter *proto_perl, UV flags, - struct IPerlMem* ipM, struct IPerlMem* ipMS, - struct IPerlMem* ipMP, struct IPerlEnv* ipE, - struct IPerlStdIO* ipStd, struct IPerlLIO* ipLIO, - struct IPerlDir* ipD, struct IPerlSock* ipS, - struct IPerlProc* ipP) -{ - /* XXX many of the string copies here can be optimized if they're - * constants; they need to be allocated as common memory and just - * their pointers copied. */ + PAD_CLONE_VARS(proto_perl, param); - IV i; - CLONE_PARAMS clone_params; - CLONE_PARAMS* param = &clone_params; +#ifdef HAVE_INTERP_INTERN + sys_intern_dup(&proto_perl->Isys_intern, &PL_sys_intern); +#endif - PerlInterpreter *my_perl = (PerlInterpreter*)(*ipM->pMalloc)(ipM, sizeof(PerlInterpreter)); - /* for each stash, determine whether its objects should be cloned */ - S_visit(proto_perl, do_mark_cloneable_stash, SVt_PVHV, SVTYPEMASK); - PERL_SET_THX(my_perl); + /* more statics moved here */ + PL_generation = proto_perl->Igeneration; + PL_DBcv = cv_dup(proto_perl->IDBcv, param); -# ifdef DEBUGGING - Poison(my_perl, 1, PerlInterpreter); - PL_op = Nullop; - PL_curcop = (COP *)Nullop; - PL_markstack = 0; - PL_scopestack = 0; - PL_savestack = 0; - PL_savestack_ix = 0; - PL_savestack_max = -1; - PL_sig_pending = 0; - Zero(&PL_debug_pad, 1, struct perl_debug_pad); -# else /* !DEBUGGING */ - Zero(my_perl, 1, PerlInterpreter); -# endif /* DEBUGGING */ + PL_in_clean_objs = proto_perl->Iin_clean_objs; + PL_in_clean_all = proto_perl->Iin_clean_all; - /* host pointers */ - PL_Mem = ipM; - PL_MemShared = ipMS; - PL_MemParse = ipMP; - PL_Env = ipE; - PL_StdIO = ipStd; - PL_LIO = ipLIO; - PL_Dir = ipD; - PL_Sock = ipS; - PL_Proc = ipP; -#else /* !PERL_IMPLICIT_SYS */ - IV i; - CLONE_PARAMS clone_params; - CLONE_PARAMS* param = &clone_params; - PerlInterpreter *my_perl = (PerlInterpreter*)PerlMem_malloc(sizeof(PerlInterpreter)); - /* for each stash, determine whether its objects should be cloned */ - S_visit(proto_perl, do_mark_cloneable_stash, SVt_PVHV, SVTYPEMASK); - PERL_SET_THX(my_perl); + PL_uid = proto_perl->Iuid; + PL_euid = proto_perl->Ieuid; + PL_gid = proto_perl->Igid; + PL_egid = proto_perl->Iegid; + PL_nomemok = proto_perl->Inomemok; + PL_an = proto_perl->Ian; + PL_evalseq = proto_perl->Ievalseq; + PL_origenviron = proto_perl->Iorigenviron; /* XXX not quite right */ + PL_origalen = proto_perl->Iorigalen; +#ifdef PERL_USES_PL_PIDSTATUS + PL_pidstatus = newHV(); /* XXX flag for cloning? */ +#endif + PL_osname = SAVEPV(proto_perl->Iosname); + PL_sighandlerp = proto_perl->Isighandlerp; -# ifdef DEBUGGING - Poison(my_perl, 1, PerlInterpreter); - PL_op = Nullop; - PL_curcop = (COP *)Nullop; - PL_markstack = 0; - PL_scopestack = 0; - PL_savestack = 0; - PL_savestack_ix = 0; - PL_savestack_max = -1; - PL_sig_pending = 0; - Zero(&PL_debug_pad, 1, struct perl_debug_pad); -# else /* !DEBUGGING */ - Zero(my_perl, 1, PerlInterpreter); -# endif /* DEBUGGING */ -#endif /* PERL_IMPLICIT_SYS */ - param->flags = flags; - param->proto_perl = proto_perl; + PL_runops = proto_perl->Irunops; - Zero(&PL_body_arenaroots, 1, PL_body_arenaroots); - Zero(&PL_body_roots, 1, PL_body_roots); - - PL_he_arenaroot = NULL; - PL_he_root = NULL; + Copy(proto_perl->Itokenbuf, PL_tokenbuf, 256, char); - PL_nice_chunk = NULL; - PL_nice_chunk_size = 0; - PL_sv_count = 0; - PL_sv_objcount = 0; - PL_sv_root = Nullsv; - PL_sv_arenaroot = Nullsv; +#ifdef CSH + PL_cshlen = proto_perl->Icshlen; + PL_cshname = proto_perl->Icshname; /* XXX never deallocated */ +#endif - PL_debug = proto_perl->Idebug; + PL_lex_state = proto_perl->Ilex_state; + PL_lex_defer = proto_perl->Ilex_defer; + PL_lex_expect = proto_perl->Ilex_expect; + PL_lex_formbrack = proto_perl->Ilex_formbrack; + PL_lex_dojoin = proto_perl->Ilex_dojoin; + PL_lex_starts = proto_perl->Ilex_starts; + PL_lex_stuff = sv_dup_inc(proto_perl->Ilex_stuff, param); + PL_lex_repl = sv_dup_inc(proto_perl->Ilex_repl, param); + PL_lex_op = proto_perl->Ilex_op; + PL_lex_inpat = proto_perl->Ilex_inpat; + PL_lex_inwhat = proto_perl->Ilex_inwhat; + PL_lex_brackets = proto_perl->Ilex_brackets; + i = (PL_lex_brackets < 120 ? 120 : PL_lex_brackets); + PL_lex_brackstack = SAVEPVN(proto_perl->Ilex_brackstack,i); + PL_lex_casemods = proto_perl->Ilex_casemods; + i = (PL_lex_casemods < 12 ? 12 : PL_lex_casemods); + PL_lex_casestack = SAVEPVN(proto_perl->Ilex_casestack,i); - PL_hash_seed = proto_perl->Ihash_seed; - PL_rehash_seed = proto_perl->Irehash_seed; + Copy(proto_perl->Inextval, PL_nextval, 5, YYSTYPE); + Copy(proto_perl->Inexttype, PL_nexttype, 5, I32); + PL_nexttoke = proto_perl->Inexttoke; -#ifdef USE_REENTRANT_API - /* XXX: things like -Dm will segfault here in perlio, but doing - * PERL_SET_CONTEXT(proto_perl); - * breaks too many other things + /* XXX This is probably masking the deeper issue of why + * SvANY(proto_perl->Ilinestr) can be NULL at this point. For test case: + * http://archive.develooper.com/perl5-porters%40perl.org/msg83298.html + * (A little debugging with a watchpoint on it may help.) */ - Perl_reentrant_init(aTHX); -#endif + if (SvANY(proto_perl->Ilinestr)) { + PL_linestr = sv_dup_inc(proto_perl->Ilinestr, param); + i = proto_perl->Ibufptr - SvPVX_const(proto_perl->Ilinestr); + PL_bufptr = SvPVX(PL_linestr) + (i < 0 ? 0 : i); + i = proto_perl->Ioldbufptr - SvPVX_const(proto_perl->Ilinestr); + PL_oldbufptr = SvPVX(PL_linestr) + (i < 0 ? 0 : i); + i = proto_perl->Ioldoldbufptr - SvPVX_const(proto_perl->Ilinestr); + PL_oldoldbufptr = SvPVX(PL_linestr) + (i < 0 ? 0 : i); + i = proto_perl->Ilinestart - SvPVX_const(proto_perl->Ilinestr); + PL_linestart = SvPVX(PL_linestr) + (i < 0 ? 0 : i); + } + else { + PL_linestr = NEWSV(65,79); + sv_upgrade(PL_linestr,SVt_PVIV); + sv_setpvn(PL_linestr,"",0); + PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); + } + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_pending_ident = proto_perl->Ipending_ident; + PL_sublex_info = proto_perl->Isublex_info; /* XXX not quite right */ - /* create SV map for pointer relocation */ - PL_ptr_table = ptr_table_new(); + PL_expect = proto_perl->Iexpect; - /* initialize these special pointers as early as possible */ - SvANY(&PL_sv_undef) = NULL; - SvREFCNT(&PL_sv_undef) = (~(U32)0)/2; - SvFLAGS(&PL_sv_undef) = SVf_READONLY|SVt_NULL; - ptr_table_store(PL_ptr_table, &proto_perl->Isv_undef, &PL_sv_undef); + PL_multi_start = proto_perl->Imulti_start; + PL_multi_end = proto_perl->Imulti_end; + PL_multi_open = proto_perl->Imulti_open; + PL_multi_close = proto_perl->Imulti_close; - SvANY(&PL_sv_no) = new_XPVNV(); - SvREFCNT(&PL_sv_no) = (~(U32)0)/2; - SvFLAGS(&PL_sv_no) = SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK - |SVp_POK|SVf_POK|SVf_READONLY|SVt_PVNV; - SvPV_set(&PL_sv_no, SAVEPVN(PL_No, 0)); - SvCUR_set(&PL_sv_no, 0); - SvLEN_set(&PL_sv_no, 1); - SvIV_set(&PL_sv_no, 0); - SvNV_set(&PL_sv_no, 0); - ptr_table_store(PL_ptr_table, &proto_perl->Isv_no, &PL_sv_no); + PL_error_count = proto_perl->Ierror_count; + PL_subline = proto_perl->Isubline; + PL_subname = sv_dup_inc(proto_perl->Isubname, param); - SvANY(&PL_sv_yes) = new_XPVNV(); - SvREFCNT(&PL_sv_yes) = (~(U32)0)/2; - SvFLAGS(&PL_sv_yes) = SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK - |SVp_POK|SVf_POK|SVf_READONLY|SVt_PVNV; - SvPV_set(&PL_sv_yes, SAVEPVN(PL_Yes, 1)); - SvCUR_set(&PL_sv_yes, 1); - SvLEN_set(&PL_sv_yes, 2); - SvIV_set(&PL_sv_yes, 1); - SvNV_set(&PL_sv_yes, 1); - ptr_table_store(PL_ptr_table, &proto_perl->Isv_yes, &PL_sv_yes); + /* XXX See comment on SvANY(proto_perl->Ilinestr) above */ + if (SvANY(proto_perl->Ilinestr)) { + i = proto_perl->Ilast_uni - SvPVX_const(proto_perl->Ilinestr); + PL_last_uni = SvPVX(PL_linestr) + (i < 0 ? 0 : i); + i = proto_perl->Ilast_lop - SvPVX_const(proto_perl->Ilinestr); + PL_last_lop = SvPVX(PL_linestr) + (i < 0 ? 0 : i); + PL_last_lop_op = proto_perl->Ilast_lop_op; + } + else { + PL_last_uni = SvPVX(PL_linestr); + PL_last_lop = SvPVX(PL_linestr); + PL_last_lop_op = 0; + } + PL_in_my = proto_perl->Iin_my; + PL_in_my_stash = hv_dup(proto_perl->Iin_my_stash, param); +#ifdef FCRYPT + PL_cryptseen = proto_perl->Icryptseen; +#endif - /* create (a non-shared!) shared string table */ - PL_strtab = newHV(); - HvSHAREKEYS_off(PL_strtab); - hv_ksplit(PL_strtab, HvTOTALKEYS(proto_perl->Istrtab)); - ptr_table_store(PL_ptr_table, proto_perl->Istrtab, PL_strtab); + PL_hints = proto_perl->Ihints; - PL_compiling = proto_perl->Icompiling; + PL_amagic_generation = proto_perl->Iamagic_generation; - /* These two PVs will be free'd special way so must set them same way op.c does */ - PL_compiling.cop_stashpv = savesharedpv(PL_compiling.cop_stashpv); - ptr_table_store(PL_ptr_table, proto_perl->Icompiling.cop_stashpv, PL_compiling.cop_stashpv); +#ifdef USE_LOCALE_COLLATE + PL_collation_ix = proto_perl->Icollation_ix; + PL_collation_name = SAVEPV(proto_perl->Icollation_name); + PL_collation_standard = proto_perl->Icollation_standard; + PL_collxfrm_base = proto_perl->Icollxfrm_base; + PL_collxfrm_mult = proto_perl->Icollxfrm_mult; +#endif /* USE_LOCALE_COLLATE */ - PL_compiling.cop_file = savesharedpv(PL_compiling.cop_file); - ptr_table_store(PL_ptr_table, proto_perl->Icompiling.cop_file, PL_compiling.cop_file); +#ifdef USE_LOCALE_NUMERIC + PL_numeric_name = SAVEPV(proto_perl->Inumeric_name); + PL_numeric_standard = proto_perl->Inumeric_standard; + PL_numeric_local = proto_perl->Inumeric_local; + PL_numeric_radix_sv = sv_dup_inc(proto_perl->Inumeric_radix_sv, param); +#endif /* !USE_LOCALE_NUMERIC */ - ptr_table_store(PL_ptr_table, &proto_perl->Icompiling, &PL_compiling); - if (!specialWARN(PL_compiling.cop_warnings)) - PL_compiling.cop_warnings = sv_dup_inc(PL_compiling.cop_warnings, param); - if (!specialCopIO(PL_compiling.cop_io)) - PL_compiling.cop_io = sv_dup_inc(PL_compiling.cop_io, param); - PL_curcop = (COP*)any_dup(proto_perl->Tcurcop, proto_perl); + /* utf8 character classes */ + PL_utf8_alnum = sv_dup_inc(proto_perl->Iutf8_alnum, param); + PL_utf8_alnumc = sv_dup_inc(proto_perl->Iutf8_alnumc, param); + PL_utf8_ascii = sv_dup_inc(proto_perl->Iutf8_ascii, param); + PL_utf8_alpha = sv_dup_inc(proto_perl->Iutf8_alpha, param); + PL_utf8_space = sv_dup_inc(proto_perl->Iutf8_space, param); + PL_utf8_cntrl = sv_dup_inc(proto_perl->Iutf8_cntrl, param); + PL_utf8_graph = sv_dup_inc(proto_perl->Iutf8_graph, param); + PL_utf8_digit = sv_dup_inc(proto_perl->Iutf8_digit, param); + PL_utf8_upper = sv_dup_inc(proto_perl->Iutf8_upper, param); + PL_utf8_lower = sv_dup_inc(proto_perl->Iutf8_lower, param); + PL_utf8_print = sv_dup_inc(proto_perl->Iutf8_print, param); + PL_utf8_punct = sv_dup_inc(proto_perl->Iutf8_punct, param); + PL_utf8_xdigit = sv_dup_inc(proto_perl->Iutf8_xdigit, param); + PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); + PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param); + PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param); + PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param); + PL_utf8_tofold = sv_dup_inc(proto_perl->Iutf8_tofold, param); + PL_utf8_idstart = sv_dup_inc(proto_perl->Iutf8_idstart, param); + PL_utf8_idcont = sv_dup_inc(proto_perl->Iutf8_idcont, param); - /* pseudo environmental stuff */ - PL_origargc = proto_perl->Iorigargc; - PL_origargv = proto_perl->Iorigargv; + /* Did the locale setup indicate UTF-8? */ + PL_utf8locale = proto_perl->Iutf8locale; + /* Unicode features (see perlrun/-C) */ + PL_unicode = proto_perl->Iunicode; - param->stashes = newAV(); /* Setup array of objects to call clone on */ + /* Pre-5.8 signals control */ + PL_signals = proto_perl->Isignals; - /* Set tainting stuff before PerlIO_debug can possibly get called */ - PL_tainting = proto_perl->Itainting; - PL_taint_warn = proto_perl->Itaint_warn; + /* times() ticks per second */ + PL_clocktick = proto_perl->Iclocktick; -#ifdef PERLIO_LAYERS - /* Clone PerlIO tables as soon as we can handle general xx_dup() */ - PerlIO_clone(aTHX_ proto_perl, param); -#endif + /* Recursion stopper for PerlIO_find_layer */ + PL_in_load_module = proto_perl->Iin_load_module; - PL_envgv = gv_dup(proto_perl->Ienvgv, param); - PL_incgv = gv_dup(proto_perl->Iincgv, param); - PL_hintgv = gv_dup(proto_perl->Ihintgv, param); - PL_origfilename = SAVEPV(proto_perl->Iorigfilename); - PL_diehook = sv_dup_inc(proto_perl->Idiehook, param); - PL_warnhook = sv_dup_inc(proto_perl->Iwarnhook, param); + /* sort() routine */ + PL_sort_RealCmp = proto_perl->Isort_RealCmp; - /* switches */ - PL_minus_c = proto_perl->Iminus_c; - PL_patchlevel = sv_dup_inc(proto_perl->Ipatchlevel, param); - PL_localpatches = proto_perl->Ilocalpatches; - PL_splitstr = proto_perl->Isplitstr; - PL_preprocess = proto_perl->Ipreprocess; - PL_minus_n = proto_perl->Iminus_n; - PL_minus_p = proto_perl->Iminus_p; - PL_minus_l = proto_perl->Iminus_l; - PL_minus_a = proto_perl->Iminus_a; - PL_minus_F = proto_perl->Iminus_F; - PL_doswitches = proto_perl->Idoswitches; - PL_dowarn = proto_perl->Idowarn; - PL_doextract = proto_perl->Idoextract; - PL_sawampersand = proto_perl->Isawampersand; - PL_unsafe = proto_perl->Iunsafe; - PL_inplace = SAVEPV(proto_perl->Iinplace); - PL_e_script = sv_dup_inc(proto_perl->Ie_script, param); - PL_perldb = proto_perl->Iperldb; - PL_perl_destruct_level = proto_perl->Iperl_destruct_level; - PL_exit_flags = proto_perl->Iexit_flags; + /* Not really needed/useful since the reenrant_retint is "volatile", + * but do it for consistency's sake. */ + PL_reentrant_retint = proto_perl->Ireentrant_retint; + + /* Hooks to shared SVs and locks. */ + PL_sharehook = proto_perl->Isharehook; + PL_lockhook = proto_perl->Ilockhook; + PL_unlockhook = proto_perl->Iunlockhook; + PL_threadhook = proto_perl->Ithreadhook; - /* magical thingies */ - /* XXX time(&PL_basetime) when asked for? */ - PL_basetime = proto_perl->Ibasetime; - PL_formfeed = sv_dup(proto_perl->Iformfeed, param); + PL_runops_std = proto_perl->Irunops_std; + PL_runops_dbg = proto_perl->Irunops_dbg; - PL_maxsysfd = proto_perl->Imaxsysfd; - PL_multiline = proto_perl->Imultiline; - PL_statusvalue = proto_perl->Istatusvalue; -#ifdef VMS - PL_statusvalue_vms = proto_perl->Istatusvalue_vms; -#else - PL_statusvalue_posix = proto_perl->Istatusvalue_posix; +#ifdef THREADS_HAVE_PIDS + PL_ppid = proto_perl->Ippid; #endif - PL_encoding = sv_dup(proto_perl->Iencoding, param); - sv_setpvn(PERL_DEBUG_PAD(0), "", 0); /* For regex debugging. */ - sv_setpvn(PERL_DEBUG_PAD(1), "", 0); /* ext/re needs these */ - sv_setpvn(PERL_DEBUG_PAD(2), "", 0); /* even without DEBUGGING. */ + /* swatch cache */ + PL_last_swash_hv = NULL; /* reinits on demand */ + PL_last_swash_klen = 0; + PL_last_swash_key[0]= '\0'; + PL_last_swash_tmps = (U8*)NULL; + PL_last_swash_slen = 0; - /* Clone the regex array */ - PL_regex_padav = newAV(); - { - const I32 len = av_len((AV*)proto_perl->Iregex_padav); - SV** const regexen = AvARRAY((AV*)proto_perl->Iregex_padav); - IV i; - av_push(PL_regex_padav, - sv_dup_inc(regexen[0],param)); - for(i = 1; i <= len; i++) { - if(SvREPADTMP(regexen[i])) { - av_push(PL_regex_padav, sv_dup_inc(regexen[i], param)); - } else { - av_push(PL_regex_padav, - SvREFCNT_inc( - newSViv(PTR2IV(re_dup(INT2PTR(REGEXP *, - SvIVX(regexen[i])), param))) - )); - } + PL_glob_index = proto_perl->Iglob_index; + PL_srand_called = proto_perl->Isrand_called; + PL_uudmap['M'] = 0; /* reinits on demand */ + PL_bitcount = Nullch; /* reinits on demand */ + + if (proto_perl->Ipsig_pend) { + Newxz(PL_psig_pend, SIG_SIZE, int); + } + else { + PL_psig_pend = (int*)NULL; + } + + if (proto_perl->Ipsig_ptr) { + Newxz(PL_psig_ptr, SIG_SIZE, SV*); + Newxz(PL_psig_name, SIG_SIZE, SV*); + for (i = 1; i < SIG_SIZE; i++) { + PL_psig_ptr[i] = sv_dup_inc(proto_perl->Ipsig_ptr[i], param); + PL_psig_name[i] = sv_dup_inc(proto_perl->Ipsig_name[i], param); } } - PL_regex_pad = AvARRAY(PL_regex_padav); + else { + PL_psig_ptr = (SV**)NULL; + PL_psig_name = (SV**)NULL; + } - /* shortcuts to various I/O objects */ - PL_stdingv = gv_dup(proto_perl->Istdingv, param); - PL_stderrgv = gv_dup(proto_perl->Istderrgv, param); - PL_defgv = gv_dup(proto_perl->Idefgv, param); - PL_argvgv = gv_dup(proto_perl->Iargvgv, param); - PL_argvoutgv = gv_dup(proto_perl->Iargvoutgv, param); - PL_argvout_stack = av_dup_inc(proto_perl->Iargvout_stack, param); + /* thrdvar.h stuff */ - /* shortcuts to regexp stuff */ - PL_replgv = gv_dup(proto_perl->Ireplgv, param); + if (flags & CLONEf_COPY_STACKS) { + /* next allocation will be PL_tmps_stack[PL_tmps_ix+1] */ + PL_tmps_ix = proto_perl->Ttmps_ix; + PL_tmps_max = proto_perl->Ttmps_max; + PL_tmps_floor = proto_perl->Ttmps_floor; + Newxz(PL_tmps_stack, PL_tmps_max, SV*); + i = 0; + while (i <= PL_tmps_ix) { + PL_tmps_stack[i] = sv_dup_inc(proto_perl->Ttmps_stack[i], param); + ++i; + } - /* shortcuts to misc objects */ - PL_errgv = gv_dup(proto_perl->Ierrgv, param); + /* next PUSHMARK() sets *(PL_markstack_ptr+1) */ + i = proto_perl->Tmarkstack_max - proto_perl->Tmarkstack; + Newxz(PL_markstack, i, I32); + PL_markstack_max = PL_markstack + (proto_perl->Tmarkstack_max + - proto_perl->Tmarkstack); + PL_markstack_ptr = PL_markstack + (proto_perl->Tmarkstack_ptr + - proto_perl->Tmarkstack); + Copy(proto_perl->Tmarkstack, PL_markstack, + PL_markstack_ptr - PL_markstack + 1, I32); - /* shortcuts to debugging objects */ - PL_DBgv = gv_dup(proto_perl->IDBgv, param); - PL_DBline = gv_dup(proto_perl->IDBline, param); - PL_DBsub = gv_dup(proto_perl->IDBsub, param); - PL_DBsingle = sv_dup(proto_perl->IDBsingle, param); - PL_DBtrace = sv_dup(proto_perl->IDBtrace, param); - PL_DBsignal = sv_dup(proto_perl->IDBsignal, param); - PL_DBassertion = sv_dup(proto_perl->IDBassertion, param); - PL_lineary = av_dup(proto_perl->Ilineary, param); - PL_dbargs = av_dup(proto_perl->Idbargs, param); + /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix] + * NOTE: unlike the others! */ + PL_scopestack_ix = proto_perl->Tscopestack_ix; + PL_scopestack_max = proto_perl->Tscopestack_max; + Newxz(PL_scopestack, PL_scopestack_max, I32); + Copy(proto_perl->Tscopestack, PL_scopestack, PL_scopestack_ix, I32); - /* symbol tables */ - PL_defstash = hv_dup_inc(proto_perl->Tdefstash, param); - PL_curstash = hv_dup(proto_perl->Tcurstash, param); - PL_debstash = hv_dup(proto_perl->Idebstash, param); - PL_globalstash = hv_dup(proto_perl->Iglobalstash, param); - PL_curstname = sv_dup_inc(proto_perl->Icurstname, param); + /* NOTE: si_dup() looks at PL_markstack */ + PL_curstackinfo = si_dup(proto_perl->Tcurstackinfo, param); - PL_beginav = av_dup_inc(proto_perl->Ibeginav, param); - PL_beginav_save = av_dup_inc(proto_perl->Ibeginav_save, param); - PL_checkav_save = av_dup_inc(proto_perl->Icheckav_save, param); - PL_endav = av_dup_inc(proto_perl->Iendav, param); - PL_checkav = av_dup_inc(proto_perl->Icheckav, param); - PL_initav = av_dup_inc(proto_perl->Iinitav, param); + /* PL_curstack = PL_curstackinfo->si_stack; */ + PL_curstack = av_dup(proto_perl->Tcurstack, param); + PL_mainstack = av_dup(proto_perl->Tmainstack, param); - PL_sub_generation = proto_perl->Isub_generation; + /* next PUSHs() etc. set *(PL_stack_sp+1) */ + PL_stack_base = AvARRAY(PL_curstack); + PL_stack_sp = PL_stack_base + (proto_perl->Tstack_sp + - proto_perl->Tstack_base); + PL_stack_max = PL_stack_base + AvMAX(PL_curstack); - /* funky return mechanisms */ - PL_forkprocess = proto_perl->Iforkprocess; + /* next SSPUSHFOO() sets PL_savestack[PL_savestack_ix] + * NOTE: unlike the others! */ + PL_savestack_ix = proto_perl->Tsavestack_ix; + PL_savestack_max = proto_perl->Tsavestack_max; + /*Newxz(PL_savestack, PL_savestack_max, ANY);*/ + PL_savestack = ss_dup(proto_perl, param); + } + else { + init_stacks(); + ENTER; /* perl_destruct() wants to LEAVE; */ + } - /* subprocess state */ - PL_fdpid = av_dup_inc(proto_perl->Ifdpid, param); + PL_start_env = proto_perl->Tstart_env; /* XXXXXX */ + PL_top_env = &PL_start_env; - /* internal state */ - PL_maxo = proto_perl->Imaxo; - if (proto_perl->Iop_mask) - PL_op_mask = SAVEPVN(proto_perl->Iop_mask, PL_maxo); - else - PL_op_mask = Nullch; - /* PL_asserting = proto_perl->Iasserting; */ + PL_op = proto_perl->Top; - /* current interpreter roots */ - PL_main_cv = cv_dup_inc(proto_perl->Imain_cv, param); - PL_main_root = OpREFCNT_inc(proto_perl->Imain_root); - PL_main_start = proto_perl->Imain_start; - PL_eval_root = proto_perl->Ieval_root; - PL_eval_start = proto_perl->Ieval_start; + PL_Sv = Nullsv; + PL_Xpv = (XPV*)NULL; + PL_na = proto_perl->Tna; - /* runtime control stuff */ - PL_curcopdb = (COP*)any_dup(proto_perl->Icurcopdb, proto_perl); - PL_copline = proto_perl->Icopline; + PL_statbuf = proto_perl->Tstatbuf; + PL_statcache = proto_perl->Tstatcache; + PL_statgv = gv_dup(proto_perl->Tstatgv, param); + PL_statname = sv_dup_inc(proto_perl->Tstatname, param); +#ifdef HAS_TIMES + PL_timesbuf = proto_perl->Ttimesbuf; +#endif - PL_filemode = proto_perl->Ifilemode; - PL_lastfd = proto_perl->Ilastfd; - PL_oldname = proto_perl->Ioldname; /* XXX not quite right */ - PL_Argv = NULL; - PL_Cmd = Nullch; - PL_gensym = proto_perl->Igensym; - PL_preambled = proto_perl->Ipreambled; - PL_preambleav = av_dup_inc(proto_perl->Ipreambleav, param); - PL_laststatval = proto_perl->Ilaststatval; - PL_laststype = proto_perl->Ilaststype; - PL_mess_sv = Nullsv; + PL_tainted = proto_perl->Ttainted; + PL_curpm = proto_perl->Tcurpm; /* XXX No PMOP ref count */ + PL_rs = sv_dup_inc(proto_perl->Trs, param); + PL_last_in_gv = gv_dup(proto_perl->Tlast_in_gv, param); + PL_ofs_sv = sv_dup_inc(proto_perl->Tofs_sv, param); + PL_defoutgv = gv_dup_inc(proto_perl->Tdefoutgv, param); + PL_chopset = proto_perl->Tchopset; /* XXX never deallocated */ + PL_toptarget = sv_dup_inc(proto_perl->Ttoptarget, param); + PL_bodytarget = sv_dup_inc(proto_perl->Tbodytarget, param); + PL_formtarget = sv_dup(proto_perl->Tformtarget, param); - PL_ors_sv = sv_dup_inc(proto_perl->Iors_sv, param); + PL_restartop = proto_perl->Trestartop; + PL_in_eval = proto_perl->Tin_eval; + PL_delaymagic = proto_perl->Tdelaymagic; + PL_dirty = proto_perl->Tdirty; + PL_localizing = proto_perl->Tlocalizing; - /* interpreter atexit processing */ - PL_exitlistlen = proto_perl->Iexitlistlen; - if (PL_exitlistlen) { - Newx(PL_exitlist, PL_exitlistlen, PerlExitListEntry); - Copy(proto_perl->Iexitlist, PL_exitlist, PL_exitlistlen, PerlExitListEntry); - } - else - PL_exitlist = (PerlExitListEntry*)NULL; - PL_modglobal = hv_dup_inc(proto_perl->Imodglobal, param); - PL_custom_op_names = hv_dup_inc(proto_perl->Icustom_op_names,param); - PL_custom_op_descs = hv_dup_inc(proto_perl->Icustom_op_descs,param); + PL_errors = sv_dup_inc(proto_perl->Terrors, param); + PL_hv_fetch_ent_mh = Nullhe; + PL_modcount = proto_perl->Tmodcount; + PL_lastgotoprobe = Nullop; + PL_dumpindent = proto_perl->Tdumpindent; - PL_profiledata = NULL; - PL_rsfp = fp_dup(proto_perl->Irsfp, '<', param); - /* PL_rsfp_filters entries have fake IoDIRP() */ - PL_rsfp_filters = av_dup_inc(proto_perl->Irsfp_filters, param); + PL_sortcop = (OP*)any_dup(proto_perl->Tsortcop, proto_perl); + PL_sortstash = hv_dup(proto_perl->Tsortstash, param); + PL_firstgv = gv_dup(proto_perl->Tfirstgv, param); + PL_secondgv = gv_dup(proto_perl->Tsecondgv, param); + PL_efloatbuf = Nullch; /* reinits on demand */ + PL_efloatsize = 0; /* reinits on demand */ - PL_compcv = cv_dup(proto_perl->Icompcv, param); + /* regex stuff */ - PAD_CLONE_VARS(proto_perl, param); + PL_screamfirst = NULL; + PL_screamnext = NULL; + PL_maxscream = -1; /* reinits on demand */ + PL_lastscream = Nullsv; -#ifdef HAVE_INTERP_INTERN - sys_intern_dup(&proto_perl->Isys_intern, &PL_sys_intern); + PL_watchaddr = NULL; + PL_watchok = Nullch; + + PL_regdummy = proto_perl->Tregdummy; + PL_regprecomp = Nullch; + PL_regnpar = 0; + PL_regsize = 0; + PL_colorset = 0; /* reinits PL_colors[] */ + /*PL_colors[6] = {0,0,0,0,0,0};*/ + PL_reginput = Nullch; + PL_regbol = Nullch; + PL_regeol = Nullch; + PL_regstartp = (I32*)NULL; + PL_regendp = (I32*)NULL; + PL_reglastparen = (U32*)NULL; + PL_reglastcloseparen = (U32*)NULL; + PL_regtill = Nullch; + PL_reg_start_tmp = (char**)NULL; + PL_reg_start_tmpl = 0; + PL_regdata = (struct reg_data*)NULL; + PL_bostr = Nullch; + PL_reg_flags = 0; + PL_reg_eval_set = 0; + PL_regnarrate = 0; + PL_regprogram = (regnode*)NULL; + PL_regindent = 0; + PL_regcc = (CURCUR*)NULL; + PL_reg_call_cc = (struct re_cc_state*)NULL; + PL_reg_re = (regexp*)NULL; + PL_reg_ganch = Nullch; + PL_reg_sv = Nullsv; + PL_reg_match_utf8 = FALSE; + PL_reg_magic = (MAGIC*)NULL; + PL_reg_oldpos = 0; + PL_reg_oldcurpm = (PMOP*)NULL; + PL_reg_curpm = (PMOP*)NULL; + PL_reg_oldsaved = Nullch; + PL_reg_oldsavedlen = 0; +#ifdef PERL_OLD_COPY_ON_WRITE + PL_nrs = Nullsv; #endif + PL_reg_maxiter = 0; + PL_reg_leftiter = 0; + PL_reg_poscache = Nullch; + PL_reg_poscache_size= 0; - /* more statics moved here */ - PL_generation = proto_perl->Igeneration; - PL_DBcv = cv_dup(proto_perl->IDBcv, param); - - PL_in_clean_objs = proto_perl->Iin_clean_objs; - PL_in_clean_all = proto_perl->Iin_clean_all; + /* RE engine - function pointers */ + PL_regcompp = proto_perl->Tregcompp; + PL_regexecp = proto_perl->Tregexecp; + PL_regint_start = proto_perl->Tregint_start; + PL_regint_string = proto_perl->Tregint_string; + PL_regfree = proto_perl->Tregfree; - PL_uid = proto_perl->Iuid; - PL_euid = proto_perl->Ieuid; - PL_gid = proto_perl->Igid; - PL_egid = proto_perl->Iegid; - PL_nomemok = proto_perl->Inomemok; - PL_an = proto_perl->Ian; - PL_evalseq = proto_perl->Ievalseq; - PL_origenviron = proto_perl->Iorigenviron; /* XXX not quite right */ - PL_origalen = proto_perl->Iorigalen; -#ifdef PERL_USES_PL_PIDSTATUS - PL_pidstatus = newHV(); /* XXX flag for cloning? */ -#endif - PL_osname = SAVEPV(proto_perl->Iosname); - PL_sighandlerp = proto_perl->Isighandlerp; + PL_reginterp_cnt = 0; + PL_reg_starttry = 0; - PL_runops = proto_perl->Irunops; + /* Pluggable optimizer */ + PL_peepp = proto_perl->Tpeepp; - Copy(proto_perl->Itokenbuf, PL_tokenbuf, 256, char); + PL_stashcache = newHV(); -#ifdef CSH - PL_cshlen = proto_perl->Icshlen; - PL_cshname = proto_perl->Icshname; /* XXX never deallocated */ -#endif + if (!(flags & CLONEf_KEEP_PTR_TABLE)) { + ptr_table_free(PL_ptr_table); + PL_ptr_table = NULL; + } - PL_lex_state = proto_perl->Ilex_state; - PL_lex_defer = proto_perl->Ilex_defer; - PL_lex_expect = proto_perl->Ilex_expect; - PL_lex_formbrack = proto_perl->Ilex_formbrack; - PL_lex_dojoin = proto_perl->Ilex_dojoin; - PL_lex_starts = proto_perl->Ilex_starts; - PL_lex_stuff = sv_dup_inc(proto_perl->Ilex_stuff, param); - PL_lex_repl = sv_dup_inc(proto_perl->Ilex_repl, param); - PL_lex_op = proto_perl->Ilex_op; - PL_lex_inpat = proto_perl->Ilex_inpat; - PL_lex_inwhat = proto_perl->Ilex_inwhat; - PL_lex_brackets = proto_perl->Ilex_brackets; - i = (PL_lex_brackets < 120 ? 120 : PL_lex_brackets); - PL_lex_brackstack = SAVEPVN(proto_perl->Ilex_brackstack,i); - PL_lex_casemods = proto_perl->Ilex_casemods; - i = (PL_lex_casemods < 12 ? 12 : PL_lex_casemods); - PL_lex_casestack = SAVEPVN(proto_perl->Ilex_casestack,i); + /* Call the ->CLONE method, if it exists, for each of the stashes + identified by sv_dup() above. + */ + while(av_len(param->stashes) != -1) { + HV* const stash = (HV*) av_shift(param->stashes); + GV* const cloner = gv_fetchmethod_autoload(stash, "CLONE", 0); + if (cloner && GvCV(cloner)) { + dSP; + ENTER; + SAVETMPS; + PUSHMARK(SP); + XPUSHs(sv_2mortal(newSVhek(HvNAME_HEK(stash)))); + PUTBACK; + call_sv((SV*)GvCV(cloner), G_DISCARD); + FREETMPS; + LEAVE; + } + } - Copy(proto_perl->Inextval, PL_nextval, 5, YYSTYPE); - Copy(proto_perl->Inexttype, PL_nexttype, 5, I32); - PL_nexttoke = proto_perl->Inexttoke; + SvREFCNT_dec(param->stashes); - /* XXX This is probably masking the deeper issue of why - * SvANY(proto_perl->Ilinestr) can be NULL at this point. For test case: - * http://archive.develooper.com/perl5-porters%40perl.org/msg83298.html - * (A little debugging with a watchpoint on it may help.) - */ - if (SvANY(proto_perl->Ilinestr)) { - PL_linestr = sv_dup_inc(proto_perl->Ilinestr, param); - i = proto_perl->Ibufptr - SvPVX_const(proto_perl->Ilinestr); - PL_bufptr = SvPVX(PL_linestr) + (i < 0 ? 0 : i); - i = proto_perl->Ioldbufptr - SvPVX_const(proto_perl->Ilinestr); - PL_oldbufptr = SvPVX(PL_linestr) + (i < 0 ? 0 : i); - i = proto_perl->Ioldoldbufptr - SvPVX_const(proto_perl->Ilinestr); - PL_oldoldbufptr = SvPVX(PL_linestr) + (i < 0 ? 0 : i); - i = proto_perl->Ilinestart - SvPVX_const(proto_perl->Ilinestr); - PL_linestart = SvPVX(PL_linestr) + (i < 0 ? 0 : i); - } - else { - PL_linestr = NEWSV(65,79); - sv_upgrade(PL_linestr,SVt_PVIV); - sv_setpvn(PL_linestr,"",0); - PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); + /* orphaned? eg threads->new inside BEGIN or use */ + if (PL_compcv && ! SvREFCNT(PL_compcv)) { + (void)SvREFCNT_inc(PL_compcv); + SAVEFREESV(PL_compcv); } - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_pending_ident = proto_perl->Ipending_ident; - PL_sublex_info = proto_perl->Isublex_info; /* XXX not quite right */ - PL_expect = proto_perl->Iexpect; + return my_perl; +} - PL_multi_start = proto_perl->Imulti_start; - PL_multi_end = proto_perl->Imulti_end; - PL_multi_open = proto_perl->Imulti_open; - PL_multi_close = proto_perl->Imulti_close; +#endif /* USE_ITHREADS */ - PL_error_count = proto_perl->Ierror_count; - PL_subline = proto_perl->Isubline; - PL_subname = sv_dup_inc(proto_perl->Isubname, param); +/* +=head1 Unicode Support - /* XXX See comment on SvANY(proto_perl->Ilinestr) above */ - if (SvANY(proto_perl->Ilinestr)) { - i = proto_perl->Ilast_uni - SvPVX_const(proto_perl->Ilinestr); - PL_last_uni = SvPVX(PL_linestr) + (i < 0 ? 0 : i); - i = proto_perl->Ilast_lop - SvPVX_const(proto_perl->Ilinestr); - PL_last_lop = SvPVX(PL_linestr) + (i < 0 ? 0 : i); - PL_last_lop_op = proto_perl->Ilast_lop_op; - } - else { - PL_last_uni = SvPVX(PL_linestr); - PL_last_lop = SvPVX(PL_linestr); - PL_last_lop_op = 0; - } - PL_in_my = proto_perl->Iin_my; - PL_in_my_stash = hv_dup(proto_perl->Iin_my_stash, param); -#ifdef FCRYPT - PL_cryptseen = proto_perl->Icryptseen; -#endif +=for apidoc sv_recode_to_utf8 - PL_hints = proto_perl->Ihints; +The encoding is assumed to be an Encode object, on entry the PV +of the sv is assumed to be octets in that encoding, and the sv +will be converted into Unicode (and UTF-8). - PL_amagic_generation = proto_perl->Iamagic_generation; +If the sv already is UTF-8 (or if it is not POK), or if the encoding +is not a reference, nothing is done to the sv. If the encoding is not +an C Encoding object, bad things will happen. +(See F and L). -#ifdef USE_LOCALE_COLLATE - PL_collation_ix = proto_perl->Icollation_ix; - PL_collation_name = SAVEPV(proto_perl->Icollation_name); - PL_collation_standard = proto_perl->Icollation_standard; - PL_collxfrm_base = proto_perl->Icollxfrm_base; - PL_collxfrm_mult = proto_perl->Icollxfrm_mult; -#endif /* USE_LOCALE_COLLATE */ +The PV of the sv is returned. -#ifdef USE_LOCALE_NUMERIC - PL_numeric_name = SAVEPV(proto_perl->Inumeric_name); - PL_numeric_standard = proto_perl->Inumeric_standard; - PL_numeric_local = proto_perl->Inumeric_local; - PL_numeric_radix_sv = sv_dup_inc(proto_perl->Inumeric_radix_sv, param); -#endif /* !USE_LOCALE_NUMERIC */ +=cut */ - /* utf8 character classes */ - PL_utf8_alnum = sv_dup_inc(proto_perl->Iutf8_alnum, param); - PL_utf8_alnumc = sv_dup_inc(proto_perl->Iutf8_alnumc, param); - PL_utf8_ascii = sv_dup_inc(proto_perl->Iutf8_ascii, param); - PL_utf8_alpha = sv_dup_inc(proto_perl->Iutf8_alpha, param); - PL_utf8_space = sv_dup_inc(proto_perl->Iutf8_space, param); - PL_utf8_cntrl = sv_dup_inc(proto_perl->Iutf8_cntrl, param); - PL_utf8_graph = sv_dup_inc(proto_perl->Iutf8_graph, param); - PL_utf8_digit = sv_dup_inc(proto_perl->Iutf8_digit, param); - PL_utf8_upper = sv_dup_inc(proto_perl->Iutf8_upper, param); - PL_utf8_lower = sv_dup_inc(proto_perl->Iutf8_lower, param); - PL_utf8_print = sv_dup_inc(proto_perl->Iutf8_print, param); - PL_utf8_punct = sv_dup_inc(proto_perl->Iutf8_punct, param); - PL_utf8_xdigit = sv_dup_inc(proto_perl->Iutf8_xdigit, param); - PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); - PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param); - PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param); - PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param); - PL_utf8_tofold = sv_dup_inc(proto_perl->Iutf8_tofold, param); - PL_utf8_idstart = sv_dup_inc(proto_perl->Iutf8_idstart, param); - PL_utf8_idcont = sv_dup_inc(proto_perl->Iutf8_idcont, param); +char * +Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) +{ + dVAR; + if (SvPOK(sv) && !SvUTF8(sv) && !IN_BYTES && SvROK(encoding)) { + SV *uni; + STRLEN len; + const char *s; + dSP; + ENTER; + SAVETMPS; + save_re_context(); + PUSHMARK(sp); + EXTEND(SP, 3); + XPUSHs(encoding); + XPUSHs(sv); +/* + NI-S 2002/07/09 + Passing sv_yes is wrong - it needs to be or'ed set of constants + for Encode::XS, while UTf-8 decode (currently) assumes a true value means + remove converted chars from source. + + Both will default the value - let them. + + XPUSHs(&PL_sv_yes); +*/ + PUTBACK; + call_method("decode", G_SCALAR); + SPAGAIN; + uni = POPs; + PUTBACK; + s = SvPV_const(uni, len); + if (s != SvPVX_const(sv)) { + SvGROW(sv, len + 1); + Move(s, SvPVX(sv), len + 1, char); + SvCUR_set(sv, len); + } + FREETMPS; + LEAVE; + SvUTF8_on(sv); + return SvPVX(sv); + } + return SvPOKp(sv) ? SvPVX(sv) : NULL; +} - /* Did the locale setup indicate UTF-8? */ - PL_utf8locale = proto_perl->Iutf8locale; - /* Unicode features (see perlrun/-C) */ - PL_unicode = proto_perl->Iunicode; +/* +=for apidoc sv_cat_decode - /* Pre-5.8 signals control */ - PL_signals = proto_perl->Isignals; +The encoding is assumed to be an Encode object, the PV of the ssv is +assumed to be octets in that encoding and decoding the input starts +from the position which (PV + *offset) pointed to. The dsv will be +concatenated the decoded UTF-8 string from ssv. Decoding will terminate +when the string tstr appears in decoding output or the input ends on +the PV of the ssv. The value which the offset points will be modified +to the last input position on the ssv. - /* times() ticks per second */ - PL_clocktick = proto_perl->Iclocktick; +Returns TRUE if the terminator was found, else returns FALSE. - /* Recursion stopper for PerlIO_find_layer */ - PL_in_load_module = proto_perl->Iin_load_module; +=cut */ - /* sort() routine */ - PL_sort_RealCmp = proto_perl->Isort_RealCmp; +bool +Perl_sv_cat_decode(pTHX_ SV *dsv, SV *encoding, + SV *ssv, int *offset, char *tstr, int tlen) +{ + dVAR; + bool ret = FALSE; + if (SvPOK(ssv) && SvPOK(dsv) && SvROK(encoding) && offset) { + SV *offsv; + dSP; + ENTER; + SAVETMPS; + save_re_context(); + PUSHMARK(sp); + EXTEND(SP, 6); + XPUSHs(encoding); + XPUSHs(dsv); + XPUSHs(ssv); + XPUSHs(offsv = sv_2mortal(newSViv(*offset))); + XPUSHs(sv_2mortal(newSVpvn(tstr, tlen))); + PUTBACK; + call_method("cat_decode", G_SCALAR); + SPAGAIN; + ret = SvTRUE(TOPs); + *offset = SvIV(offsv); + PUTBACK; + FREETMPS; + LEAVE; + } + else + Perl_croak(aTHX_ "Invalid argument to sv_cat_decode"); + return ret; - /* Not really needed/useful since the reenrant_retint is "volatile", - * but do it for consistency's sake. */ - PL_reentrant_retint = proto_perl->Ireentrant_retint; +} - /* Hooks to shared SVs and locks. */ - PL_sharehook = proto_perl->Isharehook; - PL_lockhook = proto_perl->Ilockhook; - PL_unlockhook = proto_perl->Iunlockhook; - PL_threadhook = proto_perl->Ithreadhook; +/* --------------------------------------------------------------------- + * + * support functions for report_uninit() + */ - PL_runops_std = proto_perl->Irunops_std; - PL_runops_dbg = proto_perl->Irunops_dbg; +/* the maxiumum size of array or hash where we will scan looking + * for the undefined element that triggered the warning */ -#ifdef THREADS_HAVE_PIDS - PL_ppid = proto_perl->Ippid; -#endif +#define FUV_MAX_SEARCH_SIZE 1000 - /* swatch cache */ - PL_last_swash_hv = Nullhv; /* reinits on demand */ - PL_last_swash_klen = 0; - PL_last_swash_key[0]= '\0'; - PL_last_swash_tmps = (U8*)NULL; - PL_last_swash_slen = 0; +/* Look for an entry in the hash whose value has the same SV as val; + * If so, return a mortal copy of the key. */ - PL_glob_index = proto_perl->Iglob_index; - PL_srand_called = proto_perl->Isrand_called; - PL_uudmap['M'] = 0; /* reinits on demand */ - PL_bitcount = Nullch; /* reinits on demand */ +STATIC SV* +S_find_hash_subscript(pTHX_ HV *hv, SV* val) +{ + dVAR; + register HE **array; + I32 i; - if (proto_perl->Ipsig_pend) { - Newxz(PL_psig_pend, SIG_SIZE, int); - } - else { - PL_psig_pend = (int*)NULL; - } + if (!hv || SvMAGICAL(hv) || !HvARRAY(hv) || + (HvTOTALKEYS(hv) > FUV_MAX_SEARCH_SIZE)) + return Nullsv; - if (proto_perl->Ipsig_ptr) { - Newxz(PL_psig_ptr, SIG_SIZE, SV*); - Newxz(PL_psig_name, SIG_SIZE, SV*); - for (i = 1; i < SIG_SIZE; i++) { - PL_psig_ptr[i] = sv_dup_inc(proto_perl->Ipsig_ptr[i], param); - PL_psig_name[i] = sv_dup_inc(proto_perl->Ipsig_name[i], param); + array = HvARRAY(hv); + + for (i=HvMAX(hv); i>0; i--) { + register HE *entry; + for (entry = array[i]; entry; entry = HeNEXT(entry)) { + if (HeVAL(entry) != val) + continue; + if ( HeVAL(entry) == &PL_sv_undef || + HeVAL(entry) == &PL_sv_placeholder) + continue; + if (!HeKEY(entry)) + return Nullsv; + if (HeKLEN(entry) == HEf_SVKEY) + return sv_mortalcopy(HeKEY_sv(entry)); + return sv_2mortal(newSVpvn(HeKEY(entry), HeKLEN(entry))); } } - else { - PL_psig_ptr = (SV**)NULL; - PL_psig_name = (SV**)NULL; + return Nullsv; +} + +/* Look for an entry in the array whose value has the same SV as val; + * If so, return the index, otherwise return -1. */ + +STATIC I32 +S_find_array_subscript(pTHX_ AV *av, SV* val) +{ + SV** svp; + I32 i; + if (!av || SvMAGICAL(av) || !AvARRAY(av) || + (AvFILLp(av) > FUV_MAX_SEARCH_SIZE)) + return -1; + + svp = AvARRAY(av); + for (i=AvFILLp(av); i>=0; i--) { + if (svp[i] == val && svp[i] != &PL_sv_undef) + return i; } + return -1; +} - /* thrdvar.h stuff */ +/* S_varname(): return the name of a variable, optionally with a subscript. + * If gv is non-zero, use the name of that global, along with gvtype (one + * of "$", "@", "%"); otherwise use the name of the lexical at pad offset + * targ. Depending on the value of the subscript_type flag, return: + */ - if (flags & CLONEf_COPY_STACKS) { - /* next allocation will be PL_tmps_stack[PL_tmps_ix+1] */ - PL_tmps_ix = proto_perl->Ttmps_ix; - PL_tmps_max = proto_perl->Ttmps_max; - PL_tmps_floor = proto_perl->Ttmps_floor; - Newxz(PL_tmps_stack, PL_tmps_max, SV*); - i = 0; - while (i <= PL_tmps_ix) { - PL_tmps_stack[i] = sv_dup_inc(proto_perl->Ttmps_stack[i], param); - ++i; - } +#define FUV_SUBSCRIPT_NONE 1 /* "@foo" */ +#define FUV_SUBSCRIPT_ARRAY 2 /* "$foo[aindex]" */ +#define FUV_SUBSCRIPT_HASH 3 /* "$foo{keyname}" */ +#define FUV_SUBSCRIPT_WITHIN 4 /* "within @foo" */ - /* next PUSHMARK() sets *(PL_markstack_ptr+1) */ - i = proto_perl->Tmarkstack_max - proto_perl->Tmarkstack; - Newxz(PL_markstack, i, I32); - PL_markstack_max = PL_markstack + (proto_perl->Tmarkstack_max - - proto_perl->Tmarkstack); - PL_markstack_ptr = PL_markstack + (proto_perl->Tmarkstack_ptr - - proto_perl->Tmarkstack); - Copy(proto_perl->Tmarkstack, PL_markstack, - PL_markstack_ptr - PL_markstack + 1, I32); +STATIC SV* +S_varname(pTHX_ GV *gv, const char gvtype, PADOFFSET targ, + SV* keyname, I32 aindex, int subscript_type) +{ - /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix] - * NOTE: unlike the others! */ - PL_scopestack_ix = proto_perl->Tscopestack_ix; - PL_scopestack_max = proto_perl->Tscopestack_max; - Newxz(PL_scopestack, PL_scopestack_max, I32); - Copy(proto_perl->Tscopestack, PL_scopestack, PL_scopestack_ix, I32); + SV * const name = sv_newmortal(); + if (gv) { + char buffer[2]; + buffer[0] = gvtype; + buffer[1] = 0; - /* NOTE: si_dup() looks at PL_markstack */ - PL_curstackinfo = si_dup(proto_perl->Tcurstackinfo, param); + /* as gv_fullname4(), but add literal '^' for $^FOO names */ - /* PL_curstack = PL_curstackinfo->si_stack; */ - PL_curstack = av_dup(proto_perl->Tcurstack, param); - PL_mainstack = av_dup(proto_perl->Tmainstack, param); + gv_fullname4(name, gv, buffer, 0); - /* next PUSHs() etc. set *(PL_stack_sp+1) */ - PL_stack_base = AvARRAY(PL_curstack); - PL_stack_sp = PL_stack_base + (proto_perl->Tstack_sp - - proto_perl->Tstack_base); - PL_stack_max = PL_stack_base + AvMAX(PL_curstack); + if ((unsigned int)SvPVX(name)[1] <= 26) { + buffer[0] = '^'; + buffer[1] = SvPVX(name)[1] + 'A' - 1; - /* next SSPUSHFOO() sets PL_savestack[PL_savestack_ix] - * NOTE: unlike the others! */ - PL_savestack_ix = proto_perl->Tsavestack_ix; - PL_savestack_max = proto_perl->Tsavestack_max; - /*Newxz(PL_savestack, PL_savestack_max, ANY);*/ - PL_savestack = ss_dup(proto_perl, param); + /* Swap the 1 unprintable control character for the 2 byte pretty + version - ie substr($name, 1, 1) = $buffer; */ + sv_insert(name, 1, 1, buffer, 2); + } } else { - init_stacks(); - ENTER; /* perl_destruct() wants to LEAVE; */ + U32 unused; + CV * const cv = find_runcv(&unused); + SV *sv; + AV *av; + + if (!cv || !CvPADLIST(cv)) + return Nullsv; + av = (AV*)(*av_fetch(CvPADLIST(cv), 0, FALSE)); + sv = *av_fetch(av, targ, FALSE); + /* SvLEN in a pad name is not to be trusted */ + sv_setpv(name, SvPV_nolen_const(sv)); + } + + if (subscript_type == FUV_SUBSCRIPT_HASH) { + SV * const sv = NEWSV(0,0); + *SvPVX(name) = '$'; + Perl_sv_catpvf(aTHX_ name, "{%s}", + pv_display(sv,SvPVX_const(keyname), SvCUR(keyname), 0, 32)); + SvREFCNT_dec(sv); + } + else if (subscript_type == FUV_SUBSCRIPT_ARRAY) { + *SvPVX(name) = '$'; + Perl_sv_catpvf(aTHX_ name, "[%"IVdf"]", (IV)aindex); } + else if (subscript_type == FUV_SUBSCRIPT_WITHIN) + sv_insert(name, 0, 0, "within ", 7); + + return name; +} + + +/* +=for apidoc find_uninit_var - PL_start_env = proto_perl->Tstart_env; /* XXXXXX */ - PL_top_env = &PL_start_env; +Find the name of the undefined variable (if any) that caused the operator o +to issue a "Use of uninitialized value" warning. +If match is true, only return a name if it's value matches uninit_sv. +So roughly speaking, if a unary operator (such as OP_COS) generates a +warning, then following the direct child of the op may yield an +OP_PADSV or OP_GV that gives the name of the undefined variable. On the +other hand, with OP_ADD there are two branches to follow, so we only print +the variable name if we get an exact match. - PL_op = proto_perl->Top; +The name is returned as a mortal SV. - PL_Sv = Nullsv; - PL_Xpv = (XPV*)NULL; - PL_na = proto_perl->Tna; +Assumes that PL_op is the op that originally triggered the error, and that +PL_comppad/PL_curpad points to the currently executing pad. - PL_statbuf = proto_perl->Tstatbuf; - PL_statcache = proto_perl->Tstatcache; - PL_statgv = gv_dup(proto_perl->Tstatgv, param); - PL_statname = sv_dup_inc(proto_perl->Tstatname, param); -#ifdef HAS_TIMES - PL_timesbuf = proto_perl->Ttimesbuf; -#endif +=cut +*/ - PL_tainted = proto_perl->Ttainted; - PL_curpm = proto_perl->Tcurpm; /* XXX No PMOP ref count */ - PL_rs = sv_dup_inc(proto_perl->Trs, param); - PL_last_in_gv = gv_dup(proto_perl->Tlast_in_gv, param); - PL_ofs_sv = sv_dup_inc(proto_perl->Tofs_sv, param); - PL_defoutgv = gv_dup_inc(proto_perl->Tdefoutgv, param); - PL_chopset = proto_perl->Tchopset; /* XXX never deallocated */ - PL_toptarget = sv_dup_inc(proto_perl->Ttoptarget, param); - PL_bodytarget = sv_dup_inc(proto_perl->Tbodytarget, param); - PL_formtarget = sv_dup(proto_perl->Tformtarget, param); +STATIC SV * +S_find_uninit_var(pTHX_ OP* obase, SV* uninit_sv, bool match) +{ + dVAR; + SV *sv; + AV *av; + GV *gv; + OP *o, *o2, *kid; - PL_restartop = proto_perl->Trestartop; - PL_in_eval = proto_perl->Tin_eval; - PL_delaymagic = proto_perl->Tdelaymagic; - PL_dirty = proto_perl->Tdirty; - PL_localizing = proto_perl->Tlocalizing; + if (!obase || (match && (!uninit_sv || uninit_sv == &PL_sv_undef || + uninit_sv == &PL_sv_placeholder))) + return Nullsv; - PL_errors = sv_dup_inc(proto_perl->Terrors, param); - PL_hv_fetch_ent_mh = Nullhe; - PL_modcount = proto_perl->Tmodcount; - PL_lastgotoprobe = Nullop; - PL_dumpindent = proto_perl->Tdumpindent; + switch (obase->op_type) { - PL_sortcop = (OP*)any_dup(proto_perl->Tsortcop, proto_perl); - PL_sortstash = hv_dup(proto_perl->Tsortstash, param); - PL_firstgv = gv_dup(proto_perl->Tfirstgv, param); - PL_secondgv = gv_dup(proto_perl->Tsecondgv, param); - PL_efloatbuf = Nullch; /* reinits on demand */ - PL_efloatsize = 0; /* reinits on demand */ + case OP_RV2AV: + case OP_RV2HV: + case OP_PADAV: + case OP_PADHV: + { + const bool pad = (obase->op_type == OP_PADAV || obase->op_type == OP_PADHV); + const bool hash = (obase->op_type == OP_PADHV || obase->op_type == OP_RV2HV); + I32 index = 0; + SV *keysv = Nullsv; + int subscript_type = FUV_SUBSCRIPT_WITHIN; - /* regex stuff */ + if (pad) { /* @lex, %lex */ + sv = PAD_SVl(obase->op_targ); + gv = Nullgv; + } + else { + if (cUNOPx(obase)->op_first->op_type == OP_GV) { + /* @global, %global */ + gv = cGVOPx_gv(cUNOPx(obase)->op_first); + if (!gv) + break; + sv = hash ? (SV*)GvHV(gv): (SV*)GvAV(gv); + } + else /* @{expr}, %{expr} */ + return find_uninit_var(cUNOPx(obase)->op_first, + uninit_sv, match); + } - PL_screamfirst = NULL; - PL_screamnext = NULL; - PL_maxscream = -1; /* reinits on demand */ - PL_lastscream = Nullsv; + /* attempt to find a match within the aggregate */ + if (hash) { + keysv = S_find_hash_subscript(aTHX_ (HV*)sv, uninit_sv); + if (keysv) + subscript_type = FUV_SUBSCRIPT_HASH; + } + else { + index = S_find_array_subscript(aTHX_ (AV*)sv, uninit_sv); + if (index >= 0) + subscript_type = FUV_SUBSCRIPT_ARRAY; + } - PL_watchaddr = NULL; - PL_watchok = Nullch; + if (match && subscript_type == FUV_SUBSCRIPT_WITHIN) + break; - PL_regdummy = proto_perl->Tregdummy; - PL_regprecomp = Nullch; - PL_regnpar = 0; - PL_regsize = 0; - PL_colorset = 0; /* reinits PL_colors[] */ - /*PL_colors[6] = {0,0,0,0,0,0};*/ - PL_reginput = Nullch; - PL_regbol = Nullch; - PL_regeol = Nullch; - PL_regstartp = (I32*)NULL; - PL_regendp = (I32*)NULL; - PL_reglastparen = (U32*)NULL; - PL_reglastcloseparen = (U32*)NULL; - PL_regtill = Nullch; - PL_reg_start_tmp = (char**)NULL; - PL_reg_start_tmpl = 0; - PL_regdata = (struct reg_data*)NULL; - PL_bostr = Nullch; - PL_reg_flags = 0; - PL_reg_eval_set = 0; - PL_regnarrate = 0; - PL_regprogram = (regnode*)NULL; - PL_regindent = 0; - PL_regcc = (CURCUR*)NULL; - PL_reg_call_cc = (struct re_cc_state*)NULL; - PL_reg_re = (regexp*)NULL; - PL_reg_ganch = Nullch; - PL_reg_sv = Nullsv; - PL_reg_match_utf8 = FALSE; - PL_reg_magic = (MAGIC*)NULL; - PL_reg_oldpos = 0; - PL_reg_oldcurpm = (PMOP*)NULL; - PL_reg_curpm = (PMOP*)NULL; - PL_reg_oldsaved = Nullch; - PL_reg_oldsavedlen = 0; -#ifdef PERL_OLD_COPY_ON_WRITE - PL_nrs = Nullsv; -#endif - PL_reg_maxiter = 0; - PL_reg_leftiter = 0; - PL_reg_poscache = Nullch; - PL_reg_poscache_size= 0; + return varname(gv, hash ? '%' : '@', obase->op_targ, + keysv, index, subscript_type); + } - /* RE engine - function pointers */ - PL_regcompp = proto_perl->Tregcompp; - PL_regexecp = proto_perl->Tregexecp; - PL_regint_start = proto_perl->Tregint_start; - PL_regint_string = proto_perl->Tregint_string; - PL_regfree = proto_perl->Tregfree; + case OP_PADSV: + if (match && PAD_SVl(obase->op_targ) != uninit_sv) + break; + return varname(Nullgv, '$', obase->op_targ, + Nullsv, 0, FUV_SUBSCRIPT_NONE); - PL_reginterp_cnt = 0; - PL_reg_starttry = 0; + case OP_GVSV: + gv = cGVOPx_gv(obase); + if (!gv || (match && GvSV(gv) != uninit_sv)) + break; + return varname(gv, '$', 0, Nullsv, 0, FUV_SUBSCRIPT_NONE); - /* Pluggable optimizer */ - PL_peepp = proto_perl->Tpeepp; + case OP_AELEMFAST: + if (obase->op_flags & OPf_SPECIAL) { /* lexical array */ + if (match) { + SV **svp; + av = (AV*)PAD_SV(obase->op_targ); + if (!av || SvRMAGICAL(av)) + break; + svp = av_fetch(av, (I32)obase->op_private, FALSE); + if (!svp || *svp != uninit_sv) + break; + } + return varname(Nullgv, '$', obase->op_targ, + Nullsv, (I32)obase->op_private, FUV_SUBSCRIPT_ARRAY); + } + else { + gv = cGVOPx_gv(obase); + if (!gv) + break; + if (match) { + SV **svp; + av = GvAV(gv); + if (!av || SvRMAGICAL(av)) + break; + svp = av_fetch(av, (I32)obase->op_private, FALSE); + if (!svp || *svp != uninit_sv) + break; + } + return varname(gv, '$', 0, + Nullsv, (I32)obase->op_private, FUV_SUBSCRIPT_ARRAY); + } + break; - PL_stashcache = newHV(); + case OP_EXISTS: + o = cUNOPx(obase)->op_first; + if (!o || o->op_type != OP_NULL || + ! (o->op_targ == OP_AELEM || o->op_targ == OP_HELEM)) + break; + return find_uninit_var(cBINOPo->op_last, uninit_sv, match); - if (!(flags & CLONEf_KEEP_PTR_TABLE)) { - ptr_table_free(PL_ptr_table); - PL_ptr_table = NULL; - } + case OP_AELEM: + case OP_HELEM: + if (PL_op == obase) + /* $a[uninit_expr] or $h{uninit_expr} */ + return find_uninit_var(cBINOPx(obase)->op_last, uninit_sv, match); - /* Call the ->CLONE method, if it exists, for each of the stashes - identified by sv_dup() above. - */ - while(av_len(param->stashes) != -1) { - HV* const stash = (HV*) av_shift(param->stashes); - GV* const cloner = gv_fetchmethod_autoload(stash, "CLONE", 0); - if (cloner && GvCV(cloner)) { - dSP; - ENTER; - SAVETMPS; - PUSHMARK(SP); - XPUSHs(sv_2mortal(newSVhek(HvNAME_HEK(stash)))); - PUTBACK; - call_sv((SV*)GvCV(cloner), G_DISCARD); - FREETMPS; - LEAVE; + gv = Nullgv; + o = cBINOPx(obase)->op_first; + kid = cBINOPx(obase)->op_last; + + /* get the av or hv, and optionally the gv */ + sv = Nullsv; + if (o->op_type == OP_PADAV || o->op_type == OP_PADHV) { + sv = PAD_SV(o->op_targ); + } + else if ((o->op_type == OP_RV2AV || o->op_type == OP_RV2HV) + && cUNOPo->op_first->op_type == OP_GV) + { + gv = cGVOPx_gv(cUNOPo->op_first); + if (!gv) + break; + sv = o->op_type == OP_RV2HV ? (SV*)GvHV(gv) : (SV*)GvAV(gv); + } + if (!sv) + break; + + if (kid && kid->op_type == OP_CONST && SvOK(cSVOPx_sv(kid))) { + /* index is constant */ + if (match) { + if (SvMAGICAL(sv)) + break; + if (obase->op_type == OP_HELEM) { + HE* he = hv_fetch_ent((HV*)sv, cSVOPx_sv(kid), 0, 0); + if (!he || HeVAL(he) != uninit_sv) + break; + } + else { + SV * const * const svp = av_fetch((AV*)sv, SvIV(cSVOPx_sv(kid)), FALSE); + if (!svp || *svp != uninit_sv) + break; + } + } + if (obase->op_type == OP_HELEM) + return varname(gv, '%', o->op_targ, + cSVOPx_sv(kid), 0, FUV_SUBSCRIPT_HASH); + else + return varname(gv, '@', o->op_targ, Nullsv, + SvIV(cSVOPx_sv(kid)), FUV_SUBSCRIPT_ARRAY); + } + else { + /* index is an expression; + * attempt to find a match within the aggregate */ + if (obase->op_type == OP_HELEM) { + SV * const keysv = S_find_hash_subscript(aTHX_ (HV*)sv, uninit_sv); + if (keysv) + return varname(gv, '%', o->op_targ, + keysv, 0, FUV_SUBSCRIPT_HASH); + } + else { + const I32 index = S_find_array_subscript(aTHX_ (AV*)sv, uninit_sv); + if (index >= 0) + return varname(gv, '@', o->op_targ, + Nullsv, index, FUV_SUBSCRIPT_ARRAY); + } + if (match) + break; + return varname(gv, + (o->op_type == OP_PADAV || o->op_type == OP_RV2AV) + ? '@' : '%', + o->op_targ, Nullsv, 0, FUV_SUBSCRIPT_WITHIN); } - } - SvREFCNT_dec(param->stashes); + break; - /* orphaned? eg threads->new inside BEGIN or use */ - if (PL_compcv && ! SvREFCNT(PL_compcv)) { - (void)SvREFCNT_inc(PL_compcv); - SAVEFREESV(PL_compcv); - } + case OP_AASSIGN: + /* only examine RHS */ + return find_uninit_var(cBINOPx(obase)->op_first, uninit_sv, match); - return my_perl; -} + case OP_OPEN: + o = cUNOPx(obase)->op_first; + if (o->op_type == OP_PUSHMARK) + o = o->op_sibling; -#endif /* USE_ITHREADS */ + if (!o->op_sibling) { + /* one-arg version of open is highly magical */ -/* -=head1 Unicode Support + if (o->op_type == OP_GV) { /* open FOO; */ + gv = cGVOPx_gv(o); + if (match && GvSV(gv) != uninit_sv) + break; + return varname(gv, '$', 0, + Nullsv, 0, FUV_SUBSCRIPT_NONE); + } + /* other possibilities not handled are: + * open $x; or open my $x; should return '${*$x}' + * open expr; should return '$'.expr ideally + */ + break; + } + goto do_op; -=for apidoc sv_recode_to_utf8 + /* ops where $_ may be an implicit arg */ + case OP_TRANS: + case OP_SUBST: + case OP_MATCH: + if ( !(obase->op_flags & OPf_STACKED)) { + if (uninit_sv == ((obase->op_private & OPpTARGET_MY) + ? PAD_SVl(obase->op_targ) + : DEFSV)) + { + sv = sv_newmortal(); + sv_setpvn(sv, "$_", 2); + return sv; + } + } + goto do_op; -The encoding is assumed to be an Encode object, on entry the PV -of the sv is assumed to be octets in that encoding, and the sv -will be converted into Unicode (and UTF-8). + case OP_PRTF: + case OP_PRINT: + /* skip filehandle as it can't produce 'undef' warning */ + o = cUNOPx(obase)->op_first; + if ((obase->op_flags & OPf_STACKED) && o->op_type == OP_PUSHMARK) + o = o->op_sibling->op_sibling; + goto do_op2; -If the sv already is UTF-8 (or if it is not POK), or if the encoding -is not a reference, nothing is done to the sv. If the encoding is not -an C Encoding object, bad things will happen. -(See F and L). -The PV of the sv is returned. + case OP_RV2SV: + case OP_CUSTOM: + case OP_ENTERSUB: + match = 1; /* XS or custom code could trigger random warnings */ + goto do_op; -=cut */ + case OP_SCHOMP: + case OP_CHOMP: + if (SvROK(PL_rs) && uninit_sv == SvRV(PL_rs)) + return sv_2mortal(newSVpvn("${$/}", 5)); + /* FALL THROUGH */ -char * -Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) -{ - dVAR; - if (SvPOK(sv) && !SvUTF8(sv) && !IN_BYTES && SvROK(encoding)) { - SV *uni; - STRLEN len; - const char *s; - dSP; - ENTER; - SAVETMPS; - save_re_context(); - PUSHMARK(sp); - EXTEND(SP, 3); - XPUSHs(encoding); - XPUSHs(sv); -/* - NI-S 2002/07/09 - Passing sv_yes is wrong - it needs to be or'ed set of constants - for Encode::XS, while UTf-8 decode (currently) assumes a true value means - remove converted chars from source. + default: + do_op: + if (!(obase->op_flags & OPf_KIDS)) + break; + o = cUNOPx(obase)->op_first; + + do_op2: + if (!o) + break; - Both will default the value - let them. + /* if all except one arg are constant, or have no side-effects, + * or are optimized away, then it's unambiguous */ + o2 = Nullop; + for (kid=o; kid; kid = kid->op_sibling) { + if (kid && + ( (kid->op_type == OP_CONST && SvOK(cSVOPx_sv(kid))) + || (kid->op_type == OP_NULL && ! (kid->op_flags & OPf_KIDS)) + || (kid->op_type == OP_PUSHMARK) + ) + ) + continue; + if (o2) { /* more than one found */ + o2 = Nullop; + break; + } + o2 = kid; + } + if (o2) + return find_uninit_var(o2, uninit_sv, match); - XPUSHs(&PL_sv_yes); -*/ - PUTBACK; - call_method("decode", G_SCALAR); - SPAGAIN; - uni = POPs; - PUTBACK; - s = SvPV_const(uni, len); - if (s != SvPVX_const(sv)) { - SvGROW(sv, len + 1); - Move(s, SvPVX(sv), len + 1, char); - SvCUR_set(sv, len); + /* scan all args */ + while (o) { + sv = find_uninit_var(o, uninit_sv, 1); + if (sv) + return sv; + o = o->op_sibling; } - FREETMPS; - LEAVE; - SvUTF8_on(sv); - return SvPVX(sv); + break; } - return SvPOKp(sv) ? SvPVX(sv) : NULL; + return Nullsv; } -/* -=for apidoc sv_cat_decode -The encoding is assumed to be an Encode object, the PV of the ssv is -assumed to be octets in that encoding and decoding the input starts -from the position which (PV + *offset) pointed to. The dsv will be -concatenated the decoded UTF-8 string from ssv. Decoding will terminate -when the string tstr appears in decoding output or the input ends on -the PV of the ssv. The value which the offset points will be modified -to the last input position on the ssv. +/* +=for apidoc report_uninit -Returns TRUE if the terminator was found, else returns FALSE. +Print appropriate "Use of uninitialized variable" warning -=cut */ +=cut +*/ -bool -Perl_sv_cat_decode(pTHX_ SV *dsv, SV *encoding, - SV *ssv, int *offset, char *tstr, int tlen) +void +Perl_report_uninit(pTHX_ SV* uninit_sv) { - dVAR; - bool ret = FALSE; - if (SvPOK(ssv) && SvPOK(dsv) && SvROK(encoding) && offset) { - SV *offsv; - dSP; - ENTER; - SAVETMPS; - save_re_context(); - PUSHMARK(sp); - EXTEND(SP, 6); - XPUSHs(encoding); - XPUSHs(dsv); - XPUSHs(ssv); - XPUSHs(offsv = sv_2mortal(newSViv(*offset))); - XPUSHs(sv_2mortal(newSVpvn(tstr, tlen))); - PUTBACK; - call_method("cat_decode", G_SCALAR); - SPAGAIN; - ret = SvTRUE(TOPs); - *offset = SvIV(offsv); - PUTBACK; - FREETMPS; - LEAVE; + if (PL_op) { + SV* varname = Nullsv; + if (uninit_sv) { + varname = find_uninit_var(PL_op, uninit_sv,0); + if (varname) + sv_insert(varname, 0, 0, " ", 1); + } + Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit, + varname ? SvPV_nolen_const(varname) : "", + " in ", OP_DESC(PL_op)); } else - Perl_croak(aTHX_ "Invalid argument to sv_cat_decode"); - return ret; + Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit, + "", "", ""); } /*