#endif
#ifdef PERL_UTF8_CACHE_ASSERT
-/* The cache element 0 is the Unicode offset;
- * the cache element 1 is the byte offset of the element 0;
- * the cache element 2 is the Unicode length of the substring;
- * the cache element 3 is the byte length of the substring;
- * The checking of the substring side would be good
- * but substr() has enough code paths to make my head spin;
- * if adding more checks watch out for the following tests:
+/* if adding more checks watch out for the following tests:
* t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
* lib/utf8.t lib/Unicode/Collate/t/index.t
* --jhi
*/
#define ASSERT_UTF8_CACHE(cache) \
- STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); } } STMT_END
+ STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
+ assert((cache)[2] <= (cache)[3]); \
+ assert((cache)[3] <= (cache)[1]);} \
+ } STMT_END
#else
#define ASSERT_UTF8_CACHE(cache) NOOP
#endif
# define SvARENA_CHAIN(sv) ((sv)->sv_u.svu_rv)
/* Whilst I'd love to do this, it seems that things like to check on
unreferenced scalars
-# define POSION_SV_HEAD(sv) Poison(sv, 1, struct STRUCT_SV)
+# define POSION_SV_HEAD(sv) PoisonNew(sv, 1, struct STRUCT_SV)
*/
-# define POSION_SV_HEAD(sv) Poison(&SvANY(sv), 1, void *), \
- Poison(&SvREFCNT(sv), 1, U32)
+# define POSION_SV_HEAD(sv) PoisonNew(&SvANY(sv), 1, void *), \
+ PoisonNew(&SvREFCNT(sv), 1, U32)
#else
# define SvARENA_CHAIN(sv) SvANY(sv)
# define POSION_SV_HEAD(sv)
do_clean_named_objs(pTHX_ SV *sv)
{
dVAR;
- if (SvTYPE(sv) == SVt_PVGV && GvGP(sv)) {
+ if (SvTYPE(sv) == SVt_PVGV && isGV_with_GP(sv) && GvGP(sv)) {
if ((
#ifdef PERL_DONT_CREATE_GVSV
GvSV(sv) &&
struct arena_desc set[ARENAS_PER_SET];
};
-#if !ARENASETS
-
-static void
-S_free_arena(pTHX_ void **root) {
- while (root) {
- void ** const next = *(void **)root;
- Safefree(root);
- root = next;
- }
-}
-#endif
-
/*
=for apidoc sv_free_arenas
Safefree(sva);
}
-#if ARENASETS
{
struct arena_set *next, *aroot = (struct arena_set*) PL_body_arenas;
Safefree(aroot);
}
}
-#else
- S_free_arena(aTHX_ (void**) PL_body_arenas);
-#endif
PL_body_arenas = 0;
for (i=0; i<PERL_ARENA_ROOTS_SIZE; i++)
contexts below (line ~10k)
*/
-/* get_arena(size): when ARENASETS is enabled, this creates
- custom-sized arenas, otherwize it uses PERL_ARENA_SIZE, as
- previously done.
+/* get_arena(size): this creates custom-sized arenas
TBD: export properly for hv.c: S_more_he().
*/
void*
Perl_get_arena(pTHX_ int arena_size)
{
-#if !ARENASETS
- union arena* arp;
-
- /* allocate and attach arena */
- Newx(arp, arena_size, char);
- arp->next = PL_body_arenas;
- PL_body_arenas = arp;
- return arp;
-
-#else
struct arena_desc* adesc;
struct arena_set *newroot, **aroot = (struct arena_set**) &PL_body_arenas;
int curr;
curr, adesc->arena, arena_size));
return adesc->arena;
-#endif
}
limited by PERL_ARENA_SIZE, so we can safely oversize the
declarations.
*/
-#define FIT_ARENA(count, body_size) \
- (!count || count * body_size > PERL_ARENA_SIZE) \
- ? (int)(PERL_ARENA_SIZE / body_size) * body_size : count * body_size
+#define FIT_ARENA0(body_size) \
+ ((size_t)(PERL_ARENA_SIZE / body_size) * body_size)
+#define FIT_ARENAn(count,body_size) \
+ ( count * body_size <= PERL_ARENA_SIZE) \
+ ? count * body_size \
+ : FIT_ARENA0 (body_size)
+#define FIT_ARENA(count,body_size) \
+ count \
+ ? FIT_ARENAn (count, body_size) \
+ : FIT_ARENA0 (body_size)
/* A macro to work out the offset needed to subtract from a pointer to (say)
#ifdef DEBUGGING
if (!done_sanity_check) {
- int i = SVt_LAST;
+ unsigned int i = SVt_LAST;
done_sanity_check = TRUE;
end = start + bdp->arena_size - body_size;
-#if !ARENASETS
- /* The initial slot is used to link the arenas together, so it isn't to be
- linked into the list of ready-to-use bodies. */
- start += body_size;
-#else
	/* computed count doesn't reflect the 1st slot reservation */
DEBUG_m(PerlIO_printf(Perl_debug_log,
"arena %p end %p arena-size %d type %d size %d ct %d\n",
start, end, bdp->arena_size, sv_type, body_size,
bdp->arena_size / body_size));
-#endif
*root = (void *)start;
void ** const r3wt = &PL_body_roots[sv_type]; \
LOCK_SV_MUTEX; \
xpv = *((void **)(r3wt)) \
- ? *((void **)(r3wt)) : S_more_bodies(aTHX_ sv_type); \
+ ? *((void **)(r3wt)) : more_bodies(sv_type); \
*(r3wt) = *(void**)(xpv); \
UNLOCK_SV_MUTEX; \
} STMT_END
SvPV_set(sv, NULL);
if (old_type >= SVt_PVMG) {
- SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_magic);
+ SvMAGIC_set(sv, ((XPVMG*)old_body)->xmg_u.xmg_magic);
SvSTASH_set(sv, ((XPVMG*)old_body)->xmg_stash);
}
break;
SvANY(sv) = new_body;
if (old_type_details->copy) {
- Copy((char *)old_body + old_type_details->offset,
- (char *)new_body + old_type_details->offset,
- old_type_details->copy, char);
+ /* There is now the potential for an upgrade from something without
+ an offset (PVNV or PVMG) to something with one (PVCV, PVFM) */
+ int offset = old_type_details->offset;
+ int length = old_type_details->copy;
+
+ if (new_type_details->offset > old_type_details->offset) {
+ const int difference
+ = new_type_details->offset - old_type_details->offset;
+ offset += difference;
+ length -= difference;
+ }
+ assert (length >= 0);
+
+ Copy((char *)old_body + offset, (char *)new_body + offset, length,
+ char);
}
#ifndef NV_ZERO_IS_ALLBITS_ZERO
{
register char *s;
+ if (PL_madskills && newlen >= 0x100000) {
+ PerlIO_printf(Perl_debug_log,
+ "Allocation too large: %"UVxf"\n", (UV)newlen);
+ }
#ifdef HAS_64K_LIMIT
if (newlen >= 0x10000) {
PerlIO_printf(Perl_debug_log,
certainly cast into the IV range at IV_MAX, whereas the correct
answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
cases go to UV */
+#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
+ if (Perl_isnan(SvNVX(sv))) {
+ SvUV_set(sv, 0);
+ SvIsUV_on(sv);
+ return FALSE;
+ }
+#endif
if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
SvIV_set(sv, I_V(SvNVX(sv)));
if (SvNVX(sv) == (NV) SvIVX(sv)
}
}
else {
- if (((SvFLAGS(sv) & (SVp_POK|SVp_SCREAM)) == SVp_SCREAM)
- && (SvTYPE(sv) == SVt_PVGV || SvTYPE(sv) == SVt_PVLV)) {
- return PTR2IV(glob_2inpuv((GV *)sv, NULL, TRUE));
+ if (isGV_with_GP(sv)) {
+ return (bool)PTR2IV(glob_2inpuv((GV *)sv, NULL, TRUE));
}
- if (SvTYPE(sv) == SVt_PVGV)
- sv_dump(sv);
if (!(SvFLAGS(sv) & SVs_PADTMP)) {
if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
#endif /* NV_PRESERVES_UV */
}
else {
- if (((SvFLAGS(sv) & (SVp_POK|SVp_SCREAM)) == SVp_SCREAM)
- && (SvTYPE(sv) == SVt_PVGV || SvTYPE(sv) == SVt_PVLV)) {
+ if (isGV_with_GP(sv)) {
glob_2inpuv((GV *)sv, NULL, TRUE);
return 0.0;
}
#endif
}
else {
- if (((SvFLAGS(sv) & (SVp_POK|SVp_SCREAM)) == SVp_SCREAM)
- && (SvTYPE(sv) == SVt_PVGV || SvTYPE(sv) == SVt_PVLV)) {
+ if (isGV_with_GP(sv)) {
return glob_2inpuv((GV *)sv, lp, FALSE);
}
if (SvNOKp(sv))
return SvNVX(sv) != 0.0;
else {
- if ((SvFLAGS(sv) & SVp_SCREAM)
- && (SvTYPE(sv) == (SVt_PVGV) || SvTYPE(sv) == (SVt_PVLV)))
+ if (isGV_with_GP(sv))
return TRUE;
else
return FALSE;
const char * const name = GvNAME(sstr);
const STRLEN len = GvNAMELEN(sstr);
/* don't upgrade SVt_PVLV: it can hold a glob */
- if (dtype != SVt_PVLV)
+ if (dtype != SVt_PVLV) {
+ if (dtype >= SVt_PV) {
+ SvPV_free(dstr);
+ SvPV_set(dstr, 0);
+ SvLEN_set(dstr, 0);
+ SvCUR_set(dstr, 0);
+ }
sv_upgrade(dstr, SVt_PVGV);
+ (void)SvOK_off(dstr);
+ SvSCREAM_on(dstr);
+ }
GvSTASH(dstr) = GvSTASH(sstr);
if (GvSTASH(dstr))
Perl_sv_add_backref(aTHX_ (SV*)GvSTASH(dstr), dstr);
- GvNAME(dstr) = savepvn(name, len);
- GvNAMELEN(dstr) = len;
+ gv_name_set((GV *)dstr, name, len, GV_ADD);
SvFAKE_on(dstr); /* can coerce to non-glob */
}
}
#endif
+ gp_free((GV*)dstr);
+ SvSCREAM_off(dstr);
(void)SvOK_off(dstr);
SvSCREAM_on(dstr);
GvINTRO_off(dstr); /* one-shot flag */
- gp_free((GV*)dstr);
GvGP(dstr) = gp_ref(GvGP(sstr));
if (SvTAINTED(sstr))
SvTAINT(dstr);
}
break;
}
- if (dref)
- SvREFCNT_dec(dref);
+ SvREFCNT_dec(dref);
if (SvTAINTED(sstr))
SvTAINT(dstr);
return;
if (dtype < SVt_PVNV)
sv_upgrade(dstr, SVt_PVNV);
break;
- case SVt_PVAV:
- case SVt_PVHV:
- case SVt_PVCV:
- case SVt_PVIO:
+ default:
{
const char * const type = sv_reftype(sstr,0);
if (PL_op)
case SVt_PVGV:
if (dtype <= SVt_PVGV) {
- S_glob_assign_glob(aTHX_ dstr, sstr, dtype);
+ glob_assign_glob(dstr, sstr, dtype);
return;
}
/*FALLTHROUGH*/
- default:
+ case SVt_PVMG:
+ case SVt_PVLV:
+ case SVt_PVBM:
if (SvGMAGICAL(sstr) && (flags & SV_GMAGIC)) {
mg_get(sstr);
if ((int)SvTYPE(sstr) != stype) {
stype = SvTYPE(sstr);
if (stype == SVt_PVGV && dtype <= SVt_PVGV) {
- S_glob_assign_glob(aTHX_ dstr, sstr, dtype);
+ glob_assign_glob(dstr, sstr, dtype);
return;
}
}
GvMULTI_on(dstr);
return;
}
- S_glob_assign_glob(aTHX_ dstr, sstr, dtype);
+ glob_assign_glob(dstr, sstr, dtype);
return;
}
if (dtype >= SVt_PV) {
if (dtype == SVt_PVGV) {
- S_glob_assign_ref(aTHX_ dstr, sstr);
+ glob_assign_ref(dstr, sstr);
return;
}
if (SvPVX_const(dstr)) {
if (sflags & SVf_IVisUV)
SvIsUV_on(dstr);
}
- SvFLAGS(dstr) |= sflags & (SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK|SVf_UTF8);
+ SvFLAGS(dstr) |= sflags & (SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK|SVf_UTF8
+ |SVf_AMAGIC);
{
const MAGIC * const smg = SvVOK(sstr);
if (smg) {
}
else if (sflags & (SVp_IOK|SVp_NOK)) {
(void)SvOK_off(dstr);
- SvFLAGS(dstr) |= sflags & (SVf_IOK|SVp_IOK|SVf_IVisUV|SVf_NOK|SVp_NOK);
+ SvFLAGS(dstr) |= sflags & (SVf_IOK|SVp_IOK|SVf_IVisUV|SVf_NOK|SVp_NOK
+ |SVf_AMAGIC);
if (sflags & SVp_IOK) {
/* XXXX Do we want to set IsUV for IV(ROK)? Be extra safe... */
SvIV_set(dstr, SvIVX(sstr));
}
}
else {
- if ((stype == SVt_PVGV || stype == SVt_PVLV)
- && (sflags & SVp_SCREAM)) {
+ if (isGV_with_GP(sstr)) {
/* This stringification rule for globs is spread in 3 places.
This feels bad. FIXME. */
const U32 wasfake = sflags & SVf_FAKE;
SvFAKE_off(sstr);
gv_efullname3(dstr, (GV *)sstr, "*");
SvFLAGS(sstr) |= wasfake;
+ SvFLAGS(dstr) |= sflags & SVf_AMAGIC;
}
else
(void)SvOK_off(dstr);
mg->mg_obj = obj;
}
else {
- mg->mg_obj = SvREFCNT_inc(obj);
+ mg->mg_obj = SvREFCNT_inc_simple(obj);
mg->mg_flags |= MGf_REFCOUNTED;
}
if (namlen > 0)
mg->mg_ptr = savepvn(name, namlen);
else if (namlen == HEf_SVKEY)
- mg->mg_ptr = (char*)SvREFCNT_inc((SV*)name);
+ mg->mg_ptr = (char*)SvREFCNT_inc_simple_NN((SV*)name);
else
mg->mg_ptr = (char *) name;
}
case PERL_MAGIC_qr:
vtable = &PL_vtbl_regexp;
break;
+ case PERL_MAGIC_hints:
+ /* As this vtable is all NULL, we can reuse it. */
case PERL_MAGIC_sig:
vtable = &PL_vtbl_sig;
break;
case PERL_MAGIC_backref:
vtable = &PL_vtbl_backref;
break;
+ case PERL_MAGIC_hintselem:
+ vtable = &PL_vtbl_hintselem;
+ break;
case PERL_MAGIC_ext:
/* Reserved for use by extensions not perl internals. */
/* Useful for attaching extension internal data to perl vars. */
MAGIC** mgp;
if (SvTYPE(sv) < SVt_PVMG || !SvMAGIC(sv))
return 0;
- mgp = &SvMAGIC(sv);
+ mgp = &(((XPVMG*) SvANY(sv))->xmg_u.xmg_magic);
for (mg = *mgp; mg; mg = *mgp) {
if (mg->mg_type == type) {
const MGVTBL* const vtbl = mg->mg_virtual;
Safefree(mg->mg_ptr);
else if (mg->mg_len == HEf_SVKEY)
SvREFCNT_dec((SV*)mg->mg_ptr);
- else if (mg->mg_type == PERL_MAGIC_utf8 && mg->mg_ptr)
+ else if (mg->mg_type == PERL_MAGIC_utf8)
Safefree(mg->mg_ptr);
}
if (mg->mg_flags & MGf_REFCOUNTED)
} else {
av = newAV();
AvREAL_off(av);
- SvREFCNT_inc(av);
+ SvREFCNT_inc_simple_void(av);
}
*avp = av;
}
}
}
if (type >= SVt_PVMG) {
- if (SvMAGIC(sv))
+ HV *ourstash;
+ if ((type == SVt_PVMG || type == SVt_PVGV) &&
+ (ourstash = OURSTASH(sv))) {
+ SvREFCNT_dec(ourstash);
+ } else if (SvMAGIC(sv))
mg_free(sv);
if (type == SVt_PVMG && SvPAD_TYPED(sv))
SvREFCNT_dec(SvSTASH(sv));
goto freescalar;
case SVt_PVGV:
gp_free((GV*)sv);
- Safefree(GvNAME(sv));
+ if (GvNAME_HEK(sv)) {
+ unshare_hek(GvNAME_HEK(sv));
+ }
/* If we're in a stash, we don't own a reference to it. However it does
have a back reference to us, which needs to be cleared. */
if (GvSTASH(sv))
case SVt_PV:
case SVt_RV:
if (SvROK(sv)) {
- SV *target = SvRV(sv);
+ SV * const target = SvRV(sv);
if (SvWEAKREF(sv))
sv_del_backref(target, sv);
else
/*
* The length is cached in PERL_UTF8_magic, in the mg_len field. Also the
- * mg_ptr is used, by sv_pos_u2b(), see the comments of S_utf8_mg_pos_init().
- * (Note that the mg_len is not the length of the mg_ptr field.)
+ * mg_ptr is used, by sv_pos_u2b() and sv_pos_b2u() - see the comments below.
+ * (Note that the mg_len is not the length of the mg_ptr field.
+ * This allows the cache to store the character length of the string without
+ * needing to malloc() extra storage to attach to the mg_ptr.)
*
*/
return mg_length(sv);
else
{
- STRLEN len, ulen;
+ STRLEN len;
const U8 *s = (U8*)SvPV_const(sv, len);
- MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0;
- if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0)) {
- ulen = mg->mg_len;
-#ifdef PERL_UTF8_CACHE_ASSERT
- assert(ulen == Perl_utf8_length(aTHX_ s, s + len));
-#endif
- }
- else {
- ulen = Perl_utf8_length(aTHX_ s, s + len);
- if (!mg && !SvREADONLY(sv)) {
- sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0);
- mg = mg_find(sv, PERL_MAGIC_utf8);
- assert(mg);
+ if (PL_utf8cache) {
+ STRLEN ulen;
+ MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0;
+
+ if (mg && mg->mg_len != -1) {
+ ulen = mg->mg_len;
+ if (PL_utf8cache < 0) {
+ const STRLEN real = Perl_utf8_length(aTHX_ s, s + len);
+ if (real != ulen) {
+ /* Need to turn the assertions off otherwise we may
+ recurse infinitely while printing error messages.
+ */
+ SAVEI8(PL_utf8cache);
+ PL_utf8cache = 0;
+ Perl_croak(aTHX_ "panic: sv_len_utf8 cache %"UVf
+ " real %"UVf" for %"SVf,
+ (UV) ulen, (UV) real, sv);
+ }
+ }
+ }
+ else {
+ ulen = Perl_utf8_length(aTHX_ s, s + len);
+ if (!SvREADONLY(sv)) {
+ if (!mg) {
+ mg = sv_magicext(sv, 0, PERL_MAGIC_utf8,
+ &PL_vtbl_utf8, 0, 0);
+ }
+ assert(mg);
+ mg->mg_len = ulen;
+ }
}
- if (mg)
- mg->mg_len = ulen;
+ return ulen;
}
- return ulen;
+ return Perl_utf8_length(aTHX_ s, s + len);
}
}
-/* S_utf8_mg_pos_init() is used to initialize the mg_ptr field of
- * a PERL_UTF8_magic. The mg_ptr is used to store the mapping
- * between UTF-8 and byte offsets. There are two (substr offset and substr
- * length, the i offset, PERL_MAGIC_UTF8_CACHESIZE) times two (UTF-8 offset
- * and byte offset) cache positions.
- *
- * The mg_len field is used by sv_len_utf8(), see its comments.
- * Note that the mg_len is not the length of the mg_ptr field.
- *
- */
-STATIC bool
-S_utf8_mg_pos_init(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i,
- I32 offsetp, const U8 *s, const U8 *start)
+/* Walk forwards to find the byte corresponding to the passed in UTF-8
+ offset. */
+static STRLEN
+S_sv_pos_u2b_forwards(pTHX_ const U8 *const start, const U8 *const send,
+ STRLEN uoffset)
{
- bool found = FALSE;
+ const U8 *s = start;
- if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
- if (!*mgp)
- *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0, 0);
- assert(*mgp);
+ PERL_UNUSED_CONTEXT;
- if ((*mgp)->mg_ptr)
- *cachep = (STRLEN *) (*mgp)->mg_ptr;
- else {
- Newxz(*cachep, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN);
- (*mgp)->mg_ptr = (char *) *cachep;
- }
- assert(*cachep);
+ while (s < send && uoffset--)
+ s += UTF8SKIP(s);
+ if (s > send) {
+ /* This is the existing behaviour. Possibly it should be a croak, as
+ it's actually a bounds error */
+ s = send;
+ }
+ return s - start;
+}
+
+/* Given the length of the string in both bytes and UTF-8 characters, decide
+ whether to walk forwards or backwards to find the byte corresponding to
+ the passed in UTF-8 offset. */
+static STRLEN
+S_sv_pos_u2b_midway(pTHX_ const U8 *const start, const U8 *send,
+ STRLEN uoffset, STRLEN uend)
+{
+ STRLEN backw = uend - uoffset;
+ if (uoffset < 2 * backw) {
+	/* The assumption is that going forwards is twice the speed of going
+	   backwards (that's where the 2 * backw comes from).
+ (The real figure of course depends on the UTF-8 data.) */
+ return S_sv_pos_u2b_forwards(aTHX_ start, send, uoffset);
+ }
+
+ while (backw--) {
+ send--;
+ while (UTF8_IS_CONTINUATION(*send))
+ send--;
+ }
+ return send - start;
+}
+
+/* For the string representation of the given scalar, find the byte
+ corresponding to the passed in UTF-8 offset. uoffset0 and boffset0
+   give another position in the string, *before* the sought offset (such a
+   pair always exists, as 0, 0 is a valid pair of positions), which should
+   help reduce the amount of linear searching.
+ If *mgp is non-NULL, it should point to the UTF-8 cache magic, which
+ will be used to reduce the amount of linear searching. The cache will be
+ created if necessary, and the found value offered to it for update. */
+static STRLEN
+S_sv_pos_u2b_cached(pTHX_ SV *sv, MAGIC **mgp, const U8 *const start,
+ const U8 *const send, STRLEN uoffset,
+ STRLEN uoffset0, STRLEN boffset0) {
+ STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy. */
+ bool found = FALSE;
- (*cachep)[i] = offsetp;
- (*cachep)[i+1] = s - start;
- found = TRUE;
- }
+ assert (uoffset >= uoffset0);
- return found;
-}
+ if (SvMAGICAL(sv) && !SvREADONLY(sv) && PL_utf8cache
+ && (*mgp || (*mgp = mg_find(sv, PERL_MAGIC_utf8)))) {
+ if ((*mgp)->mg_ptr) {
+ STRLEN *cache = (STRLEN *) (*mgp)->mg_ptr;
+ if (cache[0] == uoffset) {
+ /* An exact match. */
+ return cache[1];
+ }
+ if (cache[2] == uoffset) {
+ /* An exact match. */
+ return cache[3];
+ }
-/*
- * S_utf8_mg_pos() is used to query and update mg_ptr field of
- * a PERL_UTF8_magic. The mg_ptr is used to store the mapping
- * between UTF-8 and byte offsets. See also the comments of
- * S_utf8_mg_pos_init().
- *
- */
-STATIC bool
-S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I32 uoff, const U8 **sp, const U8 *start, const U8 *send)
-{
- bool found = FALSE;
+ if (cache[0] < uoffset) {
+ /* The cache already knows part of the way. */
+ if (cache[0] > uoffset0) {
+ /* The cache knows more than the passed in pair */
+ uoffset0 = cache[0];
+ boffset0 = cache[1];
+ }
+ if ((*mgp)->mg_len != -1) {
+ /* And we know the end too. */
+ boffset = boffset0
+ + S_sv_pos_u2b_midway(aTHX_ start + boffset0, send,
+ uoffset - uoffset0,
+ (*mgp)->mg_len - uoffset0);
+ } else {
+ boffset = boffset0
+ + S_sv_pos_u2b_forwards(aTHX_ start + boffset0,
+ send, uoffset - uoffset0);
+ }
+ }
+ else if (cache[2] < uoffset) {
+ /* We're between the two cache entries. */
+ if (cache[2] > uoffset0) {
+ /* and the cache knows more than the passed in pair */
+ uoffset0 = cache[2];
+ boffset0 = cache[3];
+ }
- if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
- if (!*mgp)
- *mgp = mg_find(sv, PERL_MAGIC_utf8);
- if (*mgp && (*mgp)->mg_ptr) {
- *cachep = (STRLEN *) (*mgp)->mg_ptr;
- ASSERT_UTF8_CACHE(*cachep);
- if ((*cachep)[i] == (STRLEN)uoff) /* An exact match. */
- found = TRUE;
- else { /* We will skip to the right spot. */
- STRLEN forw = 0;
- STRLEN backw = 0;
- const U8* p = NULL;
-
- /* The assumption is that going backward is half
- * the speed of going forward (that's where the
- * 2 * backw in the below comes from). (The real
- * figure of course depends on the UTF-8 data.) */
-
- if ((*cachep)[i] > (STRLEN)uoff) {
- forw = uoff;
- backw = (*cachep)[i] - (STRLEN)uoff;
-
- if (forw < 2 * backw)
- p = start;
- else
- p = start + (*cachep)[i+1];
- }
- /* Try this only for the substr offset (i == 0),
- * not for the substr length (i == 2). */
- else if (i == 0) { /* (*cachep)[i] < uoff */
- const STRLEN ulen = sv_len_utf8(sv);
-
- if ((STRLEN)uoff < ulen) {
- forw = (STRLEN)uoff - (*cachep)[i];
- backw = ulen - (STRLEN)uoff;
-
- if (forw < 2 * backw)
- p = start + (*cachep)[i+1];
- else
- p = send;
- }
-
- /* If the string is not long enough for uoff,
- * we could extend it, but not at this low a level. */
- }
-
- if (p) {
- if (forw < 2 * backw) {
- while (forw--)
- p += UTF8SKIP(p);
- }
- else {
- while (backw--) {
- p--;
- while (UTF8_IS_CONTINUATION(*p))
- p--;
- }
- }
-
- /* Update the cache. */
- (*cachep)[i] = (STRLEN)uoff;
- (*cachep)[i+1] = p - start;
-
- /* Drop the stale "length" cache */
- if (i == 0) {
- (*cachep)[2] = 0;
- (*cachep)[3] = 0;
- }
-
- found = TRUE;
- }
- }
- if (found) { /* Setup the return values. */
- *offsetp = (*cachep)[i+1];
- *sp = start + *offsetp;
- if (*sp >= send) {
- *sp = send;
- *offsetp = send - start;
- }
- else if (*sp < start) {
- *sp = start;
- *offsetp = 0;
- }
+ boffset = boffset0
+ + S_sv_pos_u2b_midway(aTHX_ start + boffset0,
+ start + cache[1],
+ uoffset - uoffset0,
+ cache[0] - uoffset0);
+ } else {
+ boffset = boffset0
+ + S_sv_pos_u2b_midway(aTHX_ start + boffset0,
+ start + cache[3],
+ uoffset - uoffset0,
+ cache[2] - uoffset0);
}
+ found = TRUE;
}
-#ifdef PERL_UTF8_CACHE_ASSERT
- if (found) {
- U8 *s = start;
- I32 n = uoff;
+ else if ((*mgp)->mg_len != -1) {
+ /* If we can take advantage of a passed in offset, do so. */
+ /* In fact, offset0 is either 0, or less than offset, so don't
+ need to worry about the other possibility. */
+ boffset = boffset0
+ + S_sv_pos_u2b_midway(aTHX_ start + boffset0, send,
+ uoffset - uoffset0,
+ (*mgp)->mg_len - uoffset0);
+ found = TRUE;
+ }
+ }
- while (n-- && s < send)
- s += UTF8SKIP(s);
+ if (!found || PL_utf8cache < 0) {
+ const STRLEN real_boffset
+ = boffset0 + S_sv_pos_u2b_forwards(aTHX_ start + boffset0,
+ send, uoffset - uoffset0);
- if (i == 0) {
- assert(*offsetp == s - start);
- assert((*cachep)[0] == (STRLEN)uoff);
- assert((*cachep)[1] == *offsetp);
- }
- ASSERT_UTF8_CACHE(*cachep);
+ if (found && PL_utf8cache < 0) {
+ if (real_boffset != boffset) {
+ /* Need to turn the assertions off otherwise we may recurse
+ infinitely while printing error messages. */
+ SAVEI8(PL_utf8cache);
+ PL_utf8cache = 0;
+ Perl_croak(aTHX_ "panic: sv_pos_u2b_cache cache %"UVf
+ " real %"UVf" for %"SVf,
+ (UV) boffset, (UV) real_boffset, sv);
+ }
}
-#endif
+ boffset = real_boffset;
}
- return found;
+ S_utf8_mg_pos_cache_update(aTHX_ sv, mgp, boffset, uoffset, send - start);
+ return boffset;
}
+
/*
=for apidoc sv_pos_u2b
/*
* sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential
* PERL_UTF8_magic of the sv to store the mapping between UTF-8 and
- * byte offsets. See also the comments of S_utf8_mg_pos().
+ * byte offsets. See also the comments of S_utf8_mg_pos_cache_update().
*
*/
start = (U8*)SvPV_const(sv, len);
if (len) {
- STRLEN boffset = 0;
- STRLEN *cache = NULL;
- const U8 *s = start;
- I32 uoffset = *offsetp;
- const U8 * const send = s + len;
+ STRLEN uoffset = (STRLEN) *offsetp;
+ const U8 * const send = start + len;
MAGIC *mg = NULL;
- bool found = utf8_mg_pos(sv, &mg, &cache, 0, offsetp, *offsetp, &s, start, send);
-
- if (!found && uoffset > 0) {
- while (s < send && uoffset--)
- s += UTF8SKIP(s);
- if (s >= send)
- s = send;
- if (utf8_mg_pos_init(sv, &mg, &cache, 0, *offsetp, s, start))
- boffset = cache[1];
- *offsetp = s - start;
- }
- if (lenp) {
- found = FALSE;
- start = s;
- if (utf8_mg_pos(sv, &mg, &cache, 2, lenp, *lenp, &s, start, send)) {
- *lenp -= boffset;
- found = TRUE;
- }
- if (!found && *lenp > 0) {
- I32 ulen = *lenp;
- if (ulen > 0)
- while (s < send && ulen--)
- s += UTF8SKIP(s);
- if (s >= send)
- s = send;
- utf8_mg_pos_init(sv, &mg, &cache, 2, *lenp, s, start);
- }
- *lenp = s - start;
- }
- ASSERT_UTF8_CACHE(cache);
+ STRLEN boffset = S_sv_pos_u2b_cached(aTHX_ sv, &mg, start, send,
+ uoffset, 0, 0);
+
+ *offsetp = (I32) boffset;
+
+ if (lenp) {
+ /* Convert the relative offset to absolute. */
+ STRLEN uoffset2 = uoffset + (STRLEN) *lenp;
+ STRLEN boffset2
+ = S_sv_pos_u2b_cached(aTHX_ sv, &mg, start, send, uoffset2,
+ uoffset, boffset) - boffset;
+
+ *lenp = boffset2;
+ }
}
else {
*offsetp = 0;
return;
}
+/* Create and update the UTF8 magic offset cache, with the proffered utf8/
+ byte length pairing. The (byte) length of the total SV is passed in too,
+ as blen, because for some (more esoteric) SVs, the call to SvPV_const()
+ may not have updated SvCUR, so we can't rely on reading it directly.
+
+ The proffered utf8/byte length pairing isn't used if the cache already has
+ two pairs, and swapping either for the proffered pair would increase the
+ RMS of the intervals between known byte offsets.
+
+ The cache itself consists of 4 STRLEN values
+ 0: larger UTF-8 offset
+ 1: corresponding byte offset
+ 2: smaller UTF-8 offset
+ 3: corresponding byte offset
+
+ Unused cache pairs have the value 0, 0.
+ Keeping the cache "backwards" means that the invariant of
+ cache[0] >= cache[2] is maintained even with empty slots, which means that
+ the code that uses it doesn't need to worry if only 1 entry has actually
+ been set to non-zero. It also makes the "position beyond the end of the
+ cache" logic much simpler, as the first slot is always the one to start
+ from.
+*/
+static void
+S_utf8_mg_pos_cache_update(pTHX_ SV *sv, MAGIC **mgp, STRLEN byte, STRLEN utf8,
+ STRLEN blen)
+{
+ STRLEN *cache;
+ if (SvREADONLY(sv))
+ return;
+
+ if (!*mgp) {
+ *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0,
+ 0);
+ (*mgp)->mg_len = -1;
+ }
+ assert(*mgp);
+
+ if (!(cache = (STRLEN *)(*mgp)->mg_ptr)) {
+ Newxz(cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN);
+ (*mgp)->mg_ptr = (char *) cache;
+ }
+ assert(cache);
+
+ if (PL_utf8cache < 0) {
+ const U8 *start = (const U8 *) SvPVX_const(sv);
+ const U8 *const end = start + byte;
+ STRLEN realutf8 = 0;
+
+ while (start < end) {
+ start += UTF8SKIP(start);
+ realutf8++;
+ }
+
+ /* Can't use S_sv_pos_b2u_forwards as it will scream warnings on
+ surrogates. FIXME - is it inconsistent that b2u warns, but u2b
+ doesn't? I don't know whether this difference was introduced with
+ the caching code in 5.8.1. */
+
+ if (realutf8 != utf8) {
+ /* Need to turn the assertions off otherwise we may recurse
+ infinitely while printing error messages. */
+ SAVEI8(PL_utf8cache);
+ PL_utf8cache = 0;
+ Perl_croak(aTHX_ "panic: utf8_mg_pos_cache_update cache %"UVf
+ " real %"UVf" for %"SVf, (UV) utf8, (UV) realutf8, sv);
+ }
+ }
+
+ /* Cache is held with the later position first, to simplify the code
+ that deals with unbounded ends. */
+
+ ASSERT_UTF8_CACHE(cache);
+ if (cache[1] == 0) {
+ /* Cache is totally empty */
+ cache[0] = utf8;
+ cache[1] = byte;
+ } else if (cache[3] == 0) {
+ if (byte > cache[1]) {
+ /* New one is larger, so goes first. */
+ cache[2] = cache[0];
+ cache[3] = cache[1];
+ cache[0] = utf8;
+ cache[1] = byte;
+ } else {
+ cache[2] = utf8;
+ cache[3] = byte;
+ }
+ } else {
+#define THREEWAY_SQUARE(a,b,c,d) \
+ ((float)((d) - (c))) * ((float)((d) - (c))) \
+ + ((float)((c) - (b))) * ((float)((c) - (b))) \
+ + ((float)((b) - (a))) * ((float)((b) - (a)))
+
+ /* Cache has 2 slots in use, and we know three potential pairs.
+ Keep the two that give the lowest RMS distance. Do the
+	   calculation in bytes simply because we always know the byte
+	   length.  Squared distances have the same ordering as their square
+	   roots, so don't bother with the actual square root. */
+ const float existing = THREEWAY_SQUARE(0, cache[3], cache[1], blen);
+ if (byte > cache[1]) {
+ /* New position is after the existing pair of pairs. */
+ const float keep_earlier
+ = THREEWAY_SQUARE(0, cache[3], byte, blen);
+ const float keep_later
+ = THREEWAY_SQUARE(0, cache[1], byte, blen);
+
+ if (keep_later < keep_earlier) {
+ if (keep_later < existing) {
+ cache[2] = cache[0];
+ cache[3] = cache[1];
+ cache[0] = utf8;
+ cache[1] = byte;
+ }
+ }
+ else {
+ if (keep_earlier < existing) {
+ cache[0] = utf8;
+ cache[1] = byte;
+ }
+ }
+ }
+ else if (byte > cache[3]) {
+ /* New position is between the existing pair of pairs. */
+ const float keep_earlier
+ = THREEWAY_SQUARE(0, cache[3], byte, blen);
+ const float keep_later
+ = THREEWAY_SQUARE(0, byte, cache[1], blen);
+
+ if (keep_later < keep_earlier) {
+ if (keep_later < existing) {
+ cache[2] = utf8;
+ cache[3] = byte;
+ }
+ }
+ else {
+ if (keep_earlier < existing) {
+ cache[0] = utf8;
+ cache[1] = byte;
+ }
+ }
+ }
+ else {
+ /* New position is before the existing pair of pairs. */
+ const float keep_earlier
+ = THREEWAY_SQUARE(0, byte, cache[3], blen);
+ const float keep_later
+ = THREEWAY_SQUARE(0, byte, cache[1], blen);
+
+ if (keep_later < keep_earlier) {
+ if (keep_later < existing) {
+ cache[2] = utf8;
+ cache[3] = byte;
+ }
+ }
+ else {
+ if (keep_earlier < existing) {
+ cache[0] = cache[2];
+ cache[1] = cache[3];
+ cache[2] = utf8;
+ cache[3] = byte;
+ }
+ }
+ }
+ }
+ ASSERT_UTF8_CACHE(cache);
+}
+
+/* If we don't know the character offset of the end of a region, our only
+ option is to walk forwards to the target byte offset. */
+static STRLEN
+S_sv_pos_b2u_forwards(pTHX_ const U8 *s, const U8 *const target)
+{
+ STRLEN len = 0;
+ while (s < target) {
+ STRLEN n = 1;
+
+ /* Call utf8n_to_uvchr() to validate the sequence
+ * (unless a simple non-UTF character) */
+ if (!UTF8_IS_INVARIANT(*s))
+ utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0);
+ if (n > 0) {
+ s += n;
+ len++;
+ }
+ else
+ break;
+ }
+ return len;
+}
+
+/* We already know all of the way, now we may be able to walk back. The same
+ assumption is made as in S_sv_pos_u2b_midway(), namely that walking
+ backward is half the speed of walking forward. */
+static STRLEN
+S_sv_pos_b2u_midway(pTHX_ const U8 *s, const U8 *const target, const U8 *end,
+ STRLEN endu)
+{
+ const STRLEN forw = target - s;
+ STRLEN backw = end - target;
+
+ if (forw < 2 * backw) {
+ return S_sv_pos_b2u_forwards(aTHX_ s, target);
+ }
+
+ while (end > target) {
+ end--;
+ while (UTF8_IS_CONTINUATION(*end)) {
+ end--;
+ }
+ endu--;
+ }
+ return endu;
+}
+
/*
=for apidoc sv_pos_b2u
/*
 * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential
 * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and
- * byte offsets. See also the comments of S_utf8_mg_pos().
+ * byte offsets.
 *
 */
-
void
Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp)
{
    const U8* s;
-    STRLEN len;
+    const STRLEN byte = *offsetp;
+    STRLEN len = 0; /* Actually always set, but let's keep gcc happy. */
+    STRLEN blen;
+    MAGIC* mg = NULL;
+    const U8* send;
+    bool found = FALSE;
    if (!sv)
	return;
-    s = (const U8*)SvPV_const(sv, len);
-    if ((I32)len < *offsetp)
-	Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset");
-    else {
-	const U8* send = s + *offsetp;
-	MAGIC* mg = NULL;
-	STRLEN *cache = NULL;
+    s = (const U8*)SvPV_const(sv, blen);
-	len = 0;
-
-	if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
-	    mg = mg_find(sv, PERL_MAGIC_utf8);
-	    if (mg && mg->mg_ptr) {
-		cache = (STRLEN *) mg->mg_ptr;
-		if (cache[1] == (STRLEN)*offsetp) {
-		    /* An exact match. */
-		    *offsetp = cache[0];
-
-		    return;
-		}
-		else if (cache[1] < (STRLEN)*offsetp) {
-		    /* We already know part of the way. */
-		    len = cache[0];
-		    s += cache[1];
-		    /* Let the below loop do the rest. */
-		}
-		else { /* cache[1] > *offsetp */
-		    /* We already know all of the way, now we may
-		     * be able to walk back. The same assumption
-		     * is made as in S_utf8_mg_pos(), namely that
-		     * walking backward is twice slower than
-		     * walking forward. */
-		    const STRLEN forw = *offsetp;
-		    STRLEN backw = cache[1] - *offsetp;
-
-		    if (!(forw < 2 * backw)) {
-			const U8 *p = s + cache[1];
-			STRLEN ubackw = 0;
-
-			cache[1] -= backw;
-
-			while (backw--) {
-			    p--;
-			    while (UTF8_IS_CONTINUATION(*p)) {
-				p--;
-				backw--;
-			    }
-			    ubackw++;
-			}
+    if (blen < byte)
+	Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset");
-			cache[0] -= ubackw;
-			*offsetp = cache[0];
+    send = s + byte;
-			/* Drop the stale "length" cache */
-			cache[2] = 0;
-			cache[3] = 0;
+    if (SvMAGICAL(sv) && !SvREADONLY(sv) && PL_utf8cache
+	&& (mg = mg_find(sv, PERL_MAGIC_utf8))) {
+	if (mg->mg_ptr) {
+	    STRLEN * const cache = (STRLEN *) mg->mg_ptr;
+	    if (cache[1] == byte) {
+		/* An exact match. */
+		*offsetp = cache[0];
+		return;
+	    }
+	    if (cache[3] == byte) {
+		/* An exact match. */
+		*offsetp = cache[2];
+		return;
+	    }
-			return;
-		    }
+	    if (cache[1] < byte) {
+		/* We already know part of the way. */
+		if (mg->mg_len != -1) {
+		    /* Actually, we know the end too. */
+		    len = cache[0]
+			+ S_sv_pos_b2u_midway(aTHX_ s + cache[1], send,
+					      s + blen, mg->mg_len - cache[0]);
+		} else {
+		    len = cache[0]
+			+ S_sv_pos_b2u_forwards(aTHX_ s + cache[1], send);
		}
	    }
-	    ASSERT_UTF8_CACHE(cache);
-	}
-
-	while (s < send) {
-	    STRLEN n = 1;
+	    else if (cache[3] < byte) {
+		/* We're between the two cached pairs, so we do the calculation
+		   offset by the byte/utf-8 positions for the earlier pair,
+		   then add the utf-8 characters from the string start to
+		   there. */
+		len = S_sv_pos_b2u_midway(aTHX_ s + cache[3], send,
+					  s + cache[1], cache[0] - cache[2])
+		    + cache[2];
-	    /* Call utf8n_to_uvchr() to validate the sequence
-	     * (unless a simple non-UTF character) */
-	    if (!UTF8_IS_INVARIANT(*s))
-		utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0);
-	    if (n > 0) {
-		s += n;
-		len++;
	    }
-	    else
-		break;
-	}
+	    else { /* cache[3] > byte */
+		len = S_sv_pos_b2u_midway(aTHX_ s, send, s + cache[3],
+					  cache[2]);
-	if (!SvREADONLY(sv)) {
-	    if (!mg) {
-		sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0);
-		mg = mg_find(sv, PERL_MAGIC_utf8);
	    }
-	    assert(mg);
+	    ASSERT_UTF8_CACHE(cache);
+	    found = TRUE;
+	} else if (mg->mg_len != -1) {
+	    len = S_sv_pos_b2u_midway(aTHX_ s, send, s + blen, mg->mg_len);
+	    found = TRUE;
+	}
+    }
+    if (!found || PL_utf8cache < 0) {
+	/* Either no cached answer was usable, or cache-checking mode
+	   (PL_utf8cache < 0) is on: compute the result by scanning from
+	   the string start.  */
+	const STRLEN real_len = S_sv_pos_b2u_forwards(aTHX_ s, send);
-	    if (!mg->mg_ptr) {
-		Newxz(cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN);
-		mg->mg_ptr = (char *) cache;
+	if (found && PL_utf8cache < 0) {
+	    if (len != real_len) {
+		/* Need to turn the assertions off otherwise we may recurse
+		   infinitely while printing error messages. */
+		SAVEI8(PL_utf8cache);
+		PL_utf8cache = 0;
+		Perl_croak(aTHX_ "panic: sv_pos_b2u cache %"UVf
+			   " real %"UVf" for %"SVf,
+			   (UV) len, (UV) real_len, sv);
	    }
-	    assert(cache);
-
-	    cache[0] = len;
-	    cache[1] = *offsetp;
-	    /* Drop the stale "length" cache */
-	    cache[2] = 0;
-	    cache[3] = 0;
	}
-
-	*offsetp = len;
+	len = real_len;
    }
-    return;
+    *offsetp = len;
+
+    /* Store the byte/character pair we just computed for later lookups.  */
+    S_utf8_mg_pos_cache_update(aTHX_ sv, &mg, byte, len, blen);
}
/*
if (cur1 == cur2)
eq = (pv1 == pv2) || memEQ(pv1, pv2, cur1);
- if (svrecode)
- SvREFCNT_dec(svrecode);
-
+ SvREFCNT_dec(svrecode);
if (tpv)
Safefree(tpv);
}
}
- if (svrecode)
- SvREFCNT_dec(svrecode);
-
+ SvREFCNT_dec(svrecode);
if (tpv)
Safefree(tpv);
return xf + sizeof(PL_collation_ix);
}
if (! mg) {
- sv_magic(sv, 0, PERL_MAGIC_collxfrm, 0, 0);
- mg = mg_find(sv, PERL_MAGIC_collxfrm);
+#ifdef PERL_OLD_COPY_ON_WRITE
+ if (SvIsCOW(sv))
+ sv_force_normal_flags(sv, 0);
+#endif
+ mg = sv_magicext(sv, 0, PERL_MAGIC_collxfrm, &PL_vtbl_collxfrm,
+ 0, 0);
assert(mg);
}
mg->mg_ptr = xf;
register I32 cnt;
I32 i = 0;
I32 rspara = 0;
- I32 recsize;
if (SvTHINKFIRST(sv))
sv_force_normal_flags(sv, append ? 0 : SV_COW_DROP_PV);
}
else if (RsSNARF(PL_rs)) {
/* If it is a regular disk file use size from stat() as estimate
- of amount we are going to read - may result in malloc-ing
- more memory than we realy need if layers bellow reduce
- size we read (e.g. CRLF or a gzip layer)
+ of amount we are going to read -- may result in mallocing
+ more memory than we really need if the layers below reduce
+ the size we read (e.g. CRLF or a gzip layer).
*/
Stat_t st;
if (!PerlLIO_fstat(PerlIO_fileno(fp), &st) && S_ISREG(st.st_mode)) {
else if (RsRECORD(PL_rs)) {
I32 bytesread;
char *buffer;
+ U32 recsize;
/* Grab the size of the record we're getting */
- recsize = SvIV(SvRV(PL_rs));
+ recsize = SvUV(SvRV(PL_rs)); /* RsRECORD() guarantees > 0. */
buffer = SvGROW(sv, (STRLEN)(recsize + append + 1)) + append;
/* Go yank in */
#ifdef VMS
*/
SV *
-Perl_newRV(pTHX_ SV *tmpRef)
+Perl_newRV(pTHX_ SV *sv)
{
dVAR;
- return newRV_noinc(SvREFCNT_inc(tmpRef));
+ return newRV_noinc(SvREFCNT_inc_simple_NN(sv));
}
/*
sv_clear(rv);
SvFLAGS(rv) = 0;
SvREFCNT(rv) = refcnt;
- }
- if (SvTYPE(rv) < SVt_RV)
+ sv_upgrade(rv, SVt_RV);
+ } else if (SvROK(rv)) {
+ SvREFCNT_dec(SvRV(rv));
+ } else if (SvTYPE(rv) < SVt_RV)
sv_upgrade(rv, SVt_RV);
else if (SvTYPE(rv) > SVt_RV) {
SvPV_free(rv);
if (SvTYPE(tmpRef) != SVt_PVIO)
++PL_sv_objcount;
SvUPGRADE(tmpRef, SVt_PVMG);
- SvSTASH_set(tmpRef, (HV*)SvREFCNT_inc(stash));
+ SvSTASH_set(tmpRef, (HV*)SvREFCNT_inc_simple(stash));
if (Gv_AMG(stash))
SvAMAGIC_on(sv);
{
dVAR;
void *xpvmg;
- SV *temp = sv_newmortal();
+ SV * const temp = sv_newmortal();
assert(SvTYPE(sv) == SVt_PVGV);
SvFAKE_off(sv);
gv_efullname3(temp, (GV *) sv, "*");
- if (GvGP(sv))
+ if (GvGP(sv)) {
gp_free((GV*)sv);
+ }
if (GvSTASH(sv)) {
sv_del_backref((SV*)GvSTASH(sv), sv);
GvSTASH(sv) = NULL;
}
- SvSCREAM_off(sv);
- Safefree(GvNAME(sv));
GvMULTI_off(sv);
+ if (GvNAME_HEK(sv)) {
+ unshare_hek(GvNAME_HEK(sv));
+ }
+ SvSCREAM_off(sv);
/* need to keep SvANY(sv) in the right arena */
xpvmg = new_XPVMG();
#if defined(USE_ITHREADS)
+/* XXX Remove this so it doesn't have to go thru the macro and return for nothing */
#ifndef GpREFCNT_inc
# define GpREFCNT_inc(gp) ((gp) ? (++(gp)->gp_refcnt, (gp)) : (GP*)NULL)
#endif
+/* Certain cases in Perl_ss_dup have been merged, by relying on the fact
+ that currently av_dup and hv_dup are the same as sv_dup. If this changes,
+ please unmerge ss_dup. */
#define sv_dup_inc(s,t) SvREFCNT_inc(sv_dup(s,t))
+#define sv_dup_inc_NN(s,t) SvREFCNT_inc_NN(sv_dup(s,t))
#define av_dup(s,t) (AV*)sv_dup((SV*)s,t)
#define av_dup_inc(s,t) (AV*)SvREFCNT_inc(sv_dup((SV*)s,t))
#define hv_dup(s,t) (HV*)sv_dup((SV*)s,t)
Perl_gp_dup(pTHX_ GP *gp, CLONE_PARAMS* param)
{
GP *ret;
+
if (!gp)
return (GP*)NULL;
/* look for it in the table first */
if (tblent->oldval == sv)
return tblent;
}
- return 0;
+ return NULL;
}
void *
{
PTR_TBL_ENT_t const *const tblent = ptr_table_find(tbl, sv);
PERL_UNUSED_CONTEXT;
- return tblent ? tblent->newval : (void *) 0;
+ return tblent ? tblent->newval : NULL;
}
/* add a new entry to a pointer-mapping table */
void
Perl_ptr_table_store(pTHX_ PTR_TBL_t *tbl, const void *oldsv, void *newsv)
{
- PTR_TBL_ENT_t *tblent = S_ptr_table_find(tbl, oldsv);
+ PTR_TBL_ENT_t *tblent = ptr_table_find(tbl, oldsv);
PERL_UNUSED_CONTEXT;
if (tblent) {
}
else {
/* Special case - not normally malloced for some reason */
- if ((SvREADONLY(sstr) && SvFAKE(sstr))) {
+ if (isGV_with_GP(sstr)) {
+ /* Don't need to do anything here. */
+ }
+ else if ((SvREADONLY(sstr) && SvFAKE(sstr))) {
/* A "shared" PV - clone it as "shared" PV */
SvPV_set(dstr,
HEK_KEY(hek_dup(SvSHARED_HEK_FROM_PV(SvPVX_const(sstr)),
sv_type_details->body_size + sv_type_details->offset, char);
#endif
- if (sv_type != SVt_PVAV && sv_type != SVt_PVHV)
+ if (sv_type != SVt_PVAV && sv_type != SVt_PVHV
+ && !isGV_with_GP(dstr))
Perl_rvpv_dup(aTHX_ dstr, sstr, param);
/* The Copy above means that all the source (unduplicated) pointers
missing by always going for the destination.
FIXME - instrument and check that assumption */
if (sv_type >= SVt_PVMG) {
- if (SvMAGIC(dstr))
+ HV *ourstash;
+ if ((sv_type == SVt_PVMG) && (ourstash = OURSTASH(dstr))) {
+ OURSTASH_set(dstr, hv_dup_inc(ourstash, param));
+ } else if (SvMAGIC(dstr))
SvMAGIC_set(dstr, mg_dup(SvMAGIC(dstr), param));
if (SvSTASH(dstr))
SvSTASH_set(dstr, hv_dup_inc(SvSTASH(dstr), param));
LvTARG(dstr) = sv_dup_inc(LvTARG(dstr), param);
break;
case SVt_PVGV:
- GvNAME(dstr) = SAVEPVN(GvNAME(dstr), GvNAMELEN(dstr));
- GvSTASH(dstr) = hv_dup(GvSTASH(dstr), param);
+ if (GvNAME_HEK(dstr))
+ GvNAME_HEK(dstr) = hek_dup(GvNAME_HEK(dstr), param);
+
/* Don't call sv_add_backref here as it's going to be created
as part of the magic cloning of the symbol table. */
- GvGP(dstr) = gp_dup(GvGP(dstr), param);
- (void)GpREFCNT_inc(GvGP(dstr));
+ GvSTASH(dstr) = hv_dup(GvSTASH(dstr), param);
+ if(isGV_with_GP(sstr)) {
+ /* Danger Will Robinson - GvGP(dstr) isn't initialised
+ at the point of this comment. */
+ GvGP(dstr) = gp_dup(GvGP(sstr), param);
+ (void)GpREFCNT_inc(GvGP(dstr));
+ } else
+ Perl_rvpv_dup(aTHX_ dstr, sstr, param);
break;
case SVt_PVIO:
IoIFP(dstr) = fp_dup(IoIFP(dstr), IoTYPE(dstr), param);
TOPINT(nss,ix) = i;
switch (i) {
case SAVEt_ITEM: /* normal string */
+ case SAVEt_SV: /* scalar reference */
sv = (SV*)POPPTR(ss,ix);
TOPPTR(nss,ix) = sv_dup_inc(sv, param);
sv = (SV*)POPPTR(ss,ix);
TOPPTR(nss,ix) = sv_dup_inc(sv, param);
break;
- case SAVEt_SV: /* scalar reference */
- sv = (SV*)POPPTR(ss,ix);
- TOPPTR(nss,ix) = sv_dup_inc(sv, param);
- gv = (GV*)POPPTR(ss,ix);
- TOPPTR(nss,ix) = gv_dup_inc(gv, param);
- break;
- case SAVEt_GENERIC_PVREF: /* generic char* */
- c = (char*)POPPTR(ss,ix);
- TOPPTR(nss,ix) = pv_dup(c);
- ptr = POPPTR(ss,ix);
- TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
- break;
case SAVEt_SHARED_PVREF: /* char* in shared space */
c = (char*)POPPTR(ss,ix);
TOPPTR(nss,ix) = savesharedpv(c);
ptr = POPPTR(ss,ix);
TOPPTR(nss,ix) = svp_dup_inc((SV**)ptr, proto_perl);/* XXXXX */
break;
- case SAVEt_AV: /* array reference */
- av = (AV*)POPPTR(ss,ix);
- TOPPTR(nss,ix) = av_dup_inc(av, param);
- gv = (GV*)POPPTR(ss,ix);
- TOPPTR(nss,ix) = gv_dup(gv, param);
- break;
case SAVEt_HV: /* hash reference */
- hv = (HV*)POPPTR(ss,ix);
- TOPPTR(nss,ix) = hv_dup_inc(hv, param);
+ case SAVEt_AV: /* array reference */
+ sv = POPPTR(ss,ix);
+ TOPPTR(nss,ix) = sv_dup_inc(sv, param);
gv = (GV*)POPPTR(ss,ix);
TOPPTR(nss,ix) = gv_dup(gv, param);
break;
case SAVEt_I32: /* I32 reference */
case SAVEt_I16: /* I16 reference */
case SAVEt_I8: /* I8 reference */
+ case SAVEt_COP_ARYBASE: /* call CopARYBASE_set */
ptr = POPPTR(ss,ix);
TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
i = POPINT(ss,ix);
iv = POPIV(ss,ix);
TOPIV(nss,ix) = iv;
break;
+ case SAVEt_HPTR: /* HV* reference */
+ case SAVEt_APTR: /* AV* reference */
case SAVEt_SPTR: /* SV* reference */
ptr = POPPTR(ss,ix);
TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
ptr = POPPTR(ss,ix);
TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
break;
+ case SAVEt_GENERIC_PVREF: /* generic char* */
case SAVEt_PPTR: /* char* reference */
ptr = POPPTR(ss,ix);
TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
c = (char*)POPPTR(ss,ix);
TOPPTR(nss,ix) = pv_dup(c);
break;
- case SAVEt_HPTR: /* HV* reference */
- ptr = POPPTR(ss,ix);
- TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
- hv = (HV*)POPPTR(ss,ix);
- TOPPTR(nss,ix) = hv_dup(hv, param);
- break;
- case SAVEt_APTR: /* AV* reference */
- ptr = POPPTR(ss,ix);
- TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
- av = (AV*)POPPTR(ss,ix);
- TOPPTR(nss,ix) = av_dup(av, param);
- break;
case SAVEt_NSTAB:
gv = (GV*)POPPTR(ss,ix);
TOPPTR(nss,ix) = gv_dup(gv, param);
case SAVEt_HINTS:
i = POPINT(ss,ix);
TOPINT(nss,ix) = i;
+ ptr = POPPTR(ss,ix);
+ TOPPTR(nss,ix) = Perl_refcounted_he_dup(aTHX_ ptr, param);
+ if (i & HINT_LOCALIZE_HH) {
+ hv = (HV*)POPPTR(ss,ix);
+ TOPPTR(nss,ix) = hv_dup_inc(hv, param);
+ }
break;
case SAVEt_COMPPAD:
av = (AV*)POPPTR(ss,ix);
sv = (SV*)POPPTR(ss,ix);
TOPPTR(nss,ix) = sv_dup(sv, param);
break;
+ case SAVEt_RE_STATE:
+ {
+ const struct re_save_state *const old_state
+ = (struct re_save_state *)
+ (ss + ix - SAVESTACK_ALLOC_FOR_RE_SAVE_STATE);
+ struct re_save_state *const new_state
+ = (struct re_save_state *)
+ (nss + ix - SAVESTACK_ALLOC_FOR_RE_SAVE_STATE);
+
+ Copy(old_state, new_state, 1, struct re_save_state);
+ ix -= SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
+
+ new_state->re_state_bostr
+ = pv_dup(old_state->re_state_bostr);
+ new_state->re_state_reginput
+ = pv_dup(old_state->re_state_reginput);
+ new_state->re_state_regbol
+ = pv_dup(old_state->re_state_regbol);
+ new_state->re_state_regeol
+ = pv_dup(old_state->re_state_regeol);
+ new_state->re_state_regstartp
+ = any_dup(old_state->re_state_regstartp, proto_perl);
+ new_state->re_state_regendp
+ = any_dup(old_state->re_state_regendp, proto_perl);
+ new_state->re_state_reglastparen
+ = any_dup(old_state->re_state_reglastparen, proto_perl);
+ new_state->re_state_reglastcloseparen
+ = any_dup(old_state->re_state_reglastcloseparen,
+ proto_perl);
+ new_state->re_state_regtill
+ = pv_dup(old_state->re_state_regtill);
+ /* XXX This just has to be broken. The old save_re_context
+ code did SAVEGENERICPV(PL_reg_start_tmp);
+ PL_reg_start_tmp is char **.
+ Look above to what the dup code does for
+ SAVEt_GENERIC_PVREF
+ It can never have worked.
+	       So this is merely a faithful copy of the existing bug: */
+ new_state->re_state_reg_start_tmp
+ = (char **) pv_dup((char *)
+ old_state->re_state_reg_start_tmp);
+ /* I assume that it only ever "worked" because no-one called
+ (pseudo)fork while the regexp engine had re-entered itself.
+ */
+ new_state->re_state_reg_call_cc
+ = any_dup(old_state->re_state_reg_call_cc, proto_perl);
+ new_state->re_state_reg_re
+ = any_dup(old_state->re_state_reg_re, proto_perl);
+ new_state->re_state_reg_ganch
+ = pv_dup(old_state->re_state_reg_ganch);
+ new_state->re_state_reg_sv
+ = sv_dup(old_state->re_state_reg_sv, param);
+#ifdef PERL_OLD_COPY_ON_WRITE
+ new_state->re_state_nrs
+ = sv_dup(old_state->re_state_nrs, param);
+#endif
+ new_state->re_state_reg_magic
+ = any_dup(old_state->re_state_reg_magic, proto_perl);
+ new_state->re_state_reg_oldcurpm
+ = any_dup(old_state->re_state_reg_oldcurpm, proto_perl);
+ new_state->re_state_reg_curpm
+ = any_dup(old_state->re_state_reg_curpm, proto_perl);
+ new_state->re_state_reg_oldsaved
+ = pv_dup(old_state->re_state_reg_oldsaved);
+ new_state->re_state_reg_poscache
+ = pv_dup(old_state->re_state_reg_poscache);
+#ifdef DEBUGGING
+ new_state->re_state_reg_starttry
+ = pv_dup(old_state->re_state_reg_starttry);
+#endif
+ break;
+ }
default:
- Perl_croak(aTHX_ "panic: ss_dup inconsistency");
+ Perl_croak(aTHX_ "panic: ss_dup inconsistency (%"IVdf")", (IV) i);
}
}
PERL_SET_THX(my_perl);
# ifdef DEBUGGING
- Poison(my_perl, 1, PerlInterpreter);
+ PoisonNew(my_perl, 1, PerlInterpreter);
PL_op = NULL;
PL_curcop = NULL;
PL_markstack = 0;
PERL_SET_THX(my_perl);
# ifdef DEBUGGING
- Poison(my_perl, 1, PerlInterpreter);
+ PoisonNew(my_perl, 1, PerlInterpreter);
PL_op = NULL;
PL_curcop = NULL;
PL_markstack = 0;
PL_compiling.cop_warnings = sv_dup_inc(PL_compiling.cop_warnings, param);
if (!specialCopIO(PL_compiling.cop_io))
PL_compiling.cop_io = sv_dup_inc(PL_compiling.cop_io, param);
+ PL_compiling.cop_hints
+ = Perl_refcounted_he_dup(aTHX_ PL_compiling.cop_hints, param);
PL_curcop = (COP*)any_dup(proto_perl->Tcurcop, proto_perl);
/* pseudo environmental stuff */
PL_formfeed = sv_dup(proto_perl->Iformfeed, param);
PL_maxsysfd = proto_perl->Imaxsysfd;
- PL_multiline = proto_perl->Imultiline;
PL_statusvalue = proto_perl->Istatusvalue;
#ifdef VMS
PL_statusvalue_vms = proto_perl->Istatusvalue_vms;
const I32 len = av_len((AV*)proto_perl->Iregex_padav);
SV* const * const regexen = AvARRAY((AV*)proto_perl->Iregex_padav);
IV i;
- av_push(PL_regex_padav,
- sv_dup_inc(regexen[0],param));
+ av_push(PL_regex_padav, sv_dup_inc_NN(regexen[0],param));
for(i = 1; i <= len; i++) {
const SV * const regex = regexen[i];
SV * const sv =
i = (PL_lex_casemods < 12 ? 12 : PL_lex_casemods);
PL_lex_casestack = SAVEPVN(proto_perl->Ilex_casestack,i);
+#ifdef PERL_MAD
+ Copy(proto_perl->Inexttoke, PL_nexttoke, 5, NEXTTOKE);
+ PL_lasttoke = proto_perl->Ilasttoke;
+ PL_realtokenstart = proto_perl->Irealtokenstart;
+ PL_faketokens = proto_perl->Ifaketokens;
+ PL_thismad = proto_perl->Ithismad;
+ PL_thistoken = proto_perl->Ithistoken;
+ PL_thisopen = proto_perl->Ithisopen;
+ PL_thisstuff = proto_perl->Ithisstuff;
+ PL_thisclose = proto_perl->Ithisclose;
+ PL_thiswhite = proto_perl->Ithiswhite;
+ PL_nextwhite = proto_perl->Inextwhite;
+ PL_skipwhite = proto_perl->Iskipwhite;
+ PL_endwhite = proto_perl->Iendwhite;
+ PL_curforce = proto_perl->Icurforce;
+#else
Copy(proto_perl->Inextval, PL_nextval, 5, YYSTYPE);
Copy(proto_perl->Inexttype, PL_nexttype, 5, I32);
PL_nexttoke = proto_perl->Inexttoke;
+#endif
/* XXX This is probably masking the deeper issue of why
* SvANY(proto_perl->Ilinestr) can be NULL at this point. For test case:
proto_perl->Ttmps_stack[i]);
if (nsv && !SvREFCNT(nsv)) {
EXTEND_MORTAL(1);
- PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc(nsv);
+ PL_tmps_stack[++PL_tmps_ix] = SvREFCNT_inc_simple(nsv);
}
}
}
PL_watchok = NULL;
PL_regdummy = proto_perl->Tregdummy;
- PL_regprecomp = NULL;
- PL_regnpar = 0;
- PL_regsize = 0;
PL_colorset = 0; /* reinits PL_colors[] */
/*PL_colors[6] = {0,0,0,0,0,0};*/
- PL_reginput = NULL;
- PL_regbol = NULL;
- PL_regeol = NULL;
- PL_regstartp = (I32*)NULL;
- PL_regendp = (I32*)NULL;
- PL_reglastparen = (U32*)NULL;
- PL_reglastcloseparen = (U32*)NULL;
- PL_regtill = NULL;
- PL_reg_start_tmp = (char**)NULL;
- PL_reg_start_tmpl = 0;
- PL_regdata = (struct reg_data*)NULL;
- PL_bostr = NULL;
- PL_reg_flags = 0;
- PL_reg_eval_set = 0;
- PL_regnarrate = 0;
- PL_regprogram = (regnode*)NULL;
- PL_regindent = 0;
- PL_regcc = (CURCUR*)NULL;
- PL_reg_call_cc = (struct re_cc_state*)NULL;
- PL_reg_re = (regexp*)NULL;
- PL_reg_ganch = NULL;
- PL_reg_sv = NULL;
- PL_reg_match_utf8 = FALSE;
- PL_reg_magic = (MAGIC*)NULL;
- PL_reg_oldpos = 0;
- PL_reg_oldcurpm = (PMOP*)NULL;
- PL_reg_curpm = (PMOP*)NULL;
- PL_reg_oldsaved = NULL;
- PL_reg_oldsavedlen = 0;
-#ifdef PERL_OLD_COPY_ON_WRITE
- PL_nrs = NULL;
-#endif
- PL_reg_maxiter = 0;
- PL_reg_leftiter = 0;
- PL_reg_poscache = NULL;
- PL_reg_poscache_size= 0;
/* RE engine - function pointers */
PL_regcompp = proto_perl->Tregcompp;
PL_regint_start = proto_perl->Tregint_start;
PL_regint_string = proto_perl->Tregint_string;
PL_regfree = proto_perl->Tregfree;
-
+ Zero(&PL_reg_state, 1, struct re_save_state);
PL_reginterp_cnt = 0;
- PL_reg_starttry = 0;
+ PL_regmatch_slab = NULL;
/* Pluggable optimizer */
PL_peepp = proto_perl->Tpeepp;
/* orphaned? eg threads->new inside BEGIN or use */
if (PL_compcv && ! SvREFCNT(PL_compcv)) {
- (void)SvREFCNT_inc(PL_compcv);
+ SvREFCNT_inc_simple_void(PL_compcv);
SAVEFREESV(PL_compcv);
}
/* attempt to find a match within the aggregate */
if (hash) {
- keysv = S_find_hash_subscript(aTHX_ (HV*)sv, uninit_sv);
+ keysv = find_hash_subscript((HV*)sv, uninit_sv);
if (keysv)
subscript_type = FUV_SUBSCRIPT_HASH;
}
/* index is an expression;
* attempt to find a match within the aggregate */
if (obase->op_type == OP_HELEM) {
- SV * const keysv = S_find_hash_subscript(aTHX_ (HV*)sv, uninit_sv);
+ SV * const keysv = find_hash_subscript((HV*)sv, uninit_sv);
if (keysv)
return varname(gv, '%', o->op_targ,
keysv, 0, FUV_SUBSCRIPT_HASH);
}
else {
- const I32 index = S_find_array_subscript(aTHX_ (AV*)sv, uninit_sv);
+ const I32 index = find_array_subscript((AV*)sv, uninit_sv);
if (index >= 0)
return varname(gv, '@', o->op_targ,
NULL, index, FUV_SUBSCRIPT_ARRAY);