X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=sv.c;h=d6e307d583196a696ad4efb56f87a7807830bf5f;hb=3dab1dad3c281a8a3802c3e053703d7cabca032a;hp=147d13bf73c691a6b26a7a60b612e65cba487230;hpb=dd2eae666980a8d8bd145f2f6cc632a45513f9ce;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/sv.c b/sv.c
index 147d13b..d6e307d 100644
--- a/sv.c
+++ b/sv.c
@@ -30,21 +30,18 @@
 #endif
 
 #ifdef PERL_UTF8_CACHE_ASSERT
-/* The cache element 0 is the Unicode offset;
- * the cache element 1 is the byte offset of the element 0;
- * the cache element 2 is the Unicode length of the substring;
- * the cache element 3 is the byte length of the substring;
- * The checking of the substring side would be good
- * but substr() has enough code paths to make my head spin;
- * if adding more checks watch out for the following tests:
+/* if adding more checks watch out for the following tests:
  *   t/op/index.t t/op/length.t t/op/pat.t t/op/substr.t
  *   lib/utf8.t lib/Unicode/Collate/t/index.t
  * --jhi
  */
-#define ASSERT_UTF8_CACHE(cache) \
-    STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); } } STMT_END
+#   define ASSERT_UTF8_CACHE(cache) \
+    STMT_START { if (cache) { assert((cache)[0] <= (cache)[1]); \
+			      assert((cache)[2] <= (cache)[3]); \
+			      assert((cache)[3] <= (cache)[1]);} \
+			      } STMT_END
 #else
-#define ASSERT_UTF8_CACHE(cache) NOOP
+#   define ASSERT_UTF8_CACHE(cache) NOOP
 #endif
 
 #ifdef PERL_OLD_COPY_ON_WRITE
@@ -193,10 +190,10 @@ Perl_offer_nice_chunk(pTHX_ void *chunk, U32 chunk_size)
 #  define SvARENA_CHAIN(sv)	((sv)->sv_u.svu_rv)
 /* Whilst I'd love to do this, it seems that things like to check on
    unreferenced scalars
-#  define POSION_SV_HEAD(sv)	Poison(sv, 1, struct STRUCT_SV)
+#  define POSION_SV_HEAD(sv)	PoisonNew(sv, 1, struct STRUCT_SV)
 */
-#  define POSION_SV_HEAD(sv)	Poison(&SvANY(sv), 1, void *), \
-				Poison(&SvREFCNT(sv), 1, U32)
+#  define POSION_SV_HEAD(sv)	PoisonNew(&SvANY(sv), 1, void *), \
+				PoisonNew(&SvREFCNT(sv), 1, U32)
 #else
 #  define SvARENA_CHAIN(sv)	SvANY(sv)
 #  define POSION_SV_HEAD(sv)
@@ -681,6 +678,7 @@ Perl_sv_free_arenas(pTHX)
 void*
 Perl_get_arena(pTHX_ int arena_size)
 {
+    dVAR;
     struct arena_desc* adesc;
     struct arena_set *newroot, **aroot = (struct arena_set**) &PL_body_arenas;
     int curr;
@@ -695,7 +693,7 @@ Perl_get_arena(pTHX_ int arena_size)
 	newroot->set_size = ARENAS_PER_SET;
 	newroot->next = *aroot;
 	*aroot = newroot;
-	DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", *aroot));
+	DEBUG_m(PerlIO_printf(Perl_debug_log, "new arenaset %p\n", (void*)*aroot));
     }
 
     /* ok, now have arena-set with at least 1 empty/available arena-desc */
@@ -1068,8 +1066,9 @@ S_more_bodies (pTHX_ svtype sv_type)
     /* computed count doesnt reflect the 1st slot reservation */
     DEBUG_m(PerlIO_printf(Perl_debug_log,
 			  "arena %p end %p arena-size %d type %d size %d ct %d\n",
-			  start, end, bdp->arena_size, sv_type, body_size,
-			  bdp->arena_size / body_size));
+			  start, end,
+			  (int)bdp->arena_size, sv_type, (int)body_size,
+			  (int)bdp->arena_size / (int)body_size));
 
     *root = (void *)start;
 
@@ -1092,7 +1091,7 @@ S_more_bodies (pTHX_ svtype sv_type)
 	void ** const r3wt = &PL_body_roots[sv_type]; \
 	LOCK_SV_MUTEX; \
 	xpv = *((void **)(r3wt)) \
-	  ? *((void **)(r3wt)) : S_more_bodies(aTHX_ sv_type); \
+	  ? *((void **)(r3wt)) : more_bodies(sv_type); \
 	*(r3wt) = *(void**)(xpv); \
 	UNLOCK_SV_MUTEX; \
     } STMT_END
@@ -1322,7 +1321,7 @@ Perl_sv_upgrade(pTHX_ register SV *sv, U32 new_type)
 	    int length = old_type_details->copy;
 
 	    if (new_type_details->offset > old_type_details->offset) {
-		int difference
+		const int difference
 		    = new_type_details->offset - old_type_details->offset;
 		offset += difference;
 		length -= difference;
@@ -1720,8 +1719,29 @@ Perl_looks_like_number(pTHX_ SV *sv)
     return grok_number(sbegin, len, NULL);
 }
 
+STATIC bool
+S_glob_2number(pTHX_ GV * const gv)
+{
+    const U32 wasfake = SvFLAGS(gv) & SVf_FAKE;
+    SV *const buffer = sv_newmortal();
+
+    /* FAKE globs can get coerced, so need to turn this off temporarily if it
+       is on.  */
+    SvFAKE_off(gv);
+    gv_efullname3(buffer, gv, "*");
+    SvFLAGS(gv) |= wasfake;
+
+    /* We know that all GVs stringify to something that is not-a-number,
+	so no need to test that.  */
+    if (ckWARN(WARN_NUMERIC))
+	not_a_number(buffer);
+    /* We just want something true to return, so that S_sv_2iuv_common
+	can tail call us and return true.  */
+    return TRUE;
+}
+
 STATIC char *
-S_glob_2inpuv(pTHX_ GV *gv, STRLEN *len, bool want_number)
+S_glob_2pv(pTHX_ GV * const gv, STRLEN * const len)
 {
     const U32 wasfake = SvFLAGS(gv) & SVf_FAKE;
     SV *const buffer = sv_newmortal();
@@ -1732,17 +1752,11 @@ S_glob_2inpuv(pTHX_ GV *gv, STRLEN *len, bool want_number)
     gv_efullname3(buffer, gv, "*");
     SvFLAGS(gv) |= wasfake;
 
-    if (want_number) {
-	/* We know that all GVs stringify to something that is not-a-number,
-	   so no need to test that.  */
-	if (ckWARN(WARN_NUMERIC))
-	    not_a_number(buffer);
-	/* We just want something true to return, so that S_sv_2iuv_common
-	   can tail call us and return true.  */
-	return (char *) 1;
-    } else {
-	return SvPV(buffer, *len);
+    assert(SvPOK(buffer));
+    if (len) {
+	*len = SvCUR(buffer);
     }
+    return SvPVX(buffer);
 }
 
 /* Actually, ISO C leaves conversion of UV to IV undefined, but
@@ -1895,6 +1909,13 @@ S_sv_2iuv_common(pTHX_ SV *sv) {
 	   certainly cast into the IV range at IV_MAX, whereas the correct
 	   answer is the UV IV_MAX +1. Hence < ensures that dodgy boundary
 	   cases go to UV */
+#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
+	if (Perl_isnan(SvNVX(sv))) {
+	    SvUV_set(sv, 0);
+	    SvIsUV_on(sv);
+	    return FALSE;
+	}
+#endif
 	if (SvNVX(sv) < (NV)IV_MAX + 0.5) {
 	    SvIV_set(sv, I_V(SvNVX(sv)));
 	    if (SvNVX(sv) == (NV) SvIVX(sv)
@@ -2046,7 +2067,7 @@ S_sv_2iuv_common(pTHX_ SV *sv) {
                 if ((NV)(SvIVX(sv)) == SvNVX(sv)) {
                     SvIOK_on(sv);
                 } else {
-		    /*EMPTY*/;  /* Integer is imprecise. NOK, IOKp */
+		    NOOP;  /* Integer is imprecise. NOK, IOKp */
                 }
                 /* UV will not work better than IV */
             } else {
@@ -2061,7 +2082,7 @@ S_sv_2iuv_common(pTHX_ SV *sv) {
                     if ((NV)(SvUVX(sv)) == SvNVX(sv)) {
                         SvIOK_on(sv);
                     } else {
-			/*EMPTY*/;   /* Integer is imprecise. NOK, IOKp, is UV */
+			NOOP;   /* Integer is imprecise. NOK, IOKp, is UV */
                     }
                 }
 		SvIsUV_on(sv);
@@ -2105,9 +2126,8 @@ S_sv_2iuv_common(pTHX_ SV *sv) {
 	}
     }
     else  {
-	if (isGV_with_GP(sv)) {
-	    return (bool)PTR2IV(glob_2inpuv((GV *)sv, NULL, TRUE));
-	}
+	if (isGV_with_GP(sv))
+	    return glob_2number((GV *)sv);
 
 	if (!(SvFLAGS(sv) & SVs_PADTMP)) {
 	    if (!PL_localizing && ckWARN(WARN_UNINITIALIZED))
@@ -2457,7 +2477,7 @@ Perl_sv_2nv(pTHX_ register SV *sv)
     }
     else  {
 	if (isGV_with_GP(sv)) {
-	    glob_2inpuv((GV *)sv, NULL, TRUE);
+	    glob_2number((GV *)sv);
 	    return 0.0;
 	}
 
@@ -2640,8 +2660,9 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
 	    STRLEN len;
 
 	    if (SvIOKp(sv)) {
-		len = SvIsUV(sv) ? my_sprintf(tbuf,"%"UVuf, (UV)SvUVX(sv))
-		    : my_sprintf(tbuf,"%"IVdf, (IV)SvIVX(sv));
+		len = SvIsUV(sv)
+		    ? my_snprintf(tbuf, sizeof(tbuf), "%"UVuf, (UV)SvUVX(sv))
+		    : my_snprintf(tbuf, sizeof(tbuf), "%"IVdf, (IV)SvIVX(sv));
 	    } else {
 		Gconvert(SvNVX(sv), NV_DIG, 0, tbuf);
 		len = strlen(tbuf);
@@ -2793,9 +2814,8 @@ Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags)
 #endif
     }
     else {
-	if (isGV_with_GP(sv)) {
-	    return glob_2inpuv((GV *)sv, lp, FALSE);
-	}
+	if (isGV_with_GP(sv))
+	    return glob_2pv((GV *)sv, lp);
 
 	if (!PL_localizing && !(SvFLAGS(sv) & SVs_PADTMP) && ckWARN(WARN_UNINITIALIZED))
 	    report_uninit(sv);
@@ -3076,13 +3096,13 @@ flag off so that it looks like octets again.
 void
 Perl_sv_utf8_encode(pTHX_ register SV *sv)
 {
-    (void) sv_utf8_upgrade(sv);
     if (SvIsCOW(sv)) {
         sv_force_normal_flags(sv, 0);
     }
     if (SvREADONLY(sv)) {
 	Perl_croak(aTHX_ PL_no_modify);
     }
+    (void) sv_utf8_upgrade(sv);
     SvUTF8_off(sv);
 }
 
@@ -3279,7 +3299,7 @@ S_glob_assign_ref(pTHX_ SV *dstr, SV *sstr) {
 			   it was a const and its value changed. */
 			if (CvCONST(cv)	&& CvCONST((CV*)sref)
 			    && cv_const_sv(cv) == cv_const_sv((CV*)sref)) {
-			    /*EMPTY*/
+			    NOOP;
 			    /* They are 2 constant subroutines generated from
 			       the same constant. This probably means that
 			       they are really the "same" proxy subroutine
@@ -3301,8 +3321,9 @@ S_glob_assign_ref(pTHX_ SV *dstr, SV *sstr) {
 			}
 		    }
 		if (!intro)
-		    cv_ckproto(cv, (GV*)dstr,
-			       SvPOK(sref) ? SvPVX_const(sref) : NULL);
+		    cv_ckproto_len(cv, (GV*)dstr,
+				   SvPOK(sref) ? SvPVX_const(sref) : NULL,
+				   SvPOK(sref) ? SvCUR(sref) : 0);
 	    }
 	    GvCVGEN(dstr) = 0; /* Switch off cacheness. */
 	    GvASSUMECV_on(dstr);
@@ -3441,7 +3462,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags)
 
     case SVt_PVGV:
 	if (dtype <= SVt_PVGV) {
-	    S_glob_assign_glob(aTHX_ dstr, sstr, dtype);
+	    glob_assign_glob(dstr, sstr, dtype);
 	    return;
 	}
 	/*FALLTHROUGH*/
@@ -3454,7 +3475,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags)
 	    if ((int)SvTYPE(sstr) != stype) {
 		stype = SvTYPE(sstr);
 		if (stype == SVt_PVGV && dtype <= SVt_PVGV) {
-		    S_glob_assign_glob(aTHX_ dstr, sstr, dtype);
+		    glob_assign_glob(dstr, sstr, dtype);
 		    return;
 		}
 	    }
@@ -3482,13 +3503,13 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags)
 		GvMULTI_on(dstr);
 		return;
 	    }
-	    S_glob_assign_glob(aTHX_ dstr, sstr, dtype);
+	    glob_assign_glob(dstr, sstr, dtype);
 	    return;
 	}
 
 	if (dtype >= SVt_PV) {
 	    if (dtype == SVt_PVGV) {
-		S_glob_assign_ref(aTHX_ dstr, sstr);
+		glob_assign_ref(dstr, sstr);
 		return;
 	    }
 	    if (SvPVX_const(dstr)) {
@@ -3655,7 +3676,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags)
 	SvFLAGS(dstr) |= sflags & (SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK|SVf_UTF8
 				   |SVf_AMAGIC);
 	{
-	    const MAGIC * const smg = SvVOK(sstr);
+	    const MAGIC * const smg = SvVSTRING_mg(sstr);
 	    if (smg) {
 		sv_magic(dstr, NULL, PERL_MAGIC_vstring,
 			 smg->mg_ptr, smg->mg_len);
@@ -3877,21 +3898,27 @@ Perl_sv_setpv_mg(pTHX_ register SV *sv, register const char *ptr)
 }
 
 /*
-=for apidoc sv_usepvn
-
-Tells an SV to use C<ptr> to find its string value.  Normally the string is
-stored inside the SV but sv_usepvn allows the SV to use an outside string.
-The C<ptr> should point to memory that was allocated by C<malloc>.  The
-string length, C<len>, must be supplied.  This function will realloc the
-memory pointed to by C<ptr>, so that pointer should not be freed or used by
-the programmer after giving it to sv_usepvn.  Does not handle 'set' magic.
-See C<sv_usepvn_mg>.
+=for apidoc sv_usepvn_flags
+
+Tells an SV to use C<ptr> to find its string value.  Normally the
+string is stored inside the SV but sv_usepvn allows the SV to use an
+outside string.  The C<ptr> should point to memory that was allocated
+by C<malloc>.  The string length, C<len>, must be supplied.  By default
+this function will realloc (i.e. move) the memory pointed to by C<ptr>,
+so that pointer should not be freed or used by the programmer after
+giving it to sv_usepvn, and neither should any pointers from "behind"
+that pointer (e.g. ptr + 1) be used.
+
+If C<flags> & SV_SMAGIC is true, will call SvSETMAGIC. If C<flags> &
+SV_HAS_TRAILING_NUL is true, then C<ptr[len]> must be NUL, and the realloc
+will be skipped. (i.e. the buffer is actually at least 1 byte longer than
+C<len>, and already meets the requirements for storing in C<SvPVX>)
 
 =cut
 */
 
 void
-Perl_sv_usepvn(pTHX_ register SV *sv, register char *ptr, register STRLEN len)
+Perl_sv_usepvn_flags(pTHX_ SV *sv, char *ptr, STRLEN len, U32 flags)
 {
     dVAR;
     STRLEN allocate;
@@ -3899,34 +3926,43 @@ Perl_sv_usepvn(pTHX_ register SV *sv, register char *ptr, register STRLEN len)
     SvUPGRADE(sv, SVt_PV);
     if (!ptr) {
 	(void)SvOK_off(sv);
+	if (flags & SV_SMAGIC)
+	    SvSETMAGIC(sv);
 	return;
     }
     if (SvPVX_const(sv))
 	SvPV_free(sv);
 
-    allocate = PERL_STRLEN_ROUNDUP(len + 1);
-    ptr = saferealloc (ptr, allocate);
+    if (flags & SV_HAS_TRAILING_NUL)
+	assert(ptr[len] == '\0');
+
+    allocate = (flags & SV_HAS_TRAILING_NUL)
+	? len + 1: PERL_STRLEN_ROUNDUP(len + 1);
+    if (flags & SV_HAS_TRAILING_NUL) {
+	/* It's long enough - do nothing.
+	   Specfically Perl_newCONSTSUB is relying on this.  */
+    } else {
+#ifdef DEBUGGING
+	/* Force a move to shake out bugs in callers.  */
+	char *new_ptr = safemalloc(allocate);
+	Copy(ptr, new_ptr, len, char);
+	PoisonFree(ptr,len,char);
+	Safefree(ptr);
+	ptr = new_ptr;
+#else
+	ptr = saferealloc (ptr, allocate);
+#endif
+    }
     SvPV_set(sv, ptr);
     SvCUR_set(sv, len);
     SvLEN_set(sv, allocate);
-    *SvEND(sv) = '\0';
+    if (!(flags & SV_HAS_TRAILING_NUL)) {
+	*SvEND(sv) = '\0';
+    }
     (void)SvPOK_only_UTF8(sv);		/* validate pointer */
     SvTAINT(sv);
-}
-
-/*
-=for apidoc sv_usepvn_mg
-
-Like C<sv_usepvn>, but also handles 'set' magic.
-
-=cut
-*/
-
-void
-Perl_sv_usepvn_mg(pTHX_ register SV *sv, register char *ptr, register STRLEN len)
-{
-    sv_usepvn(sv,ptr,len);
-    SvSETMAGIC(sv);
+    if (flags & SV_SMAGIC)
+	SvSETMAGIC(sv);
 }
 
 #ifdef PERL_OLD_COPY_ON_WRITE
@@ -4485,6 +4521,8 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam
     case PERL_MAGIC_qr:
 	vtable = &PL_vtbl_regexp;
 	break;
+    case PERL_MAGIC_hints:
+	/* As this vtable is all NULL, we can reuse it.  */
     case PERL_MAGIC_sig:
 	vtable = &PL_vtbl_sig;
 	break;
@@ -4524,6 +4562,9 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam
     case PERL_MAGIC_backref:
 	vtable = &PL_vtbl_backref;
 	break;
+    case PERL_MAGIC_hintselem:
+	vtable = &PL_vtbl_hintselem;
+	break;
     case PERL_MAGIC_ext:
 	/* Reserved for use by extensions not perl internals.	        */
 	/* Useful for attaching extension internal data to perl vars.	*/
@@ -4576,7 +4617,7 @@ Perl_sv_unmagic(pTHX_ SV *sv, int type)
 		    Safefree(mg->mg_ptr);
 		else if (mg->mg_len == HEf_SVKEY)
 		    SvREFCNT_dec((SV*)mg->mg_ptr);
-		else if (mg->mg_type == PERL_MAGIC_utf8 && mg->mg_ptr)
+		else if (mg->mg_type == PERL_MAGIC_utf8)
 		    Safefree(mg->mg_ptr);
             }
 	    if (mg->mg_flags & MGf_REFCOUNTED)
@@ -5034,10 +5075,8 @@ Perl_sv_clear(pTHX_ register SV *sv)
 	}
     }
     if (type >= SVt_PVMG) {
-	HV *ourstash;
-	if ((type == SVt_PVMG || type == SVt_PVGV) &&
-	    (ourstash = OURSTASH(sv))) {
-	    SvREFCNT_dec(ourstash);
+	if ((type == SVt_PVMG || type == SVt_PVGV) && SvPAD_OUR(sv)) {
+	    SvREFCNT_dec(OURSTASH(sv));
 	} else if (SvMAGIC(sv))
 	    mg_free(sv);
 	if (type == SVt_PVMG && SvPAD_TYPED(sv))
@@ -5270,8 +5309,10 @@ UTF-8 bytes as a single character. Handles magic and type coercion.
 
 /*
  * The length is cached in PERL_UTF8_magic, in the mg_len field.  Also the
- * mg_ptr is used, by sv_pos_u2b(), see the comments of S_utf8_mg_pos_init().
- * (Note that the mg_len is not the length of the mg_ptr field.)
+ * mg_ptr is used, by sv_pos_u2b() and sv_pos_b2u() - see the comments below.
+ * (Note that the mg_len is not the length of the mg_ptr field.
+ * This allows the cache to store the character length of the string without
+ * needing to malloc() extra storage to attach to the mg_ptr.)
  *
  */
 
@@ -5285,185 +5326,194 @@ Perl_sv_len_utf8(pTHX_ register SV *sv)
 	return mg_length(sv);
     else
     {
-	STRLEN len, ulen;
+	STRLEN len;
 	const U8 *s = (U8*)SvPV_const(sv, len);
-	MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0;
 
-	if (mg && mg->mg_len != -1 && (mg->mg_len > 0 || len == 0)) {
-	    ulen = mg->mg_len;
-#ifdef PERL_UTF8_CACHE_ASSERT
-	    assert(ulen == Perl_utf8_length(aTHX_ s, s + len));
-#endif
-	}
-	else {
-	    ulen = Perl_utf8_length(aTHX_ s, s + len);
-	    if (!mg && !SvREADONLY(sv)) {
-		sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0);
-		mg = mg_find(sv, PERL_MAGIC_utf8);
-		assert(mg);
+	if (PL_utf8cache) {
+	    STRLEN ulen;
+	    MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : 0;
+
+	    if (mg && mg->mg_len != -1) {
+		ulen = mg->mg_len;
+		if (PL_utf8cache < 0) {
+		    const STRLEN real = Perl_utf8_length(aTHX_ s, s + len);
+		    if (real != ulen) {
+			/* Need to turn the assertions off otherwise we may
+			   recurse infinitely while printing error messages.
+			*/
+			SAVEI8(PL_utf8cache);
+			PL_utf8cache = 0;
+			Perl_croak(aTHX_ "panic: sv_len_utf8 cache %"UVf
+				   " real %"UVf" for %"SVf,
+				   (UV) ulen, (UV) real, (void*)sv);
+		    }
+		}
 	    }
-	    if (mg)
-		mg->mg_len = ulen;
+	    else {
+		ulen = Perl_utf8_length(aTHX_ s, s + len);
+		if (!SvREADONLY(sv)) {
+		    if (!mg) {
+			mg = sv_magicext(sv, 0, PERL_MAGIC_utf8,
+					 &PL_vtbl_utf8, 0, 0);
+		    }
+		    assert(mg);
+		    mg->mg_len = ulen;
+		}
+	    }
+	    return ulen;
 	}
-	return ulen;
+	return Perl_utf8_length(aTHX_ s, s + len);
     }
 }
 
-/* S_utf8_mg_pos_init() is used to initialize the mg_ptr field of
- * a PERL_UTF8_magic.  The mg_ptr is used to store the mapping
- * between UTF-8 and byte offsets.  There are two (substr offset and substr
- * length, the i offset, PERL_MAGIC_UTF8_CACHESIZE) times two (UTF-8 offset
- * and byte offset) cache positions.
- *
- * The mg_len field is used by sv_len_utf8(), see its comments.
- * Note that the mg_len is not the length of the mg_ptr field.
- *
- */
-STATIC bool
-S_utf8_mg_pos_init(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i,
-		   I32 offsetp, const U8 *s, const U8 *start)
+/* Walk forwards to find the byte corresponding to the passed in UTF-8
+   offset.  */
+static STRLEN
+S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send,
+		      STRLEN uoffset)
 {
-    bool found = FALSE;
-
-    if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
-	if (!*mgp)
-	    *mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0, 0);
-	assert(*mgp);
+    const U8 *s = start;
 
-	if ((*mgp)->mg_ptr)
-	    *cachep = (STRLEN *) (*mgp)->mg_ptr;
-	else {
-	    Newxz(*cachep, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN);
-	    (*mgp)->mg_ptr = (char *) *cachep;
-	}
-	assert(*cachep);
+    while (s < send && uoffset--)
+	s += UTF8SKIP(s);
+    if (s > send) {
+	/* This is the existing behaviour. Possibly it should be a croak, as
+	   it's actually a bounds error  */
+	s = send;
+    }
+    return s - start;
+}
 
-	(*cachep)[i]   = offsetp;
-	(*cachep)[i+1] = s - start;
-	found = TRUE;
+/* Given the length of the string in both bytes and UTF-8 characters, decide
+   whether to walk forwards or backwards to find the byte corresponding to
+   the passed in UTF-8 offset.  */
+static STRLEN
+S_sv_pos_u2b_midway(const U8 *const start, const U8 *send,
+		      STRLEN uoffset, STRLEN uend)
+{
+    STRLEN backw = uend - uoffset;
+    if (uoffset < 2 * backw) {
+	/* The assumption is that going forwards is twice the speed of going
+	   forward (that's where the 2 * backw comes from).
+	   (The real figure of course depends on the UTF-8 data.)  */
+	return sv_pos_u2b_forwards(start, send, uoffset);
     }
 
-    return found;
+    while (backw--) {
+	send--;
+	while (UTF8_IS_CONTINUATION(*send))
+	    send--;
+    }
+    return send - start;
 }
 
-/*
- * S_utf8_mg_pos() is used to query and update mg_ptr field of
- * a PERL_UTF8_magic.  The mg_ptr is used to store the mapping
- * between UTF-8 and byte offsets.  See also the comments of
- * S_utf8_mg_pos_init().
- *
- */
-STATIC bool
-S_utf8_mg_pos(pTHX_ SV *sv, MAGIC **mgp, STRLEN **cachep, I32 i, I32 *offsetp, I32 uoff, const U8 **sp, const U8 *start, const U8 *send)
-{
+/* For the string representation of the given scalar, find the byte
+   corresponding to the passed in UTF-8 offset.  uoffset0 and boffset0
+   give another position in the string, *before* the sought offset, which
+   (which is always true, as 0, 0 is a valid pair of positions), which should
+   help reduce the amount of linear searching.
+   If *mgp is non-NULL, it should point to the UTF-8 cache magic, which
+   will be used to reduce the amount of linear searching. The cache will be
+   created if necessary, and the found value offered to it for update.  */
+static STRLEN
+S_sv_pos_u2b_cached(pTHX_ SV *sv, MAGIC **mgp, const U8 *const start,
+		    const U8 *const send, STRLEN uoffset,
+		    STRLEN uoffset0, STRLEN boffset0) {
+    STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy.  */
     bool found = FALSE;
 
-    if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
-	if (!*mgp)
-	    *mgp = mg_find(sv, PERL_MAGIC_utf8);
-	if (*mgp && (*mgp)->mg_ptr) {
-	    *cachep = (STRLEN *) (*mgp)->mg_ptr;
-	    ASSERT_UTF8_CACHE(*cachep);
-	    if ((*cachep)[i] == (STRLEN)uoff)	/* An exact match. */
-                 found = TRUE;
-	    else {			/* We will skip to the right spot. */
-		 STRLEN forw  = 0;
-		 STRLEN backw = 0;
-		 const U8* p = NULL;
-
-		 /* The assumption is that going backward is half
-		  * the speed of going forward (that's where the
-		  * 2 * backw in the below comes from).  (The real
-		  * figure of course depends on the UTF-8 data.) */
-
-		 if ((*cachep)[i] > (STRLEN)uoff) {
-		      forw  = uoff;
-		      backw = (*cachep)[i] - (STRLEN)uoff;
-
-		      if (forw < 2 * backw)
-			   p = start;
-		      else
-			   p = start + (*cachep)[i+1];
-		 }
-		 /* Try this only for the substr offset (i == 0),
-		  * not for the substr length (i == 2). */
-		 else if (i == 0) { /* (*cachep)[i] < uoff */
-		      const STRLEN ulen = sv_len_utf8(sv);
-
-		      if ((STRLEN)uoff < ulen) {
-			   forw  = (STRLEN)uoff - (*cachep)[i];
-			   backw = ulen - (STRLEN)uoff;
-
-			   if (forw < 2 * backw)
-				p = start + (*cachep)[i+1];
-			   else
-				p = send;
-		      }
-
-		      /* If the string is not long enough for uoff,
-		       * we could extend it, but not at this low a level. */
-		 }
-
-		 if (p) {
-		      if (forw < 2 * backw) {
-			   while (forw--)
-				p += UTF8SKIP(p);
-		      }
-		      else {
-			   while (backw--) {
-				p--;
-				while (UTF8_IS_CONTINUATION(*p))
-				     p--;
-			   }
-		      }
-
-		      /* Update the cache. */
-		      (*cachep)[i]   = (STRLEN)uoff;
-		      (*cachep)[i+1] = p - start;
-
-		      /* Drop the stale "length" cache */
-		      if (i == 0) {
-			  (*cachep)[2] = 0;
-			  (*cachep)[3] = 0;
-		      }
-
-		      found = TRUE;
-		 }
-	    }
-	    if (found) {	/* Setup the return values. */
-		 *offsetp = (*cachep)[i+1];
-		 *sp = start + *offsetp;
-		 if (*sp >= send) {
-		      *sp = send;
-		      *offsetp = send - start;
-		 }
-		 else if (*sp < start) {
-		      *sp = start;
-		      *offsetp = 0;
-		 }
+    assert (uoffset >= uoffset0);
+
+    if (SvMAGICAL(sv) && !SvREADONLY(sv) && PL_utf8cache
+	&& (*mgp || (*mgp = mg_find(sv, PERL_MAGIC_utf8)))) {
+	if ((*mgp)->mg_ptr) {
+	    STRLEN *cache = (STRLEN *) (*mgp)->mg_ptr;
+	    if (cache[0] == uoffset) {
+		/* An exact match. */
+		return cache[1];
+	    }
+	    if (cache[2] == uoffset) {
+		/* An exact match. */
+		return cache[3];
+	    }
+
+	    if (cache[0] < uoffset) {
+		/* The cache already knows part of the way.   */
+		if (cache[0] > uoffset0) {
+		    /* The cache knows more than the passed in pair  */
+		    uoffset0 = cache[0];
+		    boffset0 = cache[1];
+		}
+		if ((*mgp)->mg_len != -1) {
+		    /* And we know the end too.  */
+		    boffset = boffset0
+			+ sv_pos_u2b_midway(start + boffset0, send,
+					      uoffset - uoffset0,
+					      (*mgp)->mg_len - uoffset0);
+		} else {
+		    boffset = boffset0
+			+ sv_pos_u2b_forwards(start + boffset0,
+						send, uoffset - uoffset0);
+		}
+	    }
+	    else if (cache[2] < uoffset) {
+		/* We're between the two cache entries.  */
+		if (cache[2] > uoffset0) {
+		    /* and the cache knows more than the passed in pair  */
+		    uoffset0 = cache[2];
+		    boffset0 = cache[3];
+		}
+
+		boffset = boffset0
+		    + sv_pos_u2b_midway(start + boffset0,
+					  start + cache[1],
+					  uoffset - uoffset0,
+					  cache[0] - uoffset0);
+	    } else {
+		boffset = boffset0
+		    + sv_pos_u2b_midway(start + boffset0,
+					  start + cache[3],
+					  uoffset - uoffset0,
+					  cache[2] - uoffset0);
 	    }
+	    found = TRUE;
 	}
-#ifdef PERL_UTF8_CACHE_ASSERT
-	if (found) {
-	     U8 *s = start;
-	     I32 n = uoff;
+	else if ((*mgp)->mg_len != -1) {
+	    /* If we can take advantage of a passed in offset, do so.  */
+	    /* In fact, offset0 is either 0, or less than offset, so don't
+	       need to worry about the other possibility.  */
+	    boffset = boffset0
+		+ sv_pos_u2b_midway(start + boffset0, send,
+				      uoffset - uoffset0,
+				      (*mgp)->mg_len - uoffset0);
+	    found = TRUE;
+	}
+    }
 
-	     while (n-- && s < send)
-		  s += UTF8SKIP(s);
+    if (!found || PL_utf8cache < 0) {
+	const STRLEN real_boffset
+	    = boffset0 + sv_pos_u2b_forwards(start + boffset0,
+					       send, uoffset - uoffset0);
 
-	     if (i == 0) {
-		  assert(*offsetp == s - start);
-		  assert((*cachep)[0] == (STRLEN)uoff);
-		  assert((*cachep)[1] == *offsetp);
-	     }
-	     ASSERT_UTF8_CACHE(*cachep);
+	if (found && PL_utf8cache < 0) {
+	    if (real_boffset != boffset) {
+		/* Need to turn the assertions off otherwise we may recurse
+		   infinitely while printing error messages.  */
+		SAVEI8(PL_utf8cache);
+		PL_utf8cache = 0;
+		Perl_croak(aTHX_ "panic: sv_pos_u2b_cache cache %"UVf
+			   " real %"UVf" for %"SVf,
+			   (UV) boffset, (UV) real_boffset, (void*)sv);
+	    }
 	}
-#endif
+	boffset = real_boffset;
     }
 
-    return found;
+    S_utf8_mg_pos_cache_update(aTHX_ sv, mgp, boffset, uoffset, send - start);
+    return boffset;
 }
 
+
 /*
 =for apidoc sv_pos_u2b
 
@@ -5479,7 +5529,7 @@ type coercion.
 /*
  * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential
  * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and
- * byte offsets.  See also the comments of S_utf8_mg_pos().
+ * byte offsets.  See also the comments of S_utf8_mg_pos_cache_update().
  *
  */
 
@@ -5494,42 +5544,23 @@ Perl_sv_pos_u2b(pTHX_ register SV *sv, I32* offsetp, I32* lenp)
 
     start = (U8*)SvPV_const(sv, len);
     if (len) {
-	STRLEN boffset = 0;
-	STRLEN *cache = NULL;
-	const U8 *s = start;
-	I32 uoffset = *offsetp;
-	const U8 * const send = s + len;
+	STRLEN uoffset = (STRLEN) *offsetp;
+	const U8 * const send = start + len;
 	MAGIC *mg = NULL;
-	bool found = utf8_mg_pos(sv, &mg, &cache, 0, offsetp, *offsetp, &s, start, send);
-
-	 if (!found && uoffset > 0) {
-	      while (s < send && uoffset--)
-		   s += UTF8SKIP(s);
-	      if (s >= send)
-		   s = send;
-              if (utf8_mg_pos_init(sv, &mg, &cache, 0, *offsetp, s, start))
-                  boffset = cache[1];
-	      *offsetp = s - start;
-	 }
-	 if (lenp) {
-	      found = FALSE;
-	      start = s;
-              if (utf8_mg_pos(sv, &mg, &cache, 2, lenp, *lenp, &s, start, send)) {
-                  *lenp -= boffset;
-                  found = TRUE;
-              }
-	      if (!found && *lenp > 0) {
-		   I32 ulen = *lenp;
-		   if (ulen > 0)
-			while (s < send && ulen--)
-			     s += UTF8SKIP(s);
-		   if (s >= send)
-			s = send;
-                   utf8_mg_pos_init(sv, &mg, &cache, 2, *lenp, s, start);
-	      }
-	      *lenp = s - start;
-	 }
-	 ASSERT_UTF8_CACHE(cache);
+	const STRLEN boffset = sv_pos_u2b_cached(sv, &mg, start, send,
+					     uoffset, 0, 0);
+
+	*offsetp = (I32) boffset;
+
+	if (lenp) {
+	    /* Convert the relative offset to absolute.  */
+	    const STRLEN uoffset2 = uoffset + (STRLEN) *lenp;
+	    const STRLEN boffset2
+		= sv_pos_u2b_cached(sv, &mg, start, send, uoffset2,
+				      uoffset, boffset) - boffset;
+
+	    *lenp = boffset2;
+	}
     }
     else {
 	 *offsetp = 0;
@@ -5540,6 +5571,221 @@ Perl_sv_pos_u2b(pTHX_ register SV *sv, I32* offsetp, I32* lenp)
     return;
 }
 
+/* Create and update the UTF8 magic offset cache, with the proffered utf8/
+   byte length pairing. The (byte) length of the total SV is passed in too,
+   as blen, because for some (more esoteric) SVs, the call to SvPV_const()
+   may not have updated SvCUR, so we can't rely on reading it directly.
+
+   The proffered utf8/byte length pairing isn't used if the cache already has
+   two pairs, and swapping either for the proffered pair would increase the
+   RMS of the intervals between known byte offsets.
+
+   The cache itself consists of 4 STRLEN values
+   0: larger UTF-8 offset
+   1: corresponding byte offset
+   2: smaller UTF-8 offset
+   3: corresponding byte offset
+
+   Unused cache pairs have the value 0, 0.
+   Keeping the cache "backwards" means that the invariant of
+   cache[0] >= cache[2] is maintained even with empty slots, which means that
+   the code that uses it doesn't need to worry if only 1 entry has actually
+   been set to non-zero.  It also makes the "position beyond the end of the
+   cache" logic much simpler, as the first slot is always the one to start
+   from.   
+*/
+static void
+S_utf8_mg_pos_cache_update(pTHX_ SV *sv, MAGIC **mgp, STRLEN byte, STRLEN utf8,
+			   STRLEN blen)
+{
+    STRLEN *cache;
+    if (SvREADONLY(sv))
+	return;
+
+    if (!*mgp) {
+	*mgp = sv_magicext(sv, 0, PERL_MAGIC_utf8, (MGVTBL*)&PL_vtbl_utf8, 0,
+			   0);
+	(*mgp)->mg_len = -1;
+    }
+    assert(*mgp);
+
+    if (!(cache = (STRLEN *)(*mgp)->mg_ptr)) {
+	Newxz(cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN);
+	(*mgp)->mg_ptr = (char *) cache;
+    }
+    assert(cache);
+
+    if (PL_utf8cache < 0) {
+	const U8 *start = (const U8 *) SvPVX_const(sv);
+	const U8 *const end = start + byte;
+	STRLEN realutf8 = 0;
+
+	while (start < end) {
+	    start += UTF8SKIP(start);
+	    realutf8++;
+	}
+
+	/* Can't use S_sv_pos_b2u_forwards as it will scream warnings on
+	   surrogates.  FIXME - is it inconsistent that b2u warns, but u2b
+	   doesn't?  I don't know whether this difference was introduced with
+	   the caching code in 5.8.1.  */
+
+	if (realutf8 != utf8) {
+	    /* Need to turn the assertions off otherwise we may recurse
+	       infinitely while printing error messages.  */
+	    SAVEI8(PL_utf8cache);
+	    PL_utf8cache = 0;
+	    Perl_croak(aTHX_ "panic: utf8_mg_pos_cache_update cache %"UVf
+		       " real %"UVf" for %"SVf, (UV) utf8, (UV) realutf8, (void*)sv);
+	}
+    }
+
+    /* Cache is held with the later position first, to simplify the code
+       that deals with unbounded ends.  */
+       
+    ASSERT_UTF8_CACHE(cache);
+    if (cache[1] == 0) {
+	/* Cache is totally empty  */
+	cache[0] = utf8;
+	cache[1] = byte;
+    } else if (cache[3] == 0) {
+	if (byte > cache[1]) {
+	    /* New one is larger, so goes first.  */
+	    cache[2] = cache[0];
+	    cache[3] = cache[1];
+	    cache[0] = utf8;
+	    cache[1] = byte;
+	} else {
+	    cache[2] = utf8;
+	    cache[3] = byte;
+	}
+    } else {
+#define THREEWAY_SQUARE(a,b,c,d) \
+	    ((float)((d) - (c))) * ((float)((d) - (c))) \
+	    + ((float)((c) - (b))) * ((float)((c) - (b))) \
+	       + ((float)((b) - (a))) * ((float)((b) - (a)))
+
+	/* Cache has 2 slots in use, and we know three potential pairs.
+	   Keep the two that give the lowest RMS distance. Do the
+	   calcualation in bytes simply because we always know the byte
+	   length.  squareroot has the same ordering as the positive value,
+	   so don't bother with the actual square root.  */
+	const float existing = THREEWAY_SQUARE(0, cache[3], cache[1], blen);
+	if (byte > cache[1]) {
+	    /* New position is after the existing pair of pairs.  */
+	    const float keep_earlier
+		= THREEWAY_SQUARE(0, cache[3], byte, blen);
+	    const float keep_later
+		= THREEWAY_SQUARE(0, cache[1], byte, blen);
+
+	    if (keep_later < keep_earlier) {
+		if (keep_later < existing) {
+		    cache[2] = cache[0];
+		    cache[3] = cache[1];
+		    cache[0] = utf8;
+		    cache[1] = byte;
+		}
+	    }
+	    else {
+		if (keep_earlier < existing) {
+		    cache[0] = utf8;
+		    cache[1] = byte;
+		}
+	    }
+	}
+	else if (byte > cache[3]) {
+	    /* New position is between the existing pair of pairs.  */
+	    const float keep_earlier
+		= THREEWAY_SQUARE(0, cache[3], byte, blen);
+	    const float keep_later
+		= THREEWAY_SQUARE(0, byte, cache[1], blen);
+
+	    if (keep_later < keep_earlier) {
+		if (keep_later < existing) {
+		    cache[2] = utf8;
+		    cache[3] = byte;
+		}
+	    }
+	    else {
+		if (keep_earlier < existing) {
+		    cache[0] = utf8;
+		    cache[1] = byte;
+		}
+	    }
+	}
+	else {
+ 	    /* New position is before the existing pair of pairs.  */
+	    const float keep_earlier
+		= THREEWAY_SQUARE(0, byte, cache[3], blen);
+	    const float keep_later
+		= THREEWAY_SQUARE(0, byte, cache[1], blen);
+
+	    if (keep_later < keep_earlier) {
+		if (keep_later < existing) {
+		    cache[2] = utf8;
+		    cache[3] = byte;
+		}
+	    }
+	    else {
+		if (keep_earlier < existing) {
+		    cache[0] = cache[2];
+		    cache[1] = cache[3];
+		    cache[2] = utf8;
+		    cache[3] = byte;
+		}
+	    }
+	}
+    }
+    ASSERT_UTF8_CACHE(cache);
+}
+
+/* If we don't know the character offset of the end of a region, our only
+   option is to walk forwards to the target byte offset.  */
+static STRLEN
+S_sv_pos_b2u_forwards(pTHX_ const U8 *s, const U8 *const target)
+{
+    STRLEN len = 0;
+    while (s < target) {
+	STRLEN n = 1;
+
+	/* Call utf8n_to_uvchr() to validate the sequence
+	 * (unless a simple non-UTF character) */
+	if (!UTF8_IS_INVARIANT(*s))
+	    utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0);
+	if (n > 0) {
+	    s += n;
+	    len++;
+	}
+	else
+	    break;
+    }
+    return len;
+}
+
+/* We already know all of the way, now we may be able to walk back.  The same
+   assumption is made as in S_sv_pos_u2b_midway(), namely that walking
+   backward is half the speed of walking forward. */
+static STRLEN
+S_sv_pos_b2u_midway(pTHX_ const U8 *s, const U8 *const target, const U8 *end,
+		    STRLEN endu)
+{
+    const STRLEN forw = target - s;
+    STRLEN backw = end - target;
+
+    if (forw < 2 * backw) {
+	return S_sv_pos_b2u_forwards(aTHX_ s, target);
+    }
+
+    while (end > target) {
+	end--;
+	while (UTF8_IS_CONTINUATION(*end)) {
+	    end--;
+	}
+	endu--;
+    }
+    return endu;
+}
+
 /*
 =for apidoc sv_pos_b2u
 
@@ -5553,121 +5799,98 @@ Handles magic and type coercion.
 /*
  * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential
  * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and
- * byte offsets.  See also the comments of S_utf8_mg_pos().
+ * byte offsets.
  *
  */
-
 void
 Perl_sv_pos_b2u(pTHX_ register SV* sv, I32* offsetp)
 {
     const U8* s;
-    STRLEN len;
+    const STRLEN byte = *offsetp;
+    STRLEN len = 0; /* Actually always set, but let's keep gcc happy.  */
+    STRLEN blen;
+    MAGIC* mg = NULL;
+    const U8* send;
+    bool found = FALSE;
 
     if (!sv)
 	return;
 
-    s = (const U8*)SvPV_const(sv, len);
-    if ((I32)len < *offsetp)
-	Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset");
-    else {
-	const U8* send = s + *offsetp;
-	MAGIC* mg = NULL;
-	STRLEN *cache = NULL;
-
-	len = 0;
-
-	if (SvMAGICAL(sv) && !SvREADONLY(sv)) {
-	    mg = mg_find(sv, PERL_MAGIC_utf8);
-	    if (mg && mg->mg_ptr) {
-		cache = (STRLEN *) mg->mg_ptr;
-		if (cache[1] == (STRLEN)*offsetp) {
-                    /* An exact match. */
-                    *offsetp = cache[0];
+    s = (const U8*)SvPV_const(sv, blen);
 
-		    return;
-		}
-		else if (cache[1] < (STRLEN)*offsetp) {
-		    /* We already know part of the way. */
-		    len = cache[0];
-		    s  += cache[1];
-		    /* Let the below loop do the rest. */
-		}
-		else { /* cache[1] > *offsetp */
-		    /* We already know all of the way, now we may
-		     * be able to walk back.  The same assumption
-		     * is made as in S_utf8_mg_pos(), namely that
-		     * walking backward is twice slower than
-		     * walking forward. */
-		    const STRLEN forw  = *offsetp;
-		    STRLEN backw = cache[1] - *offsetp;
-
-		    if (!(forw < 2 * backw)) {
-			const U8 *p = s + cache[1];
-			STRLEN ubackw = 0;
-			
-			cache[1] -= backw;
-
-			while (backw--) {
-			    p--;
-			    while (UTF8_IS_CONTINUATION(*p)) {
-				p--;
-				backw--;
-			    }
-			    ubackw++;
-			}
+    if (blen < byte)
+	Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset");
 
-			cache[0] -= ubackw;
-			*offsetp = cache[0];
+    send = s + byte;
 
-			/* Drop the stale "length" cache */
-			cache[2] = 0;
-			cache[3] = 0;
+    if (SvMAGICAL(sv) && !SvREADONLY(sv) && PL_utf8cache
+	&& (mg = mg_find(sv, PERL_MAGIC_utf8))) {
+	if (mg->mg_ptr) {
+	    STRLEN * const cache = (STRLEN *) mg->mg_ptr;
+	    if (cache[1] == byte) {
+		/* An exact match. */
+		*offsetp = cache[0];
+		return;
+	    }
+	    if (cache[3] == byte) {
+		/* An exact match. */
+		*offsetp = cache[2];
+		return;
+	    }
 
-			return;
-		    }
+	    if (cache[1] < byte) {
+		/* We already know part of the way. */
+		if (mg->mg_len != -1) {
+		    /* Actually, we know the end too.  */
+		    len = cache[0]
+			+ S_sv_pos_b2u_midway(aTHX_ s + cache[1], send,
+					      s + blen, mg->mg_len - cache[0]);
+		} else {
+		    len = cache[0]
+			+ S_sv_pos_b2u_forwards(aTHX_ s + cache[1], send);
 		}
 	    }
-	    ASSERT_UTF8_CACHE(cache);
-	}
+	    else if (cache[3] < byte) {
+		/* We're between the two cached pairs, so we do the calculation
+		   offset by the byte/utf-8 positions for the earlier pair,
+		   then add the utf-8 characters from the string start to
+		   there.  */
+		len = S_sv_pos_b2u_midway(aTHX_ s + cache[3], send,
+					  s + cache[1], cache[0] - cache[2])
+		    + cache[2];
 
-	while (s < send) {
-	    STRLEN n = 1;
-
-	    /* Call utf8n_to_uvchr() to validate the sequence
-	     * (unless a simple non-UTF character) */
-	    if (!UTF8_IS_INVARIANT(*s))
-		utf8n_to_uvchr(s, UTF8SKIP(s), &n, 0);
-	    if (n > 0) {
-		s += n;
-		len++;
 	    }
-	    else
-		break;
-	}
+	    else { /* cache[3] > byte */
+		len = S_sv_pos_b2u_midway(aTHX_ s, send, s + cache[3],
+					  cache[2]);
 
-	if (!SvREADONLY(sv)) {
-	    if (!mg) {
-		sv_magic(sv, 0, PERL_MAGIC_utf8, 0, 0);
-		mg = mg_find(sv, PERL_MAGIC_utf8);
 	    }
-	    assert(mg);
+	    ASSERT_UTF8_CACHE(cache);
+	    found = TRUE;
+	} else if (mg->mg_len != -1) {
+	    len = S_sv_pos_b2u_midway(aTHX_ s, send, s + blen, mg->mg_len);
+	    found = TRUE;
+	}
+    }
+    if (!found || PL_utf8cache < 0) {
+	const STRLEN real_len = S_sv_pos_b2u_forwards(aTHX_ s, send);
 
-	    if (!mg->mg_ptr) {
-		Newxz(cache, PERL_MAGIC_UTF8_CACHESIZE * 2, STRLEN);
-		mg->mg_ptr = (char *) cache;
+	if (found && PL_utf8cache < 0) {
+	    if (len != real_len) {
+		/* Need to turn the assertions off otherwise we may recurse
+		   infinitely while printing error messages.  */
+		SAVEI8(PL_utf8cache);
+		PL_utf8cache = 0;
+		Perl_croak(aTHX_ "panic: sv_pos_b2u cache %"UVf
+			   " real %"UVf" for %"SVf,
+			   (UV) len, (UV) real_len, (void*)sv);
 	    }
-	    assert(cache);
-
-	    cache[0] = len;
-	    cache[1] = *offsetp;
-	    /* Drop the stale "length" cache */
-	    cache[2] = 0;
-	    cache[3] = 0;
 	}
-
-	*offsetp = len;
+	len = real_len;
     }
-    return;
+    *offsetp = len;
+
+    S_utf8_mg_pos_cache_update(aTHX_ sv, &mg, byte, len, blen);
 }
 
 /*
@@ -5943,8 +6166,12 @@ Perl_sv_collxfrm(pTHX_ SV *sv, STRLEN *nxp)
 		return xf + sizeof(PL_collation_ix);
 	    }
 	    if (! mg) {
-		sv_magic(sv, 0, PERL_MAGIC_collxfrm, 0, 0);
-		mg = mg_find(sv, PERL_MAGIC_collxfrm);
+#ifdef PERL_OLD_COPY_ON_WRITE
+		if (SvIsCOW(sv))
+		    sv_force_normal_flags(sv, 0);
+#endif
+		mg = sv_magicext(sv, 0, PERL_MAGIC_collxfrm, &PL_vtbl_collxfrm,
+				 0, 0);
 		assert(mg);
 	    }
 	    mg->mg_ptr = xf;
@@ -5989,7 +6216,6 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append)
     register I32 cnt;
     I32 i = 0;
     I32 rspara = 0;
-    I32 recsize;
 
     if (SvTHINKFIRST(sv))
 	sv_force_normal_flags(sv, append ? 0 : SV_COW_DROP_PV);
@@ -6030,9 +6256,9 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append)
     }
     else if (RsSNARF(PL_rs)) {
     	/* If it is a regular disk file use size from stat() as estimate
-	   of amount we are going to read - may result in malloc-ing
-	   more memory than we realy need if layers bellow reduce
-	   size we read (e.g. CRLF or a gzip layer)
+	   of amount we are going to read -- may result in mallocing
+	   more memory than we really need if the layers below reduce
+	   the size we read (e.g. CRLF or a gzip layer).
 	 */
 	Stat_t st;
 	if (!PerlLIO_fstat(PerlIO_fileno(fp), &st) && S_ISREG(st.st_mode))  {
@@ -6047,9 +6273,10 @@ Perl_sv_gets(pTHX_ register SV *sv, register PerlIO *fp, I32 append)
     else if (RsRECORD(PL_rs)) {
       I32 bytesread;
       char *buffer;
+      U32 recsize;
 
       /* Grab the size of the record we're getting */
-      recsize = SvIV(SvRV(PL_rs));
+      recsize = SvUV(SvRV(PL_rs)); /* RsRECORD() guarantees > 0. */
       buffer = SvGROW(sv, (STRLEN)(recsize + append + 1)) + append;
       /* Go yank in */
 #ifdef VMS
@@ -6302,7 +6529,7 @@ screamer2:
 	     *
 	     * - jik 9/25/96
 	     */
-	    if (!(cnt < sizeof(buf) && PerlIO_eof(fp)))
+	    if (!(cnt < (I32)sizeof(buf) && PerlIO_eof(fp)))
 		goto screamer2;
 	}
 
@@ -6752,12 +6979,15 @@ Perl_newSVhek(pTHX_ const HEK *hek)
 	    SvUTF8_on (sv);
 	    Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */
 	    return sv;
-	} else if (flags & HVhek_REHASH) {
+	} else if (flags & (HVhek_REHASH|HVhek_UNSHARED)) {
 	    /* We don't have a pointer to the hv, so we have to replicate the
 	       flag into every HEK. This hv is using custom a hasing
 	       algorithm. Hence we can't return a shared string scalar, as
 	       that would contain the (wrong) hash value, and might get passed
-	       into an hv routine with a regular hash  */
+	       into an hv routine with a regular hash.
+	       Similarly, a hash that isn't using shared hash keys has to have
+	       the flag in every key so that we know not to try to call
+	       share_hek_kek on it.  */
 
 	    SV * const sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek));
 	    if (HEK_UTF8(hek))
@@ -6765,9 +6995,23 @@ Perl_newSVhek(pTHX_ const HEK *hek)
 	    return sv;
 	}
 	/* This will be overwhelminly the most common case.  */
-	return newSVpvn_share(HEK_KEY(hek),
-			      (HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)),
-			      HEK_HASH(hek));
+	{
+	    /* Inline most of newSVpvn_share(), because share_hek_hek() is far
+	       more efficient than sharepvn().  */
+	    SV *sv;
+
+	    new_SV(sv);
+	    sv_upgrade(sv, SVt_PV);
+	    SvPV_set(sv, (char *)HEK_KEY(share_hek_hek(hek)));
+	    SvCUR_set(sv, HEK_LEN(hek));
+	    SvLEN_set(sv, 0);
+	    SvREADONLY_on(sv);
+	    SvFAKE_on(sv);
+	    SvPOK_on(sv);
+	    if (HEK_UTF8(hek))
+		SvUTF8_on(sv);
+	    return sv;
+	}
     }
 }
 
@@ -6791,6 +7035,8 @@ Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash)
     dVAR;
     register SV *sv;
     bool is_utf8 = FALSE;
+    const char *const orig_src = src;
+
     if (len < 0) {
 	STRLEN tmplen = -len;
         is_utf8 = TRUE;
@@ -6810,6 +7056,8 @@ Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash)
     SvPOK_on(sv);
     if (is_utf8)
         SvUTF8_on(sv);
+    if (src != orig_src)
+	Safefree(src);
     return sv;
 }
 
@@ -7121,7 +7369,7 @@ Perl_sv_2io(pTHX_ SV *sv)
 	else
 	    io = 0;
 	if (!io)
-	    Perl_croak(aTHX_ "Bad filehandle: %"SVf, sv);
+	    Perl_croak(aTHX_ "Bad filehandle: %"SVf, (void*)sv);
 	break;
     }
     return io;
@@ -7213,7 +7461,7 @@ Perl_sv_2cv(pTHX_ SV *sv, HV **st, GV **gvp, I32 lref)
 	    LEAVE;
 	    if (!GvCVu(gv))
 		Perl_croak(aTHX_ "Unable to create sub named \"%"SVf"\"",
-			   sv);
+			   (void*)sv);
 	}
 	return GvCVu(gv);
     }
@@ -7489,9 +7737,11 @@ Perl_newSVrv(pTHX_ SV *rv, const char *classname)
 	sv_clear(rv);
 	SvFLAGS(rv) = 0;
 	SvREFCNT(rv) = refcnt;
-    }
 
-    if (SvTYPE(rv) < SVt_RV)
+	sv_upgrade(rv, SVt_RV);
+    } else if (SvROK(rv)) {
+	SvREFCNT_dec(SvRV(rv));
+    } else if (SvTYPE(rv) < SVt_RV)
 	sv_upgrade(rv, SVt_RV);
     else if (SvTYPE(rv) > SVt_RV) {
 	SvPV_free(rv);
@@ -9016,8 +9266,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
 		 * --jhi */
 #if defined(HAS_LONG_DOUBLE)
 		elen = ((intsize == 'q')
-			? my_sprintf(PL_efloatbuf, ptr, nv)
-			: my_sprintf(PL_efloatbuf, ptr, (double)nv));
+			? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, nv)
+			: my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)nv));
 #else
 		elen = my_sprintf(PL_efloatbuf, ptr, nv);
 #endif
@@ -9068,7 +9318,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
 				       (UV)c & 0xFF);
 		} else
 		    sv_catpvs(msg, "end of string");
-		Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%"SVf, msg); /* yes, this is reentrant */
+		Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%"SVf, (void*)msg); /* yes, this is reentrant */
 	    }
 
 	    /* output mangled stuff ... */
@@ -9183,11 +9433,15 @@ ptr_table_* functions.
 
 #if defined(USE_ITHREADS)
 
+/* XXX Remove this so it doesn't have to go thru the macro and return for nothing */
 #ifndef GpREFCNT_inc
 #  define GpREFCNT_inc(gp)	((gp) ? (++(gp)->gp_refcnt, (gp)) : (GP*)NULL)
 #endif
 
 
+/* Certain cases in Perl_ss_dup have been merged, by relying on the fact
+   that currently av_dup and hv_dup are the same as sv_dup. If this changes,
+   please unmerge ss_dup.  */
 #define sv_dup_inc(s,t)	SvREFCNT_inc(sv_dup(s,t))
 #define sv_dup_inc_NN(s,t)	SvREFCNT_inc_NN(sv_dup(s,t))
 #define av_dup(s,t)	(AV*)sv_dup((SV*)s,t)
@@ -9385,7 +9639,7 @@ Perl_gp_dup(pTHX_ GP *gp, CLONE_PARAMS* param)
     ret->gp_cv		= cv_dup_inc(gp->gp_cv, param);
     ret->gp_cvgen	= gp->gp_cvgen;
     ret->gp_line	= gp->gp_line;
-    ret->gp_file	= gp->gp_file;		/* points to COP.cop_file */
+    ret->gp_file_hek	= hek_dup(gp->gp_file_hek, param);
     return ret;
 }
 
@@ -9495,7 +9749,7 @@ S_ptr_table_find(PTR_TBL_t *tbl, const void *sv) {
 	if (tblent->oldval == sv)
 	    return tblent;
     }
-    return 0;
+    return NULL;
 }
 
 void *
@@ -9503,7 +9757,7 @@ Perl_ptr_table_fetch(pTHX_ PTR_TBL_t *tbl, const void *sv)
 {
     PTR_TBL_ENT_t const *const tblent = ptr_table_find(tbl, sv);
     PERL_UNUSED_CONTEXT;
-    return tblent ? tblent->newval : (void *) 0;
+    return tblent ? tblent->newval : NULL;
 }
 
 /* add a new entry to a pointer-mapping table */
@@ -9736,7 +9990,7 @@ Perl_sv_dup(pTHX_ const SV *sstr, CLONE_PARAMS* param)
 
 	    case SVt_PVGV:
 		if (GvUNIQUE((GV*)sstr)) {
-		    /*EMPTY*/;   /* Do sharing here, and fall through */
+		    NOOP;   /* Do sharing here, and fall through */
 		}
 	    case SVt_PVIO:
 	    case SVt_PVFM:
@@ -9781,9 +10035,8 @@ Perl_sv_dup(pTHX_ const SV *sstr, CLONE_PARAMS* param)
 	       missing by always going for the destination.
 	       FIXME - instrument and check that assumption  */
 	    if (sv_type >= SVt_PVMG) {
-		HV *ourstash;
-		if ((sv_type == SVt_PVMG) && (ourstash = OURSTASH(dstr))) {
-		    OURSTASH_set(dstr, hv_dup_inc(ourstash, param));
+		if ((sv_type == SVt_PVMG) && SvPAD_OUR(dstr)) {
+		    OURSTASH_set(dstr, hv_dup_inc(OURSTASH(dstr), param));
 		} else if (SvMAGIC(dstr))
 		    SvMAGIC_set(dstr, mg_dup(SvMAGIC(dstr), param));
 		if (SvSTASH(dstr))
@@ -9847,7 +10100,7 @@ Perl_sv_dup(pTHX_ const SV *sstr, CLONE_PARAMS* param)
 		    if (IoDIRP(dstr)) {
 			IoDIRP(dstr)	= dirp_dup(IoDIRP(dstr));
 		    } else {
-			/*EMPTY*/;
+			NOOP;
 			/* IoDIRP(dstr) is already a copy of IoDIRP(sstr)  */
 		    }
 		}
@@ -10018,6 +10271,8 @@ Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max, CLONE_PARAMS* param)
 		ncx->blk_sub.hasargs	= cx->blk_sub.hasargs;
 		ncx->blk_sub.lval	= cx->blk_sub.lval;
 		ncx->blk_sub.retop	= cx->blk_sub.retop;
+		ncx->blk_sub.oldcomppad = (PAD*)ptr_table_fetch(PL_ptr_table,
+					   cx->blk_sub.oldcomppad);
 		break;
 	    case CXt_EVAL:
 		ncx->blk_eval.old_in_eval = cx->blk_eval.old_in_eval;
@@ -10169,23 +10424,12 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
 	TOPINT(nss,ix) = i;
 	switch (i) {
 	case SAVEt_ITEM:			/* normal string */
+        case SAVEt_SV:				/* scalar reference */
 	    sv = (SV*)POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = sv_dup_inc(sv, param);
 	    sv = (SV*)POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = sv_dup_inc(sv, param);
 	    break;
-        case SAVEt_SV:				/* scalar reference */
-	    sv = (SV*)POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = sv_dup_inc(sv, param);
-	    gv = (GV*)POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = gv_dup_inc(gv, param);
-	    break;
-	case SAVEt_GENERIC_PVREF:		/* generic char* */
-	    c = (char*)POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = pv_dup(c);
-	    ptr = POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
-	    break;
 	case SAVEt_SHARED_PVREF:		/* char* in shared space */
 	    c = (char*)POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = savesharedpv(c);
@@ -10199,15 +10443,10 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
 	    ptr = POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = svp_dup_inc((SV**)ptr, proto_perl);/* XXXXX */
 	    break;
-        case SAVEt_AV:				/* array reference */
-	    av = (AV*)POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = av_dup_inc(av, param);
-	    gv = (GV*)POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = gv_dup(gv, param);
-	    break;
         case SAVEt_HV:				/* hash reference */
-	    hv = (HV*)POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = hv_dup_inc(hv, param);
+        case SAVEt_AV:				/* array reference */
+	    sv = POPPTR(ss,ix);
+	    TOPPTR(nss,ix) = sv_dup_inc(sv, param);
 	    gv = (GV*)POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = gv_dup(gv, param);
 	    break;
@@ -10226,6 +10465,7 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
 	case SAVEt_I32:				/* I32 reference */
 	case SAVEt_I16:				/* I16 reference */
 	case SAVEt_I8:				/* I8 reference */
+	case SAVEt_COP_ARYBASE:			/* call CopARYBASE_set */
 	    ptr = POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
 	    i = POPINT(ss,ix);
@@ -10237,6 +10477,8 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
 	    iv = POPIV(ss,ix);
 	    TOPIV(nss,ix) = iv;
 	    break;
+	case SAVEt_HPTR:			/* HV* reference */
+	case SAVEt_APTR:			/* AV* reference */
 	case SAVEt_SPTR:			/* SV* reference */
 	    ptr = POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
@@ -10249,24 +10491,13 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
 	    ptr = POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
 	    break;
+	case SAVEt_GENERIC_PVREF:		/* generic char* */
 	case SAVEt_PPTR:			/* char* reference */
 	    ptr = POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
 	    c = (char*)POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = pv_dup(c);
 	    break;
-	case SAVEt_HPTR:			/* HV* reference */
-	    ptr = POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
-	    hv = (HV*)POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = hv_dup(hv, param);
-	    break;
-	case SAVEt_APTR:			/* AV* reference */
-	    ptr = POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = any_dup(ptr, proto_perl);
-	    av = (AV*)POPPTR(ss,ix);
-	    TOPPTR(nss,ix) = av_dup(av, param);
-	    break;
 	case SAVEt_NSTAB:
 	    gv = (GV*)POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = gv_dup(gv, param);
@@ -10378,6 +10609,17 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
 	case SAVEt_HINTS:
 	    i = POPINT(ss,ix);
 	    TOPINT(nss,ix) = i;
+	    ptr = POPPTR(ss,ix);
+	    if (ptr) {
+		HINTS_REFCNT_LOCK;
+		((struct refcounted_he *)ptr)->refcounted_he_refcnt++;
+		HINTS_REFCNT_UNLOCK;
+	    }
+	    TOPPTR(nss,ix) = ptr;
+	    if (i & HINT_LOCALIZE_HH) {
+		hv = (HV*)POPPTR(ss,ix);
+		TOPPTR(nss,ix) = hv_dup_inc(hv, param);
+	    }
 	    break;
 	case SAVEt_COMPPAD:
 	    av = (AV*)POPPTR(ss,ix);
@@ -10405,8 +10647,70 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
 	    sv = (SV*)POPPTR(ss,ix);
 	    TOPPTR(nss,ix) = sv_dup(sv, param);
 	    break;
+	case SAVEt_RE_STATE:
+	    {
+		const struct re_save_state *const old_state
+		    = (struct re_save_state *)
+		    (ss + ix - SAVESTACK_ALLOC_FOR_RE_SAVE_STATE);
+		struct re_save_state *const new_state
+		    = (struct re_save_state *)
+		    (nss + ix - SAVESTACK_ALLOC_FOR_RE_SAVE_STATE);
+
+		Copy(old_state, new_state, 1, struct re_save_state);
+		ix -= SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
+
+		new_state->re_state_bostr
+		    = pv_dup(old_state->re_state_bostr);
+		new_state->re_state_reginput
+		    = pv_dup(old_state->re_state_reginput);
+		new_state->re_state_regeol
+		    = pv_dup(old_state->re_state_regeol);
+		new_state->re_state_regstartp
+		    = any_dup(old_state->re_state_regstartp, proto_perl);
+		new_state->re_state_regendp
+		    = any_dup(old_state->re_state_regendp, proto_perl);
+		new_state->re_state_reglastparen
+		    = any_dup(old_state->re_state_reglastparen, proto_perl);
+		new_state->re_state_reglastcloseparen
+		    = any_dup(old_state->re_state_reglastcloseparen,
+			      proto_perl);
+		/* XXX This just has to be broken. The old save_re_context
+		   code did SAVEGENERICPV(PL_reg_start_tmp);
+		   PL_reg_start_tmp is char **.
+		   Look above to what the dup code does for
+		   SAVEt_GENERIC_PVREF
+		   It can never have worked.
+		   So this is merely a faithful copy of the exiting bug:  */
+		new_state->re_state_reg_start_tmp
+		    = (char **) pv_dup((char *)
+				      old_state->re_state_reg_start_tmp);
+		/* I assume that it only ever "worked" because no-one called
+		   (pseudo)fork while the regexp engine had re-entered itself.
+		*/
+#ifdef PERL_OLD_COPY_ON_WRITE
+		new_state->re_state_nrs
+		    = sv_dup(old_state->re_state_nrs, param);
+#endif
+		new_state->re_state_reg_magic
+		    = any_dup(old_state->re_state_reg_magic, proto_perl);
+		new_state->re_state_reg_oldcurpm
+		    = any_dup(old_state->re_state_reg_oldcurpm, proto_perl);
+		new_state->re_state_reg_curpm
+		    = any_dup(old_state->re_state_reg_curpm, proto_perl);
+		new_state->re_state_reg_oldsaved
+		    = pv_dup(old_state->re_state_reg_oldsaved);
+		new_state->re_state_reg_poscache
+		    = pv_dup(old_state->re_state_reg_poscache);
+		new_state->re_state_reg_starttry
+		    = pv_dup(old_state->re_state_reg_starttry);
+		break;
+	    }
+	case SAVEt_COMPILE_WARNINGS:
+	    ptr = POPPTR(ss,ix);
+	    TOPPTR(nss,ix) = DUP_WARNINGS((STRLEN*)ptr);
+	    break;
 	default:
-	    Perl_croak(aTHX_ "panic: ss_dup inconsistency");
+	    Perl_croak(aTHX_ "panic: ss_dup inconsistency (%"IVdf")", (IV) i);
 	}
     }
 
@@ -10535,7 +10839,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PERL_SET_THX(my_perl);
 
 #  ifdef DEBUGGING
-    Poison(my_perl, 1, PerlInterpreter);
+    PoisonNew(my_perl, 1, PerlInterpreter);
     PL_op = NULL;
     PL_curcop = NULL;
     PL_markstack = 0;
@@ -10569,7 +10873,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PERL_SET_THX(my_perl);
 
 #    ifdef DEBUGGING
-    Poison(my_perl, 1, PerlInterpreter);
+    PoisonNew(my_perl, 1, PerlInterpreter);
     PL_op = NULL;
     PL_curcop = NULL;
     PL_markstack = 0;
@@ -10624,7 +10928,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     SvREFCNT(&PL_sv_no)		= (~(U32)0)/2;
     SvFLAGS(&PL_sv_no)		= SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK
 				  |SVp_POK|SVf_POK|SVf_READONLY|SVt_PVNV;
-    SvPV_set(&PL_sv_no, SAVEPVN(PL_No, 0));
+    SvPV_set(&PL_sv_no, savepvn(PL_No, 0));
     SvCUR_set(&PL_sv_no, 0);
     SvLEN_set(&PL_sv_no, 1);
     SvIV_set(&PL_sv_no, 0);
@@ -10635,7 +10939,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     SvREFCNT(&PL_sv_yes)	= (~(U32)0)/2;
     SvFLAGS(&PL_sv_yes)		= SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK
 				  |SVp_POK|SVf_POK|SVf_READONLY|SVt_PVNV;
-    SvPV_set(&PL_sv_yes, SAVEPVN(PL_Yes, 1));
+    SvPV_set(&PL_sv_yes, savepvn(PL_Yes, 1));
     SvCUR_set(&PL_sv_yes, 1);
     SvLEN_set(&PL_sv_yes, 2);
     SvIV_set(&PL_sv_yes, 1);
@@ -10658,10 +10962,12 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     ptr_table_store(PL_ptr_table, proto_perl->Icompiling.cop_file, PL_compiling.cop_file);
 
     ptr_table_store(PL_ptr_table, &proto_perl->Icompiling, &PL_compiling);
-    if (!specialWARN(PL_compiling.cop_warnings))
-	PL_compiling.cop_warnings = sv_dup_inc(PL_compiling.cop_warnings, param);
-    if (!specialCopIO(PL_compiling.cop_io))
-	PL_compiling.cop_io = sv_dup_inc(PL_compiling.cop_io, param);
+    PL_compiling.cop_warnings = DUP_WARNINGS(PL_compiling.cop_warnings);
+    if (PL_compiling.cop_hints_hash) {
+	HINTS_REFCNT_LOCK;
+	PL_compiling.cop_hints_hash->refcounted_he_refcnt++;
+	HINTS_REFCNT_UNLOCK;
+    }
     PL_curcop		= (COP*)any_dup(proto_perl->Tcurcop, proto_perl);
 
     /* pseudo environmental stuff */
@@ -10715,7 +11021,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_formfeed		= sv_dup(proto_perl->Iformfeed, param);
 
     PL_maxsysfd		= proto_perl->Imaxsysfd;
-    PL_multiline	= proto_perl->Imultiline;
     PL_statusvalue	= proto_perl->Istatusvalue;
 #ifdef VMS
     PL_statusvalue_vms	= proto_perl->Istatusvalue_vms;
@@ -11223,47 +11528,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_watchok		= NULL;
 
     PL_regdummy		= proto_perl->Tregdummy;
-    PL_regprecomp	= NULL;
-    PL_regnpar		= 0;
-    PL_regsize		= 0;
     PL_colorset		= 0;		/* reinits PL_colors[] */
     /*PL_colors[6]	= {0,0,0,0,0,0};*/
-    PL_reginput		= NULL;
-    PL_regbol		= NULL;
-    PL_regeol		= NULL;
-    PL_regstartp	= (I32*)NULL;
-    PL_regendp		= (I32*)NULL;
-    PL_reglastparen	= (U32*)NULL;
-    PL_reglastcloseparen	= (U32*)NULL;
-    PL_regtill		= NULL;
-    PL_reg_start_tmp	= (char**)NULL;
-    PL_reg_start_tmpl	= 0;
-    PL_regdata		= (struct reg_data*)NULL;
-    PL_bostr		= NULL;
-    PL_reg_flags	= 0;
-    PL_reg_eval_set	= 0;
-    PL_regnarrate	= 0;
-    PL_regprogram	= (regnode*)NULL;
-    PL_regindent	= 0;
-    PL_regcc		= (CURCUR*)NULL;
-    PL_reg_call_cc	= (struct re_cc_state*)NULL;
-    PL_reg_re		= (regexp*)NULL;
-    PL_reg_ganch	= NULL;
-    PL_reg_sv		= NULL;
-    PL_reg_match_utf8	= FALSE;
-    PL_reg_magic	= (MAGIC*)NULL;
-    PL_reg_oldpos	= 0;
-    PL_reg_oldcurpm	= (PMOP*)NULL;
-    PL_reg_curpm	= (PMOP*)NULL;
-    PL_reg_oldsaved	= NULL;
-    PL_reg_oldsavedlen	= 0;
-#ifdef PERL_OLD_COPY_ON_WRITE
-    PL_nrs		= NULL;
-#endif
-    PL_reg_maxiter	= 0;
-    PL_reg_leftiter	= 0;
-    PL_reg_poscache	= NULL;
-    PL_reg_poscache_size= 0;
 
     /* RE engine - function pointers */
     PL_regcompp		= proto_perl->Tregcompp;
@@ -11271,9 +11537,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_regint_start	= proto_perl->Tregint_start;
     PL_regint_string	= proto_perl->Tregint_string;
     PL_regfree		= proto_perl->Tregfree;
-
+    Zero(&PL_reg_state, 1, struct re_save_state);
     PL_reginterp_cnt	= 0;
-    PL_reg_starttry	= 0;
+    PL_regmatch_slab	= NULL;
 
     /* Pluggable optimizer */
     PL_peepp		= proto_perl->Tpeepp;
@@ -11480,16 +11746,17 @@ STATIC I32
 S_find_array_subscript(pTHX_ AV *av, SV* val)
 {
     dVAR;
-    SV** svp;
-    I32 i;
     if (!av || SvMAGICAL(av) || !AvARRAY(av) ||
 			(AvFILLp(av) > FUV_MAX_SEARCH_SIZE))
 	return -1;
 
-    svp = AvARRAY(av);
-    for (i=AvFILLp(av); i>=0; i--) {
-	if (svp[i] == val && svp[i] != &PL_sv_undef)
-	    return i;
+    if (val != &PL_sv_undef) {
+	SV ** const svp = AvARRAY(av);
+	I32 i;
+
+	for (i=AvFILLp(av); i>=0; i--)
+	    if (svp[i] == val)
+		return i;
     }
     return -1;
 }
@@ -11626,12 +11893,12 @@ S_find_uninit_var(pTHX_ OP* obase, SV* uninit_sv, bool match)
 
 	/* attempt to find a match within the aggregate */
 	if (hash) {
-	    keysv = S_find_hash_subscript(aTHX_ (HV*)sv, uninit_sv);
+	    keysv = find_hash_subscript((HV*)sv, uninit_sv);
 	    if (keysv)
 		subscript_type = FUV_SUBSCRIPT_HASH;
 	}
 	else {
-	    index = S_find_array_subscript(aTHX_ (AV*)sv, uninit_sv);
+	    index = find_array_subscript((AV*)sv, uninit_sv);
 	    if (index >= 0)
 		subscript_type = FUV_SUBSCRIPT_ARRAY;
 	}
@@ -11747,13 +12014,13 @@ S_find_uninit_var(pTHX_ OP* obase, SV* uninit_sv, bool match)
 	    /* index is an expression;
 	     * attempt to find a match within the aggregate */
 	    if (obase->op_type == OP_HELEM) {
-		SV * const keysv = S_find_hash_subscript(aTHX_ (HV*)sv, uninit_sv);
+		SV * const keysv = find_hash_subscript((HV*)sv, uninit_sv);
 		if (keysv)
 		    return varname(gv, '%', o->op_targ,
 						keysv, 0, FUV_SUBSCRIPT_HASH);
 	    }
 	    else {
-		const I32 index = S_find_array_subscript(aTHX_ (AV*)sv, uninit_sv);
+		const I32 index = find_array_subscript((AV*)sv, uninit_sv);
 		if (index >= 0)
 		    return varname(gv, '@', o->op_targ,
 					NULL, index, FUV_SUBSCRIPT_ARRAY);
@@ -11845,13 +12112,14 @@ S_find_uninit_var(pTHX_ OP* obase, SV* uninit_sv, bool match)
 	 * or are optimized away, then it's unambiguous */
 	o2 = NULL;
 	for (kid=o; kid; kid = kid->op_sibling) {
-	    if (kid &&
-		(    (kid->op_type == OP_CONST && SvOK(cSVOPx_sv(kid)))
-		  || (kid->op_type == OP_NULL  && ! (kid->op_flags & OPf_KIDS))
-		  || (kid->op_type == OP_PUSHMARK)
+	    if (kid) {
+		const OPCODE type = kid->op_type;
+		if ( (type == OP_CONST && SvOK(cSVOPx_sv(kid)))
+		  || (type == OP_NULL  && ! (kid->op_flags & OPf_KIDS))
+		  || (type == OP_PUSHMARK)
 		)
-	    )
 		continue;
+	    }
 	    if (o2) { /* more than one found */
 		o2 = NULL;
 		break;