X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regexec.c;h=06fe13a2b481fa0d53af705e8706b87f1adf1c7f;hb=fa58a56f3cdf71021d7d7a49e98845f57652a3fe;hp=5d31d735ff8b3bcfbc6c5d9cccc9e6a1d52dd738;hpb=e9105d30edfbaa7f444bc7984c9bafc8e991ad12;p=p5sagit%2Fp5-mst-13.2.git

diff --git a/regexec.c b/regexec.c
index 5d31d73..06fe13a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -117,6 +117,7 @@
 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 
+/* these are unrolled below in the CCC_TRY_XXX defined */
 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
@@ -124,6 +125,103 @@
 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 
+
+/* 
+   We dont use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
+   so that it is possible to override the option here without having to 
+   rebuild the entire core. as we are required to do if we change regcomp.h
+   which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
+*/
+#if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
+#define BROKEN_UNICODE_CHARCLASS_MAPPINGS
+#endif
+
+#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
+#define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS_ALNUM()
+#define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS_SPACE()
+#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS_DIGIT()
+#define RE_utf8_perl_word   PL_utf8_alnum
+#define RE_utf8_perl_space  PL_utf8_space
+#define RE_utf8_posix_digit PL_utf8_digit
+#define perl_word  alnum
+#define perl_space space
+#define posix_digit digit
+#else
+#define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS(perl_word,"a")
+#define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS(perl_space," ")
+#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS(posix_digit,"0")
+#define RE_utf8_perl_word   PL_utf8_perl_word
+#define RE_utf8_perl_space  PL_utf8_perl_space
+#define RE_utf8_posix_digit PL_utf8_posix_digit
+#endif
+
+
+#define CCC_TRY_AFF(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                          \
+        case NAMEL:                                                              \
+            PL_reg_flags |= RF_tainted;                                                 \
+            /* FALL THROUGH */                                                          \
+        case NAME:                                                                     \
+            if (!nextchr)                                                               \
+                sayNO;                                                                  \
+            if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
+                if (!CAT2(PL_utf8_,CLASS)) {                                            \
+                    bool ok;                                                            \
+                    ENTER;                                                              \
+                    save_re_context();                                                  \
+                    ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
+                    assert(ok);                                                         \
+                    LEAVE;                                                              \
+                }                                                                       \
+                if (!(OP(scan) == NAME                                                  \
+                    ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)   \
+                    : LCFUNC_utf8((U8*)locinput)))                                      \
+                {                                                                       \
+                    sayNO;                                                              \
+                }                                                                       \
+                locinput += PL_utf8skip[nextchr];                                       \
+                nextchr = UCHARAT(locinput);                                            \
+                break;                                                                  \
+            }                                                                           \
+            if (!(OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                  \
+                sayNO;                                                                  \
+            nextchr = UCHARAT(++locinput);                                              \
+            break
+
+#define CCC_TRY_NEG(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                        \
+        case NAMEL:                                                              \
+            PL_reg_flags |= RF_tainted;                                                 \
+            /* FALL THROUGH */                                                          \
+        case NAME :                                                                     \
+            if (!nextchr && locinput >= PL_regeol)                                      \
+                sayNO;                                                                  \
+            if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
+                if (!CAT2(PL_utf8_,CLASS)) {                                            \
+                    bool ok;                                                            \
+                    ENTER;                                                              \
+                    save_re_context();                                                  \
+                    ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
+                    assert(ok);                                                         \
+                    LEAVE;                                                              \
+                }                                                                       \
+                if ((OP(scan) == NAME                                                  \
+                    ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)    \
+                    : LCFUNC_utf8((U8*)locinput)))                                      \
+                {                                                                       \
+                    sayNO;                                                              \
+                }                                                                       \
+                locinput += PL_utf8skip[nextchr];                                       \
+                nextchr = UCHARAT(locinput);                                            \
+                break;                                                                  \
+            }                                                                           \
+            if ((OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                   \
+                sayNO;                                                                  \
+            nextchr = UCHARAT(++locinput);                                              \
+            break
+
+
+
+
+
 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 
 /* for use after a quantifier and before an EXACT-like node -- japhy */
@@ -863,9 +961,9 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 	{
 	    /* If flags & SOMETHING - do not do it many times on the same match */
 	    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
+	    /* XXX Does the destruction order has to change with do_utf8? */
 	    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
-	    if (do_utf8 ? prog->check_substr : prog->check_utf8)
-		SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
+	    SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 	    prog->check_substr = prog->check_utf8 = NULL;	/* disable */
 	    prog->float_substr = prog->float_utf8 = NULL;	/* clear */
 	    check = NULL;			/* abort */
@@ -1007,16 +1105,15 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 
 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
-    UV uvc_unfolded = 0;						    \
     switch (trie_type) {                                                    \
     case trie_utf8_fold:                                                    \
 	if ( foldlen>0 ) {                                                  \
-	    uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
 	    foldlen -= len;                                                 \
 	    uscan += len;                                                   \
 	    len=0;                                                          \
 	} else {                                                            \
-	    uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+	    uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
 	    uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
 	    foldlen -= UNISKIP( uvc );                                      \
 	    uscan = foldbuf + UNISKIP( uvc );                               \
@@ -1042,7 +1139,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
 	uvc = (UV)*uc;                                                      \
 	len = 1;                                                            \
     }                                                                       \
-									    \
     if (uvc < 256) {                                                        \
 	charid = trie->charmap[ uvc ];                                      \
     }                                                                       \
@@ -1055,9 +1151,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
 		charid = (U16)SvIV(*svpp);                                  \
 	}                                                                   \
     }                                                                       \
-    if (!charid && trie_type == trie_utf8_fold && !UTF) {		    \
-	charid = trie->charmap[uvc_unfolded];			    	    \
-    }								    	    \
 } STMT_END
 
 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
@@ -1426,8 +1519,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    break;
 	case ALNUM:
 	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_ALNUM(),
-		swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
+		LOAD_UTF8_CHARCLASS_PERL_WORD(),
+		swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
 		isALNUM(*s)
 	    );
 	case ALNUML:
@@ -1437,8 +1530,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    );
 	case NALNUM:
 	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_ALNUM(),
-		!swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
+		LOAD_UTF8_CHARCLASS_PERL_WORD(),
+		!swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
 		!isALNUM(*s)
 	    );
 	case NALNUML:
@@ -1448,8 +1541,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    );
 	case SPACE:
 	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_SPACE(),
-		*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
+		LOAD_UTF8_CHARCLASS_PERL_SPACE(),
+		*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8),
 		isSPACE(*s)
 	    );
 	case SPACEL:
@@ -1459,8 +1552,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    );
 	case NSPACE:
 	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_SPACE(),
-		!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
+		LOAD_UTF8_CHARCLASS_PERL_SPACE(),
+		!(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8)),
 		!isSPACE(*s)
 	    );
 	case NSPACEL:
@@ -1470,8 +1563,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    );
 	case DIGIT:
 	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_DIGIT(),
-		swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
+		LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
+		swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
 		isDIGIT(*s)
 	    );
 	case DIGITL:
@@ -1481,8 +1574,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 	    );
 	case NDIGIT:
 	    REXEC_FBC_CSCAN_PRELOAD(
-		LOAD_UTF8_CHARCLASS_DIGIT(),
-		!swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
+		LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
+		!swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
 		!isDIGIT(*s)
 	    );
 	case NDIGITL:
@@ -1821,24 +1914,38 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
 
     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
 	MAGIC *mg;
-
-	if (flags & REXEC_IGNOREPOS)	/* Means: check only at start */
+	if (flags & REXEC_IGNOREPOS){	/* Means: check only at start */
 	    reginfo.ganch = startpos + prog->gofs;
-	else if (sv && SvTYPE(sv) >= SVt_PVMG
+	    DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
+	      "GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
+	} else if (sv && SvTYPE(sv) >= SVt_PVMG
 		  && SvMAGIC(sv)
 		  && (mg = mg_find(sv, PERL_MAGIC_regex_global))
 		  && mg->mg_len >= 0) {
 	    reginfo.ganch = strbeg + mg->mg_len;	/* Defined pos() */
+	    DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
+		"GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
+
 	    if (prog->extflags & RXf_ANCH_GPOS) {
 	        if (s > reginfo.ganch)
 		    goto phooey;
 		s = reginfo.ganch - prog->gofs;
+	        DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
+		     "GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
+		if (s < strbeg)
+		    goto phooey;
 	    }
 	}
 	else if (data) {
 	    reginfo.ganch = strbeg + PTR2UV(data);
-	} else				/* pos() not defined */
+            DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
+		 "GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
+
+	} else {				/* pos() not defined */
 	    reginfo.ganch = strbeg;
+            DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
+		 "GPOS: reginfo.ganch = strbeg\n"));
+	}
     }
     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
         /* We have to be careful. If the previous successful match
@@ -1915,7 +2022,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN 
            and we only enter this block when the same bit is set. */
         char *tmp_s = reginfo.ganch - prog->gofs;
-	if (regtry(&reginfo, &tmp_s))
+
+	if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
 	    goto got_it;
 	goto phooey;
     }
@@ -3329,85 +3437,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 	    nextchr = UCHARAT(locinput);
 	    break;
 	    }
-	case ANYOF:
-	    if (do_utf8) {
-	        STRLEN inclasslen = PL_regeol - locinput;
-
-	        if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
-		    goto anyof_fail;
-		if (locinput >= PL_regeol)
-		    sayNO;
-		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
-		nextchr = UCHARAT(locinput);
-		break;
-	    }
-	    else {
-		if (nextchr < 0)
-		    nextchr = UCHARAT(locinput);
-		if (!REGINCLASS(rex, scan, (U8*)locinput))
-		    goto anyof_fail;
-		if (!nextchr && locinput >= PL_regeol)
-		    sayNO;
-		nextchr = UCHARAT(++locinput);
-		break;
-	    }
-	anyof_fail:
-	    /* If we might have the case of the German sharp s
-	     * in a casefolding Unicode character class. */
-
-	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
-		 locinput += SHARP_S_SKIP;
-		 nextchr = UCHARAT(locinput);
-	    }
-	    else
-		 sayNO;
-	    break;
-	case ALNUML:
-	    PL_reg_flags |= RF_tainted;
-	    /* FALL THROUGH */
-	case ALNUM:
-	    if (!nextchr)
-		sayNO;
-	    if (do_utf8) {
-		LOAD_UTF8_CHARCLASS_ALNUM();
-		if (!(OP(scan) == ALNUM
-		      ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
-		      : isALNUM_LC_utf8((U8*)locinput)))
-		{
-		    sayNO;
-		}
-		locinput += PL_utf8skip[nextchr];
-		nextchr = UCHARAT(locinput);
-		break;
-	    }
-	    if (!(OP(scan) == ALNUM
-		  ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
-		sayNO;
-	    nextchr = UCHARAT(++locinput);
-	    break;
-	case NALNUML:
-	    PL_reg_flags |= RF_tainted;
-	    /* FALL THROUGH */
-	case NALNUM:
-	    if (!nextchr && locinput >= PL_regeol)
-		sayNO;
-	    if (do_utf8) {
-		LOAD_UTF8_CHARCLASS_ALNUM();
-		if (OP(scan) == NALNUM
-		    ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
-		    : isALNUM_LC_utf8((U8*)locinput))
-		{
-		    sayNO;
-		}
-		locinput += PL_utf8skip[nextchr];
-		nextchr = UCHARAT(locinput);
-		break;
-	    }
-	    if (OP(scan) == NALNUM
-		? isALNUM(nextchr) : isALNUM_LC(nextchr))
-		sayNO;
-	    nextchr = UCHARAT(++locinput);
-	    break;
 	case BOUNDL:
 	case NBOUNDL:
 	    PL_reg_flags |= RF_tainted;
@@ -3420,7 +3449,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 		    ln = '\n';
 		else {
 		    const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
-		
+
 		    ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
 		}
 		if (OP(scan) == BOUND || OP(scan) == NBOUND) {
@@ -3449,106 +3478,49 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 				    OP(scan) == BOUNDL))
 		    sayNO;
 	    break;
-	case SPACEL:
-	    PL_reg_flags |= RF_tainted;
-	    /* FALL THROUGH */
-	case SPACE:
-	    if (!nextchr)
-		sayNO;
-	    if (do_utf8) {
-		if (UTF8_IS_CONTINUED(nextchr)) {
-		    LOAD_UTF8_CHARCLASS_SPACE();
-		    if (!(OP(scan) == SPACE
-			  ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
-			  : isSPACE_LC_utf8((U8*)locinput)))
-		    {
-			sayNO;
-		    }
-		    locinput += PL_utf8skip[nextchr];
-		    nextchr = UCHARAT(locinput);
-		    break;
-		}
-		if (!(OP(scan) == SPACE
-		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
-		    sayNO;
-		nextchr = UCHARAT(++locinput);
-	    }
-	    else {
-		if (!(OP(scan) == SPACE
-		      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
-		    sayNO;
-		nextchr = UCHARAT(++locinput);
-	    }
-	    break;
-	case NSPACEL:
-	    PL_reg_flags |= RF_tainted;
-	    /* FALL THROUGH */
-	case NSPACE:
-	    if (!nextchr && locinput >= PL_regeol)
-		sayNO;
+	case ANYOF:
 	    if (do_utf8) {
-		LOAD_UTF8_CHARCLASS_SPACE();
-		if (OP(scan) == NSPACE
-		    ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
-		    : isSPACE_LC_utf8((U8*)locinput))
-		{
+	        STRLEN inclasslen = PL_regeol - locinput;
+
+	        if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
+		    goto anyof_fail;
+		if (locinput >= PL_regeol)
 		    sayNO;
-		}
-		locinput += PL_utf8skip[nextchr];
+		locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
 		nextchr = UCHARAT(locinput);
 		break;
 	    }
-	    if (OP(scan) == NSPACE
-		? isSPACE(nextchr) : isSPACE_LC(nextchr))
-		sayNO;
-	    nextchr = UCHARAT(++locinput);
-	    break;
-	case DIGITL:
-	    PL_reg_flags |= RF_tainted;
-	    /* FALL THROUGH */
-	case DIGIT:
-	    if (!nextchr)
-		sayNO;
-	    if (do_utf8) {
-		LOAD_UTF8_CHARCLASS_DIGIT();
-		if (!(OP(scan) == DIGIT
-		      ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
-		      : isDIGIT_LC_utf8((U8*)locinput)))
-		{
+	    else {
+		if (nextchr < 0)
+		    nextchr = UCHARAT(locinput);
+		if (!REGINCLASS(rex, scan, (U8*)locinput))
+		    goto anyof_fail;
+		if (!nextchr && locinput >= PL_regeol)
 		    sayNO;
-		}
-		locinput += PL_utf8skip[nextchr];
-		nextchr = UCHARAT(locinput);
+		nextchr = UCHARAT(++locinput);
 		break;
 	    }
-	    if (!(OP(scan) == DIGIT
-		  ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
-		sayNO;
-	    nextchr = UCHARAT(++locinput);
-	    break;
-	case NDIGITL:
-	    PL_reg_flags |= RF_tainted;
-	    /* FALL THROUGH */
-	case NDIGIT:
-	    if (!nextchr && locinput >= PL_regeol)
-		sayNO;
-	    if (do_utf8) {
-		LOAD_UTF8_CHARCLASS_DIGIT();
-		if (OP(scan) == NDIGIT
-		    ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
-		    : isDIGIT_LC_utf8((U8*)locinput))
-		{
-		    sayNO;
-		}
-		locinput += PL_utf8skip[nextchr];
-		nextchr = UCHARAT(locinput);
-		break;
+	anyof_fail:
+	    /* If we might have the case of the German sharp s
+	     * in a casefolding Unicode character class. */
+
+	    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
+		 locinput += SHARP_S_SKIP;
+		 nextchr = UCHARAT(locinput);
 	    }
-	    if (OP(scan) == NDIGIT
-		? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
-		sayNO;
-	    nextchr = UCHARAT(++locinput);
+	    else
+		 sayNO;
 	    break;
+	/* Special char classes - The defines start on line 129 or so */
+	CCC_TRY_AFF( ALNUM,  ALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
+	CCC_TRY_NEG(NALNUM, NALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
+
+	CCC_TRY_AFF( SPACE,  SPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
+	CCC_TRY_NEG(NSPACE, NSPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
+
+	CCC_TRY_AFF( DIGIT,  DIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
+	CCC_TRY_NEG(NDIGIT, NDIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
+
 	case CLUMP:
 	    if (locinput >= PL_regeol)
 		sayNO;
@@ -3778,7 +3750,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 			assert(rx);
 		    }
 		    if (rx) {
-			rx = reg_temp_copy(rx);
+			rx = reg_temp_copy(NULL, rx);
 		    }
 		    else {
 			U32 pm_flags = 0;