Full initial pass at perldelta. ready for sanity checking
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 6d3da0f..337f0c4 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -475,23 +475,22 @@ static const scan_data_t zero_scan_data =
            (int)offset, RExC_precomp, RExC_precomp + offset);  \
 } STMT_END
 
-#define        vWARN(loc,m) STMT_START {                                       \
+#define        ckWARNreg(loc,m) STMT_START {                                   \
     const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION,     \
-           m, (int)offset, RExC_precomp, RExC_precomp + offset);       \
+    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+           (int)offset, RExC_precomp, RExC_precomp + offset);          \
 } STMT_END
 
-#define        vWARNdep(loc,m) STMT_START {                                    \
+#define        ckWARNregdep(loc,m) STMT_START {                                \
     const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP),         \
-           "%s" REPORT_LOCATION,                                       \
-           m, (int)offset, RExC_precomp, RExC_precomp + offset);       \
+    Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP),    \
+           m REPORT_LOCATION,                                          \
+           (int)offset, RExC_precomp, RExC_precomp + offset);          \
 } STMT_END
 
-
-#define        vWARN2(loc, m, a1) STMT_START {                                 \
+#define        ckWARN2reg(loc, m, a1) STMT_START {                             \
     const IV offset = loc - RExC_precomp;                              \
-    Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                \
+    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
            a1, (int)offset, RExC_precomp, RExC_precomp + offset);      \
 } STMT_END
 
@@ -501,12 +500,24 @@ static const scan_data_t zero_scan_data =
            a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset);  \
 } STMT_END
 
+#define        ckWARN3reg(loc, m, a1, a2) STMT_START {                         \
+    const IV offset = loc - RExC_precomp;                              \
+    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+           a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset);  \
+} STMT_END
+
 #define        vWARN4(loc, m, a1, a2, a3) STMT_START {                         \
     const IV offset = loc - RExC_precomp;                              \
     Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                \
            a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
 } STMT_END
 
+#define        ckWARN4reg(loc, m, a1, a2, a3) STMT_START {                     \
+    const IV offset = loc - RExC_precomp;                              \
+    Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,     \
+           a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
+} STMT_END
+
 #define        vWARN5(loc, m, a1, a2, a3, a4) STMT_START {                     \
     const IV offset = loc - RExC_precomp;                              \
     Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,                \
@@ -2822,13 +2833,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                 }
                             } else {
 /* 
-    Currently we assume that the trie can handle unicode and ascii
-    matches fold cased matches. If this proves true then the following
-    define will prevent tries in this situation. 
-    
-    #define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
-*/
+    Currently we do not believe that the trie logic can
+    handle case insensitive matching properly when the
+    pattern is not unicode (thus forcing unicode semantics).
+
+    If/when this is fixed the following define can be swapped
+    in below to fully enable trie logic.
+
 #define TRIE_TYPE_IS_SAFE 1
+
+*/
+#define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
+
                                 if ( last && TRIE_TYPE_IS_SAFE ) {
                                     make_trie( pRExC_state, 
                                             startbranch, first, cur, tail, count, 
@@ -3179,11 +3195,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                    (next_is_eval || !(mincount == 0 && maxcount == 1))
                    && (minnext == 0) && (deltanext == 0)
                    && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
-                   && maxcount <= REG_INFTY/3 /* Complement check for big count */
-                   && ckWARN(WARN_REGEXP))
+                   && maxcount <= REG_INFTY/3) /* Complement check for big count */
                {
-                   vWARN(RExC_parse,
-                         "Quantifier unexpected on zero-length expression");
+                   ckWARNreg(RExC_parse,
+                             "Quantifier unexpected on zero-length expression");
                }
 
                min += minnext * mincount;
@@ -3727,11 +3742,22 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     data->whilem_c = data_fake.whilem_c;
                 }
                 if (f & SCF_DO_STCLASS_AND) {
-                    const int was = (data->start_class->flags & ANYOF_EOS);
-
-                    cl_and(data->start_class, &intrnl);
-                    if (was)
-                        data->start_class->flags |= ANYOF_EOS;
+                   if (flags & SCF_DO_STCLASS_OR) {
+                       /* OR before, AND after: ideally we would recurse with
+                        * data_fake to get the AND applied by study of the
+                        * remainder of the pattern, and then derecurse;
+                        * *** HACK *** for now just treat as "no information".
+                        * See [perl #56690].
+                        */
+                       cl_init(pRExC_state, data->start_class);
+                   }  else {
+                       /* AND before and after: combine and continue */
+                       const int was = (data->start_class->flags & ANYOF_EOS);
+
+                       cl_and(data->start_class, &intrnl);
+                       if (was)
+                           data->start_class->flags |= ANYOF_EOS;
+                   }
                 }
            }
 #if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
@@ -6065,8 +6091,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                        break;
                    case KEEPCOPY_PAT_MOD: /* 'p' */
                         if (flagsp == &negflags) {
-                            if (SIZE_ONLY && ckWARN(WARN_REGEXP))
-                                vWARN(RExC_parse + 1,"Useless use of (?-p)");
+                            if (SIZE_ONLY)
+                                ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
                         } else {
                             *flagsp |= RXf_PMf_KEEPCOPY;
                         }
@@ -6138,6 +6164,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     /* Pick up the branches, linking them together. */
     parse_start = RExC_parse;   /* MJD */
     br = regbranch(pRExC_state, &flags, 1,depth+1);
+
+    if (freeze_paren) {
+        if (RExC_npar > after_freeze)
+            after_freeze = RExC_npar;
+        RExC_npar = freeze_paren;
+    }
+
     /*     branch_len = (paren != 0); */
 
     if (br == NULL)
@@ -6514,11 +6547,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        goto do_curly;
     }
   nest_check:
-    if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
-       vWARN3(RExC_parse,
-              "%.*s matches null string many times",
-              (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
-              origparse);
+    if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
+       ckWARN3reg(RExC_parse,
+                  "%.*s matches null string many times",
+                  (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
+                  origparse);
     }
 
     if (RExC_parse < RExC_end && *RExC_parse == '?') {
@@ -6553,7 +6586,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
 /* reg_namedseq(pRExC_state,UVp)
    
    This is expected to be called by a parser routine that has 
-   recognized'\N' and needs to handle the rest. RExC_parse is 
+   recognized '\N' and needs to handle the rest. RExC_parse is
    expected to point at the first char following the N at the time
    of the call.
    
@@ -6567,11 +6600,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
    be returned to indicate failure. (This will NOT be a valid pointer 
    to a regnode.)
    
-   If value is null then it is assumed that we are parsing normal text
+   If valuep is null then it is assumed that we are parsing normal text
    and inserts a new EXACT node into the program containing the resolved
    string and returns a pointer to the new node. If the string is 
    zerolength a NOTHING node is emitted.
-   
+
    On success RExC_parse is set to the char following the endbrace.
    Parsing failures will generate a fatal errorvia vFAIL(...)
    
@@ -6585,7 +6618,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
    
  */
 STATIC regnode *
-S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep) 
+S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp)
 {
     char * name;        /* start of the content of the name */
     char * endbrace;    /* endbrace following the name */
@@ -6597,8 +6630,22 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
  
     PERL_ARGS_ASSERT_REG_NAMEDSEQ;
    
-    if (*RExC_parse != '{') {
-        vFAIL("Missing braces on \\N{}");
+    if (*RExC_parse != '{' ||
+           (*RExC_parse == '{' && RExC_parse[1]
+            && strchr("0123456789", RExC_parse[1])))
+    {
+       GET_RE_DEBUG_FLAGS_DECL;
+       if (valuep)
+           /* no bare \N in a charclass */
+           vFAIL("Missing braces on \\N{}");
+       GET_RE_DEBUG_FLAGS;
+       nextchar(pRExC_state);
+       ret = reg_node(pRExC_state, REG_ANY);
+       *flagp |= HASWIDTH|SIMPLE;
+       RExC_naughty++;
+       RExC_parse--;
+        Set_Node_Length(ret, 1); /* MJD */
+       return ret;
     }
     name = RExC_parse+1;
     endbrace = strchr(RExC_parse, '}');
@@ -6617,20 +6664,30 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
             | PERL_SCAN_DISALLOW_PREFIX
             | (SIZE_ONLY ? PERL_SCAN_SILENT_ILLDIGIT : 0);
         UV cp;
-       char string;
         len = (STRLEN)(endbrace - name - 2);
         cp = grok_hex(name + 2, &len, &fl, NULL);
         if ( len != (STRLEN)(endbrace - name - 2) ) {
             cp = 0xFFFD;
         }    
-        if (cp > 0xff)
-            RExC_utf8 = 1;
         if ( valuep ) {
+           if (cp > 0xff) RExC_utf8 = 1;
             *valuep = cp;
             return NULL;
         }
-       string = (char)cp;
-        sv_str= newSVpvn(&string, 1);
+
+       /* Need to convert to utf8 if either: won't fit into a byte, or the re
+        * is going to be in utf8 and the representation changes under utf8. */
+       if (cp > 0xff || (RExC_utf8 && ! UNI_IS_INVARIANT(cp))) {
+           U8 string[UTF8_MAXBYTES+1];
+           U8 *tmps;
+           RExC_utf8 = 1;
+           tmps = uvuni_to_utf8(string, cp);
+           sv_str = newSVpvn_utf8((char*)string, tmps - string, TRUE);
+       } else {    /* Otherwise, no need for utf8, can skip that step */
+           char string;
+           string = (char)cp;
+           sv_str= newSVpvn(&string, 1);
+       }
     } else {
         /* fetch the charnames handler for this scope */
         HV * const table = GvHV(PL_hintgv);
@@ -6644,13 +6701,13 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
         sv_name = newSVpvn(name, endbrace - name);
         
         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
-            vFAIL2("Constant(\\N{%s}) unknown: "
+            vFAIL2("Constant(\\N{%" SVf "}) unknown: "
                   "(possibly a missing \"use charnames ...\")",
-                  SvPVX(sv_name));
+                  SVfARG(sv_name));
         }
         if (!cvp || !SvOK(*cvp)) { /* when $^H{charnames} = undef; */
-            vFAIL2("Constant(\\N{%s}): "
-                  "$^H{charnames} is not defined",SvPVX(sv_name));
+            vFAIL2("Constant(\\N{%" SVf "}): "
+                  "$^H{charnames} is not defined", SVfARG(sv_name));
         }
         
         
@@ -6689,8 +6746,8 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
             LEAVE ;
             
             if ( !sv_str || !SvOK(sv_str) ) {
-                vFAIL2("Constant(\\N{%s}): Call to &{$^H{charnames}} "
-                      "did not return a defined value",SvPVX(sv_name));
+                vFAIL2("Constant(\\N{%" SVf "}): Call to &{$^H{charnames}} "
+                      "did not return a defined value", SVfARG(sv_name));
             }
             if (hv_store_ent( RExC_charnames, sv_name, sv_str, 0))
                 cached = 1;
@@ -6721,20 +6778,19 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
                 *valuep = (UV)*p;
                 /* warn if we havent used the whole string? */
             }
-            if (numlen<len && SIZE_ONLY && ckWARN(WARN_REGEXP)) {
-                vWARN2(RExC_parse,
-                    "Ignoring excess chars from \\N{%s} in character class",
-                    SvPVX(sv_name)
+            if (numlen<len && SIZE_ONLY) {
+                ckWARN2reg(RExC_parse,
+                          "Ignoring excess chars from \\N{%" SVf "} in character class",
+                          SVfARG(sv_name)
                 );
             }        
-        } else if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
-            vWARN2(RExC_parse,
-                    "Ignoring zero length \\N{%s} in character class",
-                    SvPVX(sv_name)
+        } else if (SIZE_ONLY) {
+            ckWARN2reg(RExC_parse,
+                      "Ignoring zero length \\N{%" SVf "} in character class",
+                      SVfARG(sv_name)
                 );
         }
-        if (sv_name)    
-            SvREFCNT_dec(sv_name);    
+        SvREFCNT_dec(sv_name);
         if (!cached)
             SvREFCNT_dec(sv_str);    
         return len ? NULL : (regnode *)&len;
@@ -6809,15 +6865,12 @@ S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
         Set_Node_Cur_Length(ret); /* MJD */
         RExC_parse--; 
         nextchar(pRExC_state);
-    } else {
+    } else {   /* zero length */
         ret = reg_node(pRExC_state,NOTHING);
     }
-    if (!cached) {
+    SvREFCNT_dec(sv_name);
+    if (!cached)
         SvREFCNT_dec(sv_str);
-    }
-    if (sv_name) {
-        SvREFCNT_dec(sv_name); 
-    }
     return ret;
 
 }
@@ -7149,12 +7202,12 @@ tryagain:
            }
            break;
         case 'N': 
-            /* Handle \N{NAME} here and not below because it can be 
+            /* Handle \N and \N{NAME} here and not below because it can be
             multicharacter. join_exact() will join them up later on. 
             Also this makes sure that things like /\N{BLAH}+/ and 
             \N{BLAH} being multi char Just Happen. dmq*/
             ++RExC_parse;
-            ret= reg_namedseq(pRExC_state, NULL); 
+            ret= reg_namedseq(pRExC_state, NULL, flagp); 
             break;
        case 'k':    /* Handle \k<NAME> and \k'NAME' */
        parse_named_seq:
@@ -7420,6 +7473,18 @@ tryagain:
                             I32 flags = 0;
                            STRLEN numlen = 3;
                            ender = grok_oct(p, &numlen, &flags, NULL);
+
+                           /* An octal above 0xff is interpreted differently
+                            * depending on if the re is in utf8 or not.  If it
+                            * is in utf8, the value will be itself, otherwise
+                            * it is interpreted as modulo 0x100.  It has been
+                            * decided to discourage the use of octal above the
+                            * single-byte range.  For now, warn only when
+                            * it ends up modulo */
+                           if (SIZE_ONLY && ender >= 0x100
+                                   && ! UTF && ! PL_encoding) {
+                               ckWARNregdep(p, "Use of octal value above 377 is deprecated");
+                           }
                            p += numlen;
                        }
                        else {
@@ -7433,8 +7498,8 @@ tryagain:
                        {
                            SV* enc = PL_encoding;
                            ender = reg_recode((const char)(U8)ender, &enc);
-                           if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
-                               vWARN(p, "Invalid escape in the specified encoding");
+                           if (!enc && SIZE_ONLY)
+                               ckWARNreg(p, "Invalid escape in the specified encoding");
                            RExC_utf8 = 1;
                        }
                        break;
@@ -7443,8 +7508,8 @@ tryagain:
                            FAIL("Trailing \\");
                        /* FALL THROUGH */
                    default:
-                       if (!SIZE_ONLY&& isALPHA(*p) && ckWARN(WARN_REGEXP))
-                           vWARN2(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
+                       if (!SIZE_ONLY&& isALPHA(*p))
+                           ckWARN2reg(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
                        goto normal_default;
                    }
                    break;
@@ -7736,10 +7801,9 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
        while (isALNUM(*s))
            s++;
        if (*s && c == *s && s[1] == ']') {
-           if (ckWARN(WARN_REGEXP))
-               vWARN3(s+2,
-                       "POSIX syntax [%c %c] belongs inside character classes",
-                       c, c);
+           ckWARN3reg(s+2,
+                      "POSIX syntax [%c %c] belongs inside character classes",
+                      c, c);
 
            /* [[=foo=]] and [[.foo.]] are still future. */
            if (POSIXCC_NOTYET(c)) {
@@ -7794,6 +7858,22 @@ case ANYOF_N##NAME:                                     \
     what = WORD;                                        \
     break
 
+/* 
+   We dont use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
+   so that it is possible to override the option here without having to 
+   rebuild the entire core. as we are required to do if we change regcomp.h
+   which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
+*/
+#if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
+#define BROKEN_UNICODE_CHARCLASS_MAPPINGS
+#endif
+
+#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
+#define POSIX_CC_UNI_NAME(CCNAME) CCNAME
+#else
+#define POSIX_CC_UNI_NAME(CCNAME) "Posix" CCNAME
+#endif
+
 /*
    parse a class specification and produce either an ANYOF node that
    matches the pattern or if the pattern matches a single char only and
@@ -7925,7 +8005,7 @@ parseit:
                     from earlier versions, OTOH that behaviour was broken
                     as well. */
                     UV v; /* value is register so we cant & it /grrr */
-                    if (reg_namedseq(pRExC_state, &v)) {
+                    if (reg_namedseq(pRExC_state, &v, NULL)) {
                         goto parseit;
                     }
                     value= v; 
@@ -8019,16 +8099,16 @@ parseit:
                {
                    SV* enc = PL_encoding;
                    value = reg_recode((const char)(U8)value, &enc);
-                   if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
-                       vWARN(RExC_parse,
-                             "Invalid escape in the specified encoding");
+                   if (!enc && SIZE_ONLY)
+                       ckWARNreg(RExC_parse,
+                                 "Invalid escape in the specified encoding");
                    break;
                }
            default:
-               if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
-                   vWARN2(RExC_parse,
-                          "Unrecognized escape \\%c in character class passed through",
-                          (int)value);
+               if (!SIZE_ONLY && isALPHA(value))
+                   ckWARN2reg(RExC_parse,
+                              "Unrecognized escape \\%c in character class passed through",
+                              (int)value);
                break;
            }
        } /* end of \blah */
@@ -8047,14 +8127,13 @@ parseit:
            /* a bad range like a-\d, a-[:digit:] ? */
            if (range) {
                if (!SIZE_ONLY) {
-                   if (ckWARN(WARN_REGEXP)) {
-                       const int w =
-                           RExC_parse >= rangebegin ?
-                           RExC_parse - rangebegin : 0;
-                       vWARN4(RExC_parse,
+                   const int w =
+                       RExC_parse >= rangebegin ?
+                       RExC_parse - rangebegin : 0;
+                   ckWARN4reg(RExC_parse,
                               "False [] range \"%*.*s\"",
                               w, w, rangebegin);
-                   }
+
                    if (prevvalue < 256) {
                        ANYOF_BITMAP_SET(ret, prevvalue);
                        ANYOF_BITMAP_SET(ret, '-');
@@ -8082,18 +8161,24 @@ parseit:
                 * A similar issue a little earlier when switching on value.
                 * --jhi */
                switch ((I32)namedclass) {
+               
+               case _C_C_T_(ALNUMC, isALNUMC(value), POSIX_CC_UNI_NAME("Alnum"));
+               case _C_C_T_(ALPHA, isALPHA(value), POSIX_CC_UNI_NAME("Alpha"));
+               case _C_C_T_(BLANK, isBLANK(value), POSIX_CC_UNI_NAME("Blank"));
+               case _C_C_T_(CNTRL, isCNTRL(value), POSIX_CC_UNI_NAME("Cntrl"));
+               case _C_C_T_(GRAPH, isGRAPH(value), POSIX_CC_UNI_NAME("Graph"));
+               case _C_C_T_(LOWER, isLOWER(value), POSIX_CC_UNI_NAME("Lower"));
+               case _C_C_T_(PRINT, isPRINT(value), POSIX_CC_UNI_NAME("Print"));
+               case _C_C_T_(PSXSPC, isPSXSPC(value), POSIX_CC_UNI_NAME("Space"));
+               case _C_C_T_(PUNCT, isPUNCT(value), POSIX_CC_UNI_NAME("Punct"));
+               case _C_C_T_(UPPER, isUPPER(value), POSIX_CC_UNI_NAME("Upper"));
+#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
                case _C_C_T_(ALNUM, isALNUM(value), "Word");
-               case _C_C_T_(ALNUMC, isALNUMC(value), "Alnum");
-               case _C_C_T_(ALPHA, isALPHA(value), "Alpha");
-               case _C_C_T_(BLANK, isBLANK(value), "Blank");
-               case _C_C_T_(CNTRL, isCNTRL(value), "Cntrl");
-               case _C_C_T_(GRAPH, isGRAPH(value), "Graph");
-               case _C_C_T_(LOWER, isLOWER(value), "Lower");
-               case _C_C_T_(PRINT, isPRINT(value), "Print");
-               case _C_C_T_(PSXSPC, isPSXSPC(value), "Space");
-               case _C_C_T_(PUNCT, isPUNCT(value), "Punct");
                case _C_C_T_(SPACE, isSPACE(value), "SpacePerl");
-               case _C_C_T_(UPPER, isUPPER(value), "Upper");
+#else
+               case _C_C_T_(SPACE, isSPACE(value), "PerlSpace");
+               case _C_C_T_(ALNUM, isALNUM(value), "PerlWord");
+#endif         
                case _C_C_T_(XDIGIT, isXDIGIT(value), "XDigit");
                case _C_C_T_NOLOC_(VERTWS, is_VERTWS_latin1(&value), "VertSpace");
                case _C_C_T_NOLOC_(HORIZWS, is_HORIZWS_latin1(&value), "HorizSpace");
@@ -8140,7 +8225,7 @@ parseit:
                            ANYOF_BITMAP_SET(ret, value);
                    }
                    yesno = '+';
-                   what = "Digit";
+                   what = POSIX_CC_UNI_NAME("Digit");
                    break;
                case ANYOF_NDIGIT:
                    if (LOC)
@@ -8153,7 +8238,7 @@ parseit:
                            ANYOF_BITMAP_SET(ret, value);
                    }
                    yesno = '!';
-                   what = "Digit";
+                   what = POSIX_CC_UNI_NAME("Digit");
                    break;              
                case ANYOF_MAX:
                    /* this is to handle \p and \P */
@@ -8353,9 +8438,7 @@ parseit:
         *STRING(ret)= (char)value;
         STR_LEN(ret)= 1;
         RExC_emit += STR_SZ(1);
-       if (listsv) {
-           SvREFCNT_dec(listsv);
-       }
+       SvREFCNT_dec(listsv);
         return ret;
     }
     /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
@@ -8964,6 +9047,17 @@ Perl_regdump(pTHX_ const regexp *r)
 /*
 - regprop - printable representation of opcode
 */
+#define EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags) \
+STMT_START { \
+        if (do_sep) {                           \
+            Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]); \
+            if (flags & ANYOF_INVERT)           \
+                /*make sure the invert info is in each */ \
+                sv_catpvs(sv, "^");             \
+            do_sep = 0;                         \
+        }                                       \
+} STMT_END
+
 void
 Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 {
@@ -9089,6 +9183,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
     else if (k == ANYOF) {
        int i, rangestart = -1;
        const U8 flags = ANYOF_FLAGS(o);
+       int do_sep = 0;
 
        /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
        static const char * const anyofs[] = {
@@ -9104,8 +9199,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
            "[:^alpha:]",
            "[:ascii:]",
            "[:^ascii:]",
-           "[:ctrl:]",
-           "[:^ctrl:]",
+           "[:cntrl:]",
+           "[:^cntrl:]",
            "[:graph:]",
            "[:^graph:]",
            "[:lower:]",
@@ -9131,6 +9226,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
        if (flags & ANYOF_INVERT)
            sv_catpvs(sv, "^");
+       
+       /* output what the standard cp 0-255 bitmap matches */
        for (i = 0; i <= 256; i++) {
            if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
                if (rangestart == -1)
@@ -9144,15 +9241,23 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
                    sv_catpvs(sv, "-");
                    put_byte(sv, i - 1);
                }
+               do_sep = 1;
                rangestart = -1;
            }
        }
-
+        
+        EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
+        /* output any special charclass tests (used mostly under use locale) */
        if (o->flags & ANYOF_CLASS)
            for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
-               if (ANYOF_CLASS_TEST(o,i))
+               if (ANYOF_CLASS_TEST(o,i)) {
                    sv_catpv(sv, anyofs[i]);
-
+                   do_sep = 1;
+               }
+        
+        EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
+        
+        /* output information about the unicode matching */
        if (flags & ANYOF_UNICODE)
            sv_catpvs(sv, "{unicode}");
        else if (flags & ANYOF_UNICODE_ALL)
@@ -9165,7 +9270,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
            if (lv) {
                if (sw) {
                    U8 s[UTF8_MAXBYTES_CASE+1];
-               
+
                    for (i = 0; i <= 256; i++) { /* just the first 256 */
                        uvchr_to_utf8(s, i);
                        
@@ -9296,26 +9401,19 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
         ReREFCNT_dec(r->mother_re);
     } else {
         CALLREGFREE_PVT(rx); /* free the private data */
-        if (RXp_PAREN_NAMES(r))
-            SvREFCNT_dec(RXp_PAREN_NAMES(r));
+        SvREFCNT_dec(RXp_PAREN_NAMES(r));
     }        
     if (r->substrs) {
-        if (r->anchored_substr)
-            SvREFCNT_dec(r->anchored_substr);
-        if (r->anchored_utf8)
-            SvREFCNT_dec(r->anchored_utf8);
-        if (r->float_substr)
-            SvREFCNT_dec(r->float_substr);
-        if (r->float_utf8)
-            SvREFCNT_dec(r->float_utf8);
+        SvREFCNT_dec(r->anchored_substr);
+        SvREFCNT_dec(r->anchored_utf8);
+        SvREFCNT_dec(r->float_substr);
+        SvREFCNT_dec(r->float_utf8);
        Safefree(r->substrs);
     }
     RX_MATCH_COPY_FREE(rx);
 #ifdef PERL_OLD_COPY_ON_WRITE
-    if (r->saved_copy)
-        SvREFCNT_dec(r->saved_copy);
+    SvREFCNT_dec(r->saved_copy);
 #endif
-    Safefree(r->swap);
     Safefree(r->offs);
 }
 
@@ -9337,15 +9435,18 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
     
     
 REGEXP *
-Perl_reg_temp_copy (pTHX_ REGEXP *rx)
+Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
 {
-    REGEXP *ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
-    struct regexp *ret = (struct regexp *)SvANY(ret_x);
+    struct regexp *ret;
     struct regexp *const r = (struct regexp *)SvANY(rx);
     register const I32 npar = r->nparens+1;
 
     PERL_ARGS_ASSERT_REG_TEMP_COPY;
 
+    if (!ret_x)
+       ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
+    ret = (struct regexp *)SvANY(ret_x);
+    
     (void)ReREFCNT_inc(rx);
     /* We can take advantage of the existing "copied buffer" mechanism in SVs
        by pointing directly at the buffer, but flagging that the allocated
@@ -9353,7 +9454,8 @@ Perl_reg_temp_copy (pTHX_ REGEXP *rx)
        a case of zero-ing that, rather than copying the current length.  */
     SvPV_set(ret_x, RX_WRAPPED(rx));
     SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8);
-    StructCopy(&(r->xpv_cur), &(ret->xpv_cur), struct regexp_allocated);
+    memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+          sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
     SvLEN_set(ret_x, 0);
     Newx(ret->offs, npar, regexp_paren_pair);
     Copy(r->offs, ret->offs, npar, regexp_paren_pair);
@@ -9374,7 +9476,6 @@ Perl_reg_temp_copy (pTHX_ REGEXP *rx)
     ret->saved_copy = NULL;
 #endif
     ret->mother_re = rx;
-    ret->swap = NULL;
     
     return ret_x;
 }
@@ -9598,7 +9699,20 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
     ret->saved_copy = NULL;
 #endif
 
-    ret->mother_re      = NULL;
+    if (ret->mother_re) {
+       if (SvPVX_const(dstr) == SvPVX_const(ret->mother_re)) {
+           /* Our storage points directly to our mother regexp, but that's
+              1: a buffer in a different thread
+              2: something we no longer hold a reference on
+              so we need to copy it locally.  */
+           /* Note we need to sue SvCUR() on our mother_re, because it, in
+              turn, may well be pointing to its own mother_re.  */
+           SvPV_set(dstr, SAVEPVN(SvPVX_const(ret->mother_re),
+                                  SvCUR(ret->mother_re)+1));
+           SvLEN_set(dstr, SvCUR(ret->mother_re)+1);
+       }
+       ret->mother_re      = NULL;
+    }
     ret->gofs = 0;
 }
 #endif /* PERL_IN_XSUB_RE */