Add a todo note about overloadable assertions.
[p5sagit/p5-mst-13.2.git] / regcomp.c
index f0b7c5c..e7042ea 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -220,9 +220,9 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  * arg. Show regex, up to a maximum length. If it's too long, chop and add
  * "...".
  */
-#define        FAIL(m)                                                              \
+#define        FAIL(msg)                                                             \
     STMT_START {                                                             \
-        char *elipises = "";                                                 \
+        char *ellipses = "";                                                 \
         unsigned len = strlen(PL_regprecomp);                                \
                                                                              \
        if (!SIZE_ONLY)                                                      \
@@ -231,10 +231,10 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        if (len > RegexLengthToShowInErrorMessages) {                        \
             /* chop 10 shorter than the max, to ensure meaning of "..." */   \
            len = RegexLengthToShowInErrorMessages - 10;                     \
-           elipises = "...";                                                \
+           ellipses = "...";                                                \
        }                                                                    \
        Perl_croak(aTHX_ "%s in regex m/%.*s%s/",                            \
-                  m, len, PL_regprecomp, elipises);                         \
+                  msg, (int)len, PL_regprecomp, ellipses);                  \
     } STMT_END
 
 /*
@@ -242,9 +242,9 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  * args. Show regex, up to a maximum length. If it's too long, chop and add
  * "...".
  */
-#define        FAIL2(pat,m)                                                         \
+#define        FAIL2(pat,msg)                                                        \
     STMT_START {                                                             \
-        char *elipises = "";                                                 \
+        char *ellipses = "";                                                 \
         unsigned len = strlen(PL_regprecomp);                                \
                                                                              \
        if (!SIZE_ONLY)                                                      \
@@ -253,10 +253,10 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        if (len > RegexLengthToShowInErrorMessages) {                        \
             /* chop 10 shorter than the max, to ensure meaning of "..." */   \
            len = RegexLengthToShowInErrorMessages - 10;                     \
-           elipises = "...";                                                \
+           ellipses = "...";                                                \
        }                                                                    \
        S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/",                        \
-                   m, len, PL_regprecomp, elipises);                        \
+                   msg, (int)len, PL_regprecomp, ellipses);                \
     } STMT_END
 
 
@@ -268,7 +268,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
                                                                              \
       Perl_croak(aTHX_ "%s" REPORT_LOCATION,               \
-                m, offset, PL_regprecomp, PL_regprecomp + offset);          \
+                m, (int)offset, PL_regprecomp, PL_regprecomp + offset);     \
     } STMT_END
 
 /*
@@ -289,7 +289,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
                                                                              \
       S_re_croak2(aTHX_ m, REPORT_LOCATION, a1,       \
-                 offset, PL_regprecomp, PL_regprecomp + offset);            \
+                 (int)offset, PL_regprecomp, PL_regprecomp + offset);       \
     } STMT_END
 
 /*
@@ -311,7 +311,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
                                                                              \
       S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2,   \
-                 offset, PL_regprecomp, PL_regprecomp + offset);            \
+                 (int)offset, PL_regprecomp, PL_regprecomp + offset);       \
     } STMT_END
 
 /*
@@ -332,7 +332,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
                                                                              \
       S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3,\
-                 offset, PL_regprecomp, PL_regprecomp + offset);            \
+                 (int)offset, PL_regprecomp, PL_regprecomp + offset);       \
     } STMT_END
 
 /*
@@ -342,7 +342,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     STMT_START {                                                             \
       unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
       S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3, a4,\
-                 offset, PL_regprecomp, PL_regprecomp + offset);            \
+                 (int)offset, PL_regprecomp, PL_regprecomp + offset);       \
     } STMT_END
 
 
@@ -350,7 +350,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     STMT_START {                                                             \
         unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc));          \
        Perl_warner(aTHX_ WARN_REGEXP, "%s" REPORT_LOCATION,\
-                m, offset, PL_regprecomp, PL_regprecomp + offset);          \
+                m, (int)offset, PL_regprecomp, PL_regprecomp + offset);          \
     } STMT_END                                                               \
 
 
@@ -359,7 +359,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc));          \
        Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\
                  a1,                                                         \
-                offset, PL_regprecomp, PL_regprecomp + offset);             \
+                (int)offset, PL_regprecomp, PL_regprecomp + offset);        \
     } STMT_END
 
 #define        vWARN3(loc, m, a1, a2)                                               \
@@ -367,7 +367,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       unsigned offset = strlen(PL_regprecomp) - (PL_regxend - (loc));        \
        Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,                    \
                  a1, a2,                                                     \
-                offset, PL_regprecomp, PL_regprecomp + offset);             \
+                (int)offset, PL_regprecomp, PL_regprecomp + offset);        \
     } STMT_END
 
 #define        vWARN4(loc, m, a1, a2, a3)                                           \
@@ -375,7 +375,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc));            \
        Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\
                  a1, a2, a3,                                                 \
-                offset, PL_regprecomp, PL_regprecomp + offset);             \
+                (int)offset, PL_regprecomp, PL_regprecomp + offset);        \
     } STMT_END
 
 
@@ -443,7 +443,7 @@ S_cl_is_anything(pTHX_ struct regnode_charclass_class *cl)
 {
     int value;
 
-    for (value = 0; value < ANYOF_MAX; value += 2)
+    for (value = 0; value <= ANYOF_MAX; value += 2)
        if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
            return 1;
     for (value = 0; value < 256; ++value)
@@ -1963,7 +1963,10 @@ S_reg(pTHX_ I32 paren, I32 *flagp)
                    else
                        sv = newSVpvn("", 0);
 
+                   ENTER;
+                   Perl_save_re_context(aTHX);
                    rop = sv_compile_2op(sv, &sop, "re", &av);
+                   LEAVE;
 
                    n = add_data(3, "nop");
                    PL_regcomp_rx->data->data[n] = (void*)rop;
@@ -2468,9 +2471,9 @@ tryagain:
            ret = reg_node(BOL);
        break;
     case '$':
-       if (PL_regcomp_parse[1]) 
-           PL_seen_zerolen++;
        nextchar();
+       if (*PL_regcomp_parse) 
+           PL_seen_zerolen++;
        if (PL_regflags & PMf_MULTILINE)
            ret = reg_node(MEOL);
        else if (PL_regflags & PMf_SINGLELINE)
@@ -2881,7 +2884,7 @@ tryagain:
                default:
                  normal_default:
                    if ((*p & 0xc0) == 0xc0 && UTF) {
-                       ender = utf8_to_uv((U8*)p, &numlen);
+                       ender = utf8_to_uv_chk((U8*)p, &numlen, 0);
                        p += numlen;
                    }
                    else
@@ -3004,6 +3007,11 @@ S_regpposixcc(pTHX_ I32 value)
                            namedclass =
                                complement ? ANYOF_NASCII : ANYOF_ASCII;
                        break;
+                   case 'b':
+                       if (strnEQ(posixcc, "blank", 5))
+                           namedclass =
+                               complement ? ANYOF_NBLANK : ANYOF_BLANK;
+                       break;
                    case 'c':
                        if (strnEQ(posixcc, "cntrl", 5))
                            namedclass =
@@ -3035,7 +3043,7 @@ S_regpposixcc(pTHX_ I32 value)
                    case 's':
                        if (strnEQ(posixcc, "space", 5))
                            namedclass =
-                               complement ? ANYOF_NSPACE : ANYOF_SPACE;
+                               complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
                        break;
                    case 'u':
                        if (strnEQ(posixcc, "upper", 5))
@@ -3160,7 +3168,7 @@ S_regclass(pTHX)
        else if (value == '\\') {
            value = UCHARAT(PL_regcomp_parse++);
            /* Some compilers cannot handle switching on 64-bit integer
-            * values, therefore value cannot be an UV. --jhi */
+            * values, therefore the 'value' cannot be an UV. --jhi */
            switch (value) {
            case 'w':   namedclass = ANYOF_ALNUM;       break;
            case 'W':   namedclass = ANYOF_NALNUM;      break;
@@ -3339,6 +3347,24 @@ S_regclass(pTHX)
 #endif /* EBCDIC */
                    }
                    break;
+               case ANYOF_BLANK:
+                   if (LOC)
+                       ANYOF_CLASS_SET(ret, ANYOF_BLANK);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isBLANK(value))
+                               ANYOF_BITMAP_SET(ret, value);
+                   }
+                   break;
+               case ANYOF_NBLANK:
+                   if (LOC)
+                       ANYOF_CLASS_SET(ret, ANYOF_NBLANK);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isBLANK(value))
+                               ANYOF_BITMAP_SET(ret, value);
+                   }
+                   break;
                case ANYOF_CNTRL:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_CNTRL);
@@ -3412,6 +3438,24 @@ S_regclass(pTHX)
                                ANYOF_BITMAP_SET(ret, value);
                    }
                    break;
+               case ANYOF_PSXSPC:
+                   if (LOC)
+                       ANYOF_CLASS_SET(ret, ANYOF_PSXSPC);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (isPSXSPC(value))
+                               ANYOF_BITMAP_SET(ret, value);
+                   }
+                   break;
+               case ANYOF_NPSXSPC:
+                   if (LOC)
+                       ANYOF_CLASS_SET(ret, ANYOF_NPSXSPC);
+                   else {
+                       for (value = 0; value < 256; value++)
+                           if (!isPSXSPC(value))
+                               ANYOF_BITMAP_SET(ret, value);
+                   }
+                   break;
                case ANYOF_PUNCT:
                    if (LOC)
                        ANYOF_CLASS_SET(ret, ANYOF_PUNCT);
@@ -3594,12 +3638,12 @@ S_regclassutf8(pTHX)
        namedclass = OOB_NAMEDCLASS;
        if (!range)
            rangebegin = PL_regcomp_parse;
-       value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
+       value = utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0);
        PL_regcomp_parse += numlen;
        if (value == '[')
            namedclass = regpposixcc(value);
        else if (value == '\\') {
-           value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
+           value = (U32)utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0);
            PL_regcomp_parse += numlen;
            /* Some compilers cannot handle switching on 64-bit integer
             * values, therefore value cannot be an UV.  Yes, this will
@@ -3739,8 +3783,12 @@ S_regclassutf8(pTHX)
                case ANYOF_NPUNCT:
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n");   break;
                case ANYOF_SPACE:
+               case ANYOF_PSXSPC:
+               case ANYOF_BLANK:
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n");   break;
                case ANYOF_NSPACE:
+               case ANYOF_NPSXSPC:
+               case ANYOF_NBLANK:
                    Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n");   break;
                case ANYOF_UPPER:
                    Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n");   break;
@@ -4193,7 +4241,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
     else if (k == ANYOF) {
        int i, rangestart = -1;
        const char * const out[] = {    /* Should be syncronized with
-                                          a table in regcomp.h */
+                                          ANYOF_ #xdefines in regcomp.h */
            "\\w",
            "\\W",
            "\\s",
@@ -4217,9 +4265,13 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
            "[:punct:]",
            "[:^punct:]",
            "[:upper:]",
-           "[:!upper:]",
+           "[:^upper:]",
            "[:xdigit:]",
-           "[:^xdigit:]"
+           "[:^xdigit:]",
+           "[:space:]",
+           "[:^space:]",
+           "[:blank:]",
+           "[:^blank:]"
        };
 
        if (o->flags & ANYOF_LOCALE)
@@ -4323,8 +4375,13 @@ Perl_pregfree(pTHX_ struct regexp *r)
                    Perl_croak(aTHX_ "panic: pregfree comppad");
                old_comppad = PL_comppad;
                old_curpad = PL_curpad;
-               PL_comppad = new_comppad;
-               PL_curpad = AvARRAY(new_comppad);
+               /* Watch out for global destruction's random ordering. */
+               if (SvTYPE(new_comppad) == SVt_PVAV) {
+                   PL_comppad = new_comppad;
+                   PL_curpad = AvARRAY(new_comppad);
+               }
+               else
+                   PL_curpad = NULL;
                op_free((OP_4tree*)r->data->data[n]);
                PL_comppad = old_comppad;
                PL_curpad = old_curpad;