make xsubpp generate well-formed code with CAPI && !PERL_OBJECT

[p5sagit/p5-mst-13.2.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index f256c4e..0f883f3 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -38,6 +38,7 @@
 /* *These* symbols are masked to allow static link. */
 #  define Perl_pregfree my_regfree
 #  define Perl_regnext my_regnext
+#  define save_re_context my_save_re_context
 #endif 
 
 /*SUPPRESS 112*/
@@ -319,7 +320,10 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
 
        }
        if (OP(scan) != CURLYX) {
-           int max = (reg_off_by_arg[OP(scan)] ? I32_MAX : U16_MAX);
+           int max = (reg_off_by_arg[OP(scan)]
+                      ? I32_MAX
+                      /* I32 may be smaller than U16 on CRAYs! */
+                      : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
            int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
            int noff;
            regnode *n = scan;
@@ -486,10 +490,10 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
                                        ? (flags & ~SCF_DO_SUBSTR) : flags);
                if (!scan)              /* It was not CURLYX, but CURLY. */
                    scan = next;
-               if (PL_dowarn && (minnext + deltanext == 0) 
+               if (ckWARN(WARN_UNSAFE) && (minnext + deltanext == 0) 
                    && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
                    && maxcount <= 10000) /* Complement check for big count */
-                   warn("Strange *+?{} on zero-length expression");
+                   warner(WARN_UNSAFE, "Strange *+?{} on zero-length expression");
                min += minnext * mincount;
                is_inf |= (maxcount == REG_INFTY && (minnext + deltanext) > 0
                           || deltanext == I32_MAX);
@@ -1554,8 +1558,8 @@ regpiece(I32 *flagp)
        goto do_curly;
     }
   nest_check:
-    if (PL_dowarn && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) {
-       warn("%.*s matches null string many times",
+    if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) {
+       warner(WARN_UNSAFE, "%.*s matches null string many times",
            PL_regcomp_parse - origparse, origparse);
     }
 
@@ -1633,6 +1637,9 @@ tryagain:
     case '[':
        PL_regcomp_parse++;
        ret = (UTF ? regclassutf8() : regclass());
+       if (*PL_regcomp_parse != ']')
+           FAIL("unmatched [] in regexp");
+       nextchar();
        *flagp |= HASWIDTH|SIMPLE;
        break;
     case '(':
@@ -1700,7 +1707,7 @@ tryagain:
            *flagp |= HASWIDTH;
            nextchar();
            if (UTF && !PL_utf8_mark)
-               is_utf8_mark("~");      /* preload table */
+               is_utf8_mark((U8*)"~");         /* preload table */
            break;
        case 'w':
            ret = reg_node(
@@ -1710,7 +1717,7 @@ tryagain:
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum("a");     /* preload table */
+               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 'W':
            ret = reg_node(
@@ -1720,7 +1727,7 @@ tryagain:
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum("a");     /* preload table */
+               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 'b':
            PL_seen_zerolen++;
@@ -1731,7 +1738,7 @@ tryagain:
            *flagp |= SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum("a");     /* preload table */
+               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 'B':
            PL_seen_zerolen++;
@@ -1742,7 +1749,7 @@ tryagain:
            *flagp |= SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum("a");     /* preload table */
+               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 's':
            ret = reg_node(
@@ -1752,7 +1759,7 @@ tryagain:
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_space)
-               is_utf8_space(" ");     /* preload table */
+               is_utf8_space((U8*)" ");        /* preload table */
            break;
        case 'S':
            ret = reg_node(
@@ -1762,21 +1769,44 @@ tryagain:
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_space)
-               is_utf8_space(" ");     /* preload table */
+               is_utf8_space((U8*)" ");        /* preload table */
            break;
        case 'd':
            ret = reg_node(UTF ? DIGITUTF8 : DIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_digit)
-               is_utf8_digit("1");     /* preload table */
+               is_utf8_digit((U8*)"1");        /* preload table */
            break;
        case 'D':
            ret = reg_node(UTF ? NDIGITUTF8 : NDIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_digit)
-               is_utf8_digit("1");     /* preload table */
+               is_utf8_digit((U8*)"1");        /* preload table */
+           break;
+       case 'p':
+       case 'P':
+           {   /* a lovely hack--pretend we saw [\pX] instead */
+               char* oldregxend = PL_regxend;
+
+               if (PL_regcomp_parse[1] == '{') {
+                   PL_regxend = strchr(PL_regcomp_parse, '}');
+                   if (!PL_regxend)
+                       FAIL("Missing right brace on \\p{}");
+                   PL_regxend++;
+               }
+               else
+                   PL_regxend = PL_regcomp_parse + 2;
+               PL_regcomp_parse--;
+
+               ret = regclassutf8();
+
+               PL_regxend = oldregxend;
+               PL_regcomp_parse--;
+               nextchar();
+               *flagp |= HASWIDTH|SIMPLE;
+           }
            break;
        case 'n':
        case 'r':
@@ -1873,6 +1903,8 @@ tryagain:
                    case 'S':
                    case 'd':
                    case 'D':
+                   case 'p':
+                   case 'P':
                        --p;
                        goto loopdone;
                    case 'n':
@@ -1949,7 +1981,7 @@ tryagain:
                default:
                  normal_default:
                    if ((*p & 0xc0) == 0xc0 && UTF) {
-                       ender = utf8_to_uv(p, &numlen);
+                       ender = utf8_to_uv((U8*)p, &numlen);
                        p += numlen;
                    }
                    else
@@ -2083,8 +2115,9 @@ regclass(void)
                     * (POSIX Extended Character Classes, that is)
                     * The text between e.g. [: and :] would start
                     * at posixccs + 1 and stop at regcomp_parse - 2. */
-                   if (dowarn && !SIZE_ONLY)
-                       warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
+                   if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY)
+                       warner(WARN_UNSAFE,
+                           "Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
                    PL_regcomp_parse++; /* skip over the ending ] */
                }
            }
@@ -2212,9 +2245,6 @@ regclass(void)
        }
        lastvalue = value;
     }
-    if (*PL_regcomp_parse != ']')
-       FAIL("unmatched [] in regexp");
-    nextchar();
     /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
     if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) {
        for (value = 0; value < 256; ++value) {
@@ -2267,7 +2297,7 @@ regclassutf8(void)
 
     while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
        skipcond:
-       value = utf8_to_uv(PL_regcomp_parse, &numlen);
+       value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
        PL_regcomp_parse += numlen;
 
        if (value == '[' && PL_regcomp_parse + 1 < PL_regxend &&
@@ -2288,15 +2318,16 @@ regclassutf8(void)
                     * (POSIX Extended Character Classes, that is)
                     * The text between e.g. [: and :] would start
                     * at posixccs + 1 and stop at regcomp_parse - 2. */
-                   if (dowarn && !SIZE_ONLY)
-                       warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
+                   if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY)
+                       warner(WARN_UNSAFE,
+                           "Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
                    PL_regcomp_parse++; /* skip over the ending ] */
                }
            }
        }
 
        if (value == '\\') {
-           value = utf8_to_uv(PL_regcomp_parse, &numlen);
+           value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
            PL_regcomp_parse += numlen;
            switch (value) {
            case 'w':
@@ -2326,7 +2357,7 @@ regclassutf8(void)
                        flags |= ANYOF_SPACEL;
                    sv_catpvf(listsv, "+utf8::IsSpace\n");
                    if (!PL_utf8_space)
-                       is_utf8_space(" ");
+                       is_utf8_space((U8*)" ");
                }
                lastvalue = 123456;
                continue;
@@ -2337,7 +2368,7 @@ regclassutf8(void)
                    sv_catpvf(listsv,
                        "!utf8::IsSpace\n");
                    if (!PL_utf8_space)
-                       is_utf8_space(" ");
+                       is_utf8_space((U8*)" ");
                }
                lastvalue = 123456;
                continue;
@@ -2441,9 +2472,6 @@ regclassutf8(void)
                sv_catpvf(listsv, "%04x\n", value);
        }
     }
-    if (*PL_regcomp_parse != ']')
-       FAIL("unmatched [] in regexp");
-    nextchar();
 
     ret = reganode(ANYOFUTF8, 0);
 
@@ -2547,11 +2575,11 @@ reguni(UV uv, char* s, I32* lenp)
 {
     dTHR;
     if (SIZE_ONLY) {
-       char tmpbuf[10];
+       U8 tmpbuf[10];
        *lenp = uv_to_utf8(tmpbuf, uv) - tmpbuf;
     }
     else
-       *lenp = uv_to_utf8(s, uv) - s;
+       *lenp = uv_to_utf8((U8*)s, uv) - (U8*)s;
 
 }