OS/2: bug found by John Poltorak.
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 12f9016..6b17be1 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
 #endif
 
 typedef struct RExC_state_t {
-    U16                flags16;                /* are we folding, multilining? */
+    U32                flags;                  /* are we folding, multilining? */
     char       *precomp;               /* uncompiled string. */
     regexp     *rx;
     char       *start;                 /* Start of input for compile */
@@ -132,7 +132,7 @@ typedef struct RExC_state_t {
 #endif
 } RExC_state_t;
 
-#define RExC_flags16   (pRExC_state->flags16)
+#define RExC_flags     (pRExC_state->flags)
 #define RExC_precomp   (pRExC_state->precomp)
 #define RExC_rx                (pRExC_state->rx)
 #define RExC_start     (pRExC_state->start)
@@ -227,9 +227,9 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 #define SCF_DO_STCLASS         (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
 #define SCF_WHILEM_VISITED_POS 0x2000
 
-#define UTF RExC_utf8
-#define LOC (RExC_flags16 & PMf_LOCALE)
-#define FOLD (RExC_flags16 & PMf_FOLD)
+#define UTF (RExC_utf8 != 0)
+#define LOC ((RExC_flags & PMf_LOCALE) != 0)
+#define FOLD ((RExC_flags & PMf_FOLD) != 0)
 
 #define OOB_UNICODE            12345678
 #define OOB_NAMEDCLASS         -1
@@ -505,6 +505,8 @@ S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
            data->offset_float_max = (l
                                      ? data->last_start_max
                                      : data->pos_min + data->pos_delta);
+           if ((U32)data->offset_float_max > (U32)I32_MAX)
+               data->offset_float_max = I32_MAX;
            if (data->flags & SF_BEFORE_EOL)
                data->flags
                    |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
@@ -1169,7 +1171,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                    if (OP(nxt) != CLOSE)
                        goto nogo;
                    /* Now we know that nxt2 is the only contents: */
-                   oscan->flags = ARG(nxt);
+                   oscan->flags = (U8)ARG(nxt);
                    OP(oscan) = CURLYN;
                    OP(nxt1) = NOTHING; /* was OPEN. */
 #ifdef DEBUGGING
@@ -1205,7 +1207,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 
                        if (OP(nxt) != CLOSE)
                            FAIL("Panic opt close");
-                       oscan->flags = ARG(nxt);
+                       oscan->flags = (U8)ARG(nxt);
                        OP(nxt1) = OPTIMIZED;   /* was OPEN. */
                        OP(nxt) = OPTIMIZED;    /* was CLOSE. */
 #ifdef DEBUGGING
@@ -1249,8 +1251,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
 
                    if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
                        nxt += ARG(nxt);
-                   PREVOPER(nxt)->flags = data->whilem_c
-                       | (RExC_whilem_seen << 4); /* On WHILEM */
+                   PREVOPER(nxt)->flags = (U8)(data->whilem_c
+                       | (RExC_whilem_seen << 4)); /* On WHILEM */
                }
                if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
                    pars++;
@@ -1591,7 +1593,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                else if (minnext > U8_MAX) {
                    vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
                }
-               scan->flags = minnext;
+               scan->flags = (U8)minnext;
            }
            if (data && data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
                pars++;
@@ -1611,7 +1613,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            pars++;
        }
        else if (OP(scan) == CLOSE) {
-           if (ARG(scan) == is_par) {
+           if ((I32)ARG(scan) == is_par) {
                next = regnext(scan);
 
                if ( next && (OP(next) != WHILEM) && next < last)
@@ -1744,7 +1746,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
                       PL_colors[4],PL_colors[5],PL_colors[0],
                       (int)(xend - exp), RExC_precomp, PL_colors[1]);
     });
-    RExC_flags16 = pm->op_pmflags;
+    RExC_flags = pm->op_pmflags;
     RExC_sawback = 0;
 
     RExC_seen = 0;
@@ -1813,7 +1815,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_rx = r;
 
     /* Second pass: emit code. */
-    RExC_flags16 = pm->op_pmflags;     /* don't let top level (?i) bleed */
+    RExC_flags = pm->op_pmflags;       /* don't let top level (?i) bleed */
     RExC_parse = exp;
     RExC_end = xend;
     RExC_naughty = 0;
@@ -1821,7 +1823,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_emit_start = r->program;
     RExC_emit = r->program;
     /* Store the count of eval-groups for security checks: */
-    RExC_emit->next_off = ((RExC_seen_evals > U16_MAX) ? U16_MAX : RExC_seen_evals);
+    RExC_emit->next_off = (U16)((RExC_seen_evals > U16_MAX) ? U16_MAX : RExC_seen_evals);
     REGC((U8)REG_MAGIC, (char*) RExC_emit++);
     r->data = 0;
     if (reg(pRExC_state, 0, &flags) == NULL)
@@ -1829,7 +1831,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
 
     /* Dig out information for optimizations. */
     r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
-    pm->op_pmflags = RExC_flags16;
+    pm->op_pmflags = RExC_flags;
     if (UTF)
         r->reganch |= ROPT_UTF8;       /* Unicode in it? */
     r->regstclass = NULL;
@@ -1957,7 +1959,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        if (longest_float_length
            || (data.flags & SF_FL_BEFORE_EOL
                && (!(data.flags & SF_FL_BEFORE_MEOL)
-                   || (RExC_flags16 & PMf_MULTILINE)))) {
+                   || (RExC_flags & PMf_MULTILINE)))) {
            int t;
 
            if (SvCUR(data.longest_fixed)                       /* ok to leave SvCUR */
@@ -1976,7 +1978,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            r->float_max_offset = data.offset_float_max;
            t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
                       && (!(data.flags & SF_FL_BEFORE_MEOL)
-                          || (RExC_flags16 & PMf_MULTILINE)));
+                          || (RExC_flags & PMf_MULTILINE)));
            fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0);
        }
        else {
@@ -1990,7 +1992,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        if (longest_fixed_length
            || (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
                && (!(data.flags & SF_FIX_BEFORE_MEOL)
-                   || (RExC_flags16 & PMf_MULTILINE)))) {
+                   || (RExC_flags & PMf_MULTILINE)))) {
            int t;
 
            if (SvUTF8(data.longest_fixed)) {
@@ -2003,7 +2005,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
            r->anchored_offset = data.offset_fixed;
            t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
                 && (!(data.flags & SF_FIX_BEFORE_MEOL)
-                    || (RExC_flags16 & PMf_MULTILINE)));
+                    || (RExC_flags & PMf_MULTILINE)));
            fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0);
        }
        else {
@@ -2017,7 +2019,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
            && stclass_flag
            && !(data.start_class->flags & ANYOF_EOS)
-           && !cl_is_anything(data.start_class)) {
+           && !cl_is_anything(data.start_class))
+       {
            I32 n = add_data(pRExC_state, 1, "f");
 
            New(1006, RExC_rx->data->data[n], 1,
@@ -2072,7 +2075,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
                = r->float_substr = r->float_utf8 = Nullsv;
        if (!(data.start_class->flags & ANYOF_EOS)
-           && !cl_is_anything(data.start_class)) {
+           && !cl_is_anything(data.start_class))
+       {
            I32 n = add_data(pRExC_state, 1, "f");
 
            New(1006, RExC_rx->data->data[n], 1,
@@ -2124,7 +2128,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
     register regnode *lastbr;
     register regnode *ender = 0;
     register I32 parno = 0;
-    I32 flags, oregflags = RExC_flags16, have_branch = 0, open = 0;
+    I32 flags, oregflags = RExC_flags, have_branch = 0, open = 0;
 
     /* for (?g), (?gc), and (?o) warnings; warning
        about (?c) will warn about (?g) -- japhy    */
@@ -2145,8 +2149,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
     /* Make an OPEN node, if parenthesized. */
     if (paren) {
        if (*RExC_parse == '?') { /* (?...) */
-           U16 posflags = 0, negflags = 0;
-           U16 *flagsp = &posflags;
+           U32 posflags = 0, negflags = 0;
+           U32 *flagsp = &posflags;
            int logical = 0;
            char *seqstart = RExC_parse;
 
@@ -2367,8 +2371,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                    ++RExC_parse;
                    goto parse_flags;
                }
-               RExC_flags16 |= posflags;
-               RExC_flags16 &= ~negflags;
+               RExC_flags |= posflags;
+               RExC_flags &= ~negflags;
                if (*RExC_parse == ':') {
                    RExC_parse++;
                    paren = ':';
@@ -2484,7 +2488,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
        static char parens[] = "=!<,>";
 
        if (paren && (p = strchr(parens, paren))) {
-           int node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
+           U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
            int flag = (p - parens) > 1;
 
            if (paren == '>')
@@ -2497,7 +2501,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
 
     /* Check for proper termination. */
     if (paren) {
-       RExC_flags16 = oregflags;
+       RExC_flags = oregflags;
        if (RExC_parse >= RExC_end || *nextchar(pRExC_state) != ')') {
            RExC_parse = oregcomp_parse;
            vFAIL("Unmatched (");
@@ -2679,8 +2683,8 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
            if (max && max < min)
                vFAIL("Can't do {n,m} with n > m");
            if (!SIZE_ONLY) {
-               ARG1_SET(ret, min);
-               ARG2_SET(ret, max);
+               ARG1_SET(ret, (U16)min);
+               ARG2_SET(ret, (U16)max);
            }
 
            goto nest_check;
@@ -2779,9 +2783,9 @@ tryagain:
     case '^':
        RExC_seen_zerolen++;
        nextchar(pRExC_state);
-       if (RExC_flags16 & PMf_MULTILINE)
+       if (RExC_flags & PMf_MULTILINE)
            ret = reg_node(pRExC_state, MBOL);
-       else if (RExC_flags16 & PMf_SINGLELINE)
+       else if (RExC_flags & PMf_SINGLELINE)
            ret = reg_node(pRExC_state, SBOL);
        else
            ret = reg_node(pRExC_state, BOL);
@@ -2791,9 +2795,9 @@ tryagain:
        nextchar(pRExC_state);
        if (*RExC_parse)
            RExC_seen_zerolen++;
-       if (RExC_flags16 & PMf_MULTILINE)
+       if (RExC_flags & PMf_MULTILINE)
            ret = reg_node(pRExC_state, MEOL);
-       else if (RExC_flags16 & PMf_SINGLELINE)
+       else if (RExC_flags & PMf_SINGLELINE)
            ret = reg_node(pRExC_state, SEOL);
        else
            ret = reg_node(pRExC_state, EOL);
@@ -2801,7 +2805,7 @@ tryagain:
        break;
     case '.':
        nextchar(pRExC_state);
-       if (RExC_flags16 & PMf_SINGLELINE)
+       if (RExC_flags & PMf_SINGLELINE)
            ret = reg_node(pRExC_state, SANY);
        else
            ret = reg_node(pRExC_state, REG_ANY);
@@ -2902,13 +2906,13 @@ tryagain:
             Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'w':
-           ret = reg_node(pRExC_state, LOC ? ALNUML     : ALNUM);
+           ret = reg_node(pRExC_state, (U8)(LOC ? ALNUML     : ALNUM));
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'W':
-           ret = reg_node(pRExC_state, LOC ? NALNUML     : NALNUM);
+           ret = reg_node(pRExC_state, (U8)(LOC ? NALNUML    : NALNUM));
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
@@ -2916,7 +2920,7 @@ tryagain:
        case 'b':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           ret = reg_node(pRExC_state, LOC ? BOUNDL     : BOUND);
+           ret = reg_node(pRExC_state, (U8)(LOC ? BOUNDL     : BOUND));
            *flagp |= SIMPLE;
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
@@ -2924,19 +2928,19 @@ tryagain:
        case 'B':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           ret = reg_node(pRExC_state, LOC ? NBOUNDL     : NBOUND);
+           ret = reg_node(pRExC_state, (U8)(LOC ? NBOUNDL    : NBOUND));
            *flagp |= SIMPLE;
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
            break;
        case 's':
-           ret = reg_node(pRExC_state, LOC ? SPACEL     : SPACE);
+           ret = reg_node(pRExC_state, (U8)(LOC ? SPACEL     : SPACE));
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'S':
-           ret = reg_node(pRExC_state, LOC ? NSPACEL     : NSPACE);
+           ret = reg_node(pRExC_state, (U8)(LOC ? NSPACEL    : NSPACE));
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
@@ -2970,8 +2974,11 @@ tryagain:
                    }
                    RExC_end++;
                }
-               else
+               else {
                    RExC_end = RExC_parse + 2;
+                   if (RExC_end > oldregxend)
+                       RExC_end = oldregxend;
+               }
                RExC_parse--;
 
                ret = regclass(pRExC_state);
@@ -3005,12 +3012,12 @@ tryagain:
                    while (isDIGIT(*RExC_parse))
                        RExC_parse++;
 
-                   if (!SIZE_ONLY && num > RExC_rx->nparens)
+                   if (!SIZE_ONLY && num > (I32)RExC_rx->nparens)
                        vFAIL("Reference to nonexistent group");
                    RExC_sawback = 1;
-                   ret = reganode(pRExC_state, FOLD
-                                  ? (LOC ? REFFL : REFF)
-                                  : REF, num);
+                   ret = reganode(pRExC_state,
+                                  (U8)(FOLD ? (LOC ? REFFL : REFF) : REF),
+                                  num);
                    *flagp |= HASWIDTH;
                     
                     /* override incorrect value set in reganode MJD */
@@ -3033,7 +3040,7 @@ tryagain:
        break;
 
     case '#':
-       if (RExC_flags16 & PMf_EXTENDED) {
+       if (RExC_flags & PMf_EXTENDED) {
            while (RExC_parse < RExC_end && *RExC_parse != '\n') RExC_parse++;
            if (RExC_parse < RExC_end)
                goto tryagain;
@@ -3055,9 +3062,8 @@ tryagain:
 
        defchar:
            ender = 0;
-           ret = reg_node(pRExC_state, FOLD
-                         ? (LOC ? EXACTFL : EXACTF)
-                         : EXACT);
+           ret = reg_node(pRExC_state,
+                          (U8)(FOLD ? (LOC ? EXACTFL : EXACTF) : EXACT));
            s = STRING(ret);
            for (len = 0, p = RExC_parse - 1;
              len < 127 && p < RExC_end;
@@ -3065,7 +3071,7 @@ tryagain:
            {
                oldp = p;
 
-               if (RExC_flags16 & PMf_EXTENDED)
+               if (RExC_flags & PMf_EXTENDED)
                    p = regwhite(p, RExC_end);
                switch (*p) {
                case '^':
@@ -3190,7 +3196,7 @@ tryagain:
                        ender = *p++;
                    break;
                }
-               if (RExC_flags16 & PMf_EXTENDED)
+               if (RExC_flags & PMf_EXTENDED)
                    p = regwhite(p, RExC_end);
                if (UTF && FOLD) {
                    /* Prime the casefolded buffer. */
@@ -3232,7 +3238,7 @@ tryagain:
                    }
                    else {
                        len++;
-                       REGC(ender, s++);
+                       REGC((char)ender, s++);
                    }
                    break;
                }
@@ -3269,7 +3275,7 @@ tryagain:
                     len--;
                }
                else
-                   REGC(ender, s++);
+                   REGC((char)ender, s++);
            }
        loopdone:
            RExC_parse = p - 1;
@@ -3517,6 +3523,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
     UV n;
     bool optimize_invert   = TRUE;
     AV* unicode_alternate  = 0;
+#ifdef EBCDIC
+    UV literal_endpoint = 0;
+#endif
 
     ret = reganode(pRExC_state, ANYOF, 0);
 
@@ -3593,6 +3602,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            case 'D':   namedclass = ANYOF_NDIGIT;      break;
            case 'p':
            case 'P':
+               if (RExC_parse >= RExC_end)
+                   vFAIL2("Empty \\%c{}", (U8)value);
                if (*RExC_parse == '{') {
                    U8 c = (U8)value;
                    e = strchr(RExC_parse++, '}');
@@ -3677,6 +3688,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                break;
            }
        } /* end of \blah */
+#ifdef EBCDIC
+       else
+           literal_endpoint++;
+#endif
 
        if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
 
@@ -4041,7 +4056,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        } /* end of namedclass \blah */
 
        if (range) {
-           if (prevvalue > value) /* b-a */ {
+           if (prevvalue > (IV)value) /* b-a */ {
                Simple_vFAIL4("Invalid [] range \"%*.*s\"",
                              RExC_parse - rangebegin,
                              RExC_parse - rangebegin,
@@ -4079,8 +4094,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                IV ceilvalue = value < 256 ? value : 255;
 
 #ifdef EBCDIC
-               if ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
-                   (isUPPER(prevvalue) && isUPPER(ceilvalue)))
+               /* In EBCDIC [\x89-\x91] should include
+                * the \x8e but [i-j] should not. */
+               if (literal_endpoint == 2 &&
+                   ((isLOWER(prevvalue) && isLOWER(ceilvalue)) ||
+                    (isUPPER(prevvalue) && isUPPER(ceilvalue))))
                {
                    if (isLOWER(prevvalue)) {
                        for (i = prevvalue; i <= ceilvalue; i++)
@@ -4117,7 +4135,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                          * character, insert also the folded version
                          * to the charclass. */
                         if (f != value) {
-                             if (foldlen == UNISKIP(f))
+                             if (foldlen == (STRLEN)UNISKIP(f))
                                  Perl_sv_catpvf(aTHX_ listsv,
                                                 "%04"UVxf"\n", f);
                              else {
@@ -4160,6 +4178,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                    }
                }
            }
+#ifdef EBCDIC
+           literal_endpoint = 0;
+#endif
         }
 
        range = 0; /* this range (if it was one) is done now */
@@ -4180,7 +4201,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
        ) {
        for (value = 0; value < 256; ++value) {
            if (ANYOF_BITMAP_TEST(ret, value)) {
-               IV fold = PL_fold[value];
+               UV fold = PL_fold[value];
 
                if (fold != value)
                    ANYOF_BITMAP_SET(ret, fold);
@@ -4233,7 +4254,7 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
            RExC_parse++;
            continue;
        }
-       if (RExC_flags16 & PMf_EXTENDED) {
+       if (RExC_flags & PMf_EXTENDED) {
            if (isSPACE(*RExC_parse)) {
                RExC_parse++;
                continue;
@@ -4767,7 +4788,6 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
        
            if (lv) {
                if (sw) {
-                   UV i;
                    U8 s[UTF8_MAXLEN+1];
                
                    for (i = 0; i <= 256; i++) { /* just the first 256 */
@@ -5010,7 +5030,7 @@ Perl_save_re_context(pTHX)
     SAVEPPTR(RExC_precomp);            /* uncompiled string. */
     SAVEI32(RExC_npar);                /* () count. */
     SAVEI32(RExC_size);                /* Code size. */
-    SAVEI16(RExC_flags16);             /* are we folding, multilining? */
+    SAVEI32(RExC_flags);               /* are we folding, multilining? */
     SAVEVPTR(RExC_rx);         /* from regcomp.c */
     SAVEI32(RExC_seen);                /* from regcomp.c */
     SAVEI32(RExC_sawback);             /* Did we see \1, ...? */