integrate mainline to get tr.t
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 2daa72b..ca0fd09 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -38,7 +38,7 @@
 /* *These* symbols are masked to allow static link. */
 #  define Perl_pregfree my_regfree
 #  define Perl_regnext my_regnext
-#  define save_re_context my_save_re_context
+#  define Perl_save_re_context my_save_re_context
 #endif 
 
 /*SUPPRESS 112*/
@@ -252,6 +252,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
     regnode *scan = *scanp, *next;
     I32 delta = 0;
     int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
+    int is_inf_internal = 0;           /* The studied chunk is infinite */
     I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
     scan_data_t data_fake;
     
@@ -366,7 +367,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
                    if (max1 < minnext + deltanext)
                        max1 = minnext + deltanext;
                    if (deltanext == I32_MAX)
-                       is_inf = 1;
+                       is_inf = is_inf_internal = 1;
                    scan = next;
                    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
                        pars++;
@@ -461,7 +462,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
                min++;
                /* Fall through. */
            case STAR:
-               is_inf = 1; 
+               is_inf = is_inf_internal = 1; 
                scan = regnext(scan);
                if (flags & SCF_DO_SUBSTR) {
                    scan_commit(data);
@@ -490,13 +491,15 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
                                        ? (flags & ~SCF_DO_SUBSTR) : flags);
                if (!scan)              /* It was not CURLYX, but CURLY. */
                    scan = next;
-               if (PL_dowarn && (minnext + deltanext == 0) 
+               if (ckWARN(WARN_UNSAFE) && (minnext + deltanext == 0) 
                    && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
-                   && maxcount <= 10000) /* Complement check for big count */
-                   warn("Strange *+?{} on zero-length expression");
+                   && maxcount <= REG_INFTY/3) /* Complement check for big count */
+                   warner(WARN_UNSAFE, "Strange *+?{} on zero-length expression");
                min += minnext * mincount;
-               is_inf |= (maxcount == REG_INFTY && (minnext + deltanext) > 0
-                          || deltanext == I32_MAX);
+               is_inf_internal |= (maxcount == REG_INFTY 
+                                   && (minnext + deltanext) > 0
+                                  || deltanext == I32_MAX);
+               is_inf |= is_inf_internal;
                delta += (minnext + deltanext) * maxcount - minnext * mincount;
 
                /* Try powerful optimization CURLYX => CURLYN. */
@@ -637,6 +640,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
                        }
                        data->longest = &(data->longest_float);
                    }
+                   SvREFCNT_dec(last_str);
                }
                if (data && (fl & SF_HAS_EVAL))
                    data->flags |= SF_HAS_EVAL;
@@ -652,7 +656,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
                    scan_commit(data);
                    data->longest = &(data->longest_float);
                }
-               is_inf = 1;
+               is_inf = is_inf_internal = 1;
                break;
            }
        }
@@ -705,13 +709,20 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
                if (data)
                    data->flags |= SF_HAS_EVAL;
        }
+       else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded */
+               if (flags & SCF_DO_SUBSTR) {
+                   scan_commit(data);
+                   data->longest = &(data->longest_float);
+               }
+               is_inf = is_inf_internal = 1;
+       }
        /* Else: zero-length, ignore. */
        scan = regnext(scan);
     }
 
   finish:
     *scanp = scan;
-    *deltap = is_inf ? I32_MAX : delta;
+    *deltap = is_inf_internal ? I32_MAX : delta;
     if (flags & SCF_DO_SUBSTR && is_inf) 
        data->pos_delta = I32_MAX - data->pos_min;
     if (is_par > U8_MAX)
@@ -781,14 +792,38 @@ pregcomp(char *exp, char *xend, PMOP *pm)
     if (exp == NULL)
        FAIL("NULL regexp argument");
 
-    if (PL_curcop == &compiling ? (PL_hints & HINT_UTF8) : IN_UTF8)
+    if (PL_curcop == &PL_compiling ? (PL_hints & HINT_UTF8) : IN_UTF8)
        PL_reg_flags |= RF_utf8;
     else
        PL_reg_flags = 0;
 
     PL_regprecomp = savepvn(exp, xend - exp);
-    DEBUG_r(PerlIO_printf(Perl_debug_log, "compiling RE `%*s'\n",
-                         xend - exp, PL_regprecomp));
+    DEBUG_r(
+       if (!PL_colorset) {
+           int i = 0;
+           char *s = PerlEnv_getenv("PERL_RE_COLORS");
+           
+           if (s) {
+               PL_colors[0] = s = savepv(s);
+               while (++i < 6) {
+                   s = strchr(s, '\t');
+                   if (s) {
+                       *s = '\0';
+                       PL_colors[i] = ++s;
+                   }
+                   else
+                       PL_colors[i] = "";
+               }
+           } else {
+               while (i < 6) 
+                   PL_colors[i++] = "";
+           }
+           PL_colorset = 1;
+       }
+       );
+    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling%s RE `%s%*s%s'\n",
+                         PL_colors[4],PL_colors[5],PL_colors[0],
+                         xend - exp, PL_regprecomp, PL_colors[1]));
     PL_regflags = pm->op_pmflags;
     PL_regsawback = 0;
 
@@ -812,32 +847,6 @@ pregcomp(char *exp, char *xend, PMOP *pm)
     }
     DEBUG_r(PerlIO_printf(Perl_debug_log, "size %d ", PL_regsize));
 
-    DEBUG_r(
-       if (!PL_colorset) {
-           int i = 0;
-           char *s = PerlEnv_getenv("TERMCAP_COLORS");
-           
-           PL_colorset = 1;
-           if (s) {
-               PL_colors[0] = s = savepv(s);
-               while (++i < 4) {
-                   s = strchr(s, '\t');
-                   if (!s) 
-                       FAIL("Not enough TABs in TERMCAP_COLORS");
-                   *s = '\0';
-                   PL_colors[i] = ++s;
-               }
-           }
-           else {
-               while (i < 4) 
-                   PL_colors[i++] = "";
-           }
-           /* Reset colors: */
-           PerlIO_printf(Perl_debug_log, "%s%s%s%s", 
-                         PL_colors[0],PL_colors[1],PL_colors[2],PL_colors[3]);
-       }
-       );
-
     /* Small enough for pointer-storage convention?
        If extralen==0, this means that we will not need long jumps. */
     if (PL_regsize >= 0x10000L && PL_extralen)
@@ -972,9 +981,10 @@ pregcomp(char *exp, char *xend, PMOP *pm)
                && (!(data.flags & SF_FL_BEFORE_MEOL)
                    || (PL_regflags & PMf_MULTILINE)))) {
            if (SvCUR(data.longest_fixed)                       /* ok to leave SvCUR */
-               && data.offset_fixed == data.offset_float_min)
-               goto remove;            /* Like in (a)+. */
-           
+               && data.offset_fixed == data.offset_float_min
+               && SvCUR(data.longest_fixed) == SvCUR(data.longest_float))
+                   goto remove_float;          /* As in (a)+. */
+
            r->float_substr = data.longest_float;
            r->float_min_offset = data.offset_float_min;
            r->float_max_offset = data.offset_float_max;
@@ -986,7 +996,7 @@ pregcomp(char *exp, char *xend, PMOP *pm)
                SvTAIL_on(r->float_substr);
        }
        else {
-         remove:
+         remove_float:
            r->float_substr = Nullsv;
            SvREFCNT_dec(data.longest_float);
            longest_float_length = 0;
@@ -1077,6 +1087,7 @@ reg(I32 paren, I32 *flagp)
        if (*PL_regcomp_parse == '?') {
            U16 posflags = 0, negflags = 0;
            U16 *flagsp = &posflags;
+           int logical = 0;
 
            PL_regcomp_parse++;
            paren = *PL_regcomp_parse++;
@@ -1107,6 +1118,10 @@ reg(I32 paren, I32 *flagp)
                nextchar();
                *flagp = TRYAGAIN;
                return NULL;
+           case 'p':
+               logical = 1;
+               paren = *PL_regcomp_parse++;
+               /* FALL THROUGH */
            case '{':
            {
                dTHR;
@@ -1145,8 +1160,9 @@ reg(I32 paren, I32 *flagp)
                    PL_regcomp_rx->data->data[n+2] = (void*)sop;
                    SvREFCNT_dec(sv);
                }
-               else {          /* First pass */
-                   if (PL_reginterp_cnt < ++PL_seen_evals && PL_curcop != &compiling)
+               else {                                          /* First pass */
+                   if (PL_reginterp_cnt < ++PL_seen_evals
+                       && PL_curcop != &PL_compiling)
                        /* No compiled RE interpolated, has runtime
                           components ===> unsafe.  */
                        FAIL("Eval-group not allowed at runtime, use re 'eval'");
@@ -1155,6 +1171,13 @@ reg(I32 paren, I32 *flagp)
                }
                
                nextchar();
+               if (logical) {
+                   ret = reg_node(LOGICAL);
+                   if (!SIZE_ONLY)
+                       ret->flags = 2;
+                   regtail(ret, reganode(EVAL, n));
+                   return ret;
+               }
                return reganode(EVAL, n);
            }
            case '(':
@@ -1166,6 +1189,8 @@ reg(I32 paren, I32 *flagp)
                        I32 flag;
                        
                        ret = reg_node(LOGICAL);
+                       if (!SIZE_ONLY)
+                           ret->flags = 1;
                        regtail(ret, reg(1, &flag));
                        goto insert_if;
                    } 
@@ -1186,10 +1211,14 @@ reg(I32 paren, I32 *flagp)
                    else
                        regtail(br, reganode(LONGJMP, 0));
                    c = *nextchar();
+                   if (flags&HASWIDTH)
+                       *flagp |= HASWIDTH;
                    if (c == '|') {
                        lastbr = reganode(IFTHEN, 0); /* Fake one for optimizer. */
                        regbranch(&flags, 1);
                        regtail(ret, lastbr);
+                       if (flags&HASWIDTH)
+                           *flagp |= HASWIDTH;
                        c = *nextchar();
                    }
                    else
@@ -1558,8 +1587,8 @@ regpiece(I32 *flagp)
        goto do_curly;
     }
   nest_check:
-    if (PL_dowarn && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) {
-       warn("%.*s matches null string many times",
+    if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3) {
+       warner(WARN_UNSAFE, "%.*s matches null string many times",
            PL_regcomp_parse - origparse, origparse);
     }
 
@@ -1707,7 +1736,7 @@ tryagain:
            *flagp |= HASWIDTH;
            nextchar();
            if (UTF && !PL_utf8_mark)
-               is_utf8_mark("~");      /* preload table */
+               is_utf8_mark((U8*)"~");         /* preload table */
            break;
        case 'w':
            ret = reg_node(
@@ -1717,7 +1746,7 @@ tryagain:
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum("a");     /* preload table */
+               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 'W':
            ret = reg_node(
@@ -1727,7 +1756,7 @@ tryagain:
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum("a");     /* preload table */
+               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 'b':
            PL_seen_zerolen++;
@@ -1738,7 +1767,7 @@ tryagain:
            *flagp |= SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum("a");     /* preload table */
+               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 'B':
            PL_seen_zerolen++;
@@ -1749,7 +1778,7 @@ tryagain:
            *flagp |= SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_alnum)
-               is_utf8_alnum("a");     /* preload table */
+               is_utf8_alnum((U8*)"a");        /* preload table */
            break;
        case 's':
            ret = reg_node(
@@ -1759,7 +1788,7 @@ tryagain:
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_space)
-               is_utf8_space(" ");     /* preload table */
+               is_utf8_space((U8*)" ");        /* preload table */
            break;
        case 'S':
            ret = reg_node(
@@ -1769,21 +1798,21 @@ tryagain:
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_space)
-               is_utf8_space(" ");     /* preload table */
+               is_utf8_space((U8*)" ");        /* preload table */
            break;
        case 'd':
            ret = reg_node(UTF ? DIGITUTF8 : DIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_digit)
-               is_utf8_digit("1");     /* preload table */
+               is_utf8_digit((U8*)"1");        /* preload table */
            break;
        case 'D':
            ret = reg_node(UTF ? NDIGITUTF8 : NDIGIT);
            *flagp |= HASWIDTH|SIMPLE;
            nextchar();
            if (UTF && !PL_utf8_digit)
-               is_utf8_digit("1");     /* preload table */
+               is_utf8_digit((U8*)"1");        /* preload table */
            break;
        case 'p':
        case 'P':
@@ -1981,7 +2010,7 @@ tryagain:
                default:
                  normal_default:
                    if ((*p & 0xc0) == 0xc0 && UTF) {
-                       ender = utf8_to_uv(p, &numlen);
+                       ender = utf8_to_uv((U8*)p, &numlen);
                        p += numlen;
                    }
                    else
@@ -2115,8 +2144,9 @@ regclass(void)
                     * (POSIX Extended Character Classes, that is)
                     * The text between e.g. [: and :] would start
                     * at posixccs + 1 and stop at regcomp_parse - 2. */
-                   if (dowarn && !SIZE_ONLY)
-                       warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
+                   if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY)
+                       warner(WARN_UNSAFE,
+                           "Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
                    PL_regcomp_parse++; /* skip over the ending ] */
                }
            }
@@ -2239,9 +2269,26 @@ regclass(void)
            }
        }
        if (!SIZE_ONLY) {
-           for ( ; lastvalue <= value; lastvalue++)
-               ANYOF_SET(opnd, lastvalue);
-       }
+#ifndef ASCIIish
+           if ((isLOWER(lastvalue) && isLOWER(value)) ||
+               (isUPPER(lastvalue) && isUPPER(value)))
+           {
+               I32 i;
+               if (isLOWER(lastvalue)) {
+                   for (i = lastvalue; i <= value; i++)
+                       if (isLOWER(i))
+                           ANYOF_SET(opnd, i);
+               } else {
+                   for (i = lastvalue; i <= value; i++)
+                       if (isUPPER(i))
+                           ANYOF_SET(opnd, i);
+               }
+           }
+           else
+#endif
+               for ( ; lastvalue <= value; lastvalue++)
+                   ANYOF_SET(opnd, lastvalue);
+        }
        lastvalue = value;
     }
     /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
@@ -2296,7 +2343,7 @@ regclassutf8(void)
 
     while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
        skipcond:
-       value = utf8_to_uv(PL_regcomp_parse, &numlen);
+       value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
        PL_regcomp_parse += numlen;
 
        if (value == '[' && PL_regcomp_parse + 1 < PL_regxend &&
@@ -2317,15 +2364,16 @@ regclassutf8(void)
                     * (POSIX Extended Character Classes, that is)
                     * The text between e.g. [: and :] would start
                     * at posixccs + 1 and stop at regcomp_parse - 2. */
-                   if (dowarn && !SIZE_ONLY)
-                       warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
+                   if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY)
+                       warner(WARN_UNSAFE,
+                           "Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc);
                    PL_regcomp_parse++; /* skip over the ending ] */
                }
            }
        }
 
        if (value == '\\') {
-           value = utf8_to_uv(PL_regcomp_parse, &numlen);
+           value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
            PL_regcomp_parse += numlen;
            switch (value) {
            case 'w':
@@ -2355,7 +2403,7 @@ regclassutf8(void)
                        flags |= ANYOF_SPACEL;
                    sv_catpvf(listsv, "+utf8::IsSpace\n");
                    if (!PL_utf8_space)
-                       is_utf8_space(" ");
+                       is_utf8_space((U8*)" ");
                }
                lastvalue = 123456;
                continue;
@@ -2366,7 +2414,7 @@ regclassutf8(void)
                    sv_catpvf(listsv,
                        "!utf8::IsSpace\n");
                    if (!PL_utf8_space)
-                       is_utf8_space(" ");
+                       is_utf8_space((U8*)" ");
                }
                lastvalue = 123456;
                continue;
@@ -2573,11 +2621,11 @@ reguni(UV uv, char* s, I32* lenp)
 {
     dTHR;
     if (SIZE_ONLY) {
-       char tmpbuf[10];
+       U8 tmpbuf[10];
        *lenp = uv_to_utf8(tmpbuf, uv) - tmpbuf;
     }
     else
-       *lenp = uv_to_utf8(s, uv) - s;
+       *lenp = uv_to_utf8((U8*)s, uv) - (U8*)s;
 
 }
 
@@ -3019,7 +3067,7 @@ regprop(SV *sv, regnode *o)
        sv_catpvf(sv, "GROUPP%d", ARG(o));
        break;
     case LOGICAL:
-       p = "LOGICAL";
+       sv_catpvf(sv, "LOGICAL[%d]", o->flags);
        break;
     case SUSPEND:
        p = "SUSPEND";