integrate mainline to get tr.t
[p5sagit/p5-mst-13.2.git] / regcomp.c
index ef651fa..ca0fd09 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -38,7 +38,7 @@
 /* *These* symbols are masked to allow static link. */
 #  define Perl_pregfree my_regfree
 #  define Perl_regnext my_regnext
-#  define save_re_context my_save_re_context
+#  define Perl_save_re_context my_save_re_context
 #endif 
 
 /*SUPPRESS 112*/
@@ -493,7 +493,7 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
                    scan = next;
                if (ckWARN(WARN_UNSAFE) && (minnext + deltanext == 0) 
                    && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
-                   && maxcount <= 10000) /* Complement check for big count */
+                   && maxcount <= REG_INFTY/3) /* Complement check for big count */
                    warner(WARN_UNSAFE, "Strange *+?{} on zero-length expression");
                min += minnext * mincount;
                is_inf_internal |= (maxcount == REG_INFTY 
@@ -709,6 +709,13 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32
                if (data)
                    data->flags |= SF_HAS_EVAL;
        }
+       else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded */
+               if (flags & SCF_DO_SUBSTR) {
+                   scan_commit(data);
+                   data->longest = &(data->longest_float);
+               }
+               is_inf = is_inf_internal = 1;
+       }
        /* Else: zero-length, ignore. */
        scan = regnext(scan);
     }
@@ -785,14 +792,38 @@ pregcomp(char *exp, char *xend, PMOP *pm)
     if (exp == NULL)
        FAIL("NULL regexp argument");
 
-    if (PL_curcop == &compiling ? (PL_hints & HINT_UTF8) : IN_UTF8)
+    if (PL_curcop == &PL_compiling ? (PL_hints & HINT_UTF8) : IN_UTF8)
        PL_reg_flags |= RF_utf8;
     else
        PL_reg_flags = 0;
 
     PL_regprecomp = savepvn(exp, xend - exp);
-    DEBUG_r(PerlIO_printf(Perl_debug_log, "compiling RE `%*s'\n",
-                         xend - exp, PL_regprecomp));
+    DEBUG_r(
+       if (!PL_colorset) {
+           int i = 0;
+           char *s = PerlEnv_getenv("PERL_RE_COLORS");
+           
+           if (s) {
+               PL_colors[0] = s = savepv(s);
+               while (++i < 6) {
+                   s = strchr(s, '\t');
+                   if (s) {
+                       *s = '\0';
+                       PL_colors[i] = ++s;
+                   }
+                   else
+                       PL_colors[i] = "";
+               }
+           } else {
+               while (i < 6) 
+                   PL_colors[i++] = "";
+           }
+           PL_colorset = 1;
+       }
+       );
+    DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling%s RE `%s%*s%s'\n",
+                         PL_colors[4],PL_colors[5],PL_colors[0],
+                         xend - exp, PL_regprecomp, PL_colors[1]));
     PL_regflags = pm->op_pmflags;
     PL_regsawback = 0;
 
@@ -816,32 +847,6 @@ pregcomp(char *exp, char *xend, PMOP *pm)
     }
     DEBUG_r(PerlIO_printf(Perl_debug_log, "size %d ", PL_regsize));
 
-    DEBUG_r(
-       if (!PL_colorset) {
-           int i = 0;
-           char *s = PerlEnv_getenv("TERMCAP_COLORS");
-           
-           PL_colorset = 1;
-           if (s) {
-               PL_colors[0] = s = savepv(s);
-               while (++i < 4) {
-                   s = strchr(s, '\t');
-                   if (!s) 
-                       FAIL("Not enough TABs in TERMCAP_COLORS");
-                   *s = '\0';
-                   PL_colors[i] = ++s;
-               }
-           }
-           else {
-               while (i < 4) 
-                   PL_colors[i++] = "";
-           }
-           /* Reset colors: */
-           PerlIO_printf(Perl_debug_log, "%s%s%s%s", 
-                         PL_colors[0],PL_colors[1],PL_colors[2],PL_colors[3]);
-       }
-       );
-
     /* Small enough for pointer-storage convention?
        If extralen==0, this means that we will not need long jumps. */
     if (PL_regsize >= 0x10000L && PL_extralen)
@@ -1082,6 +1087,7 @@ reg(I32 paren, I32 *flagp)
        if (*PL_regcomp_parse == '?') {
            U16 posflags = 0, negflags = 0;
            U16 *flagsp = &posflags;
+           int logical = 0;
 
            PL_regcomp_parse++;
            paren = *PL_regcomp_parse++;
@@ -1112,6 +1118,10 @@ reg(I32 paren, I32 *flagp)
                nextchar();
                *flagp = TRYAGAIN;
                return NULL;
+           case 'p':
+               logical = 1;
+               paren = *PL_regcomp_parse++;
+               /* FALL THROUGH */
            case '{':
            {
                dTHR;
@@ -1150,8 +1160,9 @@ reg(I32 paren, I32 *flagp)
                    PL_regcomp_rx->data->data[n+2] = (void*)sop;
                    SvREFCNT_dec(sv);
                }
-               else {          /* First pass */
-                   if (PL_reginterp_cnt < ++PL_seen_evals && PL_curcop != &compiling)
+               else {                                          /* First pass */
+                   if (PL_reginterp_cnt < ++PL_seen_evals
+                       && PL_curcop != &PL_compiling)
                        /* No compiled RE interpolated, has runtime
                           components ===> unsafe.  */
                        FAIL("Eval-group not allowed at runtime, use re 'eval'");
@@ -1160,6 +1171,13 @@ reg(I32 paren, I32 *flagp)
                }
                
                nextchar();
+               if (logical) {
+                   ret = reg_node(LOGICAL);
+                   if (!SIZE_ONLY)
+                       ret->flags = 2;
+                   regtail(ret, reganode(EVAL, n));
+                   return ret;
+               }
                return reganode(EVAL, n);
            }
            case '(':
@@ -1171,6 +1189,8 @@ reg(I32 paren, I32 *flagp)
                        I32 flag;
                        
                        ret = reg_node(LOGICAL);
+                       if (!SIZE_ONLY)
+                           ret->flags = 1;
                        regtail(ret, reg(1, &flag));
                        goto insert_if;
                    } 
@@ -1191,10 +1211,14 @@ reg(I32 paren, I32 *flagp)
                    else
                        regtail(br, reganode(LONGJMP, 0));
                    c = *nextchar();
+                   if (flags&HASWIDTH)
+                       *flagp |= HASWIDTH;
                    if (c == '|') {
                        lastbr = reganode(IFTHEN, 0); /* Fake one for optimizer. */
                        regbranch(&flags, 1);
                        regtail(ret, lastbr);
+                       if (flags&HASWIDTH)
+                           *flagp |= HASWIDTH;
                        c = *nextchar();
                    }
                    else
@@ -1563,7 +1587,7 @@ regpiece(I32 *flagp)
        goto do_curly;
     }
   nest_check:
-    if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) {
+    if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3) {
        warner(WARN_UNSAFE, "%.*s matches null string many times",
            PL_regcomp_parse - origparse, origparse);
     }
@@ -2245,9 +2269,26 @@ regclass(void)
            }
        }
        if (!SIZE_ONLY) {
-           for ( ; lastvalue <= value; lastvalue++)
-               ANYOF_SET(opnd, lastvalue);
-       }
+#ifndef ASCIIish
+           if ((isLOWER(lastvalue) && isLOWER(value)) ||
+               (isUPPER(lastvalue) && isUPPER(value)))
+           {
+               I32 i;
+               if (isLOWER(lastvalue)) {
+                   for (i = lastvalue; i <= value; i++)
+                       if (isLOWER(i))
+                           ANYOF_SET(opnd, i);
+               } else {
+                   for (i = lastvalue; i <= value; i++)
+                       if (isUPPER(i))
+                           ANYOF_SET(opnd, i);
+               }
+           }
+           else
+#endif
+               for ( ; lastvalue <= value; lastvalue++)
+                   ANYOF_SET(opnd, lastvalue);
+        }
        lastvalue = value;
     }
     /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
@@ -3026,7 +3067,7 @@ regprop(SV *sv, regnode *o)
        sv_catpvf(sv, "GROUPP%d", ARG(o));
        break;
     case LOGICAL:
-       p = "LOGICAL";
+       sv_catpvf(sv, "LOGICAL[%d]", o->flags);
        break;
     case SUSPEND:
        p = "SUSPEND";