The exact error message is system-dependent.
[p5sagit/p5-mst-13.2.git] / regcomp.c
index 98cf21b..4b07fd2 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
 #define PERL_IN_REGCOMP_C
 #include "perl.h"
 
-#ifdef PERL_IN_XSUB_RE
-#  if defined(PERL_CAPI) || defined(PERL_OBJECT)
-#    include "XSUB.h"
-#  endif
-#else
+#ifndef PERL_IN_XSUB_RE
 #  include "INTERN.h"
 #endif
 
@@ -393,6 +389,14 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 m, (int)offset, RExC_precomp, RExC_precomp + offset);          \
     } STMT_END                                                               \
 
+#define        vWARNdep(loc,m)                                                         \
+    STMT_START {                                                             \
+        unsigned offset = strlen(RExC_precomp)-(RExC_end-(loc));          \
+        int warn_cat = ckWARN(WARN_REGEXP) ? WARN_REGEXP : WARN_DEPRECATED;  \
+       Perl_warner(aTHX_ warn_cat, "%s" REPORT_LOCATION,\
+                m, (int)offset, RExC_precomp, RExC_precomp + offset);          \
+    } STMT_END                                                               \
+
 
 #define        vWARN2(loc, m, a1)                                                   \
     STMT_START {                                                             \
@@ -418,9 +422,18 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 (int)offset, RExC_precomp, RExC_precomp + offset);        \
     } STMT_END
 
+/* used for the parse_flags section for (?c) -- japhy */
+#define        vWARN5(loc, m, a1, a2, a3, a4)                                       \
+  STMT_START {                                                   \
+      unsigned offset = strlen(RExC_precomp)-(RExC_end-(loc));   \
+        Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,      \
+                 a1, a2, a3, a4,                                 \
+                 (int)offset, RExC_precomp, RExC_precomp + offset);  \
+    } STMT_END
+
 
 /* Allow for side effects in s */
-#define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (s);} STMT_END
+#define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (void)(s);} STMT_END
 
 /* Macros for recording node offsets.   20001227 mjd@plover.com 
  * Nodes are numbered 1, 2, 3, 4.  Node #n's position is recorded in
@@ -466,7 +479,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 #define Node_Offset(n) (RExC_offsets[2*((n)-RExC_emit_start)-1])
 #define Node_Length(n) (RExC_offsets[2*((n)-RExC_emit_start)])
 
-static void clear_re(pTHXo_ void *r);
+static void clear_re(pTHX_ void *r);
 
 /* Mark that we cannot extend a found fixed substring at this point.
    Updata the longest found anchored substring and the longest found
@@ -954,6 +967,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
            regnode *oscan = scan;
            struct regnode_charclass_class this_class;
            struct regnode_charclass_class *oclass = NULL;
+           I32 next_is_eval = 0;
 
            switch (PL_regkind[(U8)OP(scan)]) {
            case WHILEM:                /* End of (?:...)* . */
@@ -999,6 +1013,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                    scan->flags = ((lp <= U8_MAX) ? lp : U8_MAX);
                }
                scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
+               next_is_eval = (OP(scan) == EVAL);
              do_curly:
                if (flags & SCF_DO_SUBSTR) {
                    if (mincount == 0) scan_commit(pRExC_state,data); /* Cannot extend fixed substrings */
@@ -1059,7 +1074,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                }
                if (!scan)              /* It was not CURLYX, but CURLY. */
                    scan = next;
-               if (ckWARN(WARN_REGEXP) && (minnext + deltanext == 0)
+               if (ckWARN(WARN_REGEXP)
+                      /* ? quantifier ok, except for (?{ ... }) */
+                   && (next_is_eval || !(mincount == 0 && maxcount == 1))
+                   && (minnext == 0) && (deltanext == 0)
                    && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
                    && maxcount <= REG_INFTY/3) /* Complement check for big count */
                {
@@ -1081,7 +1099,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                      && !deltanext && minnext == 1 ) {
                    /* Try to optimize to CURLYN.  */
                    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
-                   regnode *nxt1 = nxt, *nxt2;
+                   regnode *nxt1 = nxt;
+#ifdef DEBUGGING
+                   regnode *nxt2;
+#endif
 
                    /* Skip open. */
                    nxt = regnext(nxt);
@@ -1089,7 +1110,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 *deltap, reg
                        && !(PL_regkind[(U8)OP(nxt)] == EXACT
                             && STR_LEN(nxt) == 1))
                        goto nogo;
+#ifdef DEBUGGING
                    nxt2 = nxt;
+#endif
                    nxt = regnext(nxt);
                    if (OP(nxt) != CLOSE)
                        goto nogo;
@@ -1723,6 +1746,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     RExC_rx = r;
 
     /* Second pass: emit code. */
+    RExC_flags16 = pm->op_pmflags;     /* don't let top level (?i) bleed */
     RExC_parse = exp;
     RExC_end = xend;
     RExC_naughty = 0;
@@ -1990,8 +2014,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
        r->reganch |= ROPT_LOOKBEHIND_SEEN;
     if (RExC_seen & REG_SEEN_EVAL)
        r->reganch |= ROPT_EVAL_SEEN;
-    if (RExC_seen & REG_SEEN_SANY)
-       r->reganch |= ROPT_SANY_SEEN;
+    if (RExC_seen & REG_SEEN_CANY)
+       r->reganch |= ROPT_CANY_SEEN;
     Newz(1002, r->startp, RExC_npar, I32);
     Newz(1002, r->endp, RExC_npar, I32);
     PL_regdata = r->data; /* for regprop() */
@@ -2018,12 +2042,23 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
     register regnode *ender = 0;
     register I32 parno = 0;
     I32 flags, oregflags = RExC_flags16, have_branch = 0, open = 0;
+
+    /* for (?g), (?gc), and (?o) warnings; warning
+       about (?c) will warn about (?g) -- japhy    */
+
+    I32 wastedflags = 0x00,
+        wasted_o    = 0x01,
+        wasted_g    = 0x02,
+        wasted_gc   = 0x02 | 0x04,
+        wasted_c    = 0x04;
+
     char * parse_start = RExC_parse; /* MJD */
     char *oregcomp_parse = RExC_parse;
     char c;
 
     *flagp = 0;                                /* Tentatively. */
 
+
     /* Make an OPEN node, if parenthesized. */
     if (paren) {
        if (*RExC_parse == '?') { /* (?...) */
@@ -2062,8 +2097,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                *flagp = TRYAGAIN;
                return NULL;
            case 'p':           /* (?p...) */
-               if (SIZE_ONLY)
-                   vWARN(RExC_parse, "(?p{}) is deprecated - use (??{})");
+               if (SIZE_ONLY && ckWARN2(WARN_DEPRECATED, WARN_REGEXP))
+                   vWARNdep(RExC_parse, "(?p{}) is deprecated - use (??{})");
                /* FALL THROUGH*/
            case '?':           /* (??...) */
                logical = 1;
@@ -2104,6 +2139,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                    ENTER;
                    Perl_save_re_context(aTHX);
                    rop = sv_compile_2op(sv, &sop, "re", &av);
+                   sop->op_private |= OPpREFCOUNTED;
+                   /* re_dup will OpREFCNT_inc */
+                   OpREFCNT_set(sop, 1);
                    LEAVE;
 
                    n = add_data(pRExC_state, 3, "nop");
@@ -2202,12 +2240,45 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp)
                --RExC_parse;
              parse_flags:      /* (?i) */
                while (*RExC_parse && strchr("iogcmsx", *RExC_parse)) {
-                   if (*RExC_parse != 'o')
-                       pmflag(flagsp, *RExC_parse);
+                   /* (?g), (?gc) and (?o) are useless here
+                      and must be globally applied -- japhy */
+
+                   if (*RExC_parse == 'o' || *RExC_parse == 'g') {
+                       if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
+                           I32 wflagbit = *RExC_parse == 'o' ? wasted_o : wasted_g;
+                           if (! (wastedflags & wflagbit) ) {
+                               wastedflags |= wflagbit;
+                               vWARN5(
+                                   RExC_parse + 1,
+                                   "Useless (%s%c) - %suse /%c modifier",
+                                   flagsp == &negflags ? "?-" : "?",
+                                   *RExC_parse,
+                                   flagsp == &negflags ? "don't " : "",
+                                   *RExC_parse
+                               );
+                           }
+                       }
+                   }
+                   else if (*RExC_parse == 'c') {
+                       if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
+                           if (! (wastedflags & wasted_c) ) {
+                               wastedflags |= wasted_gc;
+                               vWARN3(
+                                   RExC_parse + 1,
+                                   "Useless (%sc) - %suse /gc modifier",
+                                   flagsp == &negflags ? "?-" : "?",
+                                   flagsp == &negflags ? "don't " : ""
+                               );
+                           }
+                       }
+                   }
+                   else { pmflag(flagsp, *RExC_parse); }
+
                    ++RExC_parse;
                }
                if (*RExC_parse == '-') {
                    flagsp = &negflags;
+                   wastedflags = 0;  /* reset so (?g-c) warns twice */
                    ++RExC_parse;
                    goto parse_flags;
                }
@@ -2616,6 +2687,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp)
 {
     register regnode *ret = 0;
     I32 flags;
+    char *parse_start = 0;
 
     *flagp = WORST;            /* Tentatively. */
 
@@ -2733,8 +2805,8 @@ tryagain:
             Set_Node_Length(ret, 2); /* MJD */
            break;
        case 'C':
-           ret = reg_node(pRExC_state, SANY);
-           RExC_seen |= REG_SEEN_SANY;
+           ret = reg_node(pRExC_state, CANY);
+           RExC_seen |= REG_SEEN_CANY;
            *flagp |= HASWIDTH|SIMPLE;
            nextchar(pRExC_state);
             Set_Node_Length(ret, 2); /* MJD */
@@ -2889,7 +2961,8 @@ tryagain:
            register char *p;
            char *oldp, *s;
            STRLEN numlen;
-            char *parse_start = RExC_parse - 1;
+
+            parse_start = RExC_parse - 1;
 
            RExC_parse++;
 
@@ -2966,8 +3039,9 @@ tryagain:
                                vFAIL("Missing right brace on \\x{}");
                            }
                            else {
-                               numlen = 1;     /* allow underscores */
-                               ender = (UV)scan_hex(p + 1, e - p - 1, &numlen);
+                                I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
+                                numlen = e - p - 1;
+                               ender = grok_hex(p + 1, &numlen, &flags, NULL);
                                if (ender > 0xff)
                                    RExC_utf8 = 1;
                                /* numlen is generous */
@@ -2979,8 +3053,9 @@ tryagain:
                            }
                        }
                        else {
-                           numlen = 0;         /* disallow underscores */
-                           ender = (UV)scan_hex(p, 2, &numlen);
+                            I32 flags = 0;
+                           numlen = 2;
+                           ender = grok_hex(p, &numlen, &flags, NULL);
                            p += numlen;
                        }
                        break;
@@ -2993,8 +3068,9 @@ tryagain:
                    case '5': case '6': case '7': case '8':case '9':
                        if (*p == '0' ||
                          (isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) {
-                           numlen = 0;         /* disallow underscores */
-                           ender = (UV)scan_oct(p, 3, &numlen);
+                            I32 flags = 0;
+                           numlen = 3;
+                           ender = grok_oct(p, &numlen, &flags, NULL);
                            p += numlen;
                        }
                        else {
@@ -3373,18 +3449,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
            case 'a':   value = ASCII_TO_NATIVE('\007');break;
            case 'x':
                if (*RExC_parse == '{') {
+                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
                    e = strchr(RExC_parse++, '}');
                     if (!e)
                         vFAIL("Missing right brace on \\x{}");
-                   numlen = 1;         /* allow underscores */
-                   value = (UV)scan_hex(RExC_parse,
-                                        e - RExC_parse,
-                                        &numlen);
+
+                   numlen = e - RExC_parse;
+                   value = grok_hex(RExC_parse, &numlen, &flags, NULL);
                    RExC_parse = e + 1;
                }
                else {
-                   numlen = 0;         /* disallow underscores */
-                   value = (UV)scan_hex(RExC_parse, 2, &numlen);
+                    I32 flags = 0;
+                   numlen = 2;
+                   value = grok_hex(RExC_parse, &numlen, &flags, NULL);
                    RExC_parse += numlen;
                }
                break;
@@ -3394,10 +3471,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state)
                break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
-               numlen = 0;             /* disallow underscores */
-               value = (UV)scan_oct(--RExC_parse, 3, &numlen);
+            {
+                I32 flags = 0;
+               numlen = 3;
+               value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
                RExC_parse += numlen;
                break;
+            }
            default:
                if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value))
                    vWARN2(RExC_parse,
@@ -4122,10 +4202,11 @@ S_regcurly(pTHX_ register char *s)
 }
 
 
+#ifdef DEBUGGING
+
 STATIC regnode *
 S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
 {
-#ifdef DEBUGGING
     register U8 op = EXACT;    /* Arbitrary non-END op. */
     register regnode *next;
 
@@ -4191,10 +4272,11 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
        else if (op == WHILEM)
            l--;
     }
-#endif /* DEBUGGING */
     return node;
 }
 
+#endif /* DEBUGGING */
+
 /*
  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
  */
@@ -4275,10 +4357,12 @@ Perl_regdump(pTHX_ regexp *r)
 #endif /* DEBUGGING */
 }
 
+#ifdef DEBUGGING
+
 STATIC void
 S_put_byte(pTHX_ SV *sv, int c)
 {
-    if (isCNTRL(c) || c == 127 || c == 255 || !isPRINT(c))
+    if (isCNTRL(c) || c == 255 || !isPRINT(c))
        Perl_sv_catpvf(aTHX_ sv, "\\%o", c);
     else if (c == '-' || c == ']' || c == '\\' || c == '^')
        Perl_sv_catpvf(aTHX_ sv, "\\%c", c);
@@ -4286,6 +4370,8 @@ S_put_byte(pTHX_ SV *sv, int c)
        Perl_sv_catpvf(aTHX_ sv, "%c", c);
 }
 
+#endif /* DEBUGGING */
+
 /*
 - regprop - printable representation of opcode
 */
@@ -4511,6 +4597,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
        SV** old_curpad;
 
        while (--n >= 0) {
+          /* If you add a ->what type here, update the comment in regcomp.h */
            switch (r->data->what[n]) {
            case 's':
                SvREFCNT_dec((SV*)r->data->data[n]);
@@ -4533,7 +4620,11 @@ Perl_pregfree(pTHX_ struct regexp *r)
                }
                else
                    PL_curpad = NULL;
-               op_free((OP_4tree*)r->data->data[n]);
+
+               if (!OpREFCNT_dec((OP_4tree*)r->data->data[n])) {
+                    op_free((OP_4tree*)r->data->data[n]);
+               }
+
                PL_comppad = old_comppad;
                PL_curpad = old_curpad;
                SvREFCNT_dec((SV*)new_comppad);
@@ -4651,6 +4742,7 @@ Perl_save_re_context(pTHX)
     SAVEVPTR(PL_reg_re);               /* from regexec.c */
     SAVEPPTR(PL_reg_ganch);            /* from regexec.c */
     SAVESPTR(PL_reg_sv);               /* from regexec.c */
+    SAVEI8(PL_reg_match_utf8);         /* from regexec.c */
     SAVEVPTR(PL_reg_magic);            /* from regexec.c */
     SAVEI32(PL_reg_oldpos);                    /* from regexec.c */
     SAVEVPTR(PL_reg_oldcurpm);         /* from regexec.c */
@@ -4662,14 +4754,8 @@ Perl_save_re_context(pTHX)
 #endif
 }
 
-#ifdef PERL_OBJECT
-#include "XSUB.h"
-#undef this
-#define this pPerl
-#endif
-
 static void
-clear_re(pTHXo_ void *r)
+clear_re(pTHX_ void *r)
 {
     ReREFCNT_dec((regexp *)r);
 }