X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=a3106dc4454178b8d5824747a107564951724881;hb=d2a0188286383355aaa13e586da8618f82242215;hp=83438e90a0cbd151621f1874d66a5b85a527432c;hpb=653099ff2c52a6af02b3894d684593dfe31dcc17;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index 83438e9..a3106dc 100644 --- a/regcomp.c +++ b/regcomp.c @@ -69,7 +69,7 @@ * **** Alterations to Henry's code are... **** - **** Copyright (c) 1991-1999, Larry Wall + **** Copyright (c) 1991-2000, Larry Wall **** **** You may distribute under the terms of either the GNU General Public **** License or the Artistic License, as specified in the README file. @@ -277,6 +277,7 @@ S_cl_is_anything(pTHX_ struct regnode_charclass_class *cl) STATIC void S_cl_init(pTHX_ struct regnode_charclass_class *cl) { + Zero(cl, 1, struct regnode_charclass_class); cl->type = ANYOF; cl_anything(cl); } @@ -284,10 +285,9 @@ S_cl_init(pTHX_ struct regnode_charclass_class *cl) STATIC void S_cl_init_zero(pTHX_ struct regnode_charclass_class *cl) { + Zero(cl, 1, struct regnode_charclass_class); cl->type = ANYOF; cl_anything(cl); - ANYOF_CLASS_ZERO(cl); - ANYOF_BITMAP_ZERO(cl); if (LOC) cl->flags |= ANYOF_LOCALE; } @@ -545,9 +545,21 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da } } else if (flags & SCF_DO_STCLASS_AND) { - cl_and(data->start_class, &accum); - if (min1) + if (min1) { + cl_and(data->start_class, &accum); flags &= ~SCF_DO_STCLASS; + } + else { + /* Switch to OR mode: cache the old value of + * data->start_class */ + StructCopy(data->start_class, &and_with, + struct regnode_charclass_class); + flags &= ~SCF_DO_STCLASS_AND; + StructCopy(&accum, data->start_class, + struct regnode_charclass_class); + flags |= SCF_DO_STCLASS_OR; + data->start_class->flags |= ANYOF_EOS; + } } } else if (code == BRANCHJ) /* single branch is optimized. */ @@ -753,10 +765,10 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da } if (!scan) /* It was not CURLYX, but CURLY. */ scan = next; - if (ckWARN(WARN_UNSAFE) && (minnext + deltanext == 0) + if (ckWARN(WARN_REGEXP) && (minnext + deltanext == 0) && !(data->flags & (SF_HAS_PAR|SF_IN_PAR)) && maxcount <= REG_INFTY/3) /* Complement check for big count */ - Perl_warner(aTHX_ WARN_UNSAFE, + Perl_warner(aTHX_ WARN_REGEXP, "Strange *+?{} on zero-length expression"); min += minnext * mincount; is_inf_internal |= (maxcount == REG_INFTY @@ -1327,8 +1339,9 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) if (exp == NULL) FAIL("NULL regexp argument"); - if (PL_curcop == &PL_compiling ? (PL_hints & HINT_UTF8) : IN_UTF8) + if (pm->op_pmdynflags & PMdf_UTF8) { PL_reg_flags |= RF_utf8; + } else PL_reg_flags = 0; @@ -1375,6 +1388,10 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) char, regexp); if (r == NULL) FAIL("regexp out of space"); +#ifdef DEBUGGING + /* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */ + Zero(r, sizeof(regexp) + (unsigned)PL_regsize * sizeof(regnode), char); +#endif r->refcnt = 1; r->prelen = xend - exp; r->precomp = PL_regprecomp; @@ -1590,7 +1607,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ DEBUG_r((sv = sv_newmortal(), regprop(sv, (regnode*)data.start_class), - PerlIO_printf(Perl_debug_log, "synthetic stclass.\n", + PerlIO_printf(Perl_debug_log, "synthetic stclass `%s'.\n", SvPVX(sv)))); } @@ -1639,7 +1656,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ DEBUG_r((sv = sv_newmortal(), regprop(sv, (regnode*)data.start_class), - PerlIO_printf(Perl_debug_log, "synthetic stclass.\n", + PerlIO_printf(Perl_debug_log, "synthetic stclass `%s'.\n", SvPVX(sv)))); } } @@ -2189,8 +2206,8 @@ S_regpiece(pTHX_ I32 *flagp) goto do_curly; } nest_check: - if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3) { - Perl_warner(aTHX_ WARN_UNSAFE, "%.*s matches null string many times", + if (ckWARN(WARN_REGEXP) && !SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3) { + Perl_warner(aTHX_ WARN_REGEXP, "%.*s matches null string many times", PL_regcomp_parse - origparse, origparse); } @@ -2558,11 +2575,19 @@ tryagain: p++; break; case 'e': - ender = '\033'; +#ifdef ASCIIish + ender = '\033'; +#else + ender = '\047'; +#endif p++; break; case 'a': - ender = '\007'; +#ifdef ASCIIish + ender = '\007'; +#else + ender = '\057'; +#endif p++; break; case 'x': @@ -2609,8 +2634,8 @@ tryagain: FAIL("trailing \\ in regexp"); /* FALL THROUGH */ default: - if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) && isALPHA(*p)) - Perl_warner(aTHX_ WARN_UNSAFE, + if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(*p)) + Perl_warner(aTHX_ WARN_REGEXP, "/%.127s/: Unrecognized escape \\%c passed through", PL_regprecomp, *p); @@ -2801,9 +2826,9 @@ S_regpposixcc(pTHX_ I32 value) posixcc[skip + 1] == ']')))) Perl_croak(aTHX_ "Character class [:%.*s:] unknown", t - s - 1, s + 1); - } else if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY) + } else if (ckWARN(WARN_REGEXP) && !SIZE_ONLY) /* [[=foo=]] and [[.foo.]] are still future. */ - Perl_warner(aTHX_ WARN_UNSAFE, + Perl_warner(aTHX_ WARN_REGEXP, "Character class syntax [%c %c] is reserved for future extensions", c, c); } else { /* Maternal grandfather: @@ -2819,7 +2844,7 @@ S_regpposixcc(pTHX_ I32 value) STATIC void S_checkposixcc(pTHX) { - if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) && + if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && (*PL_regcomp_parse == ':' || *PL_regcomp_parse == '=' || *PL_regcomp_parse == '.')) { @@ -2829,10 +2854,10 @@ S_checkposixcc(pTHX) while(*s && isALNUM(*s)) s++; if (*s && c == *s && s[1] == ']') { - Perl_warner(aTHX_ WARN_UNSAFE, + Perl_warner(aTHX_ WARN_REGEXP, "Character class syntax [%c %c] belongs inside character classes", c, c); if (c == '=' || c == '.') - Perl_warner(aTHX_ WARN_UNSAFE, + Perl_warner(aTHX_ WARN_REGEXP, "Character class syntax [%c %c] is reserved for future extensions", c, c); } } @@ -2871,7 +2896,7 @@ S_regclass(pTHX) ANYOF_FLAGS(ret) |= ANYOF_INVERT; } - if (!SIZE_ONLY && ckWARN(WARN_UNSAFE)) + if (!SIZE_ONLY && ckWARN(WARN_REGEXP)) checkposixcc(); if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-') @@ -2898,8 +2923,13 @@ S_regclass(pTHX) case 't': value = '\t'; break; case 'f': value = '\f'; break; case 'b': value = '\b'; break; +#ifdef ASCIIish case 'e': value = '\033'; break; case 'a': value = '\007'; break; +#else + case 'e': value = '\047'; break; + case 'a': value = '\057'; break; +#endif case 'x': value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen); PL_regcomp_parse += numlen; @@ -2914,11 +2944,11 @@ S_regclass(pTHX) PL_regcomp_parse += numlen; break; default: - if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) && isALPHA(value)) - Perl_warner(aTHX_ WARN_UNSAFE, + if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value)) + Perl_warner(aTHX_ WARN_REGEXP, "/%.127s/: Unrecognized escape \\%c in character class passed through", PL_regprecomp, - value); + (int)value); break; } } @@ -2928,8 +2958,8 @@ S_regclass(pTHX) need_class = 1; if (range) { /* a-\d, a-[:digit:] */ if (!SIZE_ONLY) { - if (ckWARN(WARN_UNSAFE)) - Perl_warner(aTHX_ WARN_UNSAFE, + if (ckWARN(WARN_REGEXP)) + Perl_warner(aTHX_ WARN_REGEXP, "/%.127s/: false [] range \"%*.*s\" in regexp", PL_regprecomp, PL_regcomp_parse - rangebegin, @@ -3213,8 +3243,8 @@ S_regclass(pTHX) PL_regcomp_parse[1] != ']') { PL_regcomp_parse++; if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ - if (ckWARN(WARN_UNSAFE)) - Perl_warner(aTHX_ WARN_UNSAFE, + if (ckWARN(WARN_REGEXP)) + Perl_warner(aTHX_ WARN_REGEXP, "/%.127s/: false [] range \"%*.*s\" in regexp", PL_regprecomp, PL_regcomp_parse - rangebegin, @@ -3307,7 +3337,7 @@ S_regclassutf8(pTHX) listsv = newSVpvn("# comment\n",10); } - if (!SIZE_ONLY && ckWARN(WARN_UNSAFE)) + if (!SIZE_ONLY && ckWARN(WARN_REGEXP)) checkposixcc(); if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-') @@ -3347,10 +3377,10 @@ S_regclassutf8(pTHX) if (!SIZE_ONLY) { if (value == 'p') Perl_sv_catpvf(aTHX_ listsv, - "+utf8::%.*s\n", n, PL_regcomp_parse); + "+utf8::%.*s\n", (int)n, PL_regcomp_parse); else Perl_sv_catpvf(aTHX_ listsv, - "!utf8::%.*s\n", n, PL_regcomp_parse); + "!utf8::%.*s\n", (int)n, PL_regcomp_parse); } PL_regcomp_parse = e + 1; lastvalue = OOB_UTF8; @@ -3360,8 +3390,13 @@ S_regclassutf8(pTHX) case 't': value = '\t'; break; case 'f': value = '\f'; break; case 'b': value = '\b'; break; +#ifdef ASCIIish case 'e': value = '\033'; break; case 'a': value = '\007'; break; +#else + case 'e': value = '\047'; break; + case 'a': value = '\057'; break; +#endif case 'x': if (*PL_regcomp_parse == '{') { e = strchr(PL_regcomp_parse++, '}'); @@ -3387,19 +3422,19 @@ S_regclassutf8(pTHX) PL_regcomp_parse += numlen; break; default: - if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) && isALPHA(value)) - Perl_warner(aTHX_ WARN_UNSAFE, + if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value)) + Perl_warner(aTHX_ WARN_REGEXP, "/%.127s/: Unrecognized escape \\%c in character class passed through", PL_regprecomp, - value); + (int)value); break; } } if (namedclass > OOB_NAMEDCLASS) { if (range) { /* a-\d, a-[:digit:] */ if (!SIZE_ONLY) { - if (ckWARN(WARN_UNSAFE)) - Perl_warner(aTHX_ WARN_UNSAFE, + if (ckWARN(WARN_REGEXP)) + Perl_warner(aTHX_ WARN_REGEXP, "/%.127s/: false [] range \"%*.*s\" in regexp", PL_regprecomp, PL_regcomp_parse - rangebegin, @@ -3486,8 +3521,8 @@ S_regclassutf8(pTHX) PL_regcomp_parse[1] != ']') { PL_regcomp_parse++; if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ - if (ckWARN(WARN_UNSAFE)) - Perl_warner(aTHX_ WARN_UNSAFE, + if (ckWARN(WARN_REGEXP)) + Perl_warner(aTHX_ WARN_REGEXP, "/%.127s/: false [] range \"%*.*s\" in regexp", PL_regprecomp, PL_regcomp_parse - rangebegin, @@ -3906,7 +3941,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) else if (k == WHILEM && o->flags) /* Ordinal/of */ Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4); else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP ) - Perl_sv_catpvf(aTHX_ sv, "%d", ARG(o)); /* Parenth number */ + Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o)); /* Parenth number */ else if (k == LOGICAL) Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */ else if (k == ANYOF) {