X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=64c06f00273812b9689b376ac1186f1a67ceee18;hb=b3656ec34f74d879ca158a5689e6d4537869c415;hp=df2fc0c796cf5cc732b157c797414cb61c68e427;hpb=423cee853811c26846bd1948939b85f9866dfb4a;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index df2fc0c..64c06f0 100644 --- a/regcomp.c +++ b/regcomp.c @@ -192,6 +192,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, #define OOB_CHAR8 1234 #define OOB_UTF8 123456 +#define OOB_NAMEDCLASS -1 #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv)) #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b) @@ -1779,7 +1780,7 @@ tryagain: PL_seen_zerolen++; /* Do not optimize RE away */ nextchar(); break; - case 'O': + case 'C': ret = reg_node(SANY); *flagp |= HASWIDTH|SIMPLE; nextchar(); @@ -2246,7 +2247,7 @@ S_regpposixcc(pTHX_ I32 value) } break; } - if ((namedclass == -1 || + if ((namedclass == OOB_NAMEDCLASS || !(posixcc + skip + 2 < PL_regxend && (posixcc[skip] == ':' && posixcc[skip + 1] == ']')))) @@ -2275,7 +2276,7 @@ S_checkposixcc(pTHX) *PL_regcomp_parse == '=' || *PL_regcomp_parse == '.')) { char *s = PL_regcomp_parse; - char c = *s++; + char c = *s++; while(*s && isALNUM(*s)) s++; @@ -2329,7 +2330,7 @@ S_regclass(pTHX) goto skipcond; /* allow 1st char to be ] or - */ while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') { skipcond: - namedclass = -1; + namedclass = OOB_NAMEDCLASS; value = UCHARAT(PL_regcomp_parse++); if (value == '[') namedclass = regpposixcc(value); @@ -2364,7 +2365,9 @@ S_regclass(pTHX) break; } } - if (!SIZE_ONLY && namedclass > -1) { + if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { + if (range) + FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */ switch (namedclass) { case ANYOF_ALNUM: if (LOC) @@ -2605,25 +2608,27 @@ S_regclass(pTHX) } if (LOC) ANYOF_FLAGS(opnd) |= ANYOF_CLASS; - lastvalue = OOB_CHAR8; + continue; } - else if (range) { if (lastvalue > value) - FAIL("invalid [] range in regexp"); + FAIL("invalid [] range in regexp"); /* [b-a] */ range = 0; } else { lastvalue = value; if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && - PL_regcomp_parse[1] != ']') { + PL_regcomp_parse[1] != ']') { + if (namedclass > OOB_NAMEDCLASS) + FAIL("invalid [] range in regexp"); /* [\w-a] */ PL_regcomp_parse++; range = 1; continue; /* do it next time */ } } + /* now is the next time */ if (!SIZE_ONLY) { -#ifndef ASCIIish +#ifndef ASCIIish /* EBCDIC, for example. */ if ((isLOWER(lastvalue) && isLOWER(value)) || (isUPPER(lastvalue) && isUPPER(value))) { @@ -2643,7 +2648,7 @@ S_regclass(pTHX) for ( ; lastvalue <= value; lastvalue++) ANYOF_BITMAP_SET(opnd, lastvalue); } - lastvalue = value; + range = 0; } /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ if (!SIZE_ONLY && @@ -2701,7 +2706,7 @@ S_regclassutf8(pTHX) while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') { skipcond: - namedclass = -1; + namedclass = OOB_NAMEDCLASS; value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen); PL_regcomp_parse += numlen; @@ -2773,7 +2778,9 @@ S_regclassutf8(pTHX) break; } } - if (!SIZE_ONLY && namedclass > -1) { + if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { + if (range) + FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */ switch (namedclass) { case ANYOF_ALNUM: Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; @@ -2828,11 +2835,11 @@ S_regclassutf8(pTHX) case ANYOF_NXDIGIT: Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break; } + continue; } - else if (range) { if (lastvalue > value) - FAIL("invalid [] range in regexp"); + FAIL("invalid [] range in regexp"); /* [b-a] */ #ifdef UV_IS_QUAD if (!SIZE_ONLY) Perl_sv_catpvf(aTHX_ listsv, "%04" PERL_PRIx64 "\t%04" PERL_PRIx64 "\n", (UV)lastvalue, (UV)value); @@ -2840,25 +2847,28 @@ S_regclassutf8(pTHX) if (!SIZE_ONLY) Perl_sv_catpvf(aTHX_ listsv, "%04x\t%04x\n", lastvalue, value); #endif - lastvalue = value; range = 0; } else { lastvalue = value; if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && - PL_regcomp_parse[1] != ']') { + PL_regcomp_parse[1] != ']') { + if (namedclass > OOB_NAMEDCLASS) + FAIL("invalid [] range in regexp"); /* [\w-a] */ PL_regcomp_parse++; range = 1; continue; /* do it next time */ } + } + /* now is the next time */ #ifdef UV_IS_QUAD - if (!SIZE_ONLY) - Perl_sv_catpvf(aTHX_ listsv, "%04" PERL_PRIx64 "\n", (UV)value); + if (!SIZE_ONLY) + Perl_sv_catpvf(aTHX_ listsv, "%04" PERL_PRIx64 "\n", (UV)value); #else - if (!SIZE_ONLY) - Perl_sv_catpvf(aTHX_ listsv, "%04x\n", value); + if (!SIZE_ONLY) + Perl_sv_catpvf(aTHX_ listsv, "%04x\n", value); #endif - } + range = 0; } ret = reganode(ANYOFUTF8, 0); @@ -3235,7 +3245,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) k = PL_regkind[(U8)OP(o)]; if (k == EXACT) - Perl_sv_catpvf(aTHX_ sv, " <%s%*s%s>", PL_colors[0], + Perl_sv_catpvf(aTHX_ sv, " <%s%.*s%s>", PL_colors[0], STR_LEN(o), STRING(o), PL_colors[1]); else if (k == CURLY) { if (OP(o) == CURLYM || OP(o) == CURLYN) @@ -3247,7 +3257,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP ) Perl_sv_catpvf(aTHX_ sv, "%d", ARG(o)); /* Parenth number */ else if (k == LOGICAL) - Perl_sv_catpvf(aTHX_ sv, "[%d]", ARG(o)); /* 2: embedded, otherwise 1 */ + Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */ else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags); #endif /* DEBUGGING */ @@ -3277,6 +3287,9 @@ Perl_pregfree(pTHX_ struct regexp *r) { dTHR; DEBUG_r(if (!PL_colorset) reginitcolors()); + + if (!r || (--r->refcnt > 0)) + return; DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFreeing REx:%s `%s%.60s%s%s'\n", PL_colors[4],PL_colors[5],PL_colors[0], @@ -3284,9 +3297,6 @@ Perl_pregfree(pTHX_ struct regexp *r) PL_colors[1], (strlen(r->precomp) > 60 ? "..." : ""))); - - if (!r || (--r->refcnt > 0)) - return; if (r->precomp) Safefree(r->precomp); if (RX_MATCH_COPIED(r)) @@ -3385,7 +3395,7 @@ S_re_croak2(pTHX_ const char* pat1,const char* pat2,...) #else va_start(args); #endif - msv = mess(buf, &args); + msv = vmess(buf, &args); va_end(args); message = SvPV(msv,l1); if (l1 > 512)