X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=19b80968335a65f405ec0c777c940d8f8d8ddd21;hb=76ced9add7b621dfc9d4ecb534aeea8e131a418a;hp=12b2eef63352dac44fe9d769cf74f1ea55c9bf36;hpb=9baa0206214393e14c90c1119dbe3c122969f510;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index 12b2eef..19b8096 100644 --- a/regcomp.c +++ b/regcomp.c @@ -202,25 +202,180 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, #define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b) -#define vFAIL(m) \ +/* length of regex to show in messages that don't mark a position within */ +#define RegexLengthToShowInErrorMessages 127 + +/* + * If MARKER[12] are adjusted, be sure to adjust the constants at the top + * of t/op/regmesg.t, the tests in t/op/re_tests, and those in + * op/pragma/warn/regcomp. + */ +#define MARKER1 "HERE" /* marker as it appears in the description */ +#define MARKER2 " << HERE " /* marker as it appears within the regex */ + +#define REPORT_LOCATION " before " MARKER1 " mark in regex m/%.*s" MARKER2 "%s/" + +/* + * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given + * arg. Show regex, up to a maximum length. If it's too long, chop and add + * "...". + */ +#define FAIL(msg) \ STMT_START { \ + char *ellipses = ""; \ + unsigned len = strlen(PL_regprecomp); \ + \ if (!SIZE_ONLY) \ SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \ - Perl_croak(aTHX_ "%s at {#} mark in regex m/%.*s{#}%s/", m, \ - strlen(PL_regprecomp)-(PL_regxend - PL_regcomp_parse), \ - PL_regprecomp, \ - PL_regprecomp + strlen(PL_regprecomp)-(PL_regxend - PL_regcomp_parse));\ + \ + if (len > RegexLengthToShowInErrorMessages) { \ + /* chop 10 shorter than the max, to ensure meaning of "..." */ \ + len = RegexLengthToShowInErrorMessages - 10; \ + ellipses = "..."; \ + } \ + Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \ + msg, (int)len, PL_regprecomp, ellipses); \ } STMT_END -#define vFAIL2(pat,m) \ +/* + * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given + * args. Show regex, up to a maximum length. If it's too long, chop and add + * "...". + */ +#define FAIL2(pat,msg) \ STMT_START { \ + char *ellipses = ""; \ + unsigned len = strlen(PL_regprecomp); \ + \ if (!SIZE_ONLY) \ SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \ \ - S_re_croak2(aTHX_ pat, " at {#} mark in regex m/%.*s{#}%s/: ", m, \ - strlen(PL_regprecomp)-(PL_regxend - PL_regcomp_parse), \ - PL_regprecomp, \ - PL_regprecomp + strlen(PL_regprecomp)-(PL_regxend - PL_regcomp_parse));\ + if (len > RegexLengthToShowInErrorMessages) { \ + /* chop 10 shorter than the max, to ensure meaning of "..." */ \ + len = RegexLengthToShowInErrorMessages - 10; \ + ellipses = "..."; \ + } \ + S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/", \ + msg, (int)len, PL_regprecomp, ellipses); \ + } STMT_END + + +/* + * Simple_vFAIL -- like FAIL, but marks the current location in the scan + */ +#define Simple_vFAIL(m) \ + STMT_START { \ + unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ + \ + Perl_croak(aTHX_ "%s" REPORT_LOCATION, \ + m, (int)offset, PL_regprecomp, PL_regprecomp + offset); \ + } STMT_END + +/* + * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL() + */ +#define vFAIL(m) \ + STMT_START { \ + if (!SIZE_ONLY) \ + SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \ + Simple_vFAIL(m); \ + } STMT_END + +/* + * Like Simple_vFAIL(), but accepts two arguments. + */ +#define Simple_vFAIL2(m,a1) \ + STMT_START { \ + unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ + \ + S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ + } STMT_END + +/* + * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2(). + */ +#define vFAIL2(m,a1) \ + STMT_START { \ + if (!SIZE_ONLY) \ + SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \ + Simple_vFAIL2(m, a1); \ + } STMT_END + + +/* + * Like Simple_vFAIL(), but accepts three arguments. + */ +#define Simple_vFAIL3(m, a1, a2) \ + STMT_START { \ + unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ + \ + S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ + } STMT_END + +/* + * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3(). + */ +#define vFAIL3(m,a1,a2) \ + STMT_START { \ + if (!SIZE_ONLY) \ + SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \ + Simple_vFAIL3(m, a1, a2); \ + } STMT_END + +/* + * Like Simple_vFAIL(), but accepts four arguments. + */ +#define Simple_vFAIL4(m, a1, a2, a3) \ + STMT_START { \ + unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ + \ + S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3,\ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ + } STMT_END + +/* + * Like Simple_vFAIL(), but accepts five arguments. + */ +#define Simple_vFAIL5(m, a1, a2, a3, a4) \ + STMT_START { \ + unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ + S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3, a4,\ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ + } STMT_END + + +#define vWARN(loc,m) \ + STMT_START { \ + unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \ + Perl_warner(aTHX_ WARN_REGEXP, "%s" REPORT_LOCATION,\ + m, (int)offset, PL_regprecomp, PL_regprecomp + offset); \ + } STMT_END \ + + +#define vWARN2(loc, m, a1) \ + STMT_START { \ + unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \ + Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\ + a1, \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ + } STMT_END + +#define vWARN3(loc, m, a1, a2) \ + STMT_START { \ + unsigned offset = strlen(PL_regprecomp) - (PL_regxend - (loc)); \ + Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION, \ + a1, a2, \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ + } STMT_END + +#define vWARN4(loc, m, a1, a2, a3) \ + STMT_START { \ + unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \ + Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\ + a1, a2, a3, \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END @@ -288,7 +443,7 @@ S_cl_is_anything(pTHX_ struct regnode_charclass_class *cl) { int value; - for (value = 0; value < ANYOF_MAX; value += 2) + for (value = 0; value <= ANYOF_MAX; value += 2) if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1)) return 1; for (value = 0; value < 256; ++value) @@ -688,8 +843,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da flags &= ~SCF_DO_STCLASS; } else if (strchr((char*)PL_varies,OP(scan))) { - I32 mincount, maxcount, minnext, deltanext, pos_before, fl; - I32 f = flags; + I32 mincount, maxcount, minnext, deltanext, fl; + I32 f = flags, pos_before = 0; regnode *oscan = scan; struct regnode_charclass_class this_class; struct regnode_charclass_class *oclass = NULL; @@ -788,8 +943,11 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da if (ckWARN(WARN_REGEXP) && (minnext + deltanext == 0) && !(data->flags & (SF_HAS_PAR|SF_IN_PAR)) && maxcount <= REG_INFTY/3) /* Complement check for big count */ - Perl_warner(aTHX_ WARN_REGEXP, - "Strange *+?{} on zero-length expression"); + { + vWARN(PL_regcomp_parse, + "Quantifier unexpected on zero-length expression"); + } + min += minnext * mincount; is_inf_internal |= ((maxcount == REG_INFTY && (minnext + deltanext) > 0) @@ -852,7 +1010,7 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/ if (OP(nxt) != CLOSE) - FAIL("panic opt close"); + FAIL("Panic opt close"); oscan->flags = ARG(nxt); OP(nxt1) = OPTIMIZED; /* was OPEN. */ OP(nxt) = OPTIMIZED; /* was CLOSE. */ @@ -1414,7 +1572,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) Newc(1001, r, sizeof(regexp) + (unsigned)PL_regsize * sizeof(regnode), char, regexp); if (r == NULL) - FAIL("regexp out of space"); + FAIL("Regexp out of space"); + #ifdef DEBUGGING /* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */ Zero(r, sizeof(regexp) + (unsigned)PL_regsize * sizeof(regnode), char); @@ -1721,6 +1880,7 @@ S_reg(pTHX_ I32 paren, I32 *flagp) register regnode *ender = 0; register I32 parno = 0; I32 flags, oregflags = PL_regflags, have_branch = 0, open = 0; + char *oregcomp_parse = PL_regcomp_parse; char c; *flagp = 0; /* Tentatively. */ @@ -1731,6 +1891,7 @@ S_reg(pTHX_ I32 paren, I32 *flagp) U16 posflags = 0, negflags = 0; U16 *flagsp = &posflags; int logical = 0; + char *seqstart = PL_regcomp_parse; PL_regcomp_parse++; paren = *PL_regcomp_parse++; @@ -1763,8 +1924,7 @@ S_reg(pTHX_ I32 paren, I32 *flagp) return NULL; case 'p': if (SIZE_ONLY) - Perl_warner(aTHX_ WARN_REGEXP, - "(?p{}) is deprecated - use (??{})"); + vWARN(PL_regcomp_parse, "(?p{}) is deprecated - use (??{})"); /* FALL THROUGH*/ case '?': logical = 1; @@ -1791,7 +1951,10 @@ S_reg(pTHX_ I32 paren, I32 *flagp) PL_regcomp_parse++; } if (*PL_regcomp_parse != ')') - FAIL("Sequence (?{...}) not terminated or not {}-balanced"); + { + PL_regcomp_parse = s; + vFAIL("Sequence (?{...}) not terminated or not {}-balanced"); + } if (!SIZE_ONLY) { AV *av; @@ -1800,7 +1963,10 @@ S_reg(pTHX_ I32 paren, I32 *flagp) else sv = newSVpvn("", 0); + ENTER; + Perl_save_re_context(aTHX); rop = sv_compile_2op(sv, &sop, "re", &av); + LEAVE; n = add_data(3, "nop"); PL_regcomp_rx->data->data[n] = (void*)rop; @@ -1850,7 +2016,7 @@ S_reg(pTHX_ I32 paren, I32 *flagp) PL_regcomp_parse++; ret = reganode(GROUPP, parno); if ((c = *nextchar()) != ')') - vFAIL2("Switch (?(number%c not recognized", c); + vFAIL("Switch condition not recognized"); insert_if: regtail(ret, reganode(IFTHEN, 0)); br = regbranch(&flags, 1); @@ -1884,10 +2050,11 @@ S_reg(pTHX_ I32 paren, I32 *flagp) return ret; } else { - vFAIL2("Unknown condition for (?(%.2s", PL_regcomp_parse); + vFAIL2("Unknown switch condition (?(%.2s", PL_regcomp_parse); } } case 0: + PL_regcomp_parse--; /* for vFAIL to print correctly */ vFAIL("Sequence (? incomplete"); break; default: @@ -1911,8 +2078,10 @@ S_reg(pTHX_ I32 paren, I32 *flagp) break; } unknown: - if (*PL_regcomp_parse != ')') - vFAIL2("Sequence (?%c...) not recognized", *PL_regcomp_parse); + if (*PL_regcomp_parse != ')') { + PL_regcomp_parse++; + vFAIL3("Sequence (%.*s...) not recognized", PL_regcomp_parse-seqstart, seqstart); + } nextchar(); *flagp = TRYAGAIN; return NULL; @@ -2024,15 +2193,17 @@ S_reg(pTHX_ I32 paren, I32 *flagp) if (paren) { PL_regflags = oregflags; if (PL_regcomp_parse >= PL_regxend || *nextchar() != ')') { - FAIL("unmatched () in regexp"); + PL_regcomp_parse = oregcomp_parse; + vFAIL("Unmatched ("); } } else if (!paren && PL_regcomp_parse < PL_regxend) { if (*PL_regcomp_parse == ')') { - FAIL("unmatched () in regexp"); + PL_regcomp_parse++; + vFAIL("Unmatched )"); } else - FAIL("junk on end of regexp"); /* "Can't happen". */ + FAIL("Junk on end of regexp"); /* "Can't happen". */ /* NOTREACHED */ } @@ -2207,8 +2378,19 @@ S_regpiece(pTHX_ I32 *flagp) } #if 0 /* Now runtime fix should be reliable. */ + + /* if this is reinstated, don't forget to put this back into perldiag: + + =item Regexp *+ operand could be empty at {#} in regex m/%s/ + + (F) The part of the regexp subject to either the * or + quantifier + could match an empty string. The {#} shows in the regular + expression about where the problem was discovered. + + */ + if (!(flags&HASWIDTH) && op != '?') - FAIL("regexp *+ operand could be empty"); + vFAIL("Regexp *+ operand could be empty"); #endif nextchar(); @@ -2239,8 +2421,10 @@ S_regpiece(pTHX_ I32 *flagp) } nest_check: if (ckWARN(WARN_REGEXP) && !SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3) { - Perl_warner(aTHX_ WARN_REGEXP, "%.*s matches null string many times", - PL_regcomp_parse - origparse, origparse); + vWARN3(PL_regcomp_parse, + "%.*s matches null string many times", + PL_regcomp_parse - origparse, + origparse); } if (*PL_regcomp_parse == '?') { @@ -2248,8 +2432,10 @@ S_regpiece(pTHX_ I32 *flagp) reginsert(MINMOD, ret); regtail(ret, ret + NODE_STEP_REGNODE); } - if (ISMULT2(PL_regcomp_parse)) - vFAIL("Nested quantifiers in regexp"); + if (ISMULT2(PL_regcomp_parse)) { + PL_regcomp_parse++; + vFAIL("Nested quantifiers"); + } return(ret); } @@ -2262,8 +2448,7 @@ S_regpiece(pTHX_ I32 *flagp) * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. * - * [Yes, it is worth fixing, some scripts can run twice the speed.] - */ + * [Yes, it is worth fixing, some scripts can run twice the speed.] */ STATIC regnode * S_regatom(pTHX_ I32 *flagp) { @@ -2286,9 +2471,9 @@ tryagain: ret = reg_node(BOL); break; case '$': - if (PL_regcomp_parse[1]) - PL_seen_zerolen++; nextchar(); + if (*PL_regcomp_parse) + PL_seen_zerolen++; if (PL_regflags & PMf_MULTILINE) ret = reg_node(MEOL); else if (PL_regflags & PMf_SINGLELINE) @@ -2315,13 +2500,17 @@ tryagain: PL_regnaughty++; break; case '[': - PL_regcomp_parse++; + { + char *oregcomp_parse = ++PL_regcomp_parse; ret = (UTF ? regclassutf8() : regclass()); - if (*PL_regcomp_parse != ']') - FAIL("unmatched [] in regexp"); + if (*PL_regcomp_parse != ']') { + PL_regcomp_parse = oregcomp_parse; + vFAIL("Unmatched ["); + } nextchar(); *flagp |= HASWIDTH|SIMPLE; break; + } case '(': nextchar(); ret = reg(1, &flags); @@ -2344,7 +2533,7 @@ tryagain: *flagp |= TRYAGAIN; return NULL; } - vFAIL("internal urp"); + vFAIL("Internal urp"); /* Supposed to be caught earlier. */ break; case '{': @@ -2356,7 +2545,8 @@ tryagain: case '?': case '+': case '*': - vFAIL("Quantifier follows nothing in regexp"); + PL_regcomp_parse++; + vFAIL("Quantifier follows nothing"); break; case '\\': switch (*++PL_regcomp_parse) { @@ -2480,8 +2670,11 @@ tryagain: if (PL_regcomp_parse[1] == '{') { PL_regxend = strchr(PL_regcomp_parse, '}'); - if (!PL_regxend) - FAIL("Missing right brace on \\p{}"); + if (!PL_regxend) { + PL_regcomp_parse += 2; + PL_regxend = oldregxend; + vFAIL("Missing right brace on \\p{}"); + } PL_regxend++; } else @@ -2514,6 +2707,9 @@ tryagain: if (num > 9 && num >= PL_regnpar) goto defchar; else { + while (isDIGIT(*PL_regcomp_parse)) + PL_regcomp_parse++; + if (!SIZE_ONLY && num > PL_regcomp_rx->nparens) vFAIL("Reference to nonexistent group"); PL_regsawback = 1; @@ -2521,8 +2717,6 @@ tryagain: ? (LOC ? REFFL : REFF) : REF, num); *flagp |= HASWIDTH; - while (isDIGIT(*PL_regcomp_parse)) - PL_regcomp_parse++; PL_regcomp_parse--; nextchar(); } @@ -2530,7 +2724,7 @@ tryagain: break; case '\0': if (PL_regcomp_parse >= PL_regxend) - FAIL("trailing \\ in regexp"); + FAIL("Trailing \\"); /* FALL THROUGH */ default: /* Do not generate `unrecognized' warnings here, we fall @@ -2548,11 +2742,11 @@ tryagain: /* FALL THROUGH */ default: { - register I32 len; + register STRLEN len; register UV ender; register char *p; char *oldp, *s; - I32 numlen; + STRLEN numlen; PL_regcomp_parse++; @@ -2632,8 +2826,10 @@ tryagain: if (*++p == '{') { char* e = strchr(p, '}'); - if (!e) - FAIL("Missing right brace on \\x{}"); + if (!e) { + PL_regcomp_parse = p + 1; + vFAIL("Missing right brace on \\x{}"); + } else if (UTF) { numlen = 1; /* allow underscores */ ender = (UV)scan_hex(p + 1, e - p - 1, &numlen); @@ -2645,7 +2841,11 @@ tryagain: p = e + 1; } else + { + PL_regcomp_parse = e + 1; vFAIL("Can't use \\x{} without 'use utf8' declaration"); + } + } else { numlen = 0; /* disallow underscores */ @@ -2673,21 +2873,19 @@ tryagain: break; case '\0': if (p >= PL_regxend) - FAIL("trailing \\ in regexp"); + FAIL("Trailing \\"); /* FALL THROUGH */ default: if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(*p)) - Perl_warner(aTHX_ WARN_REGEXP, - "/%.127s/: Unrecognized escape \\%c passed through", - PL_regprecomp, - *p); + vWARN2(p +1, "Unrecognized escape \\%c passed through", *p); goto normal_default; } break; default: normal_default: if ((*p & 0xc0) == 0xc0 && UTF) { - ender = utf8_to_uv((U8*)p, &numlen); + ender = utf8_to_uv((U8*)p, PL_regxend - p, + &numlen, 0); p += numlen; } else @@ -2728,7 +2926,7 @@ tryagain: PL_regcomp_parse = p - 1; nextchar(); if (len < 0) - vFAIL("internal disaster"); + vFAIL("Internal disaster"); if (len > 0) *flagp |= HASWIDTH; if (len == 1) @@ -2810,6 +3008,11 @@ S_regpposixcc(pTHX_ I32 value) namedclass = complement ? ANYOF_NASCII : ANYOF_ASCII; break; + case 'b': + if (strnEQ(posixcc, "blank", 5)) + namedclass = + complement ? ANYOF_NBLANK : ANYOF_BLANK; + break; case 'c': if (strnEQ(posixcc, "cntrl", 5)) namedclass = @@ -2841,7 +3044,7 @@ S_regpposixcc(pTHX_ I32 value) case 's': if (strnEQ(posixcc, "space", 5)) namedclass = - complement ? ANYOF_NSPACE : ANYOF_SPACE; + complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC; break; case 'u': if (strnEQ(posixcc, "upper", 5)) @@ -2866,13 +3069,19 @@ S_regpposixcc(pTHX_ I32 value) if (namedclass == OOB_NAMEDCLASS || posixcc[skip] != ':' || posixcc[skip+1] != ']') - Perl_croak(aTHX_ - "Character class [:%.*s:] unknown", - t - s - 1, s + 1); - } else if (!SIZE_ONLY) + { + Simple_vFAIL3("POSIX class [:%.*s:] unknown", + t - s - 1, s + 1); + } + } else if (!SIZE_ONLY) { /* [[=foo=]] and [[.foo.]] are still future. */ - Perl_croak(aTHX_ - "Character class syntax [%c %c] is reserved for future extensions", c, c); + + /* adjust PL_regcomp_parse so the warning shows after + the class closes */ + while (*PL_regcomp_parse && *PL_regcomp_parse != ']') + PL_regcomp_parse++; + Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c); + } } else { /* Maternal grandfather: * "[:" ending in ":" but not in ":]" */ @@ -2897,11 +3106,17 @@ S_checkposixcc(pTHX) while(*s && isALNUM(*s)) s++; if (*s && c == *s && s[1] == ']') { - Perl_warner(aTHX_ WARN_REGEXP, - "Character class syntax [%c %c] belongs inside character classes", c, c); + vWARN3(s+2, "POSIX syntax [%c %c] belongs inside character classes", c, c); + + /* [[=foo=]] and [[.foo.]] are still future. */ if (c == '=' || c == '.') - Perl_croak(aTHX_ - "Character class syntax [%c %c] is reserved for future extensions", c, c); + { + /* adjust PL_regcomp_parse so the error shows after + the class closes */ + while (*PL_regcomp_parse && *PL_regcomp_parse++ != ']') + ; + Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c); + } } } } @@ -2914,7 +3129,7 @@ S_regclass(pTHX) register I32 lastvalue = OOB_CHAR8; register I32 range = 0; register regnode *ret; - I32 numlen; + STRLEN numlen; I32 namedclass; char *rangebegin; bool need_class = 0; @@ -2954,7 +3169,7 @@ S_regclass(pTHX) else if (value == '\\') { value = UCHARAT(PL_regcomp_parse++); /* Some compilers cannot handle switching on 64-bit integer - * values, therefore value cannot be an UV. --jhi */ + * values, therefore the 'value' cannot be an UV. --jhi */ switch (value) { case 'w': namedclass = ANYOF_ALNUM; break; case 'W': namedclass = ANYOF_NALNUM; break; @@ -2991,10 +3206,8 @@ S_regclass(pTHX) break; default: if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value)) - Perl_warner(aTHX_ WARN_REGEXP, - "/%.127s/: Unrecognized escape \\%c in character class passed through", - PL_regprecomp, - (int)value); + + vWARN2(PL_regcomp_parse, "Unrecognized escape \\%c in character class passed through", (int)value); break; } } @@ -3005,12 +3218,11 @@ S_regclass(pTHX) if (range) { /* a-\d, a-[:digit:] */ if (!SIZE_ONLY) { if (ckWARN(WARN_REGEXP)) - Perl_warner(aTHX_ WARN_REGEXP, - "/%.127s/: false [] range \"%*.*s\" in regexp", - PL_regprecomp, - PL_regcomp_parse - rangebegin, - PL_regcomp_parse - rangebegin, - rangebegin); + vWARN4(PL_regcomp_parse, + "False [] range \"%*.*s\"", + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); ANYOF_BITMAP_SET(ret, lastvalue); ANYOF_BITMAP_SET(ret, '-'); } @@ -3136,6 +3348,24 @@ S_regclass(pTHX) #endif /* EBCDIC */ } break; + case ANYOF_BLANK: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_BLANK); + else { + for (value = 0; value < 256; value++) + if (isBLANK(value)) + ANYOF_BITMAP_SET(ret, value); + } + break; + case ANYOF_NBLANK: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_NBLANK); + else { + for (value = 0; value < 256; value++) + if (!isBLANK(value)) + ANYOF_BITMAP_SET(ret, value); + } + break; case ANYOF_CNTRL: if (LOC) ANYOF_CLASS_SET(ret, ANYOF_CNTRL); @@ -3209,6 +3439,24 @@ S_regclass(pTHX) ANYOF_BITMAP_SET(ret, value); } break; + case ANYOF_PSXSPC: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_PSXSPC); + else { + for (value = 0; value < 256; value++) + if (isPSXSPC(value)) + ANYOF_BITMAP_SET(ret, value); + } + break; + case ANYOF_NPSXSPC: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_NPSXSPC); + else { + for (value = 0; value < 256; value++) + if (!isPSXSPC(value)) + ANYOF_BITMAP_SET(ret, value); + } + break; case ANYOF_PUNCT: if (LOC) ANYOF_CLASS_SET(ret, ANYOF_PUNCT); @@ -3264,7 +3512,7 @@ S_regclass(pTHX) } break; default: - vFAIL("invalid [::] class"); + vFAIL("Invalid [::] class"); break; } if (LOC) @@ -3274,12 +3522,10 @@ S_regclass(pTHX) } if (range) { if (lastvalue > value) /* b-a */ { - Perl_croak(aTHX_ - "/%.127s/: invalid [] range \"%*.*s\" in regexp", - PL_regprecomp, - PL_regcomp_parse - rangebegin, - PL_regcomp_parse - rangebegin, - rangebegin); + Simple_vFAIL4("Invalid [] range \"%*.*s\"", + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); } range = 0; } @@ -3290,12 +3536,11 @@ S_regclass(pTHX) PL_regcomp_parse++; if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ if (ckWARN(WARN_REGEXP)) - Perl_warner(aTHX_ WARN_REGEXP, - "/%.127s/: false [] range \"%*.*s\" in regexp", - PL_regprecomp, - PL_regcomp_parse - rangebegin, - PL_regcomp_parse - rangebegin, - rangebegin); + vWARN4(PL_regcomp_parse, + "False [] range \"%*.*s\"", + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); if (!SIZE_ONLY) ANYOF_BITMAP_SET(ret, '-'); } else @@ -3362,7 +3607,7 @@ S_regclassutf8(pTHX) register U32 lastvalue = OOB_UTF8; register I32 range = 0; register regnode *ret; - I32 numlen; + STRLEN numlen; I32 n; SV *listsv; U8 flags = 0; @@ -3394,12 +3639,16 @@ S_regclassutf8(pTHX) namedclass = OOB_NAMEDCLASS; if (!range) rangebegin = PL_regcomp_parse; - value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen); + value = utf8_to_uv((U8*)PL_regcomp_parse, + PL_regxend - PL_regcomp_parse, + &numlen, 0); PL_regcomp_parse += numlen; if (value == '[') namedclass = regpposixcc(value); else if (value == '\\') { - value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, &numlen); + value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, + PL_regxend - PL_regcomp_parse, + &numlen, 0); PL_regcomp_parse += numlen; /* Some compilers cannot handle switching on 64-bit integer * values, therefore value cannot be an UV. Yes, this will @@ -3416,7 +3665,7 @@ S_regclassutf8(pTHX) if (*PL_regcomp_parse == '{') { e = strchr(PL_regcomp_parse++, '}'); if (!e) - FAIL("Missing right brace on \\p{}"); + vFAIL("Missing right brace on \\p{}"); n = e - PL_regcomp_parse; } else { @@ -3449,8 +3698,8 @@ S_regclassutf8(pTHX) case 'x': if (*PL_regcomp_parse == '{') { e = strchr(PL_regcomp_parse++, '}'); - if (!e) - FAIL("Missing right brace on \\x{}"); + if (!e) + vFAIL("Missing right brace on \\x{}"); numlen = 1; /* allow underscores */ value = (UV)scan_hex(PL_regcomp_parse, e - PL_regcomp_parse, @@ -3475,10 +3724,9 @@ S_regclassutf8(pTHX) break; default: if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value)) - Perl_warner(aTHX_ WARN_REGEXP, - "/%.127s/: Unrecognized escape \\%c in character class passed through", - PL_regprecomp, - (int)value); + vWARN2(PL_regcomp_parse, + "Unrecognized escape \\%c in character class passed through", + (int)value); break; } } @@ -3486,12 +3734,11 @@ S_regclassutf8(pTHX) if (range) { /* a-\d, a-[:digit:] */ if (!SIZE_ONLY) { if (ckWARN(WARN_REGEXP)) - Perl_warner(aTHX_ WARN_REGEXP, - "/%.127s/: false [] range \"%*.*s\" in regexp", - PL_regprecomp, - PL_regcomp_parse - rangebegin, - PL_regcomp_parse - rangebegin, - rangebegin); + vWARN4(PL_regcomp_parse, + "False [] range \"%*.*s\"", + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */ "%04"UVxf"\n002D\n", (UV)lastvalue); @@ -3541,8 +3788,12 @@ S_regclassutf8(pTHX) case ANYOF_NPUNCT: Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break; case ANYOF_SPACE: + case ANYOF_PSXSPC: + case ANYOF_BLANK: Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; case ANYOF_NSPACE: + case ANYOF_NPSXSPC: + case ANYOF_NBLANK: Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; case ANYOF_UPPER: Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; @@ -3558,12 +3809,10 @@ S_regclassutf8(pTHX) } if (range) { if (lastvalue > value) { /* b-a */ - Perl_croak(aTHX_ - "/%.127s/: invalid [] range \"%*.*s\" in regexp", - PL_regprecomp, - PL_regcomp_parse - rangebegin, - PL_regcomp_parse - rangebegin, - rangebegin); + Simple_vFAIL4("invalid [] range \"%*.*s\"", + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); } range = 0; } @@ -3574,12 +3823,11 @@ S_regclassutf8(pTHX) PL_regcomp_parse++; if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ if (ckWARN(WARN_REGEXP)) - Perl_warner(aTHX_ WARN_REGEXP, - "/%.127s/: false [] range \"%*.*s\" in regexp", - PL_regprecomp, - PL_regcomp_parse - rangebegin, - PL_regcomp_parse - rangebegin, - rangebegin); + vWARN4(PL_regcomp_parse, + "False [] range \"%*.*s\"", + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); if (!SIZE_ONLY) Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */ @@ -3694,7 +3942,7 @@ S_reganode(pTHX_ U8 op, U32 arg) - reguni - emit (if appropriate) a Unicode character */ STATIC void -S_reguni(pTHX_ UV uv, char* s, I32* lenp) +S_reguni(pTHX_ UV uv, char* s, STRLEN* lenp) { dTHR; if (SIZE_ONLY) { @@ -3976,7 +4224,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) sv_setpvn(sv, "", 0); if (OP(o) >= reg_num) /* regnode.type is unsigned */ - FAIL("corrupted regexp opcode"); + FAIL("Corrupted regexp opcode"); sv_catpv(sv, (char*)reg_name[OP(o)]); /* Take off const! */ k = PL_regkind[(U8)OP(o)]; @@ -3998,7 +4246,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) else if (k == ANYOF) { int i, rangestart = -1; const char * const out[] = { /* Should be syncronized with - a table in regcomp.h */ + ANYOF_ #xdefines in regcomp.h */ "\\w", "\\W", "\\s", @@ -4022,9 +4270,13 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) "[:punct:]", "[:^punct:]", "[:upper:]", - "[:!upper:]", + "[:^upper:]", "[:xdigit:]", - "[:^xdigit:]" + "[:^xdigit:]", + "[:space:]", + "[:^space:]", + "[:blank:]", + "[:^blank:]" }; if (o->flags & ANYOF_LOCALE) @@ -4128,8 +4380,13 @@ Perl_pregfree(pTHX_ struct regexp *r) Perl_croak(aTHX_ "panic: pregfree comppad"); old_comppad = PL_comppad; old_curpad = PL_curpad; - PL_comppad = new_comppad; - PL_curpad = AvARRAY(new_comppad); + /* Watch out for global destruction's random ordering. */ + if (SvTYPE(new_comppad) == SVt_PVAV) { + PL_comppad = new_comppad; + PL_curpad = AvARRAY(new_comppad); + } + else + PL_curpad = NULL; op_free((OP_4tree*)r->data->data[n]); PL_comppad = old_comppad; PL_curpad = old_curpad;