X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=e7042eaa4e251fe61a057095b75c2a643ddb0525;hb=9f07fdcd4d463e2090de7661b6a313c8ecc5278b;hp=fd4633ba9f35cda66e76711bbafa9c0084f674bd;hpb=b45f050a81173020b0089d3ff02fa0276958461a;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index fd4633b..e7042ea 100644 --- a/regcomp.c +++ b/regcomp.c @@ -210,19 +210,19 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, * of t/op/regmesg.t, the tests in t/op/re_tests, and those in * op/pragma/warn/regcomp. */ -#define MARKER1 " RegexLengthToShowInErrorMessages) { \ /* chop 10 shorter than the max, to ensure meaning of "..." */ \ len = RegexLengthToShowInErrorMessages - 10; \ - elipises = "..."; \ + ellipses = "..."; \ } \ Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \ - m, len, PL_regprecomp, elipises); \ + msg, (int)len, PL_regprecomp, ellipses); \ } STMT_END /* @@ -242,9 +242,9 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, * args. Show regex, up to a maximum length. If it's too long, chop and add * "...". */ -#define FAIL2(pat,m) \ +#define FAIL2(pat,msg) \ STMT_START { \ - char *elipises = ""; \ + char *ellipses = ""; \ unsigned len = strlen(PL_regprecomp); \ \ if (!SIZE_ONLY) \ @@ -253,10 +253,10 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, if (len > RegexLengthToShowInErrorMessages) { \ /* chop 10 shorter than the max, to ensure meaning of "..." */ \ len = RegexLengthToShowInErrorMessages - 10; \ - elipises = "..."; \ + ellipses = "..."; \ } \ S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/", \ - m, len, PL_regprecomp, elipises); \ + msg, (int)len, PL_regprecomp, ellipses); \ } STMT_END @@ -268,7 +268,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ \ Perl_croak(aTHX_ "%s" REPORT_LOCATION, \ - m, offset, PL_regprecomp, PL_regprecomp + offset); \ + m, (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END /* @@ -289,7 +289,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ \ S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, \ - offset, PL_regprecomp, PL_regprecomp + offset); \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END /* @@ -311,7 +311,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ \ S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, \ - offset, PL_regprecomp, PL_regprecomp + offset); \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END /* @@ -332,7 +332,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ \ S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3,\ - offset, PL_regprecomp, PL_regprecomp + offset); \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END /* @@ -342,7 +342,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, STMT_START { \ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \ S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3, a4,\ - offset, PL_regprecomp, PL_regprecomp + offset); \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END @@ -350,7 +350,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, STMT_START { \ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \ Perl_warner(aTHX_ WARN_REGEXP, "%s" REPORT_LOCATION,\ - m, offset, PL_regprecomp, PL_regprecomp + offset); \ + m, (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END \ @@ -359,7 +359,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \ Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\ a1, \ - offset, PL_regprecomp, PL_regprecomp + offset); \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END #define vWARN3(loc, m, a1, a2) \ @@ -367,7 +367,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, unsigned offset = strlen(PL_regprecomp) - (PL_regxend - (loc)); \ Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION, \ a1, a2, \ - offset, PL_regprecomp, PL_regprecomp + offset); \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END #define vWARN4(loc, m, a1, a2, a3) \ @@ -375,7 +375,7 @@ static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \ Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\ a1, a2, a3, \ - offset, PL_regprecomp, PL_regprecomp + offset); \ + (int)offset, PL_regprecomp, PL_regprecomp + offset); \ } STMT_END @@ -443,7 +443,7 @@ S_cl_is_anything(pTHX_ struct regnode_charclass_class *cl) { int value; - for (value = 0; value < ANYOF_MAX; value += 2) + for (value = 0; value <= ANYOF_MAX; value += 2) if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1)) return 1; for (value = 0; value < 256; ++value) @@ -1963,7 +1963,10 @@ S_reg(pTHX_ I32 paren, I32 *flagp) else sv = newSVpvn("", 0); + ENTER; + Perl_save_re_context(aTHX); rop = sv_compile_2op(sv, &sop, "re", &av); + LEAVE; n = add_data(3, "nop"); PL_regcomp_rx->data->data[n] = (void*)rop; @@ -2190,14 +2193,14 @@ S_reg(pTHX_ I32 paren, I32 *flagp) if (paren) { PL_regflags = oregflags; if (PL_regcomp_parse >= PL_regxend || *nextchar() != ')') { - PL_regcomp_parse++; - vFAIL("Unmatched ("); + PL_regcomp_parse = oregcomp_parse; + vFAIL("Unmatched ("); } } else if (!paren && PL_regcomp_parse < PL_regxend) { if (*PL_regcomp_parse == ')') { - PL_regcomp_parse = oregcomp_parse; - vFAIL("Unmatched ("); + PL_regcomp_parse++; + vFAIL("Unmatched )"); } else FAIL("Junk on end of regexp"); /* "Can't happen". */ @@ -2468,9 +2471,9 @@ tryagain: ret = reg_node(BOL); break; case '$': - if (PL_regcomp_parse[1]) - PL_seen_zerolen++; nextchar(); + if (*PL_regcomp_parse) + PL_seen_zerolen++; if (PL_regflags & PMf_MULTILINE) ret = reg_node(MEOL); else if (PL_regflags & PMf_SINGLELINE) @@ -2704,8 +2707,8 @@ tryagain: if (num > 9 && num >= PL_regnpar) goto defchar; else { - while (isDIGIT(*PL_regcomp_parse)) - PL_regcomp_parse++; + while (isDIGIT(*PL_regcomp_parse)) + PL_regcomp_parse++; if (!SIZE_ONLY && num > PL_regcomp_rx->nparens) vFAIL("Reference to nonexistent group"); @@ -2881,7 +2884,7 @@ tryagain: default: normal_default: if ((*p & 0xc0) == 0xc0 && UTF) { - ender = utf8_to_uv((U8*)p, &numlen); + ender = utf8_to_uv_chk((U8*)p, &numlen, 0); p += numlen; } else @@ -3004,6 +3007,11 @@ S_regpposixcc(pTHX_ I32 value) namedclass = complement ? ANYOF_NASCII : ANYOF_ASCII; break; + case 'b': + if (strnEQ(posixcc, "blank", 5)) + namedclass = + complement ? ANYOF_NBLANK : ANYOF_BLANK; + break; case 'c': if (strnEQ(posixcc, "cntrl", 5)) namedclass = @@ -3035,7 +3043,7 @@ S_regpposixcc(pTHX_ I32 value) case 's': if (strnEQ(posixcc, "space", 5)) namedclass = - complement ? ANYOF_NSPACE : ANYOF_SPACE; + complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC; break; case 'u': if (strnEQ(posixcc, "upper", 5)) @@ -3160,7 +3168,7 @@ S_regclass(pTHX) else if (value == '\\') { value = UCHARAT(PL_regcomp_parse++); /* Some compilers cannot handle switching on 64-bit integer - * values, therefore value cannot be an UV. --jhi */ + * values, therefore the 'value' cannot be an UV. --jhi */ switch (value) { case 'w': namedclass = ANYOF_ALNUM; break; case 'W': namedclass = ANYOF_NALNUM; break; @@ -3339,6 +3347,24 @@ S_regclass(pTHX) #endif /* EBCDIC */ } break; + case ANYOF_BLANK: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_BLANK); + else { + for (value = 0; value < 256; value++) + if (isBLANK(value)) + ANYOF_BITMAP_SET(ret, value); + } + break; + case ANYOF_NBLANK: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_NBLANK); + else { + for (value = 0; value < 256; value++) + if (!isBLANK(value)) + ANYOF_BITMAP_SET(ret, value); + } + break; case ANYOF_CNTRL: if (LOC) ANYOF_CLASS_SET(ret, ANYOF_CNTRL); @@ -3412,6 +3438,24 @@ S_regclass(pTHX) ANYOF_BITMAP_SET(ret, value); } break; + case ANYOF_PSXSPC: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_PSXSPC); + else { + for (value = 0; value < 256; value++) + if (isPSXSPC(value)) + ANYOF_BITMAP_SET(ret, value); + } + break; + case ANYOF_NPSXSPC: + if (LOC) + ANYOF_CLASS_SET(ret, ANYOF_NPSXSPC); + else { + for (value = 0; value < 256; value++) + if (!isPSXSPC(value)) + ANYOF_BITMAP_SET(ret, value); + } + break; case ANYOF_PUNCT: if (LOC) ANYOF_CLASS_SET(ret, ANYOF_PUNCT); @@ -3594,12 +3638,12 @@ S_regclassutf8(pTHX) namedclass = OOB_NAMEDCLASS; if (!range) rangebegin = PL_regcomp_parse; - value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen); + value = utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0); PL_regcomp_parse += numlen; if (value == '[') namedclass = regpposixcc(value); else if (value == '\\') { - value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, &numlen); + value = (U32)utf8_to_uv_chk((U8*)PL_regcomp_parse, &numlen, 0); PL_regcomp_parse += numlen; /* Some compilers cannot handle switching on 64-bit integer * values, therefore value cannot be an UV. Yes, this will @@ -3739,8 +3783,12 @@ S_regclassutf8(pTHX) case ANYOF_NPUNCT: Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break; case ANYOF_SPACE: + case ANYOF_PSXSPC: + case ANYOF_BLANK: Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; case ANYOF_NSPACE: + case ANYOF_NPSXSPC: + case ANYOF_NBLANK: Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; case ANYOF_UPPER: Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; @@ -4193,7 +4241,7 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) else if (k == ANYOF) { int i, rangestart = -1; const char * const out[] = { /* Should be syncronized with - a table in regcomp.h */ + ANYOF_ #xdefines in regcomp.h */ "\\w", "\\W", "\\s", @@ -4217,9 +4265,13 @@ Perl_regprop(pTHX_ SV *sv, regnode *o) "[:punct:]", "[:^punct:]", "[:upper:]", - "[:!upper:]", + "[:^upper:]", "[:xdigit:]", - "[:^xdigit:]" + "[:^xdigit:]", + "[:space:]", + "[:^space:]", + "[:blank:]", + "[:^blank:]" }; if (o->flags & ANYOF_LOCALE) @@ -4323,8 +4375,13 @@ Perl_pregfree(pTHX_ struct regexp *r) Perl_croak(aTHX_ "panic: pregfree comppad"); old_comppad = PL_comppad; old_curpad = PL_curpad; - PL_comppad = new_comppad; - PL_curpad = AvARRAY(new_comppad); + /* Watch out for global destruction's random ordering. */ + if (SvTYPE(new_comppad) == SVt_PVAV) { + PL_comppad = new_comppad; + PL_curpad = AvARRAY(new_comppad); + } + else + PL_curpad = NULL; op_free((OP_4tree*)r->data->data[n]); PL_comppad = old_comppad; PL_curpad = old_curpad;