X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=regcomp.c;h=0f883f3ce88920178b429e5e3ce6182742cdb1e8;hb=7ee8c957e643df1e9e47d243c3269eb47c2da591;hp=f256c4e4d77a96d8bd6efcd46a0697552cfa3baf;hpb=c485e6072d15e92c314a8b9cb6957c3edf13f985;p=p5sagit%2Fp5-mst-13.2.git diff --git a/regcomp.c b/regcomp.c index f256c4e..0f883f3 100644 --- a/regcomp.c +++ b/regcomp.c @@ -38,6 +38,7 @@ /* *These* symbols are masked to allow static link. */ # define Perl_pregfree my_regfree # define Perl_regnext my_regnext +# define save_re_context my_save_re_context #endif /*SUPPRESS 112*/ @@ -319,7 +320,10 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 } if (OP(scan) != CURLYX) { - int max = (reg_off_by_arg[OP(scan)] ? I32_MAX : U16_MAX); + int max = (reg_off_by_arg[OP(scan)] + ? I32_MAX + /* I32 may be smaller than U16 on CRAYs! */ + : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX)); int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan)); int noff; regnode *n = scan; @@ -486,10 +490,10 @@ study_chunk(regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 ? (flags & ~SCF_DO_SUBSTR) : flags); if (!scan) /* It was not CURLYX, but CURLY. */ scan = next; - if (PL_dowarn && (minnext + deltanext == 0) + if (ckWARN(WARN_UNSAFE) && (minnext + deltanext == 0) && !(data->flags & (SF_HAS_PAR|SF_IN_PAR)) && maxcount <= 10000) /* Complement check for big count */ - warn("Strange *+?{} on zero-length expression"); + warner(WARN_UNSAFE, "Strange *+?{} on zero-length expression"); min += minnext * mincount; is_inf |= (maxcount == REG_INFTY && (minnext + deltanext) > 0 || deltanext == I32_MAX); @@ -1554,8 +1558,8 @@ regpiece(I32 *flagp) goto do_curly; } nest_check: - if (PL_dowarn && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) { - warn("%.*s matches null string many times", + if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) { + warner(WARN_UNSAFE, "%.*s matches null string many times", PL_regcomp_parse - origparse, origparse); } @@ -1633,6 +1637,9 @@ tryagain: case '[': PL_regcomp_parse++; ret = (UTF ? regclassutf8() : regclass()); + if (*PL_regcomp_parse != ']') + FAIL("unmatched [] in regexp"); + nextchar(); *flagp |= HASWIDTH|SIMPLE; break; case '(': @@ -1700,7 +1707,7 @@ tryagain: *flagp |= HASWIDTH; nextchar(); if (UTF && !PL_utf8_mark) - is_utf8_mark("~"); /* preload table */ + is_utf8_mark((U8*)"~"); /* preload table */ break; case 'w': ret = reg_node( @@ -1710,7 +1717,7 @@ tryagain: *flagp |= HASWIDTH|SIMPLE; nextchar(); if (UTF && !PL_utf8_alnum) - is_utf8_alnum("a"); /* preload table */ + is_utf8_alnum((U8*)"a"); /* preload table */ break; case 'W': ret = reg_node( @@ -1720,7 +1727,7 @@ tryagain: *flagp |= HASWIDTH|SIMPLE; nextchar(); if (UTF && !PL_utf8_alnum) - is_utf8_alnum("a"); /* preload table */ + is_utf8_alnum((U8*)"a"); /* preload table */ break; case 'b': PL_seen_zerolen++; @@ -1731,7 +1738,7 @@ tryagain: *flagp |= SIMPLE; nextchar(); if (UTF && !PL_utf8_alnum) - is_utf8_alnum("a"); /* preload table */ + is_utf8_alnum((U8*)"a"); /* preload table */ break; case 'B': PL_seen_zerolen++; @@ -1742,7 +1749,7 @@ tryagain: *flagp |= SIMPLE; nextchar(); if (UTF && !PL_utf8_alnum) - is_utf8_alnum("a"); /* preload table */ + is_utf8_alnum((U8*)"a"); /* preload table */ break; case 's': ret = reg_node( @@ -1752,7 +1759,7 @@ tryagain: *flagp |= HASWIDTH|SIMPLE; nextchar(); if (UTF && !PL_utf8_space) - is_utf8_space(" "); /* preload table */ + is_utf8_space((U8*)" "); /* preload table */ break; case 'S': ret = reg_node( @@ -1762,21 +1769,44 @@ tryagain: *flagp |= HASWIDTH|SIMPLE; nextchar(); if (UTF && !PL_utf8_space) - is_utf8_space(" "); /* preload table */ + is_utf8_space((U8*)" "); /* preload table */ break; case 'd': ret = reg_node(UTF ? DIGITUTF8 : DIGIT); *flagp |= HASWIDTH|SIMPLE; nextchar(); if (UTF && !PL_utf8_digit) - is_utf8_digit("1"); /* preload table */ + is_utf8_digit((U8*)"1"); /* preload table */ break; case 'D': ret = reg_node(UTF ? NDIGITUTF8 : NDIGIT); *flagp |= HASWIDTH|SIMPLE; nextchar(); if (UTF && !PL_utf8_digit) - is_utf8_digit("1"); /* preload table */ + is_utf8_digit((U8*)"1"); /* preload table */ + break; + case 'p': + case 'P': + { /* a lovely hack--pretend we saw [\pX] instead */ + char* oldregxend = PL_regxend; + + if (PL_regcomp_parse[1] == '{') { + PL_regxend = strchr(PL_regcomp_parse, '}'); + if (!PL_regxend) + FAIL("Missing right brace on \\p{}"); + PL_regxend++; + } + else + PL_regxend = PL_regcomp_parse + 2; + PL_regcomp_parse--; + + ret = regclassutf8(); + + PL_regxend = oldregxend; + PL_regcomp_parse--; + nextchar(); + *flagp |= HASWIDTH|SIMPLE; + } break; case 'n': case 'r': @@ -1873,6 +1903,8 @@ tryagain: case 'S': case 'd': case 'D': + case 'p': + case 'P': --p; goto loopdone; case 'n': @@ -1949,7 +1981,7 @@ tryagain: default: normal_default: if ((*p & 0xc0) == 0xc0 && UTF) { - ender = utf8_to_uv(p, &numlen); + ender = utf8_to_uv((U8*)p, &numlen); p += numlen; } else @@ -2083,8 +2115,9 @@ regclass(void) * (POSIX Extended Character Classes, that is) * The text between e.g. [: and :] would start * at posixccs + 1 and stop at regcomp_parse - 2. */ - if (dowarn && !SIZE_ONLY) - warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc); + if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY) + warner(WARN_UNSAFE, + "Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc); PL_regcomp_parse++; /* skip over the ending ] */ } } @@ -2212,9 +2245,6 @@ regclass(void) } lastvalue = value; } - if (*PL_regcomp_parse != ']') - FAIL("unmatched [] in regexp"); - nextchar(); /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ if (!SIZE_ONLY && (*opnd & (0xFF ^ ANYOF_INVERT)) == ANYOF_FOLD) { for (value = 0; value < 256; ++value) { @@ -2267,7 +2297,7 @@ regclassutf8(void) while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') { skipcond: - value = utf8_to_uv(PL_regcomp_parse, &numlen); + value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen); PL_regcomp_parse += numlen; if (value == '[' && PL_regcomp_parse + 1 < PL_regxend && @@ -2288,15 +2318,16 @@ regclassutf8(void) * (POSIX Extended Character Classes, that is) * The text between e.g. [: and :] would start * at posixccs + 1 and stop at regcomp_parse - 2. */ - if (dowarn && !SIZE_ONLY) - warn("Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc); + if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY) + warner(WARN_UNSAFE, + "Character class syntax [%c %c] is reserved for future extensions", posixccc, posixccc); PL_regcomp_parse++; /* skip over the ending ] */ } } } if (value == '\\') { - value = utf8_to_uv(PL_regcomp_parse, &numlen); + value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen); PL_regcomp_parse += numlen; switch (value) { case 'w': @@ -2326,7 +2357,7 @@ regclassutf8(void) flags |= ANYOF_SPACEL; sv_catpvf(listsv, "+utf8::IsSpace\n"); if (!PL_utf8_space) - is_utf8_space(" "); + is_utf8_space((U8*)" "); } lastvalue = 123456; continue; @@ -2337,7 +2368,7 @@ regclassutf8(void) sv_catpvf(listsv, "!utf8::IsSpace\n"); if (!PL_utf8_space) - is_utf8_space(" "); + is_utf8_space((U8*)" "); } lastvalue = 123456; continue; @@ -2441,9 +2472,6 @@ regclassutf8(void) sv_catpvf(listsv, "%04x\n", value); } } - if (*PL_regcomp_parse != ']') - FAIL("unmatched [] in regexp"); - nextchar(); ret = reganode(ANYOFUTF8, 0); @@ -2547,11 +2575,11 @@ reguni(UV uv, char* s, I32* lenp) { dTHR; if (SIZE_ONLY) { - char tmpbuf[10]; + U8 tmpbuf[10]; *lenp = uv_to_utf8(tmpbuf, uv) - tmpbuf; } else - *lenp = uv_to_utf8(s, uv) - s; + *lenp = uv_to_utf8((U8*)s, uv) - (U8*)s; }