From: Jarkko Hietaniemi
Date: Sun, 26 Nov 2000 21:31:13 +0000 (+0000)
Subject: BOUND regex opcodes (\b, \B) could try to scan zero length UTF-8.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=667bb95a2be204307d06062e22190a222414f7ae;p=p5sagit%2Fp5-mst-13.2.git

BOUND regex opcodes (\b, \B) could try to scan zero length UTF-8.

p4raw-id: //depot/perl@7873
---

diff --git a/regexec.c b/regexec.c
index be10dc9..e3ab3d3 100644
--- a/regexec.c
+++ b/regexec.c
@@ -917,10 +917,15 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
             PL_reg_flags |= RF_tainted;
             /* FALL THROUGH */
         case BOUNDUTF8:
-            tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1),
-                                                    strend - s,
-                                                    0, 0) : '\n';
-            tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
+            if (s == startpos)
+                tmp = '\n';
+            else {
+                U8 *r = reghop((U8*)s, -1);
+
+                tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
+            }
+            tmp = ((OP(c) == BOUNDUTF8 ?
+                    isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
             while (s < strend) {
                 if (tmp == !(OP(c) == BOUNDUTF8 ?
                              swash_fetch(PL_utf8_alnum, (U8*)s) :
@@ -955,10 +960,15 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
             PL_reg_flags |= RF_tainted;
             /* FALL THROUGH */
         case NBOUNDUTF8:
-            tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1),
-                                                    strend - s,
-                                                    0, 0) : '\n';
-            tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
+            if (s == startpos)
+                tmp = '\n';
+            else {
+                U8 *r = reghop((U8*)s, -1);
+
+                tmp = (I32)utf8_to_uv(r, s - (char*)r, 0, 0);
+            }
+            tmp = ((OP(c) == NBOUNDUTF8 ?
+                    isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
             while (s < strend) {
                 if (tmp == !(OP(c) == NBOUNDUTF8 ?
                              swash_fetch(PL_utf8_alnum, (U8*)s) :
@@ -2038,9 +2048,10 @@ S_regmatch(pTHX_ regnode *prog)
                 while (s < e) {
                     if (l >= PL_regeol)
                         sayNO;
-                    if (utf8_to_uv((U8*)s, e - s, 0, 0) != (c1 ?
-                        toLOWER_utf8((U8*)l) :
-                        toLOWER_LC_utf8((U8*)l)))
+                    if (utf8_to_uv((U8*)s, e - s, 0, 0) !=
+                        (c1 ?
+                         toLOWER_utf8((U8*)l) :
+                         toLOWER_LC_utf8((U8*)l)))
                     {
                         sayNO;
                     }
@@ -2175,9 +2186,13 @@ S_regmatch(pTHX_ regnode *prog)
         case BOUNDUTF8:
         case NBOUNDUTF8:
             /* was last char in word? */
-            ln = (locinput != PL_regbol)
-                ? utf8_to_uv(reghop((U8*)locinput, -1),
-                             PL_regeol - locinput, 0, 0) : PL_regprev;
+            if (locinput == PL_regbol)
+                ln = PL_regprev;
+            else {
+                U8 *r = reghop((U8*)locinput, -1);
+
+                ln = utf8_to_uv(r, s - (char*)r, 0, 0);
+            }
             if (OP(scan) == BOUNDUTF8 || OP(scan) == NBOUNDUTF8) {
                 ln = isALNUM_uni(ln);
                 n = swash_fetch(PL_utf8_alnum, (U8*)locinput);