From: Jarkko Hietaniemi Date: Mon, 1 Apr 2002 02:17:50 +0000 (+0000) Subject: Regex fix from Hugo: in UTF-8 locales the character X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b2f2f093d7e1865d949bed11ba0faa613aba5701;p=p5sagit%2Fp5-mst-13.2.git Regex fix from Hugo: in UTF-8 locales the character counting code didn't work right for minimal matches. p4raw-id: //depot/perl@15645 --- diff --git a/regexec.c b/regexec.c index f2d4b3d..be4b362 100644 --- a/regexec.c +++ b/regexec.c @@ -3568,6 +3568,7 @@ S_regmatch(pTHX_ regnode *prog) if (c1 != -1000) { char *e; /* Should not check after this */ char *old = locinput; + int count = 0; if (n == REG_INFTY) { e = PL_regeol - 1; @@ -3587,7 +3588,6 @@ S_regmatch(pTHX_ regnode *prog) e = PL_regeol - 1; } while (1) { - int count; /* Find place 'next' could work */ if (!do_utf8) { if (c1 == c2) { @@ -3605,18 +3605,20 @@ S_regmatch(pTHX_ regnode *prog) else { STRLEN len; if (c1 == c2) { - for (count = 0; - locinput <= e && - utf8_to_uvchr((U8*)locinput, &len) != c1; - count++) + /* count initialised to 0 or 1 */ + while (locinput <= e && + utf8_to_uvchr((U8*)locinput, &len) != c1) { locinput += len; - + count++; + } } else { - for (count = 0; locinput <= e; count++) { + /* count initialised to 0 or 1 */ + while (locinput <= e) { UV c = utf8_to_uvchr((U8*)locinput, &len); if (c == c1 || c == c2) break; - locinput += len; + locinput += len; + count++; } } } @@ -3638,6 +3640,7 @@ S_regmatch(pTHX_ regnode *prog) locinput += UTF8SKIP(locinput); else locinput++; + count = 1; } } else