From: Jarkko Hietaniemi Date: Fri, 1 Feb 2002 05:17:59 +0000 (+0000) Subject: The Malformed UTF-8 Heisenbug seen by Merijn and NickC X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=a6872d423f27ca69297bf4e6466ce19b6475dbcf;p=p5sagit%2Fp5-mst-13.2.git The Malformed UTF-8 Heisenbug seen by Merijn and NickC I got it in Tru64 + ithreads but only without -g, took some debugging by printf (which was no fun either since adding some debug printfs hid the error) p4raw-id: //depot/perl@14511 --- diff --git a/regexec.c b/regexec.c index 6512986..70d401d 100644 --- a/regexec.c +++ b/regexec.c @@ -1043,7 +1043,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta if ( f != c && (f == c1 || f == c2) && (ln == foldlen || - !ibcmp_utf8((char *)foldbuf, + !ibcmp_utf8((char *) foldbuf, (char **)0, foldlen, do_utf8, m, (char **)0, ln, UTF)) diff --git a/utf8.c b/utf8.c index cf3f48d..60933cd 100644 --- a/utf8.c +++ b/utf8.c @@ -1285,7 +1285,7 @@ to the hash is by Perl_to_utf8_case(). */ UV -Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal, char *special) +Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *normal, char *special) { UV uv; @@ -1305,6 +1305,7 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal SV *val = HeVAL(he); char *s = SvPV(val, *lenp); U8 c = *(U8*)s; + if (*lenp > 1 || UNI_IS_INVARIANT(c)) Copy(s, ustrp, *lenp, U8); else { @@ -1807,6 +1808,9 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const if ((e1 == 0 && f1 == 0) || (e2 == 0 && f2 == 0) || (f1 == 0 && f2 == 0)) return 1; /* mismatch; possible infinite loop or false positive */ + if (!u1 || !u2) + natbuf[1] = 0; /* Need to terminate the buffer. */ + while ((e1 == 0 || p1 < e1) && (f1 == 0 || p1 < f1) && (e2 == 0 || p2 < e2) &&