From: Jarkko Hietaniemi Date: Sun, 17 Feb 2002 01:12:37 +0000 (+0000) Subject: The #14715 and #14716 were okay: they just revealed X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=1feea2c750cf5f74093deff93d1536d7d44a8925;p=p5sagit%2Fp5-mst-13.2.git The #14715 and #14716 were okay: they just revealed a bug in the EXACTF matching. p4raw-id: //depot/perl@14724 --- diff --git a/regexec.c b/regexec.c index 8bd2284..900b491 100644 --- a/regexec.c +++ b/regexec.c @@ -2380,8 +2380,8 @@ S_regmatch(pTHX_ regnode *prog) char *l = locinput; char *e = PL_regeol; - if (ibcmp_utf8(s, 0, ln, do_utf8, - l, &e, 0, UTF)) { + if (ibcmp_utf8(s, 0, ln, UTF, + l, &e, 0, do_utf8)) { /* One more case for the sharp s: * pack("U0U*", 0xDF) =~ /ss/i, * the 0xC3 0x9F are the UTF-8 diff --git a/utf8.c b/utf8.c index 71aaf8a..0db449c 100644 --- a/utf8.c +++ b/utf8.c @@ -1287,23 +1287,38 @@ to the hash is by Perl_to_utf8_case(). UV Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *normal, char *special) { - UV uv; + UV uv0, uv1, uv2; + U8 tmpbuf[UTF8_MAXLEN_FOLD+1]; + STRLEN len; if (!*swashp) *swashp = swash_init("utf8", normal, &PL_sv_undef, 4, 0); - uv = swash_fetch(*swashp, p, TRUE); - if (!uv) { + uv0 = utf8_to_uvchr(p, 0); + /* The NATIVE_TO_UNI() and UNI_TO_NATIVE() mappings + * are necessary in EBCDIC, they are redundant no-ops + * in ASCII-ish platforms, and hopefully optimized away. */ + uv1 = NATIVE_TO_UNI(uv0); + uvuni_to_utf8(tmpbuf, uv1); + uv2 = swash_fetch(*swashp, tmpbuf, TRUE); + if (uv2) { + /* It was "normal" (single character mapping). */ + UV uv3 = UNI_TO_NATIVE(uv2); + + len = uvchr_to_utf8(ustrp, uv3) - ustrp; + if (lenp) + *lenp = len; + + return uv3; + } + else { HV *hv; SV *keysv; HE *he; - uv = utf8_to_uvchr(p, 0); - if ((hv = get_hv(special, FALSE)) && - (keysv = sv_2mortal(Perl_newSVpvf(aTHX_ "%04"UVXf, uv))) && + (keysv = sv_2mortal(Perl_newSVpvf(aTHX_ "%04"UVXf, uv1))) && (he = hv_fetch_ent(hv, keysv, FALSE, 0))) { SV *val = HeVAL(he); - STRLEN len; char *s = SvPV(val, len); if (len > 1) { @@ -1316,8 +1331,6 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *norma * mapping, since any characters in the low 256 * are in Unicode code points, not EBCDIC. * --jhi */ - - U8 tmpbuf[UTF8_MAXLEN_FOLD+1]; U8 *d = tmpbuf; U8 *t, *tend; @@ -1351,14 +1364,17 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *norma } if (lenp) *lenp = len; + return utf8_to_uvchr(ustrp, 0); } - uv = NATIVE_TO_UNI(uv); + + /* So it was not "special": just copy it. */ + len = uvchr_to_utf8(ustrp, uv0) - ustrp; + if (lenp) + *lenp = len; + + return uv0; } - if (lenp) - *lenp = UNISKIP(uv); - uvuni_to_utf8(ustrp, uv); - return uv; } /*