From: Jarkko Hietaniemi Date: Wed, 28 Nov 2001 03:23:04 +0000 (+0000) Subject: Fix for "a\x{100}" =~ /A/i. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=701a277b5182d929c4baa83d419c46c6d08d2101;p=p5sagit%2Fp5-mst-13.2.git Fix for "a\x{100}" =~ /A/i. p4raw-id: //depot/perl@13332 --- diff --git a/embed.h b/embed.h index 269a117..0674b75 100644 --- a/embed.h +++ b/embed.h @@ -261,6 +261,7 @@ #define hv_undef Perl_hv_undef #define ibcmp Perl_ibcmp #define ibcmp_locale Perl_ibcmp_locale +#define ibcmp_utf8 Perl_ibcmp_utf8 #define ingroup Perl_ingroup #define init_argv_symbols Perl_init_argv_symbols #define init_debugger Perl_init_debugger @@ -1780,6 +1781,7 @@ #define hv_undef(a) Perl_hv_undef(aTHX_ a) #define ibcmp(a,b,c) Perl_ibcmp(aTHX_ a,b,c) #define ibcmp_locale(a,b,c) Perl_ibcmp_locale(aTHX_ a,b,c) +#define ibcmp_utf8(a,b,c,d,e) Perl_ibcmp_utf8(aTHX_ a,b,c,d,e) #define ingroup(a,b) Perl_ingroup(aTHX_ a,b) #define init_argv_symbols(a,b) Perl_init_argv_symbols(aTHX_ a,b) #define init_debugger() Perl_init_debugger(aTHX) diff --git a/embed.pl b/embed.pl index f2603ef..02327d9 100755 --- a/embed.pl +++ b/embed.pl @@ -1333,6 +1333,7 @@ Apd |HE* |hv_store_ent |HV* tb|SV* key|SV* val|U32 hash Apd |void |hv_undef |HV* tb Ap |I32 |ibcmp |const char* a|const char* b|I32 len Ap |I32 |ibcmp_locale |const char* a|const char* b|I32 len +Ap |I32 |ibcmp_utf8 |const char* a|bool ua|const char* b|bool ub|I32 len p |bool |ingroup |Gid_t testgid|Uid_t effective p |void |init_argv_symbols|int|char ** p |void |init_debugger diff --git a/global.sym b/global.sym index 86e1491..802dd25 100644 --- a/global.sym +++ b/global.sym @@ -153,6 +153,7 @@ Perl_hv_store_ent Perl_hv_undef Perl_ibcmp Perl_ibcmp_locale +Perl_ibcmp_utf8 Perl_init_stacks Perl_init_tm Perl_instr diff --git a/proto.h b/proto.h index a042c5a..1073831 100644 --- a/proto.h +++ b/proto.h @@ -315,6 +315,7 @@ PERL_CALLCONV HE* Perl_hv_store_ent(pTHX_ HV* tb, SV* key, SV* val, U32 hash); PERL_CALLCONV void Perl_hv_undef(pTHX_ HV* tb); PERL_CALLCONV I32 Perl_ibcmp(pTHX_ const char* a, const char* b, I32 len); PERL_CALLCONV I32 Perl_ibcmp_locale(pTHX_ const char* a, const char* b, I32 len); +PERL_CALLCONV I32 Perl_ibcmp_utf8(pTHX_ const char* a, bool ua, const char* b, bool ub, I32 len); PERL_CALLCONV bool Perl_ingroup(pTHX_ Gid_t testgid, Uid_t effective); PERL_CALLCONV void Perl_init_argv_symbols(pTHX_ int, char **); PERL_CALLCONV void Perl_init_debugger(pTHX); diff --git a/regexec.c b/regexec.c index efdd8df..a8acb06 100644 --- a/regexec.c +++ b/regexec.c @@ -962,14 +962,17 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta if (c1 == c2) while (s <= e) { if ( utf8_to_uvchr((U8*)s, &len) == c1 - && regtry(prog, s) ) + && (ln == 1 || + ibcmp_utf8(s, do_utf8, m, UTF, ln)) ) goto got_it; s += len; } else while (s <= e) { UV c = utf8_to_uvchr((U8*)s, &len); - if ( (c == c1 || c == c2) && regtry(prog, s) ) + if ( (c == c1 || c == c2) + && (ln == 1 || + ibcmp_utf8(s, do_utf8, m, UTF, ln)) ) goto got_it; s += len; } diff --git a/t/op/pat.t b/t/op/pat.t index a94fcaf..6b4b061 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..753\n"; +print "1..757\n"; BEGIN { chdir 't' if -d 't'; @@ -2248,6 +2248,8 @@ print "# some Unicode properties\n"; } { + # Script=, Block=, Category= + print "not " unless "\x{0100}" =~ /\p{Script=Latin}/; print "ok 748\n"; @@ -2259,6 +2261,8 @@ print "# some Unicode properties\n"; } { + print "# the basic character classes and Unicode \n"; + # 0100;LATIN CAPITAL LETTER A WITH MACRON;Lu;0;L;0041 0304;;;;N;LATIN CAPITAL LETTER A MACRON;;;0101; print "not " unless "\x{0100}" =~ /\w/; print "ok 751\n"; @@ -2271,3 +2275,19 @@ print "# some Unicode properties\n"; print "not " unless "\x{1680}" =~ /\s/; print "ok 753\n"; } + +{ + print "# folding matches and Unicode\n"; + + print "not " unless "a\x{100}" =~ /A/i; + print "ok 754\n"; + + print "not " unless "A\x{100}" =~ /A/i; + print "ok 755\n"; + + print "not " unless "a\x{100}" =~ /a/i; + print "ok 756\n"; + + print "not " unless "A\x{100}" =~ /A/i; + print "ok 757\n"; +} diff --git a/utf8.c b/utf8.c index f900724..d7b0784 100644 --- a/utf8.c +++ b/utf8.c @@ -1543,6 +1543,51 @@ Perl_pv_uni_display(pTHX_ SV *dsv, U8 *spv, STRLEN len, STRLEN pvlim, UV flags) char * Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags) { - return Perl_pv_uni_display(aTHX_ dsv, (U8*)SvPVX(ssv), SvCUR(ssv), - pvlim, flags); + return Perl_pv_uni_display(aTHX_ dsv, (U8*)SvPVX(ssv), SvCUR(ssv), + pvlim, flags); +} + +I32 +Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, const char *s2, bool u2, register I32 len) +{ + register U8 *a = (U8*)s1; + register U8 *b = (U8*)s2; + STRLEN la, lb; + UV ca, cb; + STRLEN ulen1, ulen2; + U8 tmpbuf1[UTF8_MAXLEN*3+1]; + U8 tmpbuf2[UTF8_MAXLEN*3+1]; + + while (len) { + if (u1) + ca = utf8_to_uvchr((U8*)a, &la); + else { + ca = *a; + la = 1; + } + if (u2) + cb = utf8_to_uvchr((U8*)b, &lb); + else { + cb = *b; + lb = 1; + } + if (ca != cb) { + if (u1) + to_uni_lower(NATIVE_TO_UNI(ca), tmpbuf1, &ulen1); + else + ulen1 = 1; + if (u2) + to_uni_lower(NATIVE_TO_UNI(cb), tmpbuf2, &ulen2); + else + ulen2 = 1; + if (ulen1 != ulen2 + || (ulen1 == 1 && PL_fold[ca] != PL_fold[cb]) + || memNE(tmpbuf1, tmpbuf2, ulen1)) + return 1; + } + a += la; + b += lb; + } + return 0; } +