From: Jarkko Hietaniemi Date: Sun, 23 Dec 2001 16:43:29 +0000 (+0000) Subject: The funky final sigma casefolding. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=09091399717abb9634c88f60ff8b2384a3297c16;p=p5sagit%2Fp5-mst-13.2.git The funky final sigma casefolding. p4raw-id: //depot/perl@13866 --- diff --git a/regcomp.c b/regcomp.c index 2e0943e..ea98177 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3981,9 +3981,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (prevvalue < value) Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", (UV)prevvalue, (UV)value); - else if (prevvalue == value) + else if (prevvalue == value) { Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", (UV)value); + if (FOLD) { + if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) { + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", + UNICODE_GREEK_CAPITAL_LETTER_SIGMA); + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", + UNICODE_GREEK_SMALL_LETTER_SIGMA); + } + else if (value == UNICODE_GREEK_CAPITAL_LETTER_SIGMA) + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", + UNICODE_GREEK_SMALL_LETTER_SIGMA); + } + } } } diff --git a/regexec.c b/regexec.c index d239a70..7a00dfd 100644 --- a/regexec.c +++ b/regexec.c @@ -975,7 +975,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta * Fortunately, not getting this right is allowed * for Unicode Regular Expression Support level 1, * only one-to-one matching is required. --jhi */ - if (c1 == c2) + if (c1 == c2) { while (s <= e) { if ( utf8_to_uvchr((U8*)s, &len) == c1 && (ln == len || @@ -985,9 +985,13 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta goto got_it; s += len; } - else + } + else { while (s <= e) { UV c = utf8_to_uvchr((U8*)s, &len); + if (c == UNICODE_GREEK_CAPITAL_LETTER_SIGMA || + c == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) + c = UNICODE_GREEK_SMALL_LETTER_SIGMA; if ( (c == c1 || c == c2) && (ln == len || ibcmp_utf8(s, do_utf8, strend - s, @@ -996,6 +1000,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta goto got_it; s += len; } + } } else { if (c1 == c2) diff --git a/t/op/pat.t b/t/op/pat.t index 30fc1a9..a8742f8 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..794\n"; +print "1..812\n"; BEGIN { chdir 't' if -d 't'; @@ -2418,3 +2418,35 @@ print "# some Unicode properties\n"; print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n"; print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n"; } + +{ + print "# FINAL SIGMA\n"; + + my $SIGMA = "\x{03A3}"; # CAPITAL + my $Sigma = "\x{03C2}"; # SMALL FINAL + my $sigma = "\x{03C3}"; # SMALL + + print $SIGMA =~ /$SIGMA/i ? "ok 795\n" : "not ok 795\n"; + print $SIGMA =~ /$Sigma/i ? "ok 796\n" : "not ok 796\n"; + print $SIGMA =~ /$sigma/i ? "ok 797\n" : "not ok 797\n"; + + print $Sigma =~ /$SIGMA/i ? "ok 798\n" : "not ok 798\n"; + print $Sigma =~ /$Sigma/i ? "ok 799\n" : "not ok 799\n"; + print $Sigma =~ /$sigma/i ? "ok 800\n" : "not ok 800\n"; + + print $sigma =~ /$SIGMA/i ? "ok 801\n" : "not ok 801\n"; + print $sigma =~ /$Sigma/i ? "ok 802\n" : "not ok 802\n"; + print $sigma =~ /$sigma/i ? "ok 803\n" : "not ok 803\n"; + + print $SIGMA =~ /[$SIGMA]/i ? "ok 804\n" : "not ok 804\n"; + print $SIGMA =~ /[$Sigma]/i ? "ok 805\n" : "not ok 805\n"; + print $SIGMA =~ /[$sigma]/i ? "ok 806\n" : "not ok 806\n"; + + print $Sigma =~ /[$SIGMA]/i ? "ok 807\n" : "not ok 807\n"; + print $Sigma =~ /[$Sigma]/i ? "ok 808\n" : "not ok 808\n"; + print $Sigma =~ /[$sigma]/i ? "ok 809\n" : "not ok 809\n"; + + print $sigma =~ /[$SIGMA]/i ? "ok 810\n" : "not ok 810\n"; + print $sigma =~ /[$Sigma]/i ? "ok 811\n" : "not ok 811\n"; + print $sigma =~ /[$sigma]/i ? "ok 812\n" : "not ok 812\n"; +} diff --git a/utf8.h b/utf8.h index b35cfeb..d907d26 100644 --- a/utf8.h +++ b/utf8.h @@ -188,3 +188,8 @@ END_EXTERN_C #endif #define UTF8_IS_ASCII(c) UTF8_IS_INVARIANT(c) + +#define UNICODE_GREEK_CAPITAL_LETTER_SIGMA 0x03A3 +#define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 +#define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3 +