01B7 0292
01B8 01B9
01BC 01BD
-01C4 01C5 01C6
-01C7 01C8 01C9
-01CA 01CB 01CC
+01C4 01C6
+01C5 01C6
+01C7 01C9
+01C8 01C9
+01CA 01CC
+01CB 01CC
01CD 01CE
01CF 01D0
01D1 01D2
01EA 01EB
01EC 01ED
01EE 01EF
-01F1 01F2 01F3
+01F1 01F3
+01F2 01F3
01F4 01F5
01F6 0195
01F7 01BF
my %Fold;
while (<$CaseFold>) {
+ # Skip status 'S', simple case folding
next unless /^([0-9A-Fa-f]+)\s*;\s*([CFI])\s*;\s*([0-9A-Fa-f]+(?: [0-9A-Fa-f]+)*)\s*;/;
my ($code, $status, $fold) = ($1, $2, $3);
if ($status eq 'C') { # Common: one-to-one folding
- append(\@Fold, $code, $fold);
+ # No append() since several codes may fold into one.
+ push @Fold, [ $code, $code, $fold ];
} else { # F: full, or I: dotted uppercase I -> dotless lowercase I
$Fold{hex($code)} = $fold;
}
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
(UV)value);
if (FOLD) {
+ U8 tmpbuf [UTF8_MAXLEN+1];
+ U8 foldbuf[UTF8_MAXLEN_FOLD+1];
+ STRLEN foldlen;
+ UV f;
+
+ uvchr_to_utf8(tmpbuf, value);
+ f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
+
+ if (f != value)
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", f);
+
if (value == UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA) {
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n",
(UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA);
while (s <= e) {
if ( utf8_to_uvchr((U8*)s, &len) == c1
&& (ln == len ||
- ibcmp_utf8(s, do_utf8, strend - s,
- m, UTF, ln))
+ ibcmp_utf8(s, do_utf8, (I32)(strend - s),
+ m, UTF, (I32)ln))
&& (norun || regtry(prog, s)) )
goto got_it;
s += len;
}
else {
while (s <= e) {
+ U8 tmpbuf [UTF8_MAXLEN+1];
+ U8 foldbuf[UTF8_MAXLEN_FOLD+1];
+ STRLEN foldlen;
UV c = utf8_to_uvchr((U8*)s, &len);
+ UV f;
+
+ uvchr_to_utf8(tmpbuf, c);
+ f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);
+
if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
- if ( (c == c1 || c == c2)
- && (ln == len ||
- ibcmp_utf8(s, do_utf8, strend - s,
- m, UTF, ln))
+ if ( (c == c1 || c == c2 || f == c1 || f == c2)
+ && ibcmp_utf8(s, do_utf8, (I32)(strend - s),
+ m, UTF, (I32)ln)
&& (norun || regtry(prog, s)) )
goto got_it;
s += len;
}
/*
-=for apidoc A|I32|ibcmp_utf8|const char *s1|bool u1|const char *s2|bool u2|register I32 len
+=for apidoc A|I32|ibcmp_utf8|const char *s1|bool u1|register I32 len1|const char *s2|bool u2|register I32 len2
Return true if the strings s1 and s2 differ case-insensitively, false
if not (if they are equal case-insensitively). If u1 is true, the