if (c1 == c2)
while (s <= e) {
if ( utf8_to_uvchr((U8*)s, &len) == c1
- && (ln == 1 ||
+ && (ln == len ||
ibcmp_utf8(s, do_utf8, strend - s,
m, UTF, ln))
&& (norun || regtry(prog, s)) )
while (s <= e) {
UV c = utf8_to_uvchr((U8*)s, &len);
if ( (c == c1 || c == c2)
- && (ln == 1 ||
+ && (ln == len ||
ibcmp_utf8(s, do_utf8, strend - s,
m, UTF, ln))
&& (norun || regtry(prog, s)) )
s = STRING(scan);
ln = STR_LEN(scan);
if (do_utf8 != (UTF!=0)) {
- /* The target and the pattern have differing "utf8ness". */
+ /* The target and the pattern have differing utf8ness. */
char *l = locinput;
char *e = s + ln;
- STRLEN len;
+ STRLEN ulen;
if (do_utf8) {
/* The target is utf8, the pattern is not utf8. */
if (l >= PL_regeol)
sayNO;
if (NATIVE_TO_UNI(*(U8*)s) !=
- utf8_to_uvchr((U8*)l, &len))
+ utf8_to_uvchr((U8*)l, &ulen))
sayNO;
- l += len;
+ l += ulen;
s ++;
}
}
if (l >= PL_regeol)
sayNO;
if (NATIVE_TO_UNI(*((U8*)l)) !=
- utf8_to_uvchr((U8*)s, &len))
+ utf8_to_uvchr((U8*)s, &ulen))
sayNO;
- s += len;
+ s += ulen;
l ++;
}
}
nextchr = UCHARAT(locinput);
break;
}
- /* The target and the pattern have the same "utf8ness". */
+ /* The target and the pattern have the same utf8ness. */
/* Inline the first character, for speed. */
if (UCHARAT(s) != nextchr)
sayNO;
s = STRING(scan);
ln = STR_LEN(scan);
- if (do_utf8) {
+ {
char *l = locinput;
- char *e;
- STRLEN ulen;
- U8 tmpbuf[UTF8_MAXLEN_UCLC+1];
- e = s + ln;
- while (s < e) {
- if (l >= PL_regeol)
- sayNO;
- toLOWER_utf8((U8*)l, tmpbuf, &ulen);
- if (memNE(s, (char*)tmpbuf, ulen))
- sayNO;
- s += UTF8SKIP(s);
- l += ulen;
+ char *e = s + ln;
+ U8 tmpbuf[UTF8_MAXLEN_FOLD+1];
+
+ if (do_utf8 != (UTF!=0)) {
+ /* The target and the pattern have differing utf8ness. */
+ STRLEN ulen1, ulen2;
+ UV cs, cl;
+
+ if (do_utf8) {
+ /* The target is utf8, the pattern is not utf8. */
+ while (s < e) {
+ if (l >= PL_regeol)
+ sayNO;
+
+ cs = to_uni_fold(NATIVE_TO_UNI(*(U8*)s),
+ (U8*)s, &ulen1);
+ cl = utf8_to_uvchr((U8*)l, &ulen2);
+
+ if (cs != cl) {
+ cl = to_uni_fold(cl, (U8*)l, &ulen2);
+ if (ulen1 != ulen2 || cs != cl)
+ sayNO;
+ }
+ l += ulen1;
+ s ++;
+ }
+ }
+ else {
+ /* The target is not utf8, the pattern is utf8. */
+ while (s < e) {
+ if (l >= PL_regeol)
+ sayNO;
+
+ cs = utf8_to_uvchr((U8*)s, &ulen1);
+
+ cl = to_uni_fold(NATIVE_TO_UNI(*(U8*)l),
+ (U8*)l, &ulen2);
+
+ if (cs != cl) {
+ cs = to_uni_fold(cs, (U8*)s, &ulen1);
+ if (ulen1 != ulen2 || cs != cl)
+ sayNO;
+ }
+ l ++;
+ s += ulen1;
+ }
+ }
+ locinput = l;
+ nextchr = UCHARAT(locinput);
+ break;
+ }
+
+ if (do_utf8 && UTF) {
+ /* Both the target and the pattern are utf8. */
+ STRLEN ulen;
+
+ while (s < e) {
+ if (l >= PL_regeol)
+ sayNO;
+ if (UTF8SKIP(s) != UTF8SKIP(l) ||
+ memNE(s, (char*)l, UTF8SKIP(s))) {
+ to_utf8_fold((U8*)l, tmpbuf, &ulen);
+ if (UTF8SKIP(s) != ulen ||
+ memNE(s, (char*)tmpbuf, ulen))
+ sayNO;
+ }
+ l += UTF8SKIP(l);
+ s += UTF8SKIP(s);
+ }
+ locinput = l;
+ nextchr = UCHARAT(locinput);
+ break;
}
- locinput = l;
- nextchr = UCHARAT(locinput);
- break;
}
+ /* Neither the target and the pattern are utf8. */
+
/* Inline the first character, for speed. */
if (UCHARAT(s) != nextchr &&
UCHARAT(s) != ((OP(scan) == EXACTF)
$| = 1;
-print "1..770\n";
+print "1..786\n";
BEGIN {
chdir 't' if -d 't';
print "not " unless "A\x{100}" =~ /A/i;
print "ok 757\n";
+
+ print "not " unless "\x{101}a" =~ /\x{100}/i;
+ print "ok 758\n";
+
+ print "not " unless "\x{100}a" =~ /\x{100}/i;
+ print "ok 759\n";
+
+ print "not " unless "\x{101}a" =~ /\x{101}/i;
+ print "ok 760\n";
+
+ print "not " unless "\x{100}a" =~ /\x{101}/i;
+ print "ok 761\n";
+
+ print "not " unless "a\x{100}" =~ /A\x{100}/i;
+ print "ok 762\n";
+
+ print "not " unless "A\x{100}" =~ /A\x{100}/i;
+ print "ok 763\n";
+
+ print "not " unless "a\x{100}" =~ /a\x{100}/i;
+ print "ok 764\n";
+
+ print "not " unless "A\x{100}" =~ /A\x{100}/i;
+ print "ok 765\n";
+
+ print "not " unless "a\x{100}" =~ /[A]/i;
+ print "ok 766\n";
+
+ print "not " unless "A\x{100}" =~ /[A]/i;
+ print "ok 767\n";
+
+ print "not " unless "a\x{100}" =~ /[a]/i;
+ print "ok 768\n";
+
+ print "not " unless "A\x{100}" =~ /[A]/i;
+ print "ok 769\n";
+
+ print "not " unless "\x{101}a" =~ /[\x{100}]/i;
+ print "ok 770\n";
+
+ print "not " unless "\x{100}a" =~ /[\x{100}]/i;
+ print "ok 771\n";
+
+ print "not " unless "\x{101}a" =~ /[\x{101}]/i;
+ print "ok 772\n";
+
+ print "not " unless "\x{100}a" =~ /[\x{101}]/i;
+ print "ok 773\n";
+
}
{
my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}";
my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}";
- print $lower =~ m/$UPPER/i ? "ok 758\n" : "not ok 758\n";
- print $UPPER =~ m/$lower/i ? "ok 759\n" : "not ok 759\n";
- print $lower =~ m/[$UPPER]/i ? "ok 760\n" : "not ok 760\n";
- print $UPPER =~ m/[$lower]/i ? "ok 761\n" : "not ok 761\n";
+ print $lower =~ m/$UPPER/i ? "ok 774\n" : "not ok 774\n";
+ print $UPPER =~ m/$lower/i ? "ok 775\n" : "not ok 775\n";
+ print $lower =~ m/[$UPPER]/i ? "ok 776\n" : "not ok 776\n";
+ print $UPPER =~ m/[$lower]/i ? "ok 777\n" : "not ok 777\n";
print "# GREEK LETTER ALPHA WITH VRACHY\n";
$lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}";
$UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}";
- print $lower =~ m/$UPPER/i ? "ok 762\n" : "not ok 762\n";
- print $UPPER =~ m/$lower/i ? "ok 763\n" : "not ok 763\n";
- print $lower =~ m/[$UPPER]/i ? "ok 764\n" : "not ok 764\n";
- print $UPPER =~ m/[$lower]/i ? "ok 765\n" : "not ok 765\n";
+ print $lower =~ m/$UPPER/i ? "ok 778\n" : "not ok 778\n";
+ print $UPPER =~ m/$lower/i ? "ok 779\n" : "not ok 779\n";
+ print $lower =~ m/[$UPPER]/i ? "ok 780\n" : "not ok 780\n";
+ print $UPPER =~ m/[$lower]/i ? "ok 781\n" : "not ok 781\n";
print "# LATIN LETTER Y WITH DIAERESIS\n";
$lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}";
$UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}";
-
- print $lower =~ m/$UPPER/i ? "ok 766\n" : "not ok 766\n";
- print $UPPER =~ m/$lower/i ? "ok 767\n" : "not ok 767\n";
- print $lower =~ m/[$UPPER]/i ? "ok 768\n" : "not ok 768\n";
- print $UPPER =~ m/[$lower]/i ? "ok 769\n" : "not ok 769\n";
+ print $lower =~ m/$UPPER/i ? "ok 782\n" : "not ok 782\n";
+ print $UPPER =~ m/$lower/i ? "ok 783\n" : "not ok 783\n";
+ print $lower =~ m/[$UPPER]/i ? "ok 784\n" : "not ok 784\n";
+ print $UPPER =~ m/[$lower]/i ? "ok 785\n" : "not ok 785\n";
}
{
my $char = "\N{COMBINING GREEK PERISPOMENI}";
my $code = sprintf "%04x", ord($char);
- # Before #13843 this was failing.
- print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 770\n" : "ok 770\n";
+ # Before #13843 this was failing by matching falsely.
+ print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 786\n" : "ok 786\n";
}