regrepeat() mishandled UTF-8-encoded Latin-1 characters
in the EXACT case, which made /\x{a0}+/ fail to match.
p4raw-id: //depot/perl@21158
case CANY:
scan = loceol;
break;
- case EXACT: /* length of string is 1 */
- c = (U8)*STRING(p);
- while (scan < loceol && UCHARAT(scan) == c)
- scan++;
+ case EXACT:
+ if (do_utf8) {
+ c = (U8)*STRING(p);
+ while (scan < loceol && utf8_to_uvuni((U8*)scan, 0) == c)
+ scan += UTF8SKIP(scan);
+ } else { /* length of string is 1 */
+ c = (U8)*STRING(p);
+ while (scan < loceol && UCHARAT(scan) == c)
+ scan++;
+ }
break;
case EXACTF: /* length of string is 1 */
c = (U8)*STRING(p);
$| = 1;
-print "1..1012\n";
+print "1..1015\n";
BEGIN {
chdir 't' if -d 't';
ok(" \x{1E01}x" =~ qr/\x{1E00}X/i,
"<20030808193656.5109.1@llama.ni-s.u-net.com>");
-# last test 1012
+{
+ # [perl #23769] Unicode regex broken on simple example
+ # regrepeat() didn't handle the EXACT case right when the target string is UTF-8.
+ # Appending \x{100} forces $s to be UTF-8 encoded; chop then drops it, leaving only \x{a0} chars.
+ my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
+ # Check a single char, a quantified char (the case that used to fail), and a two-char literal.
+ ok($s =~ /\x{a0}/, "[perl #23769]");
+ ok($s =~ /\x{a0}+/, "[perl #23769]");
+ ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]");
+}
+
+# last test 1015