Re: unicode regex problem
Hugo van der Sanden [Sat, 13 Sep 2003 15:21:34 +0000 (16:21 +0100)]
Message-Id: <200309131421.h8DELYx25894@zen.crypt.org>

p4raw-id: //depot/perl@21210

regcomp.c
t/op/pat.t

index 07e68bd..c61e548 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3162,11 +3162,6 @@ tryagain:
                                ender = grok_hex(p + 1, &numlen, &flags, NULL);
                                if (ender > 0xff)
                                    RExC_utf8 = 1;
-                               /* numlen is generous */
-                               if (numlen + len >= 127) {
-                                   p--;
-                                   goto loopdone;
-                               }
                                p = e + 1;
                            }
                        }
index 771e3cd..7eeb79f 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1033\n";
+print "1..1055\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3250,5 +3250,15 @@ ok("  \x{1E01}x" =~ qr/\x{1E00}X/i,
     ok("\xc4\xc4\xc4" !~ /(\x{100}+?)/, "[perl #23769] don't match first byte of utf8 representation");
 }
 
-# last test 1033
+for (120 .. 130) {
+    my $head = 'x' x $_;
+    for my $tail ('\x{0061}', '\x{1234}') {
+       ok(
+           eval qq{ "$head$tail" =~ /$head$tail/ },
+           '\x{...} misparsed in regexp near 127 char EXACT limit'
+       );
+    }
+}
+
+# last test 1055