Re: unicode regex problem

diff --git a/regcomp.c b/regcomp.c

index 07e68bd..c61e548 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3162,11 +3162,6 @@ tryagain:
                                ender = grok_hex(p + 1, &numlen, &flags, NULL);
                                if (ender > 0xff)
                                    RExC_utf8 = 1;
-                               /* numlen is generous */
-                               if (numlen + len >= 127) {
-                                   p--;
-                                   goto loopdone;
-                               }
                                p = e + 1;
                            }
                        }
diff --git a/t/op/pat.t b/t/op/pat.t

index 771e3cd..7eeb79f 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1033\n";
+print "1..1055\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3250,5 +3250,15 @@ ok("  \x{1E01}x" =~ qr/\x{1E00}X/i,
     ok("\xc4\xc4\xc4" !~ /(\x{100}+?)/, "[perl #23769] don't match first byte of utf8 representation");
 }
 
-# last test 1033
+for (120 .. 130) {
+    my $head = 'x' x $_;
+    for my $tail ('\x{0061}', '\x{1234}') {
+       ok(
+           eval qq{ "$head$tail" =~ /$head$tail/ },
+           '\x{...} misparsed in regexp near 127 char EXACT limit'
+       );
+    }
+}
+
+# last test 1055
regcomp.c		patch | blob | blame | history
t/op/pat.t		patch | blob | blame | history