From: Hugo van der Sanden <hv@crypt.org>
Date: Sat, 13 Sep 2003 15:21:34 +0000 (+0100)
Subject: Re: unicode regex problem
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=4be7a33fa9707b547fe5b3343eb5cf92ac93f948;p=p5sagit%2Fp5-mst-13.2.git

Re: unicode regex problem
Message-Id: <200309131421.h8DELYx25894@zen.crypt.org>

p4raw-id: //depot/perl@21210
---

diff --git a/regcomp.c b/regcomp.c
index 07e68bd..c61e548 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3162,11 +3162,6 @@ tryagain:
 				ender = grok_hex(p + 1, &numlen, &flags, NULL);
 				if (ender > 0xff)
 				    RExC_utf8 = 1;
-				/* numlen is generous */
-				if (numlen + len >= 127) {
-				    p--;
-				    goto loopdone;
-				}
 				p = e + 1;
 			    }
 			}
diff --git a/t/op/pat.t b/t/op/pat.t
index 771e3cd..7eeb79f 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
 
 $| = 1;
 
-print "1..1033\n";
+print "1..1055\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3250,5 +3250,15 @@ ok("  \x{1E01}x" =~ qr/\x{1E00}X/i,
     ok("\xc4\xc4\xc4" !~ /(\x{100}+?)/, "[perl #23769] don't match first byte of utf8 representation");
 }
 
-# last test 1033
+for (120 .. 130) {    # prefix lengths on either side of the 127 named in the test description
+    my $head = 'x' x $_;    # $_ copies of 'x' placed ahead of the escape
+    for my $tail ('\x{0061}', '\x{1234}') {    # one code point <= 0xff, one above; single quotes keep the escapes literal here
+	ok(    # 11 lengths x 2 tails = the 22 tests added to the plan (1033 -> 1055)
+	    eval qq{ "$head$tail" =~ /$head$tail/ },    # string eval: the escape is parsed in both the string and the regex literal
+	    '\x{...} misparsed in regexp near 127 char EXACT limit'
+	);
+    }
+}
+
+# last test 1055