From: Yves Orton <demerphq@gmail.com>
Date: Thu, 6 Nov 2008 18:48:28 +0000 (+0000)
Subject: Various changes to regex diagnostics and testing
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=24d786f4d2806834028ce32abc1769da2e945f9b;p=p5sagit%2Fp5-mst-13.2.git

Various changes to regex diagnostics and testing

* Make ANYOF output from regprop easier to read by adding ][ in between the unicode representation and the "ascii" one

* Make it possible to make tests in re_tests todo.

* add a todo test for a complementary character class match that should fail (perl #60156)

* Also add a comment explaining a previous commit (relating to perl #60344)

p4raw-id: //depot/perl@34755
---

diff --git a/regcomp.c b/regcomp.c
index b90a783..fba4260 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9099,6 +9099,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
     else if (k == ANYOF) {
 	int i, rangestart = -1;
 	const U8 flags = ANYOF_FLAGS(o);
+	int do_sep = 0;
 
 	/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
 	static const char * const anyofs[] = {
@@ -9114,8 +9115,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 	    "[:^alpha:]",
 	    "[:ascii:]",
 	    "[:^ascii:]",
-	    "[:ctrl:]",
-	    "[:^ctrl:]",
+	    "[:cntrl:]",
+	    "[:^cntrl:]",
 	    "[:graph:]",
 	    "[:^graph:]",
 	    "[:lower:]",
@@ -9154,14 +9155,26 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 		    sv_catpvs(sv, "-");
 		    put_byte(sv, i - 1);
 		}
+		do_sep = 1;
 		rangestart = -1;
 	    }
 	}
-
+        if (do_sep) {
+            sv_catpvs(sv,"][");
+            do_sep = 0;
+        }
+            
 	if (o->flags & ANYOF_CLASS)
 	    for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
-		if (ANYOF_CLASS_TEST(o,i))
+		if (ANYOF_CLASS_TEST(o,i)) {
 		    sv_catpv(sv, anyofs[i]);
+		    do_sep = 1;
+		}
+        
+        if (do_sep) {
+            sv_catpvs(sv,"][");
+            do_sep = 0;
+        }
 
 	if (flags & ANYOF_UNICODE)
 	    sv_catpvs(sv, "{unicode}");
@@ -9175,7 +9188,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
 	    if (lv) {
 		if (sw) {
 		    U8 s[UTF8_MAXBYTES_CASE+1];
-		
+
 		    for (i = 0; i <= 256; i++) { /* just the first 256 */
 			uvchr_to_utf8(s, i);
 			
diff --git a/regcomp.h b/regcomp.h
index dee7d78..1664871 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -317,9 +317,9 @@ struct regnode_charclass_class {	/* has [[:blah:]] classes */
 #define ANYOF_NALNUM	 1
 #define ANYOF_SPACE	 2	/* \s */
 #define ANYOF_NSPACE	 3
-#define ANYOF_DIGIT	 4
+#define ANYOF_DIGIT	 4	/* \d */
 #define ANYOF_NDIGIT	 5
-#define ANYOF_ALNUMC	 6	/* isalnum(3), utf8::IsAlnum, ALNUMC */
+#define ANYOF_ALNUMC	 6	/* [[:alnum:]] isalnum(3), utf8::IsAlnum, ALNUMC */
 #define ANYOF_NALNUMC	 7
 #define ANYOF_ALPHA	 8
 #define ANYOF_NALPHA	 9
diff --git a/regexec.c b/regexec.c
index 9bd5f0e..363e205 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4983,7 +4983,8 @@ NULL
 	  do_ifmatch:
 	    ST.me = scan;
 	    ST.logical = logical;
-	    logical = 0;
+	    logical = 0; /* XXX: reset state of logical once it has been saved into ST */
+	    
 	    /* execute body of (?...A) */
 	    PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
 	    /* NOTREACHED */
diff --git a/t/op/re_tests b/t/op/re_tests
index 6d3ef4f..f515605 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -1360,4 +1360,5 @@ foo(\h)bar	foo\tbar	y	$1	\t
 /(.*?)a(?!(a+)b\2c)/	baaabaac	y	$&-$1	baa-ba
 # [perl #60344] Regex lookbehind failure after an (if)then|else in perl 5.10
 /\A(?(?=db2)db2|\D+)(?<!processed)\.csv\z/xms	sql_processed.csv	n	-	-
-/\N{U+0100}/	\x{100}	y	$&	\x{100}	# Bug #59328
\ No newline at end of file
+/\N{U+0100}/	\x{100}	y	$&	\x{100}	# Bug #59328
+[\s][\S]	\x{a0}\x{a0}	nT	-	-	# TODO Unicode complements should not match same character
\ No newline at end of file
diff --git a/t/op/regexp.t b/t/op/regexp.t
index 147e4cc..ba5da62 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -13,6 +13,7 @@
 # 	y	expect a match
 # 	n	expect no match
 # 	c	expect an error
+#	T	the test is a TODO (can be combined with y/n/c)
 #	B	test exposes a known bug in Perl, should be skipped
 #	b	test exposes a known bug in Perl, should be skipped if noamp
 #	t	test exposes a bug with threading, TODO if qr_embed_thr
@@ -102,16 +103,19 @@ foreach (@tests) {
     my ($pat, $subject, $result, $repl, $expect, $reason) = split(/\t/,$_,6);
     $reason = '' unless defined $reason;
     my $input = join(':',$pat,$subject,$result,$repl,$expect);
-    $pat = "'$pat'" unless $pat =~ /^[:'\/]/;
+    # the double '' below keeps simple syntax highlighters from going crazy
+    $pat = "'$pat'" unless $pat =~ /^[:''\/]/; 
     $pat =~ s/(\$\{\w+\})/$1/eeg;
     $pat =~ s/\\n/\n/g;
     $subject = eval qq("$subject"); die $@ if $@;
     $expect  = eval qq("$expect"); die $@ if $@;
     $expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/;
-    my $todo = $qr_embed_thr && ($result =~ s/t//);
+    my $todo_qr = $qr_embed_thr && ($result =~ s/t//);
     my $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//));
     $reason = 'skipping $&' if $reason eq  '' && $skip_amp;
     $result =~ s/B//i unless $skip;
+    my $todo= $result =~ s/T// ? " # TODO" : "";
+    
 
     for my $study ('', 'study $subject', 'utf8::upgrade($subject)',
 		   'utf8::upgrade($subject); study $subject') {
@@ -165,39 +169,39 @@ EOFCODE
 	}
 	chomp( my $err = $@ );
 	if ($result eq 'c') {
-	    if ($err !~ m!^\Q$expect!) { print "not ok $test (compile) $input => `$err'\n"; next TEST }
+	    if ($err !~ m!^\Q$expect!) { print "not ok $test$todo (compile) $input => `$err'\n"; next TEST }
 	    last;  # no need to study a syntax error
 	}
 	elsif ( $skip ) {
 	    print "ok $test # skipped", length($reason) ? " $reason" : '', "\n";
 	    next TEST;
 	}
-	elsif ( $todo ) {
+	elsif ( $todo_qr ) {
 	    print "not ok $test # TODO", length($reason) ? " - $reason" : '', "\n";
 	    next TEST;
 	}
 	elsif ($@) {
-	    print "not ok $test $input => error `$err'\n$code\n$@\n"; next TEST;
+	    print "not ok $test$todo $input => error `$err'\n$code\n$@\n"; next TEST;
 	}
 	elsif ($result =~ /^n/) {
-	    if ($match) { print "not ok $test ($study) $input => false positive\n"; next TEST }
+	    if ($match) { print "not ok $test$todo ($study) $input => false positive\n"; next TEST }
 	}
 	else {
 	    if (!$match || $got ne $expect) {
 	        eval { require Data::Dumper };
 		if ($@) {
-		    print "not ok $test ($study) $input => `$got', match=$match\n$code\n";
+		    print "not ok $test$todo ($study) $input => `$got', match=$match\n$code\n";
 		}
 		else { # better diagnostics
 		    my $s = Data::Dumper->new([$subject],['subject'])->Useqq(1)->Dump;
 		    my $g = Data::Dumper->new([$got],['got'])->Useqq(1)->Dump;
-		    print "not ok $test ($study) $input => `$got', match=$match\n$s\n$g\n$code\n";
+		    print "not ok $test$todo ($study) $input => `$got', match=$match\n$s\n$g\n$code\n";
 		}
 		next TEST;
 	    }
 	}
     }
-    print "ok $test\n";
+    print "ok $test$todo\n";
 }
 
 1;