From: Nicholas Clark <nick@ccl4.org>
Date: Wed, 12 Dec 2001 20:07:04 +0000 (+0000)
Subject: use utf8; tests
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=435e7af634196f39217ad53bdbf04b5bbf203ce1;p=p5sagit%2Fp5-mst-13.2.git

use utf8; tests
Message-ID: <20011212200704.E21702@plum.flirble.org>

p4raw-id: //depot/perl@13662
---

diff --git a/lib/utf8.t b/lib/utf8.t
index 15af665..1cc0cf1 100644
--- a/lib/utf8.t
+++ b/lib/utf8.t
@@ -28,7 +28,7 @@ BEGIN {
 #
 #
 
-plan tests => 16;
+plan tests => 31;
 
 {
     # bug id 20001009.001
@@ -102,19 +102,66 @@ plan tests => 16;
 }
 
 {
+    use warnings;
     my $progfile = 'utf' . $$;
-    END {unlink $progfile}
-    open P, ">$progfile" or die "Can't open '$progfile': $!";
-    # Interpolation of hex characters needs to take place now, as we're
-    # testing feeding malformed utf8 into perl. Bug now fixed was an
-    # "out of memory" error. We really need the "" [rather than qq() or q()]
-    # to get the best explosion.
-    print P <<"BANG";
+    END {unlink_all $progfile}
+
+    # If I'm right 60 is '>' in ASCII, ' ' in EBCDIC
+    # 173 is not punctuation in either ASCII or EBCDIC
+    my (@char);
+    foreach (60, 173, 257, 65532) {
+      my $char = chr $_;
+      utf8::encode($char);
+      # I don't want to use map {ord} and I've no need to hardcode the UTF
+      # version
+      my $charsubst = $char;
+      $charsubst =~ s/(.)/ord ($1) . ','/ge;
+      chop $charsubst;
+      push @char, [$_, $char, $charsubst];
+    }
+    foreach (
+             ['check our detection program works',
+              '@a = ("'.chr(60).'\x2A", ""); display @a', qr/^>60,42<><$/],
+             ['check literal 8 bit input',
+              '$a = "' . chr (173) . '"; display $a', qr/^>173<$/],
+             ['check no utf8; makes no change',
+              'no utf8; $a = "' . chr (173) . '"; display $a', qr/^>173<$/],
+             # Now we do the real byte sequences that are valid UTF8
+             (map {
+               ["the utf8 sequence for chr $_->[0]",
+                qq(\$a = "$_->[1]"; display \$a), qr/^>$_->[2]<$/],
+               ["no utf8; for the utf8 sequence for chr $_->[0]",
+                qq(no utf8; \$a = "$_->[1]"; display \$a), qr/^>$_->[2]<$/],
+               ["use utf8; for the utf8 sequence for chr $_->[0]",
+                qq(use utf8; \$a = "$_->[1]"; display \$a), qr/^>$_->[0]<$/],
+              } @char),
+             # Interpolation of hex characters needs to take place now, as we're
+             # testing feeding malformed utf8 into perl. Bug now fixed was an
+             # "out of memory" error. We really need the "" [rather than qq()
+             # or q()] to get the best explosion.
+             ["!Feed malformed utf8 into perl.", <<"BANG",
     use utf8; %a = ("\xE1\xA0"=>"sterling");
-    print 'start'; printf '%x,', ord $_ foreach keys %a; print "end\n";
+    print 'start'; printf '%x,', ord \$_ foreach keys %a; print "end\n";
 BANG
-    print "# Possible delay...\n";
-    my $result = runperl ( verbose => 1, stderr => 1, progfile => $progfile );
-    like ($result,
-          qr/^Malformed UTF-8 character \(2 bytes, need 3\).*start\d+,end$/s);
+	      qr/^Malformed UTF-8 character \(2 bytes, need 3\).*start\d+,end$/s
+	     ],
+            ) {
+        my ($why, $prog, $expect) = @$_;
+        open P, ">$progfile" or die "Can't open '$progfile': $!";
+        print P q(
+                  sub display {
+                    print '>' . join (',', map {ord} split //, $_) . '<'
+                    foreach @_;
+                  }
+                 );
+	print P $prog;
+        close P or die "Can't close '$progfile': $!";
+        if ($why =~ s/^!//) {
+            print "# Possible delay...\n";
+        } else {
+            print "# $prog\n";
+        }
+        my $result = runperl ( stderr => 1, progfile => $progfile );
+        like ($result, $expect, $why);
+    }
 }
diff --git a/t/test.pl b/t/test.pl
index b6f4258..e0220f5 100644
--- a/t/test.pl
+++ b/t/test.pl
@@ -147,7 +147,8 @@ sub like {
     if (ref $expected eq 'Regexp') {
 	$pass = $got =~ $expected;
 	unless ($pass) {
-	    unshift(@mess, "#      got '$got'\n");
+	    unshift(@mess, "#      got '$got'\n",
+		           "# expected /$expected/\n");
 	}
     } else {
 	$pass = $got =~ /$expected/;
@@ -373,4 +374,10 @@ sub which_perl {
     return $Perl;
 }
 
+sub unlink_all {
+    foreach my $file (@_) {
+        1 while unlink $file;
+        print "# Couldn't unlink '$file': $!\n" if -f $file;
+    }
+}
 1;