From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Fri, 2 Nov 2001 15:19:35 +0000 (+0000)
Subject: More encoding testing.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=3de8ed06f96286478ecf8f3810596152fa21b27b;p=p5sagit%2Fp5-mst-13.2.git

More encoding testing.

p4raw-id: //depot/perl@12813
---

diff --git a/lib/encoding.pm b/lib/encoding.pm
index 2f4b059..6f5970f 100644
--- a/lib/encoding.pm
+++ b/lib/encoding.pm
@@ -43,7 +43,7 @@ encoding - pragma to control the conversion of legacy data into Unicode
 
     print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
 
-    # but pack/unpack C are not, in case you still
+    # but pack/unpack are not affected, in case you still
     # want back to your native encoding
 
     print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
@@ -58,13 +58,13 @@ The pragma is a per script, not a per block lexical.  Only the last
 C<use encoding> matters, and it affects B<the whole script>.
 
 If no encoding is specified, the environment variable L<PERL_ENCODING>
-is consulted.  If that fails, "latin1" (ISO 8859-1) is assumed.
-If no encoding can be found, C<Unknown encoding '...'> error will be thrown.
+is consulted.  If that fails, "latin1" (ISO 8859-1) is assumed.  If no
+encoding can be found, an C<Unknown encoding '...'> error will be thrown.
 
 =head1 KNOWN PROBLEMS
 
-The C<\x..> and C<\0...> in regular expressions are not affected by
-this pragma.  They very probably should.
+Literals in regular expressions are not affected by this pragma.
+They very probably should be.
 
 =head1 SEE ALSO
 
diff --git a/lib/encoding.t b/lib/encoding.t
index 923baa7..0363441 100644
--- a/lib/encoding.t
+++ b/lib/encoding.t
@@ -1,4 +1,4 @@
-print "1..10\n";
+print "1..15\n";
 
 use encoding "latin1"; # ignored (overwritten by the next line)
 use encoding "greek";  # iso 8859-7 (no "latin" alias, surprise...)
@@ -44,8 +44,27 @@ print "ok 8\n";
 print "not " unless unpack("C", chr(0xdf)) == 0xce;
 print "ok 9\n";
 
+print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
+print "ok 10\n";
+
+print "not " unless unpack("U", chr(0xdf)) == 0x3af;
+print "ok 11\n";
+
 # charnames must still work
 use charnames ':full';
 print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
-print "ok 10\n";
+print "ok 12\n";
+
+# combine
+
+$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
+
+print "not " unless ord($c) == 0x3af;
+print "ok 13\n";
+
+print "not " unless ord(substr($c, 1, 1)) == 0xdf;
+print "ok 14\n";
+
+print "not " unless ord(substr($c, 2, 1)) == 0x3af;
+print "ok 15\n";