Extend the effect of the encoding pragma to chr() and ord().

[p5sagit/p5-mst-13.2.git] / lib / encoding.pm
diff --git a/lib/encoding.pm b/lib/encoding.pm

index be0fd73..33c5113 100644 (file)
--- a/lib/encoding.pm
+++ b/lib/encoding.pm
@@ -5,6 +5,7 @@ use Encode;
 sub import {
     my ($class, $name) = @_;
     $name = $ENV{PERL_ENCODING} if @_ < 2;
+    $name = "latin1" unless defined $name;
     my $enc = find_encoding($name);
     unless (defined $enc) {
        require Carp;
@@ -23,7 +24,7 @@ encoding - pragma to control the conversion of legacy data into Unicode
 
     use encoding "iso 8859-7";
 
-    # The \xDF of ISO 8859-7 is \x{3af} in Unicode.
+    # The \xDF of ISO 8859-7 (Greek) is \x{3af} in Unicode.
 
     $a = "\xDF";
     $b = "\x{100}";
@@ -34,6 +35,19 @@ encoding - pragma to control the conversion of legacy data into Unicode
 
     # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
 
+    # chr() is affected, and ...
+
+    print "mega\n"  if ord(chr(0xdf)) == 0x3af;
+
+    # ... ord() is affected by the encoding pragma ...
+
+    print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
+
+    # but pack/unpack C are not, in case you still
+    # want to get back to your native encoding
+
+    print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
+
 =head1 DESCRIPTION
 
 Normally when legacy 8-bit data is converted to Unicode the data is
@@ -44,26 +58,26 @@ The pragma is a per script, not a per block lexical.  Only the last
 C<use encoding> matters, and it affects B<the whole script>.
 
 If no encoding is specified, the environment variable L<PERL_ENCODING>
-is consulted.  If no encoding can be found, C<Unknown encoding '...'>
-error will be thrown.
+is consulted.  If that fails, "latin1" (ISO 8859-1) is assumed.
+If no encoding can be found, a C<Unknown encoding '...'> error will be thrown.
 
 =head1 FUTURE POSSIBILITIES
 
-The C<\x..> and C<\0...> in regular expressions are not
-affected by this pragma.  They probably should.
+The C<\x..> and C<\0...> in regular expressions are not affected by
+this pragma.  They probably should be.
 
-Also chr(), ord(), and C<\N{...}> might become affected.
+The charnames escape C<\N{...}> does not work with this pragma.
 
 =head1 KNOWN PROBLEMS
 
 Cannot be combined with C<use utf8>.  Note that this is a problem
 B<only> if you would like to have Unicode identifiers in your scripts.
 You should not need C<use utf8> for anything else these days
-(since Perl 5.8.0)
+(since Perl 5.8.0).
 
 =head1 SEE ALSO
 
-L<perlunicode>, L<encode>
+L<perlunicode>, L<Encode>
 
 =cut