lib/encoding.pm

   1 package encoding;
   2
   3 use Encode;
   4
   5 sub import {
   6     my ($class, $name) = @_;
   7     $name = $ENV{PERL_ENCODING} if @_ < 2;
   8     $name = "latin1" unless defined $name;
   9     my $enc = find_encoding($name);
  10     unless (defined $enc) {
  11         require Carp;
  12         Carp::croak "Unknown encoding '$name'";
  13     }
  14     ${^ENCODING} = $enc;
  15 }
  16
  17 =pod
  18
  19 =head1 NAME
  20
  21 encoding - pragma to control the conversion of legacy data into Unicode
  22
  23 =head1 SYNOPSIS
  24
  25     use encoding "iso 8859-7";
  26
  27     # The \xDF of ISO 8859-7 (Greek) is \x{3af} in Unicode.
  28
  29     $a = "\xDF";
  30     $b = "\x{100}";
  31
  32     printf "%#x\n", ord($a); # will print 0x3af, not 0xdf
  33
  34     $c = $a . $b;
  35
  36     # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
  37
  38     # chr() is affected, and ...
  39
  40     print "mega\n"  if ord(chr(0xdf)) == 0x3af;
  41
  42     # ... ord() is affected by the encoding pragma ...
  43
  44     print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
  45
  46     # but pack/unpack C are not, in case you still
  47     # want to get back to your native encoding
  48
  49     print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
  50
  51 =head1 DESCRIPTION
  52
  53 Normally when legacy 8-bit data is converted to Unicode the data is
  54 expected to be Latin-1 (or EBCDIC on EBCDIC platforms).  With the
  55 encoding pragma you can change this default.
  56
  57 The pragma is per-script, not a per-block lexical one.  Only the last
  58 C<use encoding> matters, and it affects B<the whole script>.
  59
  60 If no encoding is specified, the environment variable C<PERL_ENCODING>
  61 is consulted.  If that is not set, "latin1" (ISO 8859-1) is assumed.
  62 If the encoding cannot be found, an C<Unknown encoding '...'> error is thrown.
  63
  64 =head1 FUTURE POSSIBILITIES
  65
  66 The C<\x..> and C<\0...> in regular expressions are not affected by
  67 this pragma.  They probably should be.
  68
  69 The charnames C<\N{...}> escapes do not work with this pragma.
  70
  71 =head1 KNOWN PROBLEMS
  72
  73 Cannot be combined with C<use utf8>.  Note that this is a problem
  74 B<only> if you would like to have Unicode identifiers in your scripts.
  75 You should not need C<use utf8> for anything else these days
  76 (since Perl 5.8.0).
  77
  78 =head1 SEE ALSO
  79
  80 L<perlunicode>, L<Encode>
  81
  82 =cut
  83
  84 1;