lib/encoding.pm

   1 package encoding;
   2
   3 use Encode;
   4
   # import - handler invoked for C<use encoding NAME>.
   #
   # Looks NAME up with find_encoding() (from Encode) and stores the
   # resulting encoding object in ${^ENCODING}.  When the pragma is used
   # without an argument, the name is taken from $ENV{PERL_ENCODING}.
   #
   # Parameters:
   #   $class - package name passed in by the "use" machinery (unused here).
   #   $name  - optional encoding name.
   #
   # Returns: the value of the final assignment, i.e. the encoding object.
   # Dies (via Carp::croak) with "Unknown encoding '...'" when no name is
   # available or find_encoding() does not return a defined value.
   #
   # NOTE(review): recent perls document ${^ENCODING} as no longer
   # supported (see perlvar) -- confirm which perl versions this targets.
   sub import {
       my ($class, $name) = @_;

       # Fall back to the environment only when no argument was passed at
       # all; an explicitly passed value (even undef) is left untouched.
       $name = $ENV{PERL_ENCODING} if @_ < 2;

       # Without a name there is nothing to look up.
       my $enc = defined $name ? find_encoding($name) : undef;

       unless (defined $enc) {
           require Carp;

           # Never interpolate undef into the message; the text stays the
           # same as before ("Unknown encoding ''"), minus the warning.
           my $shown = defined $name ? $name : '';

           # Carp is loaded here at run time, so nothing in this block
           # guarantees that croak is predeclared when this call is parsed.
           # The parentheses make it an unambiguous function call.
           Carp::croak("Unknown encoding '$shown'");
       }

       ${^ENCODING} = $enc;
   }
  15
  16 =pod
  17
  18 =head1 NAME
  19
  20 encoding - pragma to control the conversion of legacy data into Unicode
  21
  22 =head1 SYNOPSIS
  23
  24     use encoding "iso 8859-7";
  25
  26     # The \xDF of ISO 8859-7 is \x{3af} in Unicode.
  27
  28     $a = "\xDF";
  29     $b = "\x{100}";
  30
  31     printf "%#x\n", ord($a); # will print 0x3af, not 0xdf
  32
  33     $c = $a . $b;
  34
  35     # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
  36
  37 =head1 DESCRIPTION
  38
  39 Normally when legacy 8-bit data is converted to Unicode the data is
  40 expected to be Latin-1 (or EBCDIC on EBCDIC platforms).  With the
  41 encoding pragma you can change this default.
  42
  43 The pragma is per-script, not a per-block lexical.  Only the last
  44 C<use encoding> matters, and it affects B<the whole script>.
  45
  46 If no encoding is specified, the environment variable C<PERL_ENCODING>
  47 is consulted.  If no encoding can be found, an C<Unknown encoding '...'>
  48 error is thrown.
  49
  50 =head1 FUTURE POSSIBILITIES
  51
  52 The C<\x..> and C<\0...> in regular expressions are not
  53 affected by this pragma.  They probably should be.
  54
  55 Also C<chr()>, C<ord()>, and C<\N{...}> might become affected.
  56
  57 =head1 KNOWN PROBLEMS
  58
  59 This pragma cannot be combined with C<use utf8>.  Note that this is
  60 a problem B<only> if you would like to have Unicode identifiers in
  61 your scripts.  You should not need C<use utf8> for anything else
  62 these days (since Perl 5.8.0).
  63
  64 =head1 SEE ALSO
  65
  66 L<perlunicode>, L<Encode>
  67
  68 =cut
  69
  70 1;