Commit | Line | Data |
0a378802 |
1 | package encoding; |
2 | |
3 | use Encode; |
4 | |
# import - invoked by "use encoding NAME;" to select the encoding object
# that is stored in ${^ENCODING}.
#
# Arguments:
#   $class - the invoking package name (not used).
#   $name  - optional encoding name.  When the caller passes no name
#            argument at all, $ENV{PERL_ENCODING} is used instead; if the
#            name is still undefined after that, "latin1" is used.
#
# Croaks with "Unknown encoding '$name'" when find_encoding() does not
# return a defined value for $name.
#
# NOTE(review): perlvar documents that assigning a defined value to
# ${^ENCODING} is a fatal error on newer perls -- confirm which Perl
# versions this file is expected to run on.
sub import {
    my ($class, $name) = @_;

    # Consult the environment only when no name argument was passed; an
    # explicit undef argument skips straight to the "latin1" default.
    $name = $ENV{PERL_ENCODING} if @_ < 2;
    $name = "latin1" unless defined $name;

    my $enc = find_encoding($name);
    unless (defined $enc) {
        require Carp;
        # Parenthesised so the call parses even when Carp::croak has not
        # been declared by the time this sub is compiled (Carp is only
        # loaded at run time by the require above).
        Carp::croak("Unknown encoding '$name'");
    }

    ${^ENCODING} = $enc;
}
16 | |
17 | =pod |
18 | |
19 | =head1 NAME |
20 | |
21 | encoding - pragma to control the conversion of legacy data into Unicode |
22 | |
23 | =head1 SYNOPSIS |
24 | |
25 | use encoding "iso 8859-7"; |
26 | |
121910a4 |
27 | # The \xDF of ISO 8859-7 (Greek) is \x{3af} in Unicode. |
4bdee82d |
28 | |
0a378802 |
29 | $a = "\xDF"; |
30 | $b = "\x{100}"; |
31 | |
4bdee82d |
32 | printf "%#x\n", ord($a); # will print 0x3af, not 0xdf |
33 | |
0a378802 |
34 | $c = $a . $b; |
35 | |
36 | # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}". |
0a378802 |
37 | |
121910a4 |
38 | # chr() is affected, and ... |
39 | |
40 | print "mega\n" if ord(chr(0xdf)) == 0x3af; |
41 | |
42 | # ... ord() is affected by the encoding pragma ... |
43 | |
44 | print "tera\n" if ord(pack("C", 0xdf)) == 0x3af; |
45 | |
46 | # but pack/unpack C are not, in case you still |
47 | # want to get back to your native encoding |
48 | |
49 | print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf; |
50 | |
0a378802 |
51 | =head1 DESCRIPTION |
52 | |
53 | Normally, when legacy 8-bit data is converted to Unicode, the data is |
54 | expected to be Latin-1 (or EBCDIC on EBCDIC platforms). With the |
55 | encoding pragma you can change this default. |
56 | |
57 | The pragma is per script, not lexically scoped per block. Only the last |
9f4817db |
58 | C<use encoding> matters, and it affects B<the whole script>. |
0a378802 |
59 | |
4bdee82d |
60 | If no encoding is specified, the environment variable C<PERL_ENCODING> |
121910a4 |
61 | is consulted. If that variable is not set, "latin1" (ISO 8859-1) is assumed. |
62 | If the resulting encoding cannot be found, an C<Unknown encoding '...'> error is thrown. |
4bdee82d |
63 | |
6ec9efec |
64 | =head1 KNOWN PROBLEMS |
0a378802 |
65 | |
121910a4 |
66 | The C<\x..> and C<\0...> escapes in regular expressions are not affected by |
6ec9efec |
67 | this pragma. They very probably should be. |
d521382b |
68 | |
6ec9efec |
69 | The charnames pragma ("\N{LATIN SMALL LETTER SHARP S}") does not work |
70 | with this pragma. |
d521382b |
71 | |
0a378802 |
72 | =head1 SEE ALSO |
73 | |
121910a4 |
74 | L<perlunicode>, L<Encode> |
0a378802 |
75 | |
76 | =cut |
77 | |
78 | 1; |