If an encoding is set by -M/-m, it must be in
[p5sagit/p5-mst-13.2.git] / lib / encoding.pm
CommitLineData
0a378802 1package encoding;
2
3use Encode;
4
# import - invoked by "use encoding ..."; selects the script-wide encoding.
#
# Arguments:
#   $class - the invoking package name (not otherwise used here).
#   $name  - optional encoding name, e.g. "iso 8859-7".
#
# The name is resolved in this order: the explicit argument, then the
# PERL_ENCODING environment variable (consulted only when no name argument
# was passed at all), then "latin1".  The object returned by
# find_encoding() is stored in the ${^ENCODING} special variable.
#
# Croaks with "Unknown encoding '<name>'" when find_encoding() returns
# undef for the resolved name.
sub import {
    my ($class, $name) = @_;

    # Fall back to the environment only when the caller gave no name.
    $name = $ENV{PERL_ENCODING} if @_ < 2;
    $name = "latin1" unless defined $name;

    my $enc = find_encoding($name);
    unless (defined $enc) {
        require Carp;
        # Parenthesized on purpose: Carp is loaded at run time here, so the
        # call must parse even if Carp::croak was not yet declared when this
        # file was compiled.
        Carp::croak("Unknown encoding '$name'");
    }

    ${^ENCODING} = $enc;
}
16
17=pod
18
19=head1 NAME
20
21encoding - pragma to control the conversion of legacy data into Unicode
22
23=head1 SYNOPSIS
24
25 use encoding "iso 8859-7";
26
121910a4 27 # The \xDF of ISO 8859-7 (Greek) is \x{3af} in Unicode.
4bdee82d 28
0a378802 29 $a = "\xDF";
30 $b = "\x{100}";
31
4bdee82d 32 printf "%#x\n", ord($a); # will print 0x3af, not 0xdf
33
0a378802 34 $c = $a . $b;
35
36 # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
0a378802 37
121910a4 38 # chr() is affected, and ...
39
40 print "mega\n" if ord(chr(0xdf)) == 0x3af;
41
42 # ... ord() is affected by the encoding pragma ...
43
44 print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
45
46 # but pack/unpack C are not, in case you still
47 # want to get back to your native encoding
48
49 print "peta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
50
0a378802 51=head1 DESCRIPTION
52
53Normally when legacy 8-bit data is converted to Unicode the data is
54expected to be Latin-1 (or EBCDIC in EBCDIC platforms). With the
55encoding pragma you can change this default.
56
57The pragma is a per script, not a per block lexical. Only the last
9f4817db 58C<use encoding> matters, and it affects B<the whole script>.
0a378802 59
4bdee82d 60If no encoding is specified, the environment variable C<PERL_ENCODING>
121910a4 61is consulted. If that is not set either, "latin1" (ISO 8859-1) is assumed.
62If the named encoding cannot be found, an C<Unknown encoding '...'> error is thrown.
4bdee82d 63
6ec9efec 64=head1 KNOWN PROBLEMS
0a378802 65
121910a4 66The C<\x..> and C<\0...> in regular expressions are not affected by
6ec9efec 67this pragma. They very probably should be.
d521382b 68
6ec9efec 69The charnames pragma ("\N{LATIN SMALL LETTER SHARP S}") does not work
70with this pragma.
d521382b 71
0a378802 72=head1 SEE ALSO
73
121910a4 74L<perlunicode>, L<Encode>
0a378802 75
76=cut
77
781;