--- /dev/null
+ package encoding;
+
+ use Encode;
+
+ sub import {
+ my ($class, $name) = @_;
+ $name = $ENV{PERL_ENCODING} if @_ < 2;
+ my $enc = find_encoding($name);
+ unless (defined $enc) {
+ require Carp;
+ Carp::croak "Unknown encoding '$name'";
+ }
+ ${^ENCODING} = $enc;
+ }
+
+ =pod
+
+ =head1 NAME
+
+ encoding - pragma to control the conversion of legacy data into Unicode
+
+ =head1 SYNOPSIS
+
+ use encoding "iso 8859-7";
+
+ $a = "\xDF";
+ $b = "\x{100}";
+
+ $c = $a . $b;
+
+ # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
+ # The \xDF of ISO 8859-7 is \x{3af} in Unicode.
+
+ =head1 DESCRIPTION
+
+ Normally when legacy 8-bit data is converted to Unicode the data is
+ expected to be Latin-1 (or EBCDIC in EBCDIC platforms). With the
+ encoding pragma you can change this default.
+
+ The pragma is a per script, not a per block lexical. Only the last
-C<use encoding> matters, and it affects B<the whole script>.
++'use encoding' seen matters.
+
+ =head1 FUTURE POSSIBILITIES
+
-The C<\x..> and C<\0...> in regular expressions are not
-affected by this pragma. They probably should.
-
-Also chr(), ord(), and C<\N{...}> might become affected.
-
-=head1 KNOWN PROBLEMS
-
-Cannot be combined with C<use utf8>. Note that this is a problem
-B<only> if you would like to have Unicode identifiers in your scripts.
-You should not need C<use utf8> for anything else these days
-(since Perl 5.8.0)
++The C<\x..> and C<\0...> in literals and regular expressions are not
++affected by this pragma. They probably should. Ditto C<\N{...}>.
+
+ =head1 SEE ALSO
+
+ L<perlunicode>
+
+ =cut
+
+ 1;
--- /dev/null
-print "1..5\n";
++print "1..3\n";
+
+ use encoding "latin1"; # ignored (overwritten by the next line)
+ use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
+
-# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
-# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
-# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
-
+ $a = "\xDF";
+ $b = "\x{100}";
+
-print "not " unless ord($a) == 0x3af;
-print "ok 1\n";
-
-print "not " unless ord($b) == 0x100;
-print "ok 2\n";
-
-my $c;
++my $c = $a . $b;
+
-$c = $a . $b;
++# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
++# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
++# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
+
+ print "not " unless ord($c) == 0x3af;
-print "ok 3\n";
++print "ok 1\n";
+
+ print "not " unless length($c) == 2;
-print "ok 4\n";
++print "ok 2\n";
+
+ print "not " unless ord(substr($c, 1, 1)) == 0x100;
-print "ok 5\n";
++print "ok 3\n";
++
+