Integrate mainline

diff --cc lib/encoding.pm

index 0000000,e758687..7dacd7a

mode 000000,100644..100644
--- /dev/null
--- 2/lib/encoding.pm
+++ b/lib/encoding.pm
@@@ -1,0 -1,63 +1,54 @@@
+ package encoding;
+ 
+ use Encode;
+ 
+ sub import {
+     my ($class, $name) = @_;
+     $name = $ENV{PERL_ENCODING} if @_ < 2;
+     my $enc = find_encoding($name);
+     unless (defined $enc) {
+       require Carp;
+       Carp::croak "Unknown encoding '$name'";
+     }
+     ${^ENCODING} = $enc;
+ }
+ 
+ =pod
+ 
+ =head1 NAME
+ 
+ encoding - pragma to control the conversion of legacy data into Unicode
+ 
+ =head1 SYNOPSIS
+ 
+     use encoding "iso 8859-7";
+ 
+     $a = "\xDF";
+     $b = "\x{100}";
+ 
+     $c = $a . $b;
+ 
+     # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
+     # The \xDF of ISO 8859-7 is \x{3af} in Unicode.
+ 
+ =head1 DESCRIPTION
+ 
+ Normally when legacy 8-bit data is converted to Unicode the data is
+ expected to be Latin-1 (or EBCDIC in EBCDIC platforms).  With the
+ encoding pragma you can change this default.
+ 
+ The pragma is a per script, not a per block lexical.  Only the last
- -C<use encoding> matters, and it affects B<the whole script>.
++'use encoding' seen matters.
+ 
+ =head1 FUTURE POSSIBILITIES
+ 
- -The C<\x..> and C<\0...> in regular expressions are not
- -affected by this pragma.  They probably should.
- -
- -Also chr(), ord(), and C<\N{...}> might become affected.
- -
- -=head1 KNOWN PROBLEMS
- -
- -Cannot be combined with C<use utf8>.  Note that this is a problem
- -B<only> if you would like to have Unicode identifiers in your scripts.
- -You should not need C<use utf8> for anything else these days
- -(since Perl 5.8.0)
++The C<\x..> and C<\0...> in literals and regular expressions are not
++affected by this pragma.  They probably should.  Ditto C<\N{...}>.
+ 
+ =head1 SEE ALSO
+ 
+ L<perlunicode>
+ 
+ =cut
+ 
+ 1;
diff --cc lib/encoding.t

index 0000000,2be0312..40d97a2

mode 000000,100644..100644
--- /dev/null
--- 2/lib/encoding.t
+++ b/lib/encoding.t
@@@ -1,0 -1,31 +1,24 @@@
- -print "1..5\n";
++print "1..3\n";
+ 
+ use encoding "latin1"; # ignored (overwritten by the next line)
+ use encoding "greek";  # iso 8859-7 (no "latin" alias, surprise...)
+ 
- -# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
- -# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
- -# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
- -
+ $a = "\xDF";
+ $b = "\x{100}";
+ 
- -print "not " unless ord($a) == 0x3af;
- -print "ok 1\n";
- -
- -print "not " unless ord($b) == 0x100;
- -print "ok 2\n";
- -
- -my $c;
++my $c = $a . $b;
+ 
- -$c = $a . $b;
++# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
++# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
++# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
+ 
+ print "not " unless ord($c) == 0x3af;
- -print "ok 3\n";
++print "ok 1\n";
+ 
+ print "not " unless length($c) == 2;
- -print "ok 4\n";
++print "ok 2\n";
+ 
+ print "not " unless ord(substr($c, 1, 1)) == 0x100;
- -print "ok 5\n";
++print "ok 3\n";
++
+ 
diff --cc perl.h
Simple merge
		1	2
lib/encoding.pm	patch \|	\|	diff2 \|	blob \| history
lib/encoding.t	patch \|	\|	diff2 \|	blob \| history
perl.h	patch \|	diff1 \|	diff2 \|	blob \| history