[p5sagit/p5-mst-13.2.git] / lib / encoding.pm

package encoding;

use Encode;

# import($class, $name)
#
# Invoked by "use encoding NAME;".  Resolves NAME to an Encode encoding
# object with find_encoding() and stores that object in ${^ENCODING}.
#
# Parameters:
#   $class - the package name supplied by "use" (not otherwise used).
#   $name  - the encoding name.  When it is omitted or undefined, the
#            PERL_ENCODING environment variable is consulted instead.
#
# Croaks with "Unknown encoding '<name>'" when no encoding object can be
# found for the requested name, or when no name is available at all.
sub import {
    my ($class, $name) = @_;

    # Test definedness rather than the argument count, so that an explicit
    # "use encoding undef" also falls back to the environment.
    $name = $ENV{PERL_ENCODING} unless defined $name;

    # Skip the lookup entirely when there is nothing to look up.
    my $enc = defined $name ? find_encoding($name) : undef;

    unless (defined $enc) {
        require Carp;
        # Keep the message text unchanged, but never interpolate undef
        # (that would raise an "uninitialized value" warning under -w).
        my $shown = defined $name ? $name : '';
        Carp::croak("Unknown encoding '$shown'");
    }

    ${^ENCODING} = $enc;
}

=pod

=head1 NAME

encoding - pragma to control the conversion of legacy data into Unicode

=head1 SYNOPSIS

    use encoding "iso 8859-7";

    # The \xDF of ISO 8859-7 is \x{3af} in Unicode.

    $a = "\xDF";
    $b = "\x{100}";

    printf "%#x\n", ord($a); # will print 0x3af, not 0xdf

    $c = $a . $b;

    # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".

=head1 DESCRIPTION

Normally when legacy 8-bit data is converted to Unicode the data is
expected to be Latin-1 (or EBCDIC on EBCDIC platforms).  With the
encoding pragma you can change this default.

The pragma is per script, not per block: it is not lexically scoped.
Only the last C<use encoding> matters, and it affects B<the whole script>.

If no encoding is specified, the environment variable C<PERL_ENCODING>
is consulted.  If no encoding can be found, an C<Unknown encoding '...'>
error will be thrown.

=head1 FUTURE POSSIBILITIES

The C<\x..> and C<\0...> in regular expressions are not
affected by this pragma.  They probably should be.

Also chr(), ord(), and C<\N{...}> might become affected.

=head1 KNOWN PROBLEMS

Cannot be combined with C<use utf8>.  Note that this is a problem
B<only> if you would like to have Unicode identifiers in your scripts.
You should not need C<use utf8> for anything else these days
(since Perl 5.8.0).

=head1 SEE ALSO

L<perlunicode>, L<Encode>

=cut

1;
Commit	Line	Data
0a378802	1	package encoding;
	2
	3	use Encode;
	4
	5	sub import {
	6	my ($class, $name) = @_;
	7	$name = $ENV{PERL_ENCODING} if @_ < 2;
	8	my $enc = find_encoding($name);
	9	unless (defined $enc) {
	10	require Carp;
	11	Carp::croak "Unknown encoding '$name'";
	12	}
	13	${^ENCODING} = $enc;
	14	}
	15
	16	=pod
	17
	18	=head1 NAME
	19
	20	encoding - pragma to control the conversion of legacy data into Unicode
	21
	22	=head1 SYNOPSIS
	23
	24	use encoding "iso 8859-7";
	25
4bdee82d	26	# The \xDF of ISO 8859-7 is \x{3af} in Unicode.
4bdee82d	27
0a378802	28	$a = "\xDF";
	29	$b = "\x{100}";
	30
4bdee82d	31	printf "%#x\n", ord($a); # will print 0x3af, not 0xdf
4bdee82d	32
0a378802	33	$c = $a . $b;
	34
	35	# $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
0a378802	36
	37	=head1 DESCRIPTION
	38
	39	Normally when legacy 8-bit data is converted to Unicode the data is
	40	expected to be Latin-1 (or EBCDIC in EBCDIC platforms). With the
	41	encoding pragma you can change this default.
	42
	43	The pragma is a per script, not a per block lexical. Only the last
9f4817db	44	C<use encoding> matters, and it affects B<the whole script>.
0a378802	45
4bdee82d	46	If no encoding is specified, the environment variable L<PERL_ENCODING>
	47	is consulted. If no encoding can be found, C<Unknown encoding '...'>
	48	error will be thrown.
	49
0a378802	50	=head1 FUTURE POSSIBILITIES
0a378802	51
9f4817db	52	The C<\x..> and C<\0...> in regular expressions are not
	53	affected by this pragma. They probably should.
	54
1768d7eb	55	Also chr(), ord(), and C<\N{...}> might become affected.
0a378802	56
d521382b	57	=head1 KNOWN PROBLEMS
	58
	59	Cannot be combined with C<use utf8>. Note that this is a problem
	60	B<only> if you would like to have Unicode identifiers in your scripts.
	61	You should not need C<use utf8> for anything else these days
	62	(since Perl 5.8.0)
	63
0a378802	64	=head1 SEE ALSO
0a378802	65
4bdee82d	66	L<perlunicode>, L<encode>
0a378802	67
	68	=cut
	69
	70	1;