Upgrade to Encode-2.17
[p5sagit/p5-mst-13.2.git] / ext / Encode / encoding.pm
CommitLineData
d1256cb1 1# $Id: encoding.pm,v 2.3 2006/05/03 18:24:10 dankogai Exp $
3ef515df 2package encoding;
d1256cb1 3our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
3ef515df 4
5use Encode;
046f36bf 6use strict;
b1aeb384 7
# Debug tracing switch.  The empty prototype makes this an inlinable
# constant, so the "DEBUG and warn ..." statements in this file cost
# nothing while it is 0.
sub DEBUG () { 0 }

# Refuse to load on EBCDIC platforms, detected by "A" having code
# point 193.
BEGIN {
    ord("A") == 193 and do {
        require Carp;
        Carp::croak("encoding: pragma does not support EBCDIC platforms");
    };
}

# True only when PerlIO::encoding can be loaded and reports a version of
# at least 0.02; import() and unimport() consult this flag before
# touching the layers of STDIN and STDOUT.
our $HAS_PERLIO = 0;
eval { require PerlIO::encoding };
$HAS_PERLIO = ( PerlIO::encoding->VERSION >= 0.02 ) if !$@;
b2704119 22
# _exception($name)
#
# Tells import() whether ${^ENCODING} must be left unset for the encoding
# called $name.  Returns 1 only when all of the following hold, and 0
# otherwise:
#   * the running perl is not newer than 5.8.0 ($] <= 5.008),
#   * $name is one of the UTF encodings listed below, and
#   * $Config{perl_patchlevel} is false (i.e. not a maintperl build).
sub _exception {
    my $name = shift;

    # 5.8.1 or higher then no
    return 0 if $] > 5.008;

    # UTFs or no
    my @utf_names = qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
      UTF-32 UTF-32BE UTF-32LE);
    my %is_utf;
    @is_utf{@utf_names} = (1) x @utf_names;
    return 0 unless $is_utf{$name};

    # maintperl then no
    require Config;
    Config->import();
    our %Config;
    return 0 if $Config{perl_patchlevel};
    return 1;
}
fa6f41cf 35
# Returns the bits of $^H that overlap with $locale::hint_bits.  The result
# is 0 (false) whenever $locale::hint_bits is unset, e.g. because locale.pm
# has never been loaded.
sub in_locale {
    my $locale_bits = $locale::hint_bits || 0;
    return $^H & $locale_bits;
}
b1aeb384 37
# _get_locale_encoding()
#
# Works out the encoding implied by the current locale.  Takes no arguments.
#
# The lookup order is:
#   1. the codeset reported by I18N::Langinfo::langinfo(CODESET), when that
#      module is available;
#   2. if that yields nothing and in_locale() is true, the part after the
#      "." in $ENV{LC_ALL} or, failing that, $ENV{LANG};
#   3. if 1. yields nothing and in_locale() is false, 'utf8' when LC_ALL or
#      LANG contains something that looks like UTF-8.
#
# A bare 'euc' found in step 2 is refined to euc-jp/-kr/-cn/-tw from the
# country/language part of the locale name.
#
# Returns the encoding name, or undef when nothing could be determined.
# Croaks when the encoding is 'euc' and the country/language part does not
# identify which EUC variant is meant.
sub _get_locale_encoding {
    my $locale_encoding;

    # I18N::Langinfo isn't available everywhere
    eval {
        require I18N::Langinfo;
        I18N::Langinfo->import(qw(langinfo CODESET));
        $locale_encoding = langinfo( CODESET() );
    };

    my $country_language;

    # the environment variables inspected below may be unset
    no warnings 'uninitialized';

    # "!" rather than "not" here: "not" binds looser than "&&", so
    # "not $locale_encoding && in_locale()" negated the whole conjunction,
    # which ignored the locale pragma and made the next branch unreachable.
    if ( !$locale_encoding && in_locale() ) {
        for my $var (qw(LC_ALL LANG)) {
            if ( $ENV{$var} =~ /^([^.]+)\.([^.]+)$/ ) {
                ( $country_language, $locale_encoding ) = ( $1, $2 );
                last;
            }
        }

        # LANGUAGE affects only LC_MESSAGES only on glibc
    }
    elsif ( !$locale_encoding ) {
        if (   $ENV{LC_ALL} =~ /\butf-?8\b/i
            || $ENV{LANG} =~ /\butf-?8\b/i )
        {
            $locale_encoding = 'utf8';
        }

        # Could do more heuristics based on the country and language
        # parts of LC_ALL and LANG (the parts before the dot (if any)),
        # since we have Locale::Country and Locale::Language available.
        # TODO: get a database of Language -> Encoding mappings
        # (the Estonian database at http://www.eki.ee/letter/
        # would be excellent!) --jhi
    }

    # A plain "euc" is ambiguous; pick the variant from the country/language.
    if (   defined $locale_encoding
        && lc($locale_encoding) eq 'euc'
        && defined $country_language )
    {
        if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) {
            $locale_encoding = 'euc-jp';
        }
        elsif ( $country_language =~ /^ko_KR|korean?$/i ) {
            $locale_encoding = 'euc-kr';
        }
        elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)$/i ) {
            $locale_encoding = 'euc-cn';
        }
        elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) {
            $locale_encoding = 'euc-tw';
        }
        else {
            require Carp;
            Carp::croak(
                "encoding: Locale encoding '$locale_encoding' too ambiguous"
            );
        }
    }

    return $locale_encoding;
}
102
# import( $class, $name, %arg )
#
# Implements "use encoding ...".
#
#   $name  Encoding name.  Two special values are recognized:
#          ':_get_locale_encoding' only exports _get_locale_encoding() into
#          the calling package and returns (used by lib/open.pm);
#          ':locale' takes the name from _get_locale_encoding().
#          When undefined, $ENV{PERL_ENCODING} is used instead.
#   %arg   Filter => true   install a source filter that decodes the source
#                           instead of setting ${^ENCODING};
#          STDIN / STDOUT   encoding for that handle; a key that is present
#                           but false leaves the handle untouched.
#
# Returns 1, except for the ':_get_locale_encoding' form, which returns
# nothing.  Croaks when an encoding name is unknown or when setting a
# layer on STDIN/STDOUT dies.
sub import {
    my $class = shift;
    my $name  = shift;

    # used by lib/open.pm
    if ( defined $name && $name eq ':_get_locale_encoding' ) {
        my $caller = caller();
        {
            no strict 'refs';
            *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding;
        }
        return;
    }
    $name = _get_locale_encoding() if defined $name && $name eq ':locale';
    my %arg = @_;
    $name = $ENV{PERL_ENCODING} unless defined $name;
    my $enc = find_encoding($name);
    unless ( defined $enc ) {
        require Carp;
        Carp::croak("encoding: Unknown encoding '$name'");
    }
    $name = $enc->name;    # canonize
    unless ( $arg{Filter} ) {
        DEBUG and warn "_exception($name) = ", _exception($name);
        _exception($name) or ${^ENCODING} = $enc;
        $HAS_PERLIO or return 1;
    }
    else {
        defined( ${^ENCODING} ) and undef ${^ENCODING};

        # implicitly 'use utf8'
        require utf8;    # to fetch $utf8::hint_bits;
        $^H |= $utf8::hint_bits;

        # Best effort: a failure to load or install the filter is not
        # reported.
        eval {
            require Filter::Util::Call;
            Filter::Util::Call->import;
            filter_add(
                sub {
                    my $status = filter_read();
                    if ( $status > 0 ) {
                        $_ = $enc->decode( $_, 1 );
                        DEBUG and warn $_;
                    }
                    $status;
                }
            );
        };
        $@ eq '' and DEBUG and warn "Filter installed";
    }

    # -C / PERL_UNICODE already decided the layers; leave them alone
    defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;

    for my $h (qw(STDIN STDOUT)) {
        my $layer_encoding;
        if ( $arg{$h} ) {
            unless ( defined find_encoding( $arg{$h} ) ) {
                require Carp;
                Carp::croak(
                    "encoding: Unknown encoding for $h, '$arg{$h}'");
            }
            $layer_encoding = $arg{$h};
        }
        elsif ( exists $arg{$h} ) {

            # explicitly given but false (e.g. STDIN => undef):
            # no transcoding layer for this handle
            next;
        }
        else {
            $layer_encoding = $name;
        }

        # Judge success by this eval's own result.  Testing $@ after the
        # if/else would also fire on a stale $@ left by an earlier eval.
        my $ok = eval {
            no warnings 'uninitialized';
            binmode( $h, ":raw :encoding($layer_encoding)" );
            1;
        };
        unless ($ok) {
            require Carp;
            Carp::croak($@);
        }
    }
    return 1;    # I doubt if we need it, though
}
175
# unimport()
#
# Implements "no encoding".  Clears ${^ENCODING}, puts STDIN and STDOUT
# back to binary mode (through the ":raw" layer when $HAS_PERLIO is true,
# plain binmode() otherwise) and, if Filter::Util::Call has been loaded,
# attempts to remove the source filter; a failure there is ignored.
sub unimport {
    no warnings;
    undef ${^ENCODING};

    if ( !$HAS_PERLIO ) {
        binmode(STDIN);
        binmode(STDOUT);
    }
    else {
        binmode( STDIN,  ":raw" );
        binmode( STDOUT, ":raw" );
    }

    eval { filter_del() } if $INC{"Filter/Util/Call.pm"};
}
191
1921;
193__END__
85982a32 194
3ef515df 195=pod
196
197=head1 NAME
198
0ab8f81e 199encoding - allows you to write your script in non-ascii or non-utf8
3ef515df 200
201=head1 SYNOPSIS
202
962111ca 203 use encoding "greek"; # Perl like Greek to you?
3ef515df 204 use encoding "euc-jp"; # Jperl!
205
962111ca 206 # or you can even do this if your shell supports your native encoding
3ef515df 207
962111ca 208 perl -Mencoding=latin2 -e '...' # Feeling centrally European?
0ab8f81e 209 perl -Mencoding=euc-kr -e '...' # Or Korean?
3ef515df 210
3ef515df 211 # more control
212
962111ca 213 # A simple euc-cn => utf-8 converter
6d1c0808 214 use encoding "euc-cn", STDOUT => "utf8"; while(<>){print};
3ef515df 215
216 # "no encoding;" supported (but not scoped!)
217 no encoding;
218
aae85ceb 219 # an alternate way, Filter
220 use encoding "euc-jp", Filter=>1;
aae85ceb 221 # now you can use kanji identifiers -- in euc-jp!
222
b1aeb384 223 # switch on locale -
224 # note that this probably means that unless you have a complete control
225 # over the environments the application is ever going to be run, you should
226 # NOT use the feature of encoding pragma allowing you to write your script
227 # in any recognized encoding because changing locale settings will wreck
228 # the script; you can of course still use the other features of the pragma.
229 use encoding ':locale';
230
3ef515df 231=head1 ABSTRACT
232
962111ca 233Let's start with a bit of history: Perl 5.6.0 introduced Unicode
234support. You could apply C<substr()> and regexes even to complex CJK
235characters -- so long as the script was written in UTF-8. But back
0ab8f81e 236then, text editors that supported UTF-8 were still rare and many users
237instead chose to write scripts in legacy encodings, giving up a whole
238new feature of Perl 5.6.
3ef515df 239
0ab8f81e 240Rewind to the future: starting from perl 5.8.0 with the B<encoding>
962111ca 241pragma, you can write your script in any encoding you like (so long
242as the C<Encode> module supports it) and still enjoy Unicode support.
0f29a567 243This pragma achieves that by doing the following:
05ef2f67 244
245=over
246
247=item *
248
Internally converts all literals (C<q//, qq//, qr//, qw//, qx//>) from
the encoding specified to utf8.  In Perl 5.8.1 and later, literals in
C<tr///> and the C<DATA> pseudo-filehandle are also converted.
252
253=item *
254
Changes the PerlIO layers of C<STDIN> and C<STDOUT> to the encoding
specified.
257
258=back
259
260=head2 Literal Conversions
261
0ab8f81e 262You can write code in EUC-JP as follows:
3ef515df 263
264 my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
265 #<-char-><-char-> # 4 octets
266 s/\bCamel\b/$Rakuda/;
267
268And with C<use encoding "euc-jp"> in effect, it is the same thing as
962111ca 269the code in UTF-8:
3ef515df 270
32b9ed1f 271 my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters
3ef515df 272 s/\bCamel\b/$Rakuda/;
273
05ef2f67 274=head2 PerlIO layers for C<STD(IN|OUT)>
275
276The B<encoding> pragma also modifies the filehandle layers of
4b291ae6 277STDIN and STDOUT to the specified encoding. Therefore,
3ef515df 278
279 use encoding "euc-jp";
280 my $message = "Camel is the symbol of perl.\n";
281 my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
282 $message =~ s/\bCamel\b/$Rakuda/;
283 print $message;
284
962111ca 285Will print "\xF1\xD1\xF1\xCC is the symbol of perl.\n",
286not "\x{99F1}\x{99DD} is the symbol of perl.\n".
3ef515df 287
0ab8f81e 288You can override this by giving extra arguments; see below.
3ef515df 289
990e18f7 290=head2 Implicit upgrading for byte strings
291
292By default, if strings operating under byte semantics and strings
293with Unicode character data are concatenated, the new string will
294be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>.
295
296The B<encoding> pragma changes this to use the specified encoding
297instead. For example:
298
299 use encoding 'utf8';
300 my $string = chr(20000); # a Unicode string
301 utf8::encode($string); # now it's a UTF-8 encoded byte string
302 # concatenate with another Unicode string
303 print length($string . chr(20000));
304
305Will print C<2>, because C<$string> is upgraded as UTF-8. Without
306C<use encoding 'utf8';>, it will print C<4> instead, since C<$string>
307is three octets when interpreted as Latin-1.
308
05ef2f67 309=head1 FEATURES THAT REQUIRE 5.8.1
310
Some of the features offered by this pragma require perl 5.8.1.  Most
of these were implemented by Inaba Hiroto.  All other features and
changes work with 5.8.0.
314
315=over
316
317=item "NON-EUC" doublebyte encodings
318
Because perl needs to parse the script before applying this pragma, such
encodings as Shift_JIS and Big-5 that may contain '\' (BACKSLASH;
\x5c) in the second byte fail because the second byte may
accidentally escape the quoting character that follows.  Perl 5.8.1
or later fixes this problem.
324
325=item tr//
326
C<tr//> was overlooked by Perl 5 porters when they released perl 5.8.0.
See the section below for details.
329
330=item DATA pseudo-filehandle
331
332Another feature that was overlooked was C<DATA>.
333
334=back
335
3ef515df 336=head1 USAGE
337
338=over 4
339
340=item use encoding [I<ENCNAME>] ;
341
Sets the script encoding to I<ENCNAME>.  And unless ${^UNICODE}
exists and is non-zero, the PerlIO layers of STDIN and STDOUT are set to
":encoding(I<ENCNAME>)".
345
346Note that STDERR WILL NOT be changed.
347
348Also note that non-STD file handles remain unaffected. Use C<use
349open> or C<binmode> to change layers of those.
3ef515df 350
351If no encoding is specified, the environment variable L<PERL_ENCODING>
962111ca 352is consulted. If no encoding can be found, the error C<Unknown encoding
353'I<ENCNAME>'> will be thrown.
3ef515df 354
aae85ceb 355=item use encoding I<ENCNAME> [ STDIN =E<gt> I<ENCNAME_IN> ...] ;
3ef515df 356
0ab8f81e 357You can also individually set encodings of STDIN and STDOUT via the
32b9ed1f 358C<< STDIN => I<ENCNAME> >> form. In this case, you cannot omit the
359first I<ENCNAME>. C<< STDIN => undef >> turns the IO transcoding
aae85ceb 360completely off.
3ef515df 361
When ${^UNICODE} exists and is non-zero, these options will be completely
ignored.  ${^UNICODE} is a variable introduced in perl 5.8.1.  See
L<perlvar/"${^UNICODE}"> and L<perlrun/"-C"> for details (perl 5.8.1
and later).
366
151b5d36 367=item use encoding I<ENCNAME> Filter=E<gt>1;
368
369This turns the encoding pragma into a source filter. While the
370default approach just decodes interpolated literals (in qq() and
371qr()), this will apply a source filter to the entire source code. See
05ef2f67 372L</"The Filter Option"> below for details.
151b5d36 373
3ef515df 374=item no encoding;
375
05ef2f67 376Unsets the script encoding. The layers of STDIN, STDOUT are
962111ca 377reset to ":raw" (the default unprocessed raw stream of bytes).
3ef515df 378
379=back
380
151b5d36 381=head1 The Filter Option
382
383The magic of C<use encoding> is not applied to the names of
384identifiers. In order to make C<${"\x{4eba}"}++> ($human++, where human
385is a single Han ideograph) work, you still need to write your script
386in UTF-8 -- or use a source filter. That's what 'Filter=>1' does.
387
151b5d36 388What does this mean? Your source code behaves as if it is written in
389UTF-8 with 'use utf8' in effect. So even if your editor only supports
390Shift_JIS, for example, you can still try examples in Chapter 15 of
391C<Programming Perl, 3rd Ed.>. For instance, you can use UTF-8
392identifiers.
393
394This option is significantly slower and (as of this writing) non-ASCII
395identifiers are not very stable WITHOUT this option and with the
396source code written in UTF-8.
397
398=head2 Filter-related changes at Encode version 1.87
399
400=over
401
402=item *
403
404The Filter option now sets STDIN and STDOUT like non-filter options.
405And C<< STDIN=>I<ENCODING> >> and C<< STDOUT=>I<ENCODING> >> work like
406non-filter version.
407
408=item *
409
410C<use utf8> is implicitly declared so you no longer have to C<use
411utf8> to C<${"\x{4eba}"}++>.
412
413=back
414
3ef515df 415=head1 CAVEATS
416
417=head2 NOT SCOPED
418
The pragma is a per script, not a per block lexical.  Only the last
C<use encoding> or C<no encoding> matters, and it affects
B<the whole script>.  However, the C<no encoding> pragma is supported and
C<use encoding> can appear as many times as you want in a given script.
The multiple use of this pragma is discouraged.
424
For the same reason, the use of this pragma inside modules is also
discouraged (though not as strongly discouraged as the case above.
See below).
05ef2f67 428
If you still have to write a module with this pragma, be very careful
of the load order.  See the code below:
431
432 # called module
433 package Module_IN_BAR;
434 use encoding "bar";
435 # stuff in "bar" encoding here
436 1;
437
438 # caller script
    use encoding "foo";
440 use Module_IN_BAR;
441 # surprise! use encoding "bar" is in effect.
442
443The best way to avoid this oddity is to use this pragma RIGHT AFTER
444other modules are loaded. i.e.
445
446 use Module_IN_BAR;
447 use encoding "foo";
3ef515df 448
449=head2 DO NOT MIX MULTIPLE ENCODINGS
450
451Notice that only literals (string or regular expression) having only
452legacy code points are affected: if you mix data like this
453
d1256cb1 454 \xDF\x{100}
3ef515df 455
456the data is assumed to be in (Latin 1 and) Unicode, not in your native
457encoding. In other words, this will match in "greek":
458
d1256cb1 459 "\xDF" =~ /\x{3af}/
3ef515df 460
461but this will not
462
d1256cb1 463 "\xDF\x{100}" =~ /\x{3af}\x{100}/
3ef515df 464
962111ca 465since the C<\xDF> (ISO 8859-7 GREEK SMALL LETTER IOTA WITH TONOS) on
466the left will B<not> be upgraded to C<\x{3af}> (Unicode GREEK SMALL
467LETTER IOTA WITH TONOS) because of the C<\x{100}> on the left. You
468should not be mixing your legacy data and Unicode in the same string.
3ef515df 469
470This pragma also affects encoding of the 0x80..0xFF code point range:
471normally characters in that range are left as eight-bit bytes (unless
472they are combined with characters with code points 0x100 or larger,
473in which case all characters need to become UTF-8 encoded), but if
474the C<encoding> pragma is present, even the 0x80..0xFF range always
475gets UTF-8 encoded.
476
477After all, the best thing about this pragma is that you don't have to
0ab8f81e 478resort to \x{....} just to spell your name in a native encoding.
479So feel free to put your strings in your encoding in quotes and
480regexes.
3ef515df 481
151b5d36 482=head2 tr/// with ranges
4b291ae6 483
The B<encoding> pragma works by decoding string literals in
C<q//, qq//, qr//, qw//, qx//> and so forth.  In perl 5.8.0, this
does not apply to C<tr///>.  Therefore,
487
488 use encoding 'euc-jp';
489 #....
490 $kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/;
491 # -------- -------- -------- --------
492
493Does not work as
494
495 $kana =~ tr/\x{3041}-\x{3093}/\x{30a1}-\x{30f3}/;
496
497=over
498
499=item Legend of characters above
500
501 utf8 euc-jp charnames::viacode()
502 -----------------------------------------
503 \x{3041} \xA4\xA1 HIRAGANA LETTER SMALL A
504 \x{3093} \xA4\xF3 HIRAGANA LETTER N
505 \x{30a1} \xA5\xA1 KATAKANA LETTER SMALL A
506 \x{30f3} \xA5\xF3 KATAKANA LETTER N
507
508=back
509
05ef2f67 510This counterintuitive behavior has been fixed in perl 5.8.1.
151b5d36 511
4b291ae6 512=head3 workaround to tr///;
513
In perl 5.8.0, you can work around this as follows:
4b291ae6 515
516 use encoding 'euc-jp';
151b5d36 517 # ....
4b291ae6 518 eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ };
519
Note the C<tr//> expression is surrounded by C<qq{}>.  The idea behind
this is the same as the classic idiom that makes C<tr///> 'interpolate'.
522
523 tr/$from/$to/; # wrong!
524 eval qq{ tr/$from/$to/ }; # workaround.
525
526Nevertheless, in case of B<encoding> pragma even C<q//> is affected so
527C<tr///> not being decoded was obviously against the will of Perl5
05ef2f67 528Porters so it has been fixed in Perl 5.8.1 or later.
aae85ceb 529
3ef515df 530=head1 EXAMPLE - Greekperl
531
532 use encoding "iso 8859-7";
533
0ab8f81e 534 # \xDF in ISO 8859-7 (Greek) is \x{3af} in Unicode.
3ef515df 535
536 $a = "\xDF";
537 $b = "\x{100}";
538
539 printf "%#x\n", ord($a); # will print 0x3af, not 0xdf
540
541 $c = $a . $b;
542
543 # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
544
545 # chr() is affected, and ...
546
547 print "mega\n" if ord(chr(0xdf)) == 0x3af;
548
549 # ... ord() is affected by the encoding pragma ...
550
551 print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
552
553 # ... as are eq and cmp ...
554
555 print "peta\n" if "\x{3af}" eq pack("C", 0xdf);
556 print "exa\n" if "\x{3af}" cmp pack("C", 0xdf) == 0;
557
558 # ... but pack/unpack C are not affected, in case you still
0ab8f81e 559 # want to go back to your native encoding
3ef515df 560
561 print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
562
563=head1 KNOWN PROBLEMS
564
151b5d36 565=over
566
0f29a567 567=item literals in regex that are longer than 127 bytes
151b5d36 568
0ab8f81e 569For native multibyte encodings (either fixed or variable length),
3ef515df 570the current implementation of the regular expressions may introduce
0ab8f81e 571recoding errors for regular expression literals longer than 127 bytes.
3ef515df 572
05ef2f67 573=item EBCDIC
151b5d36 574
3ef515df 575The encoding pragma is not supported on EBCDIC platforms.
0ab8f81e 576(Porters who are willing and able to remove this limitation are
577welcome.)
3ef515df 578
05ef2f67 579=item format
580
581This pragma doesn't work well with format because PerlIO does not
582get along very well with it. When format contains non-ascii
583characters it prints funny or gets "wide character warnings".
584To understand it, try the code below.
585
586 # Save this one in utf8
587 # replace *non-ascii* with a non-ascii string
588 my $camel;
589 format STDOUT =
590 *non-ascii*@>>>>>>>
591 $camel
592 .
593 $camel = "*non-ascii*";
594 binmode(STDOUT=>':encoding(utf8)'); # bang!
595 write; # funny
596 print $camel, "\n"; # fine
597
598Without binmode this happens to work but without binmode, print()
599fails instead of write().
600
601At any rate, the very use of format is questionable when it comes to
602unicode characters since you have to consider such things as character
603width (i.e. double-width for ideographs) and directions (i.e. BIDI for
604Arabic and Hebrew).
605
151b5d36 606=back
607
b1aeb384 608=head2 The Logic of :locale
609
610The logic of C<:locale> is as follows:
611
612=over 4
613
614=item 1.
615
616If the platform supports the langinfo(CODESET) interface, the codeset
617returned is used as the default encoding for the open pragma.
618
619=item 2.
620
621If 1. didn't work but we are under the locale pragma, the environment
622variables LC_ALL and LANG (in that order) are matched for encodings
623(the part after C<.>, if any), and if any found, that is used
624as the default encoding for the open pragma.
625
626=item 3.
627
628If 1. and 2. didn't work, the environment variables LC_ALL and LANG
629(in that order) are matched for anything looking like UTF-8, and if
630any found, C<:utf8> is used as the default encoding for the open
631pragma.
632
633=back
634
635If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
636contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
637the default encoding of your STDIN, STDOUT, and STDERR, and of
638B<any subsequent file open>, is UTF-8.
639
05ef2f67 640=head1 HISTORY
641
642This pragma first appeared in Perl 5.8.0. For features that require
6435.8.1 and better, see above.
644
b1aeb384 645The C<:locale> subpragma was implemented in 2.01, or Perl 5.8.6.
646
3ef515df 647=head1 SEE ALSO
648
aae85ceb 649L<perlunicode>, L<Encode>, L<open>, L<Filter::Util::Call>,
650
651Ch. 15 of C<Programming Perl (3rd Edition)>
652by Larry Wall, Tom Christiansen, Jon Orwant;
653O'Reilly & Associates; ISBN 0-596-00027-8
3ef515df 654
655=cut