3 $open::hint_bits = 0x20000;
9 sub in_locale { $^H & $locale::hint_bits }
11 sub _get_locale_encoding {
12 unless (defined $locale_encoding) {
13 # I18N::Langinfo isn't available everywhere
15 require I18N::Langinfo;
16 I18N::Langinfo->import(qw(langinfo CODESET));
17 $locale_encoding = langinfo(CODESET());
21 no warnings 'uninitialized';
23 if (not $locale_encoding && in_locale()) {
24 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
25 ($country_language, $locale_encoding) = ($1, $2);
26 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
27 ($country_language, $locale_encoding) = ($1, $2);
29 } elsif (not $locale_encoding) {
30 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
31 $ENV{LANG} =~ /\butf-?8\b/i) {
32 $locale_encoding = 'utf8';
34 # Could do more heuristics based on the country and language
35 # parts of LC_ALL and LANG (the parts before the dot (if any)),
36 # since we have Locale::Country and Locale::Language available.
37 # TODO: get a database of Language -> Encoding mappings
38 # (the Estonian database at http://www.eki.ee/letter/
39 # would be excellent!) --jhi
41 if (defined $locale_encoding &&
42 $locale_encoding eq 'euc' &&
43 defined $country_language) {
44 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
45 $locale_encoding = 'euc-jp';
46 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
47 $locale_encoding = 'euc-kr';
48 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
49 $locale_encoding = 'euc-cn';
50 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
51 $locale_encoding = 'big5';
53 croak "Locale encoding 'euc' too ambiguous"
54 if $locale_encoding eq 'euc';
60 my ($class,@args) = @_;
61 croak("`use open' needs explicit list of disciplines") unless @args;
62 $^H |= $open::hint_bits;
63 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
65 my $type = shift(@args);
67 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
71 $dscp = shift(@args) || '';
74 foreach my $layer (split(/\s+/,$dscp)) {
76 if ($layer eq 'locale') {
78 _get_locale_encoding()
79 unless defined $locale_encoding;
80 (carp("Cannot figure out an encoding to use"), last)
81 unless defined $locale_encoding;
82 if ($locale_encoding =~ /^utf-?8$/i) {
85 $layer = "encoding($locale_encoding)";
88 unless(PerlIO::Layer::->find($layer)) {
89 carp("Unknown discipline layer '$layer'");
93 if ($layer =~ /^(crlf|raw)$/) {
94 $^H{"open_$type"} = $layer;
97 # print "# type = $type, val = @val\n";
101 elsif ($type eq 'OUT') {
102 $out = join(' ',@val);
104 elsif ($type eq 'IO') {
105 $in = $out = join(' ',@val);
108 croak "Unknown discipline class '$type'";
111 ${^OPEN} = join("\0",$in,$out) if $in or $out;
119 open - perl pragma to set default disciplines for input and output
123 use open IN => ":crlf", OUT => ":raw";
124 use open OUT => ':utf8';
125 use open IO => ":encoding(iso-8859-7)";
127 use open IO => ':locale';
131 use open ':encoding(iso-8859-7)';
135 Full-fledged support for I/O disciplines is now implemented provided
136 Perl is configured to use PerlIO as its IO system (which is now the
139 The C<open> pragma serves as one of the interfaces to declare default
140 "layers" (aka disciplines) for all I/O.
142 The C<open> pragma is used to declare one or more default layers for
143 I/O operations. Any open(), readpipe() (aka qx//) and similar
144 operators found within the lexical scope of this pragma will use the
147 With the C<IN> subpragma you can declare the default layers
148 of input streams, and with the C<OUT> subpragma you can declare
149 the default layers of output streams. With the C<IO> subpragma
150 you can control both input and output streams simultaneously.
152 If you have a legacy encoding, you can use the C<:encoding(...)> tag.
154 if you want to set your encoding disciplines based on your
155 locale environment variables, you can use the C<:locale> tag.
158 $ENV{LANG} = 'ru_RU.KOI8-R';
159 # the :locale will probe the locale environment variables like LANG
160 use open OUT => ':locale';
162 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
165 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
171 use open IO => ':utf8';
176 use open IO => ':locale';
180 use open ':encoding(iso-8859-7)';
181 use open IO => ':encoding(iso-8859-7)';
183 When open() is given an explicit list of layers they are appended to
184 the list declared using this pragma.
186 The logic of C<:locale> is as follows:
192 If the platform supports the langinfo(CODESET) interface, the codeset
193 returned is used as the default encoding for the open pragma.
197 If 1. didn't work but we are under the locale pragma, the environment
198 variables LC_ALL and LANG (in that order) are matched for encodings
199 (the part after C<.>, if any), and if any found, that is used
200 as the default encoding for the open pragma.
204 If 1. and 2. didn't work, the environment variables LC_ALL and LANG
205 (in that order) are matched for anything looking like UTF-8, and if
206 any found, C<:utf8> is used as the default encoding for the open
211 Directory handles may also support disciplines in future.
213 =head1 NONPERLIO FUNCTIONALITY
215 If Perl is not built to use PerlIO as its IO system then only the two
216 pseudo-disciplines ":raw" and ":crlf" are available.
218 The ":raw" discipline corresponds to "binary mode" and the ":crlf"
219 discipline corresponds to "text mode" on platforms that distinguish
220 between the two modes when opening files (which is many DOS-like
221 platforms, including Windows). These two disciplines are no-ops on
222 platforms where binmode() is a no-op, but perform their functions
223 everywhere if PerlIO is enabled.
225 =head1 IMPLEMENTATION DETAILS
227 There is a class method in C<PerlIO::Layer> C<find> which is
228 implemented as XS code. It is called by C<import> to validate the
231 PerlIO::Layer::->find("perlio")
233 The return value (if defined) is a Perl object, of class
234 C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
235 yet there is nothing useful you can do with the object at the perl
240 L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,