3 $open::hint_bits = 0x20000;
9 sub in_locale { $^H & ($locale::hint_bits || 0)}
11 sub _get_locale_encoding {
12 unless (defined $locale_encoding) {
13 # I18N::Langinfo isn't available everywhere
15 require I18N::Langinfo;
16 I18N::Langinfo->import(qw(langinfo CODESET));
17 $locale_encoding = langinfo(CODESET());
21 no warnings 'uninitialized';
23 if (not $locale_encoding && in_locale()) {
24 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
25 ($country_language, $locale_encoding) = ($1, $2);
26 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
27 ($country_language, $locale_encoding) = ($1, $2);
29 } elsif (not $locale_encoding) {
30 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
31 $ENV{LANG} =~ /\butf-?8\b/i) {
32 $locale_encoding = 'utf8';
34 # Could do more heuristics based on the country and language
35 # parts of LC_ALL and LANG (the parts before the dot (if any)),
36 # since we have Locale::Country and Locale::Language available.
37 # TODO: get a database of Language -> Encoding mappings
38 # (the Estonian database at http://www.eki.ee/letter/
39 # would be excellent!) --jhi
41 if (defined $locale_encoding &&
42 $locale_encoding eq 'euc' &&
43 defined $country_language) {
44 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
45 $locale_encoding = 'euc-jp';
46 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
47 $locale_encoding = 'euc-kr';
48 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
49 $locale_encoding = 'euc-cn';
50 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
51 $locale_encoding = 'big5';
53 croak "Locale encoding 'euc' too ambiguous"
54 if $locale_encoding eq 'euc';
60 my ($class,@args) = @_;
61 croak("`use open' needs explicit list of disciplines") unless @args;
63 $^H |= $open::hint_bits;
64 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
66 my $type = shift(@args);
68 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
71 } elsif ($type eq ':std') {
75 $dscp = shift(@args) || '';
78 foreach my $layer (split(/\s+/,$dscp)) {
80 if ($layer eq 'locale') {
82 _get_locale_encoding()
83 unless defined $locale_encoding;
84 (carp("Cannot figure out an encoding to use"), last)
85 unless defined $locale_encoding;
86 if ($locale_encoding =~ /^utf-?8$/i) {
89 $layer = "encoding($locale_encoding)";
93 unless(PerlIO::Layer::->find($layer)) {
94 carp("Unknown discipline layer '$layer'");
98 if ($layer =~ /^(crlf|raw)$/) {
99 $^H{"open_$type"} = $layer;
103 $in = join(' ',@val);
105 elsif ($type eq 'OUT') {
106 $out = join(' ',@val);
108 elsif ($type eq 'IO') {
109 $in = $out = join(' ',@val);
112 croak "Unknown discipline class '$type'";
115 ${^OPEN} = join("\0",$in,$out) if $in or $out;
118 if ($in =~ /:utf8\b/) {
119 binmode(STDIN, ":utf8");
120 } elsif ($in =~ /(\w+\(.+\))/) {
121 binmode(STDIN, ":$1");
125 if ($out =~ /:utf8\b/) {
126 binmode(STDOUT, ":utf8");
127 binmode(STDERR, ":utf8");
128 } elsif ($out =~ /(\w+\(.+\))/) {
129 binmode(STDOUT, ":$1");
130 binmode(STDERR, ":$1");
141 open - perl pragma to set default disciplines for input and output
145 use open IN => ":crlf", OUT => ":raw";
146 use open OUT => ':utf8';
147 use open IO => ":encoding(iso-8859-7)";
149 use open IO => ':locale';
153 use open ':encoding(iso-8859-7)';
159 Full-fledged support for I/O disciplines is now implemented provided
160 Perl is configured to use PerlIO as its IO system (which is now the
163 The C<open> pragma serves as one of the interfaces to declare default
164 "layers" (aka disciplines) for all I/O.
166 The C<open> pragma is used to declare one or more default layers for
167 I/O operations. Any open(), readpipe() (aka qx//) and similar
168 operators found within the lexical scope of this pragma will use the
171 With the C<IN> subpragma you can declare the default layers
172 of input streams, and with the C<OUT> subpragma you can declare
173 the default layers of output streams. With the C<IO> subpragma
174 you can control both input and output streams simultaneously.
176 If you have a legacy encoding, you can use the C<:encoding(...)> tag.
178 if you want to set your encoding disciplines based on your
179 locale environment variables, you can use the C<:locale> tag.
182 $ENV{LANG} = 'ru_RU.KOI8-R';
183 # the :locale will probe the locale environment variables like LANG
184 use open OUT => ':locale';
186 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
189 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
195 use open IO => ':utf8';
200 use open IO => ':locale';
204 use open ':encoding(iso-8859-7)';
205 use open IO => ':encoding(iso-8859-7)';
207 When open() is given an explicit list of layers they are appended to
208 the list declared using this pragma.
210 The C<:std> subpragma on its own has no effect, but if combined with
211 the C<:utf8> or C<:encoding> subpragmas, it converts the standard
212 filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
213 for input/output handles. For example, if both input and out are
214 chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
215 STDERR are also in C<:utf8>. On the other hand, if only output is
216 chosen to be in C<:encoding(koi8r)', a C<:std> will cause only the
217 STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
218 implicitly turns on C<:std>.
220 The logic of C<:locale> is as follows:
226 If the platform supports the langinfo(CODESET) interface, the codeset
227 returned is used as the default encoding for the open pragma.
231 If 1. didn't work but we are under the locale pragma, the environment
232 variables LC_ALL and LANG (in that order) are matched for encodings
233 (the part after C<.>, if any), and if any found, that is used
234 as the default encoding for the open pragma.
238 If 1. and 2. didn't work, the environment variables LC_ALL and LANG
239 (in that order) are matched for anything looking like UTF-8, and if
240 any found, C<:utf8> is used as the default encoding for the open
245 Directory handles may also support disciplines in future.
247 =head1 NONPERLIO FUNCTIONALITY
249 If Perl is not built to use PerlIO as its IO system then only the two
250 pseudo-disciplines ":raw" and ":crlf" are available.
252 The ":raw" discipline corresponds to "binary mode" and the ":crlf"
253 discipline corresponds to "text mode" on platforms that distinguish
254 between the two modes when opening files (which is many DOS-like
255 platforms, including Windows). These two disciplines are no-ops on
256 platforms where binmode() is a no-op, but perform their functions
257 everywhere if PerlIO is enabled.
259 =head1 IMPLEMENTATION DETAILS
261 There is a class method in C<PerlIO::Layer> C<find> which is
262 implemented as XS code. It is called by C<import> to validate the
265 PerlIO::Layer::->find("perlio")
267 The return value (if defined) is a Perl object, of class
268 C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
269 yet there is nothing useful you can do with the object at the perl
274 L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,