# Bit mask that import() ORs into $^H, the compile-time hints variable.
$open::hint_bits = 0x20000;

# Mask $^H with the locale pragma's hint bits.
# Takes no arguments.  Returns a non-zero value when any of
# $locale::hint_bits is set in $^H, and 0 otherwise (including when
# $locale::hint_bits is unset or false).
sub in_locale {
    my $locale_mask = $locale::hint_bits || 0;
    return $^H & $locale_mask;
}
# Work out the character encoding of the current locale and cache it in
# the file-level variable $locale_encoding (which import() reads).
#
# Sources are tried in this order, stopping at the first one that yields
# a non-empty value:
#   1. langinfo(CODESET) from I18N::Langinfo, when that module is usable;
#   2. under the locale pragma only, the part after the "." in
#      $ENV{LC_ALL}, then in $ENV{LANG};
#   3. anything that looks like UTF-8 in $ENV{LC_ALL} or $ENV{LANG}.
#
# A bare 'euc' is refined to euc-jp / euc-kr / euc-cn / euc-tw from the
# country/language part of the locale name when step 2 supplied one.
#
# Takes no arguments.  Returns the encoding name, or undef when none
# could be determined ($locale_encoding is then left undefined).
# Croaks when the encoding is 'euc', a country/language part is known,
# and that part matches none of the four patterns below.
sub _get_locale_encoding {
    unless (defined $locale_encoding) {
        # I18N::Langinfo isn't available everywhere, so loading or calling
        # it must never be fatal; on failure the heuristics below apply.
        {
            local $@;
            eval {
                require I18N::Langinfo;
                I18N::Langinfo->import(qw(langinfo CODESET));
                $locale_encoding = langinfo(CODESET());
            };
        }
        my $country_language;

        # %ENV entries and $locale_encoding may legitimately be undef here.
        no warnings 'uninitialized';

        # Step 2.  "!" rather than the low-precedence "not": written as
        # "not $locale_encoding && in_locale()" the test parses as
        # "not ($locale_encoding && in_locale())", which is true whenever
        # either operand is false.
        if (!$locale_encoding && in_locale()) {
            if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
                ($country_language, $locale_encoding) = ($1, $2);
            } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
                ($country_language, $locale_encoding) = ($1, $2);
            }
        }

        # Step 3.  A separate "if" (not "elsif") so that it also runs when
        # step 2 was attempted but found nothing.
        if (!$locale_encoding) {
            if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
                $ENV{LANG}   =~ /\butf-?8\b/i) {
                $locale_encoding = 'utf8';
            }
            # Could do more heuristics based on the country and language
            # parts of LC_ALL and LANG (the parts before the dot (if any)),
            # since we have Locale::Country and Locale::Language available.
            # TODO: get a database of Language -> Encoding mappings
            # (the Estonian database at http://www.eki.ee/letter/
            # would be excellent!) --jhi
        }

        if (defined $locale_encoding &&
            $locale_encoding eq 'euc' &&
            defined $country_language) {
            # Each pattern is "prefix alternative | suffix alternative":
            # the ^ and $ anchors bind to one alternative each.
            if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
                $locale_encoding = 'euc-jp';
            } elsif ($country_language =~ /^ko_KR|korean?$/i) {
                $locale_encoding = 'euc-kr';
            } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
                $locale_encoding = 'euc-cn';
            } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
                $locale_encoding = 'euc-tw';
            }
            croak "Locale encoding 'euc' too ambiguous"
                if $locale_encoding eq 'euc';
        }

        # An empty answer is no answer: leave the cache undefined so that
        # callers testing defined($locale_encoding) see the failure.
        undef $locale_encoding unless length $locale_encoding;
    }
    return $locale_encoding;
}
60 my ($class,@args) = @_;
61 croak("`use open' needs explicit list of disciplines") unless @args;
63 $^H |= $open::hint_bits;
64 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
66 my $type = shift(@args);
68 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
71 } elsif ($type eq ':std') {
75 $dscp = shift(@args) || '';
78 foreach my $layer (split(/\s+/,$dscp)) {
80 if ($layer eq 'locale') {
82 _get_locale_encoding()
83 unless defined $locale_encoding;
84 (carp("Cannot figure out an encoding to use"), last)
85 unless defined $locale_encoding;
86 if ($locale_encoding =~ /^utf-?8$/i) {
89 $layer = "encoding($locale_encoding)";
93 my $target = $layer; # the layer name itself
94 $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters
96 unless(PerlIO::Layer::->find($target)) {
97 carp("Unknown discipline layer '$layer'");
100 push(@val,":$layer");
101 if ($layer =~ /^(crlf|raw)$/) {
102 $^H{"open_$type"} = $layer;
106 $in = join(' ',@val);
108 elsif ($type eq 'OUT') {
109 $out = join(' ',@val);
111 elsif ($type eq 'IO') {
112 $in = $out = join(' ',@val);
115 croak "Unknown discipline class '$type'";
118 ${^OPEN} = join("\0",$in,$out) if $in or $out;
121 if ($in =~ /:utf8\b/) {
122 binmode(STDIN, ":utf8");
123 } elsif ($in =~ /(\w+\(.+\))/) {
124 binmode(STDIN, ":$1");
128 if ($out =~ /:utf8\b/) {
129 binmode(STDOUT, ":utf8");
130 binmode(STDERR, ":utf8");
131 } elsif ($out =~ /(\w+\(.+\))/) {
132 binmode(STDOUT, ":$1");
133 binmode(STDERR, ":$1");
144 open - perl pragma to set default disciplines for input and output
148 use open IN => ":crlf", OUT => ":raw";
149 use open OUT => ':utf8';
150 use open IO => ":encoding(iso-8859-7)";
152 use open IO => ':locale';
156 use open ':encoding(iso-8859-7)';
162 Full-fledged support for I/O disciplines is now implemented provided
163 Perl is configured to use PerlIO as its IO system (which is now the
166 The C<open> pragma serves as one of the interfaces to declare default
167 "layers" (aka disciplines) for all I/O.
169 The C<open> pragma is used to declare one or more default layers for
170 I/O operations. Any open(), readpipe() (aka qx//) and similar
171 operators found within the lexical scope of this pragma will use the
174 With the C<IN> subpragma you can declare the default layers
175 of input streams, and with the C<OUT> subpragma you can declare
176 the default layers of output streams. With the C<IO> subpragma
177 you can control both input and output streams simultaneously.
179 If you have a legacy encoding, you can use the C<:encoding(...)> tag.
181 If you want to set your encoding disciplines based on your
182 locale environment variables, you can use the C<:locale> tag.
185 $ENV{LANG} = 'ru_RU.KOI8-R';
186 # the :locale will probe the locale environment variables like LANG
187 use open OUT => ':locale';
189 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
192 printf "%#x\n", ord(<I>); # this should print 0xc1
198 use open IO => ':utf8';
203 use open IO => ':locale';
207 use open ':encoding(iso-8859-7)';
208 use open IO => ':encoding(iso-8859-7)';
210 When open() is given an explicit list of layers they are appended to
211 the list declared using this pragma.
213 The C<:std> subpragma on its own has no effect, but if combined with
214 the C<:utf8> or C<:encoding> subpragmas, it converts the standard
215 filehandles (STDIN, STDOUT, STDERR) to comply with the encoding selected
216 for input/output handles. For example, if both input and output are
217 chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
218 STDERR are also in C<:utf8>. On the other hand, if only output is
219 chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only
220 STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
221 implicitly turns on C<:std>.
223 The logic of C<:locale> is as follows:
229 If the platform supports the langinfo(CODESET) interface, the codeset
230 returned is used as the default encoding for the open pragma.
234 If 1. didn't work but we are under the locale pragma, the environment
235 variables LC_ALL and LANG (in that order) are matched for encodings
236 (the part after C<.>, if any), and if any is found, that is used
237 as the default encoding for the open pragma.
241 If 1. and 2. didn't work, the environment variables LC_ALL and LANG
242 (in that order) are matched for anything looking like UTF-8, and if
243 any is found, C<:utf8> is used as the default encoding for the open
248 If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
249 contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
250 the default encoding of your STDIN, STDOUT, and STDERR, and of
251 B<any subsequent file open>, is UTF-8.
253 Directory handles may also support disciplines in the future.
255 =head1 NONPERLIO FUNCTIONALITY
257 If Perl is not built to use PerlIO as its IO system then only the two
258 pseudo-disciplines ":raw" and ":crlf" are available.
260 The ":raw" discipline corresponds to "binary mode" and the ":crlf"
261 discipline corresponds to "text mode" on platforms that distinguish
262 between the two modes when opening files (which is many DOS-like
263 platforms, including Windows). These two disciplines are no-ops on
264 platforms where binmode() is a no-op, but perform their functions
265 everywhere if PerlIO is enabled.
267 =head1 IMPLEMENTATION DETAILS
269 There is a class method C<find> in C<PerlIO::Layer> which is
270 implemented as XS code. It is called by C<import> to validate the
273 PerlIO::Layer::->find("perlio")
275 The return value (if defined) is a Perl object, of class
276 C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
277 yet there is nothing useful you can do with the object at the perl
282 L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,