Integrate mainline.
[p5sagit/p5-mst-13.2.git] / lib / open.pm
CommitLineData
d1edabcf 1package open;
ac27b0f5 2use Carp;
16fe6d59 3$open::hint_bits = 0x20000;
4
0c4f7ff0 5our $VERSION = '1.01';
b75c8c73 6
58d53262 7my $locale_encoding;
8
b178108d 9sub in_locale { $^H & ($locale::hint_bits || 0)}
58d53262 10
11sub _get_locale_encoding {
12 unless (defined $locale_encoding) {
276c9210 13 # I18N::Langinfo isn't available everywhere
9615f2ee 14 eval {
15 require I18N::Langinfo;
16 I18N::Langinfo->import(qw(langinfo CODESET));
ba6ce41c 17 $locale_encoding = langinfo(CODESET());
9615f2ee 18 };
11fc5dc3 19 my $country_language;
a4157ebb 20
21 no warnings 'uninitialized';
22
58d53262 23 if (not $locale_encoding && in_locale()) {
11fc5dc3 24 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
25 ($country_language, $locale_encoding) = ($1, $2);
26 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
27 ($country_language, $locale_encoding) = ($1, $2);
58d53262 28 }
1e616cf5 29 } elsif (not $locale_encoding) {
30 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
31 $ENV{LANG} =~ /\butf-?8\b/i) {
32 $locale_encoding = 'utf8';
33 }
34 # Could do more heuristics based on the country and language
58d53262 35 # parts of LC_ALL and LANG (the parts before the dot (if any)),
36 # since we have Locale::Country and Locale::Language available.
37 # TODO: get a database of Language -> Encoding mappings
421e5dc3 38 # (the Estonian database at http://www.eki.ee/letter/
39 # would be excellent!) --jhi
58d53262 40 }
11fc5dc3 41 if (defined $locale_encoding &&
42 $locale_encoding eq 'euc' &&
43 defined $country_language) {
56fb2e42 44 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
1e616cf5 45 $locale_encoding = 'euc-jp';
5a192dee 46 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
1e616cf5 47 $locale_encoding = 'euc-kr';
a4157ebb 48 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
49 $locale_encoding = 'euc-cn';
56fb2e42 50 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
a4157ebb 51 $locale_encoding = 'big5';
11fc5dc3 52 }
53 croak "Locale encoding 'euc' too ambiguous"
54 if $locale_encoding eq 'euc';
55 }
58d53262 56 }
57}
58
16fe6d59 59sub import {
dfebf958 60 my ($class,@args) = @_;
61 croak("`use open' needs explicit list of disciplines") unless @args;
b178108d 62 my $std;
16fe6d59 63 $^H |= $open::hint_bits;
ba6ce41c 64 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
dfebf958 65 while (@args) {
66 my $type = shift(@args);
1e616cf5 67 my $dscp;
68 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
69 $type = 'IO';
70 $dscp = ":$1";
b178108d 71 } elsif ($type eq ':std') {
72 $std = 1;
73 next;
1e616cf5 74 } else {
725d232a 75 $dscp = shift(@args) || '';
1e616cf5 76 }
ac27b0f5 77 my @val;
1e616cf5 78 foreach my $layer (split(/\s+/,$dscp)) {
dfebf958 79 $layer =~ s/^://;
58d53262 80 if ($layer eq 'locale') {
81 use Encode;
82 _get_locale_encoding()
83 unless defined $locale_encoding;
a4157ebb 84 (carp("Cannot figure out an encoding to use"), last)
58d53262 85 unless defined $locale_encoding;
11fc5dc3 86 if ($locale_encoding =~ /^utf-?8$/i) {
87 $layer = "utf8";
88 } else {
738b23dc 89 $layer = "encoding($locale_encoding)";
11fc5dc3 90 }
b178108d 91 $std = 1;
97ed432b 92 } else {
93 unless(PerlIO::Layer::->find($layer)) {
94 carp("Unknown discipline layer '$layer'");
95 }
ac27b0f5 96 }
97 push(@val,":$layer");
98 if ($layer =~ /^(crlf|raw)$/) {
99 $^H{"open_$type"} = $layer;
16fe6d59 100 }
ac27b0f5 101 }
102 if ($type eq 'IN') {
103 $in = join(' ',@val);
104 }
105 elsif ($type eq 'OUT') {
106 $out = join(' ',@val);
16fe6d59 107 }
1e616cf5 108 elsif ($type eq 'IO') {
f3b00462 109 $in = $out = join(' ',@val);
110 }
16fe6d59 111 else {
ac27b0f5 112 croak "Unknown discipline class '$type'";
16fe6d59 113 }
114 }
a4157ebb 115 ${^OPEN} = join("\0",$in,$out) if $in or $out;
b178108d 116 if ($std) {
117 if ($in) {
118 if ($in =~ /:utf8\b/) {
119 binmode(STDIN, ":utf8");
120 } elsif ($in =~ /(\w+\(.+\))/) {
121 binmode(STDIN, ":$1");
122 }
123 }
124 if ($out) {
125 if ($out =~ /:utf8\b/) {
126 binmode(STDOUT, ":utf8");
127 binmode(STDERR, ":utf8");
128 } elsif ($out =~ /(\w+\(.+\))/) {
129 binmode(STDOUT, ":$1");
130 binmode(STDERR, ":$1");
131 }
132 }
133 }
16fe6d59 134}
135
1361;
137__END__
d1edabcf 138
139=head1 NAME
140
141open - perl pragma to set default disciplines for input and output
142
143=head1 SYNOPSIS
144
1e616cf5 145 use open IN => ":crlf", OUT => ":raw";
146 use open OUT => ':utf8';
147 use open IO => ":encoding(iso-8859-7)";
148
149 use open IO => ':locale';
725d232a 150
1e616cf5 151 use open ':utf8';
152 use open ':locale';
153 use open ':encoding(iso-8859-7)';
d1edabcf 154
b178108d 155 use open ':std';
156
d1edabcf 157=head1 DESCRIPTION
158
d151aa0e 159Full-fledged support for I/O disciplines is now implemented provided
160Perl is configured to use PerlIO as its IO system (which is now the
161default).
16fe6d59 162
7d3b96bb 163The C<open> pragma serves as one of the interfaces to declare default
164"layers" (aka disciplines) for all I/O.
165
166The C<open> pragma is used to declare one or more default layers for
d151aa0e 167I/O operations. Any open(), readpipe() (aka qx//) and similar
168operators found within the lexical scope of this pragma will use the
169declared defaults.
7d3b96bb 170
1e616cf5 171With the C<IN> subpragma you can declare the default layers
d8d29d4f 172of input streams, and with the C<OUT> subpragma you can declare
1e616cf5 173the default layers of output streams. With the C<IO> subpragma
174you can control both input and output streams simultaneously.
175
176If you have a legacy encoding, you can use the C<:encoding(...)> tag.
177
178if you want to set your encoding disciplines based on your
179locale environment variables, you can use the C<:locale> tag.
180For example:
181
182 $ENV{LANG} = 'ru_RU.KOI8-R';
dbd62f41 183 # the :locale will probe the locale environment variables like LANG
184 use open OUT => ':locale';
1e616cf5 185 open(O, ">koi8");
23bcb45a 186 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
1e616cf5 187 close O;
188 open(I, "<koi8");
23bcb45a 189 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
1e616cf5 190 close I;
191
192These are equivalent
193
194 use open ':utf8';
195 use open IO => ':utf8';
196
197as are these
198
199 use open ':locale';
200 use open IO => ':locale';
201
202and these
203
204 use open ':encoding(iso-8859-7)';
205 use open IO => ':encoding(iso-8859-7)';
206
d151aa0e 207When open() is given an explicit list of layers they are appended to
208the list declared using this pragma.
7d3b96bb 209
b178108d 210The C<:std> subpragma on its own has no effect, but if combined with
211the C<:utf8> or C<:encoding> subpragmas, it converts the standard
212filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
213for input/output handles. For example, if both input and out are
214chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
215STDERR are also in C<:utf8>. On the other hand, if only output is
216chosen to be in C<:encoding(koi8r)', a C<:std> will cause only the
217STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
218implicitly turns on C<:std>.
219
ba9a69eb 220The logic of C<:locale> is as follows:
221
222=over 4
223
224=item 1.
225
226If the platform supports the langinfo(CODESET) interface, the codeset
227returned is used as the default encoding for the open pragma.
228
229=item 2.
230
231If 1. didn't work but we are under the locale pragma, the environment
232variables LC_ALL and LANG (in that order) are matched for encodings
233(the part after C<.>, if any), and if any found, that is used
234as the default encoding for the open pragma.
235
236=item 3.
237
238If 1. and 2. didn't work, the environment variables LC_ALL and LANG
239(in that order) are matched for anything looking like UTF-8, and if
240any found, C<:utf8> is used as the default encoding for the open
241pragma.
242
243=back
244
7d3b96bb 245Directory handles may also support disciplines in future.
246
247=head1 NONPERLIO FUNCTIONALITY
248
d151aa0e 249If Perl is not built to use PerlIO as its IO system then only the two
250pseudo-disciplines ":raw" and ":crlf" are available.
16fe6d59 251
252The ":raw" discipline corresponds to "binary mode" and the ":crlf"
253discipline corresponds to "text mode" on platforms that distinguish
254between the two modes when opening files (which is many DOS-like
d151aa0e 255platforms, including Windows). These two disciplines are no-ops on
256platforms where binmode() is a no-op, but perform their functions
257everywhere if PerlIO is enabled.
7d3b96bb 258
259=head1 IMPLEMENTATION DETAILS
d1edabcf 260
f3b00462 261There is a class method in C<PerlIO::Layer> C<find> which is
262implemented as XS code. It is called by C<import> to validate the
263layers:
0c4f7ff0 264
265 PerlIO::Layer::->find("perlio")
266
f3b00462 267The return value (if defined) is a Perl object, of class
268C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
269yet there is nothing useful you can do with the object at the perl
270level.
16fe6d59 271
d1edabcf 272=head1 SEE ALSO
273
1768d7eb 274L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
275L<encoding>
d1edabcf 276
277=cut