Document the logic of :locale better.
[p5sagit/p5-mst-13.2.git] / lib / open.pm
CommitLineData
d1edabcf 1package open;
ac27b0f5 2use Carp;
16fe6d59 3$open::hint_bits = 0x20000;
4
0c4f7ff0 5our $VERSION = '1.01';
b75c8c73 6
58d53262 7my $locale_encoding;
8
9sub in_locale { $^H & $locale::hint_bits }
10
11sub _get_locale_encoding {
12 unless (defined $locale_encoding) {
276c9210 13 # I18N::Langinfo isn't available everywhere
9615f2ee 14 eval {
15 require I18N::Langinfo;
16 I18N::Langinfo->import(qw(langinfo CODESET));
ba6ce41c 17 $locale_encoding = langinfo(CODESET());
9615f2ee 18 };
11fc5dc3 19 my $country_language;
a4157ebb 20
21 no warnings 'uninitialized';
22
58d53262 23 if (not $locale_encoding && in_locale()) {
11fc5dc3 24 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
25 ($country_language, $locale_encoding) = ($1, $2);
26 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
27 ($country_language, $locale_encoding) = ($1, $2);
58d53262 28 }
1e616cf5 29 } elsif (not $locale_encoding) {
30 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
31 $ENV{LANG} =~ /\butf-?8\b/i) {
32 $locale_encoding = 'utf8';
33 }
34 # Could do more heuristics based on the country and language
58d53262 35 # parts of LC_ALL and LANG (the parts before the dot (if any)),
36 # since we have Locale::Country and Locale::Language available.
37 # TODO: get a database of Language -> Encoding mappings
421e5dc3 38 # (the Estonian database at http://www.eki.ee/letter/
39 # would be excellent!) --jhi
58d53262 40 }
11fc5dc3 41 if (defined $locale_encoding &&
42 $locale_encoding eq 'euc' &&
43 defined $country_language) {
56fb2e42 44 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
1e616cf5 45 $locale_encoding = 'euc-jp';
5a192dee 46 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
1e616cf5 47 $locale_encoding = 'euc-kr';
a4157ebb 48 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
49 $locale_encoding = 'euc-cn';
56fb2e42 50 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
a4157ebb 51 $locale_encoding = 'big5';
11fc5dc3 52 }
53 croak "Locale encoding 'euc' too ambiguous"
54 if $locale_encoding eq 'euc';
55 }
58d53262 56 }
57}
58
16fe6d59 59sub import {
dfebf958 60 my ($class,@args) = @_;
61 croak("`use open' needs explicit list of disciplines") unless @args;
16fe6d59 62 $^H |= $open::hint_bits;
ba6ce41c 63 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
dfebf958 64 while (@args) {
65 my $type = shift(@args);
1e616cf5 66 my $dscp;
67 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
68 $type = 'IO';
69 $dscp = ":$1";
70 } else {
725d232a 71 $dscp = shift(@args) || '';
1e616cf5 72 }
ac27b0f5 73 my @val;
1e616cf5 74 foreach my $layer (split(/\s+/,$dscp)) {
dfebf958 75 $layer =~ s/^://;
58d53262 76 if ($layer eq 'locale') {
77 use Encode;
78 _get_locale_encoding()
79 unless defined $locale_encoding;
a4157ebb 80 (carp("Cannot figure out an encoding to use"), last)
58d53262 81 unless defined $locale_encoding;
11fc5dc3 82 if ($locale_encoding =~ /^utf-?8$/i) {
83 $layer = "utf8";
84 } else {
738b23dc 85 $layer = "encoding($locale_encoding)";
11fc5dc3 86 }
97ed432b 87 } else {
88 unless(PerlIO::Layer::->find($layer)) {
89 carp("Unknown discipline layer '$layer'");
90 }
ac27b0f5 91 }
92 push(@val,":$layer");
93 if ($layer =~ /^(crlf|raw)$/) {
94 $^H{"open_$type"} = $layer;
16fe6d59 95 }
ac27b0f5 96 }
738b23dc 97 # print "# type = $type, val = @val\n";
ac27b0f5 98 if ($type eq 'IN') {
99 $in = join(' ',@val);
100 }
101 elsif ($type eq 'OUT') {
102 $out = join(' ',@val);
16fe6d59 103 }
1e616cf5 104 elsif ($type eq 'IO') {
f3b00462 105 $in = $out = join(' ',@val);
106 }
16fe6d59 107 else {
ac27b0f5 108 croak "Unknown discipline class '$type'";
16fe6d59 109 }
110 }
a4157ebb 111 ${^OPEN} = join("\0",$in,$out) if $in or $out;
16fe6d59 112}
113
1141;
115__END__
d1edabcf 116
117=head1 NAME
118
119open - perl pragma to set default disciplines for input and output
120
121=head1 SYNOPSIS
122
1e616cf5 123 use open IN => ":crlf", OUT => ":raw";
124 use open OUT => ':utf8';
125 use open IO => ":encoding(iso-8859-7)";
126
127 use open IO => ':locale';
725d232a 128
1e616cf5 129 use open ':utf8';
130 use open ':locale';
131 use open ':encoding(iso-8859-7)';
d1edabcf 132
133=head1 DESCRIPTION
134
d151aa0e 135Full-fledged support for I/O disciplines is now implemented provided
136Perl is configured to use PerlIO as its IO system (which is now the
137default).
16fe6d59 138
7d3b96bb 139The C<open> pragma serves as one of the interfaces to declare default
140"layers" (aka disciplines) for all I/O.
141
142The C<open> pragma is used to declare one or more default layers for
d151aa0e 143I/O operations. Any open(), readpipe() (aka qx//) and similar
144operators found within the lexical scope of this pragma will use the
145declared defaults.
7d3b96bb 146
1e616cf5 147With the C<IN> subpragma you can declare the default layers
d8d29d4f 148of input streams, and with the C<OUT> subpragma you can declare
1e616cf5 149the default layers of output streams. With the C<IO> subpragma
150you can control both input and output streams simultaneously.
151
152If you have a legacy encoding, you can use the C<:encoding(...)> tag.
153
154if you want to set your encoding disciplines based on your
155locale environment variables, you can use the C<:locale> tag.
156For example:
157
158 $ENV{LANG} = 'ru_RU.KOI8-R';
dbd62f41 159 # the :locale will probe the locale environment variables like LANG
160 use open OUT => ':locale';
1e616cf5 161 open(O, ">koi8");
23bcb45a 162 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
1e616cf5 163 close O;
164 open(I, "<koi8");
23bcb45a 165 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
1e616cf5 166 close I;
167
168These are equivalent
169
170 use open ':utf8';
171 use open IO => ':utf8';
172
173as are these
174
175 use open ':locale';
176 use open IO => ':locale';
177
178and these
179
180 use open ':encoding(iso-8859-7)';
181 use open IO => ':encoding(iso-8859-7)';
182
d151aa0e 183When open() is given an explicit list of layers they are appended to
184the list declared using this pragma.
7d3b96bb 185
ba9a69eb 186The logic of C<:locale> is as follows:
187
188=over 4
189
190=item 1.
191
192If the platform supports the langinfo(CODESET) interface, the codeset
193returned is used as the default encoding for the open pragma.
194
195=item 2.
196
197If 1. didn't work but we are under the locale pragma, the environment
198variables LC_ALL and LANG (in that order) are matched for encodings
199(the part after C<.>, if any), and if any found, that is used
200as the default encoding for the open pragma.
201
202=item 3.
203
204If 1. and 2. didn't work, the environment variables LC_ALL and LANG
205(in that order) are matched for anything looking like UTF-8, and if
206any found, C<:utf8> is used as the default encoding for the open
207pragma.
208
209=back
210
7d3b96bb 211Directory handles may also support disciplines in future.
212
213=head1 NONPERLIO FUNCTIONALITY
214
d151aa0e 215If Perl is not built to use PerlIO as its IO system then only the two
216pseudo-disciplines ":raw" and ":crlf" are available.
16fe6d59 217
218The ":raw" discipline corresponds to "binary mode" and the ":crlf"
219discipline corresponds to "text mode" on platforms that distinguish
220between the two modes when opening files (which is many DOS-like
d151aa0e 221platforms, including Windows). These two disciplines are no-ops on
222platforms where binmode() is a no-op, but perform their functions
223everywhere if PerlIO is enabled.
7d3b96bb 224
225=head1 IMPLEMENTATION DETAILS
d1edabcf 226
f3b00462 227There is a class method in C<PerlIO::Layer> C<find> which is
228implemented as XS code. It is called by C<import> to validate the
229layers:
0c4f7ff0 230
231 PerlIO::Layer::->find("perlio")
232
f3b00462 233The return value (if defined) is a Perl object, of class
234C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
235yet there is nothing useful you can do with the object at the perl
236level.
16fe6d59 237
d1edabcf 238=head1 SEE ALSO
239
1768d7eb 240L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
241L<encoding>
d1edabcf 242
243=cut