[ PATCH ] mymalloc on HP-UX
[p5sagit/p5-mst-13.2.git] / lib / open.pm
CommitLineData
d1edabcf 1package open;
99ef548b 2use warnings;
ac27b0f5 3use Carp;
16fe6d59 4$open::hint_bits = 0x20000;
5
0c4f7ff0 6our $VERSION = '1.01';
b75c8c73 7
58d53262 8my $locale_encoding;
9
b178108d 10sub in_locale { $^H & ($locale::hint_bits || 0)}
58d53262 11
12sub _get_locale_encoding {
13 unless (defined $locale_encoding) {
276c9210 14 # I18N::Langinfo isn't available everywhere
9615f2ee 15 eval {
16 require I18N::Langinfo;
17 I18N::Langinfo->import(qw(langinfo CODESET));
ba6ce41c 18 $locale_encoding = langinfo(CODESET());
9615f2ee 19 };
11fc5dc3 20 my $country_language;
a4157ebb 21
22 no warnings 'uninitialized';
23
58d53262 24 if (not $locale_encoding && in_locale()) {
11fc5dc3 25 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
26 ($country_language, $locale_encoding) = ($1, $2);
27 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
28 ($country_language, $locale_encoding) = ($1, $2);
58d53262 29 }
1e616cf5 30 } elsif (not $locale_encoding) {
31 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
32 $ENV{LANG} =~ /\butf-?8\b/i) {
33 $locale_encoding = 'utf8';
34 }
35 # Could do more heuristics based on the country and language
58d53262 36 # parts of LC_ALL and LANG (the parts before the dot (if any)),
37 # since we have Locale::Country and Locale::Language available.
38 # TODO: get a database of Language -> Encoding mappings
421e5dc3 39 # (the Estonian database at http://www.eki.ee/letter/
40 # would be excellent!) --jhi
58d53262 41 }
11fc5dc3 42 if (defined $locale_encoding &&
43 $locale_encoding eq 'euc' &&
44 defined $country_language) {
56fb2e42 45 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
1e616cf5 46 $locale_encoding = 'euc-jp';
5a192dee 47 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
1e616cf5 48 $locale_encoding = 'euc-kr';
a4157ebb 49 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
50 $locale_encoding = 'euc-cn';
56fb2e42 51 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
011f8d22 52 $locale_encoding = 'euc-tw';
11fc5dc3 53 }
54 croak "Locale encoding 'euc' too ambiguous"
55 if $locale_encoding eq 'euc';
56 }
58d53262 57 }
58}
59
16fe6d59 60sub import {
dfebf958 61 my ($class,@args) = @_;
62 croak("`use open' needs explicit list of disciplines") unless @args;
b178108d 63 my $std;
16fe6d59 64 $^H |= $open::hint_bits;
ba6ce41c 65 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
dfebf958 66 while (@args) {
67 my $type = shift(@args);
1e616cf5 68 my $dscp;
69 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
70 $type = 'IO';
71 $dscp = ":$1";
b178108d 72 } elsif ($type eq ':std') {
73 $std = 1;
74 next;
1e616cf5 75 } else {
725d232a 76 $dscp = shift(@args) || '';
1e616cf5 77 }
ac27b0f5 78 my @val;
1e616cf5 79 foreach my $layer (split(/\s+/,$dscp)) {
dfebf958 80 $layer =~ s/^://;
58d53262 81 if ($layer eq 'locale') {
82 use Encode;
83 _get_locale_encoding()
84 unless defined $locale_encoding;
99ef548b 85 (warnings::warnif("layer", "Cannot figure out an encoding to use"), last)
58d53262 86 unless defined $locale_encoding;
11fc5dc3 87 if ($locale_encoding =~ /^utf-?8$/i) {
88 $layer = "utf8";
89 } else {
738b23dc 90 $layer = "encoding($locale_encoding)";
11fc5dc3 91 }
b178108d 92 $std = 1;
97ed432b 93 } else {
011f8d22 94 my $target = $layer; # the layer name itself
95 $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters
96
97 unless(PerlIO::Layer::->find($target)) {
99ef548b 98 warnings::warnif("layer", "Unknown discipline layer '$layer'");
97ed432b 99 }
ac27b0f5 100 }
101 push(@val,":$layer");
102 if ($layer =~ /^(crlf|raw)$/) {
103 $^H{"open_$type"} = $layer;
16fe6d59 104 }
ac27b0f5 105 }
106 if ($type eq 'IN') {
107 $in = join(' ',@val);
108 }
109 elsif ($type eq 'OUT') {
110 $out = join(' ',@val);
16fe6d59 111 }
1e616cf5 112 elsif ($type eq 'IO') {
f3b00462 113 $in = $out = join(' ',@val);
114 }
16fe6d59 115 else {
ac27b0f5 116 croak "Unknown discipline class '$type'";
16fe6d59 117 }
118 }
a4157ebb 119 ${^OPEN} = join("\0",$in,$out) if $in or $out;
b178108d 120 if ($std) {
121 if ($in) {
122 if ($in =~ /:utf8\b/) {
123 binmode(STDIN, ":utf8");
124 } elsif ($in =~ /(\w+\(.+\))/) {
125 binmode(STDIN, ":$1");
126 }
127 }
128 if ($out) {
129 if ($out =~ /:utf8\b/) {
130 binmode(STDOUT, ":utf8");
131 binmode(STDERR, ":utf8");
132 } elsif ($out =~ /(\w+\(.+\))/) {
133 binmode(STDOUT, ":$1");
134 binmode(STDERR, ":$1");
135 }
136 }
137 }
16fe6d59 138}
139
1401;
141__END__
d1edabcf 142
143=head1 NAME
144
145open - perl pragma to set default disciplines for input and output
146
147=head1 SYNOPSIS
148
1e616cf5 149 use open IN => ":crlf", OUT => ":raw";
150 use open OUT => ':utf8';
151 use open IO => ":encoding(iso-8859-7)";
152
153 use open IO => ':locale';
725d232a 154
1e616cf5 155 use open ':utf8';
156 use open ':locale';
157 use open ':encoding(iso-8859-7)';
d1edabcf 158
b178108d 159 use open ':std';
160
d1edabcf 161=head1 DESCRIPTION
162
d151aa0e 163Full-fledged support for I/O disciplines is now implemented provided
164Perl is configured to use PerlIO as its IO system (which is now the
165default).
16fe6d59 166
7d3b96bb 167The C<open> pragma serves as one of the interfaces to declare default
168"layers" (aka disciplines) for all I/O.
169
170The C<open> pragma is used to declare one or more default layers for
d151aa0e 171I/O operations. Any open(), readpipe() (aka qx//) and similar
172operators found within the lexical scope of this pragma will use the
173declared defaults.
7d3b96bb 174
1e616cf5 175With the C<IN> subpragma you can declare the default layers
d8d29d4f 176of input streams, and with the C<OUT> subpragma you can declare
1e616cf5 177the default layers of output streams. With the C<IO> subpragma
178you can control both input and output streams simultaneously.
179
180If you have a legacy encoding, you can use the C<:encoding(...)> tag.
181
182if you want to set your encoding disciplines based on your
183locale environment variables, you can use the C<:locale> tag.
184For example:
185
186 $ENV{LANG} = 'ru_RU.KOI8-R';
dbd62f41 187 # the :locale will probe the locale environment variables like LANG
188 use open OUT => ':locale';
1e616cf5 189 open(O, ">koi8");
23bcb45a 190 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
1e616cf5 191 close O;
192 open(I, "<koi8");
23bcb45a 193 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
1e616cf5 194 close I;
195
196These are equivalent
197
198 use open ':utf8';
199 use open IO => ':utf8';
200
201as are these
202
203 use open ':locale';
204 use open IO => ':locale';
205
206and these
207
208 use open ':encoding(iso-8859-7)';
209 use open IO => ':encoding(iso-8859-7)';
210
b5d8778e 211The matching of encoding names is loose: case does not matter, and
212many encodings have several aliases. See L<Encode::Supported> for
213details and the list of supported locales.
214
215Note that C<:utf8> discipline must always be specified exactly like
216that, it is not subject to the loose matching of encoding names.
217
d151aa0e 218When open() is given an explicit list of layers they are appended to
219the list declared using this pragma.
7d3b96bb 220
b178108d 221The C<:std> subpragma on its own has no effect, but if combined with
222the C<:utf8> or C<:encoding> subpragmas, it converts the standard
223filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
224for input/output handles. For example, if both input and out are
225chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
226STDERR are also in C<:utf8>. On the other hand, if only output is
fb80c70c 227chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
b178108d 228STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
229implicitly turns on C<:std>.
230
ba9a69eb 231The logic of C<:locale> is as follows:
232
233=over 4
234
235=item 1.
236
237If the platform supports the langinfo(CODESET) interface, the codeset
238returned is used as the default encoding for the open pragma.
239
240=item 2.
241
242If 1. didn't work but we are under the locale pragma, the environment
243variables LC_ALL and LANG (in that order) are matched for encodings
244(the part after C<.>, if any), and if any found, that is used
245as the default encoding for the open pragma.
246
247=item 3.
248
249If 1. and 2. didn't work, the environment variables LC_ALL and LANG
250(in that order) are matched for anything looking like UTF-8, and if
251any found, C<:utf8> is used as the default encoding for the open
252pragma.
253
254=back
255
b310b053 256If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
257contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
258the default encoding of your STDIN, STDOUT, and STDERR, and of
259B<any subsequent file open>, is UTF-8.
260
7d3b96bb 261Directory handles may also support disciplines in future.
262
263=head1 NONPERLIO FUNCTIONALITY
264
d151aa0e 265If Perl is not built to use PerlIO as its IO system then only the two
266pseudo-disciplines ":raw" and ":crlf" are available.
16fe6d59 267
268The ":raw" discipline corresponds to "binary mode" and the ":crlf"
269discipline corresponds to "text mode" on platforms that distinguish
270between the two modes when opening files (which is many DOS-like
d151aa0e 271platforms, including Windows). These two disciplines are no-ops on
272platforms where binmode() is a no-op, but perform their functions
273everywhere if PerlIO is enabled.
7d3b96bb 274
275=head1 IMPLEMENTATION DETAILS
d1edabcf 276
f3b00462 277There is a class method in C<PerlIO::Layer> C<find> which is
278implemented as XS code. It is called by C<import> to validate the
279layers:
0c4f7ff0 280
281 PerlIO::Layer::->find("perlio")
282
f3b00462 283The return value (if defined) is a Perl object, of class
284C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
285yet there is nothing useful you can do with the object at the perl
286level.
16fe6d59 287
d1edabcf 288=head1 SEE ALSO
289
1768d7eb 290L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
291L<encoding>
d1edabcf 292
293=cut