[tests for Pod::Html]
[p5sagit/p5-mst-13.2.git] / lib / open.pm
CommitLineData
d1edabcf 1package open;
ac27b0f5 2use Carp;
16fe6d59 3$open::hint_bits = 0x20000;
4
0c4f7ff0 5our $VERSION = '1.01';
b75c8c73 6
58d53262 7my $locale_encoding;
8
b178108d 9sub in_locale { $^H & ($locale::hint_bits || 0)}
58d53262 10
11sub _get_locale_encoding {
12 unless (defined $locale_encoding) {
276c9210 13 # I18N::Langinfo isn't available everywhere
9615f2ee 14 eval {
15 require I18N::Langinfo;
16 I18N::Langinfo->import(qw(langinfo CODESET));
ba6ce41c 17 $locale_encoding = langinfo(CODESET());
9615f2ee 18 };
11fc5dc3 19 my $country_language;
a4157ebb 20
21 no warnings 'uninitialized';
22
58d53262 23 if (not $locale_encoding && in_locale()) {
11fc5dc3 24 if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) {
25 ($country_language, $locale_encoding) = ($1, $2);
26 } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) {
27 ($country_language, $locale_encoding) = ($1, $2);
58d53262 28 }
1e616cf5 29 } elsif (not $locale_encoding) {
30 if ($ENV{LC_ALL} =~ /\butf-?8\b/i ||
31 $ENV{LANG} =~ /\butf-?8\b/i) {
32 $locale_encoding = 'utf8';
33 }
34 # Could do more heuristics based on the country and language
58d53262 35 # parts of LC_ALL and LANG (the parts before the dot (if any)),
36 # since we have Locale::Country and Locale::Language available.
37 # TODO: get a database of Language -> Encoding mappings
421e5dc3 38 # (the Estonian database at http://www.eki.ee/letter/
39 # would be excellent!) --jhi
58d53262 40 }
11fc5dc3 41 if (defined $locale_encoding &&
42 $locale_encoding eq 'euc' &&
43 defined $country_language) {
56fb2e42 44 if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) {
1e616cf5 45 $locale_encoding = 'euc-jp';
5a192dee 46 } elsif ($country_language =~ /^ko_KR|korean?$/i) {
1e616cf5 47 $locale_encoding = 'euc-kr';
a4157ebb 48 } elsif ($country_language =~ /^zh_CN|chin(?:a|ese)?$/i) {
49 $locale_encoding = 'euc-cn';
56fb2e42 50 } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) {
011f8d22 51 $locale_encoding = 'euc-tw';
11fc5dc3 52 }
53 croak "Locale encoding 'euc' too ambiguous"
54 if $locale_encoding eq 'euc';
55 }
58d53262 56 }
57}
58
16fe6d59 59sub import {
dfebf958 60 my ($class,@args) = @_;
61 croak("`use open' needs explicit list of disciplines") unless @args;
b178108d 62 my $std;
16fe6d59 63 $^H |= $open::hint_bits;
ba6ce41c 64 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
dfebf958 65 while (@args) {
66 my $type = shift(@args);
1e616cf5 67 my $dscp;
68 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
69 $type = 'IO';
70 $dscp = ":$1";
b178108d 71 } elsif ($type eq ':std') {
72 $std = 1;
73 next;
1e616cf5 74 } else {
725d232a 75 $dscp = shift(@args) || '';
1e616cf5 76 }
ac27b0f5 77 my @val;
1e616cf5 78 foreach my $layer (split(/\s+/,$dscp)) {
dfebf958 79 $layer =~ s/^://;
58d53262 80 if ($layer eq 'locale') {
81 use Encode;
82 _get_locale_encoding()
83 unless defined $locale_encoding;
a4157ebb 84 (carp("Cannot figure out an encoding to use"), last)
58d53262 85 unless defined $locale_encoding;
11fc5dc3 86 if ($locale_encoding =~ /^utf-?8$/i) {
87 $layer = "utf8";
88 } else {
738b23dc 89 $layer = "encoding($locale_encoding)";
11fc5dc3 90 }
b178108d 91 $std = 1;
97ed432b 92 } else {
011f8d22 93 my $target = $layer; # the layer name itself
94 $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters
95
96 unless(PerlIO::Layer::->find($target)) {
97ed432b 97 carp("Unknown discipline layer '$layer'");
98 }
ac27b0f5 99 }
100 push(@val,":$layer");
101 if ($layer =~ /^(crlf|raw)$/) {
102 $^H{"open_$type"} = $layer;
16fe6d59 103 }
ac27b0f5 104 }
105 if ($type eq 'IN') {
106 $in = join(' ',@val);
107 }
108 elsif ($type eq 'OUT') {
109 $out = join(' ',@val);
16fe6d59 110 }
1e616cf5 111 elsif ($type eq 'IO') {
f3b00462 112 $in = $out = join(' ',@val);
113 }
16fe6d59 114 else {
ac27b0f5 115 croak "Unknown discipline class '$type'";
16fe6d59 116 }
117 }
a4157ebb 118 ${^OPEN} = join("\0",$in,$out) if $in or $out;
b178108d 119 if ($std) {
120 if ($in) {
121 if ($in =~ /:utf8\b/) {
122 binmode(STDIN, ":utf8");
123 } elsif ($in =~ /(\w+\(.+\))/) {
124 binmode(STDIN, ":$1");
125 }
126 }
127 if ($out) {
128 if ($out =~ /:utf8\b/) {
129 binmode(STDOUT, ":utf8");
130 binmode(STDERR, ":utf8");
131 } elsif ($out =~ /(\w+\(.+\))/) {
132 binmode(STDOUT, ":$1");
133 binmode(STDERR, ":$1");
134 }
135 }
136 }
16fe6d59 137}
138
1391;
140__END__
d1edabcf 141
142=head1 NAME
143
144open - perl pragma to set default disciplines for input and output
145
146=head1 SYNOPSIS
147
1e616cf5 148 use open IN => ":crlf", OUT => ":raw";
149 use open OUT => ':utf8';
150 use open IO => ":encoding(iso-8859-7)";
151
152 use open IO => ':locale';
725d232a 153
1e616cf5 154 use open ':utf8';
155 use open ':locale';
156 use open ':encoding(iso-8859-7)';
d1edabcf 157
b178108d 158 use open ':std';
159
d1edabcf 160=head1 DESCRIPTION
161
d151aa0e 162Full-fledged support for I/O disciplines is now implemented provided
163Perl is configured to use PerlIO as its IO system (which is now the
164default).
16fe6d59 165
7d3b96bb 166The C<open> pragma serves as one of the interfaces to declare default
167"layers" (aka disciplines) for all I/O.
168
169The C<open> pragma is used to declare one or more default layers for
d151aa0e 170I/O operations. Any open(), readpipe() (aka qx//) and similar
171operators found within the lexical scope of this pragma will use the
172declared defaults.
7d3b96bb 173
1e616cf5 174With the C<IN> subpragma you can declare the default layers
d8d29d4f 175of input streams, and with the C<OUT> subpragma you can declare
1e616cf5 176the default layers of output streams. With the C<IO> subpragma
177you can control both input and output streams simultaneously.
178
179If you have a legacy encoding, you can use the C<:encoding(...)> tag.
180
181if you want to set your encoding disciplines based on your
182locale environment variables, you can use the C<:locale> tag.
183For example:
184
185 $ENV{LANG} = 'ru_RU.KOI8-R';
dbd62f41 186 # the :locale will probe the locale environment variables like LANG
187 use open OUT => ':locale';
1e616cf5 188 open(O, ">koi8");
23bcb45a 189 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
1e616cf5 190 close O;
191 open(I, "<koi8");
23bcb45a 192 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
1e616cf5 193 close I;
194
195These are equivalent
196
197 use open ':utf8';
198 use open IO => ':utf8';
199
200as are these
201
202 use open ':locale';
203 use open IO => ':locale';
204
205and these
206
207 use open ':encoding(iso-8859-7)';
208 use open IO => ':encoding(iso-8859-7)';
209
d151aa0e 210When open() is given an explicit list of layers they are appended to
211the list declared using this pragma.
7d3b96bb 212
b178108d 213The C<:std> subpragma on its own has no effect, but if combined with
214the C<:utf8> or C<:encoding> subpragmas, it converts the standard
215filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
216for input/output handles. For example, if both input and out are
217chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
218STDERR are also in C<:utf8>. On the other hand, if only output is
fb80c70c 219chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
b178108d 220STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
221implicitly turns on C<:std>.
222
ba9a69eb 223The logic of C<:locale> is as follows:
224
225=over 4
226
227=item 1.
228
229If the platform supports the langinfo(CODESET) interface, the codeset
230returned is used as the default encoding for the open pragma.
231
232=item 2.
233
234If 1. didn't work but we are under the locale pragma, the environment
235variables LC_ALL and LANG (in that order) are matched for encodings
236(the part after C<.>, if any), and if any found, that is used
237as the default encoding for the open pragma.
238
239=item 3.
240
241If 1. and 2. didn't work, the environment variables LC_ALL and LANG
242(in that order) are matched for anything looking like UTF-8, and if
243any found, C<:utf8> is used as the default encoding for the open
244pragma.
245
246=back
247
b310b053 248If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
249contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
250the default encoding of your STDIN, STDOUT, and STDERR, and of
251B<any subsequent file open>, is UTF-8.
252
7d3b96bb 253Directory handles may also support disciplines in future.
254
255=head1 NONPERLIO FUNCTIONALITY
256
d151aa0e 257If Perl is not built to use PerlIO as its IO system then only the two
258pseudo-disciplines ":raw" and ":crlf" are available.
16fe6d59 259
260The ":raw" discipline corresponds to "binary mode" and the ":crlf"
261discipline corresponds to "text mode" on platforms that distinguish
262between the two modes when opening files (which is many DOS-like
d151aa0e 263platforms, including Windows). These two disciplines are no-ops on
264platforms where binmode() is a no-op, but perform their functions
265everywhere if PerlIO is enabled.
7d3b96bb 266
267=head1 IMPLEMENTATION DETAILS
d1edabcf 268
f3b00462 269There is a class method in C<PerlIO::Layer> C<find> which is
270implemented as XS code. It is called by C<import> to validate the
271layers:
0c4f7ff0 272
273 PerlIO::Layer::->find("perlio")
274
f3b00462 275The return value (if defined) is a Perl object, of class
276C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
277yet there is nothing useful you can do with the object at the perl
278level.
16fe6d59 279
d1edabcf 280=head1 SEE ALSO
281
1768d7eb 282L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
283L<encoding>
d1edabcf 284
285=cut