Commit | Line | Data |
d1edabcf |
1 | package open; |
99ef548b |
2 | use warnings; |
ac27b0f5 |
3 | use Carp; |
9cfe5470 |
4 | $open::hint_bits = 0x20000; # HINT_LOCALIZE_HH |
16fe6d59 |
5 | |
7c0e976d |
6 | our $VERSION = '1.04'; |
b75c8c73 |
7 | |
7c0e976d |
8 | require 5.008001; # for PerlIO::get_layers() |
58d53262 |
9 | |
b4ebbc94 |
10 | my $locale_encoding; |
a4157ebb |
11 | |
7c0e976d |
12 | sub _get_encname { |
b4ebbc94 |
13 | return ($1, Encode::resolve_alias($1)) if $_[0] =~ /^:?encoding\((.+)\)$/; |
7c0e976d |
14 | return; |
15 | } |
a4157ebb |
16 | |
7c0e976d |
17 | sub _drop_oldenc { |
18 | # If by the time we arrive here there already is at the top of the |
19 | # perlio layer stack an encoding identical to what we would like |
20 | # to push via this open pragma, we will pop away the old encoding |
21 | # (+utf8) so that we can push ourselves in place (this is easier |
22 | # than ignoring pushing ourselves because of the way how ${^OPEN} |
23 | # works). So we are looking for something like |
24 | # |
25 | # stdio encoding(xxx) utf8 |
26 | # |
27 | # in the existing layer stack, and in the new stack chunk for |
28 | # |
29 | # :encoding(xxx) |
30 | # |
31 | # If we find a match, we pop the old stack (once, since |
32 | # the utf8 is just a flag on the encoding layer) |
33 | my ($h, @new) = @_; |
34 | return unless @new >= 1 && $new[-1] =~ /^:encoding\(.+\)$/; |
35 | my @old = PerlIO::get_layers($h); |
36 | return unless @old >= 3 && |
00243fce |
37 | $old[-1] eq 'utf8' && |
7c0e976d |
38 | $old[-2] =~ /^encoding\(.+\)$/; |
b4ebbc94 |
39 | require Encode; |
7c0e976d |
40 | my ($loname, $lcname) = _get_encname($old[-2]); |
41 | unless (defined $lcname) { # Should we trust get_layers()? |
42 | require Carp; |
43 | Carp::croak("open: Unknown encoding '$loname'"); |
44 | } |
45 | my ($voname, $vcname) = _get_encname($new[-1]); |
46 | unless (defined $vcname) { |
47 | require Carp; |
48 | Carp::croak("open: Unknown encoding '$voname'"); |
49 | } |
50 | if ($lcname eq $vcname) { |
51 | binmode($h, ":pop"); # utf8 is part of the encoding layer |
58d53262 |
52 | } |
53 | } |
54 | |
16fe6d59 |
55 | sub import { |
dfebf958 |
56 | my ($class,@args) = @_; |
7c0e976d |
57 | croak("open: needs explicit list of PerlIO layers") unless @args; |
b178108d |
58 | my $std; |
16fe6d59 |
59 | $^H |= $open::hint_bits; |
ba6ce41c |
60 | my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1); |
dfebf958 |
61 | while (@args) { |
62 | my $type = shift(@args); |
1e616cf5 |
63 | my $dscp; |
64 | if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) { |
65 | $type = 'IO'; |
66 | $dscp = ":$1"; |
b178108d |
67 | } elsif ($type eq ':std') { |
68 | $std = 1; |
69 | next; |
1e616cf5 |
70 | } else { |
725d232a |
71 | $dscp = shift(@args) || ''; |
1e616cf5 |
72 | } |
ac27b0f5 |
73 | my @val; |
1e616cf5 |
74 | foreach my $layer (split(/\s+/,$dscp)) { |
dfebf958 |
75 | $layer =~ s/^://; |
58d53262 |
76 | if ($layer eq 'locale') { |
54cfe943 |
77 | require Encode; |
b4ebbc94 |
78 | require encoding; |
79 | $locale_encoding = encoding::_get_locale_encoding() |
58d53262 |
80 | unless defined $locale_encoding; |
99ef548b |
81 | (warnings::warnif("layer", "Cannot figure out an encoding to use"), last) |
58d53262 |
82 | unless defined $locale_encoding; |
11fc5dc3 |
83 | if ($locale_encoding =~ /^utf-?8$/i) { |
84 | $layer = "utf8"; |
85 | } else { |
738b23dc |
86 | $layer = "encoding($locale_encoding)"; |
11fc5dc3 |
87 | } |
b178108d |
88 | $std = 1; |
97ed432b |
89 | } else { |
011f8d22 |
90 | my $target = $layer; # the layer name itself |
91 | $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters |
92 | |
c7732655 |
93 | unless(PerlIO::Layer::->find($target,1)) { |
94 | warnings::warnif("layer", "Unknown PerlIO layer '$target'"); |
97ed432b |
95 | } |
ac27b0f5 |
96 | } |
97 | push(@val,":$layer"); |
98 | if ($layer =~ /^(crlf|raw)$/) { |
99 | $^H{"open_$type"} = $layer; |
16fe6d59 |
100 | } |
ac27b0f5 |
101 | } |
102 | if ($type eq 'IN') { |
7c0e976d |
103 | _drop_oldenc(*STDIN, @val); |
104 | $in = join(' ', @val); |
ac27b0f5 |
105 | } |
106 | elsif ($type eq 'OUT') { |
7c0e976d |
107 | _drop_oldenc(*STDOUT, @val); |
108 | $out = join(' ', @val); |
16fe6d59 |
109 | } |
1e616cf5 |
110 | elsif ($type eq 'IO') { |
7c0e976d |
111 | _drop_oldenc(*STDIN, @val); |
112 | _drop_oldenc(*STDOUT, @val); |
113 | $in = $out = join(' ', @val); |
f3b00462 |
114 | } |
16fe6d59 |
115 | else { |
e2d9456f |
116 | croak "Unknown PerlIO layer class '$type'"; |
16fe6d59 |
117 | } |
118 | } |
7c0e976d |
119 | ${^OPEN} = join("\0", $in, $out); |
b178108d |
120 | if ($std) { |
121 | if ($in) { |
122 | if ($in =~ /:utf8\b/) { |
123 | binmode(STDIN, ":utf8"); |
124 | } elsif ($in =~ /(\w+\(.+\))/) { |
125 | binmode(STDIN, ":$1"); |
126 | } |
127 | } |
128 | if ($out) { |
129 | if ($out =~ /:utf8\b/) { |
130 | binmode(STDOUT, ":utf8"); |
131 | binmode(STDERR, ":utf8"); |
132 | } elsif ($out =~ /(\w+\(.+\))/) { |
133 | binmode(STDOUT, ":$1"); |
134 | binmode(STDERR, ":$1"); |
135 | } |
136 | } |
137 | } |
16fe6d59 |
138 | } |
139 | |
140 | 1; |
141 | __END__ |
d1edabcf |
142 | |
143 | =head1 NAME |
144 | |
e2d9456f |
145 | open - perl pragma to set default PerlIO layers for input and output |
d1edabcf |
146 | |
147 | =head1 SYNOPSIS |
148 | |
d5563ed7 |
149 | use open IN => ":crlf", OUT => ":bytes"; |
1e616cf5 |
150 | use open OUT => ':utf8'; |
151 | use open IO => ":encoding(iso-8859-7)"; |
152 | |
153 | use open IO => ':locale'; |
725d232a |
154 | |
1e616cf5 |
155 | use open ':utf8'; |
156 | use open ':locale'; |
157 | use open ':encoding(iso-8859-7)'; |
d1edabcf |
158 | |
b178108d |
159 | use open ':std'; |
160 | |
d1edabcf |
161 | =head1 DESCRIPTION |
162 | |
e2d9456f |
163 | Full-fledged support for I/O layers is now implemented provided |
d151aa0e |
164 | Perl is configured to use PerlIO as its IO system (which is now the |
165 | default). |
16fe6d59 |
166 | |
7d3b96bb |
167 | The C<open> pragma serves as one of the interfaces to declare default |
16479489 |
168 | "layers" (also known as "disciplines") for all I/O. Any two-argument |
169 | open(), readpipe() (aka qx//) and similar operators found within the |
170 | lexical scope of this pragma will use the declared defaults. |
171 | Three-argument opens are not affected by this pragma since there you |
172 | (can) explicitly specify the layers and are supposed to know what you |
173 | are doing. |
7d3b96bb |
174 | |
1e616cf5 |
175 | With the C<IN> subpragma you can declare the default layers |
d8d29d4f |
176 | of input streams, and with the C<OUT> subpragma you can declare |
1e616cf5 |
177 | the default layers of output streams. With the C<IO> subpragma |
178 | you can control both input and output streams simultaneously. |
179 | |
180 | If you have a legacy encoding, you can use the C<:encoding(...)> tag. |
181 | |
e2d9456f |
182 | if you want to set your encoding layers based on your |
1e616cf5 |
183 | locale environment variables, you can use the C<:locale> tag. |
184 | For example: |
185 | |
186 | $ENV{LANG} = 'ru_RU.KOI8-R'; |
dbd62f41 |
187 | # the :locale will probe the locale environment variables like LANG |
188 | use open OUT => ':locale'; |
1e616cf5 |
189 | open(O, ">koi8"); |
23bcb45a |
190 | print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 |
1e616cf5 |
191 | close O; |
192 | open(I, "<koi8"); |
23bcb45a |
193 | printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1 |
1e616cf5 |
194 | close I; |
195 | |
196 | These are equivalent |
197 | |
198 | use open ':utf8'; |
199 | use open IO => ':utf8'; |
200 | |
201 | as are these |
202 | |
203 | use open ':locale'; |
204 | use open IO => ':locale'; |
205 | |
206 | and these |
207 | |
208 | use open ':encoding(iso-8859-7)'; |
209 | use open IO => ':encoding(iso-8859-7)'; |
210 | |
b5d8778e |
211 | The matching of encoding names is loose: case does not matter, and |
212 | many encodings have several aliases. See L<Encode::Supported> for |
213 | details and the list of supported locales. |
214 | |
e2d9456f |
215 | Note that C<:utf8> PerlIO layer must always be specified exactly like |
b5d8778e |
216 | that, it is not subject to the loose matching of encoding names. |
217 | |
d151aa0e |
218 | When open() is given an explicit list of layers they are appended to |
219 | the list declared using this pragma. |
7d3b96bb |
220 | |
b178108d |
221 | The C<:std> subpragma on its own has no effect, but if combined with |
222 | the C<:utf8> or C<:encoding> subpragmas, it converts the standard |
223 | filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected |
224 | for input/output handles. For example, if both input and out are |
225 | chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and |
226 | STDERR are also in C<:utf8>. On the other hand, if only output is |
fb80c70c |
227 | chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the |
b178108d |
228 | STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma |
229 | implicitly turns on C<:std>. |
230 | |
7c0e976d |
231 | The logic of C<:locale> is described in full in L</encoding>, |
232 | but in short it is first trying nl_langinfo(CODESET) and then |
233 | guessing from the LC_ALL and LANG locale environment variables. |
b310b053 |
234 | |
e2d9456f |
235 | Directory handles may also support PerlIO layers in the future. |
7d3b96bb |
236 | |
237 | =head1 NONPERLIO FUNCTIONALITY |
238 | |
d151aa0e |
239 | If Perl is not built to use PerlIO as its IO system then only the two |
e2d9456f |
240 | pseudo-layers C<:bytes> and C<:crlf> are available. |
16fe6d59 |
241 | |
e2d9456f |
242 | The C<:bytes> layer corresponds to "binary mode" and the C<:crlf> |
243 | layer corresponds to "text mode" on platforms that distinguish |
16fe6d59 |
244 | between the two modes when opening files (which is many DOS-like |
e2d9456f |
245 | platforms, including Windows). These two layers are no-ops on |
d151aa0e |
246 | platforms where binmode() is a no-op, but perform their functions |
247 | everywhere if PerlIO is enabled. |
7d3b96bb |
248 | |
249 | =head1 IMPLEMENTATION DETAILS |
d1edabcf |
250 | |
f3b00462 |
251 | There is a class method in C<PerlIO::Layer> C<find> which is |
252 | implemented as XS code. It is called by C<import> to validate the |
253 | layers: |
0c4f7ff0 |
254 | |
255 | PerlIO::Layer::->find("perlio") |
256 | |
f3b00462 |
257 | The return value (if defined) is a Perl object, of class |
258 | C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As |
259 | yet there is nothing useful you can do with the object at the perl |
260 | level. |
16fe6d59 |
261 | |
d1edabcf |
262 | =head1 SEE ALSO |
263 | |
1768d7eb |
264 | L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>, |
265 | L<encoding> |
d1edabcf |
266 | |
267 | =cut |