extra code in pp_concat, Take 2
[p5sagit/p5-mst-13.2.git] / lib / open.pm
CommitLineData
d1edabcf 1package open;
99ef548b 2use warnings;
ac27b0f5 3use Carp;
9cfe5470 4$open::hint_bits = 0x20000; # HINT_LOCALIZE_HH
16fe6d59 5
7c0e976d 6our $VERSION = '1.04';
b75c8c73 7
7c0e976d 8require 5.008001; # for PerlIO::get_layers()
58d53262 9
b4ebbc94 10my $locale_encoding;
a4157ebb 11
7c0e976d 12sub _get_encname {
b4ebbc94 13 return ($1, Encode::resolve_alias($1)) if $_[0] =~ /^:?encoding\((.+)\)$/;
7c0e976d 14 return;
15}
a4157ebb 16
7c0e976d 17sub _drop_oldenc {
18 # If by the time we arrive here there already is at the top of the
19 # perlio layer stack an encoding identical to what we would like
20 # to push via this open pragma, we will pop away the old encoding
21 # (+utf8) so that we can push ourselves in place (this is easier
22 # than ignoring pushing ourselves because of the way how ${^OPEN}
23 # works). So we are looking for something like
24 #
25 # stdio encoding(xxx) utf8
26 #
27 # in the existing layer stack, and in the new stack chunk for
28 #
29 # :encoding(xxx)
30 #
31 # If we find a match, we pop the old stack (once, since
32 # the utf8 is just a flag on the encoding layer)
33 my ($h, @new) = @_;
34 return unless @new >= 1 && $new[-1] =~ /^:encoding\(.+\)$/;
35 my @old = PerlIO::get_layers($h);
36 return unless @old >= 3 &&
00243fce 37 $old[-1] eq 'utf8' &&
7c0e976d 38 $old[-2] =~ /^encoding\(.+\)$/;
b4ebbc94 39 require Encode;
7c0e976d 40 my ($loname, $lcname) = _get_encname($old[-2]);
41 unless (defined $lcname) { # Should we trust get_layers()?
42 require Carp;
43 Carp::croak("open: Unknown encoding '$loname'");
44 }
45 my ($voname, $vcname) = _get_encname($new[-1]);
46 unless (defined $vcname) {
47 require Carp;
48 Carp::croak("open: Unknown encoding '$voname'");
49 }
50 if ($lcname eq $vcname) {
51 binmode($h, ":pop"); # utf8 is part of the encoding layer
58d53262 52 }
53}
54
16fe6d59 55sub import {
dfebf958 56 my ($class,@args) = @_;
7c0e976d 57 croak("open: needs explicit list of PerlIO layers") unless @args;
b178108d 58 my $std;
16fe6d59 59 $^H |= $open::hint_bits;
ba6ce41c 60 my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
dfebf958 61 while (@args) {
62 my $type = shift(@args);
1e616cf5 63 my $dscp;
64 if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
65 $type = 'IO';
66 $dscp = ":$1";
b178108d 67 } elsif ($type eq ':std') {
68 $std = 1;
69 next;
1e616cf5 70 } else {
725d232a 71 $dscp = shift(@args) || '';
1e616cf5 72 }
ac27b0f5 73 my @val;
1e616cf5 74 foreach my $layer (split(/\s+/,$dscp)) {
dfebf958 75 $layer =~ s/^://;
58d53262 76 if ($layer eq 'locale') {
54cfe943 77 require Encode;
b4ebbc94 78 require encoding;
79 $locale_encoding = encoding::_get_locale_encoding()
58d53262 80 unless defined $locale_encoding;
99ef548b 81 (warnings::warnif("layer", "Cannot figure out an encoding to use"), last)
58d53262 82 unless defined $locale_encoding;
11fc5dc3 83 if ($locale_encoding =~ /^utf-?8$/i) {
84 $layer = "utf8";
85 } else {
738b23dc 86 $layer = "encoding($locale_encoding)";
11fc5dc3 87 }
b178108d 88 $std = 1;
97ed432b 89 } else {
011f8d22 90 my $target = $layer; # the layer name itself
91 $target =~ s/^(\w+)\(.+\)$/$1/; # strip parameters
92
c7732655 93 unless(PerlIO::Layer::->find($target,1)) {
94 warnings::warnif("layer", "Unknown PerlIO layer '$target'");
97ed432b 95 }
ac27b0f5 96 }
97 push(@val,":$layer");
98 if ($layer =~ /^(crlf|raw)$/) {
99 $^H{"open_$type"} = $layer;
16fe6d59 100 }
ac27b0f5 101 }
102 if ($type eq 'IN') {
7c0e976d 103 _drop_oldenc(*STDIN, @val);
104 $in = join(' ', @val);
ac27b0f5 105 }
106 elsif ($type eq 'OUT') {
7c0e976d 107 _drop_oldenc(*STDOUT, @val);
108 $out = join(' ', @val);
16fe6d59 109 }
1e616cf5 110 elsif ($type eq 'IO') {
7c0e976d 111 _drop_oldenc(*STDIN, @val);
112 _drop_oldenc(*STDOUT, @val);
113 $in = $out = join(' ', @val);
f3b00462 114 }
16fe6d59 115 else {
e2d9456f 116 croak "Unknown PerlIO layer class '$type'";
16fe6d59 117 }
118 }
7c0e976d 119 ${^OPEN} = join("\0", $in, $out);
b178108d 120 if ($std) {
121 if ($in) {
122 if ($in =~ /:utf8\b/) {
123 binmode(STDIN, ":utf8");
124 } elsif ($in =~ /(\w+\(.+\))/) {
125 binmode(STDIN, ":$1");
126 }
127 }
128 if ($out) {
129 if ($out =~ /:utf8\b/) {
130 binmode(STDOUT, ":utf8");
131 binmode(STDERR, ":utf8");
132 } elsif ($out =~ /(\w+\(.+\))/) {
133 binmode(STDOUT, ":$1");
134 binmode(STDERR, ":$1");
135 }
136 }
137 }
16fe6d59 138}
139
1401;
141__END__
d1edabcf 142
143=head1 NAME
144
e2d9456f 145open - perl pragma to set default PerlIO layers for input and output
d1edabcf 146
147=head1 SYNOPSIS
148
d5563ed7 149 use open IN => ":crlf", OUT => ":bytes";
1e616cf5 150 use open OUT => ':utf8';
151 use open IO => ":encoding(iso-8859-7)";
152
153 use open IO => ':locale';
725d232a 154
1e616cf5 155 use open ':utf8';
156 use open ':locale';
157 use open ':encoding(iso-8859-7)';
d1edabcf 158
b178108d 159 use open ':std';
160
d1edabcf 161=head1 DESCRIPTION
162
e2d9456f 163Full-fledged support for I/O layers is now implemented provided
d151aa0e 164Perl is configured to use PerlIO as its IO system (which is now the
165default).
16fe6d59 166
7d3b96bb 167The C<open> pragma serves as one of the interfaces to declare default
16479489 168"layers" (also known as "disciplines") for all I/O. Any two-argument
169open(), readpipe() (aka qx//) and similar operators found within the
170lexical scope of this pragma will use the declared defaults.
171Three-argument opens are not affected by this pragma since there you
172(can) explicitly specify the layers and are supposed to know what you
173are doing.
7d3b96bb 174
1e616cf5 175With the C<IN> subpragma you can declare the default layers
d8d29d4f 176of input streams, and with the C<OUT> subpragma you can declare
1e616cf5 177the default layers of output streams. With the C<IO> subpragma
178you can control both input and output streams simultaneously.
179
180If you have a legacy encoding, you can use the C<:encoding(...)> tag.
181
e2d9456f 182if you want to set your encoding layers based on your
1e616cf5 183locale environment variables, you can use the C<:locale> tag.
184For example:
185
186 $ENV{LANG} = 'ru_RU.KOI8-R';
dbd62f41 187 # the :locale will probe the locale environment variables like LANG
188 use open OUT => ':locale';
1e616cf5 189 open(O, ">koi8");
23bcb45a 190 print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1
1e616cf5 191 close O;
192 open(I, "<koi8");
23bcb45a 193 printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1
1e616cf5 194 close I;
195
196These are equivalent
197
198 use open ':utf8';
199 use open IO => ':utf8';
200
201as are these
202
203 use open ':locale';
204 use open IO => ':locale';
205
206and these
207
208 use open ':encoding(iso-8859-7)';
209 use open IO => ':encoding(iso-8859-7)';
210
b5d8778e 211The matching of encoding names is loose: case does not matter, and
212many encodings have several aliases. See L<Encode::Supported> for
213details and the list of supported locales.
214
e2d9456f 215Note that C<:utf8> PerlIO layer must always be specified exactly like
b5d8778e 216that, it is not subject to the loose matching of encoding names.
217
d151aa0e 218When open() is given an explicit list of layers they are appended to
219the list declared using this pragma.
7d3b96bb 220
b178108d 221The C<:std> subpragma on its own has no effect, but if combined with
222the C<:utf8> or C<:encoding> subpragmas, it converts the standard
223filehandles (STDIN, STDOUT, STDERR) to comply with encoding selected
224for input/output handles. For example, if both input and out are
225chosen to be C<:utf8>, a C<:std> will mean that STDIN, STDOUT, and
226STDERR are also in C<:utf8>. On the other hand, if only output is
fb80c70c 227chosen to be in C<< :encoding(koi8r) >>, a C<:std> will cause only the
b178108d 228STDOUT and STDERR to be in C<koi8r>. The C<:locale> subpragma
229implicitly turns on C<:std>.
230
7c0e976d 231The logic of C<:locale> is described in full in L</encoding>,
232but in short it is first trying nl_langinfo(CODESET) and then
233guessing from the LC_ALL and LANG locale environment variables.
b310b053 234
e2d9456f 235Directory handles may also support PerlIO layers in the future.
7d3b96bb 236
237=head1 NONPERLIO FUNCTIONALITY
238
d151aa0e 239If Perl is not built to use PerlIO as its IO system then only the two
e2d9456f 240pseudo-layers C<:bytes> and C<:crlf> are available.
16fe6d59 241
e2d9456f 242The C<:bytes> layer corresponds to "binary mode" and the C<:crlf>
243layer corresponds to "text mode" on platforms that distinguish
16fe6d59 244between the two modes when opening files (which is many DOS-like
e2d9456f 245platforms, including Windows). These two layers are no-ops on
d151aa0e 246platforms where binmode() is a no-op, but perform their functions
247everywhere if PerlIO is enabled.
7d3b96bb 248
249=head1 IMPLEMENTATION DETAILS
d1edabcf 250
f3b00462 251There is a class method in C<PerlIO::Layer> C<find> which is
252implemented as XS code. It is called by C<import> to validate the
253layers:
0c4f7ff0 254
255 PerlIO::Layer::->find("perlio")
256
f3b00462 257The return value (if defined) is a Perl object, of class
258C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As
259yet there is nothing useful you can do with the object at the perl
260level.
16fe6d59 261
d1edabcf 262=head1 SEE ALSO
263
1768d7eb 264L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
265L<encoding>
d1edabcf 266
267=cut