Commit | Line | Data |
d1edabcf |
1 | package open; |
ac27b0f5 |
2 | use Carp; |
16fe6d59 |
3 | $open::hint_bits = 0x20000; |
4 | |
0c4f7ff0 |
5 | our $VERSION = '1.01'; |
b75c8c73 |
6 | |
58d53262 |
7 | my $locale_encoding; |
8 | |
9 | sub in_locale { $^H & $locale::hint_bits } |
10 | |
11 | sub _get_locale_encoding { |
12 | unless (defined $locale_encoding) { |
ba6ce41c |
13 | eval { |
14 | # I18N::Langinfo isn't available everywhere |
15 | require I18N::Langinfo; |
16 | I18N::Langinfo->import('langinfo', 'CODESET'); |
17 | }; |
58d53262 |
18 | unless ($@) { |
ba6ce41c |
19 | $locale_encoding = langinfo(CODESET()); |
58d53262 |
20 | } |
11fc5dc3 |
21 | my $country_language; |
58d53262 |
22 | if (not $locale_encoding && in_locale()) { |
11fc5dc3 |
23 | if ($ENV{LC_ALL} =~ /^([^.]+)\.([^.]+)$/) { |
24 | ($country_language, $locale_encoding) = ($1, $2); |
25 | } elsif ($ENV{LANG} =~ /^([^.]+)\.([^.]+)$/) { |
26 | ($country_language, $locale_encoding) = ($1, $2); |
58d53262 |
27 | } |
1e616cf5 |
28 | } elsif (not $locale_encoding) { |
29 | if ($ENV{LC_ALL} =~ /\butf-?8\b/i || |
30 | $ENV{LANG} =~ /\butf-?8\b/i) { |
31 | $locale_encoding = 'utf8'; |
32 | } |
33 | # Could do more heuristics based on the country and language |
58d53262 |
34 | # parts of LC_ALL and LANG (the parts before the dot (if any)), |
35 | # since we have Locale::Country and Locale::Language available. |
36 | # TODO: get a database of Language -> Encoding mappings |
421e5dc3 |
37 | # (the Estonian database at http://www.eki.ee/letter/ |
38 | # would be excellent!) --jhi |
58d53262 |
39 | } |
11fc5dc3 |
40 | if (defined $locale_encoding && |
41 | $locale_encoding eq 'euc' && |
42 | defined $country_language) { |
56fb2e42 |
43 | if ($country_language =~ /^ja_JP|japan(?:ese)?$/i) { |
1e616cf5 |
44 | $locale_encoding = 'euc-jp'; |
5a192dee |
45 | } elsif ($country_language =~ /^ko_KR|korean?$/i) { |
1e616cf5 |
46 | $locale_encoding = 'euc-kr'; |
56fb2e42 |
47 | } elsif ($country_language =~ /^zh_TW|taiwan(?:ese)?$/i) { |
1e616cf5 |
48 | $locale_encoding = 'euc-tw'; |
11fc5dc3 |
49 | } |
50 | croak "Locale encoding 'euc' too ambiguous" |
51 | if $locale_encoding eq 'euc'; |
52 | } |
58d53262 |
53 | } |
54 | } |
55 | |
16fe6d59 |
56 | sub import { |
dfebf958 |
57 | my ($class,@args) = @_; |
58 | croak("`use open' needs explicit list of disciplines") unless @args; |
16fe6d59 |
59 | $^H |= $open::hint_bits; |
ba6ce41c |
60 | my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1); |
dfebf958 |
61 | while (@args) { |
62 | my $type = shift(@args); |
1e616cf5 |
63 | my $dscp; |
64 | if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) { |
65 | $type = 'IO'; |
66 | $dscp = ":$1"; |
67 | } else { |
68 | $dscp = shift(@args); |
69 | } |
ac27b0f5 |
70 | my @val; |
1e616cf5 |
71 | foreach my $layer (split(/\s+/,$dscp)) { |
dfebf958 |
72 | $layer =~ s/^://; |
58d53262 |
73 | if ($layer eq 'locale') { |
74 | use Encode; |
75 | _get_locale_encoding() |
76 | unless defined $locale_encoding; |
77 | croak "Cannot figure out an encoding to use" |
78 | unless defined $locale_encoding; |
11fc5dc3 |
79 | if ($locale_encoding =~ /^utf-?8$/i) { |
80 | $layer = "utf8"; |
81 | } else { |
738b23dc |
82 | $layer = "encoding($locale_encoding)"; |
11fc5dc3 |
83 | } |
97ed432b |
84 | } else { |
85 | unless(PerlIO::Layer::->find($layer)) { |
86 | carp("Unknown discipline layer '$layer'"); |
87 | } |
ac27b0f5 |
88 | } |
89 | push(@val,":$layer"); |
90 | if ($layer =~ /^(crlf|raw)$/) { |
91 | $^H{"open_$type"} = $layer; |
16fe6d59 |
92 | } |
ac27b0f5 |
93 | } |
738b23dc |
94 | # print "# type = $type, val = @val\n"; |
ac27b0f5 |
95 | if ($type eq 'IN') { |
96 | $in = join(' ',@val); |
97 | } |
98 | elsif ($type eq 'OUT') { |
99 | $out = join(' ',@val); |
16fe6d59 |
100 | } |
1e616cf5 |
101 | elsif ($type eq 'IO') { |
f3b00462 |
102 | $in = $out = join(' ',@val); |
103 | } |
16fe6d59 |
104 | else { |
ac27b0f5 |
105 | croak "Unknown discipline class '$type'"; |
16fe6d59 |
106 | } |
107 | } |
1e616cf5 |
108 | ${^OPEN} = join("\0",$in,$out); |
16fe6d59 |
109 | } |
110 | |
111 | 1; |
112 | __END__ |
d1edabcf |
113 | |
114 | =head1 NAME |
115 | |
116 | open - perl pragma to set default disciplines for input and output |
117 | |
118 | =head1 SYNOPSIS |
119 | |
1e616cf5 |
120 | use open IN => ":crlf", OUT => ":raw"; |
121 | use open OUT => ':utf8'; |
122 | use open IO => ":encoding(iso-8859-7)"; |
123 | |
124 | use open IO => ':locale'; |
125 | |
126 | use open ':utf8'; |
127 | use open ':locale'; |
128 | use open ':encoding(iso-8859-7)'; |
d1edabcf |
129 | |
130 | =head1 DESCRIPTION |
131 | |
d151aa0e |
132 | Full-fledged support for I/O disciplines is now implemented provided |
133 | Perl is configured to use PerlIO as its IO system (which is now the |
134 | default). |
16fe6d59 |
135 | |
7d3b96bb |
136 | The C<open> pragma serves as one of the interfaces to declare default |
137 | "layers" (aka disciplines) for all I/O. |
138 | |
139 | The C<open> pragma is used to declare one or more default layers for |
d151aa0e |
140 | I/O operations. Any open(), readpipe() (aka qx//) and similar |
141 | operators found within the lexical scope of this pragma will use the |
142 | declared defaults. |
7d3b96bb |
143 | |
1e616cf5 |
144 | With the C<IN> subpragma you can declare the default layers |
145 | of input sterams, and with the C<OUT> subpragma you can declare |
146 | the default layers of output streams. With the C<IO> subpragma |
147 | you can control both input and output streams simultaneously. |
148 | |
149 | If you have a legacy encoding, you can use the C<:encoding(...)> tag. |
150 | |
151 | if you want to set your encoding disciplines based on your |
152 | locale environment variables, you can use the C<:locale> tag. |
153 | For example: |
154 | |
155 | $ENV{LANG} = 'ru_RU.KOI8-R'; |
dbd62f41 |
156 | # the :locale will probe the locale environment variables like LANG |
157 | use open OUT => ':locale'; |
1e616cf5 |
158 | open(O, ">koi8"); |
23bcb45a |
159 | print O chr(0x430); # Unicode CYRILLIC SMALL LETTER A = KOI8-R 0xc1 |
1e616cf5 |
160 | close O; |
161 | open(I, "<koi8"); |
23bcb45a |
162 | printf "%#x\n", ord(<I>), "\n"; # this should print 0xc1 |
1e616cf5 |
163 | close I; |
164 | |
165 | These are equivalent |
166 | |
167 | use open ':utf8'; |
168 | use open IO => ':utf8'; |
169 | |
170 | as are these |
171 | |
172 | use open ':locale'; |
173 | use open IO => ':locale'; |
174 | |
175 | and these |
176 | |
177 | use open ':encoding(iso-8859-7)'; |
178 | use open IO => ':encoding(iso-8859-7)'; |
179 | |
d151aa0e |
180 | When open() is given an explicit list of layers they are appended to |
181 | the list declared using this pragma. |
7d3b96bb |
182 | |
183 | Directory handles may also support disciplines in future. |
184 | |
185 | =head1 NONPERLIO FUNCTIONALITY |
186 | |
d151aa0e |
187 | If Perl is not built to use PerlIO as its IO system then only the two |
188 | pseudo-disciplines ":raw" and ":crlf" are available. |
16fe6d59 |
189 | |
190 | The ":raw" discipline corresponds to "binary mode" and the ":crlf" |
191 | discipline corresponds to "text mode" on platforms that distinguish |
192 | between the two modes when opening files (which is many DOS-like |
d151aa0e |
193 | platforms, including Windows). These two disciplines are no-ops on |
194 | platforms where binmode() is a no-op, but perform their functions |
195 | everywhere if PerlIO is enabled. |
7d3b96bb |
196 | |
197 | =head1 IMPLEMENTATION DETAILS |
d1edabcf |
198 | |
f3b00462 |
199 | There is a class method in C<PerlIO::Layer> C<find> which is |
200 | implemented as XS code. It is called by C<import> to validate the |
201 | layers: |
0c4f7ff0 |
202 | |
203 | PerlIO::Layer::->find("perlio") |
204 | |
f3b00462 |
205 | The return value (if defined) is a Perl object, of class |
206 | C<PerlIO::Layer> which is created by the C code in F<perlio.c>. As |
207 | yet there is nothing useful you can do with the object at the perl |
208 | level. |
16fe6d59 |
209 | |
d1edabcf |
210 | =head1 SEE ALSO |
211 | |
1768d7eb |
212 | L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>, |
213 | L<encoding> |
d1edabcf |
214 | |
215 | =cut |