NEXT LINE --> NEL
[p5sagit/p5-mst-13.2.git] / ext / Encode / lib / Encode / Alias.pm
CommitLineData
5d030b67 1package Encode::Alias;
2use strict;
5129552c 3use Encode;
a63c962f 4our $VERSION = do { my @r = (q$Revision: 0.98 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
5d030b67 5our $DEBUG = 0;
6require Exporter;
7
8our @ISA = qw(Exporter);
9
10# Public, encouraged API is exported by default
5129552c 11
12our @EXPORT =
13 qw (
14 define_alias
15 find_alias
16 );
5d030b67 17
18our @Alias; # ordered matching list
19our %Alias; # cached known aliases
20
5129552c 21sub find_alias
5d030b67 22{
23 my $class = shift;
24 local $_ = shift;
25 unless (exists $Alias{$_})
26 {
27 for (my $i=0; $i < @Alias; $i += 2)
28 {
29 my $alias = $Alias[$i];
30 my $val = $Alias[$i+1];
31 my $new;
32 if (ref($alias) eq 'Regexp' && $_ =~ $alias)
33 {
a63c962f 34 $DEBUG and warn "eval $val";
5d030b67 35 $new = eval $val;
36 # $@ and warn "$val, $@";
37 }
38 elsif (ref($alias) eq 'CODE')
39 {
a63c962f 40 $DEBUG and warn "$alias", "->", "($val)";
5d030b67 41 $new = $alias->($val);
42 }
43 elsif (lc($_) eq lc($alias))
44 {
45 $new = $val;
46 }
47 if (defined($new))
48 {
49 next if $new eq $_; # avoid (direct) recursion on bugs
a63c962f 50 $DEBUG and warn "$alias, $new";
5d030b67 51 my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
52 if ($enc)
53 {
54 $Alias{$_} = $enc;
55 last;
56 }
57 }
58 }
59 }
a63c962f 60 if ($DEBUG){
61 my $name;
62 if (my $e = $Alias{$_}){
63 $name = $e->name;
64 }else{
65 $name = "";
66 }
67 warn "find_alias($class, $_)->name = $name";
68 }
5d030b67 69 return $Alias{$_};
70}
71
72sub define_alias
73{
74 while (@_)
75 {
76 my ($alias,$name) = splice(@_,0,2);
77 unshift(@Alias, $alias => $name); # newer one has precedence
78 # clear %Alias cache to allow overrides
79 if (ref($alias)){
5129552c 80 my @a = keys %Alias;
81 for my $k (@a){
5d030b67 82 if (ref($alias) eq 'Regexp' && $k =~ $alias)
83 {
a63c962f 84 $DEBUG and warn "delete \$Alias\{$k\}";
5d030b67 85 delete $Alias{$k};
86 }
87 elsif (ref($alias) eq 'CODE')
88 {
a63c962f 89 $DEBUG and warn "delete \$Alias\{$k\}";
5d030b67 90 delete $Alias{$alias->($name)};
91 }
92 }
93 }else{
a63c962f 94 $DEBUG and warn "delete \$Alias\{$alias\}";
5d030b67 95 delete $Alias{$alias};
96 }
97 }
98}
99
5d030b67 100# Allow latin-1 style names as well
101 # 0 1 2 3 4 5 6 7 8 9 10
102our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
5d030b67 103# Allow winlatin1 style names as well
104our %Winlatin2cp = (
105 'latin1' => 1252,
106 'latin2' => 1250,
107 'cyrillic' => 1251,
108 'greek' => 1253,
109 'turkish' => 1254,
110 'hebrew' => 1255,
111 'arabic' => 1256,
112 'baltic' => 1257,
113 'vietnamese' => 1258,
114 );
115
5129552c 116init_aliases();
117
118sub undef_aliases{
119 @Alias = ();
120 %Alias = ();
121}
122
123sub init_aliases
124{
125 undef_aliases();
126# Allow variants of iso-8859-1 etc.
127 define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
128
129# At least HP-UX has these.
130 define_alias( qr/^iso8859(\d+)$/i => '"iso-8859-$1"' );
131
132# More HP stuff.
133 define_alias( qr/^(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' );
134
135# The Official name of ASCII.
136 define_alias( qr/^ANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
137
138# This is a font issue, not an encoding issue.
139# (The currency symbol of the Latin 1 upper half
140# has been redefined as the euro symbol.)
141 define_alias( qr/^(.+)\@euro$/i => '"$1"' );
142
143 define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i
144 => '"iso-8859-$Encode::Alias::Latin2iso[$1]"' );
145
146 define_alias( qr/win(latin[12]|cyrillic|baltic|greek|turkish|
147 hebrew|arabic|baltic|vietnamese)$/ix =>
148 '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );
5d030b67 149
150# Common names for non-latin prefered MIME names
5129552c 151 define_alias( 'ascii' => 'US-ascii',
152 'cyrillic' => 'iso-8859-5',
153 'arabic' => 'iso-8859-6',
154 'greek' => 'iso-8859-7',
155 'hebrew' => 'iso-8859-8',
156 'thai' => 'iso-8859-11',
157 'tis620' => 'iso-8859-11',
158 );
5d030b67 159
160# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
161# And Microsoft has their own naming (again, surprisingly).
5129552c 162 define_alias( qr/^(?:ibm|ms)[-_]?(\d\d\d\d?)$/i => '"cp$1"');
5d030b67 163
164# Sometimes seen with a leading zero.
5129552c 165 define_alias( qr/^cp037$/i => '"cp37"');
5d030b67 166
167# Ououououou.
5129552c 168 define_alias( qr/^macRomanian$/i => '"macRumanian"');
5d030b67 169
170# Standardize on the dashed versions.
a63c962f 171 # define_alias( qr/^utf8$/i => 'utf-8' );
5129552c 172 define_alias( qr/^koi8r$/i => 'koi8-r' );
173 define_alias( qr/^koi8u$/i => 'koi8-u' );
174
a63c962f 175 unless ($Encode::ON_EBCDIC){
176 # for Encode::CN
177 define_alias( qr/euc.*cn$/i => '"euc-cn"' );
178 define_alias( qr/cn.*euc/i => '"euc-cn"' );
179 define_alias( qr/^GB[- ]?(\d+)$/i => '"gb$1"' );
180 # for Encode::JP
181 define_alias( qr/euc.*jp$/i => '"euc-jp"' );
182 define_alias( qr/jp.*euc/i => '"euc-jp"' );
183 define_alias( qr/ujis$/i => '"euc-jp"' );
184 define_alias( qr/shift.*jis$/i => '"shiftjis"' );
185 define_alias( qr/sjis$/i => '"shiftjis"' );
186 define_alias( qr/^jis$/i => '"7bit-jis"' );
187 # for Encode::KR
188 define_alias( qr/euc.*kr$/i => '"euc-kr"' );
189 define_alias( qr/kr.*euc/i => '"euc-kr"' );
190 # for Encode::TW
191 define_alias( qr/big-?5$/i => '"big5"' );
192 define_alias( qr/big5-hk(?:scs)?/i => '"big5-hkscs"' );
193 }
5129552c 194
195# At last, Map white space and _ to '-'
196 define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
197}
198
1991;
200__END__
5d030b67 201
202# TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
203# TODO: HP-UX '15' encodings japanese15 korean15 roi15
204# TODO: Cyrillic encoding ISO-IR-111 (useful?)
205# TODO: Armenian encoding ARMSCII-8
206# TODO: Hebrew encoding ISO-8859-8-1
207# TODO: Thai encoding TCVN
208# TODO: Korean encoding Johab
209# TODO: Vietnamese encodings VPS
210# TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
211# ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
212# Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
213# Kannada Khmer Korean Laotian Malayalam Mongolian
214# Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
215
5d030b67 216=head1 NAME
217
218Encode::Alias - alias defintions to encodings
219
220=head1 SYNOPSIS
221
5129552c 222 use Encode;
223 use Encode::Alias;
5d030b67 224 define_alias( newName => ENCODING);
225
226=head1 DESCRIPTION
227
228Allows newName to be used as am alias for ENCODING. ENCODING may be
229either the name of an encoding or and encoding object (as described in L<Encode>).
230
231Currently I<newName> can be specified in the following ways:
232
233=over 4
234
235=item As a simple string.
236
237=item As a qr// compiled regular expression, e.g.:
238
239 define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
240
241In this case if I<ENCODING> is not a reference it is C<eval>-ed to
242allow C<$1> etc. to be subsituted. The example is one way to names as
243used in X11 font names to alias the MIME names for the iso-8859-*
244family. Note the double quote inside the single quote.
245
246If you are using regex here, you have to do so or it won't work in
247this case. Also not regex is tricky even for the experienced. Use it
248with caution.
249
250=item As a code reference, e.g.:
251
252 define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
253
254In this case C<$_> will be set to the name that is being looked up and
255I<ENCODING> is passed to the sub as its first argument. The example
256is another way to names as used in X11 font names to alias the MIME
257names for the iso-8859-* family.
258
5129552c 259=back
260
261=head2 Alias overloading
5d030b67 262
263You can override predefined aliases by simply applying define_alias().
264New alias is always evaluated first and when neccessary define_alias()
265flushes internal cache to make new definition available.
266
267 # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
268 # superset of SHIFT_JIS
269
5129552c 270 define_alias( qr/shift.*jis$/i => '"cp932"' );
271 define_alias( qr/sjis$/i => '"cp932"' );
272
273If you want to zap all predefined aliases, you can
274
275 Encode::Alias->undef_aliases;
276
277to do so. And
278
279 Encode::Alias->init_aliases;
280
281gets factory setting back.
282
5d030b67 283
284=head1 SEE ALSO
285
286L<Encode>, L<Encode::Supported>
287
5129552c 288=cut
5d030b67 289