4 our $VERSION = do { my @r = (q$Revision: 0.96 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
8 our @ISA = qw(Exporter);
10 # Public, encouraged API is exported by default
18 our @Alias; # ordered matching list
19 our %Alias; # cached known aliases
25 unless (exists $Alias{$_})
27 for (my $i=0; $i < @Alias; $i += 2)
29 my $alias = $Alias[$i];
30 my $val = $Alias[$i+1];
32 if (ref($alias) eq 'Regexp' && $_ =~ $alias)
35 # $@ and warn "$val, $@";
37 elsif (ref($alias) eq 'CODE')
39 $new = $alias->($val);
41 elsif (lc($_) eq lc($alias))
47 next if $new eq $_; # avoid (direct) recursion on bugs
48 my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
64 my ($alias,$name) = splice(@_,0,2);
65 unshift(@Alias, $alias => $name); # newer one has precedence
66 # clear %Alias cache to allow overrides
70 if (ref($alias) eq 'Regexp' && $k =~ $alias)
75 elsif (ref($alias) eq 'CODE')
77 delete $Alias{$alias->($name)};
81 delete $Alias{$alias};
86 # Allow latin-1 style names as well
87 # 0 1 2 3 4 5 6 7 8 9 10
88 our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
89 # Allow winlatin1 style names as well
112 # Allow variants of iso-8859-1 etc.
113 define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
115 # At least HP-UX has these.
116 define_alias( qr/^iso8859(\d+)$/i => '"iso-8859-$1"' );
119 define_alias( qr/^(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' );
121 # The Official name of ASCII.
122 define_alias( qr/^ANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
124 # This is a font issue, not an encoding issue.
125 # (The currency symbol of the Latin 1 upper half
126 # has been redefined as the euro symbol.)
127 define_alias( qr/^(.+)\@euro$/i => '"$1"' );
129 define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i
130 => '"iso-8859-$Encode::Alias::Latin2iso[$1]"' );
132 define_alias( qr/win(latin[12]|cyrillic|baltic|greek|turkish|
133 hebrew|arabic|vietnamese)$/ix =>
134 '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' ); # value is eval-ed on a match; lc($1) is the key looked up in %Winlatin2cp
136 # Common names for non-latin preferred MIME names
137 define_alias( 'ascii' => 'US-ascii',
138 'cyrillic' => 'iso-8859-5',
139 'arabic' => 'iso-8859-6',
140 'greek' => 'iso-8859-7',
141 'hebrew' => 'iso-8859-8',
142 'thai' => 'iso-8859-11',
143 'tis620' => 'iso-8859-11',
146 # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
147 # And Microsoft has their own naming (again, surprisingly).
148 define_alias( qr/^(?:ibm|ms)[-_]?(\d\d\d\d?)$/i => '"cp$1"');
150 # Sometimes seen with a leading zero.
151 define_alias( qr/^cp037$/i => '"cp37"');
154 define_alias( qr/^macRomanian$/i => '"macRumanian"');
156 # Standardize on the dashed versions. (Values of Regexp aliases are eval-ed, so the target name needs inner double quotes.)
157 define_alias( qr/^utf8$/i => '"utf-8"' );
158 define_alias( qr/^koi8r$/i => '"koi8-r"' );
159 define_alias( qr/^koi8u$/i => '"koi8-u"' );
162 define_alias( qr/euc.*cn$/i => '"euc-cn"' );
163 define_alias( qr/cn.*euc/i => '"euc-cn"' );
166 define_alias( qr/euc.*jp$/i => '"euc-jp"' );
167 define_alias( qr/jp.*euc/i => '"euc-jp"' );
168 define_alias( qr/ujis$/i => '"euc-jp"' );
169 define_alias( qr/shift.*jis$/i => '"shiftjis"' );
170 define_alias( qr/sjis$/i => '"shiftjis"' );
171 define_alias( qr/^jis$/i => '"7bit-jis"' );
174 define_alias( qr/euc.*kr$/i => '"euc-kr"' );
175 define_alias( qr/kr.*euc/i => '"euc-kr"' );
178 define_alias( qr/big-?5$/i => '"big5"' );
179 define_alias( qr/big5-hk(?:scs)?/i => '"big5-hkscs"' );
181 # Finally, map white space and _ to '-'
182 define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
188 # TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
189 # TODO: HP-UX '15' encodings japanese15 korean15 roi15
190 # TODO: Cyrillic encoding ISO-IR-111 (useful?)
191 # TODO: Armenian encoding ARMSCII-8
192 # TODO: Hebrew encoding ISO-8859-8-1
193 # TODO: Thai encoding TCVN
194 # TODO: Korean encoding Johab
195 # TODO: Vietnamese encodings VPS
196 # TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
197 # ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
198 # Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
199 # Kannada Khmer Korean Laotian Malayalam Mongolian
200 # Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
204 Encode::Alias - alias definitions to encodings
210 define_alias( newName => ENCODING);
214 Allows newName to be used as an alias for ENCODING. ENCODING may be
215 either the name of an encoding or an encoding object (as described in L<Encode>).
217 Currently I<newName> can be specified in the following ways:
221 =item As a simple string.
223 =item As a qr// compiled regular expression, e.g.:
225 define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
227 In this case if I<ENCODING> is not a reference it is C<eval>-ed to
228 allow C<$1> etc. to be substituted. The example is one way to alias names as
229 used in X11 fonts to the MIME names for the iso-8859-*
230 family. Note the double quote inside the single quote.
232 If you are using regex here, you have to do so or it won't work in
233 this case. Also note that regex is tricky even for the experienced. Use it
236 =item As a code reference, e.g.:
238 define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
240 In this case C<$_> will be set to the name that is being looked up and
241 I<ENCODING> is passed to the sub as its first argument. The example
242 is another way to alias names as used in X11 fonts to the MIME
243 names for the iso-8859-* family.
247 =head2 Alias overloading
249 You can override predefined aliases by simply applying define_alias().
250 The newest alias is always evaluated first, and when necessary define_alias()
251 flushes the internal cache to make the new definition available.
253 # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
254 # superset of SHIFT_JIS
256 define_alias( qr/shift.*jis$/i => '"cp932"' );
257 define_alias( qr/sjis$/i => '"cp932"' );
259 If you want to zap all predefined aliases, you can
261 Encode::Alias->undef_aliases;
265 Encode::Alias->init_aliases;
267 gets the factory settings back.
272 L<Encode>, L<Encode::Supported>