4 our $VERSION = do { my @r = (q$Revision: 0.98 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
8 our @ISA = qw(Exporter);
10 # Public, encouraged API is exported by default
18 our @Alias; # ordered matching list
19 our %Alias; # cached known aliases
25 unless (exists $Alias{$_})
27 for (my $i=0; $i < @Alias; $i += 2)
29 my $alias = $Alias[$i];
30 my $val = $Alias[$i+1];
32 if (ref($alias) eq 'Regexp' && $_ =~ $alias)
34 $DEBUG and warn "eval $val";
36 # $@ and warn "$val, $@";
38 elsif (ref($alias) eq 'CODE')
40 $DEBUG and warn "$alias", "->", "($val)";
41 $new = $alias->($val);
43 elsif (lc($_) eq lc($alias))
49 next if $new eq $_; # avoid (direct) recursion on bugs
50 $DEBUG and warn "$alias, $new";
51 my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
62 if (my $e = $Alias{$_}){
67 warn "find_alias($class, $_)->name = $name";
76 my ($alias,$name) = splice(@_,0,2);
77 unshift(@Alias, $alias => $name); # newer one has precedence
78 # clear %Alias cache to allow overrides
82 if (ref($alias) eq 'Regexp' && $k =~ $alias)
84 $DEBUG and warn "delete \$Alias\{$k\}";
87 elsif (ref($alias) eq 'CODE')
89 $DEBUG and warn "delete \$Alias\{$k\}";
90 delete $Alias{$alias->($name)};
94 $DEBUG and warn "delete \$Alias\{$alias\}";
95 delete $Alias{$alias};
100 # Allow latin-1 style names as well
101 # 0 1 2 3 4 5 6 7 8 9 10
102 our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
103 # Allow winlatin1 style names as well
113 'vietnamese' => 1258,
126 # Allow variants of iso-8859-1 etc.
127 define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
129 # At least HP-UX has these.
130 define_alias( qr/^iso8859(\d+)$/i => '"iso-8859-$1"' );
133 define_alias( qr/^(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"${1}8"' );
135 # The Official name of ASCII.
136 define_alias( qr/^ANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' );
138 # This is a font issue, not an encoding issue.
139 # (The currency symbol of the Latin 1 upper half
140 # has been redefined as the euro symbol.)
141 define_alias( qr/^(.+)\@euro$/i => '"$1"' );
143 define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i
144 => '"iso-8859-$Encode::Alias::Latin2iso[$1]"' );
146 define_alias( qr/win(latin[12]|cyrillic|baltic|greek|turkish|
147 hebrew|arabic|baltic|vietnamese)$/ix =>
148 '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );
150 # Common names for non-latin prefered MIME names
151 define_alias( 'ascii' => 'US-ascii',
152 'cyrillic' => 'iso-8859-5',
153 'arabic' => 'iso-8859-6',
154 'greek' => 'iso-8859-7',
155 'hebrew' => 'iso-8859-8',
156 'thai' => 'iso-8859-11',
157 'tis620' => 'iso-8859-11',
160 # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
161 # And Microsoft has their own naming (again, surprisingly).
162 define_alias( qr/^(?:ibm|ms)[-_]?(\d\d\d\d?)$/i => '"cp$1"');
164 # Sometimes seen with a leading zero.
165 define_alias( qr/^cp037$/i => '"cp37"');
168 define_alias( qr/^macRomanian$/i => '"macRumanian"');
170 # Standardize on the dashed versions.
171 # define_alias( qr/^utf8$/i => 'utf-8' );
172 define_alias( qr/^koi8r$/i => 'koi8-r' );
173 define_alias( qr/^koi8u$/i => 'koi8-u' );
175 unless ($Encode::ON_EBCDIC){
177 define_alias( qr/euc.*cn$/i => '"euc-cn"' );
178 define_alias( qr/cn.*euc/i => '"euc-cn"' );
179 define_alias( qr/^GB[- ]?(\d+)$/i => '"gb$1"' );
181 define_alias( qr/euc.*jp$/i => '"euc-jp"' );
182 define_alias( qr/jp.*euc/i => '"euc-jp"' );
183 define_alias( qr/ujis$/i => '"euc-jp"' );
184 define_alias( qr/shift.*jis$/i => '"shiftjis"' );
185 define_alias( qr/sjis$/i => '"shiftjis"' );
186 define_alias( qr/^jis$/i => '"7bit-jis"' );
188 define_alias( qr/euc.*kr$/i => '"euc-kr"' );
189 define_alias( qr/kr.*euc/i => '"euc-kr"' );
191 define_alias( qr/big-?5$/i => '"big5"' );
192 define_alias( qr/big5-hk(?:scs)?/i => '"big5-hkscs"' );
195 # At last, Map white space and _ to '-'
196 define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
202 # TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8
203 # TODO: HP-UX '15' encodings japanese15 korean15 roi15
204 # TODO: Cyrillic encoding ISO-IR-111 (useful?)
205 # TODO: Armenian encoding ARMSCII-8
206 # TODO: Hebrew encoding ISO-8859-8-1
207 # TODO: Thai encoding TCVN
208 # TODO: Korean encoding Johab
209 # TODO: Vietnamese encodings VPS
210 # TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese
211 # ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic
212 # Farsi Georgian Gujarati Gurmukhi Hebrew Japanese
213 # Kannada Khmer Korean Laotian Malayalam Mongolian
214 # Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese
218 Encode::Alias - alias defintions to encodings
224 define_alias( newName => ENCODING);
228 Allows newName to be used as am alias for ENCODING. ENCODING may be
229 either the name of an encoding or and encoding object (as described in L<Encode>).
231 Currently I<newName> can be specified in the following ways:
235 =item As a simple string.
237 =item As a qr// compiled regular expression, e.g.:
239 define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
241 In this case if I<ENCODING> is not a reference it is C<eval>-ed to
242 allow C<$1> etc. to be subsituted. The example is one way to names as
243 used in X11 font names to alias the MIME names for the iso-8859-*
244 family. Note the double quote inside the single quote.
246 If you are using regex here, you have to do so or it won't work in
247 this case. Also not regex is tricky even for the experienced. Use it
250 =item As a code reference, e.g.:
252 define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
254 In this case C<$_> will be set to the name that is being looked up and
255 I<ENCODING> is passed to the sub as its first argument. The example
256 is another way to names as used in X11 font names to alias the MIME
257 names for the iso-8859-* family.
261 =head2 Alias overloading
263 You can override predefined aliases by simply applying define_alias().
264 New alias is always evaluated first and when neccessary define_alias()
265 flushes internal cache to make new definition available.
267 # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a
268 # superset of SHIFT_JIS
270 define_alias( qr/shift.*jis$/i => '"cp932"' );
271 define_alias( qr/sjis$/i => '"cp932"' );
273 If you want to zap all predefined aliases, you can
275 Encode::Alias->undef_aliases;
279 Encode::Alias->init_aliases;
281 gets factory setting back.
286 L<Encode>, L<Encode::Supported>