package Encode::Alias;
use strict;
+no warnings 'redefine';
use Encode;
-our $VERSION = do { my @r = (q$Revision: 1.32 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-our $DEBUG = 0;
+our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+sub DEBUG () { 0 }
use base qw(Exporter);
sub find_alias
{
my $class = shift;
- local $_ = shift;
- unless (exists $Alias{$_})
+ my $find = shift;
+ unless (exists $Alias{$find})
{
- $Alias{$_} = undef; # Recursion guard
+ $Alias{$find} = undef; # Recursion guard
for (my $i=0; $i < @Alias; $i += 2)
{
my $alias = $Alias[$i];
my $val = $Alias[$i+1];
my $new;
- if (ref($alias) eq 'Regexp' && $_ =~ $alias)
+ if (ref($alias) eq 'Regexp' && $find =~ $alias)
{
- $DEBUG and warn "eval $val";
+ DEBUG and warn "eval $val";
$new = eval $val;
- # $@ and warn "$val, $@";
+ DEBUG and $@ and warn "$val, $@";
}
elsif (ref($alias) eq 'CODE')
{
- $DEBUG and warn "$alias", "->", "($val)";
- $new = $alias->($val);
+ DEBUG and warn "$alias", "->", "($find)";
+ $new = $alias->($find);
}
- elsif (lc($_) eq lc($alias))
+ elsif (lc($find) eq lc($alias))
{
$new = $val;
}
if (defined($new))
{
- next if $new eq $_; # avoid (direct) recursion on bugs
- $DEBUG and warn "$alias, $new";
+ next if $new eq $find; # avoid (direct) recursion on bugs
+ DEBUG and warn "$alias, $new";
my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
if ($enc)
{
- $Alias{$_} = $enc;
+ $Alias{$find} = $enc;
last;
}
}
}
}
- if ($DEBUG){
+ if (DEBUG){
my $name;
- if (my $e = $Alias{$_}){
+ if (my $e = $Alias{$find}){
$name = $e->name;
}else{
$name = "";
}
- warn "find_alias($class, $_)->name = $name";
+ warn "find_alias($class, $find)->name = $name";
}
- return $Alias{$_};
+ return $Alias{$find};
}
sub define_alias
for my $k (@a){
if (ref($alias) eq 'Regexp' && $k =~ $alias)
{
- $DEBUG and warn "delete \$Alias\{$k\}";
+ DEBUG and warn "delete \$Alias\{$k\}";
delete $Alias{$k};
}
elsif (ref($alias) eq 'CODE')
{
- $DEBUG and warn "delete \$Alias\{$k\}";
+ DEBUG and warn "delete \$Alias\{$k\}";
delete $Alias{$alias->($name)};
}
}
}else{
- $DEBUG and warn "delete \$Alias\{$alias\}";
+ DEBUG and warn "delete \$Alias\{$alias\}";
delete $Alias{$alias};
}
}
define_alias( qr/^(.*)$/ => '"\L$1"' );
# UTF/UCS stuff
+ define_alias( qr/^UTF-?7$/i => '"UTF-7"');
define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' );
define_alias( qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"',
qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")',
# define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
# Standardize on the dashed versions.
- # define_alias( qr/\butf8$/i => 'utf-8' );
- define_alias( qr/\bkoi8r$/i => 'koi8-r' );
- define_alias( qr/\bkoi8u$/i => 'koi8-u' );
+ # define_alias( qr/\butf8$/i => '"utf-8"' );
+ # Accept koi8r/koi8u with any run of whitespace, '-' or '_' before the suffix; the hyphen is escaped so it is not parsed as a range.
+ define_alias( qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"' );
unless ($Encode::ON_EBCDIC){
# for Encode::CN
# CP936 doesn't have vendor-addon for GBK, so they're identical.
define_alias( qr/^gbk$/i => '"cp936"');
# This fixes gb2312 vs. euc-cn confusion, practically
- define_alias( qr/\bGB[-_ ]?2312(?:\D.*$|$)/i => '"euc-cn"' );
+ define_alias( qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"' );
# for Encode::JP
define_alias( qr/\bjis$/i => '"7bit-jis"' );
define_alias( qr/\beuc.*jp$/i => '"euc-jp"' );
define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' );
# for Encode::TW
define_alias( qr/\bbig-?5$/i => '"big5-eten"' );
- define_alias( qr/\bbig5-?et(?:en)$/i => '"big5-eten"' );
+ define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' );
define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' );
define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' );
define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' );
way to alias names as used in X11 fonts to the MIME names for the
iso-8859-* family. Note the double quotes inside the single quotes.
+(You don't have to do this yourself, because this example is predefined.)
+
If you are using a regex here, you have to use the quotes as shown or
it won't work. Also note that regex handling is tricky even for the
-experienced. Use it with caution.
+experienced. Use this feature with caution.
=item As a code reference, e.g.:
- define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
+ define_alias( sub {shift =~ /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
-In this case, C<$_> will be set to the name that is being looked up and
-I<ENCODING> is passed to the sub as its first argument. The example
-is another way to alias names as used in X11 fonts to the MIME names
-for the iso-8859-* family.
+This has the same effect as the example above, achieved in a different
+way. The coderef takes the alias name as an argument and returns a
+canonical name on success or undef if not. Note the second argument is
+not required. Use this with even more caution than the regex version.
=back
+=head3 Changes in code reference aliasing
+
+As of Encode 1.87, the older form
+
+ define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } );
+
+no longer works.
+
+Encode up to 1.86 internally used "local $_" to implement this older
+form. But consider the code below:
+
+ use Encode;
+ $_ = "eeeee" ;
+ while (/(e)/g) {
+ my $utf = decode('aliased-encoding-name', $1);
+ print "position:",pos,"\n";
+ }
+
+Up to and including Encode 1.86, this fails because of "local $_".
+
=head2 Alias overloading
You can override predefined aliases by simply applying define_alias().