# Public, encouraged API is exported by default
-our @EXPORT =
+our @EXPORT =
qw (
define_alias
find_alias
local $_ = shift;
unless (exists $Alias{$_})
{
+ $Alias{$_} = undef; # Recursion guard
for (my $i=0; $i < @Alias; $i += 2)
{
my $alias = $Alias[$i];
# has been redefined as the euro symbol.)
define_alias( qr/^(.+)\@euro$/i => '"$1"' );
- define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i
+ define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i
=> '"iso-8859-$Encode::Alias::Latin2iso[$1]"' );
define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
- hebrew|arabic|baltic|vietnamese)$/ix =>
+ hebrew|arabic|baltic|vietnamese)$/ix =>
'"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );
# Common names for non-latin prefered MIME names
# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
# And Microsoft has their own naming (again, surprisingly).
- # And windows-* is registered in IANA!
+ # And windows-* is registered in IANA!
define_alias( qr/\b(?:ibm|ms|windows)[-_]?(\d\d\d\d?)$/i => '"cp$1"');
# Sometimes seen with a leading zero.
define_alias( qr/^mac_(.*)$/i => '"mac$1"');
# Ououououou. gone. They are differente!
# define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
-
+
# Standardize on the dashed versions.
# define_alias( qr/\butf8$/i => 'utf-8' );
define_alias( qr/\bkoi8r$/i => 'koi8-r' );
=head1 DESCRIPTION
Allows newName to be used as an alias for ENCODING. ENCODING may be
-either the name of an encoding or an encoding object (as described
+either the name of an encoding or an encoding object (as described
in L<Encode>).
Currently I<newName> can be specified in the following ways:
if ($_[0] < 0xD800){
return $_[0] > 0;
}else{
- return ($_[0] > 0xDFFFF && $_[0] <= 0xFFFF);
+ return ($_[0] > 0xDFFF && $_[0] <= 0xFFFF);
}
}
*decode = \&decode_classic;
*encode = \&encode_classic;
}else{
- require Carp;
+ require Carp;
Carp::croak __PACKAGE__, "::set_transcoder(modern|classic)";
}
}
my $ord = shift @ord;
unless ($size == 4 or valid_ucs2($ord &= $mask)){
if ($ucs2){
- $chk and
+ $chk and
poisoned2death($obj, "no surrogates allowed", $ord);
shift @ord; # skip the next one as well
$ord = FBCHAR;
unless ($size == 4 or valid_ucs2($ord)) {
unless(issurrogate($ord)){
if ($ucs2){
- $chk and
+ $chk and
poisoned2death($obj, "code point too high", $ord);
push @str, FBCHAR;
}else{
-
+
push @str, ensurrogate($ord);
}
}else{ # not supposed to happen
my $ord = unpack($endian, substr($str, 0, $size, ''));
unless ($size == 4 or valid_ucs2($ord &= $mask)){
if ($ucs2){
- $chk and
+ $chk and
poisoned2death($obj, "no surrogates allowed", $ord);
substr($str,0,$size,''); # skip the next one as well
$ord = FBCHAR;
unless ($size == 4 or valid_ucs2($ord)) {
unless(issurrogate($ord)){
if ($ucs2){
- $chk and
+ $chk and
poisoned2death($obj, "code point too high", $ord);
$str .= pack($endian, FBCHAR);
}else{
my ($size, $bom) = @_;
my $N = $size == 2 ? 'n' : 'N';
my $ord = unpack($N, $bom);
- return ($ord eq BOM_BE) ? $N :
+ return ($ord eq BOM_BE) ? $N :
($ord eq BOM16LE) ? 'v' : ($ord eq BOM32LE) ? 'V' : undef;
}
=head1 SYNOPSIS
- use Encode qw/encode decode/;
+ use Encode qw/encode decode/;
$ucs2 = encode("UCS-2BE", $utf8);
$utf8 = decode("UCS-2BE", $ucs2);
-------------------------
=back
-
+
This modules handles BOM as follows.
=over 4
When BE or LE is omitted during decode(), it checks if BOM is in the
beginning of the string and if found endianness is set to what BOM
-says. if not found, dies.
+says. if not found, dies.
=item *
'UCS2' => 'UCS-2BE',
'iso-10646-1' => 'UCS-2BE',
'ucs2-le' => 'UCS-2LE',
+ 'ucs2-be' => 'UCS-2BE',
'utf16' => 'UTF-16',
'utf32' => 'UTF-32',
'utf16-be' => 'UTF-16BE',
print "# alias test; \$ON_EBCDIC == $ON_EBCDIC\n";
-foreach my $a (keys %a2c){
+foreach my $a (keys %a2c){
my $e = Encode::find_encoding($a);
- is((defined($e) and $e->name), $a2c{$a})
+ is((defined($e) and $e->name), $a2c{$a},$a)
or warn "alias was $a";;
}
print "# alias test with alias overrides\n";
-foreach my $a (keys %a2c){
+foreach my $a (keys %a2c){
my $e = Encode::find_encoding($a);
- is((defined($e) and $e->name), $a2c{$a})
+ is((defined($e) and $e->name), $a2c{$a}, "Override $a")
or warn "alias was $a";
}
print "# alias undef test\n";
Encode::Alias->undef_aliases;
-foreach my $a (keys %a2c){
+foreach my $a (keys %a2c){
my $e = Encode::find_encoding($a);
- ok(!defined($e) || $e->name =~ /-raw$/o)
+ ok(!defined($e) || $e->name =~ /-raw$/o,"Undef $a")
or warn "alias was $a";
}
Encode::Alias->init_aliases;
init_a2c();
-foreach my $a (keys %a2c){
+foreach my $a (keys %a2c){
my $e = Encode::find_encoding($a);
- is((defined($e) and $e->name), $a2c{$a})
+ is((defined($e) and $e->name), $a2c{$a}, "Reinit $a")
or warn "alias was $a";
}
__END__