From: Nick Ing-Simmons Date: Tue, 9 Apr 2002 12:38:25 +0000 (+0000) Subject: Patch up Encode for Tk X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=eaac0a1587c48d54e8c05b8c33d2a713ed231f06;p=p5sagit%2Fp5-mst-13.2.git Patch up Encode for Tk p4raw-id: //depot/perlio@15826 --- diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm index 0c4b884..56e75ea 100644 --- a/ext/Encode/lib/Encode/Alias.pm +++ b/ext/Encode/lib/Encode/Alias.pm @@ -9,7 +9,7 @@ our @ISA = qw(Exporter); # Public, encouraged API is exported by default -our @EXPORT = +our @EXPORT = qw ( define_alias find_alias @@ -24,6 +24,7 @@ sub find_alias local $_ = shift; unless (exists $Alias{$_}) { + $Alias{$_} = undef; # Recursion guard for (my $i=0; $i < @Alias; $i += 2) { my $alias = $Alias[$i]; @@ -155,11 +156,11 @@ sub init_aliases # has been redefined as the euro symbol.) define_alias( qr/^(.+)\@euro$/i => '"$1"' ); - define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i + define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i => '"iso-8859-$Encode::Alias::Latin2iso[$1]"' ); define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish| - hebrew|arabic|baltic|vietnamese)$/ix => + hebrew|arabic|baltic|vietnamese)$/ix => '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' ); # Common names for non-latin prefered MIME names @@ -174,7 +175,7 @@ sub init_aliases # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN. # And Microsoft has their own naming (again, surprisingly). - # And windows-* is registered in IANA! + # And windows-* is registered in IANA! define_alias( qr/\b(?:ibm|ms|windows)[-_]?(\d\d\d\d?)$/i => '"cp$1"'); # Sometimes seen with a leading zero. @@ -186,7 +187,7 @@ sub init_aliases define_alias( qr/^mac_(.*)$/i => '"mac$1"'); # Ououououou. gone. They are differente! # define_alias( qr/\bmacRomanian$/i => '"macRumanian"'); - + # Standardize on the dashed versions. # define_alias( qr/\butf8$/i => 'utf-8' ); define_alias( qr/\bkoi8r$/i => 'koi8-r' ); @@ -254,7 +255,7 @@ Encode::Alias - alias definitions to encodings =head1 DESCRIPTION Allows newName to be used as an alias for ENCODING. ENCODING may be -either the name of an encoding or an encoding object (as described +either the name of an encoding or an encoding object (as described in L). Currently I can be specified in the following ways: diff --git a/ext/Encode/lib/Encode/Unicode.pm b/ext/Encode/lib/Encode/Unicode.pm index 03fab5c..7f1ad53 100644 --- a/ext/Encode/lib/Encode/Unicode.pm +++ b/ext/Encode/lib/Encode/Unicode.pm @@ -18,7 +18,7 @@ sub valid_ucs2($){ if ($_[0] < 0xD800){ return $_[0] > 0; }else{ - return ($_[0] > 0xDFFFF && $_[0] <= 0xFFFF); + return ($_[0] > 0xDFFF && $_[0] <= 0xFFFF); } } @@ -88,7 +88,7 @@ sub set_transcoder{ *decode = \&decode_classic; *encode = \&encode_classic; }else{ - require Carp; + require Carp; Carp::croak __PACKAGE__, "::set_transcoder(modern|classic)"; } } @@ -115,7 +115,7 @@ sub decode_modern my $ord = shift @ord; unless ($size == 4 or valid_ucs2($ord &= $mask)){ if ($ucs2){ - $chk and + $chk and poisoned2death($obj, "no surrogates allowed", $ord); shift @ord; # skip the next one as well $ord = FBCHAR; @@ -151,12 +151,12 @@ sub encode_modern unless ($size == 4 or valid_ucs2($ord)) { unless(issurrogate($ord)){ if ($ucs2){ - $chk and + $chk and poisoned2death($obj, "code point too high", $ord); push @str, FBCHAR; }else{ - + push @str, ensurrogate($ord); } }else{ # not supposed to happen @@ -188,7 +188,7 @@ sub decode_classic my $ord = unpack($endian, substr($str, 0, $size, '')); unless ($size == 4 or valid_ucs2($ord &= $mask)){ if ($ucs2){ - $chk and + $chk and poisoned2death($obj, "no surrogates allowed", $ord); substr($str,0,$size,''); # skip the next one as well $ord = FBCHAR; @@ -224,7 +224,7 @@ sub encode_classic unless ($size == 4 or valid_ucs2($ord)) { unless(issurrogate($ord)){ if ($ucs2){ - $chk and + $chk and poisoned2death($obj, "code point too high", $ord); $str .= pack($endian, FBCHAR); }else{ @@ -244,7 +244,7 @@ sub BOMB { my ($size, $bom) = @_; my $N = $size == 2 ? 'n' : 'N'; my $ord = unpack($N, $bom); - return ($ord eq BOM_BE) ? $N : + return ($ord eq BOM_BE) ? $N : ($ord eq BOM16LE) ? 'v' : ($ord eq BOM32LE) ? 'V' : undef; } @@ -267,7 +267,7 @@ Encode::Unicode -- Various Unicode Transform Format =head1 SYNOPSIS - use Encode qw/encode decode/; + use Encode qw/encode decode/; $ucs2 = encode("UCS-2BE", $utf8); $utf8 = decode("UCS-2BE", $ucs2); @@ -349,7 +349,7 @@ LE 0xFFeF 0xFFFe0000 ------------------------- =back - + This modules handles BOM as follows. =over 4 @@ -363,7 +363,7 @@ simply treated as one of characters (ZERO WIDTH NO-BREAK SPACE). When BE or LE is omitted during decode(), it checks if BOM is in the beginning of the string and if found endianness is set to what BOM -says. if not found, dies. +says. if not found, dies. =item * diff --git a/ext/Encode/t/Aliases.t b/ext/Encode/t/Aliases.t index 4c6570f..8f9d3e3 100644 --- a/ext/Encode/t/Aliases.t +++ b/ext/Encode/t/Aliases.t @@ -26,6 +26,7 @@ sub init_a2c{ 'UCS2' => 'UCS-2BE', 'iso-10646-1' => 'UCS-2BE', 'ucs2-le' => 'UCS-2LE', + 'ucs2-be' => 'UCS-2BE', 'utf16' => 'UTF-16', 'utf32' => 'UTF-32', 'utf16-be' => 'UTF-16BE', @@ -112,9 +113,9 @@ use Test::More tests => (scalar keys %a2c) * 4; print "# alias test; \$ON_EBCDIC == $ON_EBCDIC\n"; -foreach my $a (keys %a2c){ +foreach my $a (keys %a2c){ my $e = Encode::find_encoding($a); - is((defined($e) and $e->name), $a2c{$a}) + is((defined($e) and $e->name), $a2c{$a},$a) or warn "alias was $a";; } @@ -130,18 +131,18 @@ define_alias( print "# alias test with alias overrides\n"; -foreach my $a (keys %a2c){ +foreach my $a (keys %a2c){ my $e = Encode::find_encoding($a); - is((defined($e) and $e->name), $a2c{$a}) + is((defined($e) and $e->name), $a2c{$a}, "Override $a") or warn "alias was $a"; } print "# alias undef test\n"; Encode::Alias->undef_aliases; -foreach my $a (keys %a2c){ +foreach my $a (keys %a2c){ my $e = Encode::find_encoding($a); - ok(!defined($e) || $e->name =~ /-raw$/o) + ok(!defined($e) || $e->name =~ /-raw$/o,"Undef $a") or warn "alias was $a"; } @@ -149,9 +150,9 @@ print "# alias reinit test\n"; Encode::Alias->init_aliases; init_a2c(); -foreach my $a (keys %a2c){ +foreach my $a (keys %a2c){ my $e = Encode::find_encoding($a); - is((defined($e) and $e->name), $a2c{$a}) + is((defined($e) and $e->name), $a2c{$a}, "Reinit $a") or warn "alias was $a"; } __END__