ext/Encode/Changes Change Log
ext/Encode/EBCDIC/EBCDIC.pm Encode extension
ext/Encode/EBCDIC/Makefile.PL Encode extension
-ext/Encode/Encode.pm Encode extension
+ext/Encode/Encode.pm Mother of all Encode extensions
ext/Encode/Encode.xs Encode extension
ext/Encode/Encode/Changes.e2x Skeleton file for enc2xs
ext/Encode/Encode/Makefile_PL.e2x Skeleton file for enc2xs
ext/Encode/lib/Encode/Alias.pm Encode extension
ext/Encode/lib/Encode/CJKConstants.pm Encode extension
ext/Encode/lib/Encode/CN/HZ.pm Encode extension
+ext/Encode/lib/Encode/Encoder.pm OO Encoder
ext/Encode/lib/Encode/Encoding.pm Encode extension
ext/Encode/lib/Encode/JP/2022_JP.pm Encode extension
ext/Encode/lib/Encode/JP/2022_JP1.pm Encode extension
ext/Encode/t/JP.t Encode extension test
ext/Encode/t/KR.t Encode extension test
ext/Encode/t/TW.t Encode extension test
-ext/Encode/t/Unicode.t Encode extension test
+ext/Encode/t/Unicode.t Encode extension test
ext/Encode/t/encoding.t encoding extension test
ext/Encode/t/gb2312.euc test data
ext/Encode/t/gb2312.ref test data
-ext/Encode/t/grow.t Encode extension test
+ext/Encode/t/grow.t Encode extension test
ext/Encode/t/jisx0208.euc test data
ext/Encode/t/jisx0208.ref test data
ext/Encode/t/jisx0212.euc test data
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.26 2002/04/07 15:22:04 dankogai Exp $
+# $Id: Changes,v 1.30 2002/04/08 02:34:51 dankogai Exp $
#
-1.26 $Date: 2002/04/07 15:22:04 $
+1.30 $Date: 2002/04/08 02:34:51 $
++ lib/Encode/Encoder.pm
+ Object Oriented Encoder. I reckon something like this is in need.
+! Encode.pm
+! t/Unicode.pm
+! lib/Encode/Supported.pod
+ * autoloading but that prevented upper-case canonicals such as UTF-16
+ is fixed. Now even UTF/UCS are autoloaded!
+ * encodings() is now more intuitive.
+ * t/Unicode.t fixed to explicitly use Unicode.pm -- BOM values are
+ stored therein.
+ * Obligatory fixes to the POD.
+! lib/Encode/Supported.pod
+ Patch from Anton applied.
+ Message-Id: <66641479.20020408033300@motor.ru>
+! Encode.pm
+! lib/Encode/Unicode.pm
+ Cosmetic changes: "bless $obj, $class" => "bless $obj => class"
+
+1.28 2002/04/07 18:58:42
+! MANIFEST
++ t/Unicode.t
++ t/grow.t
+ Just a MANIFEST for those missing files.
+
+1.26 Date: 2002/04/07 15:22:04
! JP/Makefile.PL
! t/Aliases.PL
Schwarn's patches against Makefile.PL has zapped jis*.ucm. Restored.
Typo fixes and improvements by jhi
Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al.
-1.11 $Date: 2002/04/07 15:22:04 $
+1.11 $Date: 2002/04/08 02:34:51 $
+ t/encoding.t
+ t/jperl.t
! MANIFEST
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
require DynaLoader;
);
for my $k (2..11,13..16){
- $ExtModule{"iso-8859-$k"} = 'Encode/Byte.pm';
+ $ExtModule{"iso-8859-$k"} = 'Encode::Byte';
}
for my $k (@codepages){
- $ExtModule{"cp$k"} = 'Encode/Byte.pm';
+ $ExtModule{"cp$k"} = 'Encode::Byte';
}
for my $k (@macintosh)
{
- $ExtModule{"mac$k"} = 'Encode/Byte.pm';
+ $ExtModule{"mac$k"} = 'Encode::Byte';
+}
+
+for my $k (qw(UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
+ UTF-32 UTF-32BE UTF-32LE)){
+ $ExtModule{$k} = 'Encode::Unicode';
}
%ExtModule =
(%ExtModule,
- 'koi8-r' => 'Encode/Byte.pm',
- 'posix-bc' => 'Encode/EBCDIC.pm',
- cp037 => 'Encode/EBCDIC.pm',
- cp1026 => 'Encode/EBCDIC.pm',
- cp1047 => 'Encode/EBCDIC.pm',
- cp500 => 'Encode/EBCDIC.pm',
- cp875 => 'Encode/EBCDIC.pm',
- dingbats => 'Encode/Symbol.pm',
- macDingbats => 'Encode/Symbol.pm',
- macSymbol => 'Encode/Symbol.pm',
- symbol => 'Encode/Symbol.pm',
- viscii => 'Encode/Byte.pm',
+ 'koi8-r' => 'Encode::Byte',
+ 'posix-bc' => 'Encode::EBCDIC',
+ cp37 => 'Encode::EBCDIC',
+ cp1026 => 'Encode::EBCDIC',
+ cp1047 => 'Encode::EBCDIC',
+ cp500 => 'Encode::EBCDIC',
+ cp875 => 'Encode::EBCDIC',
+ dingbats => 'Encode::Symbol',
+ macDingbats => 'Encode::Symbol',
+ macSymbol => 'Encode::Symbol',
+ symbol => 'Encode::Symbol',
+ viscii => 'Encode::Byte',
);
unless ($ON_EBCDIC) { # CJK added to autoload unless EBCDIC env
%ExtModule =
(%ExtModule,
- 'cp936' => 'Encode/CN.pm',
- 'euc-cn' => 'Encode/CN.pm',
- 'gb12345-raw' => 'Encode/CN.pm',
- 'gb2312-raw' => 'Encode/CN.pm',
- 'gbk' => 'Encode/CN.pm',
- 'iso-ir-165' => 'Encode/CN.pm',
-
- '7bit-jis' => 'Encode/JP.pm',
- 'cp932' => 'Encode/JP.pm',
- 'euc-jp' => 'Encode/JP.pm',
- 'iso-2022-jp' => 'Encode/JP.pm',
- 'iso-2022-jp-1' => 'Encode/JP.pm',
- 'jis0201-raw' => 'Encode/JP.pm',
- 'jis0208-raw' => 'Encode/JP.pm',
- 'jis0212-raw' => 'Encode/JP.pm',
- 'macJapanese' => 'Encode/JP.pm',
- 'shiftjis' => 'Encode/JP.pm',
-
- 'cp949' => 'Encode/KR.pm',
- 'euc-kr' => 'Encode/KR.pm',
- 'ksc5601' => 'Encode/KR.pm',
- 'macKorean' => 'Encode/KR.pm',
-
- 'big5' => 'Encode/TW.pm',
- 'big5-hkscs' => 'Encode/TW.pm',
- 'cp950' => 'Encode/TW.pm',
-
- 'big5plus' => 'Encode/HanExtra.pm',
- 'euc-tw' => 'Encode/HanExtra.pm',
- 'gb18030' => 'Encode/HanExtra.pm',
+ 'cp936' => 'Encode::CN',
+ 'euc-cn' => 'Encode::CN',
+ 'gb12345-raw' => 'Encode::CN',
+ 'gb2312-raw' => 'Encode::CN',
+ 'gbk' => 'Encode::CN',
+ 'iso-ir-165' => 'Encode::CN',
+
+ '7bit-jis' => 'Encode::JP',
+ 'cp932' => 'Encode::JP',
+ 'euc-jp' => 'Encode::JP',
+ 'iso-2022-jp' => 'Encode::JP',
+ 'iso-2022-jp-1' => 'Encode::JP',
+ 'jis0201-raw' => 'Encode::JP',
+ 'jis0208-raw' => 'Encode::JP',
+ 'jis0212-raw' => 'Encode::JP',
+ 'macJapanese' => 'Encode::JP',
+ 'shiftjis' => 'Encode::JP',
+
+ 'cp949' => 'Encode::KR',
+ 'euc-kr' => 'Encode::KR',
+ 'ksc5601' => 'Encode::KR',
+ 'macKorean' => 'Encode::KR',
+
+ 'big5' => 'Encode::TW',
+ 'big5-hkscs' => 'Encode::TW',
+ 'cp950' => 'Encode::TW',
+
+ 'big5plus' => 'Encode::HanExtra',
+ 'euc-tw' => 'Encode::HanExtra',
+ 'gb18030' => 'Encode::HanExtra',
);
}
{
my $class = shift;
my @modules = (@_ and $_[0] eq ":all") ? values %ExtModule : @_;
- for my $m (@modules)
- {
- $DEBUG and warn "about to require $m;";
- eval { require $m; };
+ for my $mod (@modules){
+ $mod =~ s,::,/,g or $mod = "Encode/$mod";
+ $mod .= '.pm';
+ $DEBUG and warn "about to require $mod;";
+ eval { require $mod; };
}
+ my %modules = map {$_ => 1} @modules;
return
- map({$_->[0]}
- sort({$a->[1] cmp $b->[1]}
- map({[$_, lc $_]}
- grep({ $_ ne 'Internal' } keys %Encoding))));
+ sort grep {!/^(?:Internal|Unicode)$/o} keys %Encoding;
}
sub define_encoding
$oc = $class->find_alias($lc) if $lc ne $name;
return $oc if defined $oc;
- if (!$skip_external and exists $ExtModule{$lc})
+ unless ($skip_external)
{
- eval{ require $ExtModule{$lc}; };
- return $Encoding{$name} if exists $Encoding{$name};
+ if (my $mod = $ExtModule{$name} || $ExtModule{$lc}){
+ $mod =~ s,::,/,g ; $mod .= '.pm';
+ eval{ require $mod; };
+ return $Encoding{$name} if exists $Encoding{$name};
+ }
}
-
return;
}
sub encode_utf8
{
my ($str) = @_;
- utf8::encode($str);
+ utf8::encode($str);
return $str;
}
$_[1] = '' if $chk;
return $res;
};
- $Encode::Encoding{Unicode} =
- bless {Name => "UTF_EBCDIC"}, "Encode::UTF_EBCDIC";
+ $Encode::Encoding{Internal} =
+ bless {Name => "UTF_EBCDIC"} => "Encode::UTF_EBCDIC";
} else {
# was in Encode::UTF_EBCDIC
package Encode::Internal;
};
*encode = \&decode;
$Encode::Encoding{Unicode} =
- bless {Name => "Internal"}, "Encode::Internal";
+ bless {Name => "Internal"} => "Encode::Internal";
}
{
return $octets;
};
$Encode::Encoding{utf8} =
- bless {Name => "utf8"}, "Encode::utf8";
+ bless {Name => "utf8"} => "Encode::utf8";
}
# do externals if necessary
require File::Basename;
require File::Spec;
- for my $ext (qw(Unicode)){
+ for my $ext (qw()){
my $pm =
File::Spec->catfile(File::Basename::dirname($INC{'Encode.pm'}),
"Encode", "$ext.pm");
Or you can give the name of specific module.
- @with_jp = Encode->encodings("Encode/JP.pm");
+ @with_jp = Encode->encodings("Encode::JP");
+
+When "::" is not in the name, "Encode::" is assumed.
-Note in this case you have to say C<"Encode/JP.pm"> instead of
-C<"Encode::JP">.
+ @ebcdic = Encode->encodings("EBCDIC");
To find which encodings are supported by this package in details,
see L<Encode::Supported>.
Changes Change Log
EBCDIC/EBCDIC.pm Encode extension
EBCDIC/Makefile.PL Encode extension
-Encode.pm Encode extension
+Encode.pm Mother of all Encode extensions
Encode.xs Encode extension
Encode/Changes.e2x Skeleton file for enc2xs
Encode/Makefile_PL.e2x Skeleton file for enc2xs
lib/Encode/Alias.pm Encode extension
lib/Encode/CJKConstants.pm Encode extension
lib/Encode/CN/HZ.pm Encode extension
+lib/Encode/Encoder.pm OO Encoder
lib/Encode/Encoding.pm Encode extension
lib/Encode/JP/2022_JP.pm Encode extension
lib/Encode/JP/2022_JP1.pm Encode extension
package Encode::Alias;
use strict;
use Encode;
-our $VERSION = do { my @r = (q$Revision: 1.25 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
require Exporter;
define_alias( qr/\b(?:ibm|ms|windows)[-_]?(\d\d\d\d?)$/i => '"cp$1"');
# Sometimes seen with a leading zero.
- define_alias( qr/\bcp037\b/i => '"cp37"');
+ # define_alias( qr/\bcp037\b/i => '"cp37"');
# Mac Mappings
# predefined in *.ucm; unneeded
--- /dev/null
+#
+# $Id: Encoder.pm,v 0.1 2002/04/08 02:35:10 dankogai Exp $
+#
+package Encoder;
+use strict;
+our $VERSION = do { my @r = (q$Revision: 0.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+
+require Exporter;
+our @ISA = qw(Exporter);
+# Public, encouraged API is exported by default
+our @EXPORT = qw (
+ encoder
+);
+
+our $AUTOLOAD;
+our $DEBUG = 0;
+use Encode qw(encode decode find_encoding from_to);
+use Carp;
+
+sub new{
+ my ($class, $data, $encname) = @_;
+ $encname ||= 'utf8';
+ my $obj = find_encoding($encname)
+ or croak __PACKAGE__, ": unknown encoding: $encname";
+ my $self = {
+ data => $data,
+ encoding => $obj->name,
+ };
+ bless $self => $class;
+}
+
+sub encoder{ shift->new(@_) }
+
+sub data{
+ my ($self, $data) = shift;
+ defined $data and $self->{data} = $data;
+ return $self;
+}
+
+sub encoding{
+ my ($self, $encname) = @_;
+ if ($encname){
+ my $obj = find_encoding($encname)
+ or confess __PACKAGE__, ": unknown encoding: $encname";
+ $self->{encoding} = $obj->name;
+ }
+ $self;
+}
+
+sub AUTOLOAD {
+ my $self = shift;
+ my $type = ref($self)
+ or confess "$self is not an object";
+ my $myname = $AUTOLOAD;
+ $myname =~ s/.*://; # strip fully-qualified portion
+ my $obj = find_encoding($myname)
+ or confess __PACKAGE__, ": unknown encoding: $myname";
+ $DEBUG and warn $self->{encoding}, " => ", $obj->name;
+ from_to($self->{data}, $self->{encoding}, $obj->name, 1);
+ $self->{encoding} = $obj->name;
+ return $self;
+}
+
+use overload
+ q("") => sub { $_[0]->{data} },
+ q(0+) => sub { use bytes (); bytes::length($_[0]->{data}) },
+ fallback => 1,
+ ;
+
+1;
+__END__
+
+=head1 NAME
+
+Encode::Encoder -- Object Oriented Encoder
+
+=head1 SYNOPSIS
+
+ use Encode::Encoder;
+ # Encode::encode("ISO-8859-1", $data);
+ Encoder->new($data)->iso_8859_1; # OOP way
+ # shortcut
+ encoder($data)->iso_8859_1;
+ # you can stack them!
+ encoder($data)->iso_8859_1->base64; # provided base64() is defined
+ # stringified
+ print encoder($utf8)->latin1 # prints the string in latin1
+ # numified
+ encoder("\x{abcd}\x{ef}g") == 6; # true. bytes::length($data)
+
+=head1 ABSTRACT
+
+B<Encode::Encoder> allows you to use Encode via OOP style. This is
+not only more intuitive than functional approach, but also handier
+when you want to stack encodings. Suppose you want your UTF-8 string
+converted to Latin1 then Base64, you can simply say
+
+ my $base64 = encoder($utf8)->latin1->base64;
+
+instead of
+
+ my $latin1 = encode("latin1", $utf8);
+
+or lazier and convolted
+
+ my $base64 = encode_base64(encode("latin1", $utf8));
+
+=head1 Description
+
+Here is how to use this module.
+
+=over 4
+
+=item *
+
+There are at least two instance variable stored in hash reference,
+{data} and {encoding}.
+
+=item *
+
+When there is no method, it takes the method name as the name of
+encoding and encode instance I<data> with I<encoding>. If successful,
+instance I<encoding> is set accordingly.
+
+=item *
+
+This module is desined to work with L<Encode::Encoding>.
+To make the Base64 transcorder example above really work, you should
+write a module like this.
+
+ package Encode::Base64;
+ use base 'Encode::Encoding';
+ __PACKAGE->Define('base64');
+ use MIME::Base64;
+ sub encode{
+ my ($obj, $data) = @_;
+ return encode_base64($data);
+ }
+ sub decode{
+ my ($obj, $data) = @_;
+ return decode_base64($data);
+ }
+ 1;
+ __END__
+
+And your caller module should be like this;
+
+ use Encode::Encoder;
+ use Encode::Base64;
+ # and be creative.
+
+=head2 operator overloading
+
+This module overloads two operators, stringify ("") and numify (0+).
+
+Stringify dumps the data therein.
+
+Numify returns the number of bytes therein.
+
+They come in handy when you want to print or find the size of data.
+
+=back
+
+=head2 Predefined Methods
+
+This module predefines the methods below;
+
+=over 4
+
+=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
+
+returns the encoder object. Its data is initialized with $data if
+there, and its encoding is set to $encoding if there.
+
+=item encoder()
+
+is an alias of Encode::Encoder-E<gt>new(). This one is exported for
+convenience.
+
+=item $e-E<gt>data($data)
+
+sets instance data to $data.
+
+=item $e-E<gt>encoding($encoding)
+
+sets instance encoding to $encoding
+
+=back
+
+=head1 SEE ALSO
+
+L<Encode>
+L<Encode::Encoding>
+
+=cut
ascii US-ascii [ECMA]
iso-8859-1 latin1 [ISO]
utf8 UTF-8 [RFC2279]
+ ----------------------------------------------------------------
+
+
+=head2 Encode::Unicode -- other Unicode encodings
+
+Unicode coding schemes other than native utf8 are supported by
+Encode::Unicode which will be autoloaded on demand.
+
+ ----------------------------------------------------------------
UCS-2BE UCS-2, iso-10646-1 [IANA, UC]
UCS-2LE [UC]
UTF-16 [UC]
are registered to IANA as preferred MIME names and may probably
be used over the Internet.
-C<Shift_JIS> has been officialized by JIS X 0208-1997.
+C<Shift_JIS> has been officialized by JIS X 0208:1997.
L<Microsoft-related naming mess> gives details.
C<GB2312> is the IANA name for C<EUC-CN>.
=item *
-data coded with C<UTF-8> seamlessly passes traditional
-command piping (C<cat>, C<more>, etc.) while UTF-16 coded
+C<UTF-8> coded data seamlessly passes traditional
+command piping (C<cat>, C<more>, etc.) while C<UTF-16> coded
data is likely to cause confusion (with it's zero bytes,
for example)
=item *
it is beyond the power of words to describe the way HTML browsers
-encode non-C<ASCII> form data. To get a general impression refer to
+encode non-C<ASCII> form data. To get a general impression visit
L<http://ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html>.
-While encoding of form data has stabilzed for C<UTF-8> coded pages
+While encoding of form data has stabilized for C<UTF-8> coded pages
(at least IE 5/6, NS 6, Opera 6 behave consitently), be sure to
expect fun (and cross-browser discrepancies) with C<UTF-16> coded
pages!
=back
The rule of thumb is to use C<UTF-8> unless you know what
-you're doing and unless you really need from using C<UTF-16>.
+you're doing and unless you really benefit from using C<UTF-16>.
ISO-IR-165 [RFC1345]
Microsoft extension to C<EUC-KR>.
-Proper name: C<CP949>.
+Proper names: C<CP949>, C<UHC>, C<x-windows-949> (as used by Mozilla).
See L<http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0033.html>
for details.
JIS has not endorsed the full Microsoft standard however.
The official C<Shift_JIS> includes only JIS X 0201 and JIS X 0208
subsets, while Microsoft has always been meaning C<Shift_JIS> to
-encode a wider character repertoire.
+encode a wider character repertoire, see C<IANA> registration for
+C<Windows-31J>.
As a historical predecessor Microsoft's variant
probably has more rights for the name, albeit it may be objected
that Microsoft shouldn't have used JIS as part of the name
in the first place.
-Unabiguous name: C<CP932>.
+Unabiguous name: C<CP932>. C<IANA> name (not used?): C<Windows-31J>.
Encode separately supports C<Shift_JIS> and C<cp932>.
use strict;
use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.25 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
#
# Aux. subs & constants
size => $size,
endian => $endian,
ucs2 => $ucs2,
- }, __PACKAGE__;
+ } => __PACKAGE__;
}
move>. Their mistake was not this magnitude so let's forgive them.
(I don't dare make any comparison with Unicode Consortium and the
-Vogols here :)
+Vogons here ;) Or, comparing Encode to Babel Fish is completely
+appropriate -- if you can only stick this into your ear :)
A surrogate pair was born when Unicode Consortium had finally
admitted that 16 bit was not big enough to hold all the world's
#
-# $Id: Unicode.t,v 1.2 2002/04/07 17:22:31 dankogai Exp dankogai $
+# $Id: Unicode.t,v 1.4 2002/04/08 02:35:48 dankogai Exp dankogai $
#
# This script is written entirely in ASCII, even though quoted literals
# do include non-BMP unicode characters -- Are you happy, jhi?
#
BEGIN {
- require Config; import Config;
- if ($Config{'extensions'} !~ /\bEncode\b/) {
- print "1..0 # Skip: Encode was not built\n";
- exit 0;
- }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
# should work without perlio
# unless (find PerlIO::Layer 'perlio') {
-# print "1..0 # Skip: PerlIO was not built\n";
-# exit 0;
+# print "1..0 # Skip: PerlIO was not built\n";
+# exit 0;
# }
# should work on EBCDIC
# if (ord("A") == 193) {
-# print "1..0 # Skip: EBCDIC\n";
-# exit 0;
+# print "1..0 # Skip: EBCDIC\n";
+# exit 0;
# }
- $| = 1;
+ $| = 1;
}
use strict;
#use Test::More 'no_plan';
use Test::More tests => 22;
use Encode qw(encode decode);
+use Encode::Unicode; # to load BOM defs
#
# see
#hi: (0x1abcd - 0x10000) / 0x400 + 0xD800 = 0xd82a
#lo: (0x1abcd - 0x10000) % 0x400 + 0xDC00 = 0xdfcd
-my $n_16be =
- pack("C*", map {hex($_)} qw<00 4D 00 61 d8 2a df cd>);
+my $n_16be =
+ pack("C*", map {hex($_)} qw<00 4D 00 61 d8 2a df cd>);
my $n_16le =
- pack("C*", map {hex($_)} qw<4D 00 61 00 2a d8 cd df>);
-my $f_16be =
- pack("C*", map {hex($_)} qw<00 4D 00 61 ff fd>);
+ pack("C*", map {hex($_)} qw<4D 00 61 00 2a d8 cd df>);
+my $f_16be =
+ pack("C*", map {hex($_)} qw<00 4D 00 61 ff fd>);
my $f_16le =
- pack("C*", map {hex($_)} qw<4D 00 61 00 fd ff>);
-my $n_32be =
- pack("C*", map {hex($_)} qw<00 00 00 4D 00 00 00 61 00 01 ab cd>);
-my $n_32le =
- pack("C*", map {hex($_)} qw<4D 00 00 00 61 00 00 00 cd ab 01 00>);
+ pack("C*", map {hex($_)} qw<4D 00 61 00 fd ff>);
+my $n_32be =
+ pack("C*", map {hex($_)} qw<00 00 00 4D 00 00 00 61 00 01 ab cd>);
+my $n_32le =
+ pack("C*", map {hex($_)} qw<4D 00 00 00 61 00 00 00 cd ab 01 00>);
my $n_16bb = pack('n', Encode::Unicode::BOM_BE) . $n_16be;
my $n_16lb = pack('n', Encode::Unicode::BOM16LE) . $n_16le;
is(decode('UCS-2BE', $n_16be), $fallback, "decode UCS-2BE: fallback");
is(decode('UCS-2LE', $n_16le), $fallback, "decode UCS-2LE: fallback");
-eval { decode('UCS-2BE', $n_16be, 1) };
+eval { decode('UCS-2BE', $n_16be, 1) };
ok($@=~/^UCS-2BE:/, "decode UCS-2BE: exception");
-eval { decode('UCS-2LE', $n_16le, 1) };
+eval { decode('UCS-2LE', $n_16le, 1) };
ok($@=~/^UCS-2LE:/, "decode UCS-2LE: exception");
is(encode('UCS-2BE', $nasty), $f_16be, "encode UCS-2BE: fallback");
is(encode('UCS-2LE', $nasty), $f_16le, "encode UCS-2LE: fallback");
-eval { encode('UCS-2BE', $nasty, 1) };
+eval { encode('UCS-2BE', $nasty, 1) };
ok($@=~/^UCS-2BE:/, "encode UCS-2BE: exception");
-eval { encode('UCS-2LE', $nasty, 1) };
+eval { encode('UCS-2LE', $nasty, 1) };
ok($@=~/^UCS-2LE:/, "encode UCS-2LE: exception");
1;
__END__
+
+use Devel::Peek;
+my $foo = decode('UTF-16BE', $n_16be);
+Dump $n_16be; Dump $foo;
__END__
-#!../perl
-our $POWER;
-BEGIN {
- if ($ENV{'PERL_CORE'}){
- chdir 't';
- unshift @INC, '../lib';
- }
- require Config; import Config;
- if ($Config{'extensions'} !~ /\bEncode\b/) {
- print "1..0 # Skip: Encode was not built\n";
- exit 0;
- }
- $POWER = 12; # up to 1 MB. You may adjust the figure here
-}
-
-use strict;
-use Encode;
-
-my $seed = "";
-for my $i (0x00..0xff){
- my $c = chr($i);
- $seed .= ($c =~ /^\p{IsPrint}/o) ? $c : " ";
-}
-
-use Test::More tests => $POWER*2;
-my $octs = $seed;
-use bytes ();
-for my $i (1..$POWER){
- $octs .= $octs;
- my $len = bytes::length($octs);
- my $utf8 = Encode::decode('latin1', $octs);
- ok(1, "decode $len bytes");
- is($octs,
- Encode::encode('latin1', $utf8),
- "encode $len bytes");
-}
-__END__
-
-