ext/Encode/lib/Encode/Alias.pm Encode extension
ext/Encode/lib/Encode/CJKConstants.pm Encode extension
ext/Encode/lib/Encode/CN/HZ.pm Encode extension
+ext/Encode/lib/Encode/Config.pm Encode configuration module
ext/Encode/lib/Encode/Encoder.pm OO Encoder
ext/Encode/lib/Encode/Encoding.pm Encode extension
ext/Encode/lib/Encode/JP/2022_JP.pm Encode extension
ext/Encode/t/Aliases.t Encode extension test
ext/Encode/t/CN.t Encode extension test
ext/Encode/t/Encode.t Encode extension test
+ext/Encode/t/Encoder.t Encode::Encoder test
ext/Encode/t/JP.t Encode extension test
ext/Encode/t/KR.t Encode extension test
ext/Encode/t/TW.t Encode extension test
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.30 2002/04/08 02:34:51 dankogai Exp $
+# $Id: Changes,v 1.31 2002/04/08 18:08:07 dankogai Exp dankogai $
#
-1.30 $Date: 2002/04/08 02:34:51 $
+1.31 $Date: 2002/04/08 18:08:07 $
+! lib/Encode/Encoder.pm
++ t/Encoder.t
+ Encode::Encoder, once just a placeholder of an idea, is now much more
+ practical. See t/Encoder.t to find how practical it can be.
++ lib/Encode/Config.pm
+! Encode.pm
+ My false laziness in Encode.pm is fixed. Now %ExtModule is set
+ in Encode::Config, and its entries are all set literally, not
+ programmatically. My false laziness was resulting in many encodings
+ missing from %ExtModule.
+! lib/Encode/Unicode.pm
+! t/Unicode.t
+ BOM for 32LE was bogus as noted by Anton. t/Unicode.t is fixed
+ so that it does not rely on Encode::Unicode for BOM values.
+ Message-Id: <FFEC33E9-4AFB-11D6-B415-00039301D480@dan.co.jp>
+
+1.30 2002/04/08 02:34:51
+ lib/Encode/Encoder.pm
Object Oriented Encoder. I reckon something like this is in need.
! Encode.pm
! t/Unicode.pm
! lib/Encode/Supported.pod
- * autoloading but that prevented upper-case canonicals such as UTF-16
+ * autoloading bug that prevented upper-case canonicals such as UTF-16
is fixed. Now even UTF/UCS are autoloaded!
* encodings() is now more intuitive.
* t/Unicode.t fixed to explicitly use Unicode.pm -- BOM values are
Typo fixes and improvements by jhi
Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al.
-1.11 $Date: 2002/04/08 02:34:51 $
+1.11 $Date: 2002/04/08 18:08:07 $
+ t/encoding.t
+ t/jperl.t
! MANIFEST
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.31 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
require DynaLoader;
# Make a %Encoding package variable to allow a certain amount of cheating
our %Encoding;
-our %ExtModule;
-
-my @codepages = qw(
- 37 424 437 500 737 775 850 852 855
- 856 857 860 861 862 863 864 865 866
- 869 874 875 932 936 949 950 1006 1026
- 1047 1250 1251 1252 1253 1254 1255 1256 1257
- 1258
- );
-
-my @macintosh = qw(
- CentralEurRoman Croatian Cyrillic Greek
- Iceland Roman Rumanian Sami
- Thai Turkish Ukrainian
- );
-
-for my $k (2..11,13..16){
- $ExtModule{"iso-8859-$k"} = 'Encode::Byte';
-}
-
-for my $k (@codepages){
- $ExtModule{"cp$k"} = 'Encode::Byte';
-}
-
-for my $k (@macintosh)
-{
- $ExtModule{"mac$k"} = 'Encode::Byte';
-}
-
-for my $k (qw(UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
- UTF-32 UTF-32BE UTF-32LE)){
- $ExtModule{$k} = 'Encode::Unicode';
-}
-
-%ExtModule =
- (%ExtModule,
- 'koi8-r' => 'Encode::Byte',
- 'posix-bc' => 'Encode::EBCDIC',
- cp37 => 'Encode::EBCDIC',
- cp1026 => 'Encode::EBCDIC',
- cp1047 => 'Encode::EBCDIC',
- cp500 => 'Encode::EBCDIC',
- cp875 => 'Encode::EBCDIC',
- dingbats => 'Encode::Symbol',
- macDingbats => 'Encode::Symbol',
- macSymbol => 'Encode::Symbol',
- symbol => 'Encode::Symbol',
- viscii => 'Encode::Byte',
-);
-
-unless ($ON_EBCDIC) { # CJK added to autoload unless EBCDIC env
-%ExtModule =
- (%ExtModule,
-
- 'cp936' => 'Encode::CN',
- 'euc-cn' => 'Encode::CN',
- 'gb12345-raw' => 'Encode::CN',
- 'gb2312-raw' => 'Encode::CN',
- 'gbk' => 'Encode::CN',
- 'iso-ir-165' => 'Encode::CN',
-
- '7bit-jis' => 'Encode::JP',
- 'cp932' => 'Encode::JP',
- 'euc-jp' => 'Encode::JP',
- 'iso-2022-jp' => 'Encode::JP',
- 'iso-2022-jp-1' => 'Encode::JP',
- 'jis0201-raw' => 'Encode::JP',
- 'jis0208-raw' => 'Encode::JP',
- 'jis0212-raw' => 'Encode::JP',
- 'macJapanese' => 'Encode::JP',
- 'shiftjis' => 'Encode::JP',
-
- 'cp949' => 'Encode::KR',
- 'euc-kr' => 'Encode::KR',
- 'ksc5601' => 'Encode::KR',
- 'macKorean' => 'Encode::KR',
-
- 'big5' => 'Encode::TW',
- 'big5-hkscs' => 'Encode::TW',
- 'cp950' => 'Encode::TW',
-
- 'big5plus' => 'Encode::HanExtra',
- 'euc-tw' => 'Encode::HanExtra',
- 'gb18030' => 'Encode::HanExtra',
- );
-}
+use Encode::Config;
sub encodings
{
lib/Encode/Alias.pm Encode extension
lib/Encode/CJKConstants.pm Encode extension
lib/Encode/CN/HZ.pm Encode extension
+lib/Encode/Config.pm Encode configuration module
lib/Encode/Encoder.pm OO Encoder
lib/Encode/Encoding.pm Encode extension
lib/Encode/JP/2022_JP.pm Encode extension
t/Aliases.t Encode extension test
t/CN.t Encode extension test
t/Encode.t Encode extension test
+t/Encoder.t Encode::Encoder test
t/JP.t Encode extension test
t/KR.t Encode extension test
t/TW.t Encode extension test
--- /dev/null
+#
+# Demand-load module list
+#
+package Encode::Config;
+our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+
+use strict;
+require Exporter;
+
+our %ExtModule =
+ (
+ # Encode::Byte
+ #iso-8859-1 is on Encode.pm itself
+ 'iso-8859-2' => 'Encode::Byte',
+ 'iso-8859-3' => 'Encode::Byte',
+ 'iso-8859-4' => 'Encode::Byte',
+ 'iso-8859-5' => 'Encode::Byte',
+ 'iso-8859-6' => 'Encode::Byte',
+ 'iso-8859-7' => 'Encode::Byte',
+ 'iso-8859-8' => 'Encode::Byte',
+ 'iso-8859-9' => 'Encode::Byte',
+ 'iso-8859-10' => 'Encode::Byte',
+ 'iso-8859-11' => 'Encode::Byte',
+ 'iso-8859-13' => 'Encode::Byte',
+ 'iso-8859-14' => 'Encode::Byte',
+ 'iso-8859-15' => 'Encode::Byte',
+ 'iso-8859-16' => 'Encode::Byte',
+ 'koi8-f' => 'Encode::Byte',
+ 'koi8-r' => 'Encode::Byte',
+ 'koi8-u' => 'Encode::Byte',
+ 'viscii' => 'Encode::Byte',
+ 'cp424' => 'Encode::Byte',
+ 'cp437' => 'Encode::Byte',
+ 'cp737' => 'Encode::Byte',
+ 'cp775' => 'Encode::Byte',
+ 'cp850' => 'Encode::Byte',
+ 'cp852' => 'Encode::Byte',
+ 'cp855' => 'Encode::Byte',
+ 'cp856' => 'Encode::Byte',
+ 'cp857' => 'Encode::Byte',
+ 'cp860' => 'Encode::Byte',
+ 'cp861' => 'Encode::Byte',
+ 'cp862' => 'Encode::Byte',
+ 'cp863' => 'Encode::Byte',
+ 'cp864' => 'Encode::Byte',
+ 'cp865' => 'Encode::Byte',
+ 'cp866' => 'Encode::Byte',
+ 'cp869' => 'Encode::Byte',
+ 'cp874' => 'Encode::Byte',
+ 'cp1006' => 'Encode::Byte',
+ 'cp1250' => 'Encode::Byte',
+ 'cp1251' => 'Encode::Byte',
+ 'cp1252' => 'Encode::Byte',
+ 'cp1253' => 'Encode::Byte',
+ 'cp1254' => 'Encode::Byte',
+ 'cp1255' => 'Encode::Byte',
+ 'cp1256' => 'Encode::Byte',
+ 'cp1257' => 'Encode::Byte',
+ 'cp1258' => 'Encode::Byte',
+ 'AdobeStandardEncoding' => 'Encode::Byte',
+ 'MacArabic' => 'Encode::Byte',
+ 'MacCentralEurRoman' => 'Encode::Byte',
+ 'MacCroatian' => 'Encode::Byte',
+ 'MacCyrillic' => 'Encode::Byte',
+ 'MacFarsi' => 'Encode::Byte',
+ 'MacGreek' => 'Encode::Byte',
+ 'MacHebrew' => 'Encode::Byte',
+ 'MacIcelandic' => 'Encode::Byte',
+ 'MacRoman' => 'Encode::Byte',
+ 'MacRomanian' => 'Encode::Byte',
+ 'MacRumanian' => 'Encode::Byte',
+ 'MacSami' => 'Encode::Byte',
+ 'MacThai' => 'Encode::Byte',
+ 'MacTurkish' => 'Encode::Byte',
+ 'MacUkrainian' => 'Encode::Byte',
+ 'nextstep' => 'Encode::Byte',
+ 'hp-roman8' => 'Encode::Byte',
+ 'gsm0338' => 'Encode::Byte',
+ # Encode::EBCDIC
+ 'cp37' => 'Encode::EBCDIC',
+ 'cp500' => 'Encode::EBCDIC',
+ 'cp875' => 'Encode::EBCDIC',
+ 'cp1026' => 'Encode::EBCDIC',
+ 'cp1047' => 'Encode::EBCDIC',
+ 'posix-bc' => 'Encode::EBCDIC',
+ # Encode::Symbol
+ 'dingbats' => 'Encode::Symbol',
+ 'symbol' => 'Encode::Symbol',
+ 'AdobeSymbol' => 'Encode::Symbol',
+ 'AdobeZdingbat' => 'Encode::Symbol',
+ 'MacDingbats' => 'Encode::Symbol',
+ 'MacSymbol' => 'Encode::Symbol',
+ # Encode::Unicode
+ 'UCS-2BE' => 'Encode::Unicode',
+ 'UCS-2LE' => 'Encode::Unicode',
+ 'UTF-16' => 'Encode::Unicode',
+ 'UTF-16BE' => 'Encode::Unicode',
+ 'UTF-16LE' => 'Encode::Unicode',
+ 'UTF-32' => 'Encode::Unicode',
+ 'UTF-32BE' => 'Encode::Unicode',
+ 'UTF-32LE' => 'Encode::Unicode',
+ );
+
+unless (ord("A") == 193){
+ %ExtModule =
+ (
+ %ExtModule,
+ 'euc-cn' => 'Encode::CN',
+ 'gb12345-raw' => 'Encode::CN',
+ 'gb2312-raw' => 'Encode::CN',
+ 'iso-ir-165' => 'Encode::CN',
+ 'cp936' => 'Encode::CN',
+ 'MacChineseSimp' => 'Encode::CN',
+
+ '7bit-jis' => 'Encode::JP',
+ 'euc-jp' => 'Encode::JP',
+ 'iso-2022-jp' => 'Encode::JP',
+ 'iso-2022-jp-1' => 'Encode::JP',
+ 'jis0201-raw' => 'Encode::JP',
+ 'jis0208-raw' => 'Encode::JP',
+ 'jis0212-raw' => 'Encode::JP',
+ 'cp932' => 'Encode::JP',
+ 'MacJapanese' => 'Encode::JP',
+ 'shiftjis' => 'Encode::JP',
+
+
+ 'euc-kr' => 'Encode::KR',
+ 'iso-2022-kr' => 'Encode::KR',
+ 'johab' => 'Encode::KR',
+ 'ksc5601-raw' => 'Encode::KR',
+ 'cp949' => 'Encode::KR',
+ 'MacKorean' => 'Encode::KR',
+
+ 'big5' => 'Encode::TW',
+ 'big5-hkscs' => 'Encode::TW',
+ 'cp950' => 'Encode::TW',
+ 'MacChineseTrad' => 'Encode::TW',
+
+ 'big5plus' => 'Encode::HanExtra',
+ 'euc-tw' => 'Encode::HanExtra',
+ 'gb18030' => 'Encode::HanExtra',
+ );
+}
+
+*Encode::ExtModule = \%ExtModule;
+
+1;
#
-# $Id: Encoder.pm,v 0.1 2002/04/08 02:35:10 dankogai Exp $
+# $Id: Encoder.pm,v 0.2 2002/04/08 18:08:07 dankogai Exp dankogai $
#
-package Encoder;
+package Encode::Encoder;
use strict;
-our $VERSION = do { my @r = (q$Revision: 0.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+use warnings;
+our $VERSION = do { my @r = (q$Revision: 0.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
require Exporter;
our @ISA = qw(Exporter);
-# Public, encouraged API is exported by default
-our @EXPORT = qw (
- encoder
-);
+our @EXPORT = qw ( encoder );
our $AUTOLOAD;
our $DEBUG = 0;
sub new{
my ($class, $data, $encname) = @_;
- $encname ||= 'utf8';
- my $obj = find_encoding($encname)
- or croak __PACKAGE__, ": unknown encoding: $encname";
+ unless($encname){
+ $encname = Encode::is_utf8($data) ? 'utf8' : '';
+ }else{
+ my $obj = find_encoding($encname)
+ or croak __PACKAGE__, ": unknown encoding: $encname";
+ $encname = $obj->name;
+ }
my $self = {
data => $data,
- encoding => $obj->name,
+ encoding => $encname,
};
bless $self => $class;
}
-sub encoder{ shift->new(@_) }
+sub encoder{ __PACKAGE__->new(@_) }
sub data{
- my ($self, $data) = shift;
- defined $data and $self->{data} = $data;
- return $self;
+ # Accessor for the instance data: with a defined argument it stores and
+ # returns that value; otherwise it returns the currently stored data.
+ # Unpack from @_ rather than shift: "my ($self, $data) = shift" filled
+ # only $self, leaving $data undef so the setter branch could never run.
+ my ($self, $data) = @_;
+ if (defined $data){
+ $self->{data} = $data;
+ return $data;
+ }else{
+ return $self->{data};
+ }
}
sub encoding{
my $obj = find_encoding($encname)
or confess __PACKAGE__, ": unknown encoding: $encname";
$self->{encoding} = $obj->name;
+ return $self;
+ }else{
+ return $self->{encoding}
}
- $self;
+}
+
+sub bytes {
+ my ($self, $encname) = @_;
+ $encname ||= $self->{encoding};
+ my $obj = find_encoding($encname)
+ or confess __PACKAGE__, ": unknown encoding: $encname";
+ $self->{data} = $obj->decode($self->{data}, 1);
+ $self->{encoding} = '' ;
+ return $self;
+}
+
+sub DESTROY{ # defined so it won't autoload.
+ $DEBUG and warn shift;
}
sub AUTOLOAD {
my $obj = find_encoding($myname)
or confess __PACKAGE__, ": unknown encoding: $myname";
$DEBUG and warn $self->{encoding}, " => ", $obj->name;
- from_to($self->{data}, $self->{encoding}, $obj->name, 1);
+ if ($self->{encoding}){
+ from_to($self->{data}, $self->{encoding}, $obj->name, 1);
+ }else{
+ $self->{data} = $obj->encode($self->{data}, 1);
+ }
$self->{encoding} = $obj->name;
return $self;
}
# shortcut
encoder($data)->iso_8859_1;
# you can stack them!
- encoder($data)->iso_8859_1->base64; # provided base64() is defined
+ encoder($data)->iso_8859_1->base64; # provided base64() is defined
+ # you can use it as a decoder as well
+ encoder($base64)->bytes('base64')->latin1;
# stringified
- print encoder($utf8)->latin1 # prints the string in latin1
+ print encoder($data)->utf8->latin1; # prints the string in latin1
# numified
- encoder("\x{abcd}\x{ef}g") == 6; # true. bytes::length($data)
+ encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
=head1 ABSTRACT
instead of
my $latin1 = encode("latin1", $utf8);
+ my $base64 = encode_base64($utf8);
-or lazier and convolted
+or lazier and convoluted
encoding and encode instance I<data> with I<encoding>. If successful,
instance I<encoding> is set accordingly.
-=item *
+=item *
+
+You can retrieve the result via -E<gt>data but usually you don't have to
+because the stringify operator ("") is overridden to do exactly that.
+
+=back
+
+=head2 Predefined Methods
+
+This module predefines the methods below;
+
+=over 4
+
+=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
+
+returns the encoder object. Its data is initialized with $data if
+there, and its encoding is set to $encoding if there.
+
+When $encoding is omitted, it defaults to utf8 if $data is already in
+utf8 or "" (empty string) otherwise.
+
+=item encoder()
+
+is an alias of Encode::Encoder-E<gt>new(). This one is exported for
+convenience.
+
+=item $e-E<gt>data([$data])
+
+When $data is present, sets instance data to $data and returns $data.
+Otherwise the current instance data is returned.
+
+=item $e-E<gt>encoding([$encoding])
+
+when $encoding is present, sets instance encoding to $encoding and
+returns the object itself. otherwise the current instance encoding is
+returned.
+
+=item $e-E<gt>bytes([$encoding])
+
+Decodes instance data from $encoding, or instance encoding if omitted.
+When the conversion is successful, the instance encoding will be set
+to "".
+
+The name I<bytes> was deliberately picked to avoid namespace tainting
+-- this module may be used as a base class so method names that appear
+in Encode::Encoding are avoided.
+
+=back
+
+=head2 Example: base64 transcoder
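-This module is desined to work with L<Encode::Encoding>.
-To make the Base64 transcorder example above really work, you should
+This module is designed to work with L<Encode::Encoding>.
+To make the Base64 transcoder example above really work, you should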
package Encode::Base64;
use base 'Encode::Encoding';
- __PACKAGE->Define('base64');
+ __PACKAGE__->Define('base64');
use MIME::Base64;
sub encode{
- my ($obj, $data) = @_;
- return encode_base64($data);
+ my ($obj, $data) = @_;
+ return encode_base64($data);
}
sub decode{
- my ($obj, $data) = @_;
- return decode_base64($data);
+ my ($obj, $data) = @_;
+ return decode_base64($data);
}
1;
__END__
use Encode::Encoder;
use Encode::Base64;
- # and be creative.
+
+ # now you can really do the following
+
+ encoder($data)->iso_8859_1->base64;
+ encoder($base64)->bytes('base64')->latin1;
=head2 operator overloading
They come in handy when you want to print or find the size of data.
-=back
-
-=head2 Predefined Methods
-
-This module predefines the methods below;
-
-=over 4
-
-=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
-
-returns the encoder object. Its data is initialized with $data if
-there, and its encoding is set to $encoding if there.
-
-=item encoder()
-
-is an alias of Encode::Encoder-E<gt>new(). This one is exported for
-convenience.
-
-=item $e-E<gt>data($data)
-
-sets instance data to $data.
-
-=item $e-E<gt>encoding($encoding)
-
-sets instance encoding to $encoding
-
-=back
-
=head1 SEE ALSO
L<Encode>
use strict;
use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.28 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
#
# Aux. subs & constants
sub FBCHAR(){ 0xFFFd }
sub BOM_BE(){ 0xFeFF }
sub BOM16LE(){ 0xFFFe }
-sub BOM32LE(){ 0xFeFF0000 }
+sub BOM32LE(){ 0xFFFe0000 }
sub valid_ucs2($){
if ($_[0] < 0xD800){
16 32 bits/char
-------------------------
-BE 0xFeFF 0x0000FeFF
-LE 0xFFeF 0xFeFF0000
+BE 0xFeFF 0x0000FeFF
+LE 0xFFFe 0xFFFe0000
-------------------------
=back
C<UCS-2> is an exception. Unlike others this is an alias of UCS-2BE.
UCS-2 is already registered by IANA and others that way.
+=back
=head1 The Surrogate Pair
=head1 SEE ALSO
-L<Encode>, L<http://www.unicode.org/glossary/>
+L<Encode>, L<http://www.unicode.org/glossary/>,
-=back
+RFC 2781 L<http://rfc.net/rfc2781.html>,
+
+L<http://www.unicode.org/unicode/faq/utf_bom.html>
+
+=cut
--- /dev/null
+#
+# $Id: Encoder.t,v 1.1 2002/04/08 18:07:31 dankogai Exp $
+#
+
+BEGIN {
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+# should work without perlio
+# unless (find PerlIO::Layer 'perlio') {
+# print "1..0 # Skip: PerlIO was not built\n";
+# exit 0;
+# }
+# should work on EBCDIC
+# if (ord("A") == 193) {
+# print "1..0 # Skip: EBCDIC\n";
+# exit 0;
+# }
+ $| = 1;
+}
+
+use strict;
+#use Test::More 'no_plan';
+use Test::More tests => 512;
+use Encode::Encoder;
+use MIME::Base64;
+package Encode::Base64;
+use base 'Encode::Encoding';
+__PACKAGE__->Define('base64');
+use MIME::Base64;
+sub encode{
+ my ($obj, $data) = @_;
+ return encode_base64($data);
+}
+sub decode{
+ my ($obj, $data) = @_;
+ return decode_base64($data);
+}
+
+package main;
+
+my $data = '';
+for my $i (0..255){
+ no warnings;
+ $data .= chr($i);
+ my $base64 = encode_base64($data);
+ is(encoder($data)->base64, $base64, "encode");
+ is(encoder($base64)->bytes('base64'), $data, "decode");
+}
+
+1;
+__END__
#
-# $Id: Unicode.t,v 1.4 2002/04/08 02:35:48 dankogai Exp dankogai $
+# $Id: Unicode.t,v 1.5 2002/04/08 14:17:19 dankogai Exp $
#
# This script is written entirely in ASCII, even though quoted literals
# do include non-BMP unicode characters -- Are you happy, jhi?
#use Test::More 'no_plan';
use Test::More tests => 22;
use Encode qw(encode decode);
-use Encode::Unicode; # to load BOM defs
#
# see
my $n_32le =
pack("C*", map {hex($_)} qw<4D 00 00 00 61 00 00 00 cd ab 01 00>);
-my $n_16bb = pack('n', Encode::Unicode::BOM_BE) . $n_16be;
-my $n_16lb = pack('n', Encode::Unicode::BOM16LE) . $n_16le;
-my $n_32bb = pack('N', Encode::Unicode::BOM_BE ) . $n_32be;
-my $n_32lb = pack('N', Encode::Unicode::BOM32LE) . $n_32le;
+my $n_16bb = pack('n', 0xFeFF) . $n_16be;
+my $n_16lb = pack('v', 0xFeFF) . $n_16le;
+my $n_32bb = pack('N', 0xFeFF) . $n_32be;
+my $n_32lb = pack('V', 0xFeFF) . $n_32le;
is($n_16be, encode('UTF-16BE', $nasty), qq{encode UTF-16BE});
is($n_16le, encode('UTF-16LE', $nasty), qq{encode UTF-16LE});
1;
__END__
-
-use Devel::Peek;
-my $foo = decode('UTF-16BE', $n_16be);
-Dump $n_16be; Dump $foo;