From: Jarkko Hietaniemi Date: Tue, 29 Jan 2002 14:09:21 +0000 (+0000) Subject: Encode cleanup from Dan Kogai; reworked even further. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=df1df145ec46da3489b48fd58c3178073e7e90db;p=p5sagit%2Fp5-mst-13.2.git Encode cleanup from Dan Kogai; reworked even further. p4raw-id: //depot/perl@14486 --- diff --git a/MANIFEST b/MANIFEST index 6f49999..a2f435a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -194,146 +194,158 @@ ext/DynaLoader/hints/openbsd.pl Hint for DynaLoader for named architecture ext/DynaLoader/Makefile.PL Dynamic Loader makefile writer ext/DynaLoader/README Dynamic Loader notes and intro ext/DynaLoader/XSLoader_pm.PL Simple XS Loader perl module -ext/Encode.t See if Encode works -ext/Encode/compile Encode extension -ext/Encode/encengine.c Encode extension -ext/Encode/encode.h Encode extension -ext/Encode/Encode.pm Encode extension +ext/Encode/compile Encode extension +ext/Encode/encengine.c Encode extension +ext/Encode/encode.h Encode extension ext/Encode/Encode.xs Encode extension -ext/Encode/Encode/11643-1.enc Encoding tables -ext/Encode/Encode/11643-2.enc Encoding tables -ext/Encode/Encode/2022-cn.enc Encoding tables -ext/Encode/Encode/2022-jp.enc Encoding tables -ext/Encode/Encode/2022-jp1.enc Encoding tables -ext/Encode/Encode/2022-jp2.enc Encoding tables -ext/Encode/Encode/2022-kr.enc Encoding tables -ext/Encode/Encode/2022.enc Encoding tables -ext/Encode/Encode/7bit-greek.enc Encoding tables -ext/Encode/Encode/7bit-jis.enc Encoding tables -ext/Encode/Encode/7bit-kana.enc Encoding tables -ext/Encode/Encode/7bit-kr.enc Encoding tables -ext/Encode/Encode/7bit-latin1.enc Encoding tables -ext/Encode/Encode/8859-1.enc Encoding tables -ext/Encode/Encode/8859-1.ucm Encoding tables -ext/Encode/Encode/8859-10.enc Encoding tables -ext/Encode/Encode/8859-10.ucm Encoding tables -ext/Encode/Encode/8859-11.enc Encoding tables -ext/Encode/Encode/8859-11.ucm Encoding tables -ext/Encode/Encode/8859-13.enc Encoding tables -ext/Encode/Encode/8859-13.ucm Encoding tables -ext/Encode/Encode/8859-14.enc Encoding tables -ext/Encode/Encode/8859-14.ucm Encoding tables -ext/Encode/Encode/8859-15.enc Encoding tables -ext/Encode/Encode/8859-15.ucm Encoding tables -ext/Encode/Encode/8859-16.enc Encoding tables -ext/Encode/Encode/8859-16.ucm Encoding tables -ext/Encode/Encode/8859-2.enc Encoding tables -ext/Encode/Encode/8859-2.ucm Encoding tables -ext/Encode/Encode/8859-3.enc Encoding tables -ext/Encode/Encode/8859-3.ucm Encoding tables -ext/Encode/Encode/8859-4.enc Encoding tables -ext/Encode/Encode/8859-4.ucm Encoding tables -ext/Encode/Encode/8859-5.enc Encoding tables -ext/Encode/Encode/8859-5.ucm Encoding tables -ext/Encode/Encode/8859-6.enc Encoding tables -ext/Encode/Encode/8859-6.ucm Encoding tables -ext/Encode/Encode/8859-7.enc Encoding tables -ext/Encode/Encode/8859-7.ucm Encoding tables -ext/Encode/Encode/8859-8.enc Encoding tables -ext/Encode/Encode/8859-8.ucm Encoding tables -ext/Encode/Encode/8859-9.enc Encoding tables -ext/Encode/Encode/8859-9.ucm Encoding tables -ext/Encode/Encode/ascii.enc Encoding tables -ext/Encode/Encode/ascii.ucm Encoding tables -ext/Encode/Encode/big5.enc Encoding tables -ext/Encode/Encode/cp1006.enc Encoding tables -ext/Encode/Encode/cp1047.enc Encoding tables -ext/Encode/Encode/cp1047.ucm Encoding tables -ext/Encode/Encode/cp1250.enc Encoding tables -ext/Encode/Encode/cp1250.ucm Encoding tables -ext/Encode/Encode/cp1251.enc Encoding tables -ext/Encode/Encode/cp1252.enc Encoding tables -ext/Encode/Encode/cp1253.enc Encoding tables -ext/Encode/Encode/cp1254.enc Encoding tables -ext/Encode/Encode/cp1255.enc Encoding tables -ext/Encode/Encode/cp1256.enc Encoding tables -ext/Encode/Encode/cp1257.enc Encoding tables -ext/Encode/Encode/cp1258.enc Encoding tables -ext/Encode/Encode/cp37.enc Encoding tables -ext/Encode/Encode/cp37.ucm Encoding tables -ext/Encode/Encode/cp424.enc Encoding tables -ext/Encode/Encode/cp437.enc Encoding tables -ext/Encode/Encode/cp737.enc Encoding tables -ext/Encode/Encode/cp775.enc Encoding tables -ext/Encode/Encode/cp850.enc Encoding tables -ext/Encode/Encode/cp852.enc Encoding tables -ext/Encode/Encode/cp855.enc Encoding tables -ext/Encode/Encode/cp856.enc Encoding tables -ext/Encode/Encode/cp857.enc Encoding tables -ext/Encode/Encode/cp860.enc Encoding tables -ext/Encode/Encode/cp861.enc Encoding tables -ext/Encode/Encode/cp862.enc Encoding tables -ext/Encode/Encode/cp863.enc Encoding tables -ext/Encode/Encode/cp864.enc Encoding tables -ext/Encode/Encode/cp865.enc Encoding tables -ext/Encode/Encode/cp866.enc Encoding tables -ext/Encode/Encode/cp869.enc Encoding tables -ext/Encode/Encode/cp874.enc Encoding tables -ext/Encode/Encode/cp932.enc Encoding tables -ext/Encode/Encode/cp936.enc Encoding tables -ext/Encode/Encode/cp949.enc Encoding tables -ext/Encode/Encode/cp950.enc Encoding tables -ext/Encode/Encode/dingbats.enc Encoding tables -ext/Encode/Encode/dingbats.ucm Encoding tables -ext/Encode/Encode/EncodeFormat.pod Encoding table format -ext/Encode/Encode/euc-cn.enc Encoding tables -ext/Encode/Encode/euc-jp-0212.enc Encoding tables -ext/Encode/Encode/euc-jp.enc Encoding tables -ext/Encode/Encode/euc-kr.enc Encoding tables -ext/Encode/Encode/gb12345.enc Encoding tables -ext/Encode/Encode/gb1988.enc Encoding tables -ext/Encode/Encode/gb2312.enc Encoding tables -ext/Encode/Encode/gsm0338.enc Encoding tables -ext/Encode/Encode/HZ.enc Encoding tables -ext/Encode/Encode/ir-197.enc Encoding tables -ext/Encode/Encode/jis0201.enc Encoding tables -ext/Encode/Encode/jis0208.enc Encoding tables -ext/Encode/Encode/jis0212.enc Encoding tables -ext/Encode/Encode/koi8-f.enc Encoding tables -ext/Encode/Encode/koi8-f.ucm Encoding tables -ext/Encode/Encode/koi8-r.enc Encoding tables -ext/Encode/Encode/koi8-r.ucm Encoding tables -ext/Encode/Encode/koi8-u.enc Encoding tables -ext/Encode/Encode/koi8-u.ucm Encoding tables -ext/Encode/Encode/ksc5601.enc Encoding tables -ext/Encode/Encode/macCentEuro.enc Encoding tables -ext/Encode/Encode/macCroatian.enc Encoding tables -ext/Encode/Encode/macCyrillic.enc Encoding tables -ext/Encode/Encode/macDingbats.enc Encoding tables -ext/Encode/Encode/macGreek.enc Encoding tables -ext/Encode/Encode/macIceland.enc Encoding tables -ext/Encode/Encode/macJapan.enc Encoding tables -ext/Encode/Encode/macRoman.enc Encoding tables -ext/Encode/Encode/macRumanian.enc Encoding tables -ext/Encode/Encode/macSami.enc Encoding tables -ext/Encode/Encode/macThai.enc Encoding tables -ext/Encode/Encode/macTurkish.enc Encoding tables -ext/Encode/Encode/macUkraine.enc Encoding tables -ext/Encode/Encode/nextstep.enc Encoding tables -ext/Encode/Encode/nextstep.ucm Encoding tables -ext/Encode/Encode/posix-bc.enc Encoding tables -ext/Encode/Encode/posix-bc.ucm Encoding tables -ext/Encode/Encode/roman8.enc Encoding tables -ext/Encode/Encode/roman8.ucm Encoding tables -ext/Encode/Encode/shiftjis.enc Encoding tables -ext/Encode/Encode/symbol.enc Encoding tables -ext/Encode/Encode/symbol.ucm Encoding tables -ext/Encode/Encode/Tcl.pm Handler for .enc encodings -ext/Encode/Encode/Tcl.t See if Encode::Tcl works -ext/Encode/Encode/viscii.enc Encoding tables -ext/Encode/Encode/viscii.ucm Encoding tables -ext/Encode/Makefile.PL Encode extension -ext/Encode/Todo Encode extension +ext/Encode/Encode/11643-1.enc Encode table +ext/Encode/Encode/11643-2.enc Encode table +ext/Encode/Encode/2022-cn.enc Encode table +ext/Encode/Encode/2022-jp.enc Encode table +ext/Encode/Encode/2022-jp1.enc Encode table +ext/Encode/Encode/2022-jp2.enc Encode table +ext/Encode/Encode/2022-kr.enc Encode table +ext/Encode/Encode/2022.enc Encode table +ext/Encode/Encode/7bit-greek.enc Encode table +ext/Encode/Encode/7bit-jis.enc Encode table +ext/Encode/Encode/7bit-kana.enc Encode table +ext/Encode/Encode/7bit-kr.enc Encode table +ext/Encode/Encode/7bit-latin1.enc Encode table +ext/Encode/Encode/8859-1.enc Encode table +ext/Encode/Encode/8859-1.ucm Encode table +ext/Encode/Encode/8859-10.enc Encode table +ext/Encode/Encode/8859-10.ucm Encode table +ext/Encode/Encode/8859-11.enc Encode table +ext/Encode/Encode/8859-11.ucm Encode table +ext/Encode/Encode/8859-13.enc Encode table +ext/Encode/Encode/8859-13.ucm Encode table +ext/Encode/Encode/8859-14.enc Encode table +ext/Encode/Encode/8859-14.ucm Encode table +ext/Encode/Encode/8859-15.enc Encode table +ext/Encode/Encode/8859-15.ucm Encode table +ext/Encode/Encode/8859-16.enc Encode table +ext/Encode/Encode/8859-16.ucm Encode table +ext/Encode/Encode/8859-2.enc Encode table +ext/Encode/Encode/8859-2.ucm Encode table +ext/Encode/Encode/8859-3.enc Encode table +ext/Encode/Encode/8859-3.ucm Encode table +ext/Encode/Encode/8859-4.enc Encode table +ext/Encode/Encode/8859-4.ucm Encode table +ext/Encode/Encode/8859-5.enc Encode table +ext/Encode/Encode/8859-5.ucm Encode table +ext/Encode/Encode/8859-6.enc Encode table +ext/Encode/Encode/8859-6.ucm Encode table +ext/Encode/Encode/8859-7.enc Encode table +ext/Encode/Encode/8859-7.ucm Encode table +ext/Encode/Encode/8859-8.enc Encode table +ext/Encode/Encode/8859-8.ucm Encode table +ext/Encode/Encode/8859-9.enc Encode table +ext/Encode/Encode/8859-9.ucm Encode table +ext/Encode/Encode/ascii.enc Encode table +ext/Encode/Encode/ascii.ucm Encode table +ext/Encode/Encode/big5.enc Encode table +ext/Encode/Encode/cp1006.enc Encode table +ext/Encode/Encode/cp1047.enc Encode table +ext/Encode/Encode/cp1047.ucm Encode table +ext/Encode/Encode/cp1250.enc Encode table +ext/Encode/Encode/cp1250.ucm Encode table +ext/Encode/Encode/cp1251.enc Encode table +ext/Encode/Encode/cp1252.enc Encode table +ext/Encode/Encode/cp1253.enc Encode table +ext/Encode/Encode/cp1254.enc Encode table +ext/Encode/Encode/cp1255.enc Encode table +ext/Encode/Encode/cp1256.enc Encode table +ext/Encode/Encode/cp1257.enc Encode table +ext/Encode/Encode/cp1258.enc Encode table +ext/Encode/Encode/cp37.enc Encode table +ext/Encode/Encode/cp37.ucm Encode table +ext/Encode/Encode/cp424.enc Encode table +ext/Encode/Encode/cp437.enc Encode table +ext/Encode/Encode/cp737.enc Encode table +ext/Encode/Encode/cp775.enc Encode table +ext/Encode/Encode/cp850.enc Encode table +ext/Encode/Encode/cp852.enc Encode table +ext/Encode/Encode/cp855.enc Encode table +ext/Encode/Encode/cp856.enc Encode table +ext/Encode/Encode/cp857.enc Encode table +ext/Encode/Encode/cp860.enc Encode table +ext/Encode/Encode/cp861.enc Encode table +ext/Encode/Encode/cp862.enc Encode table +ext/Encode/Encode/cp863.enc Encode table +ext/Encode/Encode/cp864.enc Encode table +ext/Encode/Encode/cp865.enc Encode table +ext/Encode/Encode/cp866.enc Encode table +ext/Encode/Encode/cp869.enc Encode table +ext/Encode/Encode/cp874.enc Encode table +ext/Encode/Encode/cp932.enc Encode table +ext/Encode/Encode/cp936.enc Encode table +ext/Encode/Encode/cp949.enc Encode table +ext/Encode/Encode/cp950.enc Encode table +ext/Encode/Encode/dingbats.enc Encode table +ext/Encode/Encode/dingbats.ucm Encode table +ext/Encode/Encode/euc-cn.enc Encode table +ext/Encode/Encode/euc-jp-0212.enc Encode table +ext/Encode/Encode/euc-jp.enc Encode table +ext/Encode/Encode/euc-kr.enc Encode table +ext/Encode/Encode/gb12345.enc Encode table +ext/Encode/Encode/gb1988.enc Encode table +ext/Encode/Encode/gb2312.enc Encode table +ext/Encode/Encode/gsm0338.enc Encode table +ext/Encode/Encode/HZ.enc Encode table +ext/Encode/Encode/ir-197.enc Encode table +ext/Encode/Encode/jis0201.enc Encode table +ext/Encode/Encode/jis0208.enc Encode table +ext/Encode/Encode/jis0212.enc Encode table +ext/Encode/Encode/koi8-f.enc Encode table +ext/Encode/Encode/koi8-f.ucm Encode table +ext/Encode/Encode/koi8-r.enc Encode table +ext/Encode/Encode/koi8-r.ucm Encode table +ext/Encode/Encode/koi8-u.enc Encode table +ext/Encode/Encode/koi8-u.ucm Encode table +ext/Encode/Encode/ksc5601.enc Encode table +ext/Encode/Encode/macCentEuro.enc Encode table +ext/Encode/Encode/macCroatian.enc Encode table +ext/Encode/Encode/macCyrillic.enc Encode table +ext/Encode/Encode/macDingbats.enc Encode table +ext/Encode/Encode/macGreek.enc Encode table +ext/Encode/Encode/macIceland.enc Encode table +ext/Encode/Encode/macJapan.enc Encode table +ext/Encode/Encode/macRoman.enc Encode table +ext/Encode/Encode/macRumanian.enc Encode table +ext/Encode/Encode/macSami.enc Encode table +ext/Encode/Encode/macThai.enc Encode table +ext/Encode/Encode/macTurkish.enc Encode table +ext/Encode/Encode/macUkraine.enc Encode table +ext/Encode/Encode/nextstep.enc Encode table +ext/Encode/Encode/nextstep.ucm Encode table +ext/Encode/Encode/posix-bc.enc Encode table +ext/Encode/Encode/posix-bc.ucm Encode table +ext/Encode/Encode/roman8.enc Encode table +ext/Encode/Encode/roman8.ucm Encode table +ext/Encode/Encode/shiftjis.enc Encode table +ext/Encode/Encode/symbol.enc Encode table +ext/Encode/Encode/symbol.ucm Encode table +ext/Encode/Encode/viscii.enc Encode table +ext/Encode/Encode/viscii.ucm Encode table +ext/Encode/lib/Encode.pm Encode extension +ext/Encode/lib/Encode/Encoding.pm Encode extension +ext/Encode/lib/Encode/Internal.pm Encode extension +ext/Encode/lib/Encode/iso10646_1.pm Encode extension +ext/Encode/lib/Encode/Tcl.pm Encode extension +ext/Encode/lib/Encode/Tcl/Escape.pm Encode extension +ext/Encode/lib/Encode/Tcl/Extended.pm Encode extension +ext/Encode/lib/Encode/Tcl/HanZi.pm Encode extension +ext/Encode/lib/Encode/Tcl/Table.pm Encode extension +ext/Encode/lib/Encode/ucs2_le.pm Encode extension +ext/Encode/lib/Encode/Unicode.pm Encode extension +ext/Encode/lib/Encode/utf8.pm Encode extension +ext/Encode/lib/Encode/XS.pm Encode extension +ext/Encode/lib/EncodeFormat.pod Encode extension +ext/Encode/Makefile.PL Encode extension makefile writer +ext/Encode/MANIFEST Encode extension +ext/Encode/README Encode extension +ext/Encode/t/Encode.t Encode extension test +ext/Encode/t/Tcl.t Encode extension test ext/Errno/ChangeLog Errno perl module change log ext/Errno/Errno.t See if Errno works ext/Errno/Errno_pm.PL Errno perl module create script diff --git a/ext/Encode/Encode/8859-1.ucm b/ext/Encode/Encode/8859-1.ucm index 6f139fb..f24d9d8 100644 --- a/ext/Encode/Encode/8859-1.ucm +++ b/ext/Encode/Encode/8859-1.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-1 -o Encode/iso8859-1.ucm Encode/iso8859-1.enc "iso-8859-1" 1 diff --git a/ext/Encode/Encode/8859-10.ucm b/ext/Encode/Encode/8859-10.ucm index 2bcc2b0..278112f 100644 --- a/ext/Encode/Encode/8859-10.ucm +++ b/ext/Encode/Encode/8859-10.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-10 -o Encode/iso8859-10.ucm Encode/iso8859-10.enc "iso-8859-10" 1 diff --git a/ext/Encode/Encode/8859-13.ucm b/ext/Encode/Encode/8859-13.ucm index ff3e75c..1f5f284 100644 --- a/ext/Encode/Encode/8859-13.ucm +++ b/ext/Encode/Encode/8859-13.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-13 -o Encode/iso8859-13.ucm Encode/iso8859-13.enc "iso-8859-13" 1 diff --git a/ext/Encode/Encode/8859-14.ucm b/ext/Encode/Encode/8859-14.ucm index 76a2bba..eb3f2f4 100644 --- a/ext/Encode/Encode/8859-14.ucm +++ b/ext/Encode/Encode/8859-14.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-14 -o Encode/iso8859-14.ucm Encode/iso8859-14.enc "iso-8859-14" 1 diff --git a/ext/Encode/Encode/8859-15.ucm b/ext/Encode/Encode/8859-15.ucm index 40538ac..abf4d18 100644 --- a/ext/Encode/Encode/8859-15.ucm +++ b/ext/Encode/Encode/8859-15.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-15 -o Encode/iso8859-15.ucm Encode/iso8859-15.enc "iso-8859-15" 1 diff --git a/ext/Encode/Encode/8859-16.ucm b/ext/Encode/Encode/8859-16.ucm index 2ff7cb8..667672e 100644 --- a/ext/Encode/Encode/8859-16.ucm +++ b/ext/Encode/Encode/8859-16.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-16 -o Encode/iso8859-16.ucm Encode/iso8859-16.enc "iso-8859-16" 1 diff --git a/ext/Encode/Encode/8859-2.ucm b/ext/Encode/Encode/8859-2.ucm index b55c8dc..90d9f8a 100644 --- a/ext/Encode/Encode/8859-2.ucm +++ b/ext/Encode/Encode/8859-2.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-2 -o Encode/iso8859-2.ucm Encode/iso8859-2.enc "iso-8859-2" 1 diff --git a/ext/Encode/Encode/8859-3.ucm b/ext/Encode/Encode/8859-3.ucm index ec68ed1..0abc868 100644 --- a/ext/Encode/Encode/8859-3.ucm +++ b/ext/Encode/Encode/8859-3.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-3 -o Encode/iso8859-3.ucm Encode/iso8859-3.enc "iso-8859-3" 1 diff --git a/ext/Encode/Encode/8859-4.ucm b/ext/Encode/Encode/8859-4.ucm index 3d43082..34952db 100644 --- a/ext/Encode/Encode/8859-4.ucm +++ b/ext/Encode/Encode/8859-4.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-4 -o Encode/iso8859-4.ucm Encode/iso8859-4.enc "iso-8859-4" 1 diff --git a/ext/Encode/Encode/8859-5.ucm b/ext/Encode/Encode/8859-5.ucm index 86235a8..dfa61fc 100644 --- a/ext/Encode/Encode/8859-5.ucm +++ b/ext/Encode/Encode/8859-5.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-5 -o Encode/iso8859-5.ucm Encode/iso8859-5.enc "iso-8859-5" 1 diff --git a/ext/Encode/Encode/8859-6.ucm b/ext/Encode/Encode/8859-6.ucm index fbeb228..f5b20fc 100644 --- a/ext/Encode/Encode/8859-6.ucm +++ b/ext/Encode/Encode/8859-6.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-6 -o Encode/iso8859-6.ucm Encode/iso8859-6.enc "iso-8859-6" 1 diff --git a/ext/Encode/Encode/8859-7.ucm b/ext/Encode/Encode/8859-7.ucm index ba405db..cbb716c 100644 --- a/ext/Encode/Encode/8859-7.ucm +++ b/ext/Encode/Encode/8859-7.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-7 -o Encode/iso8859-7.ucm Encode/iso8859-7.enc "iso-8859-7" 1 diff --git a/ext/Encode/Encode/8859-8.ucm b/ext/Encode/Encode/8859-8.ucm index 574abfd..6625144 100644 --- a/ext/Encode/Encode/8859-8.ucm +++ b/ext/Encode/Encode/8859-8.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-8 -o Encode/iso8859-8.ucm Encode/iso8859-8.enc "iso-8859-8" 1 diff --git a/ext/Encode/Encode/8859-9.ucm b/ext/Encode/Encode/8859-9.ucm index 24d7d4b..2451dc6 100644 --- a/ext/Encode/Encode/8859-9.ucm +++ b/ext/Encode/Encode/8859-9.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n iso-8859-9 -o Encode/iso8859-9.ucm Encode/iso8859-9.enc "iso-8859-9" 1 diff --git a/ext/Encode/Encode/Tcl.pm b/ext/Encode/Encode/Tcl.pm deleted file mode 100644 index 5d3ad1b..0000000 --- a/ext/Encode/Encode/Tcl.pm +++ /dev/null @@ -1,768 +0,0 @@ -package Encode::Tcl; - -our $VERSION = '1.00'; - -use strict; -use Encode qw(find_encoding); -use base 'Encode::Encoding'; -use Carp; - -=head1 NAME - -Encode::Tcl - Tcl encodings - -=cut - -sub INC_search -{ - foreach my $dir (@INC) - { - if (opendir(my $dh,"$dir/Encode")) - { - while (defined(my $name = readdir($dh))) - { - if ($name =~ /^(.*)\.enc$/) - { - my $canon = $1; - my $obj = find_encoding($canon); - if (!defined($obj)) - { - my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__; - $obj->Define( $canon ); - # warn "$canon => $obj\n"; - } - } - } - closedir($dh); - } - } -} - -sub import -{ - INC_search(); -} - -sub no_map_in_encode ($$) - # codepoint, enc-name; -{ - carp sprintf "\"\\N{U+%x}\" does not map to %s", @_; -# /* FIXME: Skip over the character, copy in replacement and continue -# * but that is messy so for now just fail. -# */ - return; -} - -sub no_map_in_decode ($$) - # enc-name, string beginning the malform char; -{ -# /* UTF-8 is supposed to be "Universal" so should not happen */ - croak sprintf "%s '%s' does not map to UTF-8", @_; -} - -sub encode -{ - my $obj = shift; - my $new = $obj->loadEncoding; - return undef unless (defined $new); - return $new->encode(@_); -} - -sub new_sequence -{ - my $obj = shift; - my $new = $obj->loadEncoding; - return undef unless (defined $new); - return $new->new_sequence(@_); -} - -sub decode -{ - my $obj = shift; - my $new = $obj->loadEncoding; - return undef unless (defined $new); - return $new->decode(@_); -} - -sub loadEncoding -{ - my $obj = shift; - my $file = $obj->{'File'}; - my $name = $obj->name; - if (open(my $fh,$file)) - { - my $type; - while (1) - { - my $line = <$fh>; - $type = substr($line,0,1); - last unless $type eq '#'; - } - my $subclass = - ($type eq 'X') ? 'Extended' : - ($type eq 'H') ? 'HanZi' : - ($type eq 'E') ? 'Escape' : 'Table'; - my $class = ref($obj) . '::' . $subclass; - # carp "Loading $file"; - bless $obj,$class; - return $obj if $obj->read($fh,$obj->name,$type); - } - else - { - croak("Cannot open $file for ".$obj->name); - } - $obj->Undefine($name); - return undef; -} - -sub INC_find -{ - my ($class,$name) = @_; - my $enc; - foreach my $dir (@INC) - { - last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc")); - } - return $enc; -} - -package Encode::Tcl::Table; -use base 'Encode::Encoding'; - -use Carp; -#use Data::Dumper; - -sub read -{ - my ($obj,$fh,$name,$type) = @_; - my($rep, @leading); - my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); - my @touni; - my %fmuni; - my $count = 0; - $def = hex($def); - while ($pages--) - { - my $line = <$fh>; - chomp($line); - my $page = hex($line); - my @page; - $leading[$page] = 1 if $page; - my $ch = $page * 256; - for (my $i = 0; $i < 16; $i++) - { - my $line = <$fh>; - for (my $j = 0; $j < 16; $j++) - { - my $val = hex(substr($line,0,4,'')); - if ($val || !$ch) - { - my $uch = pack('U', $val); # chr($val); - push(@page,$uch); - $fmuni{$uch} = $ch; - $count++; - } - else - { - push(@page,undef); - } - $ch++; - } - } - $touni[$page] = \@page; - } - $rep = $type ne 'M' - ? $obj->can("rep_$type") - : sub - { - ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'; - }; - $obj->{'Rep'} = $rep; - $obj->{'ToUni'} = \@touni; - $obj->{'FmUni'} = \%fmuni; - $obj->{'Def'} = $def; - $obj->{'Num'} = $count; - return $obj; -} - -sub rep_S { 'C' } - -sub rep_D { 'n' } - -#sub rep_M { ($_[0] > 255) ? 'n' : 'C' } - -sub representation -{ - my ($obj,$ch) = @_; - $ch = 0 unless @_ > 1; - $obj->{'Rep'}->($ch); -} - -sub decode -{ - my($obj,$str,$chk) = @_; - my $name = $obj->{'Name'}; - my $rep = $obj->{'Rep'}; - my $touni = $obj->{'ToUni'}; - my $uni; - while (length($str)) - { - my $cc = substr($str,0,1,''); - my $ch = ord($cc); - my $x; - if (&$rep($ch) eq 'C') - { - $x = $touni->[0][$ch]; - } - else - { - if(! length $str) - { - $str = pack('C',$ch); # split leading byte - last; - } - my $c2 = substr($str,0,1,''); - $cc .= $c2; - $x = $touni->[$ch][ord($c2)]; - } - unless (defined $x) - { - Encode::Tcl::no_map_in_decode($name, $cc.$str); - } - $uni .= $x; - } - $_[1] = $str if $chk; - return $uni; -} - - -sub encode -{ - my ($obj,$uni,$chk) = @_; - my $fmuni = $obj->{'FmUni'}; - my $def = $obj->{'Def'}; - my $name = $obj->{'Name'}; - my $rep = $obj->{'Rep'}; - my $str; - while (length($uni)) - { - my $ch = substr($uni,0,1,''); - my $x = $fmuni->{$ch}; - unless(defined $x) - { - unless($chk) - { - Encode::Tcl::no_map_in_encode(ord($ch), $name) - } - return undef; - } - $str .= pack(&$rep($x),$x); - } - $_[1] = $uni if $chk; - return $str; -} - -package Encode::Tcl::Escape; -use base 'Encode::Encoding'; - -use Carp; - -use constant SI => "\cO"; -use constant SO => "\cN"; -use constant SS2 => "\eN"; -use constant SS3 => "\eO"; - -sub read -{ - my ($obj,$fh,$name) = @_; - my(%tbl, @seq, $enc, @esc, %grp); - while (<$fh>) - { - next unless /^(\S+)\s+(.*)$/; - my ($key,$val) = ($1,$2); - $val =~ s/^\{(.*?)\}/$1/g; - $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; - - if($enc = Encode->getEncoding($key)) - { - $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; - push @seq, $val; - $grp{$val} = - $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO" - $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN" - $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN" - $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO" - 0; # G0 - } - else - { - $obj->{$key} = $val; - } - if($val =~ /^\e(.*)/) - { - push(@esc, quotemeta $1); - } - } - $obj->{'Grp'} = \%grp; # graphic chars - $obj->{'Seq'} = \@seq; # escape sequences - $obj->{'Tbl'} = \%tbl; # encoding tables - $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC - return $obj; -} - -sub decode -{ - my ($obj,$str,$chk) = @_; - my $name = $obj->{'Name'}; - my $tbl = $obj->{'Tbl'}; - my $seq = $obj->{'Seq'}; - my $grp = $obj->{'Grp'}; - my $esc = $obj->{'Esc'}; - my $ini = $obj->{'init'}; - my $fin = $obj->{'final'}; - my $std = $seq->[0]; - my $cur = $std; - my @sta = ($std, undef, undef, undef); # G0 .. G3 state - my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1); - my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3); - my $uni; - while (length($str)) - { - my $cc = substr($str,0,1,''); - if($cc eq "\e") - { - if($str =~ s/^($esc)//) - { - my $e = "\e$1"; - $sta[ $grp->{$e} ] = $e if $tbl->{$e}; - } - # appearance of "\eN\eO" or "\eO\eN" isn't supposed. - # but in that case, the former will be ignored. - elsif($str =~ s/^N//) - { - $ss = 2; - } - elsif($str =~ s/^O//) - { - $ss = 3; - } - else - { - # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped. - $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//; - if($chk && ! length $str) - { - $str = "\e$1"; # split sequence - last; - } - croak "unknown escape sequence: ESC $1"; - } - next; - } - if($cc eq SO) - { - $s = 1; next; - } - if($cc eq SI) - { - $s = 0; next; - } - - $cur = $ss ? $sta[$ss] : $sta[$s]; - - if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') - { - $uni .= $tbl->{$cur}->decode($cc); - $ss = 0; - next; - } - my $ch = ord($cc); - my $rep = $tbl->{$cur}->{'Rep'}; - my $touni = $tbl->{$cur}->{'ToUni'}; - my $x; - if (&$rep($ch) eq 'C') - { - $x = $touni->[0][$ch]; - } - else - { - if(! length $str) - { - $str = $cc; # split leading byte - last; - } - my $c2 = substr($str,0,1,''); - $cc .= $c2; - $x = $touni->[$ch][ord($c2)]; - } - unless (defined $x) - { - Encode::Tcl::no_map_in_decode($name, $cc.$str); - } - $uni .= $x; - $ss = 0; - } - if($chk) - { - my $back = join('', grep defined($_) && $_ ne $std, @sta); - $back .= SO if $s; - $back .= $ss == 2 ? SS2 : SS3 if $ss; - $_[1] = $back.$str; - } - return $uni; -} - -sub encode -{ - my ($obj,$uni,$chk) = @_; - my $name = $obj->{'Name'}; - my $tbl = $obj->{'Tbl'}; - my $seq = $obj->{'Seq'}; - my $grp = $obj->{'Grp'}; - my $ini = $obj->{'init'}; - my $fin = $obj->{'final'}; - my $std = $seq->[0]; - my $str = $ini; - my @sta = ($std,undef,undef,undef); # G0 .. G3 state - my $cur = $std; - my $pG = 0; # previous G: 0 or 1. - my $cG = 0; # current G: 0,1,2,3. - - if($ini && defined $grp->{$ini}) - { - $sta[ $grp->{$ini} ] = $ini; - } - - while (length($uni)) - { - my $ch = substr($uni,0,1,''); - my $x; - foreach my $e_seq (@$seq) - { - $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' - ? $tbl->{$e_seq}->{FmUni}->{$ch} - : $tbl->{$e_seq}->encode($ch,1); - $cur = $e_seq, last if defined $x; - } - unless (defined $x) - { - unless($chk) - { - Encode::Tcl::no_map_in_encode(ord($ch), $name) - } - return undef; - } - if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') - { - my $def = $tbl->{$cur}->{'Def'}; - my $rep = $tbl->{$cur}->{'Rep'}; - $x = pack(&$rep($x),$x); - } - $cG = $grp->{$cur}; - $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG]; - - $str .= $cG == 0 && $pG == 1 ? SI : - $cG == 1 && $pG == 0 ? SO : - $cG == 2 ? SS2 : - $cG == 3 ? SS3 : ""; - $str .= $x; - $pG = $cG if $cG < 2; - } - $str .= SI if $pG == 1; # back to G0 - $str .= $std unless $std eq $sta[0]; # GO to ASCII - $str .= $fin; # necessary? - $_[1] = $uni if $chk; - return $str; -} - - -package Encode::Tcl::Extended; -use base 'Encode::Encoding'; - -use Carp; - -sub read -{ - my ($obj,$fh,$name) = @_; - my(%tbl, $enc, %ssc, @key); - while (<$fh>) - { - next unless /^(\S+)\s+(.*)$/; - my ($key,$val) = ($1,$2); - $val =~ s/\{(.*?)\}/$1/; - $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; - - if($enc = Encode->getEncoding($key)) - { - push @key, $val; - $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; - $ssc{$val} = substr($val,1) if $val =~ /^>/; - } - else - { - $obj->{$key} = $val; - } - } - $obj->{'SSC'} = \%ssc; # single shift char - $obj->{'Tbl'} = \%tbl; # encoding tables - $obj->{'Key'} = \@key; # keys of table hash - return $obj; -} - -sub decode -{ - my ($obj,$str,$chk) = @_; - my $name = $obj->{'Name'}; - my $tbl = $obj->{'Tbl'}; - my $ssc = $obj->{'SSC'}; - my $cur = ''; # current state - my $uni; - while (length($str)) - { - my $cc = substr($str,0,1,''); - my $ch = ord($cc); - if(!$cur && $ch > 0x7F) - { - $cur = '>'; - $cur .= $cc, next if $ssc->{$cur.$cc}; - } - $ch ^= 0x80 if $cur; - - if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') - { - $uni .= $tbl->{$cur}->decode($cc); - $cur = ''; - next; - } - my $rep = $tbl->{$cur}->{'Rep'}; - my $touni = $tbl->{$cur}->{'ToUni'}; - my $x; - if (&$rep($ch) eq 'C') - { - $x = $touni->[0][$ch]; - } - else - { - if(! length $str) - { - $str = $cc; # split leading byte - last; - } - my $c2 = substr($str,0,1,''); - $cc .= $c2; - $x = $touni->[$ch][0x80 ^ ord($c2)]; - } - unless (defined $x) - { - Encode::Tcl::no_map_in_decode($name, $cc.$str); - } - $uni .= $x; - $cur = ''; - } - if($chk) - { - $cur =~ s/>//; - $_[1] = $cur ne '' ? $cur.$str : $str; - } - return $uni; -} - -sub encode -{ - my ($obj,$uni,$chk) = @_; - my $name = $obj->{'Name'}; - my $tbl = $obj->{'Tbl'}; - my $ssc = $obj->{'SSC'}; - my $key = $obj->{'Key'}; - my $str; - my $cur; - - while (length($uni)) - { - my $ch = substr($uni,0,1,''); - my $x; - foreach my $k (@$key) - { - $x = ref($tbl->{$k}) ne 'Encode::Tcl::Table' - ? $k =~ /^>/ - ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1) - : $tbl->{$k}->encode($ch,1) - : $tbl->{$k}->{FmUni}->{$ch}; - $cur = $k, last if defined $x; - } - unless (defined $x) - { - unless($chk) - { - Encode::Tcl::no_map_in_encode(ord($ch), $name) - } - return undef; - } - if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') - { - my $def = $tbl->{$cur}->{'Def'}; - my $rep = $tbl->{$cur}->{'Rep'}; - my $r = &$rep($x); - $x = pack($r, - $cur =~ /^>/ - ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x - : $x); - } - $str .= $ssc->{$cur} if defined $ssc->{$cur}; - $str .= $x; - } - $_[1] = $uni if $chk; - return $str; -} - -package Encode::Tcl::HanZi; -use base 'Encode::Encoding'; - -use Carp; - -sub read -{ - my ($obj,$fh,$name) = @_; - my(%tbl, @seq, $enc); - while (<$fh>) - { - next unless /^(\S+)\s+(.*)$/; - my ($key,$val) = ($1,$2); - $val =~ s/^\{(.*?)\}/$1/g; - $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; - if($enc = Encode->getEncoding($key)) - { - $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; - push @seq, $val; - } - else - { - $obj->{$key} = $val; - } - } - $obj->{'Seq'} = \@seq; # escape sequences - $obj->{'Tbl'} = \%tbl; # encoding tables - return $obj; -} - -sub decode -{ - my ($obj,$str,$chk) = @_; - my $name = $obj->{'Name'}; - my $tbl = $obj->{'Tbl'}; - my $seq = $obj->{'Seq'}; - my $std = $seq->[0]; - my $cur = $std; - my $uni; - while (length($str)){ - my $cc = substr($str,0,1,''); - if($cc eq "~") - { - if($str =~ s/^\cJ//) - { - next; - } - elsif($str =~ s/^\~//) - { - 1; # no-op - } - elsif($str =~ s/^([{}])//) - { - $cur = "~$1"; - next; - } - elsif(! length $str) - { - $str = '~'; - last; - } - else - { - $str =~ s/^([^~])//; - croak "unknown HanZi escape sequence: ~$1"; - next; - } - } - if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') - { - $uni .= $tbl->{$cur}->decode($cc); - next; - } - my $ch = ord($cc); - my $rep = $tbl->{$cur}->{'Rep'}; - my $touni = $tbl->{$cur}->{'ToUni'}; - my $x; - if (&$rep($ch) eq 'C') - { - $x = $touni->[0][$ch]; - } - else - { - if(! length $str) - { - $str = $cc; # split leading byte - last; - } - my $c2 = substr($str,0,1,''); - $cc .= $c2; - $x = $touni->[$ch][ord($c2)]; - } - unless (defined $x) - { - Encode::Tcl::no_map_in_decode($name, $cc.$str); - } - $uni .= $x; - } - if($chk) - { - $_[1] = $cur eq $std ? $str : $cur.$str; - } - return $uni; -} - -sub encode -{ - my ($obj,$uni,$chk) = @_; - my $name = $obj->{'Name'}; - my $tbl = $obj->{'Tbl'}; - my $seq = $obj->{'Seq'}; - my $std = $seq->[0]; - my $str; - my $pre = $std; - my $cur = $pre; - - while (length($uni)) - { - my $ch = substr($uni,0,1,''); - my $x; - foreach my $e_seq (@$seq) - { - $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' - ? $tbl->{$e_seq}->{FmUni}->{$ch} - : $tbl->{$e_seq}->encode($ch,1); - $cur = $e_seq and last if defined $x; - } - unless (defined $x) - { - unless($chk) - { - Encode::Tcl::no_map_in_encode(ord($ch), $name) - } - return undef; - } - if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') - { - my $def = $tbl->{$cur}->{'Def'}; - my $rep = $tbl->{$cur}->{'Rep'}; - $x = pack(&$rep($x),$x); - } - $str .= $cur eq $pre ? $x : ($pre = $cur).$x; - $str .= '~' if $x eq '~'; # to '~~' - } - $str .= $std unless $cur eq $std; - $_[1] = $uni if $chk; - return $str; -} - -1; -__END__ diff --git a/ext/Encode/Encode/ascii.ucm b/ext/Encode/Encode/ascii.ucm index 344423e..2281db0 100644 --- a/ext/Encode/Encode/ascii.ucm +++ b/ext/Encode/Encode/ascii.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n US-ascii -o Encode/ascii.ucm Encode/ascii.enc "US-ascii" "ascii" diff --git a/ext/Encode/Encode/cp1047.ucm b/ext/Encode/Encode/cp1047.ucm index aefc6e1..85c29d0 100644 --- a/ext/Encode/Encode/cp1047.ucm +++ b/ext/Encode/Encode/cp1047.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ # compile -n cp1047 -o Encode/cp1047.ucm Encode/cp1047.enc "cp1047" 1 diff --git a/ext/Encode/Encode/cp1250.ucm b/ext/Encode/Encode/cp1250.ucm index bc3cedc..904f6e5 100644 --- a/ext/Encode/Encode/cp1250.ucm +++ b/ext/Encode/Encode/cp1250.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#15 $ # ./compile -n cp1250 -o Encode/cp1250.ucm Encode/cp1250.enc "cp1250" 1 diff --git a/ext/Encode/Encode/cp37.ucm b/ext/Encode/Encode/cp37.ucm index 2c7698f..c8e9ed6 100644 --- a/ext/Encode/Encode/cp37.ucm +++ b/ext/Encode/Encode/cp37.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ # compile -n cp37 -o Encode/cp37.ucm Encode/cp37.enc "cp37" 1 diff --git a/ext/Encode/Encode/dingbats.ucm b/ext/Encode/Encode/dingbats.ucm index 8908e79..2dfd143 100644 --- a/ext/Encode/Encode/dingbats.ucm +++ b/ext/Encode/Encode/dingbats.ucm @@ -1,5 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ -# compile -n dingbats -o Encode/dingbats.ucm Encode/dingbats.enc "dingbats" 1 1 diff --git a/ext/Encode/Encode/koi8-r.ucm b/ext/Encode/Encode/koi8-r.ucm index 376ce5f..edff83d 100644 --- a/ext/Encode/Encode/koi8-r.ucm +++ b/ext/Encode/Encode/koi8-r.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#16 $ # ./compile -n koi8-r -o Encode/koi8-r.ucm Encode/koi8-r.enc "koi8-r" 1 diff --git a/ext/Encode/Encode/posix-bc.ucm b/ext/Encode/Encode/posix-bc.ucm index 7e9e82f..df7d702 100644 --- a/ext/Encode/Encode/posix-bc.ucm +++ b/ext/Encode/Encode/posix-bc.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ # compile -n posix-bc -o Encode/posix-bc.ucm Encode/posix-bc.enc "posix-bc" 1 diff --git a/ext/Encode/Encode/symbol.ucm b/ext/Encode/Encode/symbol.ucm index 25641ff..358b6be 100644 --- a/ext/Encode/Encode/symbol.ucm +++ b/ext/Encode/Encode/symbol.ucm @@ -1,4 +1,3 @@ -# Written $Id: //depot/perlio/ext/Encode/compile#14 $ # compile -n symbol -o Encode/symbol.ucm Encode/symbol.enc "symbol" 1 diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST new file mode 100644 index 0000000..d3edd0e --- /dev/null +++ b/ext/Encode/MANIFEST @@ -0,0 +1,151 @@ +Encode.xs +Encode/11643-1.enc +Encode/11643-2.enc +Encode/2022-cn.enc +Encode/2022-jp.enc +Encode/2022-jp1.enc +Encode/2022-jp2.enc +Encode/2022-kr.enc +Encode/2022.enc +Encode/7bit-greek.enc +Encode/7bit-jis.enc +Encode/7bit-kana.enc +Encode/7bit-kr.enc +Encode/7bit-latin1.enc +Encode/8859-1.enc +Encode/8859-1.ucm +Encode/8859-10.enc +Encode/8859-10.ucm +Encode/8859-11.enc +Encode/8859-11.ucm +Encode/8859-13.enc +Encode/8859-13.ucm +Encode/8859-14.enc +Encode/8859-14.ucm +Encode/8859-15.enc +Encode/8859-15.ucm +Encode/8859-16.enc +Encode/8859-16.ucm +Encode/8859-2.enc +Encode/8859-2.ucm +Encode/8859-3.enc +Encode/8859-3.ucm +Encode/8859-4.enc +Encode/8859-4.ucm +Encode/8859-5.enc +Encode/8859-5.ucm +Encode/8859-6.enc +Encode/8859-6.ucm +Encode/8859-7.enc +Encode/8859-7.ucm +Encode/8859-8.enc +Encode/8859-8.ucm +Encode/8859-9.enc +Encode/8859-9.ucm +Encode/HZ.enc +Encode/ascii.enc +Encode/ascii.ucm +Encode/big5.enc +Encode/cp1006.enc +Encode/cp1047.enc +Encode/cp1047.ucm +Encode/cp1250.enc +Encode/cp1250.ucm +Encode/cp1251.enc +Encode/cp1252.enc +Encode/cp1253.enc +Encode/cp1254.enc +Encode/cp1255.enc +Encode/cp1256.enc +Encode/cp1257.enc +Encode/cp1258.enc +Encode/cp37.enc +Encode/cp37.ucm +Encode/cp424.enc +Encode/cp437.enc +Encode/cp737.enc +Encode/cp775.enc +Encode/cp850.enc +Encode/cp852.enc +Encode/cp855.enc +Encode/cp856.enc +Encode/cp857.enc +Encode/cp860.enc +Encode/cp861.enc +Encode/cp862.enc +Encode/cp863.enc +Encode/cp864.enc +Encode/cp865.enc +Encode/cp866.enc +Encode/cp869.enc +Encode/cp874.enc +Encode/cp932.enc +Encode/cp936.enc +Encode/cp949.enc +Encode/cp950.enc +Encode/dingbats.enc +Encode/dingbats.ucm +Encode/euc-cn.enc +Encode/euc-jp-0212.enc +Encode/euc-jp.enc +Encode/euc-kr.enc +Encode/gb12345.enc +Encode/gb1988.enc +Encode/gb2312.enc +Encode/gsm0338.enc +Encode/ir-197.enc +Encode/jis0201.enc +Encode/jis0208.enc +Encode/jis0212.enc +Encode/koi8-f.enc +Encode/koi8-f.ucm +Encode/koi8-r.enc +Encode/koi8-r.ucm +Encode/koi8-u.enc +Encode/koi8-u.ucm +Encode/ksc5601.enc +Encode/macCentEuro.enc +Encode/macCroatian.enc +Encode/macCyrillic.enc +Encode/macDingbats.enc +Encode/macGreek.enc +Encode/macIceland.enc +Encode/macJapan.enc +Encode/macRoman.enc +Encode/macRumanian.enc +Encode/macSami.enc +Encode/macThai.enc +Encode/macTurkish.enc +Encode/macUkraine.enc +Encode/nextstep.enc +Encode/nextstep.ucm +Encode/posix-bc.enc +Encode/posix-bc.ucm +Encode/roman8.enc +Encode/roman8.ucm +Encode/shiftjis.enc +Encode/symbol.enc +Encode/symbol.ucm +Encode/viscii.enc +Encode/viscii.ucm +MANIFEST +Makefile.PL +README +compile +encengine.c +encode.h +lib/Encode.pm +lib/Encode/Encoding.pm +lib/Encode/Internal.pm +lib/Encode/Tcl.pm +lib/Encode/Tcl/Escape.pm +lib/Encode/Tcl/Extended.pm +lib/Encode/Tcl/HanZi.pm +lib/Encode/Tcl/Table.pm +lib/Encode/Unicode.pm +lib/Encode/XS.pm +lib/Encode/iso10646_1.pm +lib/Encode/ucs2_le.pm +lib/Encode/utf8.pm +lib/EncodeFormat.pod +t/Tcl.t diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL index bf6baee..3a26ad8 100644 --- a/ext/Encode/Makefile.PL +++ b/ext/Encode/Makefile.PL @@ -1,104 +1,106 @@ +use 5.7.2; +use strict; use ExtUtils::MakeMaker; my %tables = (8859 => ['ascii.ucm', 'cp1250.ucm', 'koi8-r.ucm' ], EBCDIC => ['cp1047.ucm','cp37.ucm','posix-bc.ucm'], Symbols => ['symbol.ucm','dingbats.ucm'], - ); + ); opendir(ENC,'Encode'); while (defined(my $file = readdir(ENC))) - { - if ($file =~ /8859.*\.ucm/) - { - push(@{$tables{8859}},$file); - } - } +{ + if ($file =~ /8859.*\.ucm/) + { + push(@{$tables{8859}},$file); + } +} closedir(ENC); WriteMakefile( - NAME => "Encode", - VERSION_FROM => 'Encode.pm', - OBJECT => '$(O_FILES)', - 'dist' => { - COMPRESS => 'gzip -9f', - SUFFIX => 'gz', - DIST_DEFAULT => 'all tardist', - }, - MAN3PODS => {}, -); + NAME => "Encode", + VERSION_FROM => 'lib/Encode.pm', + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + ); package MY; sub post_initialize { - my ($self) = @_; - my %o; - # Find existing O_FILES - foreach my $f (@{$self->{'O_FILES'}}) - { - $o{$f} = 1; - } - my $x = $self->{'OBJ_EXT'}; - # Add the table O_FILES - foreach my $e (keys %tables) - { - $o{$e.$x} = 1; - } - # Trick case-blind filesystems. - delete $o{'encode'.$x}; - $o{'Encode'.$x} = 1; - # Reset the variable - $self->{'O_FILES'} = [sort keys %o]; - my @files; - foreach my $table (keys %tables) - { - foreach my $ext (qw($(OBJ_EXT) .c .h .def .fnm)) + my ($self) = @_; + my %o; + # Find existing O_FILES + foreach my $f (@{$self->{'O_FILES'}}) + { + $o{$f} = 1; + } + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) { - push (@files,$table.$ext); + $o{$e.$x} = 1; } - } - $self->{'clean'}{'FILES'} .= join(' ',@files); - return ''; + # Trick case-blind filesystems. + delete $o{'encode'.$x}; + $o{'Encode'.$x} = 1; + # Reset the variable + $self->{'O_FILES'} = [sort keys %o]; + my @files; + foreach my $table (keys %tables) + { + foreach my $ext (qw($(OBJ_EXT) .c .h .def .fnm)) + { + push (@files,$table.$ext); + } +} +$self->{'clean'}{'FILES'} .= join(' ',@files); +return ''; } sub postamble { - my $self = shift; - my $dir = $self->catdir($self->curdir,'Encode'); - my $str = "# Encode\$(OBJ_EXT) depends on .h and .def files not .c files - but all written by compile\n"; - $str .= 'Encode$(OBJ_EXT) :'; - foreach my $table (keys %tables) - { - $str .= " $table.c"; - } - $str .= "\n\n"; - foreach my $table (keys %tables) - { - my $numlines = 1; - my $lengthsofar = length($str); - my $continuator = ''; - $str .= "$table.c : compile Makefile.PL"; - foreach my $file (@{$tables{$table}}) + my $self = shift; + my $dir = $self->catdir($self->curdir,'Encode'); + my $str = "# Encode\$(OBJ_EXT) depends on .h and .def files not .c files - but all written by compile\n"; + $str .= 'Encode$(OBJ_EXT) :'; + foreach my $table (keys %tables) { - $str .= $continuator.' '.$self->catfile($dir,$file); - if ( length($str)-$lengthsofar > 128*$numlines ) - { - $continuator .= " \\\n\t"; - $numlines++; - } else { - $continuator = ''; - } + $str .= " $table.c"; } - $str .= "\n\t\$(PERL) compile -o \$\@ -f $table.fnm\n\n"; - open (FILELIST, ">$table.fnm") - || die "Could not open $table.fnm: $!"; - foreach my $file (@{$tables{$table}}) + $str .= "\n\n"; + foreach my $table (keys %tables) { - print FILELIST $self->catfile($dir,$file) . "\n"; + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : compile Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + $str .= "\n\t\$(PERL) compile -o \$\@ -f $table.fnm\n\n"; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . "\n"; + } + close(FILELIST); } - close(FILELIST); - } - return $str; + return $str; } diff --git a/ext/Encode/README b/ext/Encode/README new file mode 100644 index 0000000..6e8384f --- /dev/null +++ b/ext/Encode/README @@ -0,0 +1,26 @@ +NAME + Encode - character encodings + +SYNOPSIS + use Encode; + +DESCRIPTION + The "Encode" module provides the interfaces between Perl's + strings and the rest of the system. Perl strings are + sequences of characters. + + See "perldoc Encode" for the rest of the story + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires perl5.7.2 or later. + diff --git a/ext/Encode/Todo b/ext/Encode/Todo deleted file mode 100644 index c59622b..0000000 --- a/ext/Encode/Todo +++ /dev/null @@ -1,49 +0,0 @@ -Use Markus Kuhn's UTF-8 Decode Stress Tester at - - http://www.cl.cam.ac.uk/~mgk25/ucs/examples/ - -Markus: -> -> What exactly is malformed UTF-8 data here? -> -> Obviously at least everything listed in section R.7 of ISO 10646-1/Amd.2. -> -> Does it also cover overlong UTF-8 sequences, i.e. any string -> containing any of the five bit sequences -> -> 1100000x, -> 11100000 100xxxxx, -> 11110000 1000xxxx, -> 11111000 10000xxx, -> 11111100 100000xx -> -> Does it also cover UTF-8 encoded code positions U+D800 to U+DFFF (UTF-16 -> surrogates) as well as U+FFFE (anti-BOM) and U+FFFF, all of which must -> not occur in proper UTF-8 and UTF-32 data according to the standard -> (see note 3 in section R.4 of UCS)? -> -> It might be useful, if the spec were clearer here. -> -> References: -> -> - ISO/IEC 10646-1:1993(E), Amd. 2, -> http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-8.html -> -> - http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 -> - -Markus: -> -> It is commonly considered to be good practice to reject at least -> overlong UTF-8 sequences, otherwise one permits multiple encodings for -> characters, which makes pattern matching far more difficult in -> applications where strings are processed in both coded and decoded form. -> It has been argued that this could easily lead to security -> vulnerabilities. See -> -> http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 -> http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt (section 4) -> ftp://sunsite.doc.ic.ac.uk/packages/rfc/rfc2279.txt (section 6) -> -> for a brief discussion. -> diff --git a/ext/Encode/compile b/ext/Encode/compile index ad64b6f..8f1899c 100755 --- a/ext/Encode/compile +++ b/ext/Encode/compile @@ -1,12 +1,11 @@ #!../../perl -w BEGIN { - @INC = '../../lib'; + unshift @INC, '../../lib'; $ENV{PATH} .= ';../..' if $^O eq 'MSWin32'; } use strict; use Getopt::Std; my @orig_ARGV = @ARGV; -my $perforce = '$Id$'; sub encode_U { @@ -74,7 +73,6 @@ if ($cname =~ /\.(c|xs)$/) !!!!!!! DO NOT EDIT THIS FILE !!!!!!! This file was autogenerated by: $^X $0 $cname @orig_ARGV - (Repository $perforce) */ END } @@ -592,7 +590,7 @@ sub output_ucm_page sub output_ucm { my ($fh,$name,$h,$rep,$min_el,$max_el) = @_; - print $fh "# Written $perforce\n# $0 @orig_ARGV\n" unless $opt{'q'}; + print $fh "# $0 @orig_ARGV\n" unless $opt{'q'}; print $fh " \"$name\"\n"; char_names(); if (defined $min_el) diff --git a/ext/Encode/Encode.pm b/ext/Encode/lib/Encode.pm similarity index 82% rename from ext/Encode/Encode.pm rename to ext/Encode/lib/Encode.pm index cd94038..7af36ad 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/lib/Encode.pm @@ -1,7 +1,6 @@ package Encode; use strict; - -our $VERSION = '0.02'; +our $VERSION = do {my @r=(q$Revision: 0.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; require DynaLoader; require Exporter; @@ -71,50 +70,50 @@ sub encodings sub findAlias { - my $class = shift; - local $_ = shift; - # print "# findAlias $_\n"; - unless (exists $alias{$_}) - { - for (my $i=0; $i < @alias; $i += 2) + my $class = shift; + local $_ = shift; + # print "# findAlias $_\n"; + unless (exists $alias{$_}) { - my $alias = $alias[$i]; - my $val = $alias[$i+1]; - my $new; - if (ref($alias) eq 'Regexp' && $_ =~ $alias) - { - $new = eval $val; - } - elsif (ref($alias) eq 'CODE') - { - $new = &{$alias}($val) - } - elsif (lc($_) eq lc($alias)) - { - $new = $val; - } - if (defined($new)) - { - next if $new eq $_; # avoid (direct) recursion on bugs - my $enc = (ref($new)) ? $new : find_encoding($new); - if ($enc) - { - $alias{$_} = $enc; - last; - } - } + for (my $i=0; $i < @alias; $i += 2) + { + my $alias = $alias[$i]; + my $val = $alias[$i+1]; + my $new; + if (ref($alias) eq 'Regexp' && $_ =~ $alias) + { + $new = eval $val; + } + elsif (ref($alias) eq 'CODE') + { + $new = &{$alias}($val) + } + elsif (lc($_) eq lc($alias)) + { + $new = $val; + } + if (defined($new)) + { + next if $new eq $_; # avoid (direct) recursion on bugs + my $enc = (ref($new)) ? $new : find_encoding($new); + if ($enc) + { + $alias{$_} = $enc; + last; + } + } + } } - } - return $alias{$_}; + return $alias{$_}; } sub define_alias { - while (@_) - { - my ($alias,$name) = splice(@_,0,2); - push(@alias, $alias => $name); - } + while (@_) + { + my ($alias,$name) = splice(@_,0,2); + push(@alias, $alias => $name); + } } # Allow variants of iso-8859-1 etc. @@ -174,285 +173,112 @@ define_alias( qr/^koi8u$/i => 'koi8-u' ); # Kannada Khmer Korean Laotian Malayalam Mongolian # Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese # TODO: what is the Japanese 'UJIS' encoding seen in some Linuxes? - +# Answer: euc-jp # Map white space and _ to '-' + define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); sub define_encoding { - my $obj = shift; - my $name = shift; - $encoding{$name} = $obj; - my $lc = lc($name); - define_alias($lc => $obj) unless $lc eq $name; - while (@_) - { - my $alias = shift; - define_alias($alias,$obj); - } - return $obj; + my $obj = shift; + my $name = shift; + $encoding{$name} = $obj; + my $lc = lc($name); + define_alias($lc => $obj) unless $lc eq $name; + while (@_) + { + my $alias = shift; + define_alias($alias,$obj); + } + return $obj; } sub getEncoding { - my ($class,$name) = @_; - my $enc; - if (ref($name) && $name->can('new_sequence')) - { - return $name; - } - my $lc = lc $name; - if (exists $encoding{$name}) - { - return $encoding{$name}; - } - if (exists $encoding{$lc}) - { - return $encoding{$lc}; - } - - my $oc = $class->findAlias($name); - return $oc if defined $oc; - return $class->findAlias($lc) if $lc ne $name; - - return; + my ($class,$name) = @_; + my $enc; + if (ref($name) && $name->can('new_sequence')) + { + return $name; + } + my $lc = lc $name; + if (exists $encoding{$name}) + { + return $encoding{$name}; + } + if (exists $encoding{$lc}) + { + return $encoding{$lc}; + } + + my $oc = $class->findAlias($name); + return $oc if defined $oc; + return $class->findAlias($lc) if $lc ne $name; + + return; } sub find_encoding { - my ($name) = @_; - return __PACKAGE__->getEncoding($name); + my ($name) = @_; + return __PACKAGE__->getEncoding($name); } sub encode { - my ($name,$string,$check) = @_; - my $enc = find_encoding($name); - croak("Unknown encoding '$name'") unless defined $enc; - my $octets = $enc->encode($string,$check); - return undef if ($check && length($string)); - return $octets; + my ($name,$string,$check) = @_; + my $enc = find_encoding($name); + croak("Unknown encoding '$name'") unless defined $enc; + my $octets = $enc->encode($string,$check); + return undef if ($check && length($string)); + return $octets; } sub decode { - my ($name,$octets,$check) = @_; - my $enc = find_encoding($name); - croak("Unknown encoding '$name'") unless defined $enc; - my $string = $enc->decode($octets,$check); - $_[1] = $octets if $check; - return $string; + my ($name,$octets,$check) = @_; + my $enc = find_encoding($name); + croak("Unknown encoding '$name'") unless defined $enc; + my $string = $enc->decode($octets,$check); + $_[1] = $octets if $check; + return $string; } sub from_to { - my ($string,$from,$to,$check) = @_; - my $f = find_encoding($from); - croak("Unknown encoding '$from'") unless defined $f; - my $t = find_encoding($to); - croak("Unknown encoding '$to'") unless defined $t; - my $uni = $f->decode($string,$check); - return undef if ($check && length($string)); - $string = $t->encode($uni,$check); - return undef if ($check && length($uni)); - return length($_[0] = $string); + my ($string,$from,$to,$check) = @_; + my $f = find_encoding($from); + croak("Unknown encoding '$from'") unless defined $f; + my $t = find_encoding($to); + croak("Unknown encoding '$to'") unless defined $t; + my $uni = $f->decode($string,$check); + return undef if ($check && length($string)); + $string = $t->encode($uni,$check); + return undef if ($check && length($uni)); + return length($_[0] = $string); } sub encode_utf8 { - my ($str) = @_; - utf8::encode($str); - return $str; + my ($str) = @_; + utf8::encode($str); + return $str; } sub decode_utf8 { - my ($str) = @_; - return undef unless utf8::decode($str); - return $str; -} - -package Encode::Encoding; -# Base class for classes which implement encodings - -sub Define -{ - my $obj = shift; - my $canonical = shift; - $obj = bless { Name => $canonical },$obj unless ref $obj; - # warn "$canonical => $obj\n"; - Encode::define_encoding($obj, $canonical, @_); -} - -sub name { shift->{'Name'} } - -# Temporary legacy methods -sub toUnicode { shift->decode(@_) } -sub fromUnicode { shift->encode(@_) } - -sub new_sequence { return $_[0] } - -package Encode::XS; -use base 'Encode::Encoding'; - -package Encode::Internal; -use base 'Encode::Encoding'; - -# Dummy package that provides the encode interface but leaves data -# as UTF-X encoded. It is here so that from_to() works. - -__PACKAGE__->Define('Internal'); - -Encode::define_alias( 'Unicode' => 'Internal' ) if ord('A') == 65; - -sub decode -{ - my ($obj,$str,$chk) = @_; - utf8::upgrade($str); - $_[1] = '' if $chk; - return $str; -} - -*encode = \&decode; - -package Encoding::Unicode; -use base 'Encode::Encoding'; - -__PACKAGE__->Define('Unicode') unless ord('A') == 65; - -sub decode -{ - my ($obj,$str,$chk) = @_; - my $res = ''; - for (my $i = 0; $i < length($str); $i++) - { - $res .= chr(utf8::unicode_to_native(ord(substr($str,$i,1)))); - } - $_[1] = '' if $chk; - return $res; -} - -sub encode -{ - my ($obj,$str,$chk) = @_; - my $res = ''; - for (my $i = 0; $i < length($str); $i++) - { - $res .= chr(utf8::native_to_unicode(ord(substr($str,$i,1)))); - } - $_[1] = '' if $chk; - return $res; -} - - -package Encode::utf8; -use base 'Encode::Encoding'; -# package to allow long-hand -# $octets = encode( utf8 => $string ); -# - -__PACKAGE__->Define(qw(UTF-8 utf8)); - -sub decode -{ - my ($obj,$octets,$chk) = @_; - my $str = Encode::decode_utf8($octets); - if (defined $str) - { - $_[1] = '' if $chk; - return $str; - } - return undef; -} - -sub encode -{ - my ($obj,$string,$chk) = @_; - my $octets = Encode::encode_utf8($string); - $_[1] = '' if $chk; - return $octets; -} - -package Encode::iso10646_1; -use base 'Encode::Encoding'; -# Encoding is 16-bit network order Unicode (no surogates) -# Used for X font encodings - -__PACKAGE__->Define(qw(UCS-2 iso-10646-1)); - -sub decode -{ - my ($obj,$str,$chk) = @_; - my $uni = ''; - while (length($str)) - { - my $code = unpack('n',substr($str,0,2,'')) & 0xffff; - $uni .= chr($code); - } - $_[1] = $str if $chk; - utf8::upgrade($uni); - return $uni; + my ($str) = @_; + return undef unless utf8::decode($str); + return $str; } -sub encode -{ - my ($obj,$uni,$chk) = @_; - my $str = ''; - while (length($uni)) - { - my $ch = substr($uni,0,1,''); - my $x = ord($ch); - unless ($x < 32768) - { - last if ($chk); - $x = 0; - } - $str .= pack('n',$x); - } - $_[1] = $uni if $chk; - return $str; -} - -package Encode::ucs_2le; -use base 'Encode::Encoding'; - -__PACKAGE__->Define(qw(UCS-2le UCS-2LE ucs-2le)); - -sub decode -{ - my ($obj,$str,$chk) = @_; - my $uni = ''; - while (length($str)) - { - my $code = unpack('v',substr($str,0,2,'')) & 0xffff; - $uni .= chr($code); - } - $_[1] = $str if $chk; - utf8::upgrade($uni); - return $uni; -} - -sub encode -{ - my ($obj,$uni,$chk) = @_; - my $str = ''; - while (length($uni)) - { - my $ch = substr($uni,0,1,''); - my $x = ord($ch); - unless ($x < 32768) - { - last if ($chk); - $x = 0; - } - $str .= pack('v',$x); - } - $_[1] = $uni if $chk; - return $str; -} - -# switch back to Encode package in case we ever add AutoLoader -package Encode; +require Encode::Encoding; +require Encode::XS; +require Encode::Internal; +require Encode::Unicode; +require Encode::utf8; +require Encode::iso10646_1; +require Encode::ucs2_le; 1; diff --git a/ext/Encode/lib/Encode/Encoding.pm b/ext/Encode/lib/Encode/Encoding.pm new file mode 100644 index 0000000..11bc01d --- /dev/null +++ b/ext/Encode/lib/Encode/Encoding.pm @@ -0,0 +1,25 @@ +package Encode::Encoding; +# Base class for classes which implement encodings +use strict; +our $VERSION = + do {my @r=(q$Revision: 0.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; + +sub Define +{ + my $obj = shift; + my $canonical = shift; + $obj = bless { Name => $canonical },$obj unless ref $obj; + # warn "$canonical => $obj\n"; + Encode::define_encoding($obj, $canonical, @_); +} + +sub name { shift->{'Name'} } + +# Temporary legacy methods +sub toUnicode { shift->decode(@_) } +sub fromUnicode { shift->encode(@_) } + +sub new_sequence { return $_[0] } + +1; +__END__ diff --git a/ext/Encode/lib/Encode/Internal.pm b/ext/Encode/lib/Encode/Internal.pm new file mode 100644 index 0000000..a807bce --- /dev/null +++ b/ext/Encode/lib/Encode/Internal.pm @@ -0,0 +1,23 @@ +package Encode::Internal; +use strict; +our $VERSION = do {my @r=(q$Revision: 0.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; + +# Dummy package that provides the encode interface but leaves data +# as UTF-X encoded. It is here so that from_to() works. + +__PACKAGE__->Define('Internal'); + +Encode::define_alias( 'Unicode' => 'Internal' ) if ord('A') == 65; + +sub decode +{ + my ($obj,$str,$chk) = @_; + utf8::upgrade($str); + $_[1] = '' if $chk; + return $str; +} + +*encode = \&decode; +1; +__END__ diff --git a/ext/Encode/lib/Encode/Tcl.pm b/ext/Encode/lib/Encode/Tcl.pm new file mode 100644 index 0000000..8a2efb9 --- /dev/null +++ b/ext/Encode/lib/Encode/Tcl.pm @@ -0,0 +1,133 @@ +package Encode::Tcl; +use strict; +our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use Encode qw(find_encoding); +use base 'Encode::Encoding'; +use Carp; + +=head1 NAME + +Encode::Tcl - Tcl encodings + +=cut + + sub INC_search +{ + foreach my $dir (@INC) + { + if (opendir(my $dh,"$dir/Encode")) + { + while (defined(my $name = readdir($dh))) + { + if ($name =~ /^(.*)\.enc$/) + { + my $canon = $1; + my $obj = find_encoding($canon); + if (!defined($obj)) + { + my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__; + $obj->Define( $canon ); + # warn "$canon => $obj\n"; + } + } + } + closedir($dh); + } + } +} + +sub import +{ + INC_search(); +} + +sub no_map_in_encode ($$) + # codepoint, enc-name; +{ + carp sprintf "\"\\N{U+%x}\" does not map to %s", @_; +# /* FIXME: Skip over the character, copy in replacement and continue +# * but that is messy so for now just fail. +# */ + return; +} + +sub no_map_in_decode ($$) + # enc-name, string beginning the malform char; +{ +# /* UTF-8 is supposed to be "Universal" so should not happen */ + croak sprintf "%s '%s' does not map to UTF-8", @_; +} + +sub encode +{ + my $obj = shift; + my $new = $obj->loadEncoding; + return undef unless (defined $new); + return $new->encode(@_); +} + +sub new_sequence +{ + my $obj = shift; + my $new = $obj->loadEncoding; + return undef unless (defined $new); + return $new->new_sequence(@_); +} + +sub decode +{ + my $obj = shift; + my $new = $obj->loadEncoding; + return undef unless (defined $new); + return $new->decode(@_); +} + +sub loadEncoding +{ + my $obj = shift; + my $file = $obj->{'File'}; + my $name = $obj->name; + if (open(my $fh,$file)) + { + my $type; + while (1) + { + my $line = <$fh>; + $type = substr($line,0,1); + last unless $type eq '#'; + } + my $subclass = + ($type eq 'X') ? 'Extended' : + ($type eq 'H') ? 'HanZi' : + ($type eq 'E') ? 'Escape' : 'Table'; + my $class = ref($obj) . '::' . $subclass; + # carp "Loading $file"; + bless $obj,$class; + return $obj if $obj->read($fh,$obj->name,$type); + } + else + { + croak("Cannot open $file for ".$obj->name); + } + $obj->Undefine($name); + return undef; +} + +sub INC_find +{ + my ($class,$name) = @_; + my $enc; + foreach my $dir (@INC) + { + last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc")); + } + return $enc; +} + +require Encode::Tcl::Table; +require Encode::Tcl::Escape; +require Encode::Tcl::Extended; +require Encode::Tcl::HanZi; + +1; +__END__ diff --git a/ext/Encode/lib/Encode/Tcl/Escape.pm b/ext/Encode/lib/Encode/Tcl/Escape.pm new file mode 100644 index 0000000..572e2bf --- /dev/null +++ b/ext/Encode/lib/Encode/Tcl/Escape.pm @@ -0,0 +1,217 @@ +package Encode::Tcl::Escape; +use strict; +our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; + +use Carp; + +use constant SI => "\cO"; +use constant SO => "\cN"; +use constant SS2 => "\eN"; +use constant SS3 => "\eO"; + +sub read +{ + my ($obj,$fh,$name) = @_; + my(%tbl, @seq, $enc, @esc, %grp); + while (<$fh>) + { + next unless /^(\S+)\s+(.*)$/; + my ($key,$val) = ($1,$2); + $val =~ s/^\{(.*?)\}/$1/g; + $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; + + if($enc = Encode->getEncoding($key)) + { + $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; + push @seq, $val; + $grp{$val} = + $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO" + $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN" + $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN" + $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO" + 0; # G0 + } + else + { + $obj->{$key} = $val; + } + if($val =~ /^\e(.*)/) + { + push(@esc, quotemeta $1); + } + } + $obj->{'Grp'} = \%grp; # graphic chars + $obj->{'Seq'} = \@seq; # escape sequences + $obj->{'Tbl'} = \%tbl; # encoding tables + $obj->{'Esc'} = join('|', @esc); # regex of sequences following ESC + return $obj; +} + +sub decode +{ + my ($obj,$str,$chk) = @_; + my $name = $obj->{'Name'}; + my $tbl = $obj->{'Tbl'}; + my $seq = $obj->{'Seq'}; + my $grp = $obj->{'Grp'}; + my $esc = $obj->{'Esc'}; + my $ini = $obj->{'init'}; + my $fin = $obj->{'final'}; + my $std = $seq->[0]; + my $cur = $std; + my @sta = ($std, undef, undef, undef); # G0 .. G3 state + my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1); + my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3); + my $uni; + while (length($str)) + { + my $cc = substr($str,0,1,''); + if($cc eq "\e") + { + if($str =~ s/^($esc)//) + { + my $e = "\e$1"; + $sta[ $grp->{$e} ] = $e if $tbl->{$e}; + } + # appearance of "\eN\eO" or "\eO\eN" isn't supposed. + # but in that case, the former will be ignored. + elsif($str =~ s/^N//) + { + $ss = 2; + } + elsif($str =~ s/^O//) + { + $ss = 3; + } + else + { + # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped. + $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//; + if($chk && ! length $str) + { + $str = "\e$1"; # split sequence + last; + } + croak "unknown escape sequence: ESC $1"; + } + next; + } + if($cc eq SO) + { + $s = 1; next; + } + if($cc eq SI) + { + $s = 0; next; + } + + $cur = $ss ? $sta[$ss] : $sta[$s]; + + if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') + { + $uni .= $tbl->{$cur}->decode($cc); + $ss = 0; + next; + } + my $ch = ord($cc); + my $rep = $tbl->{$cur}->{'Rep'}; + my $touni = $tbl->{$cur}->{'ToUni'}; + my $x; + if (&$rep($ch) eq 'C') + { + $x = $touni->[0][$ch]; + } + else + { + if(! length $str) + { + $str = $cc; # split leading byte + last; + } + my $c2 = substr($str,0,1,''); + $cc .= $c2; + $x = $touni->[$ch][ord($c2)]; + } + unless (defined $x) + { + Encode::Tcl::no_map_in_decode($name, $cc.$str); + } + $uni .= $x; + $ss = 0; + } + if($chk) + { + my $back = join('', grep defined($_) && $_ ne $std, @sta); + $back .= SO if $s; + $back .= $ss == 2 ? SS2 : SS3 if $ss; + $_[1] = $back.$str; + } + return $uni; +} + +sub encode +{ + my ($obj,$uni,$chk) = @_; + my $name = $obj->{'Name'}; + my $tbl = $obj->{'Tbl'}; + my $seq = $obj->{'Seq'}; + my $grp = $obj->{'Grp'}; + my $ini = $obj->{'init'}; + my $fin = $obj->{'final'}; + my $std = $seq->[0]; + my $str = $ini; + my @sta = ($std,undef,undef,undef); # G0 .. G3 state + my $cur = $std; + my $pG = 0; # previous G: 0 or 1. + my $cG = 0; # current G: 0,1,2,3. + + if($ini && defined $grp->{$ini}) + { + $sta[ $grp->{$ini} ] = $ini; + } + + while (length($uni)) + { + my $ch = substr($uni,0,1,''); + my $x; + foreach my $e_seq (@$seq) + { + $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' + ? $tbl->{$e_seq}->{FmUni}->{$ch} + : $tbl->{$e_seq}->encode($ch,1); + $cur = $e_seq, last if defined $x; + } + unless (defined $x) + { + unless($chk) + { + Encode::Tcl::no_map_in_encode(ord($ch), $name) + } + return undef; + } + if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') + { + my $def = $tbl->{$cur}->{'Def'}; + my $rep = $tbl->{$cur}->{'Rep'}; + $x = pack(&$rep($x),$x); + } + $cG = $grp->{$cur}; + $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG]; + + $str .= $cG == 0 && $pG == 1 ? SI : + $cG == 1 && $pG == 0 ? SO : + $cG == 2 ? SS2 : + $cG == 3 ? SS3 : ""; + $str .= $x; + $pG = $cG if $cG < 2; + } + $str .= SI if $pG == 1; # back to G0 + $str .= $std unless $std eq $sta[0]; # GO to ASCII + $str .= $fin; # necessary? + $_[1] = $uni if $chk; + return $str; +} + +1; +__END__ diff --git a/ext/Encode/lib/Encode/Tcl/Extended.pm b/ext/Encode/lib/Encode/Tcl/Extended.pm new file mode 100644 index 0000000..4b471d8 --- /dev/null +++ b/ext/Encode/lib/Encode/Tcl/Extended.pm @@ -0,0 +1,142 @@ +package Encode::Tcl::Extended; +use strict; +our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; + +use Carp; + +sub read +{ + my ($obj,$fh,$name) = @_; + my(%tbl, $enc, %ssc, @key); + while (<$fh>) + { + next unless /^(\S+)\s+(.*)$/; + my ($key,$val) = ($1,$2); + $val =~ s/\{(.*?)\}/$1/; + $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; + + if($enc = Encode->getEncoding($key)) + { + push @key, $val; + $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; + $ssc{$val} = substr($val,1) if $val =~ /^>/; + } + else + { + $obj->{$key} = $val; + } + } + $obj->{'SSC'} = \%ssc; # single shift char + $obj->{'Tbl'} = \%tbl; # encoding tables + $obj->{'Key'} = \@key; # keys of table hash + return $obj; +} + +sub decode +{ + my ($obj,$str,$chk) = @_; + my $name = $obj->{'Name'}; + my $tbl = $obj->{'Tbl'}; + my $ssc = $obj->{'SSC'}; + my $cur = ''; # current state + my $uni; + while (length($str)) + { + my $cc = substr($str,0,1,''); + my $ch = ord($cc); + if(!$cur && $ch > 0x7F) + { + $cur = '>'; + $cur .= $cc, next if $ssc->{$cur.$cc}; + } + $ch ^= 0x80 if $cur; + + if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') + { + $uni .= $tbl->{$cur}->decode($cc); + $cur = ''; + next; + } + my $rep = $tbl->{$cur}->{'Rep'}; + my $touni = $tbl->{$cur}->{'ToUni'}; + my $x; + if (&$rep($ch) eq 'C') + { + $x = $touni->[0][$ch]; + } + else + { + if(! length $str) + { + $str = $cc; # split leading byte + last; + } + my $c2 = substr($str,0,1,''); + $cc .= $c2; + $x = $touni->[$ch][0x80 ^ ord($c2)]; + } + unless (defined $x) + { + Encode::Tcl::no_map_in_decode($name, $cc.$str); + } + $uni .= $x; + $cur = ''; + } + if($chk) + { + $cur =~ s/>//; + $_[1] = $cur ne '' ? $cur.$str : $str; + } + return $uni; +} + +sub encode +{ + my ($obj,$uni,$chk) = @_; + my $name = $obj->{'Name'}; + my $tbl = $obj->{'Tbl'}; + my $ssc = $obj->{'SSC'}; + my $key = $obj->{'Key'}; + my $str; + my $cur; + + while (length($uni)) + { + my $ch = substr($uni,0,1,''); + my $x; + foreach my $k (@$key) + { + $x = ref($tbl->{$k}) ne 'Encode::Tcl::Table' + ? $k =~ /^>/ + ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1) + : $tbl->{$k}->encode($ch,1) + : $tbl->{$k}->{FmUni}->{$ch}; + $cur = $k, last if defined $x; + } + unless (defined $x) + { + unless($chk) + { + Encode::Tcl::no_map_in_encode(ord($ch), $name) + } + return undef; + } + if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') + { + my $def = $tbl->{$cur}->{'Def'}; + my $rep = $tbl->{$cur}->{'Rep'}; + my $r = &$rep($x); + $x = pack($r, + $cur =~ /^>/ + ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x + : $x); + } + $str .= $ssc->{$cur} if defined $ssc->{$cur}; + $str .= $x; + } + $_[1] = $uni if $chk; + return $str; +} +1; +__END__ diff --git a/ext/Encode/lib/Encode/Tcl/HanZi.pm b/ext/Encode/lib/Encode/Tcl/HanZi.pm new file mode 100644 index 0000000..3a6a5c0 --- /dev/null +++ b/ext/Encode/lib/Encode/Tcl/HanZi.pm @@ -0,0 +1,151 @@ +package Encode::Tcl::HanZi; +our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; + +use Carp; + +sub read +{ + my ($obj,$fh,$name) = @_; + my(%tbl, @seq, $enc); + while (<$fh>) + { + next unless /^(\S+)\s+(.*)$/; + my ($key,$val) = ($1,$2); + $val =~ s/^\{(.*?)\}/$1/g; + $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; + if($enc = Encode->getEncoding($key)) + { + $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; + push @seq, $val; + } + else + { + $obj->{$key} = $val; + } + } + $obj->{'Seq'} = \@seq; # escape sequences + $obj->{'Tbl'} = \%tbl; # encoding tables + return $obj; +} + +sub decode +{ + my ($obj,$str,$chk) = @_; + my $name = $obj->{'Name'}; + my $tbl = $obj->{'Tbl'}; + my $seq = $obj->{'Seq'}; + my $std = $seq->[0]; + my $cur = $std; + my $uni; + while (length($str)){ + my $cc = substr($str,0,1,''); + if($cc eq "~") + { + if($str =~ s/^\cJ//) + { + next; + } + elsif($str =~ s/^\~//) + { + 1; # no-op + } + elsif($str =~ s/^([{}])//) + { + $cur = "~$1"; + next; + } + elsif(! length $str) + { + $str = '~'; + last; + } + else + { + $str =~ s/^([^~])//; + croak "unknown HanZi escape sequence: ~$1"; + next; + } + } + if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') + { + $uni .= $tbl->{$cur}->decode($cc); + next; + } + my $ch = ord($cc); + my $rep = $tbl->{$cur}->{'Rep'}; + my $touni = $tbl->{$cur}->{'ToUni'}; + my $x; + if (&$rep($ch) eq 'C') + { + $x = $touni->[0][$ch]; + } + else + { + if(! length $str) + { + $str = $cc; # split leading byte + last; + } + my $c2 = substr($str,0,1,''); + $cc .= $c2; + $x = $touni->[$ch][ord($c2)]; + } + unless (defined $x) + { + Encode::Tcl::no_map_in_decode($name, $cc.$str); + } + $uni .= $x; + } + if($chk) + { + $_[1] = $cur eq $std ? $str : $cur.$str; + } + return $uni; +} + +sub encode +{ + my ($obj,$uni,$chk) = @_; + my $name = $obj->{'Name'}; + my $tbl = $obj->{'Tbl'}; + my $seq = $obj->{'Seq'}; + my $std = $seq->[0]; + my $str; + my $pre = $std; + my $cur = $pre; + + while (length($uni)) + { + my $ch = substr($uni,0,1,''); + my $x; + foreach my $e_seq (@$seq) + { + $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table' + ? $tbl->{$e_seq}->{FmUni}->{$ch} + : $tbl->{$e_seq}->encode($ch,1); + $cur = $e_seq and last if defined $x; + } + unless (defined $x) + { + unless($chk) + { + Encode::Tcl::no_map_in_encode(ord($ch), $name) + } + return undef; + } + if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') + { + my $def = $tbl->{$cur}->{'Def'}; + my $rep = $tbl->{$cur}->{'Rep'}; + $x = pack(&$rep($x),$x); + } + $str .= $cur eq $pre ? $x : ($pre = $cur).$x; + $str .= '~' if $x eq '~'; # to '~~' + } + $str .= $std unless $cur eq $std; + $_[1] = $uni if $chk; + return $str; +} +1; +__END__ diff --git a/ext/Encode/lib/Encode/Tcl/Table.pm b/ext/Encode/lib/Encode/Tcl/Table.pm new file mode 100644 index 0000000..1efedee --- /dev/null +++ b/ext/Encode/lib/Encode/Tcl/Table.pm @@ -0,0 +1,139 @@ +package Encode::Tcl::Table; +use strict; +our $VERSION = do {my @r=(q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; + +use Carp; +#use Data::Dumper; + +sub read +{ + my ($obj,$fh,$name,$type) = @_; + my($rep, @leading); + my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>)); + my @touni; + my %fmuni; + my $count = 0; + $def = hex($def); + while ($pages--) + { + my $line = <$fh>; + chomp($line); + my $page = hex($line); + my @page; + $leading[$page] = 1 if $page; + my $ch = $page * 256; + for (my $i = 0; $i < 16; $i++) + { + my $line = <$fh>; + for (my $j = 0; $j < 16; $j++) + { + my $val = hex(substr($line,0,4,'')); + if ($val || !$ch) + { + my $uch = pack('U', $val); # chr($val); + push(@page,$uch); + $fmuni{$uch} = $ch; + $count++; + } + else + { + push(@page,undef); + } + $ch++; + } + } + $touni[$page] = \@page; + } + $rep = $type ne 'M' + ? $obj->can("rep_$type") + : sub + { + ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'; + }; + $obj->{'Rep'} = $rep; + $obj->{'ToUni'} = \@touni; + $obj->{'FmUni'} = \%fmuni; + $obj->{'Def'} = $def; + $obj->{'Num'} = $count; + return $obj; +} + +sub rep_S { 'C' } + +sub rep_D { 'n' } + +#sub rep_M { ($_[0] > 255) ? 'n' : 'C' } + +sub representation +{ + my ($obj,$ch) = @_; + $ch = 0 unless @_ > 1; + $obj->{'Rep'}->($ch); +} + +sub decode +{ + my($obj,$str,$chk) = @_; + my $name = $obj->{'Name'}; + my $rep = $obj->{'Rep'}; + my $touni = $obj->{'ToUni'}; + my $uni; + while (length($str)) + { + my $cc = substr($str,0,1,''); + my $ch = ord($cc); + my $x; + if (&$rep($ch) eq 'C') + { + $x = $touni->[0][$ch]; + } + else + { + if(! length $str) + { + $str = pack('C',$ch); # split leading byte + last; + } + my $c2 = substr($str,0,1,''); + $cc .= $c2; + $x = $touni->[$ch][ord($c2)]; + } + unless (defined $x) + { + Encode::Tcl::no_map_in_decode($name, $cc.$str); + } + $uni .= $x; + } + $_[1] = $str if $chk; + return $uni; +} + + +sub encode +{ + my ($obj,$uni,$chk) = @_; + my $fmuni = $obj->{'FmUni'}; + my $def = $obj->{'Def'}; + my $name = $obj->{'Name'}; + my $rep = $obj->{'Rep'}; + my $str; + while (length($uni)) + { + my $ch = substr($uni,0,1,''); + my $x = $fmuni->{$ch}; + unless(defined $x) + { + unless($chk) + { + Encode::Tcl::no_map_in_encode(ord($ch), $name) + } + return undef; + } + $str .= pack(&$rep($x),$x); + } + $_[1] = $uni if $chk; + return $str; +} +1; +__END__ diff --git a/ext/Encode/lib/Encode/Unicode.pm b/ext/Encode/lib/Encode/Unicode.pm new file mode 100644 index 0000000..13a8e9c --- /dev/null +++ b/ext/Encode/lib/Encode/Unicode.pm @@ -0,0 +1,33 @@ +package Encoding::Unicode; +use strict; +our $VERSION = do {my @r=(q$Revision: 0.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; + +__PACKAGE__->Define('Unicode') unless ord('A') == 65; + +sub decode +{ + my ($obj,$str,$chk) = @_; + my $res = ''; + for (my $i = 0; $i < length($str); $i++) + { + $res .= chr(utf8::unicode_to_native(ord(substr($str,$i,1)))); + } + $_[1] = '' if $chk; + return $res; +} + +sub encode +{ + my ($obj,$str,$chk) = @_; + my $res = ''; + for (my $i = 0; $i < length($str); $i++) + { + $res .= chr(utf8::native_to_unicode(ord(substr($str,$i,1)))); + } + $_[1] = '' if $chk; + return $res; +} + +1; +__END__ diff --git a/ext/Encode/lib/Encode/XS.pm b/ext/Encode/lib/Encode/XS.pm new file mode 100644 index 0000000..875d915 --- /dev/null +++ b/ext/Encode/lib/Encode/XS.pm @@ -0,0 +1,8 @@ +package Encode::XS; +use strict; +our $VERSION = do {my @r=(q$Revision: 0.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; +1; +__END__ + + diff --git a/ext/Encode/lib/Encode/iso10646_1.pm b/ext/Encode/lib/Encode/iso10646_1.pm new file mode 100644 index 0000000..cabc6c3 --- /dev/null +++ b/ext/Encode/lib/Encode/iso10646_1.pm @@ -0,0 +1,43 @@ +package Encode::iso10646_1; +use strict; +our $VERSION = do {my @r=(q$Revision: 0.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; +# Encoding is 16-bit network order Unicode (no surogates) +# Used for X font encodings + +__PACKAGE__->Define(qw(UCS-2 iso-10646-1)); + +sub decode +{ + my ($obj,$str,$chk) = @_; + my $uni = ''; + while (length($str)) + { + my $code = unpack('n',substr($str,0,2,'')) & 0xffff; + $uni .= chr($code); + } + $_[1] = $str if $chk; + utf8::upgrade($uni); + return $uni; +} + +sub encode +{ + my ($obj,$uni,$chk) = @_; + my $str = ''; + while (length($uni)) + { + my $ch = substr($uni,0,1,''); + my $x = ord($ch); + unless ($x < 32768) + { + last if ($chk); + $x = 0; + } + $str .= pack('n',$x); + } + $_[1] = $uni if $chk; + return $str; +} +1; +__END__ diff --git a/ext/Encode/lib/Encode/ucs2_le.pm b/ext/Encode/lib/Encode/ucs2_le.pm new file mode 100644 index 0000000..82d67ca --- /dev/null +++ b/ext/Encode/lib/Encode/ucs2_le.pm @@ -0,0 +1,41 @@ +package Encode::ucs_2le; +use strict; +our $VERSION = do {my @r=(q$Revision: 0.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; + +__PACKAGE__->Define(qw(UCS-2le UCS-2LE ucs-2le)); + +sub decode +{ + my ($obj,$str,$chk) = @_; + my $uni = ''; + while (length($str)) + { + my $code = unpack('v',substr($str,0,2,'')) & 0xffff; + $uni .= chr($code); + } + $_[1] = $str if $chk; + utf8::upgrade($uni); + return $uni; +} + +sub encode +{ + my ($obj,$uni,$chk) = @_; + my $str = ''; + while (length($uni)) + { + my $ch = substr($uni,0,1,''); + my $x = ord($ch); + unless ($x < 32768) + { + last if ($chk); + $x = 0; + } + $str .= pack('v',$x); + } + $_[1] = $uni if $chk; + return $str; +} +1; +__END__ diff --git a/ext/Encode/lib/Encode/utf8.pm b/ext/Encode/lib/Encode/utf8.pm new file mode 100644 index 0000000..b7acdad --- /dev/null +++ b/ext/Encode/lib/Encode/utf8.pm @@ -0,0 +1,31 @@ +package Encode::utf8; +use strict; +our $VERSION = do {my @r=(q$Revision: 0.30 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r}; +use base 'Encode::Encoding'; +# package to allow long-hand +# $octets = encode( utf8 => $string ); +# + +__PACKAGE__->Define(qw(UTF-8 utf8)); + +sub decode +{ + my ($obj,$octets,$chk) = @_; + my $str = Encode::decode_utf8($octets); + if (defined $str) + { + $_[1] = '' if $chk; + return $str; + } + return undef; +} + +sub encode +{ + my ($obj,$string,$chk) = @_; + my $octets = Encode::encode_utf8($string); + $_[1] = '' if $chk; + return $octets; +} +1; +__END__ diff --git a/ext/Encode/Encode/EncodeFormat.pod b/ext/Encode/lib/EncodeFormat.pod similarity index 99% rename from ext/Encode/Encode/EncodeFormat.pod rename to ext/Encode/lib/EncodeFormat.pod index d83b128..3a1269d 100644 --- a/ext/Encode/Encode/EncodeFormat.pod +++ b/ext/Encode/lib/EncodeFormat.pod @@ -161,4 +161,3 @@ utf, encoding, convert # Copyright (c) 1997-1998 Sun Microsystems, Inc. # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. - # RCS: @(#) $Id: Encoding.3,v 1.7 1999/10/13 00:32:05 hobbs Exp $ diff --git a/ext/Encode.t b/ext/Encode/t/Encode.t similarity index 100% rename from ext/Encode.t rename to ext/Encode/t/Encode.t diff --git a/ext/Encode/Encode/Tcl.t b/ext/Encode/t/Tcl.t similarity index 100% rename from ext/Encode/Encode/Tcl.t rename to ext/Encode/t/Tcl.t