From: Jarkko Hietaniemi Date: Sun, 31 Mar 2002 22:31:29 +0000 (+0000) Subject: Upgrade to Encode 1.11, from Dan Kogai. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=3ef515df8b968e34c050cfc9602902de0efbefd6;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 1.11, from Dan Kogai. p4raw-id: //depot/perl@15638 --- diff --git a/MANIFEST b/MANIFEST index e397b3d..57554fc 100644 --- a/MANIFEST +++ b/MANIFEST @@ -199,6 +199,7 @@ ext/DynaLoader/XSLoader_pm.PL Simple XS Loader perl module ext/Encode/AUTHORS List of authors ext/Encode/bin/enc2xs Encode module generator ext/Encode/bin/piconv iconv by perl +ext/Encode/bin/ucm2table Table Generator for testing ext/Encode/Byte/Byte.pm Encode extension ext/Encode/Byte/Makefile.PL Encode extension ext/Encode/Changes Change Log @@ -209,74 +210,13 @@ ext/Encode/EBCDIC/Makefile.PL Encode extension ext/Encode/encengine.c Encode extension ext/Encode/Encode.pm Encode extension ext/Encode/Encode.xs Encode extension -ext/Encode/Encode/8859-1.ucm Unicode Character Map -ext/Encode/Encode/8859-10.ucm Unicode Character Map -ext/Encode/Encode/8859-11.ucm Unicode Character Map -ext/Encode/Encode/8859-13.ucm Unicode Character Map -ext/Encode/Encode/8859-14.ucm Unicode Character Map -ext/Encode/Encode/8859-15.ucm Unicode Character Map -ext/Encode/Encode/8859-16.ucm Unicode Character Map -ext/Encode/Encode/8859-2.ucm Unicode Character Map -ext/Encode/Encode/8859-3.ucm Unicode Character Map -ext/Encode/Encode/8859-4.ucm Unicode Character Map -ext/Encode/Encode/8859-5.ucm Unicode Character Map -ext/Encode/Encode/8859-6.ucm Unicode Character Map -ext/Encode/Encode/8859-7.ucm Unicode Character Map -ext/Encode/Encode/8859-8.ucm Unicode Character Map -ext/Encode/Encode/8859-9.ucm Unicode Character Map -ext/Encode/Encode/ascii.ucm Unicode Character Map -ext/Encode/Encode/big5-hkscs.ucm Unicode Character Map -ext/Encode/Encode/big5.ucm Unicode Character Map -ext/Encode/Encode/cp1047.ucm Unicode Character Map 
-ext/Encode/Encode/cp1250.ucm Unicode Character Map -ext/Encode/Encode/cp37.ucm Unicode Character Map -ext/Encode/Encode/cp932.ucm Unicode Character Map -ext/Encode/Encode/cp936.ucm Unicode Character Map -ext/Encode/Encode/cp949.ucm Unicode Character Map -ext/Encode/Encode/cp950.ucm Unicode Character Map -ext/Encode/Encode/dingbats.ucm Unicode Character Map +ext/Encode/Encode/Changes.e2x Skelton file for enc2xs ext/Encode/Encode/encode.h Encode extension header file -ext/Encode/Encode/euc-cn.ucm Unicode Character Map -ext/Encode/Encode/euc-jp.ucm Unicode Character Map -ext/Encode/Encode/euc-kr.ucm Unicode Character Map -ext/Encode/Encode/gb12345.ucm Unicode Character Map -ext/Encode/Encode/gb2312.ucm Unicode Character Map -ext/Encode/Encode/gsm0338.ucm Unicode Character Map -ext/Encode/Encode/hp-roman8.ucm Unicode Character Map -ext/Encode/Encode/ibm-1250.ucm Unicode Character Map -ext/Encode/Encode/ibm-1251.ucm Unicode Character Map -ext/Encode/Encode/ibm-1252.ucm Unicode Character Map -ext/Encode/Encode/ibm-1253.ucm Unicode Character Map -ext/Encode/Encode/ibm-1254.ucm Unicode Character Map -ext/Encode/Encode/ibm-1255.ucm Unicode Character Map -ext/Encode/Encode/ibm-1256.ucm Unicode Character Map -ext/Encode/Encode/ibm-1257.ucm Unicode Character Map -ext/Encode/Encode/ibm-1258.ucm Unicode Character Map -ext/Encode/Encode/ir-165.ucm Unicode Character Map -ext/Encode/Encode/jis0201.ucm Unicode Character Map -ext/Encode/Encode/johab.ucm Unicode Character Map -ext/Encode/Encode/koi8-f.ucm Unicode Character Map -ext/Encode/Encode/koi8-r.ucm Unicode Character Map -ext/Encode/Encode/koi8-u.ucm Unicode Character Map -ext/Encode/Encode/ksc5601.ucm Unicode Character Map -ext/Encode/Encode/macCentEuro.ucm Unicode Character Map -ext/Encode/Encode/macCroatian.ucm Unicode Character Map -ext/Encode/Encode/macCyrillic.ucm Unicode Character Map -ext/Encode/Encode/macDingbats.ucm Unicode Character Map -ext/Encode/Encode/macGreek.ucm Unicode Character Map 
-ext/Encode/Encode/macIceland.ucm Unicode Character Map -ext/Encode/Encode/macJapan.ucm Unicode Character Map -ext/Encode/Encode/macRoman.ucm Unicode Character Map -ext/Encode/Encode/macRumanian.ucm Unicode Character Map -ext/Encode/Encode/macSami.ucm Unicode Character Map -ext/Encode/Encode/macThai.ucm Unicode Character Map -ext/Encode/Encode/macTurkish.ucm Unicode Character Map -ext/Encode/Encode/macUkraine.ucm Unicode Character Map -ext/Encode/Encode/nextstep.ucm Unicode Character Map -ext/Encode/Encode/posix-bc.ucm Unicode Character Map -ext/Encode/Encode/shiftjis.ucm Unicode Character Map -ext/Encode/Encode/symbol.ucm Unicode Character Map -ext/Encode/Encode/viscii.ucm Unicode Character Map +ext/Encode/Encode/Makefile_PL.e2x Skelton file for enc2xs +ext/Encode/Encode/README.e2x Skelton file for enc2xs +ext/Encode/Encode/_PM.e2x Skelton file for enc2xs +ext/Encode/Encode/_T.e2x Skelton file for enc2xs +ext/Encode/encoding.pm Perl Pragmactic Module ext/Encode/JP/JP.pm Encode extension ext/Encode/JP/Makefile.PL Encode extension ext/Encode/KR/KR.pm Encode extension @@ -305,6 +245,7 @@ ext/Encode/Symbol/Symbol.pm Encode extension ext/Encode/t/Aliases.t Encode extension test ext/Encode/t/CN.t Encode extension test ext/Encode/t/Encode.t Encode extension test +ext/Encode/t/encoding.t Encode extension test ext/Encode/t/gb2312.euc test data ext/Encode/t/gb2312.ref test data ext/Encode/t/jisx0208.euc test data @@ -312,14 +253,81 @@ ext/Encode/t/jisx0208.ref test data ext/Encode/t/jisx0212.euc test data ext/Encode/t/jisx0212.ref test data ext/Encode/t/JP.t Encode extension test +ext/Encode/t/jperl.t Encode extension test ext/Encode/t/KR.t Encode extension test ext/Encode/t/ksc5601.euc test data ext/Encode/t/ksc5601.ref test data ext/Encode/t/TW.t Encode extension test ext/Encode/TW/Makefile.PL Encode extension ext/Encode/TW/TW.pm Encode extension -ext/Encode/ucm2table Table Generator for testing -ext/Errno/ChangeLog Errno perl module change log +ext/Encode/ucm/8859-1.ucm 
Unicode Character Map +ext/Encode/ucm/8859-10.ucm Unicode Character Map +ext/Encode/ucm/8859-11.ucm Unicode Character Map +ext/Encode/ucm/8859-13.ucm Unicode Character Map +ext/Encode/ucm/8859-14.ucm Unicode Character Map +ext/Encode/ucm/8859-15.ucm Unicode Character Map +ext/Encode/ucm/8859-16.ucm Unicode Character Map +ext/Encode/ucm/8859-2.ucm Unicode Character Map +ext/Encode/ucm/8859-3.ucm Unicode Character Map +ext/Encode/ucm/8859-4.ucm Unicode Character Map +ext/Encode/ucm/8859-5.ucm Unicode Character Map +ext/Encode/ucm/8859-6.ucm Unicode Character Map +ext/Encode/ucm/8859-7.ucm Unicode Character Map +ext/Encode/ucm/8859-8.ucm Unicode Character Map +ext/Encode/ucm/8859-9.ucm Unicode Character Map +ext/Encode/ucm/ascii.ucm Unicode Character Map +ext/Encode/ucm/big5-hkscs.ucm Unicode Character Map +ext/Encode/ucm/big5.ucm Unicode Character Map +ext/Encode/ucm/cp1047.ucm Unicode Character Map +ext/Encode/ucm/cp1250.ucm Unicode Character Map +ext/Encode/ucm/cp37.ucm Unicode Character Map +ext/Encode/ucm/cp932.ucm Unicode Character Map +ext/Encode/ucm/cp936.ucm Unicode Character Map +ext/Encode/ucm/cp949.ucm Unicode Character Map +ext/Encode/ucm/cp950.ucm Unicode Character Map +ext/Encode/ucm/dingbats.ucm Unicode Character Map +ext/Encode/ucm/euc-cn.ucm Unicode Character Map +ext/Encode/ucm/euc-jp.ucm Unicode Character Map +ext/Encode/ucm/euc-kr.ucm Unicode Character Map +ext/Encode/ucm/gb12345.ucm Unicode Character Map +ext/Encode/ucm/gb2312.ucm Unicode Character Map +ext/Encode/ucm/gsm0338.ucm Unicode Character Map +ext/Encode/ucm/hp-roman8.ucm Unicode Character Map +ext/Encode/ucm/ibm-1250.ucm Unicode Character Map +ext/Encode/ucm/ibm-1251.ucm Unicode Character Map +ext/Encode/ucm/ibm-1252.ucm Unicode Character Map +ext/Encode/ucm/ibm-1253.ucm Unicode Character Map +ext/Encode/ucm/ibm-1254.ucm Unicode Character Map +ext/Encode/ucm/ibm-1255.ucm Unicode Character Map +ext/Encode/ucm/ibm-1256.ucm Unicode Character Map +ext/Encode/ucm/ibm-1257.ucm Unicode 
Character Map +ext/Encode/ucm/ibm-1258.ucm Unicode Character Map +ext/Encode/ucm/ir-165.ucm Unicode Character Map +ext/Encode/ucm/jis0201.ucm Unicode Character Map +ext/Encode/ucm/johab.ucm Unicode Character Map +ext/Encode/ucm/koi8-f.ucm Unicode Character Map +ext/Encode/ucm/koi8-r.ucm Unicode Character Map +ext/Encode/ucm/koi8-u.ucm Unicode Character Map +ext/Encode/ucm/ksc5601.ucm Unicode Character Map +ext/Encode/ucm/macCentEuro.ucm Unicode Character Map +ext/Encode/ucm/macCroatian.ucm Unicode Character Map +ext/Encode/ucm/macCyrillic.ucm Unicode Character Map +ext/Encode/ucm/macDingbats.ucm Unicode Character Map +ext/Encode/ucm/macGreek.ucm Unicode Character Map +ext/Encode/ucm/macIceland.ucm Unicode Character Map +ext/Encode/ucm/macJapan.ucm Unicode Character Map +ext/Encode/ucm/macRoman.ucm Unicode Character Map +ext/Encode/ucm/macRumanian.ucm Unicode Character Map +ext/Encode/ucm/macSami.ucm Unicode Character Map +ext/Encode/ucm/macThai.ucm Unicode Character Map +ext/Encode/ucm/macTurkish.ucm Unicode Character Map +ext/Encode/ucm/macUkraine.ucm Unicode Character Map +ext/Encode/ucm/nextstep.ucm Unicode Character Map +ext/Encode/ucm/posix-bc.ucm Unicode Character Map +ext/Encode/ucm/shiftjis.ucm Unicode Character Map +ext/Encode/ucm/symbol.ucm Unicode Character Map +ext/Encode/ucm/viscii.ucm Unicode Character Map +ext/Errno/ChangeLog See if Errno works ext/Errno/Errno.t See if Errno works ext/Errno/Errno_pm.PL Errno perl module create script ext/Errno/Makefile.PL Errno extension makefile writer @@ -920,8 +928,6 @@ lib/dotsh.pl Code to "dot" in a shell script lib/Dumpvalue.pm Screen dump of perl values lib/Dumpvalue.t See if Dumpvalue works lib/dumpvar.pl A variable dumper -lib/encoding.pm Encoding of legacy data into Unicode -lib/encoding.t Test for the encoding pragma lib/English.pm Readable aliases for short variables lib/English.t See if English works lib/Env.pm Map environment into ordinary variables @@ -960,12 +966,12 @@ lib/ExtUtils/MM_Win32.pm 
MakeMaker methods for Win32 lib/ExtUtils/MM_Win95.pm MakeMaker methods for Win95 lib/ExtUtils/MY.pm MakeMaker user override class lib/ExtUtils/Packlist.pm Manipulates .packlist files +lib/ExtUtils/t/basic.t See if MakeMaker can build a module lib/ExtUtils/t/Big-Fat-Dummy/lib/Big/Fat/Dummy.pm MakeMaker dummy module lib/ExtUtils/t/Big-Fat-Dummy/Makefile.PL MakeMaker dummy module lib/ExtUtils/t/Command.t See if ExtUtils::Command works (Win32 only) lib/ExtUtils/t/Constant.t See if ExtUtils::Constant works lib/ExtUtils/t/Embed.t See if ExtUtils::Embed and embedding works -lib/ExtUtils/t/basic.t See if MakeMaker can build a module lib/ExtUtils/t/hints.t See if hint files are honored. lib/ExtUtils/t/Installed.t See if ExtUtils::Installed works lib/ExtUtils/t/Manifest.t See if ExtUtils::Manifest works diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index f0d3c54..5831af2 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -10,6 +10,7 @@ # This list is in alphabetical order. -- Anton Tagunov +Andreas J. Koenig Autrijus Tang Dan Kogai Gerrit P. Haase diff --git a/ext/Encode/Byte/Byte.pm b/ext/Encode/Byte/Byte.pm index af83d00..9c425b9 100644 --- a/ext/Encode/Byte/Byte.pm +++ b/ext/Encode/Byte/Byte.pm @@ -1,6 +1,6 @@ package Encode::Byte; use Encode; -our $VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; XSLoader::load('Encode::Byte',$VERSION); @@ -74,7 +74,7 @@ supported are as follows. 
MacSami MacGreek MacThai - MacIcelandic + MacIceland MacTurkish MacUkrainian diff --git a/ext/Encode/Byte/Makefile.PL b/ext/Encode/Byte/Makefile.PL index 57ee63e..641317a 100644 --- a/ext/Encode/Byte/Makefile.PL +++ b/ext/Encode/Byte/Makefile.PL @@ -30,7 +30,7 @@ my %tables = ( ], ); -opendir(ENC,'../Encode'); +opendir(ENC,'../ucm'); while (defined(my $file = readdir(ENC))) { if ($file =~ /(8859|ibm).*\.ucm/io) @@ -131,7 +131,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/CN/Makefile.PL b/ext/Encode/CN/Makefile.PL index 3545a1d..d0630b8 100644 --- a/ext/Encode/CN/Makefile.PL +++ b/ext/Encode/CN/Makefile.PL @@ -103,7 +103,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 3cf2226..613ef8c 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,8 +1,53 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 1.1 2002/03/29 20:59:39 dankogai Exp dankogai $ +# $Id: Changes,v 1.11 2002/03/31 22:12:13 dankogai Exp dankogai $ # -1.01 $Date: 2002/03/29 20:59:39 $ + +1.11 $Date: 2002/03/31 22:12:13 $ ++ t/encoding.t ++ t/jperl.t +! MANIFEST + Missing files from the MANIFEST fixed. + Message-Id: <20020401010156.H10509@alpha.hut.fi> + Version incremented just to make CPAN happy. + +1.10 2002/03/31 21:32:42 +! Makefile.PL +! README + INSTALL_UCM option added to Makefile.PL so you can install *.ucm + if you want. This should make Autrijus happy. 
Also, piconv + is added to default install. ++ Encode/*.e2x +! bin/enc2xs + Here-documented files that enc2xs generates are now exported + to *.e2x. Much cleaner and easier to debug. +! encoding.pm + encoding enhances so you can make it act more like such + (now prehistoric ) "localized" variations of perl like Jperl. ++ t/jperl.t + Further test for encoding.pm. Written in euc-jp ++ encoding.pm ++ t/encoding.t + Taken over form jhi. + Message-Id: <20020330174618.B10154@alpha.hut.fi> +- Encode/*.ucm ++ ucm/*.ucm +! Makefile.PL +! */Makefile.PL + *.ucm relocated to ucm/ so MakeMaker will not install'em by default. +- ucm2table ++ bin/ucm2table + *** +! AUTHORS +! Byte/Byte.pm +! Encode.pm +! Encode/macIceland.ucm +! lib/Encode/Alias.pm +! lib/Encode/Supported.pod + MacIceland fixes and Pod Typo fixes. This adds Andreas to AUTHORS. + Message-Id: + +1.01 2002/03/29 20:59:39 ! Makefile.PL ! README s/USE_SCRIPTS/MORE_SCRIPTS/ diff --git a/ext/Encode/EBCDIC/Makefile.PL b/ext/Encode/EBCDIC/Makefile.PL index 508df23..adf59b0 100644 --- a/ext/Encode/EBCDIC/Makefile.PL +++ b/ext/Encode/EBCDIC/Makefile.PL @@ -98,7 +98,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index d07bfea..9add728 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,6 +1,6 @@ package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.11 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; require DynaLoader; @@ -90,14 +90,14 @@ unless ($ON_EBCDIC) { # CJK added to autoload unless EBCDIC env ); } -for my $k (qw(centeuro croatian cyrillic dingbats greek - iceland 
roman rumanian sami - thai turkish ukraine)) +for my $k (qw{ CentralEurRoman Croatian Cyrillic Greek + Iceland Roman Rumanian Sami + Thai Turkish Ukrainian + }) { $ExtModule{"mac$k"} = 'Encode/Byte.pm'; } - sub encodings { my $class = shift; @@ -199,7 +199,7 @@ sub from_to return undef if ($check && length($string)); $string = $t->encode($uni,$check); return undef if ($check && length($uni)); - return length($_[0] = $string); + return defined($_[0] = $string) ? length($string) : undef ; } sub encode_utf8 @@ -333,7 +333,7 @@ For example to convert ISO-8859-1 data to UTF-8: $utf8 = decode("iso-8859-1", $latin1); -=item from_to($string, FROM_ENCODING, TO_ENCODING[, CHECK]) +=item [$length =] from_to($string, FROM_ENCODING, TO_ENCODING[, CHECK]) Convert B<in-place> the data between two encodings. How did the data in $string originally get to be in FROM_ENCODING? Either using @@ -352,6 +352,9 @@ and to convert it back: Note that because the conversion happens in place, the data to be converted cannot be a string constant, it must be a scalar variable. +from_to() return the length of the converted string on success, undef +otherwise. + =back =head2 Listing available encodings @@ -384,9 +387,9 @@ To add new alias to a given encoding, Use; use Encode::Alias; define_alias(newName => ENCODING); -After that, newName can be to be used as am alias for ENCODING. -ENCODING may be either the name of an encoding or and I<encoding object> +After that, newName can be used as an alias for ENCODING. +ENCODING may be either the name of an encoding or an I<encoding object> See L<Encode::Alias> on details. diff --git a/ext/Encode/Encode/Changes.e2x b/ext/Encode/Encode/Changes.e2x new file mode 100644 index 0000000..f6b5a77 --- /dev/null +++ b/ext/Encode/Encode/Changes.e2x @@ -0,0 +1,7 @@ +# +# $Id: Changes.e2x,v 1.10 2002/03/31 21:00:50 dankogai Exp $ +# Revision history for Perl extension Encode::$_Name_. +# + +0.01 $_Now_ + Autogenerated by enc2xs version $_Version_. 
diff --git a/ext/Encode/Encode/Makefile_PL.e2x b/ext/Encode/Encode/Makefile_PL.e2x new file mode 100644 index 0000000..000ce92 --- /dev/null +++ b/ext/Encode/Encode/Makefile_PL.e2x @@ -0,0 +1,150 @@ +# +# This file is auto-generated by: +# enc2xs version $_Version_ +# $_Now_ +# +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; + +# Please edit the following to the taste! +my $name = '$_Name_'; +my %tables = ( + $_Name__t => [ $_TableFiles_ ], + ); + +#### DO NOT EDIT BEYOND THIS POINT! +my $enc2xs = '$_Enc2xs_'; +WriteMakefile( + INC => "-I$_Inc_", +#### END_OF_HEADER -- DO NOT EDIT THIS LINE BY HAND! #### + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? + XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + # $self->{'H'} = [$self->catfile($self->updir,'encode.h')]; + my %xs; + foreach my $table (keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. 
+ foreach my $ext (qw($(OBJ_EXT) .c .h _def.h .fnm)) { + push (@files,$table.$ext); + } + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#include <EXTERN.h> +#include <perl.h> +#include <XSUB.h> +#define U8 U8 +#include "encode.h" +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); + int i = 0; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}_def.h"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = "."; # $self->catdir('Encode'); + my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + foreach my $table (keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + $str .= $^O eq 'VMS' # In VMS quote to preserve case + ? 
qq{\n\t\$(PERL) $enc2xs -"Q" -"O" -o \$\@ -f $table.fnm\n\n} + : qq{\n\t\$(PERL) $enc2xs -Q -O -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . "\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/ext/Encode/Encode/README.e2x b/ext/Encode/Encode/README.e2x new file mode 100644 index 0000000..28a31a6 --- /dev/null +++ b/ext/Encode/Encode/README.e2x @@ -0,0 +1,31 @@ +Encode::$_Name_ version 0.1 +======== + +NAME + Encode::$_Name_ - + +SYNOPSIS + use Encode::$_Name_; + # +ABSTRACT + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires perl version 5.7.3 or later. + +COPYRIGHT AND LICENCE + +Copyright (C) 2002 Your Name + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + diff --git a/ext/Encode/Encode/_PM.e2x b/ext/Encode/Encode/_PM.e2x new file mode 100644 index 0000000..208b87e --- /dev/null +++ b/ext/Encode/Encode/_PM.e2x @@ -0,0 +1,23 @@ +package Encode::$_Name_; +our $VERSION = "0.01"; + +use Encode; +use XSLoader; +XSLoader::load('Encode::$_Name_', $VERSION); + +1; +__END__ + +=head1 NAME + +Encode::$_Name_ - New Encoding + +=head1 SYNOPSIS + +You got to fill this in! + +=head1 SEE ALSO + +L<Encode> + +=cut diff --git a/ext/Encode/Encode/_T.e2x b/ext/Encode/Encode/_T.e2x new file mode 100644 index 0000000..6cf5f29 --- /dev/null +++ b/ext/Encode/Encode/_T.e2x @@ -0,0 +1,7 @@ +use strict; +# Adjust the number here! +use Test::More tests => 2; + +use_ok('Encode'); +use_ok('Encode::$_Name_'); +# Add more test here! 
diff --git a/ext/Encode/JP/Makefile.PL b/ext/Encode/JP/Makefile.PL index 5ce6ef6..4c0fbd3 100644 --- a/ext/Encode/JP/Makefile.PL +++ b/ext/Encode/JP/Makefile.PL @@ -102,7 +102,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/KR/Makefile.PL b/ext/Encode/KR/Makefile.PL index df7534d..db6ef25 100644 --- a/ext/Encode/KR/Makefile.PL +++ b/ext/Encode/KR/Makefile.PL @@ -102,7 +102,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index df9e51a..bde0b22 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -8,74 +8,12 @@ EBCDIC/EBCDIC.pm Encode extension EBCDIC/Makefile.PL Encode extension Encode.pm Encode extension Encode.xs Encode extension -Encode/8859-1.ucm Unicode Character Map -Encode/8859-10.ucm Unicode Character Map -Encode/8859-11.ucm Unicode Character Map -Encode/8859-13.ucm Unicode Character Map -Encode/8859-14.ucm Unicode Character Map -Encode/8859-15.ucm Unicode Character Map -Encode/8859-16.ucm Unicode Character Map -Encode/8859-2.ucm Unicode Character Map -Encode/8859-3.ucm Unicode Character Map -Encode/8859-4.ucm Unicode Character Map -Encode/8859-5.ucm Unicode Character Map -Encode/8859-6.ucm Unicode Character Map -Encode/8859-7.ucm Unicode Character Map -Encode/8859-8.ucm Unicode Character Map -Encode/8859-9.ucm Unicode Character Map -Encode/ascii.ucm Unicode Character Map -Encode/big5-hkscs.ucm Unicode Character Map -Encode/big5.ucm Unicode Character Map 
-Encode/cp1047.ucm Unicode Character Map -Encode/cp1250.ucm Unicode Character Map -Encode/cp37.ucm Unicode Character Map -Encode/cp932.ucm Unicode Character Map -Encode/cp936.ucm Unicode Character Map -Encode/cp949.ucm Unicode Character Map -Encode/cp950.ucm Unicode Character Map -Encode/dingbats.ucm Unicode Character Map +Encode/Changes.e2x Skelton file for enc2xs +Encode/Makefile_PL.e2x Skelton file for enc2xs +Encode/README.e2x Skelton file for enc2xs +Encode/_PM.e2x Skelton file for enc2xs +Encode/_T.e2x Skelton file for enc2xs Encode/encode.h Encode extension header file -Encode/euc-cn.ucm Unicode Character Map -Encode/euc-jp.ucm Unicode Character Map -Encode/euc-kr.ucm Unicode Character Map -Encode/gb12345.ucm Unicode Character Map -Encode/gb2312.ucm Unicode Character Map -Encode/gsm0338.ucm Unicode Character Map -Encode/hp-roman8.ucm Unicode Character Map -Encode/ibm-1250.ucm Unicode Character Map -Encode/ibm-1251.ucm Unicode Character Map -Encode/ibm-1252.ucm Unicode Character Map -Encode/ibm-1253.ucm Unicode Character Map -Encode/ibm-1254.ucm Unicode Character Map -Encode/ibm-1255.ucm Unicode Character Map -Encode/ibm-1256.ucm Unicode Character Map -Encode/ibm-1257.ucm Unicode Character Map -Encode/ibm-1258.ucm Unicode Character Map -Encode/ir-165.ucm Unicode Character Map -Encode/jis0201.ucm Unicode Character Map -Encode/johab.ucm Unicode Character Map -Encode/koi8-f.ucm Unicode Character Map -Encode/koi8-r.ucm Unicode Character Map -Encode/koi8-u.ucm Unicode Character Map -Encode/ksc5601.ucm Unicode Character Map -Encode/macCentEuro.ucm Unicode Character Map -Encode/macCroatian.ucm Unicode Character Map -Encode/macCyrillic.ucm Unicode Character Map -Encode/macDingbats.ucm Unicode Character Map -Encode/macGreek.ucm Unicode Character Map -Encode/macIceland.ucm Unicode Character Map -Encode/macJapan.ucm Unicode Character Map -Encode/macRoman.ucm Unicode Character Map -Encode/macRumanian.ucm Unicode Character Map -Encode/macSami.ucm Unicode Character Map 
-Encode/macThai.ucm Unicode Character Map -Encode/macTurkish.ucm Unicode Character Map -Encode/macUkraine.ucm Unicode Character Map -Encode/nextstep.ucm Unicode Character Map -Encode/posix-bc.ucm Unicode Character Map -Encode/shiftjis.ucm Unicode Character Map -Encode/symbol.ucm Unicode Character Map -Encode/viscii.ucm Unicode Character Map JP/JP.pm Encode extension JP/Makefile.PL Encode extension KR/KR.pm Encode extension @@ -89,7 +27,9 @@ TW/Makefile.PL Encode extension TW/TW.pm Encode extension bin/enc2xs Encode module generator bin/piconv iconv by perl +bin/ucm2table Table Generator for testing encengine.c Encode extension +encoding.pm Perl Pragmactic Module lib/Encode/10646_1.pm Encode extension lib/Encode/Alias.pm Encode extension lib/Encode/CJKConstants.pm Encode extension @@ -112,12 +52,80 @@ t/Encode.t Encode extension test t/JP.t Encode extension test t/KR.t Encode extension test t/TW.t Encode extension test +t/encoding.t encoding extension test t/gb2312.euc test data t/gb2312.ref test data t/jisx0208.euc test data t/jisx0208.ref test data t/jisx0212.euc test data t/jisx0212.ref test data +t/jperl.t encoding extension test t/ksc5601.euc test data t/ksc5601.ref test data -ucm2table Table Generator for testing +ucm/8859-1.ucm Unicode Character Map +ucm/8859-10.ucm Unicode Character Map +ucm/8859-11.ucm Unicode Character Map +ucm/8859-13.ucm Unicode Character Map +ucm/8859-14.ucm Unicode Character Map +ucm/8859-15.ucm Unicode Character Map +ucm/8859-16.ucm Unicode Character Map +ucm/8859-2.ucm Unicode Character Map +ucm/8859-3.ucm Unicode Character Map +ucm/8859-4.ucm Unicode Character Map +ucm/8859-5.ucm Unicode Character Map +ucm/8859-6.ucm Unicode Character Map +ucm/8859-7.ucm Unicode Character Map +ucm/8859-8.ucm Unicode Character Map +ucm/8859-9.ucm Unicode Character Map +ucm/ascii.ucm Unicode Character Map +ucm/big5-hkscs.ucm Unicode Character Map +ucm/big5.ucm Unicode Character Map +ucm/cp1047.ucm Unicode Character Map +ucm/cp1250.ucm Unicode 
Character Map +ucm/cp37.ucm Unicode Character Map +ucm/cp932.ucm Unicode Character Map +ucm/cp936.ucm Unicode Character Map +ucm/cp949.ucm Unicode Character Map +ucm/cp950.ucm Unicode Character Map +ucm/dingbats.ucm Unicode Character Map +ucm/euc-cn.ucm Unicode Character Map +ucm/euc-jp.ucm Unicode Character Map +ucm/euc-kr.ucm Unicode Character Map +ucm/gb12345.ucm Unicode Character Map +ucm/gb2312.ucm Unicode Character Map +ucm/gsm0338.ucm Unicode Character Map +ucm/hp-roman8.ucm Unicode Character Map +ucm/ibm-1250.ucm Unicode Character Map +ucm/ibm-1251.ucm Unicode Character Map +ucm/ibm-1252.ucm Unicode Character Map +ucm/ibm-1253.ucm Unicode Character Map +ucm/ibm-1254.ucm Unicode Character Map +ucm/ibm-1255.ucm Unicode Character Map +ucm/ibm-1256.ucm Unicode Character Map +ucm/ibm-1257.ucm Unicode Character Map +ucm/ibm-1258.ucm Unicode Character Map +ucm/ir-165.ucm Unicode Character Map +ucm/jis0201.ucm Unicode Character Map +ucm/johab.ucm Unicode Character Map +ucm/koi8-f.ucm Unicode Character Map +ucm/koi8-r.ucm Unicode Character Map +ucm/koi8-u.ucm Unicode Character Map +ucm/ksc5601.ucm Unicode Character Map +ucm/macCentEuro.ucm Unicode Character Map +ucm/macCroatian.ucm Unicode Character Map +ucm/macCyrillic.ucm Unicode Character Map +ucm/macDingbats.ucm Unicode Character Map +ucm/macGreek.ucm Unicode Character Map +ucm/macIceland.ucm Unicode Character Map +ucm/macJapan.ucm Unicode Character Map +ucm/macRoman.ucm Unicode Character Map +ucm/macRumanian.ucm Unicode Character Map +ucm/macSami.ucm Unicode Character Map +ucm/macThai.ucm Unicode Character Map +ucm/macTurkish.ucm Unicode Character Map +ucm/macUkraine.ucm Unicode Character Map +ucm/nextstep.ucm Unicode Character Map +ucm/posix-bc.ucm Unicode Character Map +ucm/shiftjis.ucm Unicode Character Map +ucm/symbol.ucm Unicode Character Map +ucm/viscii.ucm Unicode Character Map diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL index 0f0ed78..71bc5fb 100644 --- a/ext/Encode/Makefile.PL +++ 
b/ext/Encode/Makefile.PL @@ -1,5 +1,4 @@ -use 5.7.2; -use strict; +use 5.007003; use ExtUtils::MakeMaker; my %tables = @@ -9,13 +8,20 @@ my %tables = ] ); -my @exe_files = qw(bin/enc2xs); -my @more_exe_files = qw(bin/piconv); - +my @exe_files = qw(bin/enc2xs + bin/piconv + ); +my @more_exe_files = qw( + ucm2table + ); +my @pmlibdirs = qw(lib Encode); for my $arg (@ARGV){ if ($arg eq "MORE_SCRIPTS"){ push @exe_files, @more_exe_files; } + if ($arg eq "INSTALL_UCM"){ + push @pmlibdirs, "ucm"; + } } WriteMakefile( @@ -29,7 +35,8 @@ WriteMakefile( DIST_DEFAULT => 'all tardist', }, MAN3PODS => {}, - INC => "-I./Encode" + INC => "-I./Encode", + PMLIBDIRS => \@pmlibdirs, ); package MY; @@ -70,7 +77,7 @@ return ''; sub postamble { my $self = shift; - my $dir = $self->catdir($self->curdir,'Encode'); + my $dir = $self->catdir($self->curdir,'ucm'); my $str = "# Encode\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= 'Encode$(OBJ_EXT) :'; foreach my $table (keys %tables) diff --git a/ext/Encode/README b/ext/Encode/README index 590834a..4ff4622 100644 --- a/ext/Encode/README +++ b/ext/Encode/README @@ -23,15 +23,20 @@ To install this module type the following: To install scripts under bin/ directories also, perl Makefile.PL MORE_SCRIPTS - make - make test - make install + make && make test && make install + +by default, only enc2xs and piconv are installed. + +To install *.ucm files also, say + + perl Makefile.PL INSTALL_UCM + make && make test && make install -by default, only bin/enc2xs is installed. +by default, *.ucm are not installed. DEPENDENCIES -This module requires perl5.7.2 or later. +This module requires perl5.7.3 or later. QUESTIONS? 
diff --git a/ext/Encode/Symbol/Makefile.PL b/ext/Encode/Symbol/Makefile.PL index 0633699..f5772be 100644 --- a/ext/Encode/Symbol/Makefile.PL +++ b/ext/Encode/Symbol/Makefile.PL @@ -98,7 +98,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/TW/Makefile.PL b/ext/Encode/TW/Makefile.PL index 9ff1203..2b100ee 100644 --- a/ext/Encode/TW/Makefile.PL +++ b/ext/Encode/TW/Makefile.PL @@ -100,7 +100,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/bin/enc2xs b/ext/Encode/bin/enc2xs index 803c264..3a9780b 100644 --- a/ext/Encode/bin/enc2xs +++ b/ext/Encode/bin/enc2xs @@ -1,4 +1,4 @@ -#!../../../perl -w +#!./perl BEGIN { # fiddle with @INC iff I am a part of perl dist if ($^X =~ m/\bminiperl$/o){ @@ -10,7 +10,7 @@ BEGIN { use strict; use Getopt::Std; my @orig_ARGV = @ARGV; -our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.10 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # These may get re-ordered. 
@@ -832,215 +832,58 @@ sub output_ucm print $fh "END CHARMAP\n"; } +use vars qw( + $_Enc2xs + $_Version + $_Inc + $_Name + $_TableFiles + $_Now +); + sub make_makefile_pl { eval { require Encode; }; $@ and die "You need to install Encode to use enc2xs -M\nerror: $@\n"; + # package globals; _print_expand expands them in the .e2x templates + $_Enc2xs = $0; + $_Version = $VERSION; + $_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o; + $_Name = shift; + $_TableFiles = join(",", map {qq('$_')} @_); + $_Now = scalar localtime(); + warn "Generating Makefile.PL\n"; + _print_expand("$_Inc/Makefile_PL.e2x", "Makefile.PL"); + warn "Generating $_Name.pm\n"; + _print_expand("$_Inc/_PM.e2x", "$_Name.pm"); + warn "Generating t/$_Name.t\n"; + _print_expand("$_Inc/_T.e2x", "t/$_Name.t"); + warn "Generating README\n"; + _print_expand("$_Inc/README.e2x", "README"); + warn "Generating Changes\n"; + _print_expand("$_Inc/Changes.e2x", "Changes"); + exit; +} + +sub _print_expand{ eval { require File::Basename; }; $@ and die "File::Basename needed. Are you on miniperl?;\nerror: $@\n"; File::Basename->import(); - my $inc = dirname($INC{"Encode/Internal.pm"}); - my $name = shift; - my $table_files = join(",", map {qq('$_')} @_); - my $now = scalar localtime(); - open my $fh, ">Makefile.PL" or die "$!"; - print $fh <<"END_OF_HEADER"; -# -# This file is auto-generated by: -# $0 -# $now -# -use 5.7.2; -use strict; -use ExtUtils::MakeMaker; - -# Please edit the following to the taste! -my \$name = '$name'; -my \%tables = ( - encode_t => [ $table_files ], - ); - -# And leave the rest! -my \$enc2xs = '$0'; -WriteMakefile( - INC => "-I$inc", -END_OF_HEADER - - print $fh <<'END_OF_MAKEFILE_PL'; - NAME => 'Encode::'.$name, - VERSION_FROM => "$name.pm", - OBJECT => '$(O_FILES)', - 'dist' => { - COMPRESS => 'gzip -9f', - SUFFIX => 'gz', - DIST_DEFAULT => 'all tardist', - }, - MAN3PODS => {}, - # OS 390 winges about line numbers > 64K ??? 
- XSOPT => '-nolinenumbers', - ); - -package MY; - -sub post_initialize -{ - my ($self) = @_; - my %o; - my $x = $self->{'OBJ_EXT'}; - # Add the table O_FILES - foreach my $e (keys %tables) - { - $o{$e.$x} = 1; - } - $o{"$name$x"} = 1; - $self->{'O_FILES'} = [sort keys %o]; - my @files = ("$name.xs"); - $self->{'C'} = ["$name.c"]; - # $self->{'H'} = [$self->catfile($self->updir,'encode.h')]; - my %xs; - foreach my $table (keys %tables) { - push (@{$self->{'C'}},"$table.c"); - # Do NOT add $table.h etc. to H_FILES unless we own up as to how they - # get built. - foreach my $ext (qw($(OBJ_EXT) .c .h _def.h .fnm)) { - push (@files,$table.$ext); - } - } - $self->{'XS'} = { "$name.xs" => "$name.c" }; - $self->{'clean'}{'FILES'} .= join(' ',@files); - open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; - print XS <<'END'; -#include -#include -#include -#define U8 U8 -#include "encode.h" -END - foreach my $table (keys %tables) { - print XS qq[#include "${table}.h"\n]; + my ($src, $dst) = @_; + open my $in, $src or die "$src : $!"; + if ((my $d = dirname($dst)) ne '.'){ + -d $d or mkdir $d, 0755 or die "mkdir $d : $!"; + } + open my $out, ">$dst" or die "$!"; + my $asis = 0; + while (<$in>){ + if (/^#### END_OF_HEADER/){ + $asis = 1; next; + } + s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis; + print $out $_; } - print XS <<"END"; - -static void -Encode_XSEncoding(pTHX_ encode_t *enc) -{ - dSP; - HV *stash = gv_stashpv("Encode::XS", TRUE); - SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); - int i = 0; - PUSHMARK(sp); - XPUSHs(sv); - while (enc->name[i]) - { - const char *name = enc->name[i++]; - XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); - } - PUTBACK; - call_pv("Encode::define_encoding",G_DISCARD); - SvREFCNT_dec(sv); -} - -MODULE = Encode::$name PACKAGE = Encode::$name -PROTOTYPES: DISABLE -BOOT: -{ -END - foreach my $table (keys %tables) { - print XS qq[#include "${table}_def.h"\n]; - } - print XS "}\n"; - close(XS); - return "# Built $name.xs\n\n"; 
-} - -sub postamble -{ - my $self = shift; - my $dir = "."; # $self->catdir('Encode'); - my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; - $str .= "$name.c : $name.xs "; - foreach my $table (keys %tables) - { - $str .= " $table.c"; - } - $str .= "\n\n"; - $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; - - foreach my $table (keys %tables) - { - my $numlines = 1; - my $lengthsofar = length($str); - my $continuator = ''; - $str .= "$table.c : Makefile.PL"; - foreach my $file (@{$tables{$table}}) - { - $str .= $continuator.' '.$self->catfile($dir,$file); - if ( length($str)-$lengthsofar > 128*$numlines ) - { - $continuator .= " \\\n\t"; - $numlines++; - } else { - $continuator = ''; - } - } - $str .= $^O eq 'VMS' # In VMS quote to preserve case - ? qq{\n\t\$(PERL) $enc2xs -"Q" -"O" -o \$\@ -f $table.fnm\n\n} - : qq{\n\t\$(PERL) $enc2xs -Q -O -o \$\@ -f $table.fnm\n\n}; - open (FILELIST, ">$table.fnm") - || die "Could not open $table.fnm: $!"; - foreach my $file (@{$tables{$table}}) - { - print FILELIST $self->catfile($dir,$file) . "\n"; - } - close(FILELIST); - } - return $str; } -END_OF_MAKEFILE_PL - close $fh; - (my $pm =<<"END_OF_PM") =~ s/^# //gm; -# package Encode::$name; -# our \$VERSION = "0.01"; -# -# use Encode; -# use XSLoader; -# XSLoader::load('Encode::$name', \$VERSION); -# -# 1; -# __END__ -# -# =head1 NAME -# -# Encode::$name - New Encoding -# -# =head1 SYNOPSIS -# -# You got to fill this in! -# -# =head1 SEE ALSO -# -# L -# -# =cut -END_OF_PM - open $fh, ">$name.pm" or die "$name.pm:$!"; - print $fh $pm; - close $fh; - -d 't' or mkdir 't', 0755 or die "mkdir t:$!"; - open $fh, ">t/$name.t" or die "t/$name.t:$!"; -print $fh <<"END_OF_TEST"; -use strict; -# Adjust the number here! -use Test::More tests => 2; - -use_ok('Encode'); -use_ok('Encode::$name'); -# Add more test here! -END_OF_TEST - close $fh; - exit; -} - __END__ =head1 NAME @@ -1083,6 +926,10 @@ defined inI. C<$> is a shell prompt. 
Issue a command as follows; $ enc2xs -M My my.ucm + generating Makefile.PL + generating My.pm + generating README + generating Changes Now take a look at your current directory. It should look like this. diff --git a/ext/Encode/bin/piconv b/ext/Encode/bin/piconv index c7f08ae..3958dfa 100644 --- a/ext/Encode/bin/piconv +++ b/ext/Encode/bin/piconv @@ -1,5 +1,5 @@ -#!/usr/bin/perl -# $Id: piconv,v 1.0 2002/03/28 23:26:28 dankogai Exp $ +#!./perl +# $Id: piconv,v 1.10 2002/03/31 21:00:50 dankogai Exp $ # use 5.7.3; use strict; diff --git a/ext/Encode/ucm2table b/ext/Encode/bin/ucm2table similarity index 100% rename from ext/Encode/ucm2table rename to ext/Encode/bin/ucm2table diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm new file mode 100644 index 0000000..1b3dd84 --- /dev/null +++ b/ext/Encode/encoding.pm @@ -0,0 +1,233 @@ +package encoding; +our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; + +use Encode; + +BEGIN { + if (ord("A") == 193) { + require Carp; + Carp::croak "encoding pragma does not support EBCDIC platforms"; + } +} + +sub import { + my $class = shift; + my $name = shift; + my %arg = @_; + $name ||= $ENV{PERL_ENCODING}; + + my $enc = find_encoding($name); + unless (defined $enc) { + require Carp; + Carp::croak "Unknown encoding '$name'"; + } + ${^ENCODING} = $enc; # this is all you need, actually. 
+ + # $_OPEN_ORIG = ${^OPEN}; + for my $h (qw(STDIN STDOUT STDERR)){ + if ($arg{$h}){ # per-handle override, e.g. STDOUT => "utf8" + unless (defined find_encoding($arg{$h})) { + require Carp; + Carp::croak "Unknown encoding for $h, '$arg{$h}'"; + } + eval qq{ binmode($h, ":encoding($arg{$h})") }; + }else{ + eval qq{ binmode($h, ":encoding($name)") }; + } + if ($@){ + require Carp; + Carp::croak($@); + } + } + return 1; # I doubt if we need it, though +} + +sub unimport{ + no warnings; + undef ${^ENCODING}; + binmode(STDIN, ":raw"); + binmode(STDOUT, ":raw"); + binmode(STDERR, ":raw"); +} + +1; +__END__ +=pod + +=head1 NAME + +encoding - allows you to write your script in non-ascii or non-utf8 + +=head1 SYNOPSIS + + use encoding "euc-jp"; # Jperl! + + # or you can even do this if your shell supports euc-jp + + > perl -Mencoding=euc-jp -e '...' + + # or from the shebang line + + #!/your/path/to/perl -Mencoding=euc-jp + + # more control + + # A simple euc-jp => utf-8 converter + use encoding "euc-jp", STDOUT => "utf8"; while(<>){print}; + + # "no encoding;" supported (but not scoped!) + no encoding; + +=head1 ABSTRACT + +Perl 5.6.0 has introduced Unicode support. You could apply +C and regexes even to complex CJK characters -- so long as +the script was written in UTF-8. But back then text editors that +support UTF-8 were still rare and many users rather chose to write +scripts in legacy encodings, giving up a whole new feature of Perl 5.6. + +With B pragma, you can write your script in any encoding you like +(so long as the C module supports it) and still enjoy Unicode +support. You can write code in EUC-JP as follows; + + my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji + #<-char-><-char-> # 4 octets + s/\bCamel\b/$Rakuda/; + +And with C in effect, it is the same thing as +the code in UTF-8 as follows. + + my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters + s/\bCamel\b/$Rakuda/; + +The B pragma also modifies the file handle disciplines of +STDIN, STDOUT, and STDERR to the specified encoding. 
Therefore, + + use encoding "euc-jp"; + my $message = "Camel is the symbol of perl.\n"; + my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji + $message =~ s/\bCamel\b/$Rakuda/; + print $message; + +Will print "\xF1\xD1\xF1\xCC is the symbol of perl.\n", not +"\x{99F1}\x{99DD} is the symbol of perl.\n". + +You can override this by giving extra arguments. See below. + +=head1 USAGE + +=over 4 + +=item use encoding [I] ; + +Sets the script encoding to I and file handle disciplines of +STDIN, STDOUT, and STDERR are set to ":encoding(I)". + +If no encoding is specified, the environment variable L +is consulted. If no encoding can be found, C'> +error will be thrown. + +Note that non-STD file handles remain unaffected. Use C or +C to change disciplines of those. + +=item use encoding I [ STDIN => I ...] ; + +You can also individually set encodings of STDIN, STDOUT, and STDERR +via STDI => I form. In this case, you cannot omit the +first I. + +=item no encoding; + +Unsets the script encoding and the disciplines of STDIN, STDOUT, and +STDERR are reset to ":raw". + +=back + +=head1 CAVEATS + +=head2 NOT SCOPED + +The pragma is a per script, not a per block lexical. Only the last +C or C. +Though pragma is supported and C can +appear as many times as you want in a given script, the multiple use +of this pragma is discouraged. + +=head2 DO NOT MIX MULTIPLE ENCODINGS + +Notice that only literals (string or regular expression) having only +legacy code points are affected: if you mix data like this + + \xDF\x{100} + +the data is assumed to be in (Latin 1 and) Unicode, not in your native +encoding. In other words, this will match in "greek": + + "\xDF" =~ /\x{3af}/ + +but this will not + + "\xDF\x{100}" =~ /\x{3af}\x{100}/ + +since the C<\xDF> on the left will B be upgraded to C<\x{3af}> +because of the C<\x{100}> on the left. You should not be mixing your +legacy data and Unicode in the same string. 
+ +This pragma also affects encoding of the 0x80..0xFF code point range: +normally characters in that range are left as eight-bit bytes (unless +they are combined with characters with code points 0x100 or larger, +in which case all characters need to become UTF-8 encoded), but if +the C pragma is present, even the 0x80..0xFF range always +gets UTF-8 encoded. + +After all, the best thing about this pragma is that you don't have to +resort to \x... just to spell your name in native encoding. So feel +free to put your strings in your encoding in quotes and regexes. + +=head1 EXAMPLE - Greekperl + + use encoding "iso 8859-7"; + + # The \xDF of ISO 8859-7 (Greek) is \x{3af} in Unicode. + + $a = "\xDF"; + $b = "\x{100}"; + + printf "%#x\n", ord($a); # will print 0x3af, not 0xdf + + $c = $a . $b; + + # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}". + + # chr() is affected, and ... + + print "mega\n" if ord(chr(0xdf)) == 0x3af; + + # ... ord() is affected by the encoding pragma ... + + print "tera\n" if ord(pack("C", 0xdf)) == 0x3af; + + # ... as are eq and cmp ... + + print "peta\n" if "\x{3af}" eq pack("C", 0xdf); + print "exa\n" if "\x{3af}" cmp pack("C", 0xdf) == 0; + + # ... but pack/unpack C are not affected, in case you still + # want back to your native encoding + + print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf; + +=head1 KNOWN PROBLEMS + +For native multibyte encodings (either fixed or variable length) +the current implementation of the regular expressions may introduce +recoding errors for longer regular expression literals than 127 bytes. + +The encoding pragma is not supported on EBCDIC platforms. +(Porters wanted.) 
+ +=head1 SEE ALSO + +L, L, L + +=cut diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm index 76a995e..d00d2bf 100644 --- a/ext/Encode/lib/Encode/Alias.pm +++ b/ext/Encode/lib/Encode/Alias.pm @@ -1,7 +1,7 @@ package Encode::Alias; use strict; use Encode; -our $VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; require Exporter; @@ -167,6 +167,9 @@ sub init_aliases # Sometimes seen with a leading zero. define_alias( qr/\bcp037\b/i => '"cp37"'); + # Mac Mappings + define_alias( qr/\bmacIcelandic$/i => '"macIceland"'); + define_alias( qr/^mac_(.*)$/i => '"mac$1"'); # Ououououou. define_alias( qr/\bmacRomanian$/i => '"macRumanian"'); @@ -235,8 +238,9 @@ Encode::Alias - alias defintions to encodings =head1 DESCRIPTION -Allows newName to be used as am alias for ENCODING. ENCODING may be -either the name of an encoding or and encoding object (as described in L). +Allows newName to be used as an alias for ENCODING. ENCODING may be +either the name of an encoding or an encoding object (as described +in L). Currently I can be specified in the following ways: @@ -249,28 +253,29 @@ Currently I can be specified in the following ways: define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' ); In this case if I is not a reference it is C-ed to -allow C<$1> etc. to be subsituted. The example is one way to names as -used in X11 font names to alias the MIME names for the iso-8859-* -family. Note the double quote inside the single quote. +allow C<$1> etc. to be substituted. The example is one way to alias +names as used in X11 fonts to the MIME names for the iso-8859-* +family. Note the double quote inside the single quote. -If you are using regex here, you have to do so or it won't work in -this case. Also not regex is tricky even for the experienced. Use it -with caution. 
+If you are using a regex here, you have to use the quotes as shown or +it won't work. Also note that regex handling is tricky even for the +experienced. Use it with caution. =item As a code reference, e.g.: define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , ''); + In this case C<$_> will be set to the name that is being looked up and I is passed to the sub as its first argument. The example -is another way to names as used in X11 font names to alias the MIME -names for the iso-8859-* family. +is another way to alias names as used in X11 fonts to the MIME names +for the iso-8859-* family. =back =head2 Alias overloading -You can override predefined aliases by simply applying define_alias(). +You can override predefined aliases by simply applying define_alias(). New alias is always evaluated first and when neccessary define_alias() flushes internal cache to make new definition available. diff --git a/ext/Encode/lib/Encode/Supported.pod b/ext/Encode/lib/Encode/Supported.pod index 1a8d076..9f98928 100644 --- a/ext/Encode/lib/Encode/Supported.pod +++ b/ext/Encode/lib/Encode/Supported.pod @@ -97,9 +97,9 @@ non-ASCII characters. MacCyrillic MacRomanian MacSami - MacGreek + MacGreek MacThai - MacIcelandic + MacIceland MacTurkish MacUkrainian diff --git a/lib/encoding.t b/ext/Encode/t/encoding.t similarity index 96% rename from lib/encoding.t rename to ext/Encode/t/encoding.t index ec21c1b..e23820a 100644 --- a/lib/encoding.t +++ b/ext/Encode/t/encoding.t @@ -168,7 +168,7 @@ print "ok 25\n"; # some more eq, cmp -my $byte=pack("C*", 0xDF); +$byte=pack("C*", 0xDF); print "not " unless pack("U*", 0x3AF) eq $byte; print "ok 26\n"; @@ -182,5 +182,9 @@ print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); print "ok 28\n"; -# Used to core dump in 5.7.3 -print ord undef == 0 ? 
"ok 29\n" : "not ok 29\n"; + +{ + # Used to core dump in 5.7.3 + no warnings; # so test goes noiselessly + print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n"; +} diff --git a/ext/Encode/t/jperl.t b/ext/Encode/t/jperl.t new file mode 100644 index 0000000..333e188 --- /dev/null +++ b/ext/Encode/t/jperl.t @@ -0,0 +1,57 @@ +# +# $Id: jperl.t,v 1.11 2002/03/31 22:12:13 dankogai Exp dankogai $ +# +# This script is written in euc-jp + +use strict; +use Test::More tests => 15; +my $Debug = shift; + +no encoding; # ensure +my $Enamae = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; # euc-jp, with \x escapes +use encoding "euc-jp"; + +my $Namae = "¾®»ô ÃÆ"; # in Japanese, in euc-jp +my $Name = "Dan Kogai"; # in English +# euc-jp in \x format but after the pragma. But this one will be converted! +my $Ynamae = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; + + +my $str = $Namae; $str =~ s/¾®»ô ÃÆ/Dan Kogai/o; +is($str, $Name, q{regex}); +$str = $Namae; $str =~ s/$Namae/Dan Kogai/o; +is($str, $Name, q{regex - with variable}); +is(length($Namae), 4, q{utf8:length}); +{ + use bytes; + # converted to UTF-8 so 3*3+1 + is(length($Namae), 10, q{bytes:length}); + # + is(length($Enamae), 7, q{euc:length}); # 2*3+1 + is ($Namae, $Ynamae, q{literal conversions}); + isnt($Enamae, $Ynamae, q{before and after}); + is($Enamae, Encode::encode('euc-jp', $Namae)); +} +# let's test the scope as well. Must be in utf8 realm +is(length($Namae), 4, q{utf8:length}); + +{ + no encoding; + ok(! defined(${^ENCODING}), q{no encoding;}); +} +# should've been isnt() but no scoping is suported -- yet +ok(! defined(${^ENCODING}), q{not scoped yet}); +{ + # now let's try some real black magic! + local(${^ENCODING}) = Encode::find_encoding("euc-jp"); + my $str = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; + is (length($str), 4, q{black magic:length}); + is ($str, $Enamae, q{black magic:eq}); +} +ok(! 
defined(${^ENCODING}), q{out of black magic}); +use bytes; +is (length($Namae), 10); +1; +__END__ + + diff --git a/ext/Encode/Encode/8859-1.ucm b/ext/Encode/ucm/8859-1.ucm similarity index 100% rename from ext/Encode/Encode/8859-1.ucm rename to ext/Encode/ucm/8859-1.ucm diff --git a/ext/Encode/Encode/8859-10.ucm b/ext/Encode/ucm/8859-10.ucm similarity index 100% rename from ext/Encode/Encode/8859-10.ucm rename to ext/Encode/ucm/8859-10.ucm diff --git a/ext/Encode/Encode/8859-11.ucm b/ext/Encode/ucm/8859-11.ucm similarity index 100% rename from ext/Encode/Encode/8859-11.ucm rename to ext/Encode/ucm/8859-11.ucm diff --git a/ext/Encode/Encode/8859-13.ucm b/ext/Encode/ucm/8859-13.ucm similarity index 100% rename from ext/Encode/Encode/8859-13.ucm rename to ext/Encode/ucm/8859-13.ucm diff --git a/ext/Encode/Encode/8859-14.ucm b/ext/Encode/ucm/8859-14.ucm similarity index 100% rename from ext/Encode/Encode/8859-14.ucm rename to ext/Encode/ucm/8859-14.ucm diff --git a/ext/Encode/Encode/8859-15.ucm b/ext/Encode/ucm/8859-15.ucm similarity index 100% rename from ext/Encode/Encode/8859-15.ucm rename to ext/Encode/ucm/8859-15.ucm diff --git a/ext/Encode/Encode/8859-16.ucm b/ext/Encode/ucm/8859-16.ucm similarity index 100% rename from ext/Encode/Encode/8859-16.ucm rename to ext/Encode/ucm/8859-16.ucm diff --git a/ext/Encode/Encode/8859-2.ucm b/ext/Encode/ucm/8859-2.ucm similarity index 100% rename from ext/Encode/Encode/8859-2.ucm rename to ext/Encode/ucm/8859-2.ucm diff --git a/ext/Encode/Encode/8859-3.ucm b/ext/Encode/ucm/8859-3.ucm similarity index 100% rename from ext/Encode/Encode/8859-3.ucm rename to ext/Encode/ucm/8859-3.ucm diff --git a/ext/Encode/Encode/8859-4.ucm b/ext/Encode/ucm/8859-4.ucm similarity index 100% rename from ext/Encode/Encode/8859-4.ucm rename to ext/Encode/ucm/8859-4.ucm diff --git a/ext/Encode/Encode/8859-5.ucm b/ext/Encode/ucm/8859-5.ucm similarity index 100% rename from ext/Encode/Encode/8859-5.ucm rename to ext/Encode/ucm/8859-5.ucm diff --git 
a/ext/Encode/Encode/8859-6.ucm b/ext/Encode/ucm/8859-6.ucm similarity index 100% rename from ext/Encode/Encode/8859-6.ucm rename to ext/Encode/ucm/8859-6.ucm diff --git a/ext/Encode/Encode/8859-7.ucm b/ext/Encode/ucm/8859-7.ucm similarity index 100% rename from ext/Encode/Encode/8859-7.ucm rename to ext/Encode/ucm/8859-7.ucm diff --git a/ext/Encode/Encode/8859-8.ucm b/ext/Encode/ucm/8859-8.ucm similarity index 100% rename from ext/Encode/Encode/8859-8.ucm rename to ext/Encode/ucm/8859-8.ucm diff --git a/ext/Encode/Encode/8859-9.ucm b/ext/Encode/ucm/8859-9.ucm similarity index 100% rename from ext/Encode/Encode/8859-9.ucm rename to ext/Encode/ucm/8859-9.ucm diff --git a/ext/Encode/Encode/ascii.ucm b/ext/Encode/ucm/ascii.ucm similarity index 98% rename from ext/Encode/Encode/ascii.ucm rename to ext/Encode/ucm/ascii.ucm index 4a0d9d8..2d44b51 100644 --- a/ext/Encode/Encode/ascii.ucm +++ b/ext/Encode/ucm/ascii.ucm @@ -1,5 +1,5 @@ # -# $Id: ascii.ucm,v 1.0 2002/03/28 23:26:25 dankogai Exp $ +# $Id: ascii.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # # ./compile -n US-ascii -o Encode/ascii.ucm Encode/ascii.enc "US-ascii" diff --git a/ext/Encode/Encode/big5-hkscs.ucm b/ext/Encode/ucm/big5-hkscs.ucm similarity index 100% rename from ext/Encode/Encode/big5-hkscs.ucm rename to ext/Encode/ucm/big5-hkscs.ucm diff --git a/ext/Encode/Encode/big5.ucm b/ext/Encode/ucm/big5.ucm similarity index 100% rename from ext/Encode/Encode/big5.ucm rename to ext/Encode/ucm/big5.ucm diff --git a/ext/Encode/Encode/cp1047.ucm b/ext/Encode/ucm/cp1047.ucm similarity index 100% rename from ext/Encode/Encode/cp1047.ucm rename to ext/Encode/ucm/cp1047.ucm diff --git a/ext/Encode/Encode/cp1250.ucm b/ext/Encode/ucm/cp1250.ucm similarity index 100% rename from ext/Encode/Encode/cp1250.ucm rename to ext/Encode/ucm/cp1250.ucm diff --git a/ext/Encode/Encode/cp37.ucm b/ext/Encode/ucm/cp37.ucm similarity index 100% rename from ext/Encode/Encode/cp37.ucm rename to ext/Encode/ucm/cp37.ucm diff --git 
a/ext/Encode/Encode/cp932.ucm b/ext/Encode/ucm/cp932.ucm similarity index 100% rename from ext/Encode/Encode/cp932.ucm rename to ext/Encode/ucm/cp932.ucm diff --git a/ext/Encode/Encode/cp936.ucm b/ext/Encode/ucm/cp936.ucm similarity index 99% rename from ext/Encode/Encode/cp936.ucm rename to ext/Encode/ucm/cp936.ucm index 81c1f8d..31903ec 100644 --- a/ext/Encode/Encode/cp936.ucm +++ b/ext/Encode/ucm/cp936.ucm @@ -1,5 +1,5 @@ # -# $Id: cp936.ucm,v 1.0 2002/03/28 23:26:25 dankogai Exp $ +# $Id: cp936.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # # ./compile -n cp936 -o Encode/cp936.ucm Encode/cp936.enc "cp936" diff --git a/ext/Encode/Encode/cp949.ucm b/ext/Encode/ucm/cp949.ucm similarity index 100% rename from ext/Encode/Encode/cp949.ucm rename to ext/Encode/ucm/cp949.ucm diff --git a/ext/Encode/Encode/cp950.ucm b/ext/Encode/ucm/cp950.ucm similarity index 100% rename from ext/Encode/Encode/cp950.ucm rename to ext/Encode/ucm/cp950.ucm diff --git a/ext/Encode/Encode/dingbats.ucm b/ext/Encode/ucm/dingbats.ucm similarity index 99% rename from ext/Encode/Encode/dingbats.ucm rename to ext/Encode/ucm/dingbats.ucm index 63df1c2..aafe6ff 100644 --- a/ext/Encode/Encode/dingbats.ucm +++ b/ext/Encode/ucm/dingbats.ucm @@ -1,5 +1,5 @@ # -# $Id: dingbats.ucm,v 1.0 2002/03/28 23:26:26 dankogai Exp $ +# $Id: dingbats.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # "dingbats" 1 diff --git a/ext/Encode/Encode/euc-cn.ucm b/ext/Encode/ucm/euc-cn.ucm similarity index 100% rename from ext/Encode/Encode/euc-cn.ucm rename to ext/Encode/ucm/euc-cn.ucm diff --git a/ext/Encode/Encode/euc-jp.ucm b/ext/Encode/ucm/euc-jp.ucm similarity index 100% rename from ext/Encode/Encode/euc-jp.ucm rename to ext/Encode/ucm/euc-jp.ucm diff --git a/ext/Encode/Encode/euc-kr.ucm b/ext/Encode/ucm/euc-kr.ucm similarity index 100% rename from ext/Encode/Encode/euc-kr.ucm rename to ext/Encode/ucm/euc-kr.ucm diff --git a/ext/Encode/Encode/gb12345.ucm b/ext/Encode/ucm/gb12345.ucm similarity index 100% rename from 
ext/Encode/Encode/gb12345.ucm rename to ext/Encode/ucm/gb12345.ucm diff --git a/ext/Encode/Encode/gb2312.ucm b/ext/Encode/ucm/gb2312.ucm similarity index 100% rename from ext/Encode/Encode/gb2312.ucm rename to ext/Encode/ucm/gb2312.ucm diff --git a/ext/Encode/Encode/gsm0338.ucm b/ext/Encode/ucm/gsm0338.ucm similarity index 100% rename from ext/Encode/Encode/gsm0338.ucm rename to ext/Encode/ucm/gsm0338.ucm diff --git a/ext/Encode/Encode/hp-roman8.ucm b/ext/Encode/ucm/hp-roman8.ucm similarity index 100% rename from ext/Encode/Encode/hp-roman8.ucm rename to ext/Encode/ucm/hp-roman8.ucm diff --git a/ext/Encode/Encode/ibm-1250.ucm b/ext/Encode/ucm/ibm-1250.ucm similarity index 100% rename from ext/Encode/Encode/ibm-1250.ucm rename to ext/Encode/ucm/ibm-1250.ucm diff --git a/ext/Encode/Encode/ibm-1251.ucm b/ext/Encode/ucm/ibm-1251.ucm similarity index 100% rename from ext/Encode/Encode/ibm-1251.ucm rename to ext/Encode/ucm/ibm-1251.ucm diff --git a/ext/Encode/Encode/ibm-1252.ucm b/ext/Encode/ucm/ibm-1252.ucm similarity index 100% rename from ext/Encode/Encode/ibm-1252.ucm rename to ext/Encode/ucm/ibm-1252.ucm diff --git a/ext/Encode/Encode/ibm-1253.ucm b/ext/Encode/ucm/ibm-1253.ucm similarity index 100% rename from ext/Encode/Encode/ibm-1253.ucm rename to ext/Encode/ucm/ibm-1253.ucm diff --git a/ext/Encode/Encode/ibm-1254.ucm b/ext/Encode/ucm/ibm-1254.ucm similarity index 100% rename from ext/Encode/Encode/ibm-1254.ucm rename to ext/Encode/ucm/ibm-1254.ucm diff --git a/ext/Encode/Encode/ibm-1255.ucm b/ext/Encode/ucm/ibm-1255.ucm similarity index 100% rename from ext/Encode/Encode/ibm-1255.ucm rename to ext/Encode/ucm/ibm-1255.ucm diff --git a/ext/Encode/Encode/ibm-1256.ucm b/ext/Encode/ucm/ibm-1256.ucm similarity index 100% rename from ext/Encode/Encode/ibm-1256.ucm rename to ext/Encode/ucm/ibm-1256.ucm diff --git a/ext/Encode/Encode/ibm-1257.ucm b/ext/Encode/ucm/ibm-1257.ucm similarity index 100% rename from ext/Encode/Encode/ibm-1257.ucm rename to 
ext/Encode/ucm/ibm-1257.ucm diff --git a/ext/Encode/Encode/ibm-1258.ucm b/ext/Encode/ucm/ibm-1258.ucm similarity index 100% rename from ext/Encode/Encode/ibm-1258.ucm rename to ext/Encode/ucm/ibm-1258.ucm diff --git a/ext/Encode/Encode/ir-165.ucm b/ext/Encode/ucm/ir-165.ucm similarity index 100% rename from ext/Encode/Encode/ir-165.ucm rename to ext/Encode/ucm/ir-165.ucm diff --git a/ext/Encode/Encode/jis0201.ucm b/ext/Encode/ucm/jis0201.ucm similarity index 100% rename from ext/Encode/Encode/jis0201.ucm rename to ext/Encode/ucm/jis0201.ucm diff --git a/ext/Encode/Encode/johab.ucm b/ext/Encode/ucm/johab.ucm similarity index 100% rename from ext/Encode/Encode/johab.ucm rename to ext/Encode/ucm/johab.ucm diff --git a/ext/Encode/Encode/koi8-f.ucm b/ext/Encode/ucm/koi8-f.ucm similarity index 100% rename from ext/Encode/Encode/koi8-f.ucm rename to ext/Encode/ucm/koi8-f.ucm diff --git a/ext/Encode/Encode/koi8-r.ucm b/ext/Encode/ucm/koi8-r.ucm similarity index 99% rename from ext/Encode/Encode/koi8-r.ucm rename to ext/Encode/ucm/koi8-r.ucm index 11a2bf7..b9f413e 100644 --- a/ext/Encode/Encode/koi8-r.ucm +++ b/ext/Encode/ucm/koi8-r.ucm @@ -1,5 +1,5 @@ # -# $Id: koi8-r.ucm,v 1.0 2002/03/28 23:26:27 dankogai Exp $ +# $Id: koi8-r.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # # ./compile -n koi8-r -o Encode/koi8-r.ucm Encode/koi8-r.enc "koi8-r" diff --git a/ext/Encode/Encode/koi8-u.ucm b/ext/Encode/ucm/koi8-u.ucm similarity index 100% rename from ext/Encode/Encode/koi8-u.ucm rename to ext/Encode/ucm/koi8-u.ucm diff --git a/ext/Encode/Encode/ksc5601.ucm b/ext/Encode/ucm/ksc5601.ucm similarity index 100% rename from ext/Encode/Encode/ksc5601.ucm rename to ext/Encode/ucm/ksc5601.ucm diff --git a/ext/Encode/Encode/macCentEuro.ucm b/ext/Encode/ucm/macCentEuro.ucm similarity index 100% rename from ext/Encode/Encode/macCentEuro.ucm rename to ext/Encode/ucm/macCentEuro.ucm diff --git a/ext/Encode/Encode/macCroatian.ucm b/ext/Encode/ucm/macCroatian.ucm similarity index 100% rename 
from ext/Encode/Encode/macCroatian.ucm rename to ext/Encode/ucm/macCroatian.ucm diff --git a/ext/Encode/Encode/macCyrillic.ucm b/ext/Encode/ucm/macCyrillic.ucm similarity index 100% rename from ext/Encode/Encode/macCyrillic.ucm rename to ext/Encode/ucm/macCyrillic.ucm diff --git a/ext/Encode/Encode/macDingbats.ucm b/ext/Encode/ucm/macDingbats.ucm similarity index 100% rename from ext/Encode/Encode/macDingbats.ucm rename to ext/Encode/ucm/macDingbats.ucm diff --git a/ext/Encode/Encode/macGreek.ucm b/ext/Encode/ucm/macGreek.ucm similarity index 100% rename from ext/Encode/Encode/macGreek.ucm rename to ext/Encode/ucm/macGreek.ucm diff --git a/ext/Encode/Encode/macIceland.ucm b/ext/Encode/ucm/macIceland.ucm similarity index 99% rename from ext/Encode/Encode/macIceland.ucm rename to ext/Encode/ucm/macIceland.ucm index 4c744a5..38dd788 100644 --- a/ext/Encode/Encode/macIceland.ucm +++ b/ext/Encode/ucm/macIceland.ucm @@ -1,8 +1,8 @@ # -# $Id: macIceland.ucm,v 1.0 2002/03/28 23:26:27 dankogai Exp $ +# $Id: macIceland.ucm,v 1.2 2002/03/31 18:22:36 dankogai Exp $ # # ./compile -n macIceland -o Encode/macIceland.ucm Encode/macIceland.enc - "MacIcelandic" + "MacIceland" 1 1 \x3F diff --git a/ext/Encode/Encode/macJapan.ucm b/ext/Encode/ucm/macJapan.ucm similarity index 100% rename from ext/Encode/Encode/macJapan.ucm rename to ext/Encode/ucm/macJapan.ucm diff --git a/ext/Encode/Encode/macRoman.ucm b/ext/Encode/ucm/macRoman.ucm similarity index 100% rename from ext/Encode/Encode/macRoman.ucm rename to ext/Encode/ucm/macRoman.ucm diff --git a/ext/Encode/Encode/macRumanian.ucm b/ext/Encode/ucm/macRumanian.ucm similarity index 100% rename from ext/Encode/Encode/macRumanian.ucm rename to ext/Encode/ucm/macRumanian.ucm diff --git a/ext/Encode/Encode/macSami.ucm b/ext/Encode/ucm/macSami.ucm similarity index 100% rename from ext/Encode/Encode/macSami.ucm rename to ext/Encode/ucm/macSami.ucm diff --git a/ext/Encode/Encode/macThai.ucm b/ext/Encode/ucm/macThai.ucm similarity index 100% 
rename from ext/Encode/Encode/macThai.ucm rename to ext/Encode/ucm/macThai.ucm diff --git a/ext/Encode/Encode/macTurkish.ucm b/ext/Encode/ucm/macTurkish.ucm similarity index 99% rename from ext/Encode/Encode/macTurkish.ucm rename to ext/Encode/ucm/macTurkish.ucm index 858d200..f7df090 100644 --- a/ext/Encode/Encode/macTurkish.ucm +++ b/ext/Encode/ucm/macTurkish.ucm @@ -1,5 +1,5 @@ # -# $Id: macTurkish.ucm,v 1.0 2002/03/28 23:26:27 dankogai Exp $ +# $Id: macTurkish.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # # ./compile -n macTurkish -o Encode/macTurkish.ucm Encode/macTurkish.enc "MacTurkish" diff --git a/ext/Encode/Encode/macUkraine.ucm b/ext/Encode/ucm/macUkraine.ucm similarity index 100% rename from ext/Encode/Encode/macUkraine.ucm rename to ext/Encode/ucm/macUkraine.ucm diff --git a/ext/Encode/Encode/nextstep.ucm b/ext/Encode/ucm/nextstep.ucm similarity index 100% rename from ext/Encode/Encode/nextstep.ucm rename to ext/Encode/ucm/nextstep.ucm diff --git a/ext/Encode/Encode/posix-bc.ucm b/ext/Encode/ucm/posix-bc.ucm similarity index 100% rename from ext/Encode/Encode/posix-bc.ucm rename to ext/Encode/ucm/posix-bc.ucm diff --git a/ext/Encode/Encode/shiftjis.ucm b/ext/Encode/ucm/shiftjis.ucm similarity index 100% rename from ext/Encode/Encode/shiftjis.ucm rename to ext/Encode/ucm/shiftjis.ucm diff --git a/ext/Encode/Encode/symbol.ucm b/ext/Encode/ucm/symbol.ucm similarity index 100% rename from ext/Encode/Encode/symbol.ucm rename to ext/Encode/ucm/symbol.ucm diff --git a/ext/Encode/Encode/viscii.ucm b/ext/Encode/ucm/viscii.ucm similarity index 100% rename from ext/Encode/Encode/viscii.ucm rename to ext/Encode/ucm/viscii.ucm diff --git a/lib/encoding.pm b/lib/encoding.pm deleted file mode 100644 index 441be33..0000000 --- a/lib/encoding.pm +++ /dev/null @@ -1,122 +0,0 @@ -package encoding; - -our $VERSION = '1.00'; - -use Encode; - -BEGIN { - if (ord("A") == 193) { - require Carp; - Carp::croak "encoding pragma does not support EBCDIC platforms"; - } -} - -sub 
import { - my ($class, $name) = @_; - $name = $ENV{PERL_ENCODING} if @_ < 2; - $name = "latin1" unless defined $name; - my $enc = find_encoding($name); - unless (defined $enc) { - require Carp; - Carp::croak "Unknown encoding '$name'"; - } - ${^ENCODING} = $enc; -} - -=pod - -=head1 NAME - -encoding - pragma to control the conversion of legacy data into Unicode - -=head1 SYNOPSIS - - use encoding "iso 8859-7"; - - # The \xDF of ISO 8859-7 (Greek) is \x{3af} in Unicode. - - $a = "\xDF"; - $b = "\x{100}"; - - printf "%#x\n", ord($a); # will print 0x3af, not 0xdf - - $c = $a . $b; - - # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}". - - # chr() is affected, and ... - - print "mega\n" if ord(chr(0xdf)) == 0x3af; - - # ... ord() is affected by the encoding pragma ... - - print "tera\n" if ord(pack("C", 0xdf)) == 0x3af; - - # ... as are eq and cmp ... - - print "peta\n" if "\x{3af}" eq pack("C", 0xdf); - print "exa\n" if "\x{3af}" cmp pack("C", 0xdf) == 0; - - # ... but pack/unpack C are not affected, in case you still - # want back to your native encoding - - print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf; - -=head1 DESCRIPTION - -Normally when legacy 8-bit data is converted to Unicode the data is -expected to be Latin-1 (or EBCDIC in EBCDIC platforms). With the -encoding pragma you can change this default. - -The pragma is a per script, not a per block lexical. Only the last -C<use encoding> matters, and it affects B<the whole script>. - -Notice that only literals (string or regular expression) having only -legacy code points are affected: if you mix data like this - - \xDF\x{100} - -the data is assumed to be in (Latin 1 and) Unicode, not in your native -encoding. In other words, this will match in "greek": - - "\xDF" =~ /\x{3af}/ - -but this will not - - "\xDF\x{100}" =~ /\x{3af}\x{100}/ - -since the C<\xDF> on the left will B<not> be upgraded to C<\x{3af}> -because of the C<\x{100}> on the left. You should not be mixing your -legacy data and Unicode in the same string. 
- -This pragma also affects encoding of the 0x80..0xFF code point range: -normally characters in that range are left as eight-bit bytes (unless -they are combined with characters with code points 0x100 or larger, -in which case all characters need to become UTF-8 encoded), but if -the C<encoding> pragma is present, even the 0x80..0xFF range always -gets UTF-8 encoded. - -If no encoding is specified, the environment variable L<PERL_ENCODING> -is consulted. If that fails, "latin1" (ISO 8859-1) is assumed. If no -encoding can be found, C<Unknown encoding '...'> error will be thrown. - -Note if you want to get back to the original byte encoding, you need -to use things like I/O with encoding discplines (see L<open>) or the -Encode module, since C<no encoding> (or re-C<encoding>) do not work. - -=head1 KNOWN PROBLEMS - -For native multibyte encodings (either fixed or variable length) -the current implementation of the regular expressions may introduce -recoding errors for longer regular expression literals than 127 bytes. - -The encoding pragma is not supported on EBCDIC platforms. -(Porters wanted.) - -=head1 SEE ALSO - -L<perlunicode>, L<Encode> - -=cut - -1;