From: Jarkko Hietaniemi Date: Fri, 9 Nov 2001 00:23:40 +0000 (+0000) Subject: Upgrade to Unicode::Normalize 0.10, now in XS. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=ac5ea53171baa7dab1a92df1eacf8d2fe19cbdbb;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Unicode::Normalize 0.10, now in XS. The CPAN distribution has both pm and XS implementations, and for performance reasons we choose the XS. Another reason to choose the XS is that it doesn't require Lingua::KO::Hangul::Util, which means that we can delete that-- which in turn means that Unicode::UCD cannot expect that: support it, but don't expect. Ditto Unicode::Collate. Note that Unicode::Normalize Makefile.PL and Normalize.xs have been modified from the CPAN 0.10 versions: the first one to be simpler (no pm) and clean up the generated unf*.h files, the second one to quench compiler grumblings. Must notify Sadahiro about these changes. p4raw-id: //depot/perl@12909 --- diff --git a/MANIFEST b/MANIFEST index 627c8f0..63b0257 100644 --- a/MANIFEST +++ b/MANIFEST @@ -596,6 +596,16 @@ ext/Time/HiRes/HiRes.pm Time::HiRes extension ext/Time/HiRes/HiRes.t Test for Time::HiRes ext/Time/HiRes/HiRes.xs Time::HiRes extension ext/Time/HiRes/Makefile.PL Time::HiRes extension +ext/Unicode/Normalize/Changes Unicode::Normalize +ext/Unicode/Normalize/Makefile.PL Unicode::Normalize +ext/Unicode/Normalize/mkheader Unicode::Normalize +ext/Unicode/Normalize/Normalize.pm Unicode::Normalize +ext/Unicode/Normalize/Normalize.pod Unicode::Normalize +ext/Unicode/Normalize/Normalize.xs Unicode::Normalize +ext/Unicode/Normalize/README Unicode::Normalize +ext/Unicode/Normalize/t/func.t Unicode::Normalize +ext/Unicode/Normalize/t/norm.t Unicode::Normalize +ext/Unicode/Normalize/t/test.t Unicode::Normalize ext/util/make_ext Used by Makefile to execute extension Makefiles ext/XS/Typemap/Makefile.PL XS::Typemap extension ext/XS/Typemap/README XS::Typemap extension @@ -1005,10 +1015,6 @@ lib/IPC/SysV.t See if IPC::SysV 
works lib/less.pm For "use less" lib/less.t See if less support works lib/lib_pm.PL For "use lib", produces lib/lib.pm -lib/Lingua/KO/Hangul/Util.pm Lingua::KO::Hangul::Util -lib/Lingua/KO/Hangul/Util/Changes Lingua::KO::Hangul::Util -lib/Lingua/KO/Hangul/Util/README Lingua::KO::Hangul::Util -lib/Lingua/KO/Hangul/Util/t/test.t Lingua::KO::Hangul::Util lib/locale.pm For "use locale" lib/locale.t See if locale support works lib/Locale/Codes/t/all.t See if Locale::Codes work @@ -1277,11 +1283,6 @@ lib/Unicode/Collate/Changes Unicode::Collate lib/Unicode/Collate/keys.txt Unicode::Collate lib/Unicode/Collate/README Unicode::Collate lib/Unicode/Collate/t/test.t Unicode::Collate -lib/Unicode/Normalize.pm Unicode::Normalize -lib/Unicode/Normalize/Changes Unicode::Normalize -lib/Unicode/Normalize/README Unicode::Normalize -lib/Unicode/Normalize/t/norm.t Unicode::Normalize -lib/Unicode/Normalize/t/test.t Unicode::Normalize lib/Unicode/README Explanation what happened to lib/unicode. lib/Unicode/UCD.pm Unicode character database lib/Unicode/UCD.t See if Unicode character database works diff --git a/NetWare/Makefile b/NetWare/Makefile index ec06f7c..ff879e9 100644 --- a/NetWare/Makefile +++ b/NetWare/Makefile @@ -258,26 +258,27 @@ NW_CFG_VARS = \ NW_CFGSH_TMPL = config.wc NW_CFGH_TMPL = config_H.wc -SOCKET_NLP = $(AUTODIR)\Socket\Socket.nlp -FCNTL_NLP = $(AUTODIR)\Fcntl\Fcntl.nlp -IO_NLP = $(AUTODIR)\IO\IO.nlp -OPCODE_NLP = $(AUTODIR)\Opcode\Opcode.nlp -SDBM_FILE_NLP = $(AUTODIR)\SDBM_File\SDBM_File.nlp -POSIX_NLP = $(AUTODIR)\POSIX\POSIX.nlp -ATTRS_NLP = $(AUTODIR)\attrs\attrs.nlp -THREAD_NLP = $(AUTODIR)\Thread\Thread.nlp -B_NLP = $(AUTODIR)\B\B.nlp -DUMPER_NLP = $(AUTODIR)\Data\Dumper\Dumper.nlp -PEEK_NLP = $(AUTODIR)\Devel\Peek\Peek.nlp -RE_NLP = $(AUTODIR)\re\re.nlp -BYTELOADER_NLP = $(AUTODIR)\ByteLoader\ByteLoader.nlp -DPROF_NLP = $(AUTODIR)\Devel\DProf\DProf.nlp -GLOB_NLP = $(AUTODIR)\File\Glob\Glob.nlp -CWD_NLP = $(AUTODIR)\Cwd\Cwd.nlp -STORABLE_NLP = 
$(AUTODIR)\Storable\Storable.nlp -LISTUTIL_NLP = $(AUTODIR)\List\Util\Util.nlp -MIMEBASE64_NLP = $(AUTODIR)\MIME\Base64\Base64.nlp -XSTYPEMAP_NLP = $(AUTODIR)\XS\Typemap\Typemap.nlp +SOCKET_NLP = $(AUTODIR)\Socket\Socket.nlp +FCNTL_NLP = $(AUTODIR)\Fcntl\Fcntl.nlp +IO_NLP = $(AUTODIR)\IO\IO.nlp +OPCODE_NLP = $(AUTODIR)\Opcode\Opcode.nlp +SDBM_FILE_NLP = $(AUTODIR)\SDBM_File\SDBM_File.nlp +POSIX_NLP = $(AUTODIR)\POSIX\POSIX.nlp +ATTRS_NLP = $(AUTODIR)\attrs\attrs.nlp +THREAD_NLP = $(AUTODIR)\Thread\Thread.nlp +B_NLP = $(AUTODIR)\B\B.nlp +DUMPER_NLP = $(AUTODIR)\Data\Dumper\Dumper.nlp +PEEK_NLP = $(AUTODIR)\Devel\Peek\Peek.nlp +RE_NLP = $(AUTODIR)\re\re.nlp +BYTELOADER_NLP = $(AUTODIR)\ByteLoader\ByteLoader.nlp +DPROF_NLP = $(AUTODIR)\Devel\DProf\DProf.nlp +GLOB_NLP = $(AUTODIR)\File\Glob\Glob.nlp +CWD_NLP = $(AUTODIR)\Cwd\Cwd.nlp +STORABLE_NLP = $(AUTODIR)\Storable\Storable.nlp +LISTUTIL_NLP = $(AUTODIR)\List\Util\Util.nlp +MIMEBASE64_NLP = $(AUTODIR)\MIME\Base64\Base64.nlp +XSTYPEMAP_NLP = $(AUTODIR)\XS\Typemap\Typemap.nlp +UNICODENORMALIZE_NLP = $(AUTODIR)\Unicode\Normalize\Normalize.nlp EXTENSION_NLP = \ $(FCNTL_NLP) \ @@ -299,6 +300,7 @@ EXTENSION_NLP = \ $(LISTUTIL_NLP) \ $(MIMEBASE64_NLP) \ $(XSTYPEMAP_NLP) \ + $(UNICODENORMALIZE_NLP) \ # $(CWD_NLP) \ # cwd.pm needs to be modifed for NetWare. 
@@ -764,33 +766,35 @@ X2P_OBJ = $(X2P_SRC:.c=.obj) DYNAMIC_EXT = Socket IO Fcntl Opcode SDBM_File POSIX attrs Thread B re \ Data/Dumper Devel/Peek ByteLoader Devel/DProf File/Glob \ - Storable/Storable List/Util MIME/Base64/Base64 XS/Typemap/Typemap + Storable/Storable List/Util MIME/Base64/Base64 \ + XS/Typemap/Typemap Unicode/Normalize/Normalize STATIC_EXT = DynaLoader NONXS_EXT = Errno -DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader -SOCKET = $(EXTDIR)\Socket\Socket -FCNTL = $(EXTDIR)\Fcntl\Fcntl -OPCODE = $(EXTDIR)\Opcode\Opcode -SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File +DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader +SOCKET = $(EXTDIR)\Socket\Socket +FCNTL = $(EXTDIR)\Fcntl\Fcntl +OPCODE = $(EXTDIR)\Opcode\Opcode +SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File IO = $(EXTDIR)\IO\IO -POSIX = $(EXTDIR)\POSIX\POSIX -ATTRS = $(EXTDIR)\attrs\attrs -THREAD = $(EXTDIR)\Thread\Thread +POSIX = $(EXTDIR)\POSIX\POSIX +ATTRS = $(EXTDIR)\attrs\attrs +THREAD = $(EXTDIR)\Thread\Thread B = $(EXTDIR)\B\B RE = $(EXTDIR)\re\re -DUMPER = $(EXTDIR)\Data\Dumper\Dumper -ERRNO = $(EXTDIR)\Errno\Errno -PEEK = $(EXTDIR)\Devel\Peek\Peek -BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader -DPROF = $(EXTDIR)\Devel\DProf\DProf -GLOB = $(EXTDIR)\File\Glob\Glob +DUMPER = $(EXTDIR)\Data\Dumper\Dumper +ERRNO = $(EXTDIR)\Errno\Errno +PEEK = $(EXTDIR)\Devel\Peek\Peek +BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader +DPROF = $(EXTDIR)\Devel\DProf\DProf +GLOB = $(EXTDIR)\File\Glob\Glob CWD = $(EXTDIR)\Cwd\Cwd -STORABLE = $(EXTDIR)\Storable\Storable -LISTUTIL = $(EXTDIR)\List\Util -MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64 -XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap +STORABLE = $(EXTDIR)\Storable\Storable +LISTUTIL = $(EXTDIR)\List\Util +MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64 +XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap +UNICODENORMALIZE = $(EXTDIR)\Unicode\Normalize\Normalize EXTENSION_C = \ $(SOCKET).c \ @@ -813,6 +817,7 @@ EXTENSION_C = \ $(LISTUTIL).c \ $(MIMEBASE64).c \ $(XSTYPEMAP).c \ + $(UNICODENORMALIZE).c \ 
POD2HTML = $(PODDIR)\pod2html POD2MAN = $(PODDIR)\pod2man @@ -1285,6 +1290,12 @@ $(XSTYPEMAP_NLP): $(MAKE) cd ..\..\..\netware +$(UNICODENORMALIZE_NLP): + cd $(EXTDIR)\Unicode\$(*B) + ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl + $(MAKE) + cd ..\..\..\netware + $(ERRNO_PM_NW): cd $(EXTDIR)\$(*B) ..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl @@ -1425,6 +1436,7 @@ distclean: clean nwclean -del /f $(LIBDIR)\Data\Dumper.pm $(LIBDIR)\ByteLoader.pm -del /f $(LIBDIR)\Devel\Peek.pm $(LIBDIR)\Devel\DProf.pm -del /f $(LIBDIR)\File\Glob.pm + -del /f $(LIBDIR)\Unicode\Normalize.pm -rmdir /s /q $(LIBDIR)\IO || rmdir /s $(LIBDIR)\IO -rmdir /s /q $(LIBDIR)\Thread || rmdir /s $(LIBDIR)\Thread -rmdir /s /q $(LIBDIR)\B || rmdir /s $(LIBDIR)\B diff --git a/djgpp/config.over b/djgpp/config.over index 5f58ba7..55eef9b 100644 --- a/djgpp/config.over +++ b/djgpp/config.over @@ -46,6 +46,7 @@ repair() -e 's=cwd=Cwd=' \ -e 's=perlio/via=PerlIO/Via=' \ -e 's=xs/typemap=XS/Typemap=' \ + -e 's=unicode/normalize=Unicode/Normalize=' \ -e 's=i18n/langinfo=I18N/Langinfo=' } static_ext=$(repair "$static_ext") diff --git a/epoc/config.sh b/epoc/config.sh index 42ada75..2cafe19 100644 --- a/epoc/config.sh +++ b/epoc/config.sh @@ -426,7 +426,7 @@ emacs='' eunicefix=':' exe_ext='' expr='expr' -extensions='Data/Dumper Digest/MD5 Errno Fcntl File/Glob Filter/Util/Call IO List/Util MIME/Base64 Opcode PerlIO/Scalar Socket Storable Sys/Hostname attrs re' +extensions='Data/Dumper Digest/MD5 Errno Fcntl File/Glob Filter/Util/Call IO List/Util MIME/Base64 Opcode PerlIO/Scalar Socket Storable Sys/Hostname Unicode/Normalize attrs re' fflushNULL='undef' fflushall='define' find='' diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes new file mode 100644 index 0000000..bf17449 --- /dev/null +++ b/ext/Unicode/Normalize/Changes @@ -0,0 +1,38 @@ +Revision history for Perl extension Unicode::Normalize. 
+ +0.10 Sat Nov 03 16:30:20 2001 + - The XS version is now independent of Lingua::KO::Hangul::Util. + (though the Non-XS version still requires that.) + +0.09 Fri Nov 02 22:39:30 2001 + - remove pTHX_. + +0.08 Thu Nov 01 23:20:42 2001 + - use Lingua::KO::Hangul::Util 0.06 and remove "hangul.h". + +0.07 Wed Oct 31 22:06:42 2001 + - modify internal. decompose() - reorder() - compose(). + +0.06 Sun Oct 28 14:28:46 2001 + - an XS version. + (but the Non-XS version is also supported.) + +0.05 Wed Oct 10 22:02:15 2001 (not released) + - %Compos contains unnecessary singletons + (though it did not cause any bug, only useless). + They will not be stored. + +0.04 Wed Aug 15 19:02:41 2001 + - fix: NFD("") and NFKD("") must return "", not but undef. + +0.03 Fri Aug 10 22:44:18 2001 + - rename the module name to Unicode::Normalize. + - normalize takes two arguments. + +0.02 Thu Aug 9 22:56:36 2001 + - add function normalize + +0.01 Mon Aug 6 21:45:11 2001 + - original version; created by h2xs 1.21 with options + -A -X -n Text::Unicode::Normalize + diff --git a/ext/Unicode/Normalize/Makefile.PL b/ext/Unicode/Normalize/Makefile.PL new file mode 100644 index 0000000..88ab9b7 --- /dev/null +++ b/ext/Unicode/Normalize/Makefile.PL @@ -0,0 +1,15 @@ +use ExtUtils::MakeMaker; + +# This is not the CPAN Unicode::Normalize makefile +# that can handle XS-NOXS installing. We do just XS. + +do "mkheader"; + +WriteMakefile( + 'NAME' => 'Unicode::Normalize', + 'VERSION_FROM' => 'Normalize.pm', # finds $VERSION + ($] >= 5.005 ? 
## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'Normalize.pod', # retrieve abstract from module + AUTHOR => 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>') : ()), + clean => {FILES=> 'unfcan.h unfcmb.h unfcmp.h unfcpt.h unfexc.h'}, +); diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm new file mode 100644 index 0000000..a583425 --- /dev/null +++ b/ext/Unicode/Normalize/Normalize.pm @@ -0,0 +1,45 @@ +package Unicode::Normalize; + +use 5.006; +use strict; +use warnings; +use Carp; + +our $VERSION = '0.10'; +our $PACKAGE = __PACKAGE__; + +require Exporter; +require DynaLoader; +require AutoLoader; + +our @ISA = qw(Exporter DynaLoader); +our @EXPORT = qw( NFC NFD NFKC NFKD ); +our @EXPORT_OK = qw( normalize decompose reorder compose + getCanon getCompat getComposite getCombinClass getExclusion); +our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] ); + +bootstrap Unicode::Normalize $VERSION; + +use constant CANON => 0; +use constant COMPAT => 1; + +sub NFD ($) { reorder(decompose($_[0], CANON)) } + +sub NFKD ($) { reorder(decompose($_[0], COMPAT)) } + +sub NFC ($) { compose(reorder(decompose($_[0], CANON))) } + +sub NFKC ($) { compose(reorder(decompose($_[0], COMPAT))) } + +sub normalize($$) +{ + my $form = shift; + $form eq 'D' || $form eq 'NFD' ? NFD ($_[0]) : + $form eq 'C' || $form eq 'NFC' ? NFC ($_[0]) : + $form eq 'KD' || $form eq 'NFKD' ? NFKD($_[0]) : + $form eq 'KC' || $form eq 'NFKC' ? 
NFKC($_[0]) : + croak $PACKAGE."::normalize: invalid form name: $form"; +} + +1; +__END__ diff --git a/ext/Unicode/Normalize/Normalize.pod b/ext/Unicode/Normalize/Normalize.pod new file mode 100644 index 0000000..4ac8966 --- /dev/null +++ b/ext/Unicode/Normalize/Normalize.pod @@ -0,0 +1,89 @@ + +=head1 NAME + +Unicode::Normalize - normalized forms of Unicode text + +=head1 SYNOPSIS + + use Unicode::Normalize; + + $string_NFD = NFD($raw_string); # Normalization Form D + $string_NFC = NFC($raw_string); # Normalization Form C + $string_NFKD = NFKD($raw_string); # Normalization Form KD + $string_NFKC = NFKC($raw_string); # Normalization Form KC + + or + + use Unicode::Normalize 'normalize'; + + $string_NFD = normalize('D', $raw_string); # Normalization Form D + $string_NFC = normalize('C', $raw_string); # Normalization Form C + $string_NFKD = normalize('KD', $raw_string); # Normalization Form KD + $string_NFKC = normalize('KC', $raw_string); # Normalization Form KC + +=head1 DESCRIPTION + +=over 4 + +=item C<$string_NFD = NFD($raw_string)> + +returns the Normalization Form D (formed by canonical decomposition). + + +=item C<$string_NFC = NFC($raw_string)> + +returns the Normalization Form C (formed by canonical decomposition +followed by canonical composition). + +=item C<$string_NFKD = NFKD($raw_string)> + +returns the Normalization Form KD (formed by compatibility decomposition). + +=item C<$string_NFKC = NFKC($raw_string)> + +returns the Normalization Form KC (formed by compatibility decomposition +followed by B<canonical> composition). + +=item C<$normalized_string = normalize($form_name, $raw_string)> + +As C<$form_name>, one of the following names must be given. + + 'C' or 'NFC' for Normalization Form C + 'D' or 'NFD' for Normalization Form D + 'KC' or 'NFKC' for Normalization Form KC + 'KD' or 'NFKD' for Normalization Form KD + +=back + +=head2 EXPORT + +C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default. + +C<normalize>: on request. 
+ +=head1 AUTHOR + +SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt> + + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This program is free software; you can redistribute it and/or + modify it under the same terms as Perl itself. + +=head1 SEE ALSO + +=over 4 + +=item L<Lingua::KO::Hangul::Util> + +utility functions for Hangul Syllables + +=item http://www.unicode.org/unicode/reports/tr15/ + +Unicode Normalization Forms - UAX #15 + +=back + +=cut diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs new file mode 100644 index 0000000..aca0853 --- /dev/null +++ b/ext/Unicode/Normalize/Normalize.xs @@ -0,0 +1,378 @@ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +/* These 5 files are prepared by mkheader */ +#include "unfcmb.h" +#include "unfcan.h" +#include "unfcpt.h" +#include "unfcmp.h" +#include "unfexc.h" + +/* Perl 5.6.1 ? */ +#ifndef uvuni_to_utf8 +#define uvuni_to_utf8 uv_to_utf8 +#endif /* uvuni_to_utf8 */ + +/* Perl 5.6.1 ? */ +#ifndef utf8n_to_uvchr +#define utf8n_to_uvchr utf8_to_uv +#endif /* utf8n_to_uvchr */ + +/* At present, char > 0x10ffff are unaffected without complaint, right? */ +#define VALID_UTF_MAX (0x10ffff) +#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) + +/* HANGUL_H */ +#define Hangul_SBase 0xAC00 +#define Hangul_SFinal 0xD7A3 +#define Hangul_SCount 11172 + +#define Hangul_NCount 588 + +#define Hangul_LBase 0x1100 +#define Hangul_LFinal 0x1112 +#define Hangul_LCount 19 + +#define Hangul_VBase 0x1161 +#define Hangul_VFinal 0x1175 +#define Hangul_VCount 21 + +#define Hangul_TBase 0x11A7 +#define Hangul_TFinal 0x11C2 +#define Hangul_TCount 28 + +#define Hangul_IsS(u) ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal)) +#define Hangul_IsN(u) (! 
(((u) - Hangul_SBase) % Hangul_TCount)) +#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u)) +#define Hangul_IsL(u) ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal)) +#define Hangul_IsV(u) ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal)) +#define Hangul_IsT(u) ((Hangul_TBase < (u)) && ((u) <= Hangul_TFinal)) +/* HANGUL_H */ + +/* this is used for canonical ordering of combining characters (c.c.). */ +typedef struct { + U8 cc; /* combining class */ + UV uv; /* codepoint */ + STRLEN pos; /* position */ +} UNF_cc; + +int compare_cc(const void *a, const void *b) +{ + int ret_cc; + ret_cc = (*(UNF_cc*)a).cc - (*(UNF_cc*)b).cc; + if(ret_cc) return ret_cc; + return (*(UNF_cc*)a).pos - (*(UNF_cc*)b).pos; +} + +U8* dec_canonical (UV uv) +{ + U8 ***plane, **row; + if(OVER_UTF_MAX(uv)) return NULL; + plane = (U8***)UNF_canon[uv >> 16]; + if(! plane) return NULL; + row = plane[(uv >> 8) & 0xff]; + return row ? row[uv & 0xff] : NULL; +} + +U8* dec_compat (UV uv) +{ + U8 ***plane, **row; + if(OVER_UTF_MAX(uv)) return NULL; + plane = (U8***)UNF_compat[uv >> 16]; + if(! plane) return NULL; + row = plane[(uv >> 8) & 0xff]; + return row ? row[uv & 0xff] : NULL; +} + +UV getComposite (UV uv, UV uv2) +{ + UNF_complist ***plane, **row, *cell, *i; + + if(! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2)) return 0; + + if(Hangul_IsL(uv) && Hangul_IsV(uv2)) { + uv -= Hangul_LBase; /* lindex */ + uv2 -= Hangul_VBase; /* vindex */ + return(Hangul_SBase + (uv * Hangul_VCount + uv2) * Hangul_TCount); + } + if(Hangul_IsLV(uv) && Hangul_IsT(uv2)) { + uv2 -= Hangul_TBase; /* tindex */ + return (uv + uv2); + } + plane = UNF_compos[uv >> 16]; + if(! plane) return 0; + row = plane[(uv >> 8) & 0xff]; + if(! row) return 0; + cell = row[uv & 0xff]; + if(! cell) return 0; + for(i = cell; i->nextchar; i++) { + if(uv2 == i->nextchar) return i->composite; + } + return 0; +} + +U8 getCombinClass (UV uv) +{ + U8 **plane, *row; + if(OVER_UTF_MAX(uv)) return 0; + plane = (U8**)UNF_combin[uv >> 16]; + if(! 
plane) return 0; + row = plane[(uv >> 8) & 0xff]; + return row ? row[uv & 0xff] : 0; +} + +void sv_cat_decompHangul (SV* sv, UV uv) +{ + UV sindex, lindex, vindex, tindex; + U8 *t, temp[3 * UTF8_MAXLEN + 1]; + + if(! Hangul_IsS(uv)) return; + + sindex = uv - Hangul_SBase; + lindex = sindex / Hangul_NCount; + vindex = (sindex % Hangul_NCount) / Hangul_TCount; + tindex = sindex % Hangul_TCount; + + t = temp; + t = uvuni_to_utf8(t, (lindex + Hangul_LBase)); + t = uvuni_to_utf8(t, (vindex + Hangul_VBase)); + if (tindex) t = uvuni_to_utf8(t, (tindex + Hangul_TBase)); + *t = '\0'; + sv_catpvn(sv, (char *)temp, strlen((char *)temp)); +} + +MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize + + +SV* +decompose(arg, compat) + SV * arg + SV * compat + PROTOTYPE: $$ + PREINIT: + SV *src, *dst; + STRLEN srclen, dstlen, retlen; + U8 *s, *e, *p, *d, *r; + UV uv; + bool iscompat; + CODE: + if(SvUTF8(arg)) { + src = arg; + } else { + src = sv_mortalcopy(arg); + sv_utf8_upgrade(src); + } + + iscompat = SvTRUE(compat); + + dst = newSV(1); + (void)SvPOK_only(dst); + SvUTF8_on(dst); + + s = (U8*)SvPV(src,srclen); + e = s + srclen; + for(p = s; p < e;){ + uv = utf8n_to_uvchr(p, e - p, &retlen, 0); + p += retlen; + if(Hangul_IsS(uv)) sv_cat_decompHangul(dst, uv); + else { + r = iscompat ? dec_compat(uv) : dec_canonical(uv); + if(r) sv_catpv(dst, (char *)r); + else sv_catpvn(dst, (char *)p - retlen, retlen); + } + } + RETVAL = dst; + OUTPUT: + RETVAL + + + +SV* +reorder(arg) + SV * arg + PROTOTYPE: $ + PREINIT: + SV *src; + STRLEN srclen, retlen, stk_cc_max; + U8 *s, *e, *p, curCC; + UV uv; + UNF_cc * stk_cc; + CODE: + src = newSVsv(arg); + if(! SvUTF8(arg)) sv_utf8_upgrade(src); + + stk_cc_max = 10; /* enough as an initial value? 
*/ + New(0, stk_cc, stk_cc_max, UNF_cc); + + s = (U8*)SvPV(src,srclen); + e = s + srclen; + for(p = s; p < e;){ + U8 *cc_in; + STRLEN cc_len, cc_iter, cc_pos; + + uv = utf8n_to_uvchr(p, e - p, &retlen, 0); + p += retlen; + cc_pos = 0; + curCC = getCombinClass(uv); + if(! (curCC && p < e)) continue; else cc_in = p - retlen; + + stk_cc[cc_pos].cc = curCC; + stk_cc[cc_pos].uv = uv; + stk_cc[cc_pos].pos = cc_pos; + + while(p < e) { + uv = utf8n_to_uvchr(p, e - p, &retlen, 0); + curCC = getCombinClass(uv); + if(!curCC) break; + p += retlen; + cc_pos++; + if(stk_cc_max <= cc_pos) { /* extend if need */ + stk_cc_max = cc_pos + 1; + Renew(stk_cc, stk_cc_max, UNF_cc); + } + stk_cc[cc_pos].cc = curCC; + stk_cc[cc_pos].uv = uv; + stk_cc[cc_pos].pos = cc_pos; + } + + /* only one c.c. in cc_len from cc_in, no need of reordering */ + if(!cc_pos) continue; + + qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc); + + cc_len = p - cc_in; + p = cc_in; + for(cc_iter = 0; cc_iter <= cc_pos; cc_iter++) { + p = uvuni_to_utf8(p, stk_cc[cc_iter].uv); + } + } + Safefree(stk_cc); + RETVAL = src; + OUTPUT: + RETVAL + + + +void +compose(arg) + SV * arg + PROTOTYPE: $ + PREINIT: + SV *src, *dst, *tmp; + U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC; + UV uv, uvS, uvComp; + STRLEN srclen, dstlen, tmplen, dstcur, retlen; + bool beginning = TRUE; + PPCODE: + if(SvUTF8(arg)) { + src = arg; + } else { + src = sv_mortalcopy(arg); + sv_utf8_upgrade(src); + } + s = (U8*)SvPV(src, srclen); + e = s + srclen; + dstlen = srclen + 1; /* equal or shorter, XXX */ + dst = sv_2mortal(newSV(dstlen)); + (void)SvPOK_only(dst); + SvUTF8_on(dst); + d = (U8*)SvPVX(dst); + + /* for uncomposed combining char */ + tmp = sv_2mortal(newSV(dstlen)); + (void)SvPOK_only(tmp); + SvUTF8_on(tmp); + + for(p = s; p < e;){ + if(beginning) { + uvS = utf8n_to_uvchr(p, e - p, &retlen, 0); + p += retlen; + + if (getCombinClass(uvS)){ /* no Starter found yet */ + d = uvuni_to_utf8(d, uvS); + continue; + } + beginning = 
FALSE; + } + + /* Starter */ + t = tmp_start = (U8*)SvPVX(tmp); + preCC = 0; + + /* to the next Starter */ + while(p < e) { + uv = utf8n_to_uvchr(p, e - p, &retlen, 0); + p += retlen; + curCC = getCombinClass(uv); + + if(preCC && preCC == curCC) { + preCC = curCC; + t = uvuni_to_utf8(t, uv); + } else { + uvComp = getComposite(uvS, uv); + + /* S + C + S => S-S + C would be also blocked. */ + if( uvComp && ! getExclusion(uvComp) && preCC <= curCC) + { + /* preCC not changed to curCC */ + uvS = uvComp; + } else if (! curCC && p < e) { /* blocked */ + break; + } else { + preCC = curCC; + t = uvuni_to_utf8(t, uv); + } + } + } + d = uvuni_to_utf8(d, uvS); /* composed char */ + if(tmplen = t - tmp_start) { /* uncomposed combining char */ + t = (U8*)SvPVX(tmp); + while(tmplen--) *d++ = *t++; + } + uvS = uv; + } /* for */ + dstcur = d - (U8*)SvPVX(dst); + SvCUR_set(dst, dstcur); + XPUSHs(dst); + + + +U8 +getCombinClass(uv) + UV uv + +bool +getExclusion(uv) + UV uv + +UV +getComposite(uv, uv2) + UV uv + UV uv2 + +SV* +getCanon(uv) + UV uv + PROTOTYPE: $ + ALIAS: + getCompat = 1 + PREINIT: + U8 * rstr; + CODE: + if(Hangul_IsS(uv)) { + SV * dst; + dst = newSV(1); + (void)SvPOK_only(dst); + sv_cat_decompHangul(dst, uv); + RETVAL = dst; + } else { + rstr = ix ? 
dec_compat(uv) : dec_canonical(uv); + if(!rstr) XSRETURN_UNDEF; + RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr)); + } + SvUTF8_on(RETVAL); + OUTPUT: + RETVAL + diff --git a/lib/Unicode/Normalize/README b/ext/Unicode/Normalize/README similarity index 75% rename from lib/Unicode/Normalize/README rename to ext/Unicode/Normalize/README index e1f9e96..3f0c424 100644 --- a/lib/Unicode/Normalize/README +++ b/ext/Unicode/Normalize/README @@ -1,4 +1,4 @@ -Unicode/Normalize version 0.04 +Unicode/Normalize version 0.10 =================================== Unicode::Normalize - normalized forms of Unicode text @@ -23,6 +23,8 @@ SYNOPSIS INSTALLATION +Perl 5.006 or later + To install this module type the following: perl Makefile.PL @@ -30,19 +32,26 @@ To install this module type the following: make test make install +If you have a C compiler and want to use the XS version, +type the following: + + perl Makefile.PL xs + make + make test + make install + DEPENDENCIES This module requires these other modules and libraries: Carp Exporter +File::Copy File::Spec -Lingua::KO::Hangul::Util -$unidir/CombiningClass.pl -$unidir/Decomposition.pl -$unidir/CompExcl.txt - -# $unidir is $LIB/unicore or $LIB/unicode +Lingua::KO::Hangul::Util 0.06 +unicore/CombiningClass.pl or unicode/CombiningClass.pl +unicore/Decomposition.pl or unicode/Decomposition.pl +unicore/CompExcl.txt or unicode/CompExcl.txt COPYRIGHT AND LICENCE diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader new file mode 100644 index 0000000..85d2b90 --- /dev/null +++ b/ext/Unicode/Normalize/mkheader @@ -0,0 +1,284 @@ +#!perl +# +# This script generates "unfcan.h", "unfcpt.h", "unfcmb.h", +# "unfcmp.h", and "unfexc.h" +# from CombiningClass.pl, Decomposition.pl, CompExcl.txt +# in lib/unicore or unicode directory +# for Unicode::Normalize.xs. (cf. 
Makefile.PL) +# +use 5.006; +use strict; +use warnings; +use Carp; + +our $PACKAGE = 'Unicode::Normalize, mkheader'; + +our $Combin = do "unicore/CombiningClass.pl" + || do "unicode/CombiningClass.pl" + || croak "$PACKAGE: CombiningClass.pl not found"; + +our $Decomp = do "unicore/Decomposition.pl" + || do "unicode/Decomposition.pl" + || croak "$PACKAGE: Decomposition.pl not found"; + +our %Combin; # $codepoint => $number : combination class +our %Canon; # $codepoint => $hexstring : canonical decomp. +our %Compat; # $codepoint => $hexstring : compat. decomp. +our %Compos; # $string => $codepoint : composite + +our %Exclus; # $codepoint => 1 : composition exclusions + +{ + my($f, $fh); + foreach my $d (@INC) { + use File::Spec; + $f = File::Spec->catfile($d, "unicore", "CompExcl.txt"); + last if open($fh, $f); + $f = File::Spec->catfile($d, "unicode", "CompExcl.txt"); + last if open($fh, $f); + $f = undef; + } + croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f; + while(<$fh>) { + next if /^#/ or /^$/; + s/#.*//; + $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/; + } + close $fh; +} + +while($Combin =~ /(.+)/g) { + my @tab = split /\t/, $1; + my $ini = hex $tab[0]; + if($tab[1] eq '') { + $Combin{ $ini } = $tab[2]; + } else { + $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]); + } +} + +while($Decomp =~ /(.+)/g) { + my @tab = split /\t/, $1; + my $compat = $tab[2] =~ s/<[^>]+>//; + my $dec = [ _getHexArray($tab[2]) ]; # decomposition + my $com = pack('U*', @$dec); # composable sequence + my $ini = hex($tab[0]); + if($tab[1] eq '') { + $Compat{ $ini } = $dec; + if(! $compat) { + $Canon{ $ini } = $dec; + $Compos{ $com } = $ini if @$dec > 1; + } + } else { + foreach my $u ($ini .. hex($tab[1])){ + $Compat{ $u } = $dec; + if(! 
$compat){ + $Canon{ $u } = $dec; + $Compos{ $com } = $ini if @$dec > 1; + } + } + } +} + +# exhaustive decomposition +foreach my $key (keys %Canon) { + $Canon{$key} = [ getCanonList($key) ]; +} + +# exhaustive decomposition +foreach my $key (keys %Compat) { + $Compat{$key} = [ getCompatList($key) ]; +} + +sub getCanonList { + my @src = @_; + my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src; + join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec); + # condition @src == @dec is not ok. +} + +sub getCompatList { + my @src = @_; + my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src; + join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec); + # condition @src == @dec is not ok. +} + +sub _getHexArray { + my $str = shift; + map hex(), $str =~ /([0-9A-Fa-f]+)/g; +} + +sub _U_stringify { + sprintf '"%s"', join '', + map sprintf("\\x%2x", $_), unpack 'C*', pack 'U*', @_; +} + +foreach my $hash (\%Canon, \%Compat) { + foreach my $key (keys %$hash) { + $hash->{$key} = _U_stringify( @{ $hash->{$key} } ); + } +} + +sub utf8len { + my $uv = shift; + return $uv < 0x80 ? 1 : + $uv < 0x800 ? 2 : + $uv < 0x10000 ? 3 : + $uv < 0x110000 ? 4 : + croak "$PACKAGE: illegal char in the composite. utf-8 max is 0x10ffff."; +} + +my $prefix = "UNF_"; + +my $structname = "${prefix}complist"; + +our (%Comp1st, %CompList); + +foreach(sort keys %Compos) { + my @a = unpack('U*', $_); + my $val = $Compos{$_}; + my $name = sprintf "${structname}_%06x", $a[0]; + $Comp1st{ $a[0] } = $name; + $CompList{ $name }{ $a[1] } = $val; + + if( utf8len($a[0]) + utf8len($a[1]) < utf8len($val) ) { + croak "$PACKAGE: " + . "composable pair is longer than the composite in bytes!\n" + . 
sprintf("%d + %d => %d", $a[0], $a[1], $val); + } +} + +my $compinit = + "typedef struct { UV nextchar; UV composite; } $structname;\n\n"; + +foreach my $i (sort keys %CompList) { + $compinit .= "$structname $i [] = {\n"; + $compinit .= join ",\n", + map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}), + sort {$a <=> $b } keys %{ $CompList{$i} }; + $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel +} + +#################################### + +my @Exclus = sort {$a <=> $b} keys %Exclus; + +my $file = "unfexc.h"; +open FH, ">$file" or croak "$PACKAGE: $file can't be made"; +binmode FH; select FH; + +print "bool getExclusion (UV uv) \n{\nreturn\n\t"; + +while(@Exclus) { + my $cur = shift @Exclus; + if(@Exclus && $cur + 1 == $Exclus[0]) { + print "$cur <= uv && uv <= "; + while(@Exclus && $cur + 1 == $Exclus[0]) { + $cur = shift @Exclus; + } + print $cur; + print "\n\t|| " if @Exclus; + } else { + print "uv == $cur"; + print "\n\t|| " if @Exclus; + } +} + +print "\n\t? TRUE : FALSE;\n}\n\n"; +close FH; + +#################################### + +my @tripletable = ( + { + file => "unfcmb", + name => "combin", + type => "char", + hash => \%Combin, + null => 0, + }, + { + file => "unfcan", + name => "canon", + type => "char*", + hash => \%Canon, + null => "NULL", + }, + { + file => "unfcpt", + name => "compat", + type => "char*", + hash => \%Compat, + null => "NULL", + }, + { + file => "unfcmp", + name => "compos", + type => "$structname *", + hash => \%Comp1st, + null => "NULL", + init => $compinit, + }, +); + +foreach my $tbl (@tripletable) { + my $file = "$tbl->{file}.h"; + my $head = "${prefix}$tbl->{name}"; + my $type = $tbl->{type}; + my $hash = $tbl->{hash}; + my $null = $tbl->{null}; + my $init = $tbl->{init}; + + open FH, ">$file" or croak "$PACKAGE: $file can't be made"; + binmode FH; select FH; + my %val; + + print FH << 'EOF'; +/* + * This file is auto-generated by mkheader. + * Any changes here will be lost! 
+ */ +EOF + + print $init if defined $init; + + foreach my $uv (keys %$hash) { + my @c = unpack 'CCCC', pack 'N', $uv; + $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; + } + + foreach my $p (sort { $a <=> $b } keys %val) { + next if ! $val{ $p }; + for(my $r = 0; $r < 256; $r++){ + next if ! $val{ $p }{ $r }; + printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r; + for(my $c = 0; $c < 256; $c++){ + print "\t", defined $val{$p}{$r}{$c} ? $val{$p}{$r}{$c} : $null; + print ',' if $c != 255; + print "\n" if $c % 8 == 7; + } + print "};\n\n"; + } + } + foreach my $p (sort { $a <=> $b } keys %val) { + next if ! $val{ $p }; + printf "$type* ${head}_%02x [256] = {\n", $p; + for(my $r = 0; $r < 256; $r++){ + print $val{ $p }{ $r } ? sprintf("${head}_%02x_%02x", $p, $r) : "NULL"; + print ',' if $r != 255; + print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; + } + print "};\n\n"; + } + print "$type** $head [] = {\n"; + for(my $p = 0; $p <= 0x10; $p++){ + print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; + print ',' if $p != 0x10; + print "\n"; + } + print "};\n\n"; + close FH; +} + +__END__ diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t new file mode 100644 index 0000000..8907634 --- /dev/null +++ b/ext/Unicode/Normalize/t/func.t @@ -0,0 +1,69 @@ +# Before `make install' is performed this script should be runnable with +# `make test'. After `make install' it should work as `perl test.pl' + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 6 }; +use Unicode::Normalize qw(:all); +ok(1); # If we made it this far, we're ok. + +######################### + +print getCombinClass( 0) == 0 + && getCombinClass( 768) == 230 + && getCombinClass(1809) == 36 +# && getCombinClass(119143) == 1 + ? "ok" : "not ok", " 2\n"; + +print ! defined getCanon( 0) + && ! 
defined getCanon(41) + && getCanon(0x00C0) eq pack('U*', 0x0041, 0x0300) + && getCanon(0x00EF) eq pack('U*', 0x0069, 0x0308) + && getCanon(0x304C) eq pack('U*', 0x304B, 0x3099) + && getCanon(0x1EA4) eq pack('U*', 0x0041, 0x0302, 0x0301) + && getCanon(0x1FAF) eq pack('U*', 0x03A9, 0x0314, 0x0342, 0x0345) + && getCanon(0xAC00) eq pack('U*', 0x1100, 0x1161) + && getCanon(0xAE00) eq pack('U*', 0x1100, 0x1173, 0x11AF) + && ! defined getCanon(0x212C) + && ! defined getCanon(0x3243) + && getCanon(0xFA2D) eq pack('U*', 0x9DB4) + ? "ok" : "not ok", " 3\n"; + +print ! defined getCompat( 0) + && ! defined getCompat(41) + && getCompat(0x00C0) eq pack('U*', 0x0041, 0x0300) + && getCompat(0x00EF) eq pack('U*', 0x0069, 0x0308) + && getCompat(0x304C) eq pack('U*', 0x304B, 0x3099) + && getCompat(0x1EA4) eq pack('U*', 0x0041, 0x0302, 0x0301) + && getCompat(0x1FAF) eq pack('U*', 0x03A9, 0x0314, 0x0342, 0x0345) + && getCompat(0x212C) eq pack('U*', 0x0042) + && getCompat(0x3243) eq pack('U*', 0x0028, 0x81F3, 0x0029) + && getCompat(0xAC00) eq pack('U*', 0x1100, 0x1161) + && getCompat(0xAE00) eq pack('U*', 0x1100, 0x1173, 0x11AF) + && getCompat(0xFA2D) eq pack('U*', 0x9DB4) + ? "ok" : "not ok", " 4\n"; + +print ! getComposite( 0, 0) + && ! getComposite( 0, 41) + && ! getComposite(41, 0) + && ! getComposite(41, 41) + && ! getComposite(12, 0x0300) + && ! getComposite(0x0055, 0xFF00) + && 0x00D9 == getComposite(0x0055, 0x0300) + && 0x1E14 == getComposite(0x0112, 0x0300) + && 0xAC00 == getComposite(0x1100, 0x1161) + && 0xADF8 == getComposite(0x1100, 0x1173) + && ! getComposite(0x1100, 0x11AF) + && ! getComposite(0x1173, 0x11AF) + && 0xAE00 == getComposite(0xADF8, 0x11AF) + ? "ok" : "not ok", " 5\n"; + +print ! getExclusion( 0) + && ! getExclusion(41) + && getExclusion(2392) + && getExclusion(3907) + && getExclusion(64334) + ? 
"ok" : "not ok", " 6\n"; diff --git a/lib/Unicode/Normalize/t/norm.t b/ext/Unicode/Normalize/t/norm.t similarity index 88% rename from lib/Unicode/Normalize/t/norm.t rename to ext/Unicode/Normalize/t/norm.t index 88e4e7d..1de2e7f 100644 --- a/lib/Unicode/Normalize/t/norm.t +++ b/ext/Unicode/Normalize/t/norm.t @@ -6,7 +6,7 @@ use Test; use strict; use warnings; -BEGIN { plan tests => 15 }; +BEGIN { plan tests => 18 }; use Unicode::Normalize qw(normalize); ok(1); # If we made it this far, we're ok. @@ -27,16 +27,17 @@ sub hexNFD { ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01"); +ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00"); +ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); - ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); - ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); diff --git a/lib/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t similarity index 87% rename from lib/Unicode/Normalize/t/test.t rename to ext/Unicode/Normalize/t/test.t index 499f3ae..5544a3b 100644 --- a/lib/Unicode/Normalize/t/test.t +++ b/ext/Unicode/Normalize/t/test.t @@ -6,7 +6,7 @@ use Test; use strict; use warnings; -BEGIN { plan tests => 15 }; +BEGIN { plan tests => 18 }; use 
Unicode::Normalize; ok(1); # If we made it this far, we're ok. @@ -27,16 +27,17 @@ sub hexNFD { ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01"); +ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00"); +ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); - ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); - ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); diff --git a/hints/uwin.sh b/hints/uwin.sh index b8dd26c..e5a09a6 100644 --- a/hints/uwin.sh +++ b/hints/uwin.sh @@ -24,7 +24,7 @@ i_utime=undef # compile/link flags ldflags=-g optimize=-g -static_ext="B Data/Dumper Digest/MD5 Errno Fcntl Filter::Util::Call IO IPC/SysV MIME::Base64 Opcode PerlIO::Scalar POSIX SDBM_File Socket Storable attrs re" +static_ext="B Data/Dumper Digest/MD5 Errno Fcntl Filter::Util::Call IO IPC/SysV MIME::Base64 Opcode PerlIO::Scalar POSIX SDBM_File Socket Storable Unicode::Normalize attrs re" #static_ext=none # dynamic loading needs work usedl=undef diff --git a/hints/vmesa.sh b/hints/vmesa.sh index 20502c1..f0c0232 100644 --- a/hints/vmesa.sh +++ b/hints/vmesa.sh @@ -218,7 +218,6 @@ dynamic_ext='' eagain='EAGAIN' ebcdic='define' exe_ext='' -extensions='Data/Dumper Digest/MD5 Errno Fcntl Filter/Util/Call GDBM_File 
IO IPC/SysV List/Util MIME/Base64 NDBM_File Opcode PerlIO/Scalar POSIX Socket Storable Time/HiRes Thread attrs re' fpostype='fpos_t' freetype='void' groupstype='gid_t' diff --git a/lib/Lingua/KO/Hangul/Util.pm b/lib/Lingua/KO/Hangul/Util.pm deleted file mode 100644 index 3848592..0000000 --- a/lib/Lingua/KO/Hangul/Util.pm +++ /dev/null @@ -1,278 +0,0 @@ -package Lingua::KO::Hangul::Util; - -use 5.006; -use strict; -use warnings; - -require Exporter; - -our @ISA = qw(Exporter); -our %EXPORT_TAGS = (); -our @EXPORT_OK = (); -our @EXPORT = qw( - decomposeHangul - composeHangul - getHangulName - parseHangulName -); -our $VERSION = '0.02'; - -our @JamoL = ( # Initial (HANGUL CHOSEONG) - "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", - "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H", - ); - -our @JamoV = ( # Medial (HANGUL JUNGSEONG) - "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", - "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI", - "YU", "EU", "YI", "I", - ); - -our @JamoT = ( # Final (HANGUL JONGSEONG) - "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", - "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", - "S", "SS", "NG", "J", "C", "K", "T", "P", "H", - ); - -our $BlockName = "HANGUL SYLLABLE "; - -use constant SBase => 0xAC00; -use constant LBase => 0x1100; -use constant VBase => 0x1161; -use constant TBase => 0x11A7; -use constant LCount => 19; # scalar @JamoL -use constant VCount => 21; # scalar @JamoV -use constant TCount => 28; # scalar @JamoT -use constant NCount => 588; # VCount * TCount -use constant SCount => 11172; # LCount * NCount -use constant SFinal => 0xD7A3; # SBase -1 + SCount - -our(%CodeL, %CodeV, %CodeT); -@CodeL{@JamoL} = 0 .. LCount-1; -@CodeV{@JamoV} = 0 .. VCount-1; -@CodeT{@JamoT} = 0 .. 
TCount-1; - -sub getHangulName { - my $code = shift; - return undef unless SBase <= $code && $code <= SFinal; - my $SIndex = $code - SBase; - my $LIndex = int( $SIndex / NCount); - my $VIndex = int(($SIndex % NCount) / TCount); - my $TIndex = $SIndex % TCount; - "$BlockName$JamoL[$LIndex]$JamoV[$VIndex]$JamoT[$TIndex]"; -} - -sub parseHangulName { - my $arg = shift; - return undef unless $arg =~ s/$BlockName//o; - return undef unless $arg =~ /^([^AEIOUWY]*)([AEIOUWY]+)([^AEIOUWY]*)$/; - return undef unless exists $CodeL{$1} - && exists $CodeV{$2} - && exists $CodeT{$3}; - SBase + $CodeL{$1} * NCount + $CodeV{$2} * TCount + $CodeT{$3}; -} - -sub decomposeHangul { - my $code = shift; - return unless SBase <= $code && $code <= SFinal; - my $SIndex = $code - SBase; - my $LIndex = int( $SIndex / NCount); - my $VIndex = int(($SIndex % NCount) / TCount); - my $TIndex = $SIndex % TCount; - my @ret = ( - LBase + $LIndex, - VBase + $VIndex, - $TIndex ? (TBase + $TIndex) : (), - ); - wantarray ? @ret : pack('U*', @ret); -} - -# -# To Do: -# s/(\p{JamoL}\p{JamoV})/toHangLV($1)/ge; -# s/(\p{HangLV}\p{JamoT})/toHangLVT($1)/ge; -# -sub composeHangul { - my $str = shift; - return $str unless length $str; - my(@ret); - - foreach my $ch (unpack('U*', $str)) # Makes list! The string be short! - { - push(@ret, $ch) and next unless @ret; - - # 1. check to see if $ret[-1] is L and $ch is V. - my $LIndex = $ret[-1] - LBase; - if(0 <= $LIndex && $LIndex < LCount) - { - my $VIndex = $ch - VBase; - if(0 <= $VIndex && $VIndex < VCount) - { - $ret[-1] = SBase + ($LIndex * VCount + $VIndex) * TCount; - next; # discard $ch - } - } - - # 2. check to see if $ret[-1] is LV and $ch is T. - my $SIndex = $ret[-1] - SBase; - if(0 <= $SIndex && $SIndex < SCount && $SIndex % TCount == 0) - { - my $TIndex = $ch - TBase; - if(0 <= $TIndex && $TIndex < TCount) - { - $ret[-1] += $TIndex; - next; # discard $ch - } - } - - # 3. just append $ch - push(@ret, $ch); - } - wantarray ? 
@ret : pack('U*', @ret); -} - -1; -__END__ - -=head1 NAME - -Lingua::KO::Hangul::Util - utility functions for Hangul Syllables - -=head1 SYNOPSIS - - use Lingua::KO::Hangul::Util; - - decomposeHangul(0xAC00); - # (0x1100,0x1161) or "\x{1100}\x{1161}" - - composeHangul("\x{1100}\x{1161}"); - # "\x{AC00}" - - getHangulName(0xAC00); - # "HANGUL SYLLABLE GA" - - parseHangulName("HANGUL SYLLABLE GA"); - # 0xAC00 - -=head1 DESCRIPTION - -A Hangul syllable consists of Hangul Jamo. - -Hangul Jamo are classified into three classes: - - CHOSEONG (the initial sound) as a leading consonant (L), - JUNGSEONG (the medial sound) as a vowel (V), - JONGSEONG (the final sound) as a trailing consonant (T). - -Any Hangul syllable is a composition of - - i) CHOSEONG + JUNGSEONG (L + V) - - or - - ii) CHOSEONG + JUNGSEONG + JONGSEONG (L + V + T). - -Names of Hangul Syllables have a format of C<"HANGUL SYLLABLE %s">. - -=head2 Composition and Decomposition - -=over 4 - -=item C<$string_decomposed = decomposeHangul($codepoint)> - -=item C<@codepoints = decomposeHangul($codepoint)> - -Accepts unicode codepoint integer. - -If the specified codepoint is of a Hangul syllable, -returns a list of codepoints (in a list context) -or a UTF-8 string (in a scalar context) -of its decomposition. - - decomposeHangul(0xAC00) # U+AC00 is HANGUL SYLLABLE GA. - returns "\x{1100}\x{1161}" or (0x1100, 0x1161); - - decomposeHangul(0xAE00) # U+AE00 is HANGUL SYLLABLE GEUL. - returns "\x{1100}\x{1173}\x{11AF}" or (0x1100, 0x1173, 0x11AF); - -Otherwise, returns false (empty string or empty list). - - decomposeHangul(0x0041) # outside Hangul Syllables - returns empty string or empty list. - -=item C<$string_composed = composeHangul($src_string)> - -=item C<@codepoints_composed = composeHangul($src_string)> - -Any sequence of an initial Jamo C and a medial Jamo C -is composed into a syllable C; -then any sequence of a syllable C and a final Jamo C -is composed into a syllable C. 
- -Any characters other than Hangul Jamo and Hangul Syllables -are unaffected. - - composeHangul("Hangul \x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}.") - returns "Hangul \x{AC00}\x{AE00}." or - (0x48,0x61,0x6E,0x67,0x75,0x6C,0x20,0xAC00,0xAE00,0x2E); - -=back - -=head2 Hangul Syllable Name - -=over 4 - -=item C<$name = getHangulName($codepoint)> - -If the specified codepoint is of a Hangul syllable, -returns its name; otherwise returns undef. - - getHangulName(0xAC00) returns "HANGUL SYLLABLE GA"; - getHangulName(0x0041) returns undef. - -=item C<$codepoint = parseHangulName($name)> - -If the specified name is of a Hangul syllable, -returns its codepoint; otherwise returns undef. - - parseHangulName("HANGUL SYLLABLE GEUL") returns 0xAE00; - - parseHangulName("LATIN SMALL LETTER A") returns undef; - - parseHangulName("HANGUL SYLLABLE PERL") returns undef; - # Regrettably, HANGUL SYLLABLE PERL does not exist :-) - -=back - -=head2 EXPORT - -By default, - - decomposeHangul - composeHangul - getHangulName - parseHangulName - -=head1 AUTHOR - -SADAHIRO Tomoyuki - - bqw10602@nifty.com - http://homepage1.nifty.com/nomenclator/perl/ - - Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. - - This program is free software; you can redistribute it and/or - modify it under the same terms as Perl itself. - -=head1 SEE ALSO - -=over 4 - -=item http://www.unicode.org/unicode/reports/tr15 - -Annex 10: Hangul, in Unicode Normalization Forms (UAX #15). - -=back - -=cut diff --git a/lib/Lingua/KO/Hangul/Util/Changes b/lib/Lingua/KO/Hangul/Util/Changes deleted file mode 100644 index 2e43817..0000000 --- a/lib/Lingua/KO/Hangul/Util/Changes +++ /dev/null @@ -1,11 +0,0 @@ -Revision history for Perl extension Lingua::KO::Hangul::Util. - -0.02 Sat Aug 11 00:16:02 2001 - - fix SEE ALSO (the Unicode Normalization Forms is UAX #15) - - getHangulName and parseHangulName return - a list (undef) of one element in list context. 
- -0.01 Fri Aug 3 21:25:11 2001 - - original version; created by h2xs 1.21 with options - -A -X -n Lingua::KO::Hangul::Util - diff --git a/lib/Lingua/KO/Hangul/Util/README b/lib/Lingua/KO/Hangul/Util/README deleted file mode 100644 index 9fc04d8..0000000 --- a/lib/Lingua/KO/Hangul/Util/README +++ /dev/null @@ -1,44 +0,0 @@ -Lingua/KO/Hangul/Util version 0.02 -================================== - -SYNOPSIS - - use Lingua::KO::Hangul::Util; - - decomposeHangul(0xAC00); - # (0x1100,0x1161) or "\x{1100}\x{1161}" - - composeHangul("\x{1100}\x{1161}"); - # "\x{AC00}" - - getHangulName(0xAC00); - # "HANGUL SYLLABLE GA" - - parseHangulName("HANGUL SYLLABLE GA"); - # 0xAC00 - -INSTALLATION - -To install this module type the following: - - perl Makefile.PL - make - make test - make install - -DEPENDENCIES - -Perl 5.006 or later - -COPYRIGHT AND LICENCE - -SADAHIRO Tomoyuki - - bqw10602@nifty.com - - http://homepage1.nifty.com/nomenclator/perl/ - - Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. - - This program is free software; you can redistribute it and/or - modify it under the same terms as Perl itself. diff --git a/lib/Lingua/KO/Hangul/Util/t/test.t b/lib/Lingua/KO/Hangul/Util/t/test.t deleted file mode 100644 index d4a5df5..0000000 --- a/lib/Lingua/KO/Hangul/Util/t/test.t +++ /dev/null @@ -1,55 +0,0 @@ -# Before `make install' is performed this script should be runnable with -# `make test'. After `make install' it should work as `perl test.pl' - -######################### - -use Test; -use strict; -BEGIN { plan tests => 22 }; -use Lingua::KO::Hangul::Util; -ok(1); # If we made it this far, we're ok. - -######################### - -sub unpk { - join ':', map sprintf("%04X", $_), - @_ == 1 ? 
unpack('U*', shift) : @_; -} - -ok(getHangulName(0xAC00), "HANGUL SYLLABLE GA"); -ok(getHangulName(0xAE00), "HANGUL SYLLABLE GEUL"); -ok(getHangulName(0xC544), "HANGUL SYLLABLE A"); -ok(getHangulName(0xD7A3), "HANGUL SYLLABLE HIH"); -ok(getHangulName(0x11A3), undef); -ok(getHangulName(0x0000), undef); - -ok(unpk(decomposeHangul(0xAC00)), "1100:1161"); -ok(unpk(decomposeHangul(0xAE00)), "1100:1173:11AF"); -ok(unpk(scalar decomposeHangul(0xAC00)), "1100:1161"); -ok(unpk(scalar decomposeHangul(0xAE00)), "1100:1173:11AF"); -ok(scalar decomposeHangul(0x0041), undef); -ok(scalar decomposeHangul(0x0000), undef); - -ok(composeHangul("Hangul \x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}."), - "Hangul \x{AC00}\x{AE00}."); - -ok(parseHangulName("HANGUL SYLLABLE GA"), 0xAC00); -ok(parseHangulName("HANGUL SYLLABLE GEUL"), 0xAE00); -ok(parseHangulName("HANGUL SYLLABLE A"), 0xC544); -ok(parseHangulName("HANGUL SYLLABLE HIH"), 0xD7A3); -ok(parseHangulName("HANGUL SYLLABLE PERL"), undef); -ok(parseHangulName("LATIN LETTER SMALL A"), undef); - -my $ng; - -$ng = 0; -foreach my $i (0xAC00..0xD7A3){ - $ng ++ if $i != parseHangulName(getHangulName($i)); -} -ok($ng, 0); - -$ng = 0; -foreach my $i (0xAC00..0xD7A3){ - $ng ++ if $i != (composeHangul scalar decomposeHangul($i))[0]; -} -ok($ng, 0); diff --git a/lib/Unicode/Collate.pm b/lib/Unicode/Collate.pm index 113613e..2ffda37 100644 --- a/lib/Unicode/Collate.pm +++ b/lib/Unicode/Collate.pm @@ -4,7 +4,6 @@ use 5.006; use strict; use warnings; use Carp; -use Lingua::KO::Hangul::Util; require Exporter; our $VERSION = '0.08'; @@ -19,6 +18,15 @@ our @EXPORT = (); (our $Path = $INC{'Unicode/Collate.pm'}) =~ s/\.pm$//; our $KeyFile = "allkeys.txt"; +# Lingua::KO::Hangul::Util not part of the standard distribution +# but it will be used if available. + +eval { require Lingua::KO::Hangul::Util }; +my $hasHangulUtil = ! 
$@; +if ($hasHangulUtil) { + Lingua::KO::Hangul::Util->import(); +} + our %Combin; # combining class from Unicode::Normalize use constant Min2 => 0x20; # minimum weight at level 2 @@ -256,7 +264,10 @@ sub getWt _isHangul($u) ? $hang ? &$hang($u) - : map(@{ $ent->{pack('U', $_)} }, decomposeHangul($u)) + : ($hasHangulUtil ? + map(@{ $ent->{pack('U', $_)} }, decomposeHangul($u)) : + # runtime compile error... + (eval 'use Lingua::KO::Hangul::Util', print $@)) : _isCJK($u) ? $cjk ? &$cjk($u) : map($self->altCE(0,@$_), _CJK($u)) : map($self->altCE(0,@$_), _derivCE($u)); diff --git a/lib/Unicode/Normalize/Changes b/lib/Unicode/Normalize/Changes deleted file mode 100644 index 910016c..0000000 --- a/lib/Unicode/Normalize/Changes +++ /dev/null @@ -1,16 +0,0 @@ -Revision history for Perl extension Unicode::Normalize. - -0.04 Wed Aug 15 19:02:41 2001 - - fix: NFD("") and NFKD("") must return "", not but undef. - -0.03 Fri Aug 10 22:44:18 2001 - - rename the module name to Unicode::Normalize. - - normalize takes two arguments. - -0.02 Thu Aug 9 22:56:36 2001 - - add function normalize - -0.01 Mon Aug 6 21:45:11 2001 - - original version; created by h2xs 1.21 with options - -A -X -n Text::Unicode::Normalize - diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 2cc0ece..0aaccd0 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -135,14 +135,26 @@ sub _getcode { return; } -use Lingua::KO::Hangul::Util; +# Lingua::KO::Hangul::Util not part of the standard distribution +# but it will be used if available. + +eval { require Lingua::KO::Hangul::Util }; +my $hasHangulUtil = ! $@; +if ($hasHangulUtil) { + Lingua::KO::Hangul::Util->import(); +} sub hangul_decomp { # internal: called from charinfo - my @tmp = decomposeHangul(shift); - return - @tmp == 2 ? sprintf("%04X %04X", @tmp) : - @tmp == 3 ? 
sprintf("%04X %04X %04X", @tmp) : - undef; + if ($hasHangulUtil) { + my @tmp = decomposeHangul(shift); + return sprintf("%04X %04X", @tmp) if @tmp == 2; + return sprintf("%04X %04X %04X", @tmp) if @tmp == 3; + } + return; +} + +sub hangul_charname { # internal: called from charinfo + return sprintf("HANGUL SYLLABLE-%04X", shift); } sub han_charname { # internal: called from charinfo @@ -157,7 +169,7 @@ my @CharinfoRanges = ( # CJK Ideographs [ 0x4E00, 0x9FA5, \&han_charname, undef ], # Hangul Syllables - [ 0xAC00, 0xD7A3, \&getHangulName, \&hangul_decomp ], + [ 0xAC00, 0xD7A3, $hasHangulUtil ? \&getHangulName : \&hangul_charname, \&hangul_decomp ], # Non-Private Use High Surrogates [ 0xD800, 0xDB7F, undef, undef ], # Private Use High Surrogates diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 0434eb9..e70e104 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -106,11 +106,11 @@ ok($charinfo->{script}, 'Hebrew'); $charinfo = charinfo(0xAC00); ok($charinfo->{code}, 'AC00'); -ok($charinfo->{name}, 'HANGUL SYLLABLE GA'); +ok($charinfo->{name}, 'HANGUL SYLLABLE-AC00'); ok($charinfo->{category}, 'Lo'); ok($charinfo->{combining}, '0'); ok($charinfo->{bidi}, 'L'); -ok($charinfo->{decomposition}, '1100 1161'); +ok($charinfo->{decomposition}, undef); ok($charinfo->{decimal}, ''); ok($charinfo->{digit}, ''); ok($charinfo->{numeric}, ''); @@ -128,11 +128,11 @@ ok($charinfo->{script}, 'Hangul'); $charinfo = charinfo(0xAE00); ok($charinfo->{code}, 'AE00'); -ok($charinfo->{name}, 'HANGUL SYLLABLE GEUL'); +ok($charinfo->{name}, 'HANGUL SYLLABLE-AE00'); ok($charinfo->{category}, 'Lo'); ok($charinfo->{combining}, '0'); ok($charinfo->{bidi}, 'L'); -ok($charinfo->{decomposition}, '1100 1173 11AF'); +ok($charinfo->{decomposition}, undef); ok($charinfo->{decimal}, ''); ok($charinfo->{digit}, ''); ok($charinfo->{numeric}, ''); diff --git a/win32/Makefile b/win32/Makefile index 5ae84c7..c43b8c8 100644 --- a/win32/Makefile +++ b/win32/Makefile @@ -592,63 +592,65 @@ 
PERLDLL_OBJ = $(PERLDLL_OBJ) $(WIN32_OBJ) $(DLL_OBJ) SETARGV_OBJ = setargv$(o) !ENDIF -DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader -SOCKET = $(EXTDIR)\Socket\Socket -FCNTL = $(EXTDIR)\Fcntl\Fcntl -OPCODE = $(EXTDIR)\Opcode\Opcode -SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File -IO = $(EXTDIR)\IO\IO -POSIX = $(EXTDIR)\POSIX\POSIX -ATTRS = $(EXTDIR)\attrs\attrs -THREAD = $(EXTDIR)\Thread\Thread -B = $(EXTDIR)\B\B -RE = $(EXTDIR)\re\re -DUMPER = $(EXTDIR)\Data\Dumper\Dumper -ERRNO = $(EXTDIR)\Errno\Errno -PEEK = $(EXTDIR)\Devel\Peek\Peek -BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader -DPROF = $(EXTDIR)\Devel\DProf\DProf -GLOB = $(EXTDIR)\File\Glob\Glob -HOSTNAME = $(EXTDIR)\Sys\Hostname\Hostname -STORABLE = $(EXTDIR)\Storable\Storable -FILTER = $(EXTDIR)\Filter\Util\Call\Call -ENCODE = $(EXTDIR)\Encode\Encode -MD5 = $(EXTDIR)\Digest\MD5\MD5 -PERLIOSCALAR = $(EXTDIR)\PerlIO\Scalar\Scalar -MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64 -TIMEHIRES = $(EXTDIR)\Time\HiRes\HiRes -CWD = $(EXTDIR)\Cwd\Cwd -LISTUTIL = $(EXTDIR)\List\Util\Util -PERLIOVIA = $(EXTDIR)\PerlIO\Via\Via -XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap - -SOCKET_DLL = $(AUTODIR)\Socket\Socket.dll -FCNTL_DLL = $(AUTODIR)\Fcntl\Fcntl.dll -OPCODE_DLL = $(AUTODIR)\Opcode\Opcode.dll -SDBM_FILE_DLL = $(AUTODIR)\SDBM_File\SDBM_File.dll -IO_DLL = $(AUTODIR)\IO\IO.dll -POSIX_DLL = $(AUTODIR)\POSIX\POSIX.dll -ATTRS_DLL = $(AUTODIR)\attrs\attrs.dll -THREAD_DLL = $(AUTODIR)\Thread\Thread.dll -B_DLL = $(AUTODIR)\B\B.dll -DUMPER_DLL = $(AUTODIR)\Data\Dumper\Dumper.dll -PEEK_DLL = $(AUTODIR)\Devel\Peek\Peek.dll -RE_DLL = $(AUTODIR)\re\re.dll -BYTELOADER_DLL = $(AUTODIR)\ByteLoader\ByteLoader.dll -DPROF_DLL = $(AUTODIR)\Devel\DProf\DProf.dll -GLOB_DLL = $(AUTODIR)\File\Glob\Glob.dll -HOSTNAME_DLL = $(AUTODIR)\Sys\Hostname\Hostname.dll -STORABLE_DLL = $(AUTODIR)\Storable\Storable.dll -FILTER_DLL = $(AUTODIR)\Filter\Util\Call\Call.dll -ENCODE_DLL = $(AUTODIR)\Encode\Encode.dll -MD5_DLL = $(AUTODIR)\Digest\MD5\MD5.dll -PERLIOSCALAR_DLL= 
$(AUTODIR)\PerlIO\Scalar\Scalar.dll -MIMEBASE64_DLL = $(AUTODIR)\MIME\Base64\Base64.dll -TIMEHIRES_DLL = $(AUTODIR)\Time\HiRes\HiRes.dll -CWD_DLL = $(AUTODIR)\Cwd\Cwd.dll -LISTUTIL_DLL = $(AUTODIR)\List\Util\Util.dll -PERLIOVIA_DLL = $(AUTODIR)\PerlIO\Via\Via.dll -XSTYPEMAP_DLL = $(AUTODIR)\XS\Typemap\Typemap.dll +DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader +SOCKET = $(EXTDIR)\Socket\Socket +FCNTL = $(EXTDIR)\Fcntl\Fcntl +OPCODE = $(EXTDIR)\Opcode\Opcode +SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File +IO = $(EXTDIR)\IO\IO +POSIX = $(EXTDIR)\POSIX\POSIX +ATTRS = $(EXTDIR)\attrs\attrs +THREAD = $(EXTDIR)\Thread\Thread +B = $(EXTDIR)\B\B +RE = $(EXTDIR)\re\re +DUMPER = $(EXTDIR)\Data\Dumper\Dumper +ERRNO = $(EXTDIR)\Errno\Errno +PEEK = $(EXTDIR)\Devel\Peek\Peek +BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader +DPROF = $(EXTDIR)\Devel\DProf\DProf +GLOB = $(EXTDIR)\File\Glob\Glob +HOSTNAME = $(EXTDIR)\Sys\Hostname\Hostname +STORABLE = $(EXTDIR)\Storable\Storable +FILTER = $(EXTDIR)\Filter\Util\Call\Call +ENCODE = $(EXTDIR)\Encode\Encode +MD5 = $(EXTDIR)\Digest\MD5\MD5 +PERLIOSCALAR = $(EXTDIR)\PerlIO\Scalar\Scalar +MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64 +TIMEHIRES = $(EXTDIR)\Time\HiRes\HiRes +CWD = $(EXTDIR)\Cwd\Cwd +LISTUTIL = $(EXTDIR)\List\Util\Util +PERLIOVIA = $(EXTDIR)\PerlIO\Via\Via +XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap +UNICODENORMALIZE = $(EXTDIR)\Unicode\Normalize\Normalize + +SOCKET_DLL = $(AUTODIR)\Socket\Socket.dll +FCNTL_DLL = $(AUTODIR)\Fcntl\Fcntl.dll +OPCODE_DLL = $(AUTODIR)\Opcode\Opcode.dll +SDBM_FILE_DLL = $(AUTODIR)\SDBM_File\SDBM_File.dll +IO_DLL = $(AUTODIR)\IO\IO.dll +POSIX_DLL = $(AUTODIR)\POSIX\POSIX.dll +ATTRS_DLL = $(AUTODIR)\attrs\attrs.dll +THREAD_DLL = $(AUTODIR)\Thread\Thread.dll +B_DLL = $(AUTODIR)\B\B.dll +DUMPER_DLL = $(AUTODIR)\Data\Dumper\Dumper.dll +PEEK_DLL = $(AUTODIR)\Devel\Peek\Peek.dll +RE_DLL = $(AUTODIR)\re\re.dll +BYTELOADER_DLL = $(AUTODIR)\ByteLoader\ByteLoader.dll +DPROF_DLL = $(AUTODIR)\Devel\DProf\DProf.dll +GLOB_DLL 
= $(AUTODIR)\File\Glob\Glob.dll +HOSTNAME_DLL = $(AUTODIR)\Sys\Hostname\Hostname.dll +STORABLE_DLL = $(AUTODIR)\Storable\Storable.dll +FILTER_DLL = $(AUTODIR)\Filter\Util\Call\Call.dll +ENCODE_DLL = $(AUTODIR)\Encode\Encode.dll +MD5_DLL = $(AUTODIR)\Digest\MD5\MD5.dll +PERLIOSCALAR_DLL = $(AUTODIR)\PerlIO\Scalar\Scalar.dll +MIMEBASE64_DLL = $(AUTODIR)\MIME\Base64\Base64.dll +TIMEHIRES_DLL = $(AUTODIR)\Time\HiRes\HiRes.dll +CWD_DLL = $(AUTODIR)\Cwd\Cwd.dll +LISTUTIL_DLL = $(AUTODIR)\List\Util\Util.dll +PERLIOVIA_DLL = $(AUTODIR)\PerlIO\Via\Via.dll +XSTYPEMAP_DLL = $(AUTODIR)\XS\Typemap\Typemap.dll +UNICODENORMALIZE_DLL = $(AUTODIR)\Unicode\Normalize\Normalize.dll EXTENSION_C = \ $(SOCKET).c \ @@ -677,7 +679,8 @@ EXTENSION_C = \ $(CWD).c \ $(LISTUTIL).c \ $(PERLIOVIA).c \ - $(XSTYPEMAP).c + $(XSTYPEMAP).c \ + $(UNICODENORMALIZE).c EXTENSION_DLL = \ $(SOCKET_DLL) \ @@ -706,7 +709,8 @@ EXTENSION_DLL = \ $(CWD_DLL) \ $(LISTUTIL_DLL) \ $(PERLIOVIA_DLL) \ - $(XSTYPEMAP_DLL) + $(XSTYPEMAP_DLL) \ + $(UNICODENORMALIZE_DLL) POD2HTML = $(PODDIR)\pod2html POD2MAN = $(PODDIR)\pod2man @@ -968,6 +972,7 @@ distclean: clean -del /f $(LIBDIR)\Scalar\Util.pm -del /f $(LIBDIR)\Time\HiRes.pm -del /f $(LIBDIR)\XS\Typemap.pm + -del /f $(LIBDIR)\Unicode\Normalize.pm -if exist $(LIBDIR)\IO rmdir /s /q $(LIBDIR)\IO -rmdir /s $(LIBDIR)\IO -if exist $(LIBDIR)\Thread rmdir /s /q $(LIBDIR)\Thread diff --git a/win32/makefile.mk b/win32/makefile.mk index bff4233..fd2b5ff 100644 --- a/win32/makefile.mk +++ b/win32/makefile.mk @@ -748,7 +748,8 @@ SETARGV_OBJ = setargv$(o) DYNAMIC_EXT = Socket IO Fcntl Opcode SDBM_File POSIX attrs Thread B re \ Data/Dumper Devel/Peek ByteLoader Devel/DProf File/Glob \ Sys/Hostname Storable Filter/Util/Call Encode \ - Digest/MD5 PerlIO/Scalar MIME/Base64 Time/HiRes + Digest/MD5 PerlIO/Scalar MIME/Base64 Time/HiRes \ + Unicode/Normalize STATIC_EXT = DynaLoader NONXS_EXT = Errno @@ -1123,6 +1124,7 @@ distclean: clean -del /f $(LIBDIR)\Time\HiRes.pm -del /f 
$(LIBDIR)\List\Util.pm -del /f $(LIBDIR)\Scalar\Util.pm + -del /f $(LIBDIR)\Unicode\Normalize.pm -if exist $(LIBDIR)\IO rmdir /s /q $(LIBDIR)\IO || rmdir /s $(LIBDIR)\IO -if exist $(LIBDIR)\Thread rmdir /s /q $(LIBDIR)\Thread || rmdir /s $(LIBDIR)\Thread -if exist $(LIBDIR)\B rmdir /s /q $(LIBDIR)\B || rmdir /s $(LIBDIR)\B