From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Tue, 19 Mar 2002 14:04:55 +0000 (+0000)
Subject: Update to Unicode::Normalize 0.15 (+ the EBCDIC guards)
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=8f118dcd647d91040f14fd1a562bdd89ac99859a;p=p5sagit%2Fp5-mst-13.2.git

Update to Unicode::Normalize 0.15 (+ the EBCDIC guards)

p4raw-id: //depot/perl@15318
---

diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes
index 35c1b94..c905acc 100644
--- a/ext/Unicode/Normalize/Changes
+++ b/ext/Unicode/Normalize/Changes
@@ -1,5 +1,10 @@
 Revision history for Perl extension Unicode::Normalize.
 
+0.15  Tue Mar 19 22:04:07 2002
+	- Quick check is implemented.
+	- decompose(), reorder(), and compose() are documented.
+	- The Non-XS version is also independent of Lingua::KO::Hangul::Util.
+
 0.14  Sat Feb 02 20:40:14 2002
 	- synchronization with bleadperl.
 	- Change 14128: by Arthur
diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm
index cec5fa7..b3bc9d6 100644
--- a/ext/Unicode/Normalize/Normalize.pm
+++ b/ext/Unicode/Normalize/Normalize.pm
@@ -2,7 +2,7 @@ package Unicode::Normalize;
 
 BEGIN {
     if (ord("A") == 193) {
-	die "Unicode::Normalize not ported to EBCDIC\n";
+       die "Unicode::Normalize not ported to EBCDIC\n";
     }
 }
 
@@ -11,7 +11,7 @@ use strict;
 use warnings;
 use Carp;
 
-our $VERSION = '0.14';
+our $VERSION = '0.15';
 our $PACKAGE = __PACKAGE__;
 
 require Exporter;
@@ -22,9 +22,16 @@ our @ISA = qw(Exporter DynaLoader);
 our @EXPORT = qw( NFC NFD NFKC NFKD );
 our @EXPORT_OK = qw(
     normalize decompose reorder compose
-    getCanon getCompat getComposite getCombinClass isExclusion
+    checkNFD checkNFKD checkNFC checkNFKC check
+    getCanon getCompat getComposite getCombinClass
+    isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
+    isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
+);
+our %EXPORT_TAGS = (
+    all       => [ @EXPORT, @EXPORT_OK ],
+    normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ],
+    check     => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ],
 );
-our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );
 
 bootstrap Unicode::Normalize $VERSION;
 
@@ -32,7 +39,6 @@ use constant COMPAT => 1;
 
 sub NFD  ($) { reorder(decompose($_[0])) }
 sub NFKD ($) { reorder(decompose($_[0], COMPAT)) }
-
 sub NFC  ($) { compose(reorder(decompose($_[0]))) }
 sub NFKC ($) { compose(reorder(decompose($_[0], COMPAT))) }
 
@@ -48,6 +54,18 @@ sub normalize($$)
       croak $PACKAGE."::normalize: invalid form name: $form";
 }
 
+sub check($$)
+{
+    my $form = shift;
+    $form =~ s/^NF//;
+    return
+	$form eq 'D'  ? checkNFD ($_[0]) :
+	$form eq 'C'  ? checkNFC ($_[0]) :
+	$form eq 'KD' ? checkNFKD($_[0]) :
+	$form eq 'KC' ? checkNFKC($_[0]) :
+      croak $PACKAGE."::check: invalid form name: $form";
+}
+
 1;
 __END__
 
@@ -59,19 +77,19 @@ Unicode::Normalize - normalized forms of Unicode text
 
   use Unicode::Normalize;
 
-  $string_NFD  = NFD($raw_string);  # Normalization Form D
-  $string_NFC  = NFC($raw_string);  # Normalization Form C
-  $string_NFKD = NFKD($raw_string); # Normalization Form KD
-  $string_NFKC = NFKC($raw_string); # Normalization Form KC
+  $NFD_string  = NFD($string);  # Normalization Form D
+  $NFC_string  = NFC($string);  # Normalization Form C
+  $NFKD_string = NFKD($string); # Normalization Form KD
+  $NFKC_string = NFKC($string); # Normalization Form KC
 
    or
 
   use Unicode::Normalize 'normalize';
 
-  $string_NFD  = normalize('D',  $raw_string);  # Normalization Form D
-  $string_NFC  = normalize('C',  $raw_string);  # Normalization Form C
-  $string_NFKD = normalize('KD', $raw_string);  # Normalization Form KD
-  $string_NFKC = normalize('KC', $raw_string);  # Normalization Form KC
+  $NFD_string  = normalize('D',  $string);  # Normalization Form D
+  $NFC_string  = normalize('C',  $string);  # Normalization Form C
+  $NFKD_string = normalize('KD', $string);  # Normalization Form KD
+  $NFKC_string = normalize('KC', $string);  # Normalization Form KC
 
 =head1 DESCRIPTION
 
@@ -79,26 +97,25 @@ Unicode::Normalize - normalized forms of Unicode text
 
 =over 4
 
-=item C<$string_NFD = NFD($raw_string)>
+=item C<$NFD_string = NFD($string)>
 
 returns the Normalization Form D (formed by canonical decomposition).
 
-
-=item C<$string_NFC = NFC($raw_string)>
+=item C<$NFC_string = NFC($string)>
 
 returns the Normalization Form C (formed by canonical decomposition
 followed by canonical composition).
 
-=item C<$string_NFKD = NFKD($raw_string)>
+=item C<$NFKD_string = NFKD($string)>
 
 returns the Normalization Form KD (formed by compatibility decomposition).
 
-=item C<$string_NFKC = NFKC($raw_string)>
+=item C<$NFKC_string = NFKC($string)>
 
 returns the Normalization Form KC (formed by compatibility decomposition
 followed by B<canonical> composition).
 
-=item C<$normalized_string = normalize($form_name, $raw_string)>
+=item C<$normalized_string = normalize($form_name, $string)>
 
 As C<$form_name>, one of the following names must be given.
 
@@ -109,6 +126,107 @@ As C<$form_name>, one of the following names must be given.
 
 =back
 
+=head2 Decomposition and Composition
+
+=over 4
+
+=item C<$decomposed_string = decompose($string)>
+
+=item C<$decomposed_string = decompose($string, $useCompatMapping)>
+
+Decompose the specified string and returns the result.
+
+If the second parameter (a boolean) is omitted or false, decomposes it
+using the Canonical Decomposition Mapping.
+If true, decomposes it using the Compatibility Decomposition Mapping.
+
+The string returned is not always in NFD/NFKD.
+Reordering may be required.
+
+    $NFD_string  = reorder(decompose($string));       # eq. to NFD()
+    $NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD()
+
+=item C<$reordered_string  = reorder($string)>
+
+Reorder the combining characters and the like in the canonical ordering
+and returns the result.
+
+E.g., when you have a list of NFD/NFKD strings,
+you can get the concatenated NFD/NFKD string from them, saying
+
+    $concat_NFD  = reorder(join '', @NFD_strings);
+    $concat_NFKD = reorder(join '', @NFKD_strings);
+
+=item C<$composed_string   = compose($string)>
+
+Returns the string where composable pairs are composed.
+
+E.g., when you have a NFD/NFKD string,
+you can get its NFC/NFKC string, saying
+
+    $NFC_string  = compose($NFD_string);
+    $NFKC_string = compose($NFKD_string);
+
+=back
+
+=head2 Quick Check
+
+(see Annex 8, UAX #15; F<DerivedNormalizationProperties.txt>)
+
+The following functions check whether the string is in that normalization form.
+
+The result returned will be:
+
+    YES     The string is in that normalization form.
+    NO      The string is not in that normalization form.
+    MAYBE   Dubious. Maybe yes, maybe no.
+
+=over 4
+
+=item C<$result = checkNFD($string)>
+
+returns YES (1) or NO (empty string).
+
+=item C<$result = checkNFC($string)>
+
+returns YES (1), NO (empty string), or MAYBE (undef).
+
+=item C<$result = checkNFKD($string)>
+
+returns YES (1) or NO (empty string).
+
+=item C<$result = checkNFKC($string)>
+
+returns YES (1), NO (empty string), or MAYBE (undef).
+
+=item C<$result = check($form_name, $string)>
+
+returns YES (1), NO (empty string), or MAYBE (undef).
+
+C<$form_name> is alike to that for C<normalize()>.
+
+=back
+
+B<Note>
+
+In the cases of NFD and NFKD, the answer must be either C<YES> or C<NO>.
+The answer C<MAYBE> may be returned in the cases of NFC and NFKC.
+
+A MAYBE-NFC/NFKC string should contain at least
+one combining character or the like.
+For example, C<COMBINING ACUTE ACCENT> has
+the MAYBE_NFC/MAYBE_NFKC property.
+Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")>
+and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>.
+Though, C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC 
+(its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">),
+while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC.
+
+If you want to check exactly, compare the string with its NFC/NFKC; i.e.,
+
+    $string eq NFC($string)    # more thorough than checkNFC($string)
+    $string eq NFKC($string)   # more thorough than checkNFKC($string)
+
 =head2 Character Data
 
 These functions are interface of character data used internally.
@@ -119,30 +237,54 @@ call them yourself.
 
 =item C<$canonical_decomposed = getCanon($codepoint)>
 
+If the character of the specified codepoint is canonically
+decomposable (including Hangul Syllables),
+returns the B<completely decomposed> string canonically equivalent to it.
+
+If it is not decomposable, returns undef.
+
 =item C<$compatibility_decomposed = getCompat($codepoint)>
 
-If the character of the specified codepoint is canonically or 
-compatibility decomposable (including Hangul Syllables),
-returns the B<completely decomposed> string equivalent to it.
+If the character of the specified codepoint is compatibility
+decomposable (including Hangul Syllables),
+returns the B<completely decomposed> string compatibility equivalent to it.
 
 If it is not decomposable, returns undef.
 
-=item C<$uv_composite = getComposite($uv_here, $uv_next)>
+=item C<$codepoint_composite = getComposite($codepoint_here, $codepoint_next)>
 
 If two characters here and next (as codepoints) are composable
-(including Hangul Jamo/Syllables and Exclusions),
+(including Hangul Jamo/Syllables and Composition Exclusions),
 returns the codepoint of the composite.
 
 If they are not composable, returns undef.
 
 =item C<$combining_class = getCombinClass($codepoint)>
 
-Returns the combining class as integer of the character.
+Returns the combining class of the character as an integer.
 
 =item C<$is_exclusion = isExclusion($codepoint)>
 
+Returns a boolean whether the character of the specified codepoint
+is a composition exclusion.
+
+=item C<$is_singleton = isSingleton($codepoint)>
+
 Returns a boolean whether the character of the specified codepoint is
-a composition exclusion.
+a singleton.
+
+=item C<$is_non_startar_decomposition = isNonStDecomp($codepoint)>
+
+Returns a boolean whether the canonical decomposition
+of the character of the specified codepoint
+is a Non-Starter Decomposition.
+
+=item C<$may_be_composed_with_prev_char = isComp2nd($codepoint)>
+
+Returns a boolean whether the character of the specified codepoint
+may be composed with the previous one in a certain composition
+(including Hangul Compositions, but excluding
+Composition Exclusions and Non-Starter Decompositions).
 
 =back
 
@@ -152,16 +294,6 @@ C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default.
 
 C<normalize> and other some functions: on request.
 
-=head2 TODO
-
-Unicode::Normalize has not been ported to EBCDIC.  The code mostly
-would work just fine but a decision needs to be made: how the module
-should work in EBCDIC?  Should the low 256 characters be understood as
-Unicode or as EBCDIC code points?  Should one be chosen or should
-there be a way to do either?  Or should such translation be left
-outside the module for the user to do, for example by using
-Encode::from_to()?
-
 =head1 AUTHOR
 
 SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
@@ -181,6 +313,10 @@ SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
 
 Unicode Normalization Forms - UAX #15
 
+=item http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProperties.txt
+
+Derived Normalization Properties
+
 =back
 
 =cut
diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs
index 7a11095..59da98e 100644
--- a/ext/Unicode/Normalize/Normalize.xs
+++ b/ext/Unicode/Normalize/Normalize.xs
@@ -62,16 +62,19 @@ int compare_cc(const void *a, const void *b)
 {
     int ret_cc;
     ret_cc = (*(UNF_cc*)a).cc - (*(UNF_cc*)b).cc;
-    if(ret_cc) return ret_cc;
+    if (ret_cc)
+	return ret_cc;
     return (*(UNF_cc*)a).pos - (*(UNF_cc*)b).pos;
 }
 
 U8* dec_canonical (UV uv)
 {
     U8 ***plane, **row;
-    if(OVER_UTF_MAX(uv)) return NULL;
+    if (OVER_UTF_MAX(uv))
+	return NULL;
     plane = (U8***)UNF_canon[uv >> 16];
-    if(! plane) return NULL;
+    if (! plane)
+	return NULL;
     row = plane[(uv >> 8) & 0xff];
     return row ? row[uv & 0xff] : NULL;
 }
@@ -79,9 +82,11 @@ U8* dec_canonical (UV uv)
 U8* dec_compat (UV uv)
 {
     U8 ***plane, **row;
-    if(OVER_UTF_MAX(uv)) return NULL;
+    if (OVER_UTF_MAX(uv))
+	return NULL;
     plane = (U8***)UNF_compat[uv >> 16];
-    if(! plane) return NULL;
+    if (! plane)
+	return NULL;
     row = plane[(uv >> 8) & 0xff];
     return row ? row[uv & 0xff] : NULL;
 }
@@ -90,25 +95,30 @@ UV composite_uv (UV uv, UV uv2)
 {
     UNF_complist ***plane, **row, *cell, *i;
 
-    if(! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2)) return 0;
+    if (! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2))
+	return 0;
 
-    if(Hangul_IsL(uv) && Hangul_IsV(uv2)) {
+    if (Hangul_IsL(uv) && Hangul_IsV(uv2)) {
 	uv  -= Hangul_LBase; /* lindex */
 	uv2 -= Hangul_VBase; /* vindex */
 	return(Hangul_SBase + (uv * Hangul_VCount + uv2) * Hangul_TCount);
     }
-    if(Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
+    if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
 	uv2 -= Hangul_TBase; /* tindex */
 	return(uv + uv2);
     }
     plane = UNF_compos[uv >> 16];
-    if(! plane) return 0;
+    if (! plane)
+	return 0;
     row = plane[(uv >> 8) & 0xff];
-    if(! row)   return 0;
+    if (! row)
+	return 0;
     cell = row[uv & 0xff];
-    if(! cell)  return 0;
-    for(i = cell; i->nextchar; i++) {
-	if(uv2 == i->nextchar) return i->composite;
+    if (! cell)
+	return 0;
+    for (i = cell; i->nextchar; i++) {
+	if (uv2 == i->nextchar)
+	    return i->composite;
     }
     return 0;
 }
@@ -116,9 +126,11 @@ UV composite_uv (UV uv, UV uv2)
 U8 getCombinClass (UV uv)
 {
     U8 **plane, *row;
-    if(OVER_UTF_MAX(uv)) return 0;
+    if (OVER_UTF_MAX(uv))
+	return 0;
     plane = (U8**)UNF_combin[uv >> 16];
-    if(! plane) return 0;
+    if (! plane)
+	return 0;
     row = plane[(uv >> 8) & 0xff];
     return row ? row[uv & 0xff] : 0;
 }
@@ -128,7 +140,8 @@ void sv_cat_decompHangul (SV* sv, UV uv)
     UV sindex, lindex, vindex, tindex;
     U8 *t, tmp[3 * UTF8_MAXLEN + 1];
 
-    if(! Hangul_IsS(uv)) return;
+    if (! Hangul_IsS(uv))
+	return;
 
     sindex =  uv - Hangul_SBase;
     lindex =  sindex / Hangul_NCount;
@@ -138,7 +151,8 @@ void sv_cat_decompHangul (SV* sv, UV uv)
     t = tmp;
     t = uvuni_to_utf8(t, (lindex + Hangul_LBase));
     t = uvuni_to_utf8(t, (vindex + Hangul_VBase));
-    if (tindex) t = uvuni_to_utf8(t, (tindex + Hangul_TBase));
+    if (tindex)
+	t = uvuni_to_utf8(t, (tindex + Hangul_TBase));
     *t = '\0';
     sv_catpvn(sv, (char *)tmp, strlen((char *)tmp));
 }
@@ -157,7 +171,7 @@ decompose(arg, compat = &PL_sv_no)
     U8 *s, *e, *p, *r;
     bool iscompat;
   CODE:
-    if(SvUTF8(arg)) {
+    if (SvUTF8(arg)) {
 	src = arg;
     } else {
 	src = sv_mortalcopy(arg);
@@ -171,14 +185,17 @@ decompose(arg, compat = &PL_sv_no)
 
     s = (U8*)SvPV(src,srclen);
     e = s + srclen;
-    for(p = s; p < e;){
+    for (p = s; p < e;) {
 	uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
 	p += retlen;
-	if(Hangul_IsS(uv)) sv_cat_decompHangul(dst, uv);
+	if (Hangul_IsS(uv))
+	    sv_cat_decompHangul(dst, uv);
 	else {
 	    r = iscompat ? dec_compat(uv) : dec_canonical(uv);
-	    if(r) sv_catpv(dst, (char *)r);
-	    else  sv_catpvn(dst, (char *)p - retlen, retlen);
+	    if (r)
+		sv_catpv(dst, (char *)r);
+	    else
+		sv_catpvn(dst, (char *)p - retlen, retlen);
 	}
     }
     RETVAL = dst;
@@ -192,22 +209,33 @@ reorder(arg)
     SV * arg
   PROTOTYPE: $
   PREINIT:
-    SV *src;
-    STRLEN srclen, retlen, stk_cc_max;
-    U8 *s, *e, *p, curCC;
+    SV *src, *dst;
+    STRLEN srclen, dstlen, retlen, stk_cc_max;
+    U8 *s, *e, *p, *d, curCC;
     UV uv;
     UNF_cc * stk_cc;
   CODE:
-    src = newSVsv(arg);
-    if(! SvUTF8(arg)) sv_utf8_upgrade(src);
+    if (SvUTF8(arg)) {
+	src = arg;
+    } else {
+	src = sv_mortalcopy(arg);
+	sv_utf8_upgrade(src);
+    }
+
+    s = (U8*)SvPV(src, srclen);
+
+    dstlen = srclen + 1;
+    dst = newSV(dstlen);
+    sv_setpvn(dst,s,srclen);
+    SvUTF8_on(dst);
 
     stk_cc_max = 10; /* enough as an initial value? */
     New(0, stk_cc, stk_cc_max, UNF_cc);
 
-    s = (U8*)SvPV(src,srclen);
-    e = s + srclen;
+    d = (U8*)SvPV(dst,dstlen);
+    e = d + dstlen;
 
-    for(p = s; p < e;){
+    for (p = d; p < e;) {
 	U8 *cc_in;
 	STRLEN cc_len, cc_iter, cc_pos;
 
@@ -215,20 +243,24 @@ reorder(arg)
 	curCC = getCombinClass(uv);
 	p += retlen;
 
-	if(! (curCC && p < e)) continue; else cc_in = p - retlen;
+	if (! (curCC && p < e))
+	    continue;
+	else
+	    cc_in = p - retlen;
 
 	cc_pos = 0;
 	stk_cc[cc_pos].cc  = curCC;
 	stk_cc[cc_pos].uv  = uv;
 	stk_cc[cc_pos].pos = cc_pos;
 
-	while(p < e) {
+	while (p < e) {
 	    uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
 	    curCC = getCombinClass(uv);
-	    if(!curCC) break;
+	    if (!curCC)
+		break;
 	    p += retlen;
 	    cc_pos++;
-	    if(stk_cc_max <= cc_pos) { /* extend if need */
+	    if (stk_cc_max <= cc_pos) { /* extend if need */
 		stk_cc_max = cc_pos + 1;
 		Renew(stk_cc, stk_cc_max, UNF_cc);
 	    }
@@ -238,18 +270,19 @@ reorder(arg)
 	}
 
 	 /* only one c.c. in cc_len from cc_in, no need of reordering */
-	if(!cc_pos) continue;
+	if (!cc_pos)
+	    continue;
 
 	qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc);
 
 	cc_len = p - cc_in;
 	p = cc_in;
-	for(cc_iter = 0; cc_iter <= cc_pos; cc_iter++) {
+	for (cc_iter = 0; cc_iter <= cc_pos; cc_iter++) {
 	    p = uvuni_to_utf8(p, stk_cc[cc_iter].uv);
 	}
     }
     Safefree(stk_cc);
-    RETVAL = src;
+    RETVAL = dst;
   OUTPUT:
     RETVAL
 
@@ -266,7 +299,7 @@ compose(arg)
     STRLEN srclen, dstlen, tmplen, retlen;
     bool beginning = TRUE;
   CODE:
-    if(SvUTF8(arg)) {
+    if (SvUTF8(arg)) {
 	src = arg;
     } else {
 	src = sv_mortalcopy(arg);
@@ -286,12 +319,12 @@ compose(arg)
     (void)SvPOK_only(tmp);
     SvUTF8_on(tmp);
 
-    for(p = s; p < e;){
-	if(beginning) {
+    for (p = s; p < e;) {
+	if (beginning) {
 	    uvS = utf8n_to_uvuni(p, e - p, &retlen, 0);
 	    p += retlen;
 
-            if (getCombinClass(uvS)){ /* no Starter found yet */
+            if (getCombinClass(uvS)) { /* no Starter found yet */
 		d = uvuni_to_utf8(d, uvS);
 		continue;
 	    }
@@ -303,20 +336,18 @@ compose(arg)
 	preCC = 0;
 
     /* to the next Starter */
-	while(p < e) {
+	while (p < e) {
 	    uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
 	    p += retlen;
 	    curCC = getCombinClass(uv);
 
-	    if(preCC && preCC == curCC) {
+	    if (preCC && preCC == curCC) {
 		preCC = curCC;
 		t = uvuni_to_utf8(t, uv);
 	    } else {
 		uvComp = composite_uv(uvS, uv);
 
-	/* S + C + S => S-S + C would be also blocked. */
-		if( uvComp && ! isExclusion(uvComp) && preCC <= curCC)
-		{
+		if (uvComp && ! isExclusion(uvComp) && preCC <= curCC) {
 		    STRLEN leftcur, rightcur, dstcur;
 		    leftcur  = UNISKIP(uvComp);
 		    rightcur = UNISKIP(uvS) + UNISKIP(uv);
@@ -326,7 +357,6 @@ compose(arg)
 			dstlen += leftcur - rightcur;
 			d = (U8*)SvGROW(dst,dstlen) + dstcur;
 		    }
-
 		    /* preCC not changed to curCC */
 		    uvS = uvComp;
 	        } else if (! curCC && p < e) { /* blocked */
@@ -341,7 +371,8 @@ compose(arg)
 	tmplen = t - tmp_start;
 	if (tmplen) { /* uncomposed combining char */
 	    t = (U8*)SvPVX(tmp);
-	    while(tmplen--) *d++ = *t++;
+	    while (tmplen--)
+		*d++ = *t++;
 	}
 	uvS = uv;
     } /* for */
@@ -352,13 +383,169 @@ compose(arg)
 
 
 
+void
+checkNFD(arg)
+    SV * arg
+  PROTOTYPE: $
+  ALIAS:
+    checkNFKD = 1
+  PREINIT:
+    UV uv;
+    SV *src;
+    STRLEN srclen, retlen;
+    U8 *s, *e, *p, curCC, preCC;
+  PPCODE:
+    if (SvUTF8(arg)) {
+	src = arg;
+    } else {
+	src = sv_mortalcopy(arg);
+	sv_utf8_upgrade(src);
+    }
+    
+    s = (U8*)SvPV(src,srclen);
+    e = s + srclen;
+
+    preCC = 0;
+    for (p = s; p < e; p += retlen) {
+	uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
+	curCC = getCombinClass(uv);
+	if (preCC > curCC && curCC != 0) /* canonical ordering violated */
+	    XSRETURN_NO;
+	if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv)))
+	    XSRETURN_NO;
+	preCC = curCC;
+    }
+    XSRETURN_YES;
+
+
+
+void
+checkNFC(arg)
+    SV * arg
+  PROTOTYPE: $
+  ALIAS:
+    checkNFKC = 1
+  PREINIT:
+    UV uv;
+    SV *src;
+    STRLEN srclen, retlen;
+    U8 *s, *e, *p, curCC, preCC;
+    bool isMAYBE;
+  PPCODE:
+    if (SvUTF8(arg)) {
+	src = arg;
+    } else {
+	src = sv_mortalcopy(arg);
+	sv_utf8_upgrade(src);
+    }
+    
+    s = (U8*)SvPV(src,srclen);
+    e = s + srclen;
+
+    preCC = 0;
+    isMAYBE = FALSE;
+    for (p = s; p < e; p += retlen) {
+	uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
+	curCC = getCombinClass(uv);
+
+	if (preCC > curCC && curCC != 0) /* canonical ordering violated */
+	    XSRETURN_NO;
+
+	/* get NFC/NFKC property */
+	if (Hangul_IsS(uv)) /* Hangul syllables are canonical composites */
+	    ; /* YES */
+	else if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
+	    XSRETURN_NO;
+	else if (isComp2nd(uv))
+	    isMAYBE = TRUE;
+	else if (ix) {
+	    char *canon, *compat;
+	 /*
+	  * NFKC_NO when having compatibility mapping;
+	  * i.e. dec_compat(uv) defined & different with dec_canonical(uv).
+	  */
+	    canon  = (char *) dec_canonical(uv);
+	    compat = (char *) dec_compat(uv);
+	    if (compat && (!canon || strNE(canon, compat)))
+		XSRETURN_NO;
+	} /* end of get NFC/NFKC property */
+
+	preCC = curCC;
+    }
+    if (isMAYBE)
+	XSRETURN_UNDEF;
+    else
+	XSRETURN_YES;
+
+
+
 U8
 getCombinClass(uv)
     UV uv
+  PROTOTYPE: $
 
 bool
 isExclusion(uv)
     UV uv
+  PROTOTYPE: $
+
+bool
+isSingleton(uv)
+    UV uv
+  PROTOTYPE: $
+
+bool
+isNonStDecomp(uv)
+    UV uv
+  PROTOTYPE: $
+
+bool
+isComp2nd(uv)
+    UV uv
+  PROTOTYPE: $
+  ALIAS:
+    isNFC_MAYBE  = 1
+    isNFKC_MAYBE = 2
+
+
+
+void
+isNFD_NO(uv)
+    UV uv
+  PROTOTYPE: $
+  ALIAS:
+    isNFKD_NO = 1
+  PPCODE:
+    if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv)))
+	XSRETURN_YES; /* NFD_NO or NFKD_NO */
+    else
+	XSRETURN_NO;
+
+
+
+void
+isComp_Ex(uv)
+    UV uv
+  PROTOTYPE: $
+  ALIAS:
+    isNFC_NO  = 0
+    isNFKC_NO = 1
+  PPCODE:
+    if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
+	XSRETURN_YES; /* NFC_NO or NFKC_NO */
+    else if (ix) {
+	char *canon, *compat;
+	canon  = (char *) dec_canonical(uv);
+	compat = (char *) dec_compat(uv);
+	if (compat && (!canon || strNE(canon, compat)))
+	    XSRETURN_YES; /* NFC_NO or NFKC_NO */
+	else
+	    XSRETURN_NO;
+    }
+    else
+	XSRETURN_NO;
+
+
 
 SV*
 getComposite(uv, uv2)
@@ -373,6 +560,8 @@ getComposite(uv, uv2)
   OUTPUT:
     RETVAL
 
+
+
 SV*
 getCanon(uv)
     UV uv
@@ -382,7 +571,7 @@ getCanon(uv)
   PREINIT:
     U8 * rstr;
   CODE:
-    if(Hangul_IsS(uv)) {
+    if (Hangul_IsS(uv)) {
 	SV * dst;
 	dst = newSV(1);
 	(void)SvPOK_only(dst);
@@ -390,7 +579,8 @@ getCanon(uv)
 	RETVAL = dst;
     } else {
 	rstr = ix ? dec_compat(uv) : dec_canonical(uv);
-	if(!rstr) XSRETURN_UNDEF;
+	if (!rstr)
+	    XSRETURN_UNDEF;
 	RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr));
     }
     SvUTF8_on(RETVAL);
diff --git a/ext/Unicode/Normalize/README b/ext/Unicode/Normalize/README
index 2404f2f..1f28333 100644
--- a/ext/Unicode/Normalize/README
+++ b/ext/Unicode/Normalize/README
@@ -1,4 +1,4 @@
-Unicode/Normalize version 0.14
+Unicode/Normalize version 0.15
 ===================================
 
 Unicode::Normalize - normalized forms of Unicode text
@@ -7,23 +7,26 @@ SYNOPSIS
 
   use Unicode::Normalize;
 
-  $string_NFD  = NFD($string);  # Normalization Form D
-  $string_NFC  = NFC($string);  # Normalization Form C
-  $string_NFKD = NFKD($string); # Normalization Form KD
-  $string_NFKC = NFKC($string); # Normalization Form KC
+  $NFD_string  = NFD($string);  # Normalization Form D
+  $NFC_string  = NFC($string);  # Normalization Form C
+  $NFKD_string = NFKD($string); # Normalization Form KD
+  $NFKC_string = NFKC($string); # Normalization Form KC
+
+
+  or
 
-   or
 
   use Unicode::Normalize 'normalize';
 
-  $string_NFD  = normalize('D',  $string);  # Normalization Form D
-  $string_NFC  = normalize('C',  $string);  # Normalization Form C
-  $string_NFKD = normalize('KD', $string);  # Normalization Form KD
-  $string_NFKC = normalize('KC', $string);  # Normalization Form KC
+  $NFD_string  = normalize('D',  $string);  # Normalization Form D
+  $NFC_string  = normalize('C',  $string);  # Normalization Form C
+  $NFKD_string = normalize('KD', $string);  # Normalization Form KD
+  $NFKC_string = normalize('KC', $string);  # Normalization Form KC
+
 
 INSTALLATION
 
-Perl 5.006 or later
+Perl 5.6 or later
 
 To install this module type the following:
 
@@ -58,26 +61,30 @@ Exporter
 File::Copy
 File::Spec
 
-unicore/CombiningClass.pl
-  (or unicode/CombiningClass.pl)
-unicore/Decomposition.pl
-  (or unicode/Decomposition.pl)
-unicore/CompositionExclusions.txt
-  (or unicode/CompExcl.txt)
+unicore/CombiningClass.pl         (or unicode/CombiningClass.pl)
+unicore/Decomposition.pl          (or unicode/Decomposition.pl)
+unicore/CompositionExclusions.txt (or unicode/CompExcl.txt)
 
-# CAVEAT:
-# In bleadperl, unicore/CompExcl.txt is renamed
-# unicore/CompositionExclusions.txt.
+CAVEAT
 
-And for the NoXS version, in addition to the above,
+(1) In bleadperl, unicore/CompExcl.txt is renamed
+  unicore/CompositionExclusions.txt.
 
-  Lingua::KO::Hangul::Util 0.06
+(2) When these unicore/*.* files are updated;
 
-is required.
+  in the case of an XS version:
+    You must rebuild the module,
+    as the data will be compiled on building.
+
+  in the case of a NoXS version:
+    Rebuilding is not necessary,
+    as the data will be read on requirement.
 
 COPYRIGHT AND LICENCE
 
-SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
+  SADAHIRO Tomoyuki
+
+  SADAHIRO@cpan.org
 
   http://homepage1.nifty.com/nomenclator/perl/
 
@@ -85,4 +92,3 @@ SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
 
   This program is free software; you can redistribute it and/or
   modify it under the same terms as Perl itself.
-
diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader
index 12c5aa8..67a132b 100644
--- a/ext/Unicode/Normalize/mkheader
+++ b/ext/Unicode/Normalize/mkheader
@@ -14,257 +14,316 @@ use Carp;
 our $PACKAGE = 'Unicode::Normalize, mkheader';
 
 our $Combin = do "unicore/CombiningClass.pl"
-  || do "unicode/CombiningClass.pl"
-  || croak "$PACKAGE: CombiningClass.pl not found";
+    || do "unicode/CombiningClass.pl"
+    || croak "$PACKAGE: CombiningClass.pl not found";
 
 our $Decomp = do "unicore/Decomposition.pl"
-  || do "unicode/Decomposition.pl"
-  || croak "$PACKAGE: Decomposition.pl not found";
+    || do "unicode/Decomposition.pl"
+    || croak "$PACKAGE: Decomposition.pl not found";
 
 our %Combin; # $codepoint => $number      : combination class
 our %Canon;  # $codepoint => $hexstring   : canonical decomp.
 our %Compat; # $codepoint => $hexstring   : compat. decomp.
 our %Compos; # $string    => $codepoint   : composite
-
 our %Exclus; # $codepoint => 1            : composition exclusions
+our %Single; # $codepoint => 1            : singletons
+our %NonStD; # $codepoint => 1            : non-starter decompositions
 
 {
-  my($f, $fh);
-  foreach my $d (@INC) {
-    use File::Spec;
-    $f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt");
-    last if open($fh, $f);
-    $f = File::Spec->catfile($d, "unicode", "CompExcl.txt");
-    last if open($fh, $f);
-    $f = undef;
-  }
-  croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f;
-  while(<$fh>) {
-    next if /^#/ or /^$/;
-    s/#.*//;
-    $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/;
-  }
-  close $fh;
+    my($f, $fh);
+    foreach my $d (@INC) {
+	use File::Spec;
+	$f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt");
+	last if open($fh, $f);
+	$f = File::Spec->catfile($d, "unicode", "CompExcl.txt");
+	last if open($fh, $f);
+	$f = undef;
+    }
+	croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f;
+	while (<$fh>) {
+	    next if /^#/ or /^$/;
+	    s/#.*//;
+	    $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/;
+	}
+    close $fh;
 }
 
-while($Combin =~ /(.+)/g) {
-  my @tab = split /\t/, $1;
-  my $ini = hex $tab[0];
-  if($tab[1] eq '') {
-    $Combin{ $ini } = $tab[2];
-  } else {
-    $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]);
-  }
+while ($Combin =~ /(.+)/g) {
+    my @tab = split /\t/, $1;
+    my $ini = hex $tab[0];
+    if ($tab[1] eq '') {
+	$Combin{ $ini } = $tab[2];
+    } else {
+	$Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]);
+    }
 }
 
-while($Decomp =~ /(.+)/g) {
-  my @tab = split /\t/, $1;
-  my $compat = $tab[2] =~ s/<[^>]+>//;
-  my $dec = [ _getHexArray($tab[2]) ]; # decomposition
-  my $com = pack('U*', @$dec); # composable sequence
-  my $ini = hex($tab[0]);
-  if($tab[1] eq '') {
-    $Compat{ $ini } = $dec;
-    if(! $compat) {
-      $Canon{  $ini } = $dec;
-      $Compos{ $com } = $ini if @$dec > 1;
+while ($Decomp =~ /(.+)/g) {
+    my @tab = split /\t/, $1;
+    my $compat = $tab[2] =~ s/<[^>]+>//;
+    my $dec = [ _getHexArray($tab[2]) ]; # decomposition
+    my $com = pack('U*', @$dec); # composable sequence
+    my $ini = hex($tab[0]); # initial decomposable character
+    if ($tab[1] eq '') {
+	$Compat{ $ini } = $dec;
+
+	if (! $compat) {
+	    $Canon{  $ini } = $dec;
+
+	    if (@$dec > 1) {
+		if ($Combin{ $dec->[0] }) {
+		    $NonStD{ $ini } = 1;
+		} else {
+		    $Compos{ $com } = $ini;
+		}
+	    } else {
+		$Single{ $ini } = 1;
+	    }
+	}
+    } else {
+	foreach my $u ($ini .. hex($tab[1])){
+	    $Compat{ $u } = $dec;
+	    if (! $compat) {
+		$Canon{  $u }   = $dec;
+
+		if (@$dec > 1) {
+		    if ($Combin{ $dec->[0] }) {
+			$NonStD{ $u } = 1;
+		    } else {
+			$Compos{ $com } = $u;
+		    }
+		} else {
+		    $Single{ $u } = 1;
+		}
+	    }
+	}
     }
-  } else {
-    foreach my $u ($ini .. hex($tab[1])){
-      $Compat{ $u } = $dec;
-      if(! $compat){
-        $Canon{  $u }   = $dec;
-        $Compos{ $com } = $ini if @$dec > 1;
-      }
-    }
-  }
 }
 
 # exhaustive decomposition
 foreach my $key (keys %Canon) {
-  $Canon{$key}  = [ getCanonList($key) ];
+    $Canon{$key}  = [ getCanonList($key) ];
 }
 
 # exhaustive decomposition
 foreach my $key (keys %Compat) { 
-  $Compat{$key} = [ getCompatList($key) ];
+    $Compat{$key} = [ getCompatList($key) ];
 }
 
 sub getCanonList {
-  my @src = @_;
-  my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src;
-  join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
-  # condition @src == @dec is not ok.
+    my @src = @_;
+    my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src;
+    return join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
+    # condition @src == @dec is not ok.
 }
 
 sub getCompatList {
-  my @src = @_;
-  my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src;
-  join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
-  # condition @src == @dec is not ok.
+    my @src = @_;
+    my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src;
+    return join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
+    # condition @src == @dec is not ok.
 }
 
-sub _getHexArray {
-  my $str = shift;
-  map hex(), $str =~ /([0-9A-Fa-f]+)/g;
-}
+sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g }
 
 sub _U_stringify {
-  sprintf '"%s"', join '',
-    map sprintf("\\x%02x", $_), unpack 'C*', pack 'U*', @_;
+    sprintf '"%s"', join '',
+	map sprintf("\\x%02x", $_), unpack 'C*', pack 'U*', @_;
 }
 
 foreach my $hash (\%Canon, \%Compat) {
-  foreach my $key (keys %$hash) {
-    $hash->{$key} = _U_stringify( @{ $hash->{$key} } );
-  }
+    foreach my $key (keys %$hash) {
+	$hash->{$key} = _U_stringify( @{ $hash->{$key} } );
+    }
 }
 
 my $prefix = "UNF_";
-
 my $structname = "${prefix}complist";
 
-our (%Comp1st, %CompList);
+our (%Comp1st, %Comp2nd, %CompList);
 
-foreach(sort keys %Compos) {
-  my @a = unpack('U*', $_);
-  my $val = $Compos{$_};
-  my $name = sprintf "${structname}_%06x", $a[0];
-  $Comp1st{ $a[0] } = $name;
-  $CompList{ $name }{ $a[1] } = $val;
+foreach (sort keys %Compos) {
+    my @a = unpack('U*', $_);
+    my $val = $Compos{$_};
+    my $name = sprintf "${structname}_%06x", $a[0];
+    $Comp1st{$a[0]} = $name;
+    $Comp2nd{$a[1]} = 1 if ! $Exclus{$Compos{$_}} && ! $Combin{$a[0]};
+    $CompList{$name}{$a[1]} = $val;
+}
+
+# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamo
+foreach (0x1161..0x1175, 0x11A8..0x11C2) {
+    $Comp2nd{$_} = 1;
 }
 
 my $compinit =
-  "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
+    "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
 
 foreach my $i (sort keys %CompList) {
-  $compinit .= "$structname $i [] = {\n";
-  $compinit .= join ",\n", 
-    map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
-    sort {$a <=> $b } keys %{ $CompList{$i} };
-  $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
+    $compinit .= "$structname $i [] = {\n";
+    $compinit .= join ",\n",
+	map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
+	    sort {$a <=> $b } keys %{ $CompList{$i} };
+    $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
 }
 
 ####################################
 
-my @Exclus = sort {$a <=> $b} keys %Exclus;
+my @boolfunc = (
+    {
+	name => "Exclusion",
+	type => "bool",
+	hash => \%Exclus,
+    },
+    {
+	name => "Singleton",
+	type => "bool",
+	hash => \%Single,
+    },
+    {
+	name => "NonStDecomp",
+	type => "bool",
+	hash => \%NonStD,
+    },
+    {
+	name => "Comp2nd",
+	type => "bool",
+	hash => \%Comp2nd,
+    },
+);
 
 my $file = "unfexc.h";
 open FH, ">$file" or croak "$PACKAGE: $file can't be made";
 binmode FH; select FH;
 
-print "bool isExclusion (UV uv) \n{\nreturn\n\t";
+    print << 'EOF';
+/*
+ * This file is auto-generated by mkheader.
+ * Any changes here will be lost!
+ */
+EOF
 
-while(@Exclus) {
-  my $cur = shift @Exclus;
-  if(@Exclus && $cur + 1 == $Exclus[0]) {
-    print "($cur <= uv && uv <= ";
-    while(@Exclus && $cur + 1 == $Exclus[0]) {
-      $cur = shift @Exclus;
+foreach my $tbl (@boolfunc) {
+    my @temp = sort {$a <=> $b} keys %{$tbl->{hash}};
+    my $type = $tbl->{type};
+    my $name = $tbl->{name};
+    print "$type is$name (UV uv)\n{\nreturn\n\t";
+
+    while (@temp) {
+	my $cur = shift @temp;
+	if (@temp && $cur + 1 == $temp[0]) {
+	    print "($cur <= uv && uv <= ";
+	    while (@temp && $cur + 1 == $temp[0]) {
+		$cur = shift @temp;
+	    }
+	    print "$cur)";
+	    print "\n\t|| " if @temp;
+	} else {
+	    print "uv == $cur";
+	    print "\n\t|| " if @temp;
+	}
     }
-    print "$cur)";
-    print "\n\t|| " if @Exclus;
-  } else {
-    print "uv == $cur";
-    print "\n\t|| " if @Exclus;
-  }
+    print "\n\t? TRUE : FALSE;\n}\n\n";
 }
 
-print "\n\t? TRUE : FALSE;\n}\n\n";
 close FH;
 
+
 ####################################
 
 my @tripletable = (
-  {
-    file => "unfcmb",
-    name => "combin",
-    type => "STDCHAR",
-    hash => \%Combin,
-    null =>  0,
-  },
-  {
-    file => "unfcan",
-    name => "canon",
-    type => "char*",
-    hash => \%Canon,
-    null => "NULL",
-  },
-  {
-    file => "unfcpt",
-    name => "compat",
-    type => "char*",
-    hash => \%Compat,
-    null => "NULL",
-  },
-  {
-    file => "unfcmp",
-    name => "compos",
-    type => "$structname *",
-    hash => \%Comp1st,
-    null => "NULL",
-    init => $compinit,
-  },
+    {
+	file => "unfcmb",
+	name => "combin",
+	type => "STDCHAR",
+	hash => \%Combin,
+	null =>  0,
+    },
+    {
+	file => "unfcan",
+	name => "canon",
+	type => "char*",
+	hash => \%Canon,
+	null => "NULL",
+    },
+    {
+	file => "unfcpt",
+	name => "compat",
+	type => "char*",
+	hash => \%Compat,
+	null => "NULL",
+    },
+    {
+	file => "unfcmp",
+	name => "compos",
+	type => "$structname *",
+	hash => \%Comp1st,
+	null => "NULL",
+	init => $compinit,
+    },
 );
 
 foreach my $tbl (@tripletable) {
-  my $file = "$tbl->{file}.h";
-  my $head = "${prefix}$tbl->{name}";
-  my $type = $tbl->{type};
-  my $hash = $tbl->{hash};
-  my $null = $tbl->{null};
-  my $init = $tbl->{init};
-
-  open FH, ">$file" or croak "$PACKAGE: $file can't be made";
-  binmode FH; select FH;
-  my %val;
-
-  print FH << 'EOF';
+    my $file = "$tbl->{file}.h";
+    my $head = "${prefix}$tbl->{name}";
+    my $type = $tbl->{type};
+    my $hash = $tbl->{hash};
+    my $null = $tbl->{null};
+    my $init = $tbl->{init};
+
+    open FH, ">$file" or croak "$PACKAGE: $file can't be made";
+    binmode FH; select FH;
+    my %val;
+
+    print FH << 'EOF';
 /*
  * This file is auto-generated by mkheader.
  * Any changes here will be lost!
  */
 EOF
 
-  print $init if defined $init;
-
-  foreach my $uv (keys %$hash) {
-    my @c = unpack 'CCCC', pack 'N', $uv;
-    $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
-  }
-
-  foreach my $p (sort { $a <=> $b } keys %val) {
-    next if ! $val{ $p };
-    for(my $r = 0; $r < 256; $r++){
-      next if ! $val{ $p }{ $r };
-      printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r;
-      for(my $c = 0; $c < 256; $c++){
-        print "\t", defined $val{$p}{$r}{$c}
-	  ? "($type)".$val{$p}{$r}{$c} : $null;
-        print ','  if $c != 255;
-        print "\n" if $c % 8 == 7;
-      }
-      print "};\n\n";
+    print $init if defined $init;
+
+    foreach my $uv (keys %$hash) {
+	my @c = unpack 'CCCC', pack 'N', $uv;
+	$val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
+    }
+
+    foreach my $p (sort { $a <=> $b } keys %val) {
+	next if ! $val{ $p };
+	for (my $r = 0; $r < 256; $r++) {
+	    next if ! $val{ $p }{ $r };
+	    printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r;
+	    for (my $c = 0; $c < 256; $c++) {
+		print "\t", defined $val{$p}{$r}{$c}
+		    ? "($type)".$val{$p}{$r}{$c}
+		    : $null;
+		print ','  if $c != 255;
+		print "\n" if $c % 8 == 7;
+	    }
+	    print "};\n\n";
+	}
+    }
+    foreach my $p (sort { $a <=> $b } keys %val) {
+	next if ! $val{ $p };
+	printf "$type* ${head}_%02x [256] = {\n", $p;
+	for (my $r = 0; $r < 256; $r++) {
+	    print $val{ $p }{ $r }
+		? sprintf("${head}_%02x_%02x", $p, $r)
+		: "NULL";
+	    print ','  if $r != 255;
+	    print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
+	}
+	print "};\n\n";
     }
-  }
-  foreach my $p (sort { $a <=> $b } keys %val) {
-    next if ! $val{ $p };
-    printf "$type* ${head}_%02x [256] = {\n", $p;
-    for(my $r = 0; $r < 256; $r++){
-      print $val{ $p }{ $r } ? sprintf("${head}_%02x_%02x", $p, $r) : "NULL";
-      print ','  if $r != 255;
-      print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
+    print "$type** $head [] = {\n";
+    for (my $p = 0; $p <= 0x10; $p++) {
+	print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
+	print ','  if $p != 0x10;
+	print "\n";
     }
     print "};\n\n";
-  }
-  print "$type** $head [] = {\n";
-  for(my $p = 0; $p <= 0x10; $p++){
-    print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
-    print ','  if $p != 0x10;
-    print "\n";
-  }
-  print "};\n\n";
-  close FH;
+    close FH;
 }
 
 __END__
diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t
index c436a7c..86320cc 100644
--- a/ext/Unicode/Normalize/t/func.t
+++ b/ext/Unicode/Normalize/t/func.t
@@ -3,8 +3,8 @@
 
 BEGIN {
     if (ord("A") == 193) {
-	print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
-	exit 0;
+       print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+       exit 0;
     }
 }
 
@@ -13,7 +13,7 @@ BEGIN {
 use Test;
 use strict;
 use warnings;
-BEGIN { plan tests => 6 };
+BEGIN { plan tests => 10 };
 use Unicode::Normalize qw(:all);
 ok(1); # If we made it this far, we're ok.
 
@@ -22,7 +22,7 @@ ok(1); # If we made it this far, we're ok.
 print getCombinClass(   0) == 0
    && getCombinClass( 768) == 230
    && getCombinClass(1809) == 36
-#  && getCombinClass(119143) == 1
+#  && getCombinClass(119143) == 1 # U+1D167, a Unicode 3.1 character
   ? "ok" : "not ok", " 2\n";
 
 print ! defined getCanon( 0)
@@ -68,13 +68,60 @@ print ! defined getComposite( 0,  0)
    && 0xADF8 == getComposite(0x1100, 0x1173)
    && ! defined getComposite(0x1100, 0x11AF)
    && ! defined getComposite(0x1173, 0x11AF)
+   && ! defined getComposite(0xAC00, 0x11A7)
+   && 0xAC01 == getComposite(0xAC00, 0x11A8)
    && 0xAE00 == getComposite(0xADF8, 0x11AF)
   ? "ok" : "not ok", " 5\n";
 
 print ! isExclusion( 0)
    && ! isExclusion(41)
-   && isExclusion(2392)
-   && isExclusion(3907)
-   && isExclusion(64334)
+   && isExclusion(2392)  # DEVANAGARI LETTER QA
+   && isExclusion(3907)  # TIBETAN LETTER GHA
+   && isExclusion(64334) # HEBREW LETTER PE WITH RAFE
   ? "ok" : "not ok", " 6\n";
 
+print ! isSingleton( 0)
+   && isSingleton(0x212B) # ANGSTROM SIGN
+  ? "ok" : "not ok", " 7\n";
+
+print reorder("") eq ""
+   && reorder(pack("U*", 0x0041, 0x0300, 0x0315, 0x0313, 0x031b, 0x0061))
+      eq pack("U*", 0x0041, 0x031b, 0x0300, 0x0313, 0x0315, 0x0061)
+   && reorder(pack("U*", 0x00C1, 0x0300, 0x0315, 0x0313, 0x031b,
+	0x0061, 0x309A, 0x3099))
+      eq pack("U*", 0x00C1, 0x031b, 0x0300, 0x0313, 0x0315,
+	0x0061, 0x309A, 0x3099)
+  ? "ok" : "not ok", " 8\n";
+
+sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
+
+print answer(checkNFD(""))  eq "YES"
+  &&  answer(checkNFC(""))  eq "YES"
+  &&  answer(checkNFKD("")) eq "YES"
+  &&  answer(checkNFKC("")) eq "YES"
+  &&  answer(check("NFD", "")) eq "YES"
+  &&  answer(check("NFC", "")) eq "YES"
+  &&  answer(check("NFKD","")) eq "YES"
+  &&  answer(check("NFKC","")) eq "YES"
+# U+0000 to U+007F are prenormalized in all the normalization forms.
+  && answer(checkNFD("AZaz\t12!#`"))  eq "YES"
+  && answer(checkNFC("AZaz\t12!#`"))  eq "YES"
+  && answer(checkNFKD("AZaz\t12!#`")) eq "YES"
+  && answer(checkNFKC("AZaz\t12!#`")) eq "YES"
+  && answer(check("D", "AZaz\t12!#`")) eq "YES"
+  && answer(check("C", "AZaz\t12!#`")) eq "YES"
+  && answer(check("KD","AZaz\t12!#`")) eq "YES"
+  && answer(check("KC","AZaz\t12!#`")) eq "YES"
+  ? "ok" : "not ok", " 9\n";
+
+print 1
+  && answer(checkNFD(NFD(pack('U*', 0xC1, 0x1100, 0x1173, 0x11AF)))) eq "YES"
+  && answer(checkNFD(pack('U*', 0x20, 0xC1, 0x1100, 0x1173, 0x11AF))) eq "NO"
+  && answer(checkNFC(pack('U*', 0x20, 0xC1, 0x1173, 0x11AF))) eq "MAYBE"
+  && answer(checkNFC(pack('U*', 0x20, 0xC1, 0xAE00, 0x1100))) eq "YES"
+  && answer(checkNFC(pack('U*', 0x20, 0xC1, 0xAE00, 0x1100, 0x300))) eq "MAYBE"
+  && answer(checkNFC(pack('U*', 0x20, 0xC1, 0xFF71, 0x2025))) eq "YES"
+  && answer(check("NFC", pack('U*', 0x20, 0xC1, 0x212B, 0x300))) eq "NO"
+  && answer(checkNFKD(pack('U*', 0x20, 0xC1, 0xFF71, 0x2025))) eq "NO"
+  && answer(checkNFKC(pack('U*', 0x20, 0xC1, 0xAE00, 0x2025))) eq "NO"
+  ? "ok" : "not ok", " 10\n";