From: Jarkko Hietaniemi Date: Tue, 19 Mar 2002 14:04:55 +0000 (+0000) Subject: Update to Unicode::Normalize 0.15 (+ the EBCDIC guards) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=8f118dcd647d91040f14fd1a562bdd89ac99859a;p=p5sagit%2Fp5-mst-13.2.git Update to Unicode::Normalize 0.15 (+ the EBCDIC guards) p4raw-id: //depot/perl@15318 --- diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes index 35c1b94..c905acc 100644 --- a/ext/Unicode/Normalize/Changes +++ b/ext/Unicode/Normalize/Changes @@ -1,5 +1,10 @@ Revision history for Perl extension Unicode::Normalize. +0.15 Tue Mar 19 22:04:07 2002 + - Quick check is implemented. + - decompose(), reorder(), and compose() are documented. + - The Non-XS version is also independent of Lingua::KO::Hangul::Util. + 0.14 Sat Feb 02 20:40:14 2002 - synchronization with bleadperl. - Change 14128: by Arthur diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm index cec5fa7..b3bc9d6 100644 --- a/ext/Unicode/Normalize/Normalize.pm +++ b/ext/Unicode/Normalize/Normalize.pm @@ -2,7 +2,7 @@ package Unicode::Normalize; BEGIN { if (ord("A") == 193) { - die "Unicode::Normalize not ported to EBCDIC\n"; + die "Unicode::Normalize not ported to EBCDIC\n"; } } @@ -11,7 +11,7 @@ use strict; use warnings; use Carp; -our $VERSION = '0.14'; +our $VERSION = '0.15'; our $PACKAGE = __PACKAGE__; require Exporter; @@ -22,9 +22,16 @@ our @ISA = qw(Exporter DynaLoader); our @EXPORT = qw( NFC NFD NFKC NFKD ); our @EXPORT_OK = qw( normalize decompose reorder compose - getCanon getCompat getComposite getCombinClass isExclusion + checkNFD checkNFKD checkNFC checkNFKC check + getCanon getCompat getComposite getCombinClass + isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex + isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE +); +our %EXPORT_TAGS = ( + all => [ @EXPORT, @EXPORT_OK ], + normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ], + check => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ], ); -our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] ); bootstrap Unicode::Normalize $VERSION; @@ -32,7 +39,6 @@ use constant COMPAT => 1; sub NFD ($) { reorder(decompose($_[0])) } sub NFKD ($) { reorder(decompose($_[0], COMPAT)) } - sub NFC ($) { compose(reorder(decompose($_[0]))) } sub NFKC ($) { compose(reorder(decompose($_[0], COMPAT))) } @@ -48,6 +54,18 @@ sub normalize($$) croak $PACKAGE."::normalize: invalid form name: $form"; } +sub check($$) +{ + my $form = shift; + $form =~ s/^NF//; + return + $form eq 'D' ? checkNFD ($_[0]) : + $form eq 'C' ? checkNFC ($_[0]) : + $form eq 'KD' ? checkNFKD($_[0]) : + $form eq 'KC' ? checkNFKC($_[0]) : + croak $PACKAGE."::check: invalid form name: $form"; +} + 1; __END__ @@ -59,19 +77,19 @@ Unicode::Normalize - normalized forms of Unicode text use Unicode::Normalize; - $string_NFD = NFD($raw_string); # Normalization Form D - $string_NFC = NFC($raw_string); # Normalization Form C - $string_NFKD = NFKD($raw_string); # Normalization Form KD - $string_NFKC = NFKC($raw_string); # Normalization Form KC + $NFD_string = NFD($string); # Normalization Form D + $NFC_string = NFC($string); # Normalization Form C + $NFKD_string = NFKD($string); # Normalization Form KD + $NFKC_string = NFKC($string); # Normalization Form KC or use Unicode::Normalize 'normalize'; - $string_NFD = normalize('D', $raw_string); # Normalization Form D - $string_NFC = normalize('C', $raw_string); # Normalization Form C - $string_NFKD = normalize('KD', $raw_string); # Normalization Form KD - $string_NFKC = normalize('KC', $raw_string); # Normalization Form KC + $NFD_string = normalize('D', $string); # Normalization Form D + $NFC_string = normalize('C', $string); # Normalization Form C + $NFKD_string = normalize('KD', $string); # Normalization Form KD + $NFKC_string = normalize('KC', $string); # Normalization Form KC =head1 DESCRIPTION @@ -79,26 +97,25 @@ Unicode::Normalize - normalized forms of Unicode text =over 4 -=item C<$string_NFD = NFD($raw_string)> +=item C<$NFD_string = NFD($string)> returns the Normalization Form D (formed by canonical decomposition). - -=item C<$string_NFC = NFC($raw_string)> +=item C<$NFC_string = NFC($string)> returns the Normalization Form C (formed by canonical decomposition followed by canonical composition). -=item C<$string_NFKD = NFKD($raw_string)> +=item C<$NFKD_string = NFKD($string)> returns the Normalization Form KD (formed by compatibility decomposition). -=item C<$string_NFKC = NFKC($raw_string)> +=item C<$NFKC_string = NFKC($string)> returns the Normalization Form KC (formed by compatibility decomposition followed by B composition). -=item C<$normalized_string = normalize($form_name, $raw_string)> +=item C<$normalized_string = normalize($form_name, $string)> As C<$form_name>, one of the following names must be given. @@ -109,6 +126,107 @@ As C<$form_name>, one of the following names must be given. =back +=head2 Decomposition and Composition + +=over 4 + +=item C<$decomposed_string = decompose($string)> + +=item C<$decomposed_string = decompose($string, $useCompatMapping)> + +Decompose the specified string and returns the result. + +If the second parameter (a boolean) is omitted or false, decomposes it +using the Canonical Decomposition Mapping. +If true, decomposes it using the Compatibility Decomposition Mapping. + +The string returned is not always in NFD/NFKD. +Reordering may be required. + + $NFD_string = reorder(decompose($string)); # eq. to NFD() + $NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD() + +=item C<$reordered_string = reorder($string)> + +Reorder the combining characters and the like in the canonical ordering +and returns the result. + +E.g., when you have a list of NFD/NFKD strings, +you can get the concatenated NFD/NFKD string from them, saying + + $concat_NFD = reorder(join '', @NFD_strings); + $concat_NFKD = reorder(join '', @NFKD_strings); + +=item C<$composed_string = compose($string)> + +Returns the string where composable pairs are composed. + +E.g., when you have a NFD/NFKD string, +you can get its NFC/NFKC string, saying + + $NFC_string = compose($NFD_string); + $NFKC_string = compose($NFKD_string); + +=back + +=head2 Quick Check + +(see Annex 8, UAX #15; F) + +The following functions check whether the string is in that normalization form. + +The result returned will be: + + YES The string is in that normalization form. + NO The string is not in that normalization form. + MAYBE Dubious. Maybe yes, maybe no. + +=over 4 + +=item C<$result = checkNFD($string)> + +returns YES (1) or NO (empty string). + +=item C<$result = checkNFC($string)> + +returns YES (1), NO (empty string), or MAYBE (undef). + +=item C<$result = checkNFKD($string)> + +returns YES (1) or NO (empty string). + +=item C<$result = checkNFKC($string)> + +returns YES (1), NO (empty string), or MAYBE (undef). + +=item C<$result = check($form_name, $string)> + +returns YES (1), NO (empty string), or MAYBE (undef). + +C<$form_name> is alike to that for C. + +=back + +B + +In the cases of NFD and NFKD, the answer must be either C or C. +The answer C may be returned in the cases of NFC and NFKC. + +A MAYBE-NFC/NFKC string should contain at least +one combining character or the like. +For example, C has +the MAYBE_NFC/MAYBE_NFKC property. +Both C +and C will return C. +Though, C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC +(its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">), +while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC. + +If you want to check exactly, compare the string with its NFC/NFKC; i.e., + + $string eq NFC($string) # more thorough than checkNFC($string) + $string eq NFKC($string) # more thorough than checkNFKC($string) + =head2 Character Data These functions are interface of character data used internally. @@ -119,30 +237,54 @@ call them yourself. =item C<$canonical_decomposed = getCanon($codepoint)> +If the character of the specified codepoint is canonically +decomposable (including Hangul Syllables), +returns the B string canonically equivalent to it. + +If it is not decomposable, returns undef. + =item C<$compatibility_decomposed = getCompat($codepoint)> -If the character of the specified codepoint is canonically or -compatibility decomposable (including Hangul Syllables), -returns the B string equivalent to it. +If the character of the specified codepoint is compatibility +decomposable (including Hangul Syllables), +returns the B string compatibility equivalent to it. If it is not decomposable, returns undef. -=item C<$uv_composite = getComposite($uv_here, $uv_next)> +=item C<$codepoint_composite = getComposite($codepoint_here, $codepoint_next)> If two characters here and next (as codepoints) are composable -(including Hangul Jamo/Syllables and Exclusions), +(including Hangul Jamo/Syllables and Composition Exclusions), returns the codepoint of the composite. If they are not composable, returns undef. =item C<$combining_class = getCombinClass($codepoint)> -Returns the combining class as integer of the character. +Returns the combining class of the character as an integer. =item C<$is_exclusion = isExclusion($codepoint)> +Returns a boolean whether the character of the specified codepoint +is a composition exclusion. + +=item C<$is_singleton = isSingleton($codepoint)> + Returns a boolean whether the character of the specified codepoint is -a composition exclusion. +a singleton. + +=item C<$is_non_startar_decomposition = isNonStDecomp($codepoint)> + +Returns a boolean whether the canonical decomposition +of the character of the specified codepoint +is a Non-Starter Decomposition. + +=item C<$may_be_composed_with_prev_char = isComp2nd($codepoint)> + +Returns a boolean whether the character of the specified codepoint +may be composed with the previous one in a certain composition +(including Hangul Compositions, but excluding +Composition Exclusions and Non-Starter Decompositions). =back @@ -152,16 +294,6 @@ C, C, C, C: by default. C and other some functions: on request. -=head2 TODO - -Unicode::Normalize has not been ported to EBCDIC. The code mostly -would work just fine but a decision needs to be made: how the module -should work in EBCDIC? Should the low 256 characters be understood as -Unicode or as EBCDIC code points? Should one be chosen or should -there be a way to do either? Or should such translation be left -outside the module for the user to do, for example by using -Encode::from_to()? - =head1 AUTHOR SADAHIRO Tomoyuki, ESADAHIRO@cpan.orgE @@ -181,6 +313,10 @@ SADAHIRO Tomoyuki, ESADAHIRO@cpan.orgE Unicode Normalization Forms - UAX #15 +=item http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProperties.txt + +Derived Normalization Properties + =back =cut diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs index 7a11095..59da98e 100644 --- a/ext/Unicode/Normalize/Normalize.xs +++ b/ext/Unicode/Normalize/Normalize.xs @@ -62,16 +62,19 @@ int compare_cc(const void *a, const void *b) { int ret_cc; ret_cc = (*(UNF_cc*)a).cc - (*(UNF_cc*)b).cc; - if(ret_cc) return ret_cc; + if (ret_cc) + return ret_cc; return (*(UNF_cc*)a).pos - (*(UNF_cc*)b).pos; } U8* dec_canonical (UV uv) { U8 ***plane, **row; - if(OVER_UTF_MAX(uv)) return NULL; + if (OVER_UTF_MAX(uv)) + return NULL; plane = (U8***)UNF_canon[uv >> 16]; - if(! plane) return NULL; + if (! plane) + return NULL; row = plane[(uv >> 8) & 0xff]; return row ? row[uv & 0xff] : NULL; } @@ -79,9 +82,11 @@ U8* dec_canonical (UV uv) U8* dec_compat (UV uv) { U8 ***plane, **row; - if(OVER_UTF_MAX(uv)) return NULL; + if (OVER_UTF_MAX(uv)) + return NULL; plane = (U8***)UNF_compat[uv >> 16]; - if(! plane) return NULL; + if (! plane) + return NULL; row = plane[(uv >> 8) & 0xff]; return row ? row[uv & 0xff] : NULL; } @@ -90,25 +95,30 @@ UV composite_uv (UV uv, UV uv2) { UNF_complist ***plane, **row, *cell, *i; - if(! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2)) return 0; + if (! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2)) + return 0; - if(Hangul_IsL(uv) && Hangul_IsV(uv2)) { + if (Hangul_IsL(uv) && Hangul_IsV(uv2)) { uv -= Hangul_LBase; /* lindex */ uv2 -= Hangul_VBase; /* vindex */ return(Hangul_SBase + (uv * Hangul_VCount + uv2) * Hangul_TCount); } - if(Hangul_IsLV(uv) && Hangul_IsT(uv2)) { + if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) { uv2 -= Hangul_TBase; /* tindex */ return(uv + uv2); } plane = UNF_compos[uv >> 16]; - if(! plane) return 0; + if (! plane) + return 0; row = plane[(uv >> 8) & 0xff]; - if(! row) return 0; + if (! row) + return 0; cell = row[uv & 0xff]; - if(! cell) return 0; - for(i = cell; i->nextchar; i++) { - if(uv2 == i->nextchar) return i->composite; + if (! cell) + return 0; + for (i = cell; i->nextchar; i++) { + if (uv2 == i->nextchar) + return i->composite; } return 0; } @@ -116,9 +126,11 @@ UV composite_uv (UV uv, UV uv2) U8 getCombinClass (UV uv) { U8 **plane, *row; - if(OVER_UTF_MAX(uv)) return 0; + if (OVER_UTF_MAX(uv)) + return 0; plane = (U8**)UNF_combin[uv >> 16]; - if(! plane) return 0; + if (! plane) + return 0; row = plane[(uv >> 8) & 0xff]; return row ? row[uv & 0xff] : 0; } @@ -128,7 +140,8 @@ void sv_cat_decompHangul (SV* sv, UV uv) UV sindex, lindex, vindex, tindex; U8 *t, tmp[3 * UTF8_MAXLEN + 1]; - if(! Hangul_IsS(uv)) return; + if (! Hangul_IsS(uv)) + return; sindex = uv - Hangul_SBase; lindex = sindex / Hangul_NCount; @@ -138,7 +151,8 @@ void sv_cat_decompHangul (SV* sv, UV uv) t = tmp; t = uvuni_to_utf8(t, (lindex + Hangul_LBase)); t = uvuni_to_utf8(t, (vindex + Hangul_VBase)); - if (tindex) t = uvuni_to_utf8(t, (tindex + Hangul_TBase)); + if (tindex) + t = uvuni_to_utf8(t, (tindex + Hangul_TBase)); *t = '\0'; sv_catpvn(sv, (char *)tmp, strlen((char *)tmp)); } @@ -157,7 +171,7 @@ decompose(arg, compat = &PL_sv_no) U8 *s, *e, *p, *r; bool iscompat; CODE: - if(SvUTF8(arg)) { + if (SvUTF8(arg)) { src = arg; } else { src = sv_mortalcopy(arg); @@ -171,14 +185,17 @@ decompose(arg, compat = &PL_sv_no) s = (U8*)SvPV(src,srclen); e = s + srclen; - for(p = s; p < e;){ + for (p = s; p < e;) { uv = utf8n_to_uvuni(p, e - p, &retlen, 0); p += retlen; - if(Hangul_IsS(uv)) sv_cat_decompHangul(dst, uv); + if (Hangul_IsS(uv)) + sv_cat_decompHangul(dst, uv); else { r = iscompat ? dec_compat(uv) : dec_canonical(uv); - if(r) sv_catpv(dst, (char *)r); - else sv_catpvn(dst, (char *)p - retlen, retlen); + if (r) + sv_catpv(dst, (char *)r); + else + sv_catpvn(dst, (char *)p - retlen, retlen); } } RETVAL = dst; @@ -192,22 +209,33 @@ reorder(arg) SV * arg PROTOTYPE: $ PREINIT: - SV *src; - STRLEN srclen, retlen, stk_cc_max; - U8 *s, *e, *p, curCC; + SV *src, *dst; + STRLEN srclen, dstlen, retlen, stk_cc_max; + U8 *s, *e, *p, *d, curCC; UV uv; UNF_cc * stk_cc; CODE: - src = newSVsv(arg); - if(! SvUTF8(arg)) sv_utf8_upgrade(src); + if (SvUTF8(arg)) { + src = arg; + } else { + src = sv_mortalcopy(arg); + sv_utf8_upgrade(src); + } + + s = (U8*)SvPV(src, srclen); + + dstlen = srclen + 1; + dst = newSV(dstlen); + sv_setpvn(dst,s,srclen); + SvUTF8_on(dst); stk_cc_max = 10; /* enough as an initial value? */ New(0, stk_cc, stk_cc_max, UNF_cc); - s = (U8*)SvPV(src,srclen); - e = s + srclen; + d = (U8*)SvPV(dst,dstlen); + e = d + dstlen; - for(p = s; p < e;){ + for (p = d; p < e;) { U8 *cc_in; STRLEN cc_len, cc_iter, cc_pos; @@ -215,20 +243,24 @@ reorder(arg) curCC = getCombinClass(uv); p += retlen; - if(! (curCC && p < e)) continue; else cc_in = p - retlen; + if (! (curCC && p < e)) + continue; + else + cc_in = p - retlen; cc_pos = 0; stk_cc[cc_pos].cc = curCC; stk_cc[cc_pos].uv = uv; stk_cc[cc_pos].pos = cc_pos; - while(p < e) { + while (p < e) { uv = utf8n_to_uvuni(p, e - p, &retlen, 0); curCC = getCombinClass(uv); - if(!curCC) break; + if (!curCC) + break; p += retlen; cc_pos++; - if(stk_cc_max <= cc_pos) { /* extend if need */ + if (stk_cc_max <= cc_pos) { /* extend if need */ stk_cc_max = cc_pos + 1; Renew(stk_cc, stk_cc_max, UNF_cc); } @@ -238,18 +270,19 @@ reorder(arg) } /* only one c.c. in cc_len from cc_in, no need of reordering */ - if(!cc_pos) continue; + if (!cc_pos) + continue; qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc); cc_len = p - cc_in; p = cc_in; - for(cc_iter = 0; cc_iter <= cc_pos; cc_iter++) { + for (cc_iter = 0; cc_iter <= cc_pos; cc_iter++) { p = uvuni_to_utf8(p, stk_cc[cc_iter].uv); } } Safefree(stk_cc); - RETVAL = src; + RETVAL = dst; OUTPUT: RETVAL @@ -266,7 +299,7 @@ compose(arg) STRLEN srclen, dstlen, tmplen, retlen; bool beginning = TRUE; CODE: - if(SvUTF8(arg)) { + if (SvUTF8(arg)) { src = arg; } else { src = sv_mortalcopy(arg); @@ -286,12 +319,12 @@ compose(arg) (void)SvPOK_only(tmp); SvUTF8_on(tmp); - for(p = s; p < e;){ - if(beginning) { + for (p = s; p < e;) { + if (beginning) { uvS = utf8n_to_uvuni(p, e - p, &retlen, 0); p += retlen; - if (getCombinClass(uvS)){ /* no Starter found yet */ + if (getCombinClass(uvS)) { /* no Starter found yet */ d = uvuni_to_utf8(d, uvS); continue; } @@ -303,20 +336,18 @@ compose(arg) preCC = 0; /* to the next Starter */ - while(p < e) { + while (p < e) { uv = utf8n_to_uvuni(p, e - p, &retlen, 0); p += retlen; curCC = getCombinClass(uv); - if(preCC && preCC == curCC) { + if (preCC && preCC == curCC) { preCC = curCC; t = uvuni_to_utf8(t, uv); } else { uvComp = composite_uv(uvS, uv); - /* S + C + S => S-S + C would be also blocked. */ - if( uvComp && ! isExclusion(uvComp) && preCC <= curCC) - { + if (uvComp && ! isExclusion(uvComp) && preCC <= curCC) { STRLEN leftcur, rightcur, dstcur; leftcur = UNISKIP(uvComp); rightcur = UNISKIP(uvS) + UNISKIP(uv); @@ -326,7 +357,6 @@ compose(arg) dstlen += leftcur - rightcur; d = (U8*)SvGROW(dst,dstlen) + dstcur; } - /* preCC not changed to curCC */ uvS = uvComp; } else if (! curCC && p < e) { /* blocked */ @@ -341,7 +371,8 @@ compose(arg) tmplen = t - tmp_start; if (tmplen) { /* uncomposed combining char */ t = (U8*)SvPVX(tmp); - while(tmplen--) *d++ = *t++; + while (tmplen--) + *d++ = *t++; } uvS = uv; } /* for */ @@ -352,13 +383,169 @@ compose(arg) +void +checkNFD(arg) + SV * arg + PROTOTYPE: $ + ALIAS: + checkNFKD = 1 + PREINIT: + UV uv; + SV *src; + STRLEN srclen, retlen; + U8 *s, *e, *p, curCC, preCC; + PPCODE: + if (SvUTF8(arg)) { + src = arg; + } else { + src = sv_mortalcopy(arg); + sv_utf8_upgrade(src); + } + + s = (U8*)SvPV(src,srclen); + e = s + srclen; + + preCC = 0; + for (p = s; p < e; p += retlen) { + uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + curCC = getCombinClass(uv); + if (preCC > curCC && curCC != 0) /* canonical ordering violated */ + XSRETURN_NO; + if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv))) + XSRETURN_NO; + preCC = curCC; + } + XSRETURN_YES; + + + +void +checkNFC(arg) + SV * arg + PROTOTYPE: $ + ALIAS: + checkNFKC = 1 + PREINIT: + UV uv; + SV *src; + STRLEN srclen, retlen; + U8 *s, *e, *p, curCC, preCC; + bool isMAYBE; + PPCODE: + if (SvUTF8(arg)) { + src = arg; + } else { + src = sv_mortalcopy(arg); + sv_utf8_upgrade(src); + } + + s = (U8*)SvPV(src,srclen); + e = s + srclen; + + preCC = 0; + isMAYBE = FALSE; + for (p = s; p < e; p += retlen) { + uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + curCC = getCombinClass(uv); + + if (preCC > curCC && curCC != 0) /* canonical ordering violated */ + XSRETURN_NO; + + /* get NFC/NFKC property */ + if (Hangul_IsS(uv)) /* Hangul syllables are canonical composites */ + ; /* YES */ + else if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) + XSRETURN_NO; + else if (isComp2nd(uv)) + isMAYBE = TRUE; + else if (ix) { + char *canon, *compat; + /* + * NFKC_NO when having compatibility mapping; + * i.e. dec_compat(uv) defined & different with dec_canonical(uv). + */ + canon = (char *) dec_canonical(uv); + compat = (char *) dec_compat(uv); + if (compat && (!canon || strNE(canon, compat))) + XSRETURN_NO; + } /* end of get NFC/NFKC property */ + + preCC = curCC; + } + if (isMAYBE) + XSRETURN_UNDEF; + else + XSRETURN_YES; + + + U8 getCombinClass(uv) UV uv + PROTOTYPE: $ bool isExclusion(uv) UV uv + PROTOTYPE: $ + +bool +isSingleton(uv) + UV uv + PROTOTYPE: $ + +bool +isNonStDecomp(uv) + UV uv + PROTOTYPE: $ + +bool +isComp2nd(uv) + UV uv + PROTOTYPE: $ + ALIAS: + isNFC_MAYBE = 1 + isNFKC_MAYBE = 2 + + + +void +isNFD_NO(uv) + UV uv + PROTOTYPE: $ + ALIAS: + isNFKD_NO = 1 + PPCODE: + if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv))) + XSRETURN_YES; /* NFD_NO or NFKD_NO */ + else + XSRETURN_NO; + + + +void +isComp_Ex(uv) + UV uv + PROTOTYPE: $ + ALIAS: + isNFC_NO = 0 + isNFKC_NO = 1 + PPCODE: + if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) + XSRETURN_YES; /* NFC_NO or NFKC_NO */ + else if (ix) { + char *canon, *compat; + canon = (char *) dec_canonical(uv); + compat = (char *) dec_compat(uv); + if (compat && (!canon || strNE(canon, compat))) + XSRETURN_YES; /* NFC_NO or NFKC_NO */ + else + XSRETURN_NO; + } + else + XSRETURN_NO; + + SV* getComposite(uv, uv2) @@ -373,6 +560,8 @@ getComposite(uv, uv2) OUTPUT: RETVAL + + SV* getCanon(uv) UV uv @@ -382,7 +571,7 @@ getCanon(uv) PREINIT: U8 * rstr; CODE: - if(Hangul_IsS(uv)) { + if (Hangul_IsS(uv)) { SV * dst; dst = newSV(1); (void)SvPOK_only(dst); @@ -390,7 +579,8 @@ getCanon(uv) RETVAL = dst; } else { rstr = ix ? dec_compat(uv) : dec_canonical(uv); - if(!rstr) XSRETURN_UNDEF; + if (!rstr) + XSRETURN_UNDEF; RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr)); } SvUTF8_on(RETVAL); diff --git a/ext/Unicode/Normalize/README b/ext/Unicode/Normalize/README index 2404f2f..1f28333 100644 --- a/ext/Unicode/Normalize/README +++ b/ext/Unicode/Normalize/README @@ -1,4 +1,4 @@ -Unicode/Normalize version 0.14 +Unicode/Normalize version 0.15 =================================== Unicode::Normalize - normalized forms of Unicode text @@ -7,23 +7,26 @@ SYNOPSIS use Unicode::Normalize; - $string_NFD = NFD($string); # Normalization Form D - $string_NFC = NFC($string); # Normalization Form C - $string_NFKD = NFKD($string); # Normalization Form KD - $string_NFKC = NFKC($string); # Normalization Form KC + $NFD_string = NFD($string); # Normalization Form D + $NFC_string = NFC($string); # Normalization Form C + $NFKD_string = NFKD($string); # Normalization Form KD + $NFKC_string = NFKC($string); # Normalization Form KC + + + or - or use Unicode::Normalize 'normalize'; - $string_NFD = normalize('D', $string); # Normalization Form D - $string_NFC = normalize('C', $string); # Normalization Form C - $string_NFKD = normalize('KD', $string); # Normalization Form KD - $string_NFKC = normalize('KC', $string); # Normalization Form KC + $NFD_string = normalize('D', $string); # Normalization Form D + $NFC_string = normalize('C', $string); # Normalization Form C + $NFKD_string = normalize('KD', $string); # Normalization Form KD + $NFKC_string = normalize('KC', $string); # Normalization Form KC + INSTALLATION -Perl 5.006 or later +Perl 5.6 or later To install this module type the following: @@ -58,26 +61,30 @@ Exporter File::Copy File::Spec -unicore/CombiningClass.pl - (or unicode/CombiningClass.pl) -unicore/Decomposition.pl - (or unicode/Decomposition.pl) -unicore/CompositionExclusions.txt - (or unicode/CompExcl.txt) +unicore/CombiningClass.pl (or unicode/CombiningClass.pl) +unicore/Decomposition.pl (or unicode/Decomposition.pl) +unicore/CompositionExclusions.txt (or unicode/CompExcl.txt) -# CAVEAT: -# In bleadperl, unicore/CompExcl.txt is renamed -# unicore/CompositionExclusions.txt. +CAVEAT -And for the NoXS version, in addition to the above, +(1) In bleadperl, unicore/CompExcl.txt is renamed + unicore/CompositionExclusions.txt. - Lingua::KO::Hangul::Util 0.06 +(2) When these unicore/*.* files are updated; -is required. + in the case of an XS version: + You must rebuild the module, + as the data will be compiled on building. + + in the case of a NoXS version: + Rebuilding is not necessary, + as the data will be read on requirement. COPYRIGHT AND LICENCE -SADAHIRO Tomoyuki, ESADAHIRO@cpan.orgE + SADAHIRO Tomoyuki + + SADAHIRO@cpan.org http://homepage1.nifty.com/nomenclator/perl/ @@ -85,4 +92,3 @@ SADAHIRO Tomoyuki, ESADAHIRO@cpan.orgE This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. - diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader index 12c5aa8..67a132b 100644 --- a/ext/Unicode/Normalize/mkheader +++ b/ext/Unicode/Normalize/mkheader @@ -14,257 +14,316 @@ use Carp; our $PACKAGE = 'Unicode::Normalize, mkheader'; our $Combin = do "unicore/CombiningClass.pl" - || do "unicode/CombiningClass.pl" - || croak "$PACKAGE: CombiningClass.pl not found"; + || do "unicode/CombiningClass.pl" + || croak "$PACKAGE: CombiningClass.pl not found"; our $Decomp = do "unicore/Decomposition.pl" - || do "unicode/Decomposition.pl" - || croak "$PACKAGE: Decomposition.pl not found"; + || do "unicode/Decomposition.pl" + || croak "$PACKAGE: Decomposition.pl not found"; our %Combin; # $codepoint => $number : combination class our %Canon; # $codepoint => $hexstring : canonical decomp. our %Compat; # $codepoint => $hexstring : compat. decomp. our %Compos; # $string => $codepoint : composite - our %Exclus; # $codepoint => 1 : composition exclusions +our %Single; # $codepoint => 1 : singletons +our %NonStD; # $codepoint => 1 : non-starter decompositions { - my($f, $fh); - foreach my $d (@INC) { - use File::Spec; - $f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt"); - last if open($fh, $f); - $f = File::Spec->catfile($d, "unicode", "CompExcl.txt"); - last if open($fh, $f); - $f = undef; - } - croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f; - while(<$fh>) { - next if /^#/ or /^$/; - s/#.*//; - $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/; - } - close $fh; + my($f, $fh); + foreach my $d (@INC) { + use File::Spec; + $f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt"); + last if open($fh, $f); + $f = File::Spec->catfile($d, "unicode", "CompExcl.txt"); + last if open($fh, $f); + $f = undef; + } + croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f; + while (<$fh>) { + next if /^#/ or /^$/; + s/#.*//; + $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/; + } + close $fh; } -while($Combin =~ /(.+)/g) { - my @tab = split /\t/, $1; - my $ini = hex $tab[0]; - if($tab[1] eq '') { - $Combin{ $ini } = $tab[2]; - } else { - $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]); - } +while ($Combin =~ /(.+)/g) { + my @tab = split /\t/, $1; + my $ini = hex $tab[0]; + if ($tab[1] eq '') { + $Combin{ $ini } = $tab[2]; + } else { + $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]); + } } -while($Decomp =~ /(.+)/g) { - my @tab = split /\t/, $1; - my $compat = $tab[2] =~ s/<[^>]+>//; - my $dec = [ _getHexArray($tab[2]) ]; # decomposition - my $com = pack('U*', @$dec); # composable sequence - my $ini = hex($tab[0]); - if($tab[1] eq '') { - $Compat{ $ini } = $dec; - if(! $compat) { - $Canon{ $ini } = $dec; - $Compos{ $com } = $ini if @$dec > 1; +while ($Decomp =~ /(.+)/g) { + my @tab = split /\t/, $1; + my $compat = $tab[2] =~ s/<[^>]+>//; + my $dec = [ _getHexArray($tab[2]) ]; # decomposition + my $com = pack('U*', @$dec); # composable sequence + my $ini = hex($tab[0]); # initial decomposable character + if ($tab[1] eq '') { + $Compat{ $ini } = $dec; + + if (! $compat) { + $Canon{ $ini } = $dec; + + if (@$dec > 1) { + if ($Combin{ $dec->[0] }) { + $NonStD{ $ini } = 1; + } else { + $Compos{ $com } = $ini; + } + } else { + $Single{ $ini } = 1; + } + } + } else { + foreach my $u ($ini .. hex($tab[1])){ + $Compat{ $u } = $dec; + if (! $compat) { + $Canon{ $u } = $dec; + + if (@$dec > 1) { + if ($Combin{ $dec->[0] }) { + $NonStD{ $u } = 1; + } else { + $Compos{ $com } = $u; + } + } else { + $Single{ $u } = 1; + } + } + } } - } else { - foreach my $u ($ini .. hex($tab[1])){ - $Compat{ $u } = $dec; - if(! $compat){ - $Canon{ $u } = $dec; - $Compos{ $com } = $ini if @$dec > 1; - } - } - } } # exhaustive decomposition foreach my $key (keys %Canon) { - $Canon{$key} = [ getCanonList($key) ]; + $Canon{$key} = [ getCanonList($key) ]; } # exhaustive decomposition foreach my $key (keys %Compat) { - $Compat{$key} = [ getCompatList($key) ]; + $Compat{$key} = [ getCompatList($key) ]; } sub getCanonList { - my @src = @_; - my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src; - join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec); - # condition @src == @dec is not ok. + my @src = @_; + my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src; + return join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec); + # condition @src == @dec is not ok. } sub getCompatList { - my @src = @_; - my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src; - join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec); - # condition @src == @dec is not ok. + my @src = @_; + my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src; + return join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec); + # condition @src == @dec is not ok. } -sub _getHexArray { - my $str = shift; - map hex(), $str =~ /([0-9A-Fa-f]+)/g; -} +sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g } sub _U_stringify { - sprintf '"%s"', join '', - map sprintf("\\x%02x", $_), unpack 'C*', pack 'U*', @_; + sprintf '"%s"', join '', + map sprintf("\\x%02x", $_), unpack 'C*', pack 'U*', @_; } foreach my $hash (\%Canon, \%Compat) { - foreach my $key (keys %$hash) { - $hash->{$key} = _U_stringify( @{ $hash->{$key} } ); - } + foreach my $key (keys %$hash) { + $hash->{$key} = _U_stringify( @{ $hash->{$key} } ); + } } my $prefix = "UNF_"; - my $structname = "${prefix}complist"; -our (%Comp1st, %CompList); +our (%Comp1st, %Comp2nd, %CompList); -foreach(sort keys %Compos) { - my @a = unpack('U*', $_); - my $val = $Compos{$_}; - my $name = sprintf "${structname}_%06x", $a[0]; - $Comp1st{ $a[0] } = $name; - $CompList{ $name }{ $a[1] } = $val; +foreach (sort keys %Compos) { + my @a = unpack('U*', $_); + my $val = $Compos{$_}; + my $name = sprintf "${structname}_%06x", $a[0]; + $Comp1st{$a[0]} = $name; + $Comp2nd{$a[1]} = 1 if ! $Exclus{$Compos{$_}} && ! $Combin{$a[0]}; + $CompList{$name}{$a[1]} = $val; +} + +# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamo +foreach (0x1161..0x1175, 0x11A8..0x11C2) { + $Comp2nd{$_} = 1; } my $compinit = - "typedef struct { UV nextchar; UV composite; } $structname;\n\n"; + "typedef struct { UV nextchar; UV composite; } $structname;\n\n"; foreach my $i (sort keys %CompList) { - $compinit .= "$structname $i [] = {\n"; - $compinit .= join ",\n", - map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}), - sort {$a <=> $b } keys %{ $CompList{$i} }; - $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel + $compinit .= "$structname $i [] = {\n"; + $compinit .= join ",\n", + map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}), + sort {$a <=> $b } keys %{ $CompList{$i} }; + $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel } #################################### -my @Exclus = sort {$a <=> $b} keys %Exclus; +my @boolfunc = ( + { + name => "Exclusion", + type => "bool", + hash => \%Exclus, + }, + { + name => "Singleton", + type => "bool", + hash => \%Single, + }, + { + name => "NonStDecomp", + type => "bool", + hash => \%NonStD, + }, + { + name => "Comp2nd", + type => "bool", + hash => \%Comp2nd, + }, +); my $file = "unfexc.h"; open FH, ">$file" or croak "$PACKAGE: $file can't be made"; binmode FH; select FH; -print "bool isExclusion (UV uv) \n{\nreturn\n\t"; + print << 'EOF'; +/* + * This file is auto-generated by mkheader. + * Any changes here will be lost! + */ +EOF -while(@Exclus) { - my $cur = shift @Exclus; - if(@Exclus && $cur + 1 == $Exclus[0]) { - print "($cur <= uv && uv <= "; - while(@Exclus && $cur + 1 == $Exclus[0]) { - $cur = shift @Exclus; +foreach my $tbl (@boolfunc) { + my @temp = sort {$a <=> $b} keys %{$tbl->{hash}}; + my $type = $tbl->{type}; + my $name = $tbl->{name}; + print "$type is$name (UV uv)\n{\nreturn\n\t"; + + while (@temp) { + my $cur = shift @temp; + if (@temp && $cur + 1 == $temp[0]) { + print "($cur <= uv && uv <= "; + while (@temp && $cur + 1 == $temp[0]) { + $cur = shift @temp; + } + print "$cur)"; + print "\n\t|| " if @temp; + } else { + print "uv == $cur"; + print "\n\t|| " if @temp; + } } - print "$cur)"; - print "\n\t|| " if @Exclus; - } else { - print "uv == $cur"; - print "\n\t|| " if @Exclus; - } + print "\n\t? TRUE : FALSE;\n}\n\n"; } -print "\n\t? TRUE : FALSE;\n}\n\n"; close FH; + #################################### my @tripletable = ( - { - file => "unfcmb", - name => "combin", - type => "STDCHAR", - hash => \%Combin, - null => 0, - }, - { - file => "unfcan", - name => "canon", - type => "char*", - hash => \%Canon, - null => "NULL", - }, - { - file => "unfcpt", - name => "compat", - type => "char*", - hash => \%Compat, - null => "NULL", - }, - { - file => "unfcmp", - name => "compos", - type => "$structname *", - hash => \%Comp1st, - null => "NULL", - init => $compinit, - }, + { + file => "unfcmb", + name => "combin", + type => "STDCHAR", + hash => \%Combin, + null => 0, + }, + { + file => "unfcan", + name => "canon", + type => "char*", + hash => \%Canon, + null => "NULL", + }, + { + file => "unfcpt", + name => "compat", + type => "char*", + hash => \%Compat, + null => "NULL", + }, + { + file => "unfcmp", + name => "compos", + type => "$structname *", + hash => \%Comp1st, + null => "NULL", + init => $compinit, + }, ); foreach my $tbl (@tripletable) { - my $file = "$tbl->{file}.h"; - my $head = "${prefix}$tbl->{name}"; - my $type = $tbl->{type}; - my $hash = $tbl->{hash}; - my $null = $tbl->{null}; - my $init = $tbl->{init}; - - open FH, ">$file" or croak "$PACKAGE: $file can't be made"; - binmode FH; select FH; - my %val; - - print FH << 'EOF'; + my $file = "$tbl->{file}.h"; + my $head = "${prefix}$tbl->{name}"; + my $type = $tbl->{type}; + my $hash = $tbl->{hash}; + my $null = $tbl->{null}; + my $init = $tbl->{init}; + + open FH, ">$file" or croak "$PACKAGE: $file can't be made"; + binmode FH; select FH; + my %val; + + print FH << 'EOF'; /* * This file is auto-generated by mkheader. * Any changes here will be lost! */ EOF - print $init if defined $init; - - foreach my $uv (keys %$hash) { - my @c = unpack 'CCCC', pack 'N', $uv; - $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; - } - - foreach my $p (sort { $a <=> $b } keys %val) { - next if ! $val{ $p }; - for(my $r = 0; $r < 256; $r++){ - next if ! $val{ $p }{ $r }; - printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r; - for(my $c = 0; $c < 256; $c++){ - print "\t", defined $val{$p}{$r}{$c} - ? "($type)".$val{$p}{$r}{$c} : $null; - print ',' if $c != 255; - print "\n" if $c % 8 == 7; - } - print "};\n\n"; + print $init if defined $init; + + foreach my $uv (keys %$hash) { + my @c = unpack 'CCCC', pack 'N', $uv; + $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; + } + + foreach my $p (sort { $a <=> $b } keys %val) { + next if ! $val{ $p }; + for (my $r = 0; $r < 256; $r++) { + next if ! $val{ $p }{ $r }; + printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r; + for (my $c = 0; $c < 256; $c++) { + print "\t", defined $val{$p}{$r}{$c} + ? "($type)".$val{$p}{$r}{$c} + : $null; + print ',' if $c != 255; + print "\n" if $c % 8 == 7; + } + print "};\n\n"; + } + } + foreach my $p (sort { $a <=> $b } keys %val) { + next if ! $val{ $p }; + printf "$type* ${head}_%02x [256] = {\n", $p; + for (my $r = 0; $r < 256; $r++) { + print $val{ $p }{ $r } + ? sprintf("${head}_%02x_%02x", $p, $r) + : "NULL"; + print ',' if $r != 255; + print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; + } + print "};\n\n"; } - } - foreach my $p (sort { $a <=> $b } keys %val) { - next if ! $val{ $p }; - printf "$type* ${head}_%02x [256] = {\n", $p; - for(my $r = 0; $r < 256; $r++){ - print $val{ $p }{ $r } ? sprintf("${head}_%02x_%02x", $p, $r) : "NULL"; - print ',' if $r != 255; - print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; + print "$type** $head [] = {\n"; + for (my $p = 0; $p <= 0x10; $p++) { + print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; + print ',' if $p != 0x10; + print "\n"; } print "};\n\n"; - } - print "$type** $head [] = {\n"; - for(my $p = 0; $p <= 0x10; $p++){ - print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; - print ',' if $p != 0x10; - print "\n"; - } - print "};\n\n"; - close FH; + close FH; } __END__ diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t index c436a7c..86320cc 100644 --- a/ext/Unicode/Normalize/t/func.t +++ b/ext/Unicode/Normalize/t/func.t @@ -3,8 +3,8 @@ BEGIN { if (ord("A") == 193) { - print "1..0 # Unicode::Normalize not ported to EBCDIC\n"; - exit 0; + print "1..0 # Unicode::Normalize not ported to EBCDIC\n"; + exit 0; } } @@ -13,7 +13,7 @@ BEGIN { use Test; use strict; use warnings; -BEGIN { plan tests => 6 }; +BEGIN { plan tests => 10 }; use Unicode::Normalize qw(:all); ok(1); # If we made it this far, we're ok. @@ -22,7 +22,7 @@ ok(1); # If we made it this far, we're ok. print getCombinClass( 0) == 0 && getCombinClass( 768) == 230 && getCombinClass(1809) == 36 -# && getCombinClass(119143) == 1 +# && getCombinClass(119143) == 1 # U+1D167, a Unicode 3.1 character ? "ok" : "not ok", " 2\n"; print ! defined getCanon( 0) @@ -68,13 +68,60 @@ print ! defined getComposite( 0, 0) && 0xADF8 == getComposite(0x1100, 0x1173) && ! defined getComposite(0x1100, 0x11AF) && ! defined getComposite(0x1173, 0x11AF) + && ! defined getComposite(0xAC00, 0x11A7) + && 0xAC01 == getComposite(0xAC00, 0x11A8) && 0xAE00 == getComposite(0xADF8, 0x11AF) ? "ok" : "not ok", " 5\n"; print ! isExclusion( 0) && ! isExclusion(41) - && isExclusion(2392) - && isExclusion(3907) - && isExclusion(64334) + && isExclusion(2392) # DEVANAGARI LETTER QA + && isExclusion(3907) # TIBETAN LETTER GHA + && isExclusion(64334) # HEBREW LETTER PE WITH RAFE ? "ok" : "not ok", " 6\n"; +print ! isSingleton( 0) + && isSingleton(0x212B) # ANGSTROM SIGN + ? "ok" : "not ok", " 7\n"; + +print reorder("") eq "" + && reorder(pack("U*", 0x0041, 0x0300, 0x0315, 0x0313, 0x031b, 0x0061)) + eq pack("U*", 0x0041, 0x031b, 0x0300, 0x0313, 0x0315, 0x0061) + && reorder(pack("U*", 0x00C1, 0x0300, 0x0315, 0x0313, 0x031b, + 0x0061, 0x309A, 0x3099)) + eq pack("U*", 0x00C1, 0x031b, 0x0300, 0x0313, 0x0315, + 0x0061, 0x309A, 0x3099) + ? "ok" : "not ok", " 8\n"; + +sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" } + +print answer(checkNFD("")) eq "YES" + && answer(checkNFC("")) eq "YES" + && answer(checkNFKD("")) eq "YES" + && answer(checkNFKC("")) eq "YES" + && answer(check("NFD", "")) eq "YES" + && answer(check("NFC", "")) eq "YES" + && answer(check("NFKD","")) eq "YES" + && answer(check("NFKC","")) eq "YES" +# U+0000 to U+007F are prenormalized in all the normalization forms. + && answer(checkNFD("AZaz\t12!#`")) eq "YES" + && answer(checkNFC("AZaz\t12!#`")) eq "YES" + && answer(checkNFKD("AZaz\t12!#`")) eq "YES" + && answer(checkNFKC("AZaz\t12!#`")) eq "YES" + && answer(check("D", "AZaz\t12!#`")) eq "YES" + && answer(check("C", "AZaz\t12!#`")) eq "YES" + && answer(check("KD","AZaz\t12!#`")) eq "YES" + && answer(check("KC","AZaz\t12!#`")) eq "YES" + ? "ok" : "not ok", " 9\n"; + +print 1 + && answer(checkNFD(NFD(pack('U*', 0xC1, 0x1100, 0x1173, 0x11AF)))) eq "YES" + && answer(checkNFD(pack('U*', 0x20, 0xC1, 0x1100, 0x1173, 0x11AF))) eq "NO" + && answer(checkNFC(pack('U*', 0x20, 0xC1, 0x1173, 0x11AF))) eq "MAYBE" + && answer(checkNFC(pack('U*', 0x20, 0xC1, 0xAE00, 0x1100))) eq "YES" + && answer(checkNFC(pack('U*', 0x20, 0xC1, 0xAE00, 0x1100, 0x300))) eq "MAYBE" + && answer(checkNFC(pack('U*', 0x20, 0xC1, 0xFF71, 0x2025))) eq "YES" + && answer(check("NFC", pack('U*', 0x20, 0xC1, 0x212B, 0x300))) eq "NO" + && answer(checkNFKD(pack('U*', 0x20, 0xC1, 0xFF71, 0x2025))) eq "NO" + && answer(checkNFKC(pack('U*', 0x20, 0xC1, 0xAE00, 0x2025))) eq "NO" + ? "ok" : "not ok", " 10\n";