ext/Unicode/Normalize/Normalize.pm Unicode::Normalize
ext/Unicode/Normalize/Normalize.xs Unicode::Normalize
ext/Unicode/Normalize/README Unicode::Normalize
+ext/Unicode/Normalize/t/fcdc.t Unicode::Normalize
+ext/Unicode/Normalize/t/form.t Unicode::Normalize
ext/Unicode/Normalize/t/func.t Unicode::Normalize
ext/Unicode/Normalize/t/norm.t Unicode::Normalize
+ext/Unicode/Normalize/t/proto.t Unicode::Normalize
+ext/Unicode/Normalize/t/split.t Unicode::Normalize
ext/Unicode/Normalize/t/test.t Unicode::Normalize
ext/util/make_ext Used by Makefile to execute extension Makefiles
ext/XS/APItest/APItest.pm XS::APItest extension
Revision history for Perl extension Unicode::Normalize.
+0.25 Mon Oct 6 22:26:03 2003
+ - added form.t and proto.t.
+
+0.24 Sat Oct 4 17:57:10 2003
+ - supports FCD and FCC (UTN #5):
+ FCD(), normalize('FCD'), checkFCD(), check('FCD');
+ FCC(), normalize('FCC'), checkFCC(), check('FCC').
+ - changed INSTALLATION (cf. README).
+ * Initial state of the distribution is changed to XSUB. To build
+ pure Perl, type <perl disableXS> before <perl Makefile.PL>.
+ * The purePerl-XSUB converter is now provided as two perl
+ script files, named "enableXS" and "disableXS".
+ (no longer <perl Makefile.PL xs> and <perl Makefile.PL noxs>.)
+ * simplified Makefile.PL.
+ - added fcdc.t and split.t.
+
0.23 Sat Jun 28 20:38:10 2003
- bug fix: \0-terminate in compose() in XS.
- tweak in pure perl: forced $codepoint to numeric (i.e. "+0065" to 65)
use ExtUtils::MakeMaker;
-# This code for XS-NOXS installer is shamelessly stolen
-# after Gurusamy Sarathy's Data::Dumper. Thank you!
-
-# a bit modified.
-
-use File::Copy qw();
-
-my $arg = $ARGV[0] || "";
-
-if ($arg =~ /^no/i and -f "Normalize.xs") {
- print STDERR "Disabling XS in sources...\n";
-
- die "***** Failed, sources could be inconsistent! *****\n"
- unless File::Copy::move('MANIFEST', 'MANIFEST.XS')
- and File::Copy::move('MANIFEST.NXS', 'MANIFEST')
- and File::Copy::move('Normalize.pm', 'Normalize.pm.XS')
- and File::Copy::move('Normalize.xs', 'Normalize.xs.XS')
- and File::Copy::move('Normalize.pm.NXS','Normalize.pm');
-}
-if ($arg =~ /^xs/i and -f "Normalize.xs.XS") {
- print STDERR "Enabling XS in sources...\n";
-
- die "***** Failed, sources could be inconsistent! *****\n"
- unless File::Copy::move('MANIFEST', 'MANIFEST.NXS')
- and File::Copy::move('MANIFEST.XS', 'MANIFEST')
- and File::Copy::move('Normalize.pm', 'Normalize.pm.NXS')
- and File::Copy::move('Normalize.xs.XS', 'Normalize.xs')
- and File::Copy::move('Normalize.pm.XS', 'Normalize.pm');
-}
-
my $clean = {};
if (-f "Normalize.xs") {
}
WriteMakefile(
- 'INSTALLDIRS' => $] > 5.007 ? 'perl' : 'site',
+ 'INSTALLDIRS' => $] >= 5.007 ? 'perl' : 'site',
'NAME' => 'Unicode::Normalize',
'VERSION_FROM' => 'Normalize.pm', # finds $VERSION
'clean' => $clean,
use warnings;
use Carp;
-our $VERSION = '0.23';
+our $VERSION = '0.25';
our $PACKAGE = __PACKAGE__;
require Exporter;
require DynaLoader;
-require AutoLoader;
our @ISA = qw(Exporter DynaLoader);
our @EXPORT = qw( NFC NFD NFKC NFKD );
getCanon getCompat getComposite getCombinClass
isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
+ FCD checkFCD FCC checkFCC composeContiguous
+ splitOnLastStarter
);
our %EXPORT_TAGS = (
all => [ @EXPORT, @EXPORT_OK ],
normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ],
check => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ],
+ fast => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ],
);
+######
+
bootstrap Unicode::Normalize $VERSION;
+######
+
sub pack_U {
return pack('U*', @_);
}
return unpack('U*', pack('U*').shift);
}
+
+##
+## normalization forms
+##
+
use constant COMPAT => 1;
sub NFD ($) { reorder(decompose($_[0])) }
sub NFC ($) { compose(reorder(decompose($_[0]))) }
sub NFKC ($) { compose(reorder(decompose($_[0], COMPAT))) }
+sub FCD ($) {
+ my $str = shift;
+ return checkFCD($str) ? $str : NFD($str);
+}
+sub FCC ($) { composeContiguous(reorder(decompose($_[0]))) }
+
+our %formNorm = (
+ NFC => \&NFC, C => \&NFC,
+ NFD => \&NFD, D => \&NFD,
+ NFKC => \&NFKC, KC => \&NFKC,
+ NFKD => \&NFKD, KD => \&NFKD,
+ FCD => \&FCD, FCC => \&FCC,
+);
+
sub normalize($$)
{
my $form = shift;
my $str = shift;
- $form =~ s/^NF//;
- return
- $form eq 'D' ? NFD ($str) :
- $form eq 'C' ? NFC ($str) :
- $form eq 'KD' ? NFKD($str) :
- $form eq 'KC' ? NFKC($str) :
- croak $PACKAGE."::normalize: invalid form name: $form";
+ return exists $formNorm{$form}
+ ? $formNorm{$form}->($str)
+ : croak $PACKAGE."::normalize: invalid form name: $form";
}
+
+##
+## quick check
+##
+
+our %formCheck = (
+ NFC => \&checkNFC, C => \&checkNFC,
+ NFD => \&checkNFD, D => \&checkNFD,
+ NFKC => \&checkNFKC, KC => \&checkNFKC,
+ NFKD => \&checkNFKD, KD => \&checkNFKD,
+ FCD => \&checkFCD, FCC => \&checkFCC,
+);
+
sub check($$)
{
my $form = shift;
my $str = shift;
- $form =~ s/^NF//;
- return
- $form eq 'D' ? checkNFD ($str) :
- $form eq 'C' ? checkNFC ($str) :
- $form eq 'KD' ? checkNFKD($str) :
- $form eq 'KC' ? checkNFKC($str) :
- croak $PACKAGE."::check: invalid form name: $form";
+ return exists $formCheck{$form}
+ ? $formCheck{$form}->($str)
+ : croak $PACKAGE."::check: invalid form name: $form";
}
1;
returns the Normalization Form KC (formed by compatibility decomposition
followed by B<canonical> composition).
+=item C<$FCD_string = FCD($string)>
+
+If the given string is in FCD ("Fast C or D" form; cf. UTN #5),
+returns it without modification; otherwise returns an FCD string.
+
+Note: FCD is not always unique, then plural forms may be equivalent
+each other. C<FCD()> will return one of these equivalent forms.
+
+=item C<$FCC_string = FCC($string)>
+
+returns the FCC form ("Fast C Contiguous"; cf. UTN #5).
+
+Note: FCD is unique, as well as four normalization forms (NF*).
+
=item C<$normalized_string = normalize($form_name, $string)>
As C<$form_name>, one of the following names must be given.
- 'C' or 'NFC' for Normalization Form C
- 'D' or 'NFD' for Normalization Form D
- 'KC' or 'NFKC' for Normalization Form KC
- 'KD' or 'NFKD' for Normalization Form KD
+ 'C' or 'NFC' for Normalization Form C (UAX #15)
+ 'D' or 'NFD' for Normalization Form D (UAX #15)
+ 'KC' or 'NFKC' for Normalization Form KC (UAX #15)
+ 'KD' or 'NFKD' for Normalization Form KD (UAX #15)
+
+ 'FCD' for "Fast C or D" Form (UTN #5)
+ 'FCC' for "Fast C Contiguous" (UTN #5)
=back
=head2 Quick Check
-(see Annex 8, UAX #15, and F<DerivedNormalizationProps.txt>)
+(see Annex 8, UAX #15; and F<DerivedNormalizationProps.txt>)
The following functions check whether the string is in that normalization form.
returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>).
+=item C<$result = checkFCD($string)>
+
+returns C<YES> (C<1>) or C<NO> (C<empty string>).
+
+=item C<$result = checkFCC($string)>
+
+returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>).
+
+If a string is not in C<FCD>, it must not be in <FCC>.
+So C<checkFCC($not_FCD_string)> should return C<NO>.
+
=item C<$result = check($form_name, $string)>
returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>).
B<Note>
-In the cases of NFD and NFKD, the answer must be either C<YES> or C<NO>.
-The answer C<MAYBE> may be returned in the cases of NFC and NFKC.
+In the cases of NFD, NFKD, and FCD, the answer must be
+either C<YES> or C<NO>. The answer C<MAYBE> may be returned
+in the cases of NFC, NFKC, and FCC.
-A MAYBE-NFC/NFKC string should contain at least
-one combining character or the like.
-For example, C<COMBINING ACUTE ACCENT> has
+A C<MAYBE> string should contain at least one combining character
+or the like. For example, C<COMBINING ACUTE ACCENT> has
the MAYBE_NFC/MAYBE_NFKC property.
+
Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")>
and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>.
C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC
(its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">),
while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC.
-If you want to check exactly, compare the string with its NFC/NFKC; i.e.,
+If you want to check exactly, compare the string with its NFC/NFKC/FCC;
+i.e.,
- $string eq NFC($string) # more thorough than checkNFC($string)
- $string eq NFKC($string) # more thorough than checkNFKC($string)
+ $string eq NFC($string) # thorough than checkNFC($string)
+ $string eq NFKC($string) # thorough than checkNFKC($string)
+ $string eq FCC($string) # thorough than checkFCC($string)
=head2 Character Data
=head1 AUTHOR
-SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
+SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>
http://homepage1.nifty.com/nomenclator/perl/
Derived Normalization Properties
+=item http://www.unicode.org/notes/tn5/
+
+Canonical Equivalence in Applications - UTN #5
+
=back
=cut
#define utf8n_to_uvuni utf8_to_uv
#endif /* utf8n_to_uvuni */
+/* if utf8n_to_uvuni() sets retlen to 0 when flags = 0 */
+#define ErrRetlenIsZero "panic (Unicode::Normalize): zero-length character"
+
+/* utf8_hop() hops back before start. Maybe broken UTF-8 */
+#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start"
+
/* At present, char > 0x10ffff are unaffected without complaint, right? */
#define VALID_UTF_MAX (0x10ffff)
#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
s = (U8*)SvPV(src,srclen);
e = s + srclen;
- for (p = s; p < e;) {
+ for (p = s; p < e; p += retlen) {
uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
- p += retlen;
+ if (!retlen)
+ croak(ErrRetlenIsZero);
+
if (Hangul_IsS(uv))
sv_cat_decompHangul(dst, uv);
else {
if (r)
sv_catpv(dst, (char *)r);
else
- sv_catpvn(dst, (char *)p - retlen, retlen);
+ sv_catpvn(dst, (char *)p, retlen);
}
}
RETVAL = dst;
STRLEN cc_len, cc_iter, cc_pos;
uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
- curCC = getCombinClass(uv);
+ if (!retlen)
+ croak(ErrRetlenIsZero);
p += retlen;
+
+
+ curCC = getCombinClass(uv);
if (! (curCC && p < e))
continue;
else
while (p < e) {
uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
+ if (!retlen)
+ croak(ErrRetlenIsZero);
+ p += retlen;
+
curCC = getCombinClass(uv);
if (!curCC)
break;
- p += retlen;
+
cc_pos++;
if (stk_cc_max <= cc_pos) { /* extend if need */
stk_cc_max = cc_pos + 1;
compose(arg)
SV * arg
PROTOTYPE: $
+ ALIAS:
+ composeContiguous = 1
PREINIT:
SV *src, *dst, *tmp;
U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC;
for (p = s; p < e;) {
if (beginning) {
uvS = utf8n_to_uvuni(p, e - p, &retlen, 0);
+ if (!retlen)
+ croak(ErrRetlenIsZero);
p += retlen;
if (getCombinClass(uvS)) { /* no Starter found yet */
/* to the next Starter */
while (p < e) {
uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
+ if (!retlen)
+ croak(ErrRetlenIsZero);
p += retlen;
+
curCC = getCombinClass(uv);
if (preCC && preCC == curCC) {
} else {
uvComp = composite_uv(uvS, uv);
- if (uvComp && ! isExclusion(uvComp) && preCC <= curCC) {
+ if (uvComp && ! isExclusion(uvComp) &&
+ (ix ? (t == tmp_start) : (preCC <= curCC))) {
STRLEN leftcur, rightcur, dstcur;
leftcur = UNISKIP(uvComp);
rightcur = UNISKIP(uvS) + UNISKIP(uv);
RETVAL
-
void
checkNFD(arg)
SV * arg
SV *src;
STRLEN srclen, retlen;
U8 *s, *e, *p, curCC, preCC;
- PPCODE:
+ CODE:
if (SvUTF8(arg)) {
src = arg;
} else {
preCC = 0;
for (p = s; p < e; p += retlen) {
uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
+ if (!retlen)
+ croak(ErrRetlenIsZero);
+
curCC = getCombinClass(uv);
if (preCC > curCC && curCC != 0) /* canonical ordering violated */
XSRETURN_NO;
STRLEN srclen, retlen;
U8 *s, *e, *p, curCC, preCC;
bool isMAYBE;
- PPCODE:
+ CODE:
if (SvUTF8(arg)) {
src = arg;
} else {
isMAYBE = FALSE;
for (p = s; p < e; p += retlen) {
uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
+ if (!retlen)
+ croak(ErrRetlenIsZero);
+
curCC = getCombinClass(uv);
if (preCC > curCC && curCC != 0) /* canonical ordering violated */
+void
+checkFCD(arg)
+ SV * arg
+ PROTOTYPE: $
+ ALIAS:
+ checkFCC = 1
+ PREINIT:
+ UV uv, uvLead, uvTrail;
+ SV *src;
+ STRLEN srclen, retlen, canlen, canret;
+ U8 *s, *e, *p, curCC, preCC;
+ U8 *sCan, *pCan, *eCan;
+ bool isMAYBE;
+ CODE:
+ if (SvUTF8(arg)) {
+ src = arg;
+ } else {
+ src = sv_mortalcopy(arg);
+ sv_utf8_upgrade(src);
+ }
+
+ s = (U8*)SvPV(src,srclen);
+ e = s + srclen;
+
+ preCC = 0;
+ isMAYBE = FALSE;
+ for (p = s; p < e; p += retlen) {
+ uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
+ if (!retlen)
+ croak(ErrRetlenIsZero);
+
+ sCan = (U8*) dec_canonical(uv);
+
+ if (sCan) {
+ canlen = (STRLEN)strlen((char *) sCan);
+ uvLead = utf8n_to_uvuni(sCan, canlen, &canret, 0);
+ }
+ else {
+ uvLead = uv;
+ }
+
+ curCC = getCombinClass(uvLead);
+
+ if (curCC != 0 && curCC < preCC) /* canonical ordering violated */
+ XSRETURN_NO;
+
+ if (ix) {
+ if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
+ XSRETURN_NO;
+ else if (isComp2nd(uv))
+ isMAYBE = TRUE;
+ }
+
+ if (sCan) {
+ eCan = sCan + canlen;
+ pCan = utf8_hop(eCan, -1);
+ if (pCan < sCan)
+ croak(ErrHopBeforeStart);
+ uvTrail = utf8n_to_uvuni(pCan, eCan - pCan, &canret, 0);
+ preCC = getCombinClass(uvTrail);
+ }
+ else {
+ preCC = curCC;
+ }
+ }
+ if (isMAYBE)
+ XSRETURN_UNDEF;
+ else
+ XSRETURN_YES;
+
+
+
U8
getCombinClass(uv)
UV uv
PROTOTYPE: $
ALIAS:
isNFKD_NO = 1
- PPCODE:
+ CODE:
if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv)))
XSRETURN_YES; /* NFD_NO or NFKD_NO */
else
ALIAS:
isNFC_NO = 0
isNFKC_NO = 1
- PPCODE:
+ CODE:
if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
XSRETURN_YES; /* NFC_NO or NFKC_NO */
else if (ix) {
OUTPUT:
RETVAL
+
+void
+splitOnLastStarter(arg)
+ SV * arg
+ PREINIT:
+ UV uv;
+ SV *src, *svp;
+ STRLEN srclen, retlen;
+ U8 *s, *e, *p;
+ PPCODE:
+ if (SvUTF8(arg)) {
+ src = arg;
+ } else {
+ src = sv_mortalcopy(arg);
+ sv_utf8_upgrade(src);
+ }
+
+ s = (U8*)SvPV(src,srclen);
+ e = s + srclen;
+
+ for (p = e; s < p; ) {
+ p = utf8_hop(p, -1);
+ if (p < s)
+ croak(ErrHopBeforeStart);
+ uv = utf8n_to_uvuni(p, e - p, &retlen, 0);
+ if (getCombinClass(uv) == 0) /* Last Starter found */
+ break;
+ }
+
+ svp = sv_2mortal(newSVpvn((char*)s, p - s));
+ SvUTF8_on(svp);
+ XPUSHs(svp);
+
+ svp = sv_2mortal(newSVpvn((char*)p, e - p));
+ SvUTF8_on(svp);
+ XPUSHs(svp);
+
-Unicode/Normalize version 0.22
+Unicode/Normalize version 0.25
===================================
Unicode::Normalize - Unicode Normalization Forms
INSTALLATION
-Perl 5.6.1 or later
+Perl 5.6.1 or later (Caution: Perl 5.6.0 is not recommended.)
-To install this module type the following:
+To install this module (XSUB: needs a C compiler), type the following:
perl Makefile.PL
make
make test
make install
-If you have a C compiler and want to use the XS version,
-type the following:
+If you want to install pure Perl (i.e. no-XSUB),
+type the following (!! "disableXS" must run before "Makefile.PL" !!):
- perl Makefile.PL xs
+ perl disableXS
+ perl Makefile.PL
make
make test
make install
-If you decide to install the NoXS version after trying to build the XS,
-type the following:
+After building no-XSUB, if you decide to install XSUB,
+type the following (!! "enableXS" must run before "Makefile.PL" !!):
make clean
- perl Makefile.PL noxs
+ perl enableXS
+ perl Makefile.PL
make
make test
make install
DEPENDENCIES
-This module requires these other modules and libraries:
+This module requires other modules and libraries following:
Carp
Exporter
(2) After these unicore/*.* files are updated.
- In the case of an XS version:
+ In the case of an XS edition:
You must rebuild the module,
as the data will be compiled on building.
- In the case of a NoXS version:
+ In the case of a pure Perl edition:
Rebuilding is not necessary,
as the data will be read on requirement.
+(3) Pure Perl edition, Normalize.pmN, may work without any other file
+ in this distribution (it must be renamed Normalize.pm, though)
+
COPYRIGHT AND LICENCE
SADAHIRO Tomoyuki
--- /dev/null
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use Test;
+use strict;
+use warnings;
+BEGIN { plan tests => 35 };
+use Unicode::Normalize qw(:all);
+ok(1); # If we made it this far, we're ok.
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
+
+#########################
+
+ok(answer(checkFCD('')), 'YES');
+ok(answer(checkFCD('A')), 'YES');
+ok(answer(checkFCD("\x{030A}")), 'YES'); # 030A;COMBINING RING ABOVE
+ok(answer(checkFCD("\x{0327}")), 'YES'); # 0327;COMBINING CEDILLA
+ok(answer(checkFCD(_pack_U(0x00C5))), 'YES'); # A with ring above
+ok(answer(checkFCD(_pack_U(0x41, 0x30A))), 'YES'); # A+ring
+ok(answer(checkFCD(_pack_U(0x41, 0x327, 0x30A))), 'YES'); # A+cedilla+ring
+ok(answer(checkFCD(_pack_U(0x41, 0x30A, 0x327))), 'NO'); # A+ring+cedilla
+ok(answer(checkFCD(_pack_U(0xC5, 0x0327))), 'NO'); # A-ring+cedilla
+ok(answer(checkNFC(_pack_U(0xC5, 0x0327))), 'MAYBE'); # NFC: A-ring+cedilla
+ok(answer(check("FCD", _pack_U(0xC5, 0x0327))), 'NO');
+ok(answer(check("NFC", _pack_U(0xC5, 0x0327))), 'MAYBE');
+ok(answer(checkFCD("\x{AC01}\x{1100}\x{1161}")), 'YES'); # hangul
+ok(answer(checkFCD("\x{212B}\x{F900}")), 'YES'); # compat
+
+ok(FCD(''), "");
+ok(FCC(''), "");
+
+ok(FCD('A'), "A");
+ok(FCC('A'), "A");
+
+ok(answer(checkFCD(_pack_U(0x1EA7, 0x05AE, 0x0315, 0x0062))), "NO");
+ok(answer(checkFCC(_pack_U(0x1EA7, 0x05AE, 0x0315, 0x0062))), "NO");
+
+ok(FCC(_pack_U(0xC5, 0x327)), _pack_U(0x41, 0x327, 0x30A));
+ok(FCC(_pack_U(0x45, 0x304, 0x300)), _pack_U(0x1E14));
+ok(FCC("\x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}"), "\x{AC00}\x{AE00}");
+
+ok(answer(checkFCC('')), 'YES');
+ok(answer(checkFCC('A')), 'YES');
+ok(answer(checkFCC("\x{030A}")), 'MAYBE'); # 030A;COMBINING RING ABOVE
+ok(answer(checkFCC("\x{0327}")), 'MAYBE'); # 0327;COMBINING CEDILLA
+ok(answer(checkFCC(_pack_U(0x00C5))), 'YES'); # A with ring above
+ok(answer(checkFCC(_pack_U(0x41, 0x30A))), 'MAYBE'); # A+ring
+ok(answer(checkFCC(_pack_U(0x41, 0x327, 0x30A))), 'MAYBE'); # A+cedilla+ring
+ok(answer(checkFCC(_pack_U(0x41, 0x30A, 0x327))), 'NO'); # A+ring+cedilla
+ok(answer(checkFCC(_pack_U(0xC5, 0x0327))), 'NO'); # A-ring+cedilla
+ok(answer(checkFCC("\x{AC01}\x{1100}\x{1161}")), 'MAYBE'); # hangul
+ok(answer(checkFCC("\x{212B}\x{F900}")), 'NO'); # compat
+
--- /dev/null
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use Test;
+use strict;
+use warnings;
+BEGIN { plan tests => 37 };
+use Unicode::Normalize qw(:all);
+ok(1); # If we made it this far, we're ok.
+
+sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
+
+#########################
+
+ok(NFD ("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
+ok(NFC ("\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
+ok(NFKD("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
+ok(NFKC("\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");
+
+ok(answer(checkNFD ("\x{304C}")), "NO");
+ok(answer(checkNFC ("\x{304C}")), "YES");
+ok(answer(checkNFKD("\x{304C}")), "NO");
+ok(answer(checkNFKC("\x{304C}")), "YES");
+ok(answer(checkNFD ("\x{FF76}")), "YES");
+ok(answer(checkNFC ("\x{FF76}")), "YES");
+ok(answer(checkNFKD("\x{FF76}")), "NO");
+ok(answer(checkNFKC("\x{FF76}")), "NO");
+
+ok(normalize('D', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
+ok(normalize('C', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
+ok(normalize('KD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
+ok(normalize('KC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");
+
+ok(answer(check('D', "\x{304C}")), "NO");
+ok(answer(check('C', "\x{304C}")), "YES");
+ok(answer(check('KD',"\x{304C}")), "NO");
+ok(answer(check('KC',"\x{304C}")), "YES");
+ok(answer(check('D' ,"\x{FF76}")), "YES");
+ok(answer(check('C' ,"\x{FF76}")), "YES");
+ok(answer(check('KD',"\x{FF76}")), "NO");
+ok(answer(check('KC',"\x{FF76}")), "NO");
+
+ok(normalize('NFD', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
+ok(normalize('NFC', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
+ok(normalize('NFKD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
+ok(normalize('NFKC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");
+
+ok(answer(check('NFD', "\x{304C}")), "NO");
+ok(answer(check('NFC', "\x{304C}")), "YES");
+ok(answer(check('NFKD',"\x{304C}")), "NO");
+ok(answer(check('NFKC',"\x{304C}")), "YES");
+ok(answer(check('NFD' ,"\x{FF76}")), "YES");
+ok(answer(check('NFC' ,"\x{FF76}")), "YES");
+ok(answer(check('NFKD',"\x{FF76}")), "NO");
+ok(answer(check('NFKC',"\x{FF76}")), "NO");
+
--- /dev/null
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use Test;
+use strict;
+use warnings;
+BEGIN { plan tests => 42 };
+use Unicode::Normalize qw(:all);
+ok(1); # If we made it this far, we're ok.
+
+#########################
+
+# unary op. RING-CEDILLA
+ok( "\x{30A}\x{327}" ne "\x{327}\x{30A}");
+ok(NFD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(NFC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(NFKD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(NFKC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(FCD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(FCC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(reorder "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+
+ok(prototype \&normalize,'$$');
+ok(prototype \&NFD, '$');
+ok(prototype \&NFC, '$');
+ok(prototype \&NFKD, '$');
+ok(prototype \&NFKC, '$');
+ok(prototype \&FCD, '$');
+ok(prototype \&FCC, '$');
+
+ok(prototype \&check, '$$');
+ok(prototype \&checkNFD, '$');
+ok(prototype \&checkNFC, '$');
+ok(prototype \&checkNFKD,'$');
+ok(prototype \&checkNFKC,'$');
+ok(prototype \&checkFCD, '$');
+ok(prototype \&checkFCC, '$');
+
+ok(prototype \&decompose, '$;$');
+ok(prototype \&reorder, '$');
+ok(prototype \&compose, '$');
+ok(prototype \&composeContiguous, '$');
+
+ok(prototype \&getCanon, '$');
+ok(prototype \&getCompat, '$');
+ok(prototype \&getComposite, '$$');
+ok(prototype \&getCombinClass,'$');
+ok(prototype \&isExclusion, '$');
+ok(prototype \&isSingleton, '$');
+ok(prototype \&isNonStDecomp, '$');
+ok(prototype \&isComp2nd, '$');
+ok(prototype \&isComp_Ex, '$');
+
+ok(prototype \&isNFD_NO, '$');
+ok(prototype \&isNFC_NO, '$');
+ok(prototype \&isNFC_MAYBE, '$');
+ok(prototype \&isNFKD_NO, '$');
+ok(prototype \&isNFKC_NO, '$');
+ok(prototype \&isNFKC_MAYBE, '$');
+
--- /dev/null
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use Test;
+use strict;
+use warnings;
+BEGIN { plan tests => 14 };
+use Unicode::Normalize qw(:all);
+ok(1); # If we made it this far, we're ok.
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+our $proc; # before the last starter
+our $unproc; # the last starter and after
+# If string has no starter, entire string is set to $unproc.
+
+# When you have $normalized string and $unnormalized string following,
+# a simple concatenation
+# C<$concat = $normalized . normalize($form, $unnormalized)>
+# is wrong. Instead of it, like this:
+#
+# ($processed, $unprocessed) = splitOnLastStarter($normalized);
+# $concat = $processed . normalize($form, $unprocessed.$unnormalized);
+
+($proc, $unproc) = splitOnLastStarter("");
+ok($proc, "");
+ok($unproc, "");
+
+($proc, $unproc) = splitOnLastStarter("A");
+ok($proc, "");
+ok($unproc, "A");
+
+($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0x42));
+ok($proc, _pack_U(0x41, 0x300, 0x327));
+ok($unproc, "B");
+
+($proc, $unproc) = splitOnLastStarter(_pack_U(0x4E00, 0x41, 0x301));
+ok($proc, _pack_U(0x4E00));
+ok($unproc, _pack_U(0x41, 0x301));
+
+($proc, $unproc) = splitOnLastStarter(_pack_U(0x302, 0x301, 0x300));
+ok($proc, "");
+ok($unproc, _pack_U(0x302, 0x301, 0x300));
+
+our $ka_grave = _pack_U(0x41, 0, 0x42, 0x304B, 0x300);
+our $dakuten = _pack_U(0x3099);
+our $ga_grave = _pack_U(0x41, 0, 0x42, 0x304C, 0x300);
+
+our ($p, $u) = splitOnLastStarter($ka_grave);
+our $concat = $p . NFC($u.$dakuten);
+
+ok(NFC($ka_grave.$dakuten) eq $ga_grave);
+ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave);
+ok($concat eq $ga_grave);
+