1 package Unicode::Normalize;
9 our $PACKAGE = __PACKAGE__;
15 our @ISA = qw(Exporter DynaLoader);
16 our @EXPORT = qw( NFC NFD NFKC NFKD );
18 normalize decompose reorder compose
19 getCanon getCompat getComposite getCombinClass isExclusion
21 our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );
23 bootstrap Unicode::Normalize $VERSION;
25 use constant COMPAT => 1;
# NFD($string): Normalization Form D of the argument.
# Runs decompose() without the COMPAT flag, then reorder() on the result.
# The ($) prototype is part of the public calling convention; $_[0] is
# handed on directly rather than copied.
sub NFD ($) {
    return reorder( decompose( $_[0] ) );
}
# NFKD($string): Normalization Form KD of the argument.
# Same pipeline as plain decomposition + reorder(), except that COMPAT is
# passed as decompose()'s second argument to select the compatibility mode.
# The ($) prototype is part of the public calling convention.
sub NFKD ($) {
    return reorder( decompose( $_[0], COMPAT ) );
}
# NFC($string): Normalization Form C of the argument.
# Pipeline: decompose() without the COMPAT flag, then reorder(), then
# compose() on the reordered result.
# The ($) prototype is part of the public calling convention.
sub NFC ($) {
    return compose( reorder( decompose( $_[0] ) ) );
}
# NFKC($string): Normalization Form KC of the argument.
# Pipeline: decompose() with COMPAT as its second argument, then reorder(),
# then compose() on the reordered result.
# The ($) prototype is part of the public calling convention.
sub NFKC ($) {
    return compose( reorder( decompose( $_[0], COMPAT ) ) );
}
38 $form eq 'D' ? NFD ($_[0]) :
39 $form eq 'C' ? NFC ($_[0]) :
40 $form eq 'KD' ? NFKD($_[0]) :
41 $form eq 'KC' ? NFKC($_[0]) :
42 croak $PACKAGE."::normalize: invalid form name: $form";
50 Unicode::Normalize - normalized forms of Unicode text
54 use Unicode::Normalize;
56 $string_NFD = NFD($raw_string); # Normalization Form D
57 $string_NFC = NFC($raw_string); # Normalization Form C
58 $string_NFKD = NFKD($raw_string); # Normalization Form KD
59 $string_NFKC = NFKC($raw_string); # Normalization Form KC
63 use Unicode::Normalize 'normalize';
65 $string_NFD = normalize('D', $raw_string); # Normalization Form D
66 $string_NFC = normalize('C', $raw_string); # Normalization Form C
67 $string_NFKD = normalize('KD', $raw_string); # Normalization Form KD
68 $string_NFKC = normalize('KC', $raw_string); # Normalization Form KC
72 =head2 Normalization Forms
76 =item C<$string_NFD = NFD($raw_string)>
78 returns the Normalization Form D (formed by canonical decomposition).
81 =item C<$string_NFC = NFC($raw_string)>
83 returns the Normalization Form C (formed by canonical decomposition
84 followed by canonical composition).
86 =item C<$string_NFKD = NFKD($raw_string)>
88 returns the Normalization Form KD (formed by compatibility decomposition).
90 =item C<$string_NFKC = NFKC($raw_string)>
92 returns the Normalization Form KC (formed by compatibility decomposition
93 followed by B<canonical> composition).
95 =item C<$normalized_string = normalize($form_name, $raw_string)>
C<$form_name> must be one of the following names.
99 'C' or 'NFC' for Normalization Form C
100 'D' or 'NFD' for Normalization Form D
101 'KC' or 'NFKC' for Normalization Form KC
102 'KD' or 'NFKD' for Normalization Form KD
106 =head2 Character Data
These functions provide an interface to the character data used internally.
109 If you want only to get Unicode normalization forms, you don't need
114 =item C<$canonical_decomposed = getCanon($codepoint)>
116 =item C<$compatibility_decomposed = getCompat($codepoint)>
If the character at the specified codepoint has a canonical or
compatibility decomposition (Hangul Syllables included),
returns the B<completely decomposed> string equivalent to it.
122 If it is not decomposable, returns undef.
124 =item C<$uv_composite = getComposite($uv_here, $uv_next)>
126 If two characters here and next (as codepoints) are composable
127 (including Hangul Jamo/Syllables and Exclusions),
128 returns the codepoint of the composite.
130 If they are not composable, returns undef.
132 =item C<$combining_class = getCombinClass($codepoint)>
Returns the combining class of the character as an integer.
136 =item C<$is_exclusion = isExclusion($codepoint)>
Returns a boolean indicating whether the character at the specified
codepoint is a composition exclusion.
145 C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default.
C<normalize> and some other functions: on request.
151 SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
153 http://homepage1.nifty.com/nomenclator/perl/
155 Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved.
157 This program is free software; you can redistribute it and/or
158 modify it under the same terms as Perl itself.
164 =item http://www.unicode.org/unicode/reports/tr15/
166 Unicode Normalization Forms - UAX #15