# Package declaration for the Unicode::Normalize module.
1 package Unicode::Normalize;
# Package name kept in a variable; it is used as the prefix of the
# "invalid form name" croak message further down in this file.
9 our $PACKAGE = __PACKAGE__;
# Parent classes: Exporter and DynaLoader.
15 our @ISA = qw(Exporter DynaLoader);
# Functions exported by default.
16 our @EXPORT = qw( NFC NFD NFKC NFKD );
18 normalize decompose reorder compose
19 getCanon getCompat getComposite getCombinClass isExclusion
# The 'all' tag lists everything in @EXPORT followed by everything in @EXPORT_OK.
21 our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );
# Bootstrap this package (DynaLoader is listed in @ISA), passing $VERSION.
23 bootstrap Unicode::Normalize $VERSION;
# Flags passed as the second argument to decompose():
# CANON is used by NFD and NFC, COMPAT by NFKD and NFKC.
25 use constant CANON => 0;
26 use constant COMPAT => 1;
# NFD($string): returns Normalization Form D of the argument.
# Pipeline: decompose() with the CANON flag, then reorder().
sub NFD ($) {
    my $decomposed = decompose($_[0], CANON);
    return reorder($decomposed);
}
# NFKD($string): returns Normalization Form KD of the argument.
# Pipeline: decompose() with the COMPAT flag, then reorder().
sub NFKD ($) {
    my $decomposed = decompose($_[0], COMPAT);
    return reorder($decomposed);
}
# NFC($string): returns Normalization Form C of the argument.
# Pipeline: decompose() with the CANON flag, then reorder(), then compose().
sub NFC ($) {
    my $decomposed = decompose($_[0], CANON);
    my $reordered  = reorder($decomposed);
    return compose($reordered);
}
# NFKC($string): returns Normalization Form KC of the argument.
# Pipeline: decompose() with the COMPAT flag, then reorder(), then compose().
sub NFKC ($) {
    my $decomposed = decompose($_[0], COMPAT);
    my $reordered  = reorder($decomposed);
    return compose($reordered);
}
38 $form eq 'D' ? NFD ($_[0]) :
39 $form eq 'C' ? NFC ($_[0]) :
40 $form eq 'KD' ? NFKD($_[0]) :
41 $form eq 'KC' ? NFKC($_[0]) :
42 croak $PACKAGE."::normalize: invalid form name: $form";
50 Unicode::Normalize - normalized forms of Unicode text
54 use Unicode::Normalize;
56 $string_NFD = NFD($raw_string); # Normalization Form D
57 $string_NFC = NFC($raw_string); # Normalization Form C
58 $string_NFKD = NFKD($raw_string); # Normalization Form KD
59 $string_NFKC = NFKC($raw_string); # Normalization Form KC
63 use Unicode::Normalize 'normalize';
65 $string_NFD = normalize('D', $raw_string); # Normalization Form D
66 $string_NFC = normalize('C', $raw_string); # Normalization Form C
67 $string_NFKD = normalize('KD', $raw_string); # Normalization Form KD
68 $string_NFKC = normalize('KC', $raw_string); # Normalization Form KC
76 =item C<$string_NFD = NFD($raw_string)>
78 returns the Normalization Form D (formed by canonical decomposition).
81 =item C<$string_NFC = NFC($raw_string)>
83 returns the Normalization Form C (formed by canonical decomposition
84 followed by canonical composition).
86 =item C<$string_NFKD = NFKD($raw_string)>
88 returns the Normalization Form KD (formed by compatibility decomposition).
90 =item C<$string_NFKC = NFKC($raw_string)>
92 returns the Normalization Form KC (formed by compatibility decomposition
93 followed by B<canonical> composition).
95 =item C<$normalized_string = normalize($form_name, $raw_string)>
97 As C<$form_name>, one of the following names must be given.
99 'C' or 'NFC' for Normalization Form C
100 'D' or 'NFD' for Normalization Form D
101 'KC' or 'NFKC' for Normalization Form KC
102 'KD' or 'NFKD' for Normalization Form KD
106 =head2 Character Data
108 These functions are an interface to the character data used internally.
109 If you only want to get Unicode normalization forms,
110 you do not need to call them yourself.
114 =item C<$canonical_decomposed = getCanon($codepoint)>
116 =item C<$compatibility_decomposed = getCompat($codepoint)>
118 If the character of the specified codepoint is canonically or
119 compatibility decomposable (including Hangul Syllables),
120 returns the B<completely decomposed> string equivalent to it.
122 If it is not decomposable, returns undef.
124 =item C<$uv_composite = getComposite($uv_here, $uv_next)>
126 If the pair of characters here and next (given as codepoints) is composable
127 (including Hangul Jamo/Syllables and Exclusions),
128 returns the codepoint of the composite.
130 If they are not composable, returns undef.
132 =item C<$combining_class = getCombinClass($codepoint)>
134 Returns the combining class of the character as an integer.
136 =item C<$is_exclusion = isExclusion($codepoint)>
138 Returns a boolean indicating whether the character of the specified
139 codepoint is a composition exclusion.
145 C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default.
147 C<normalize> and some other functions: on request.
151 SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
153 http://homepage1.nifty.com/nomenclator/perl/
155 Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved.
157 This program is free software; you can redistribute it and/or
158 modify it under the same terms as Perl itself.
164 =item http://www.unicode.org/unicode/reports/tr15/
166 Unicode Normalization Forms - UAX #15