Commit | Line | Data |
ac5ea531 |
1 | package Unicode::Normalize; |
2 | |
4a2e806c |
3 | BEGIN { |
1efaba7f |
4 | unless ("A" eq pack('U', 0x41)) { |
9f1f04a1 |
5 | die "Unicode::Normalize cannot stringify a Unicode code point\n"; |
4a2e806c |
6 | } |
7 | } |
8 | |
ac5ea531 |
9 | use 5.006; |
10 | use strict; |
11 | use warnings; |
12 | use Carp; |
13 | |
e524f5b2 |
14 | no warnings 'utf8'; |
15 | |
51683ce6 |
16 | our $VERSION = '1.03'; |
ac5ea531 |
17 | our $PACKAGE = __PACKAGE__; |
18 | |
19 | require Exporter; |
20 | require DynaLoader; |
ac5ea531 |
21 | |
22 | our @ISA = qw(Exporter DynaLoader); |
23 | our @EXPORT = qw( NFC NFD NFKC NFKD ); |
2a204b45 |
24 | our @EXPORT_OK = qw( |
25 | normalize decompose reorder compose |
8f118dcd |
26 | checkNFD checkNFKD checkNFC checkNFKC check |
27 | getCanon getCompat getComposite getCombinClass |
28 | isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex |
29 | isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE |
82e740b6 |
30 | FCD checkFCD FCC checkFCC composeContiguous |
31 | splitOnLastStarter |
8f118dcd |
32 | ); |
33 | our %EXPORT_TAGS = ( |
34 | all => [ @EXPORT, @EXPORT_OK ], |
35 | normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ], |
36 | check => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ], |
82e740b6 |
37 | fast => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ], |
2a204b45 |
38 | ); |
ac5ea531 |
39 | |
82e740b6 |
40 | ###### |
41 | |
ac5ea531 |
42 | bootstrap Unicode::Normalize $VERSION; |
43 | |
82e740b6 |
44 | ###### |
45 | |
fe067ad9 |
46 | ## |
47 | ## utilities for tests |
48 | ## |
49 | |
9f1f04a1 |
50 | sub pack_U { |
b8d10bc1 |
51 | return pack('U*', @_); |
9f1f04a1 |
52 | } |
53 | |
54 | sub unpack_U { |
fe067ad9 |
55 | return unpack('U*', shift(@_).pack('U*')); |
9f1f04a1 |
56 | } |
57 | |
82e740b6 |
58 | |
59 | ## |
60 | ## normalization forms |
61 | ## |
62 | |
82e740b6 |
63 | sub FCD ($) { |
64 | my $str = shift; |
65 | return checkFCD($str) ? $str : NFD($str); |
66 | } |
82e740b6 |
67 | |
68 | our %formNorm = ( |
69 | NFC => \&NFC, C => \&NFC, |
70 | NFD => \&NFD, D => \&NFD, |
71 | NFKC => \&NFKC, KC => \&NFKC, |
72 | NFKD => \&NFKD, KD => \&NFKD, |
73 | FCD => \&FCD, FCC => \&FCC, |
74 | ); |
75 | |
ac5ea531 |
76 | sub normalize($$) |
77 | { |
d85850a7 |
78 | my $form = shift; |
f027f502 |
79 | my $str = shift; |
fe067ad9 |
80 | if (exists $formNorm{$form}) { |
81 | return $formNorm{$form}->($str); |
82 | } |
83 | croak($PACKAGE."::normalize: invalid form name: $form"); |
ac5ea531 |
84 | } |
85 | |
82e740b6 |
86 | |
87 | ## |
88 | ## quick check |
89 | ## |
90 | |
91 | our %formCheck = ( |
92 | NFC => \&checkNFC, C => \&checkNFC, |
93 | NFD => \&checkNFD, D => \&checkNFD, |
94 | NFKC => \&checkNFKC, KC => \&checkNFKC, |
95 | NFKD => \&checkNFKD, KD => \&checkNFKD, |
96 | FCD => \&checkFCD, FCC => \&checkFCC, |
97 | ); |
98 | |
8f118dcd |
99 | sub check($$) |
100 | { |
101 | my $form = shift; |
f027f502 |
102 | my $str = shift; |
fe067ad9 |
103 | if (exists $formCheck{$form}) { |
104 | return $formCheck{$form}->($str); |
105 | } |
106 | croak($PACKAGE."::check: invalid form name: $form"); |
8f118dcd |
107 | } |
108 | |
ac5ea531 |
109 | 1; |
110 | __END__ |
2a204b45 |
111 | |
112 | =head1 NAME |
113 | |
f027f502 |
114 | Unicode::Normalize - Unicode Normalization Forms |
2a204b45 |
115 | |
116 | =head1 SYNOPSIS |
117 | |
a092bcfd |
118 | (1) using function names exported by default: |
119 | |
2a204b45 |
120 | use Unicode::Normalize; |
121 | |
8f118dcd |
122 | $NFD_string = NFD($string); # Normalization Form D |
123 | $NFC_string = NFC($string); # Normalization Form C |
124 | $NFKD_string = NFKD($string); # Normalization Form KD |
125 | $NFKC_string = NFKC($string); # Normalization Form KC |
2a204b45 |
126 | |
a092bcfd |
127 | (2) using function names exported on request: |
2a204b45 |
128 | |
129 | use Unicode::Normalize 'normalize'; |
130 | |
8f118dcd |
131 | $NFD_string = normalize('D', $string); # Normalization Form D |
132 | $NFC_string = normalize('C', $string); # Normalization Form C |
133 | $NFKD_string = normalize('KD', $string); # Normalization Form KD |
134 | $NFKC_string = normalize('KC', $string); # Normalization Form KC |
2a204b45 |
135 | |
136 | =head1 DESCRIPTION |
137 | |
00f2676f |
138 | Parameters: |
139 | |
fe067ad9 |
140 | C<$string> is used as a string under character semantics (see F<perlunicode>). |
00f2676f |
141 | |
fe067ad9 |
142 | C<$code_point> should be an unsigned integer representing a Unicode code point. |
00f2676f |
143 | |
628bbff0 |
144 | Note: Between XSUB and pure Perl, there is an incompatibility |
fe067ad9 |
145 | about the interpretation of C<$code_point> as a decimal number. |
146 | XSUB converts C<$code_point> to an unsigned integer, but pure Perl does not. |
147 | Do not use a floating-point value or a negative sign in C<$code_point>. |
00f2676f |
148 | |
d85850a7 |
149 | =head2 Normalization Forms |
2a204b45 |
150 | |
151 | =over 4 |
152 | |
8f118dcd |
153 | =item C<$NFD_string = NFD($string)> |
2a204b45 |
154 | |
fe067ad9 |
155 | It returns the Normalization Form D (formed by canonical decomposition). |
2a204b45 |
156 | |
8f118dcd |
157 | =item C<$NFC_string = NFC($string)> |
2a204b45 |
158 | |
fe067ad9 |
159 | It returns the Normalization Form C (formed by canonical decomposition |
2a204b45 |
160 | followed by canonical composition). |
161 | |
8f118dcd |
162 | =item C<$NFKD_string = NFKD($string)> |
2a204b45 |
163 | |
fe067ad9 |
164 | It returns the Normalization Form KD (formed by compatibility decomposition). |
2a204b45 |
165 | |
8f118dcd |
166 | =item C<$NFKC_string = NFKC($string)> |
2a204b45 |
167 | |
fe067ad9 |
168 | It returns the Normalization Form KC (formed by compatibility decomposition |
2a204b45 |
169 | followed by B<canonical> composition). |
170 | |
82e740b6 |
171 | =item C<$FCD_string = FCD($string)> |
172 | |
173 | If the given string is in FCD ("Fast C or D" form; cf. UTN #5), |
fe067ad9 |
174 | it returns the string without modification; otherwise it returns an FCD string. |
82e740b6 |
175 | |
176 | Note: FCD is not always unique, so several distinct forms may be equivalent |
177 | to each other. C<FCD()> will return one of these equivalent forms. |
178 | |
179 | =item C<$FCC_string = FCC($string)> |
180 | |
fe067ad9 |
181 | It returns the FCC form ("Fast C Contiguous"; cf. UTN #5). |
82e740b6 |
182 | |
e524f5b2 |
183 | Note: FCC is unique, as are the four normalization forms (NF*). |
82e740b6 |
184 | |
8f118dcd |
185 | =item C<$normalized_string = normalize($form_name, $string)> |
2a204b45 |
186 | |
fe067ad9 |
187 | It returns the normalization form of C<$form_name>. |
188 | |
2a204b45 |
189 | As C<$form_name>, one of the following names must be given. |
190 | |
82e740b6 |
191 | 'C' or 'NFC' for Normalization Form C (UAX #15) |
192 | 'D' or 'NFD' for Normalization Form D (UAX #15) |
193 | 'KC' or 'NFKC' for Normalization Form KC (UAX #15) |
194 | 'KD' or 'NFKD' for Normalization Form KD (UAX #15) |
195 | |
196 | 'FCD' for "Fast C or D" Form (UTN #5) |
197 | 'FCC' for "Fast C Contiguous" (UTN #5) |
2a204b45 |
198 | |
199 | =back |
200 | |
8f118dcd |
201 | =head2 Decomposition and Composition |
202 | |
203 | =over 4 |
204 | |
fe067ad9 |
205 | =item C<$decomposed_string = decompose($string [, $useCompatMapping])> |
8f118dcd |
206 | |
fe067ad9 |
207 | It returns the concatenation of the decomposition of each character |
208 | in the string. |
8f118dcd |
209 | |
fe067ad9 |
210 | If the second parameter (a boolean) is omitted or false, |
211 | the decomposition is canonical decomposition; |
212 | if the second parameter (a boolean) is true, |
213 | the decomposition is compatibility decomposition. |
8f118dcd |
214 | |
fe067ad9 |
215 | The string returned is not always in NFD/NFKD. Reordering may be required. |
8f118dcd |
216 | |
217 | $NFD_string = reorder(decompose($string)); # eq. to NFD() |
218 | $NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD() |
219 | |
fe067ad9 |
220 | =item C<$reordered_string = reorder($string)> |
8f118dcd |
221 | |
fe067ad9 |
222 | It returns the result of reordering the combining characters |
223 | according to Canonical Ordering Behavior. |
8f118dcd |
224 | |
fe067ad9 |
225 | For example, when you have a list of NFD/NFKD strings, |
226 | you can get the concatenated NFD/NFKD string from them, by saying |
8f118dcd |
227 | |
228 | $concat_NFD = reorder(join '', @NFD_strings); |
229 | $concat_NFKD = reorder(join '', @NFKD_strings); |
230 | |
fe067ad9 |
231 | =item C<$composed_string = compose($string)> |
8f118dcd |
232 | |
fe067ad9 |
233 | It returns the result of canonical composition |
234 | without applying any decomposition. |
8f118dcd |
235 | |
fe067ad9 |
236 | For example, when you have an NFD/NFKD string, |
237 | you can get its NFC/NFKC string, by saying |
8f118dcd |
238 | |
239 | $NFC_string = compose($NFD_string); |
240 | $NFKC_string = compose($NFKD_string); |
241 | |
242 | =back |
243 | |
244 | =head2 Quick Check |
245 | |
82e740b6 |
246 | (see Annex 8, UAX #15; and F<DerivedNormalizationProps.txt>) |
8f118dcd |
247 | |
248 | The following functions check whether the string is in that normalization form. |
249 | |
fe067ad9 |
250 | The result returned will be one of the following: |
8f118dcd |
251 | |
252 | YES The string is in that normalization form. |
253 | NO The string is not in that normalization form. |
254 | MAYBE Dubious. Maybe yes, maybe no. |
255 | |
256 | =over 4 |
257 | |
258 | =item C<$result = checkNFD($string)> |
259 | |
fe067ad9 |
260 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. |
8f118dcd |
261 | |
262 | =item C<$result = checkNFC($string)> |
263 | |
fe067ad9 |
264 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; |
628bbff0 |
265 | C<undef> if C<MAYBE>. |
8f118dcd |
266 | |
267 | =item C<$result = checkNFKD($string)> |
268 | |
fe067ad9 |
269 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. |
8f118dcd |
270 | |
271 | =item C<$result = checkNFKC($string)> |
272 | |
fe067ad9 |
273 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; |
628bbff0 |
274 | C<undef> if C<MAYBE>. |
8f118dcd |
275 | |
82e740b6 |
276 | =item C<$result = checkFCD($string)> |
277 | |
fe067ad9 |
278 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. |
82e740b6 |
279 | |
280 | =item C<$result = checkFCC($string)> |
281 | |
fe067ad9 |
282 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; |
628bbff0 |
283 | C<undef> if C<MAYBE>. |
82e740b6 |
284 | |
fe067ad9 |
285 | Note: If a string is not in FCD, it cannot be in FCC. |
82e740b6 |
286 | So C<checkFCC($not_FCD_string)> should return C<NO>. |
287 | |
8f118dcd |
288 | =item C<$result = check($form_name, $string)> |
289 | |
fe067ad9 |
290 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; |
628bbff0 |
291 | C<undef> if C<MAYBE>. |
8f118dcd |
292 | |
628bbff0 |
293 | As C<$form_name>, one of the following names must be given. |
294 | |
295 | 'C' or 'NFC' for Normalization Form C (UAX #15) |
296 | 'D' or 'NFD' for Normalization Form D (UAX #15) |
297 | 'KC' or 'NFKC' for Normalization Form KC (UAX #15) |
298 | 'KD' or 'NFKD' for Normalization Form KD (UAX #15) |
299 | |
300 | 'FCD' for "Fast C or D" Form (UTN #5) |
301 | 'FCC' for "Fast C Contiguous" (UTN #5) |
8f118dcd |
302 | |
303 | =back |
304 | |
305 | B<Note> |
306 | |
82e740b6 |
307 | In the cases of NFD, NFKD, and FCD, the answer must be |
308 | either C<YES> or C<NO>. The answer C<MAYBE> may be returned |
309 | in the cases of NFC, NFKC, and FCC. |
8f118dcd |
310 | |
82e740b6 |
311 | A C<MAYBE> string should contain at least one combining character |
312 | or the like. For example, C<COMBINING ACUTE ACCENT> has |
8f118dcd |
313 | the MAYBE_NFC/MAYBE_NFKC property. |
82e740b6 |
314 | |
8f118dcd |
315 | Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")> |
316 | and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>. |
f027f502 |
317 | C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC |
8f118dcd |
318 | (its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">), |
319 | while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC. |
320 | |
628bbff0 |
321 | If you want to check exactly, compare the string with its NFC/NFKC/FCC. |
322 | |
323 | if ($string eq NFC($string)) { |
324 | # $string is exactly normalized in NFC; |
325 | } else { |
326 | # $string is not normalized in NFC; |
327 | } |
8f118dcd |
328 | |
628bbff0 |
329 | if ($string eq NFKC($string)) { |
330 | # $string is exactly normalized in NFKC; |
331 | } else { |
332 | # $string is not normalized in NFKC; |
333 | } |
8f118dcd |
334 | |
2a204b45 |
335 | =head2 Character Data |
336 | |
337 | These functions are an interface to the character data used internally. |
d0ed0342 |
338 | If you only want to get Unicode normalization forms, you don't need |
339 | to call them yourself. |
2a204b45 |
340 | |
341 | =over 4 |
342 | |
fe067ad9 |
343 | =item C<$canonical_decomposition = getCanon($code_point)> |
2a204b45 |
344 | |
fe067ad9 |
345 | If the character is canonically decomposable (including Hangul Syllables), |
346 | it returns the (full) canonical decomposition as a string. |
347 | Otherwise it returns C<undef>. |
8f118dcd |
348 | |
fe067ad9 |
349 | B<Note:> According to the Unicode standard, the canonical decomposition |
350 | of a character that is not canonically decomposable is the same as |
351 | the character itself. |
8f118dcd |
352 | |
fe067ad9 |
353 | =item C<$compatibility_decomposition = getCompat($code_point)> |
2a204b45 |
354 | |
fe067ad9 |
355 | If the character is compatibility decomposable (including Hangul Syllables), |
356 | it returns the (full) compatibility decomposition as a string. |
357 | Otherwise it returns C<undef>. |
2a204b45 |
358 | |
fe067ad9 |
359 | B<Note:> According to the Unicode standard, the compatibility decomposition |
360 | of a character that is not compatibility decomposable is the same as |
361 | the character itself. |
2a204b45 |
362 | |
fe067ad9 |
363 | =item C<$code_point_composite = getComposite($code_point_here, $code_point_next)> |
2a204b45 |
364 | |
fe067ad9 |
365 | If two characters here and next (as code points) are composable |
8f118dcd |
366 | (including Hangul Jamo/Syllables and Composition Exclusions), |
fe067ad9 |
367 | it returns the code point of the composite. |
368 | |
369 | If they are not composable, it returns C<undef>. |
2a204b45 |
370 | |
fe067ad9 |
371 | =item C<$combining_class = getCombinClass($code_point)> |
2a204b45 |
372 | |
fe067ad9 |
373 | It returns the combining class (as an integer) of the character. |
2a204b45 |
374 | |
fe067ad9 |
375 | =item C<$may_be_composed_with_prev_char = isComp2nd($code_point)> |
2a204b45 |
376 | |
fe067ad9 |
377 | It returns a boolean whether the character of the specified codepoint |
378 | may be composed with the previous one in a certain composition |
379 | (including Hangul Compositions, but excluding |
380 | Composition Exclusions and Non-Starter Decompositions). |
2a204b45 |
381 | |
fe067ad9 |
382 | =item C<$is_exclusion = isExclusion($code_point)> |
8f118dcd |
383 | |
fe067ad9 |
384 | It returns a boolean whether the code point is a composition exclusion. |
8f118dcd |
385 | |
fe067ad9 |
386 | =item C<$is_singleton = isSingleton($code_point)> |
8f118dcd |
387 | |
fe067ad9 |
388 | It returns a boolean whether the code point is a singleton. |
8f118dcd |
389 | |
fe067ad9 |
390 | =item C<$is_non_starter_decomposition = isNonStDecomp($code_point)> |
8f118dcd |
391 | |
fe067ad9 |
392 | It returns a boolean whether the code point has Non-Starter Decomposition. |
8f118dcd |
393 | |
fe067ad9 |
394 | =item C<$is_Full_Composition_Exclusion = isComp_Ex($code_point)> |
395 | |
396 | It returns a boolean of the derived property Comp_Ex |
397 | (Full_Composition_Exclusion). This property is generated from |
398 | Composition Exclusions + Singletons + Non-Starter Decompositions. |
399 | |
400 | =item C<$NFD_is_NO = isNFD_NO($code_point)> |
401 | |
402 | It returns a boolean of the derived property NFD_NO |
403 | (NFD_Quick_Check=No). |
404 | |
405 | =item C<$NFC_is_NO = isNFC_NO($code_point)> |
406 | |
407 | It returns a boolean of the derived property NFC_NO |
408 | (NFC_Quick_Check=No). |
409 | |
410 | =item C<$NFC_is_MAYBE = isNFC_MAYBE($code_point)> |
411 | |
412 | It returns a boolean of the derived property NFC_MAYBE |
413 | (NFC_Quick_Check=Maybe). |
414 | |
415 | =item C<$NFKD_is_NO = isNFKD_NO($code_point)> |
416 | |
417 | It returns a boolean of the derived property NFKD_NO |
418 | (NFKD_Quick_Check=No). |
419 | |
420 | =item C<$NFKC_is_NO = isNFKC_NO($code_point)> |
421 | |
422 | It returns a boolean of the derived property NFKC_NO |
423 | (NFKC_Quick_Check=No). |
424 | |
425 | =item C<$NFKC_is_MAYBE = isNFKC_MAYBE($code_point)> |
426 | |
427 | It returns a boolean of the derived property NFKC_MAYBE |
428 | (NFKC_Quick_Check=Maybe). |
2a204b45 |
429 | |
430 | =back |
431 | |
628bbff0 |
432 | =head1 EXPORT |
2a204b45 |
433 | |
434 | C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default. |
435 | |
436 | C<normalize> and some other functions: on request. |
437 | |
628bbff0 |
438 | =head1 CAVEATS |
439 | |
440 | =over 4 |
441 | |
442 | =item Perl's version vs. Unicode version |
443 | |
444 | Since this module refers to perl core's Unicode database in the directory |
445 | F</lib/unicore> (or formerly F</lib/unicode>), the Unicode version of |
446 | normalization implemented by this module depends on your perl's version. |
447 | |
fe067ad9 |
448 | perl's version implemented Unicode version |
449 | 5.6.1 3.0.1 |
450 | 5.7.2 3.1.0 |
451 | 5.7.3 3.1.1 (normalization is same as 3.1.0) |
452 | 5.8.0 3.2.0 |
453 | 5.8.1-5.8.3 4.0.0 |
454 | 5.8.4-5.8.6 4.0.1 (normalization is same as 4.0.0) |
455 | 5.8.7-5.8.8 4.1.0 |
51683ce6 |
456 | 5.10.0 5.0.0 |
457 | 5.8.9 5.1.0 |
628bbff0 |
458 | |
459 | =item Correction of decomposition mapping |
460 | |
461 | In older Unicode versions, a small number of characters (all of which are |
462 | CJK compatibility ideographs as far as they have been found) may have |
463 | an erroneous decomposition mapping (see F<NormalizationCorrections.txt>). |
464 | Anyhow, this module will neither refer to F<NormalizationCorrections.txt> |
465 | nor provide any specific version of normalization. Therefore this module |
466 | running on an older perl with an older Unicode database may use |
467 | the erroneous decomposition mapping blindly conforming to the Unicode database. |
468 | |
469 | =item Revised definition of canonical composition |
470 | |
471 | In Unicode 4.1.0, the definition D2 of canonical composition (which |
472 | affects NFC and NFKC) has been changed (see Public Review Issue #29 |
473 | and recent UAX #15). This module has used the newer definition |
474 | since the version 0.07 (Oct 31, 2001). |
2b8d773d |
475 | This module will not support the normalization according to the older |
628bbff0 |
476 | definition, even if the Unicode version implemented by perl is |
477 | lower than 4.1.0. |
478 | |
479 | =back |
480 | |
2a204b45 |
481 | =head1 AUTHOR |
482 | |
a092bcfd |
483 | SADAHIRO Tomoyuki <SADAHIRO@cpan.org> |
2a204b45 |
484 | |
2b8d773d |
485 | Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved. |
2a204b45 |
486 | |
628bbff0 |
487 | This module is free software; you can redistribute it |
488 | and/or modify it under the same terms as Perl itself. |
2a204b45 |
489 | |
490 | =head1 SEE ALSO |
491 | |
492 | =over 4 |
493 | |
e524f5b2 |
494 | =item http://www.unicode.org/reports/tr15/ |
2a204b45 |
495 | |
496 | Unicode Normalization Forms - UAX #15 |
497 | |
fe067ad9 |
498 | =item http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt |
499 | |
500 | Composition Exclusion Table |
501 | |
14e6b36c |
502 | =item http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt |
8f118dcd |
503 | |
504 | Derived Normalization Properties |
505 | |
628bbff0 |
506 | =item http://www.unicode.org/Public/UNIDATA/NormalizationCorrections.txt |
507 | |
508 | Normalization Corrections |
509 | |
510 | =item http://www.unicode.org/review/pr-29.html |
511 | |
512 | Public Review Issue #29: Normalization Issue |
513 | |
82e740b6 |
514 | =item http://www.unicode.org/notes/tn5/ |
515 | |
516 | Canonical Equivalence in Applications - UTN #5 |
517 | |
2a204b45 |
518 | =back |
519 | |
520 | =cut |