[p5sagit/p5-mst-13.2.git] / ext / Unicode / Normalize / Normalize.pm

package Unicode::Normalize;

# Compile-time guard: refuse to load the module unless pack('U', ...)
# produces the character "A" from either the Unicode code point (0x41)
# or the platform-native code point (ord("A")).
BEGIN {
    if ("A" ne pack('U', 0x41) && "A" ne pack('U', ord("A"))) {
        die "Unicode::Normalize cannot stringify a Unicode code point\n";
    }
}

use 5.006;
use strict;
use warnings;
use Carp;

# Module version; also handed to bootstrap() below.
our $VERSION = '0.21';
# Package name, interpolated into this module's die/croak messages.
our $PACKAGE = __PACKAGE__;

require Exporter;
require DynaLoader;
# NOTE(review): nothing in the visible Perl code uses AutoLoader --
# confirm before relying on it or removing it.
require AutoLoader;

our @ISA = qw(Exporter DynaLoader);
# Exported by default.
our @EXPORT = qw( NFC NFD NFKC NFKD );
# Exported only on request.
our @EXPORT_OK = qw(
    normalize decompose reorder compose
    checkNFD checkNFKD checkNFC checkNFKC check
    getCanon getCompat getComposite getCombinClass
    isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
    isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
);
# Import tags: ':all' is everything exportable, ':normalize' the default
# exports plus the normalization building blocks, ':check' the quick-check
# functions.
our %EXPORT_TAGS = (
    all       => [ @EXPORT, @EXPORT_OK ],
    normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ],
    check     => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ],
);

# Load the compiled part of the module through DynaLoader.
# Presumably this is what defines decompose/reorder/compose, the check*
# functions and the character-data functions, none of which are defined
# in Perl in this file -- TODO confirm against the XS source.
bootstrap Unicode::Normalize $VERSION;

# Compile-time probes of how pack 'U' treats its argument on this platform:
# UNICODE_FOR_PACK is true when the Unicode code point 0x41 packs to "A",
# NATIVE_FOR_PACK when the native code point ord("A") does. Where
# ord("A") == 0x41 both are true. pack_U() picks its strategy from these.
use constant UNICODE_FOR_PACK => "A" eq pack('U', 0x41);
use constant NATIVE_FOR_PACK  => "A" eq pack('U', ord("A"));

# The same probes for the value unpack 'U' returns for "A";
# unpack_U() picks its strategy from these.
use constant UNICODE_FOR_UNPACK => 0x41 == unpack('U', "A");
use constant NATIVE_FOR_UNPACK  => ord("A") == unpack('U', "A");

# Build a string from a list of Unicode code points.
#
# Arguments: the code points, one per list element.
# Returns:   pack('U*', ...) over those code points; when pack 'U' only
#            accepts native code points, each one is first mapped through
#            utf8::unicode_to_native().
# Dies if neither UNICODE_FOR_PACK nor NATIVE_FOR_PACK is true.
sub pack_U {
    if (UNICODE_FOR_PACK) {
        return pack('U*', @_);
    }
    elsif (NATIVE_FOR_PACK) {
        return pack('U*', map utf8::unicode_to_native($_), @_);
    }
    die "$PACKAGE, a Unicode code point cannot be stringified.\n";
}

# Split a string into its Unicode code points.
#
# Argument: the string to unpack (first element of @_).
# Returns:  unpack('U*', ...) of that string; when unpack 'U' yields native
#           code points, each one is mapped through
#           utf8::native_to_unicode().
# Dies if neither UNICODE_FOR_UNPACK nor NATIVE_FOR_UNPACK is true.
sub unpack_U {
    if (UNICODE_FOR_UNPACK) {
        return unpack('U*', shift);
    }
    elsif (NATIVE_FOR_UNPACK) {
        return map(utf8::native_to_unicode($_), unpack 'U*', shift);
    }
    die "$PACKAGE, a code point returned from unpack U " .
        "cannot be converted into Unicode.\n";
}

# True value passed as decompose()'s second argument to select the
# compatibility decomposition mapping instead of the canonical one.
use constant COMPAT => 1;

# Normalization Form D: canonical decomposition, then reordering.
sub NFD ($) {
    return reorder(decompose($_[0]));
}

# Normalization Form KD: compatibility decomposition, then reordering.
sub NFKD ($) {
    return reorder(decompose($_[0], COMPAT));
}

# Normalization Form C: NFD followed by composition.
sub NFC ($) {
    return compose(reorder(decompose($_[0])));
}

# Normalization Form KC: NFKD followed by composition.
sub NFKC ($) {
    return compose(reorder(decompose($_[0], COMPAT)));
}

# Normalize a string into the form selected by name.
#
# Arguments:
#   $form - 'C', 'D', 'KC' or 'KD', optionally prefixed with 'NF'
#           (e.g. 'NFC').
#   $str  - the string to normalize.
# Returns: whatever NFC/NFD/NFKC/NFKD returns for $str.
# Croaks:  on any other form name, quoting the name exactly as the caller
#          passed it (the 'NF' prefix is no longer stripped from the
#          reported name, so 'NFX' is reported as 'NFX', not 'X').
sub normalize($$)
{
    my ($form, $str) = @_;

    # Strip the optional prefix on a copy so that $form keeps the
    # caller's spelling for the error message below.
    (my $key = $form) =~ s/^NF//;

    return NFD ($str) if $key eq 'D';
    return NFC ($str) if $key eq 'C';
    return NFKD($str) if $key eq 'KD';
    return NFKC($str) if $key eq 'KC';

    croak $PACKAGE."::normalize: invalid form name: $form";
}

# Quick-check a string against the normalization form selected by name.
#
# Arguments:
#   $form - 'C', 'D', 'KC' or 'KD', optionally prefixed with 'NF'
#           (e.g. 'NFKD').
#   $str  - the string to check.
# Returns: whatever checkNFC/checkNFD/checkNFKC/checkNFKD returns for $str.
# Croaks:  on any other form name, quoting the name exactly as the caller
#          passed it (the 'NF' prefix is no longer stripped from the
#          reported name, so 'NFX' is reported as 'NFX', not 'X').
sub check($$)
{
    my ($form, $str) = @_;

    # Strip the optional prefix on a copy so that $form keeps the
    # caller's spelling for the error message below.
    (my $key = $form) =~ s/^NF//;

    return checkNFD ($str) if $key eq 'D';
    return checkNFC ($str) if $key eq 'C';
    return checkNFKD($str) if $key eq 'KD';
    return checkNFKC($str) if $key eq 'KC';

    croak $PACKAGE."::check: invalid form name: $form";
}

1;
__END__

=head1 NAME

Unicode::Normalize - Unicode Normalization Forms

=head1 SYNOPSIS

  use Unicode::Normalize;

  $NFD_string  = NFD($string);  # Normalization Form D
  $NFC_string  = NFC($string);  # Normalization Form C
  $NFKD_string = NFKD($string); # Normalization Form KD
  $NFKC_string = NFKC($string); # Normalization Form KC

   or

  use Unicode::Normalize 'normalize';

  $NFD_string  = normalize('D',  $string);  # Normalization Form D
  $NFC_string  = normalize('C',  $string);  # Normalization Form C
  $NFKD_string = normalize('KD', $string);  # Normalization Form KD
  $NFKC_string = normalize('KC', $string);  # Normalization Form KC

=head1 DESCRIPTION

=head2 Normalization Forms

=over 4

=item C<$NFD_string = NFD($string)>

returns the Normalization Form D (formed by canonical decomposition).

=item C<$NFC_string = NFC($string)>

returns the Normalization Form C (formed by canonical decomposition
followed by canonical composition).

=item C<$NFKD_string = NFKD($string)>

returns the Normalization Form KD (formed by compatibility decomposition).

=item C<$NFKC_string = NFKC($string)>

returns the Normalization Form KC (formed by compatibility decomposition
followed by B<canonical> composition).

=item C<$normalized_string = normalize($form_name, $string)>

As C<$form_name>, one of the following names must be given.

  'C'  or 'NFC'  for Normalization Form C
  'D'  or 'NFD'  for Normalization Form D
  'KC' or 'NFKC' for Normalization Form KC
  'KD' or 'NFKD' for Normalization Form KD

=back

=head2 Decomposition and Composition

=over 4

=item C<$decomposed_string = decompose($string)>

=item C<$decomposed_string = decompose($string, $useCompatMapping)>

Decomposes the specified string and returns the result.

If the second parameter (a boolean) is omitted or false, decomposes it
using the Canonical Decomposition Mapping.
If true, decomposes it using the Compatibility Decomposition Mapping.

The string returned is not always in NFD/NFKD.
Reordering may be required.

    $NFD_string  = reorder(decompose($string));       # eq. to NFD()
    $NFKD_string = reorder(decompose($string, 1));    # eq. to NFKD()

=item C<$reordered_string  = reorder($string)>

Reorders the combining characters and the like in the canonical ordering
and returns the result.

E.g., when you have a list of NFD/NFKD strings,
you can get the concatenated NFD/NFKD string from them by saying

    $concat_NFD  = reorder(join '', @NFD_strings);
    $concat_NFKD = reorder(join '', @NFKD_strings);

=item C<$composed_string   = compose($string)>

Returns the string where composable pairs are composed.

E.g., when you have a NFD/NFKD string,
you can get its NFC/NFKC string by saying

    $NFC_string  = compose($NFD_string);
    $NFKC_string = compose($NFKD_string);

=back

=head2 Quick Check

(see Annex 8, UAX #15, and F<DerivedNormalizationProps.txt>)

The following functions check whether the string is in that normalization form.

The result returned will be:

    YES     The string is in that normalization form.
    NO      The string is not in that normalization form.
    MAYBE   Dubious. Maybe yes, maybe no.

=over 4

=item C<$result = checkNFD($string)>

returns C<YES> (C<1>) or C<NO> (C<empty string>).

=item C<$result = checkNFC($string)>

returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>).

=item C<$result = checkNFKD($string)>

returns C<YES> (C<1>) or C<NO> (C<empty string>).

=item C<$result = checkNFKC($string)>

returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>).

=item C<$result = check($form_name, $string)>

returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>).

C<$form_name> takes the same values as for C<normalize()>.

=back

B<Note>

In the cases of NFD and NFKD, the answer must be either C<YES> or C<NO>.
The answer C<MAYBE> may be returned in the cases of NFC and NFKC.

A MAYBE-NFC/NFKC string should contain at least
one combining character or the like.
For example, C<COMBINING ACUTE ACCENT> has
the MAYBE_NFC/MAYBE_NFKC property.
Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")>
and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>.
C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC
(its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">),
while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC.

If you want to check exactly, compare the string with its NFC/NFKC; i.e.,

    $string eq NFC($string)    # more thorough than checkNFC($string)
    $string eq NFKC($string)   # more thorough than checkNFKC($string)

=head2 Character Data

These functions are an interface to the character data used internally.
If you only want to get Unicode normalization forms, you don't need
to call them yourself.

=over 4

=item C<$canonical_decomposed = getCanon($codepoint)>

If the character of the specified codepoint is canonically
decomposable (including Hangul Syllables),
returns the B<completely decomposed> string canonically equivalent to it.

If it is not decomposable, returns C<undef>.

=item C<$compatibility_decomposed = getCompat($codepoint)>

If the character of the specified codepoint is compatibility
decomposable (including Hangul Syllables),
returns the B<completely decomposed> string compatibility equivalent to it.

If it is not decomposable, returns C<undef>.

=item C<$codepoint_composite = getComposite($codepoint_here, $codepoint_next)>

If two characters here and next (as codepoints) are composable
(including Hangul Jamo/Syllables and Composition Exclusions),
returns the codepoint of the composite.

If they are not composable, returns C<undef>.

=item C<$combining_class = getCombinClass($codepoint)>

Returns the combining class of the character as an integer.

=item C<$is_exclusion = isExclusion($codepoint)>

Returns a boolean indicating whether the character of the specified codepoint
is a composition exclusion.

=item C<$is_singleton = isSingleton($codepoint)>

Returns a boolean indicating whether the character of the specified codepoint is
a singleton.

=item C<$is_non_starter_decomposition = isNonStDecomp($codepoint)>

Returns a boolean indicating whether the canonical decomposition
of the character of the specified codepoint
is a Non-Starter Decomposition.

=item C<$may_be_composed_with_prev_char = isComp2nd($codepoint)>

Returns a boolean indicating whether the character of the specified codepoint
may be composed with the previous one in a certain composition
(including Hangul Compositions, but excluding
Composition Exclusions and Non-Starter Decompositions).

=back

=head2 EXPORT

C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default.

C<normalize> and some other functions: on request.

=head1 AUTHOR

SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>

  http://homepage1.nifty.com/nomenclator/perl/

  Copyright(C) 2001-2003, SADAHIRO Tomoyuki. Japan. All rights reserved.

  This module is free software; you can redistribute it
  and/or modify it under the same terms as Perl itself.

=head1 SEE ALSO

=over 4

=item http://www.unicode.org/unicode/reports/tr15/

Unicode Normalization Forms - UAX #15

=item http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt

Derived Normalization Properties

=back

=cut
Commit	Line	Data
ac5ea531	1	package Unicode::Normalize;
ac5ea531	2
4a2e806c	3	BEGIN {
9f1f04a1	4	unless ("A" eq pack('U', 0x41) \|\| "A" eq pack('U', ord("A"))) {
9f1f04a1	5	die "Unicode::Normalize cannot stringify a Unicode code point\n";
4a2e806c	6	}
	7	}
	8
ac5ea531	9	use 5.006;
	10	use strict;
	11	use warnings;
	12	use Carp;
	13
9f1f04a1	14	our $VERSION = '0.21';
ac5ea531	15	our $PACKAGE = __PACKAGE__;
	16
	17	require Exporter;
	18	require DynaLoader;
	19	require AutoLoader;
	20
	21	our @ISA = qw(Exporter DynaLoader);
	22	our @EXPORT = qw( NFC NFD NFKC NFKD );
2a204b45	23	our @EXPORT_OK = qw(
2a204b45	24	normalize decompose reorder compose
8f118dcd	25	checkNFD checkNFKD checkNFC checkNFKC check
	26	getCanon getCompat getComposite getCombinClass
	27	isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
	28	isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
	29	);
	30	our %EXPORT_TAGS = (
	31	all => [ @EXPORT, @EXPORT_OK ],
	32	normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ],
	33	check => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ],
2a204b45	34	);
ac5ea531	35
	36	bootstrap Unicode::Normalize $VERSION;
	37
9f1f04a1	38	use constant UNICODE_FOR_PACK => "A" eq pack('U', 0x41);
	39	use constant NATIVE_FOR_PACK => "A" eq pack('U', ord("A"));
	40
	41	use constant UNICODE_FOR_UNPACK => 0x41 == unpack('U', "A");
	42	use constant NATIVE_FOR_UNPACK => ord("A") == unpack('U', "A");
	43
	44	sub pack_U {
	45	return UNICODE_FOR_PACK
	46	? pack('U*', @_)
	47	: NATIVE_FOR_PACK
	48	? pack('U*', map utf8::unicode_to_native($_), @_)
	49	: die "$PACKAGE, a Unicode code point cannot be stringified.\n";
	50	}
	51
	52	sub unpack_U {
	53	return UNICODE_FOR_UNPACK
	54	? unpack('U*', shift)
	55	: NATIVE_FOR_UNPACK
	56	? map(utf8::native_to_unicode($_), unpack 'U*', shift)
	57	: die "$PACKAGE, a code point returned from unpack U " .
	58	"cannot be converted into Unicode.\n";
	59	}
	60
ac5ea531	61	use constant COMPAT => 1;
ac5ea531	62
d85850a7	63	sub NFD ($) { reorder(decompose($_[0])) }
ac5ea531	64	sub NFKD ($) { reorder(decompose($_[0], COMPAT)) }
d85850a7	65	sub NFC ($) { compose(reorder(decompose($_[0]))) }
ac5ea531	66	sub NFKC ($) { compose(reorder(decompose($_[0], COMPAT))) }
	67
	68	sub normalize($$)
	69	{
d85850a7	70	my $form = shift;
f027f502	71	my $str = shift;
d85850a7	72	$form =~ s/^NF//;
d85850a7	73	return
f027f502	74	$form eq 'D' ? NFD ($str) :
	75	$form eq 'C' ? NFC ($str) :
	76	$form eq 'KD' ? NFKD($str) :
	77	$form eq 'KC' ? NFKC($str) :
d85850a7	78	croak $PACKAGE."::normalize: invalid form name: $form";
ac5ea531	79	}
ac5ea531	80
8f118dcd	81	sub check($$)
	82	{
	83	my $form = shift;
f027f502	84	my $str = shift;
8f118dcd	85	$form =~ s/^NF//;
8f118dcd	86	return
f027f502	87	$form eq 'D' ? checkNFD ($str) :
	88	$form eq 'C' ? checkNFC ($str) :
	89	$form eq 'KD' ? checkNFKD($str) :
	90	$form eq 'KC' ? checkNFKC($str) :
8f118dcd	91	croak $PACKAGE."::check: invalid form name: $form";
	92	}
	93
ac5ea531	94	1;
ac5ea531	95	__END__
2a204b45	96
	97	=head1 NAME
	98
f027f502	99	Unicode::Normalize - Unicode Normalization Forms
2a204b45	100
	101	=head1 SYNOPSIS
	102
	103	use Unicode::Normalize;
	104
8f118dcd	105	$NFD_string = NFD($string); # Normalization Form D
	106	$NFC_string = NFC($string); # Normalization Form C
	107	$NFKD_string = NFKD($string); # Normalization Form KD
	108	$NFKC_string = NFKC($string); # Normalization Form KC
2a204b45	109
	110	or
	111
	112	use Unicode::Normalize 'normalize';
	113
8f118dcd	114	$NFD_string = normalize('D', $string); # Normalization Form D
	115	$NFC_string = normalize('C', $string); # Normalization Form C
	116	$NFKD_string = normalize('KD', $string); # Normalization Form KD
	117	$NFKC_string = normalize('KC', $string); # Normalization Form KC
2a204b45	118
	119	=head1 DESCRIPTION
	120
d85850a7	121	=head2 Normalization Forms
2a204b45	122
	123	=over 4
	124
8f118dcd	125	=item C<$NFD_string = NFD($string)>
2a204b45	126
	127	returns the Normalization Form D (formed by canonical decomposition).
	128
8f118dcd	129	=item C<$NFC_string = NFC($string)>
2a204b45	130
	131	returns the Normalization Form C (formed by canonical decomposition
	132	followed by canonical composition).
	133
8f118dcd	134	=item C<$NFKD_string = NFKD($string)>
2a204b45	135
	136	returns the Normalization Form KD (formed by compatibility decomposition).
	137
8f118dcd	138	=item C<$NFKC_string = NFKC($string)>
2a204b45	139
	140	returns the Normalization Form KC (formed by compatibility decomposition
	141	followed by B<canonical> composition).
	142
8f118dcd	143	=item C<$normalized_string = normalize($form_name, $string)>
2a204b45	144
	145	As C<$form_name>, one of the following names must be given.
	146
	147	'C' or 'NFC' for Normalization Form C
	148	'D' or 'NFD' for Normalization Form D
	149	'KC' or 'NFKC' for Normalization Form KC
	150	'KD' or 'NFKD' for Normalization Form KD
	151
	152	=back
	153
8f118dcd	154	=head2 Decomposition and Composition
	155
	156	=over 4
	157
	158	=item C<$decomposed_string = decompose($string)>
	159
	160	=item C<$decomposed_string = decompose($string, $useCompatMapping)>
	161
9f1f04a1	162	Decomposes the specified string and returns the result.
8f118dcd	163
	164	If the second parameter (a boolean) is omitted or false, decomposes it
	165	using the Canonical Decomposition Mapping.
	166	If true, decomposes it using the Compatibility Decomposition Mapping.
	167
	168	The string returned is not always in NFD/NFKD.
	169	Reordering may be required.
	170
	171	$NFD_string = reorder(decompose($string)); # eq. to NFD()
	172	$NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD()
	173
	174	=item C<$reordered_string = reorder($string)>
	175
9f1f04a1	176	Reorders the combining characters and the like in the canonical ordering
8f118dcd	177	and returns the result.
	178
	179	E.g., when you have a list of NFD/NFKD strings,
	180	you can get the concatenated NFD/NFKD string from them, saying
	181
	182	$concat_NFD = reorder(join '', @NFD_strings);
	183	$concat_NFKD = reorder(join '', @NFKD_strings);
	184
	185	=item C<$composed_string = compose($string)>
	186
	187	Returns the string where composable pairs are composed.
	188
	189	E.g., when you have a NFD/NFKD string,
	190	you can get its NFC/NFKC string, saying
	191
	192	$NFC_string = compose($NFD_string);
	193	$NFKC_string = compose($NFKD_string);
	194
	195	=back
	196
	197	=head2 Quick Check
	198
6c941e0c	199	(see Annex 8, UAX #15, and F<DerivedNormalizationProps.txt>)
8f118dcd	200
	201	The following functions check whether the string is in that normalization form.
	202
	203	The result returned will be:
	204
	205	YES The string is in that normalization form.
	206	NO The string is not in that normalization form.
	207	MAYBE Dubious. Maybe yes, maybe no.
	208
	209	=over 4
	210
	211	=item C<$result = checkNFD($string)>
	212
f027f502	213	returns C<YES> (C<1>) or C<NO> (C<empty string>).
8f118dcd	214
	215	=item C<$result = checkNFC($string)>
	216
f027f502	217	returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>).
8f118dcd	218
	219	=item C<$result = checkNFKD($string)>
	220
f027f502	221	returns C<YES> (C<1>) or C<NO> (C<empty string>).
8f118dcd	222
	223	=item C<$result = checkNFKC($string)>
	224
f027f502	225	returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>).
8f118dcd	226
	227	=item C<$result = check($form_name, $string)>
	228
f027f502	229	returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>).
8f118dcd	230
	231	C<$form_name> is alike to that for C<normalize()>.
	232
	233	=back
	234
	235	B<Note>
	236
	237	In the cases of NFD and NFKD, the answer must be either C<YES> or C<NO>.
	238	The answer C<MAYBE> may be returned in the cases of NFC and NFKC.
	239
	240	A MAYBE-NFC/NFKC string should contain at least
	241	one combining character or the like.
	242	For example, C<COMBINING ACUTE ACCENT> has
	243	the MAYBE_NFC/MAYBE_NFKC property.
	244	Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")>
	245	and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>.
f027f502	246	C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC
8f118dcd	247	(its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">),
	248	while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC.
	249
	250	If you want to check exactly, compare the string with its NFC/NFKC; i.e.,
	251
	252	$string eq NFC($string) # more thorough than checkNFC($string)
	253	$string eq NFKC($string) # more thorough than checkNFKC($string)
	254
2a204b45	255	=head2 Character Data
	256
	257	These functions are interface of character data used internally.
d0ed0342	258	If you want only to get Unicode normalization forms, you don't need
d0ed0342	259	call them yourself.
2a204b45	260
	261	=over 4
	262
	263	=item C<$canonical_decomposed = getCanon($codepoint)>
	264
8f118dcd	265	If the character of the specified codepoint is canonically
	266	decomposable (including Hangul Syllables),
	267	returns the B<completely decomposed> string canonically equivalent to it.
	268
f027f502	269	If it is not decomposable, returns C<undef>.
8f118dcd	270
2a204b45	271	=item C<$compatibility_decomposed = getCompat($codepoint)>
2a204b45	272
8f118dcd	273	If the character of the specified codepoint is compatibility
	274	decomposable (including Hangul Syllables),
	275	returns the B<completely decomposed> string compatibility equivalent to it.
2a204b45	276
f027f502	277	If it is not decomposable, returns C<undef>.
2a204b45	278
8f118dcd	279	=item C<$codepoint_composite = getComposite($codepoint_here, $codepoint_next)>
2a204b45	280
d85850a7	281	If two characters here and next (as codepoints) are composable
8f118dcd	282	(including Hangul Jamo/Syllables and Composition Exclusions),
2a204b45	283	returns the codepoint of the composite.
2a204b45	284
f027f502	285	If they are not composable, returns C<undef>.
2a204b45	286
	287	=item C<$combining_class = getCombinClass($codepoint)>
	288
8f118dcd	289	Returns the combining class of the character as an integer.
2a204b45	290
	291	=item C<$is_exclusion = isExclusion($codepoint)>
	292
8f118dcd	293	Returns a boolean whether the character of the specified codepoint
	294	is a composition exclusion.
	295
	296	=item C<$is_singleton = isSingleton($codepoint)>
	297
2a204b45	298	Returns a boolean whether the character of the specified codepoint is
8f118dcd	299	a singleton.
8f118dcd	300
6c941e0c	301	=item C<$is_non_starter_decomposition = isNonStDecomp($codepoint)>
8f118dcd	302
	303	Returns a boolean whether the canonical decomposition
	304	of the character of the specified codepoint
	305	is a Non-Starter Decomposition.
	306
	307	=item C<$may_be_composed_with_prev_char = isComp2nd($codepoint)>
	308
	309	Returns a boolean whether the character of the specified codepoint
	310	may be composed with the previous one in a certain composition
	311	(including Hangul Compositions, but excluding
	312	Composition Exclusions and Non-Starter Decompositions).
2a204b45	313
	314	=back
	315
	316	=head2 EXPORT
	317
	318	C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default.
	319
	320	C<normalize> and other some functions: on request.
	321
	322	=head1 AUTHOR
	323
	324	SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
	325
	326	http://homepage1.nifty.com/nomenclator/perl/
	327
6c941e0c	328	Copyright(C) 2001-2003, SADAHIRO Tomoyuki. Japan. All rights reserved.
2a204b45	329
6c941e0c	330	This module is free software; you can redistribute it
6c941e0c	331	and/or modify it under the same terms as Perl itself.
2a204b45	332
	333	=head1 SEE ALSO
	334
	335	=over 4
	336
	337	=item http://www.unicode.org/unicode/reports/tr15/
	338
	339	Unicode Normalization Forms - UAX #15
	340
14e6b36c	341	=item http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt
8f118dcd	342
	343	Derived Normalization Properties
	344
2a204b45	345	=back
	346
	347	=cut
	348