Re: PERFORCE change 12929 for review
[p5sagit/p5-mst-13.2.git] / lib / Unicode / Collate.pm
CommitLineData
45394607 1package Unicode::Collate;
2
3use 5.006;
4use strict;
5use warnings;
6use Carp;
45394607 7require Exporter;
8
d16e9e3d 9our $VERSION = '0.08';
45394607 10our $PACKAGE = __PACKAGE__;
11
12our @ISA = qw(Exporter);
13
14our %EXPORT_TAGS = ();
15our @EXPORT_OK = ();
16our @EXPORT = ();
17
18(our $Path = $INC{'Unicode/Collate.pm'}) =~ s/\.pm$//;
19our $KeyFile = "allkeys.txt";
20
# Lingua::KO::Hangul::Util is optional: it is not part of the standard
# distribution, so try to load it and remember whether that worked.

eval { require Lingua::KO::Hangul::Util };
my $hasHangulUtil = ! $@;

# pull in its exported functions only when the module was found
Lingua::KO::Hangul::Util->import() if $hasHangulUtil;
29
45394607 30our %Combin; # combining class from Unicode::Normalize
31
32use constant Min2 => 0x20; # minimum weight at level 2
33use constant Min3 => 0x02; # minimum weight at level 3
34use constant UNDEFINED => 0xFF80; # special value for undefined CE
35
##
## constructor
##
## collator = Unicode::Collate->new(%tailoring)
##
## Blesses the tailoring parameters into the object, fills in defaults
## for alternate, level, normalization, backwards and rearrange, then
## reads the collation element table ($self->{table}, or $KeyFile when
## no table is given) from the directory $Path, followed by any lines
## supplied through the `entry` parameter.
##
## Croaks if normalization is requested but Unicode::Normalize cannot be
## loaded, if the normalization form name is unknown, or if the table
## file cannot be opened.
##
sub new
{
    my $class = shift;
    my $self  = bless { @_ }, $class;

    # alternate: omitted => 'shifted'; explicitly undef => '' so that an
    # @alternate line in the table file can still supply a value.
    $self->{alternate} =
        ! exists  $self->{alternate} ? 'shifted' :
        ! defined $self->{alternate} ? ''        : $self->{alternate};

    # collation level
    $self->{level} ||= ($self->{alternate} =~ /shift/ ? 4 : 3);

    # normalization form
    $self->{normalization} = 'D' if ! exists $self->{normalization};

    # Load Unicode::Normalize at run time (it is optional) and fail early
    # with a clear message instead of failing later on the first call.
    if (defined $self->{normalization}) {
        eval { require Unicode::Normalize; Unicode::Normalize->import(); 1 }
            or croak "$PACKAGE: Unicode::Normalize is required "
                   . "for normalization: $@";
    }

    $self->{normalize} =
        ! defined $self->{normalization}         ? undef :
        $self->{normalization} =~ /^(?:NF)?C$/   ? \&NFC :
        $self->{normalization} =~ /^(?:NF)?D$/   ? \&NFD :
        $self->{normalization} =~ /^(?:NF)?KC$/  ? \&NFKC :
        $self->{normalization} =~ /^(?:NF)?KD$/  ? \&NFKD :
        croak "$PACKAGE unknown normalization form name: $self->{normalization}";

    *Combin = \%Unicode::Normalize::Combin if $self->{normalize} && ! %Combin;

    # backwards: accept a single level number or an arrayref
    $self->{backwards} ||= [];
    $self->{backwards} = [ $self->{backwards} ] if ! ref $self->{backwards};

    # rearrange: accept a single codepoint or an arrayref
    $self->{rearrange} ||= []; # maybe not U+0000 (an ASCII)
    $self->{rearrange} = [ $self->{rearrange} ] if ! ref $self->{rearrange};

    # open the table file (three-arg open; report the OS error too)
    my $file = defined $self->{table} ? $self->{table} : $KeyFile;
    my $path = "$Path/$file";
    open my $fk, '<', $path
        or croak "File does not exist at $path: $!";

    # A lexical line variable is used so the caller's $_ is not clobbered.
    while (my $line = <$fk>) {
        next if $line =~ /^\s*#/;
        if ($line =~ /^\s*\@/) {
            if ($line =~ /^\@version\s*(\S*)/) {
                $self->{version} ||= $1;
            }
            elsif ($line =~ /^\@alternate\s+(.*)/) {
                $self->{alternate} ||= $1;
            }
            elsif ($line =~ /^\@backwards\s+(.*)/) {
                push @{ $self->{backwards} }, $1;
            }
            elsif ($line =~ /^\@rearrange\s+(.*)/) {
                push @{ $self->{rearrange} }, _getHexArray($1);
            }
            next;
        }
        $self->parseEntry($line);
    }
    close $fk;

    # user-supplied entries are parsed after (and so override) the table
    if ($self->{entry}) {
        for my $entry (split /\n/, $self->{entry}) {
            $self->parseEntry($entry);
        }
    }

    # keys of $self->{rearrangeHash} are $self->{rearrange}
    # (the values are left undefined; only key existence is meaningful).
    $self->{rearrangeHash} = {};
    @{ $self->{rearrangeHash} }{ @{ $self->{rearrange} } } = ();

    return $self;
}
109
##
## get $line, parse it, and write an entry in $self
##
## A line has the form "hhhh hhhh ; [.pppp.ssss.tttt.qqqq]... # name".
## Lines that do not start with a hex digit are skipped.  Depending on
## undefName/undefChar the entry is dropped; depending on
## ignoreName/ignoreChar it is recorded as ignored; otherwise its
## collation elements are stored in $self->{entries}.
## For multi-codepoint elements (contractions) the longest length seen
## for each leading codepoint is kept in $self->{maxlength}.
##
sub parseEntry
{
    my $self = shift;
    my $line = shift;
    my($name, $ele, @key);

    return if $line !~ /^\s*[0-9A-Fa-f]/;

    # get name (the trailing comment); '' when the line has no comment,
    # so the pattern matches below never see an undefined value
    $name = $line =~ s/#\s*(.*)// ? $1 : '';
    return if defined $self->{undefName} && $name =~ /$self->{undefName}/;

    # get element
    my($e, $k) = split /;/, $line;
    $k = '' if ! defined $k;            # no ';' => no collation elements
    my @e = _getHexArray($e);
    $ele = pack('U*', @e);
    return if defined $self->{undefChar} && $ele =~ /$self->{undefChar}/;

    # get sort key
    if(
        defined $self->{ignoreName} && $name =~ /$self->{ignoreName}/ ||
        defined $self->{ignoreChar} && $ele  =~ /$self->{ignoreChar}/
    )
    {
        $self->{entries}{$ele} = $self->{ignored}{$ele} = 1;
    }
    else
    {
        foreach my $arr ($k =~ /\[(\S+)\]/g) {
            my $var = $arr =~ /\*/;     # '*' marks a variable element
            push @key, $self->altCE( $var, _getHexArray($arr) );
        }
        $self->{entries}{$ele} = \@key;
    }

    # Remember the LONGEST contraction starting with this codepoint.
    # Simply overwriting would let a later, shorter contraction hide an
    # earlier, longer one from the lookup in splitCE.
    if (@e > 1) {
        my $first = $e[0];
        my $known = $self->{maxlength}{$first};
        $self->{maxlength}{$first} = scalar @e
            if ! $known || $known < @e;
    }
    return;
}
149
150
##
## arrayref CE = altCE(bool variable?, list[num] weights)
##
## Applies the alternate weighting selected by $self->{alternate} to one
## collation element and returns the resulting weights as an arrayref.
## For any other value of `alternate` the weights are returned unchanged.
##
sub altCE
{
    my ($self, $var, @c) = @_;
    my $mode = $self->{alternate};

    if ($mode eq 'blanked') {
        return $var ? [0,0,0] : [ @c[0..2] ];
    }
    if ($mode eq 'non-ignorable') {
        return [ @c[0..2] ];
    }
    if ($mode eq 'shifted') {
        return [0,0,0,$c[0] ] if $var;
        # fourth level is 0xFFFF unless the first three weights sum to zero
        my $fourth = $c[0]+$c[1]+$c[2] ? 0xFFFF : 0;
        return [ @c[0..2], $fourth ];
    }
    if ($mode eq 'shift-trimmed') {
        return $var ? [0,0,0,$c[0] ] : [ @c[0..2], 0 ];
    }
    return \@c;
}
170
##
## string hex_sortkey = viewSortKey(string arg)
##
## Returns the sort key of the argument in a readable form: each 16-bit
## unit as four hex digits, the 0000 level separators shown as '|',
## the whole enclosed in square brackets.
##
sub viewSortKey
{
    my ($self, @args) = @_;
    my $key   = $self->getSortKey(@args);
    my @units = unpack 'n*', $key;
    my $view  = join " ", map { sprintf("%04X", $_) } @units;
    $view =~ s/ ?0000 ?/|/g;
    return "[$view]";
}
182
d16e9e3d 183
##
## list[strings] elements = splitCE(string arg)
##
## Preprocesses and normalizes the string (when $self->{preprocess} /
## $self->{normalize} are code references), applies rearrangement, and
## splits the result into collation elements: single characters,
## contractions found in $self->{entries}, and a base element joined
## with a following combining character when that pair has an entry.
## Codepoints out of range, surrogates and U+xxFFFE/U+xxFFFF are dropped.
## Returns the list in list context and an arrayref in scalar context.
##
sub splitCE
{
    my $self = shift;
    my $code = $self->{preprocess};
    my $norm = $self->{normalize};
    my $ent  = $self->{entries};
    my $max  = $self->{maxlength};
    my $rear = $self->{rearrangeHash};

    my $str = ref $code ? $code->(shift) : shift;
    $str = $norm->($str) if ref $norm;

    my(@src, @buf);
    @src = unpack('U*', $str);

    # rearrangement: swap a rearranging character with its successor.
    # The hash values are undefined, so key existence must be tested;
    # the index is advanced past the swapped pair only after a swap,
    # and a rearranging character at the very end is left in place.
    for(my $i = 0; $i + 1 < @src; $i++)
    {
        if(exists $rear->{ $src[$i] })
        {
            ($src[$i], $src[$i+1]) = ($src[$i+1], $src[$i]);
            $i++;
        }
    }

    for(my $i = 0; $i < @src; $i++)
    {
        my $ch;
        my $u = $src[$i];

        # non-characters
        next if $u < 0 || 0x10FFFF < $u       # out of range
            || 0xD800 <= $u && $u <= 0xDFFF;  # surrogates (inclusive)
        my $four = $u & 0xFFFF;
        next if $four == 0xFFFE || $four == 0xFFFF;

        if($max->{$u}) # contract
        {
            # try the longest candidate first; a length of 1 always fits,
            # so $ch ends up as the single character when nothing matches
            for(my $j = $max->{$u}; $j >= 1; $j--)
            {
                next unless $i+$j-1 < @src;
                $ch = pack 'U*', @src[$i .. $i+$j-1];
                if($ent->{$ch})
                {
                    $i += $j-1;
                    last;
                }
            }
        }
        else { $ch = pack('U', $u) }

        if(%Combin && defined $ch) # with Combining Char
        {
            for(my $j = $i+1; $j < @src && $Combin{ $src[$j] }; $j++)
            {
                my $comb = pack 'U', $src[$j];
                next if ! $ent->{ $ch.$comb };
                $ch .= $comb;
                splice(@src, $j, 1); # consumed: remove it from the source
                last;
            }
        }
        push @buf, $ch;
    }
    return wantarray ? @buf : \@buf;
}
45394607 247
d16e9e3d 248
##
## list[arrayrefs] weight = getWt(string element)
##
## Returns the collation elements (arrayrefs of weights) for one element
## produced by splitCE.  Ignored or undefined elements yield an empty
## list.  Elements without a table entry are handled as follows:
##   Hangul syllable : overrideHangul if given, otherwise the weights of
##                     its jamo as returned by decomposeHangul
##   CJK ideograph   : overrideCJK if given, otherwise _CJK
##   anything else   : _derivCE
## Croaks if a Hangul syllable must be decomposed but
## Lingua::KO::Hangul::Util is not available.
##
sub getWt
{
    my $self = shift;
    my $ch   = shift;
    my $ent  = $self->{entries};
    my $ign  = $self->{ignored};
    my $cjk  = $self->{overrideCJK};
    my $hang = $self->{overrideHangul};

    return if !defined $ch || $ign->{$ch}; # ignored
    return @{ $ent->{$ch} } if $ent->{$ch};

    my $u = unpack('U', $ch);

    if (_isHangul($u)) {
        return $hang->($u) if $hang;

        # Fail with a clear message rather than printing $@ to STDOUT
        # and returning values that are not collation elements.
        croak "$PACKAGE: Lingua::KO::Hangul::Util is required "
            . "to decompose Hangul syllables"
            if ! $hasHangulUtil;

        # Resolve each jamo through getWt so that jamo that are ignored
        # or missing from the table are handled like any other element.
        return map { $self->getWt(pack('U', $_)) } decomposeHangul($u);
    }

    if (_isCJK($u)) {
        return $cjk->($u) if $cjk;
        return map { $self->altCE(0, @$_) } _CJK($u);
    }

    return map { $self->altCE(0, @$_) } _derivCE($u);
}
275
##
## int = index(string, substring)
##
## Searches string for the first part whose collation elements equal
## those of substring at the collator's level.
## Scalar context: the position, or -1 when there is no match.
## List context: (position, length), or an empty list when no match.
## An empty substring, or one made only of elements that carry no
## weight at this level, matches at (0, 0).
## Position and length are counted on the string after preprocessing,
## normalization and rearrangement.
##
sub index
{
    my $self = shift;
    my $lev  = $self->{level};
    my $str  = $self->splitCE(shift);
    my $sub  = $self->splitCE(shift);

    return wantarray ? (0,0) : 0 if ! @$sub;
    return wantarray ?  ()  : -1 if ! @$str;

    my @subWt = grep { _ignorableAtLevel($_,$lev) }
                map  { $self->getWt($_) } @$sub;

    # Nothing left to compare: treat like an empty substring instead of
    # "matching" an empty weight list at an undefined position.
    return wantarray ? (0,0) : 0 if ! @subWt;

    my(@strWt,@strPt);  # pending weights and the position each came from
    my $count = 0;      # characters consumed so far
    for my $e (@$str){
        my @tmp = grep { _ignorableAtLevel($_,$lev) } $self->getWt($e);
        push @strWt, @tmp;
        push @strPt, ($count) x @tmp;
        $count += length $e;
        while(@strWt >= @subWt){
            if(_eqArray(\@strWt, \@subWt, $lev)){
                my $pos = $strPt[0];
                return wantarray ? ($pos, $count-$pos) : $pos;
            }
            # slide the window by one collation element
            shift @strWt;
            shift @strPt;
        }
    }
    return wantarray ? () : -1;
}
310
##
## bool _eqArray(arrayref, arrayref, level)
##
## Compares two lists of collation elements at levels 1..level.
## The first list must be at least as long as the second; only as many
## elements as the second list holds are compared.
## Returns 1 when they are equal, an empty value otherwise.
## (No prototype: it is called before this definition is compiled, so a
## prototype could not be applied and would only cause a warning.)
##
sub _eqArray
{
    my ($long, $short, $lev) = @_;
    for my $v (0 .. $lev-1){
        for my $c (0 .. $#$short){
            return if $long->[$c][$v] != $short->[$c][$v];
        }
    }
    return 1;
}
326
327
##
## bool _ignorableAtLevel(CE, level)
##
## NOTE: despite its name, this returns TRUE when the collation element
## has a non-zero weight at every level from 1 to `level`, i.e. when the
## element is to be kept by the greps in index().
## Returns an empty value for an undefined CE.
## (No prototype: it is called before this definition is compiled, so a
## prototype could not be applied and would only cause a warning.)
##
sub _ignorableAtLevel
{
    my ($ce, $lv) = @_;
    return if ! defined $ce;
    return ! grep { ! $ce->[$_] } 0 .. $lv-1;
}
338
339
##
## string sortkey = getSortKey(string arg)
##
## Builds the binary sort key: the non-zero weights of each level,
## packed as 16-bit big-endian values, with the four levels joined by
## two NUL bytes.  Levels listed in $self->{backwards} are reversed and
## tertiary weights are adjusted for upper_before_lower and
## katakana_before_hiragana.
##
sub getSortKey
{
    my $self     = shift;
    my $maxLevel = $self->{level};
    my $elements = $self->splitCE(shift); # scalar context: an arrayref

    # collation elements (weight arrays) of the whole string
    my @ces = grep { defined } map { $self->getWt($_) } @$elements;

    # distribute the non-zero weights over the levels
    my @levels = ([], [], [], []);
    for my $ce (@ces){
        for my $lv (0 .. $maxLevel-1){
            my $weight = $ce->[$lv];
            push @{ $levels[$lv] }, $weight if $weight;
        }
    }

    # backwards levels (level numbers are 1-based)
    for my $level (@{ $self->{backwards} }){
        my $lv = $level - 1;
        @{ $levels[$lv] } = reverse @{ $levels[$lv] };
    }

    # modification of tertiary weights (the loop variable aliases
    # the stored weight, so it is changed in place)
    if($self->{upper_before_lower}){
        for my $w (@{ $levels[2] }){
            if   (0x8 <= $w && $w <= 0xC){ $w -= 6 } # lower
            elsif(0x2 <= $w && $w <= 0x6){ $w += 6 } # upper
            elsif($w == 0x1C)            { $w += 1 } # square upper
            elsif($w == 0x1D)            { $w -= 1 } # square lower
        }
    }
    if($self->{katakana_before_hiragana}){
        for my $w (@{ $levels[2] }){
            if   (0x0F <= $w && $w <= 0x13){ $w -= 2 } # katakana
            elsif(0x0D <= $w && $w <= 0x0E){ $w += 5 } # hiragana
        }
    }

    return join "\0\0", map { pack('n*', @$_) } @levels;
}
381
382
##
## int compare = cmp(string a, string b)
##
## Compares two strings by comparing their sort keys as binary strings.
##
sub cmp
{
    my ($obj, $left, $right) = @_;
    my $leftKey  = $obj->getSortKey($left);
    my $rightKey = $obj->getSortKey($right);
    return $leftKey cmp $rightKey;
}
393
##
## list[strings] sorted = sort(list[strings] arg)
##
## Sorts the strings by their sort keys; each key is computed only once.
##
sub sort
{
    my $obj = shift;

    # pair every string with its sort key, order the pairs by key,
    # then strip the keys off again
    my @keyed   = map { [ $obj->getSortKey($_), $_ ] } @_;
    my @ordered = sort { $a->[0] cmp $b->[0] } @keyed;
    return map { $_->[1] } @ordered;
}
405
##
## list[arrayrefs] CE = _derivCE(int codepoint)
##
## Derives two collation elements for a codepoint without a table entry:
## the first carries UNDEFINED plus the bits above bit 14 as its primary
## weight, the second carries the low 15 bits with bit 15 set.
## The second primary weight always has 0x8000 set and so is never zero,
## which is why both elements are always returned.
## (Rejected alternatives noted in the original source, marked "ng":
##  0xFFC2 + ($code >> 15)  and  $code & 0x7FFF | 0x1000.)
##
sub _derivCE
{
    my $code  = shift;
    my $first  = UNDEFINED + ($code >> 15);
    my $second = ($code & 0x7FFF) | 0x8000;
    return ([$first,2,1,$code], [$second,0,0,$code]);
}
418
##
## "hhhh hhhh hhhh" to (dddd, dddd, dddd)
##
## Returns the numeric value of every run of hex digits in the string,
## in order of appearance; anything between the runs is skipped.
##
sub _getHexArray
{
    my ($str) = @_;
    my @values;
    while ($str =~ /([0-9a-fA-F]+)/g) {
        push @values, hex $1;
    }
    return @values;
}
427
##
## bool is_a_CJK_Unified_Ideograph = _isCJK(int codepoint)
##
## True for U+3400..U+4DB5 and U+4E00..U+9FA5.
## The range 0x20000..0x2A6D6 is commented out in the original source
## and is therefore not tested here either.
##
sub _isCJK
{
    my ($cp) = @_;
    my $inFirstRange = 0x3400 <= $cp && $cp <= 0x4DB5;
    return $inFirstRange || (0x4E00 <= $cp && $cp <= 0x9FA5);
}
438
##
## list[arrayref] CE = _CJK(int codepoint_of_CJK)
##
## Codepoints up to 0xFFFF get a single collation element whose primary
## weight is the codepoint itself; larger ones are passed to _derivCE.
##
sub _CJK
{
    my ($cp) = @_;
    return _derivCE($cp) if $cp > 0xFFFF;
    return [$cp,0x20,0x02,$cp];
}
447
##
## bool is_a_Hangul_Syllable = _isHangul(int codepoint)
##
## True for codepoints in U+AC00..U+D7A3 (both ends included).
##
sub _isHangul
{
    my ($cp) = @_;
    return $cp >= 0xAC00 && $cp <= 0xD7A3;
}
456
4571;
458__END__
459
460=head1 NAME
461
462Unicode::Collate - use UCA (Unicode Collation Algorithm)
463
464=head1 SYNOPSIS
465
466 use Unicode::Collate;
467
468 #construct
469 $UCA = Unicode::Collate->new(%tailoring);
470
471 #sort
472 @sorted = $UCA->sort(@not_sorted);
473
474 #compare
475 $result = $UCA->cmp($a, $b); # returns 1, 0, or -1.
476
477=head1 DESCRIPTION
478
479=head2 Constructor and Tailoring
480
d16e9e3d 481The C<new> method returns a collator object.
482
45394607 483 $UCA = Unicode::Collate->new(
484 alternate => $alternate,
485 backwards => $levelNumber, # or \@levelNumbers
486 entry => $element,
487 normalization => $normalization_form,
488 ignoreName => qr/$ignoreName/,
489 ignoreChar => qr/$ignoreChar/,
490 katakana_before_hiragana => $bool,
491 level => $collationLevel,
492 overrideCJK => \&overrideCJK,
493 overrideHangul => \&overrideHangul,
494 preprocess => \&preprocess,
495 rearrange => \@charList,
496 table => $filename,
497 undefName => qr/$undefName/,
498 undefChar => qr/$undefChar/,
499 upper_before_lower => $bool,
500 );
501 # if %tailoring is false (empty),
502 # $UCA should do the default collation.
503
504=over 4
505
506=item alternate
507
508-- see 3.2.2 Alternate Weighting, UTR #10.
509
510 alternate => 'shifted', 'blanked', 'non-ignorable', or 'shift-trimmed'.
511
512By default (if specification is omitted), 'shifted' is adopted.
513
514=item backwards
515
516-- see 3.1.2 French Accents, UTR #10.
517
518 backwards => $levelNumber or \@levelNumbers
519
520Weights in reverse order; ex. level 2 (diacritic ordering) in French.
521If omitted, forwards at all the levels.
522
523=item entry
524
525-- see 3.1 Linguistic Features; 3.2.1 File Format, UTR #10.
526
527Overrides a default order or adds a new element
528
529 entry => <<'ENTRIES', # use the UCA file format
53000E6 ; [.0861.0020.0002.00E6] [.08B1.0020.0002.00E6] # ligature <ae> as <a e>
5310063 0068 ; [.0893.0020.0002.0063] # "ch" in traditional Spanish
5320043 0068 ; [.0893.0020.0008.0043] # "Ch" in traditional Spanish
533ENTRIES
534
535=item ignoreName
536
537=item ignoreChar
538
539-- see Completely Ignorable, 3.2.2 Alternate Weighting, UTR #10.
540
541Ignores the entry in the table.
542If an ignored collation element appears in the string to be collated,
543it is ignored as if the element had been deleted from there.
544
545E.g. when 'a' and 'e' are ignored,
546'element' is equal to 'lament' (or 'lmnt').
547
548=item level
549
550-- see 4.3 Form a sort key for each string, UTR #10.
551
552Set the maximum level.
553Any higher levels than the specified one are ignored.
554
555 Level 1: alphabetic ordering
556 Level 2: diacritic ordering
557 Level 3: case ordering
558 Level 4: tie-breaking (e.g. in the case when alternate is 'shifted')
559
560 ex.level => 2,
561
562=item normalization
563
564-- see 4.1 Normalize each input string, UTR #10.
565
If specified, strings are normalized before sort keys are prepared
567(the normalization is executed after preprocess).
568
569As a form name, one of the following names must be used.
570
571 'C' or 'NFC' for Normalization Form C
572 'D' or 'NFD' for Normalization Form D
573 'KC' or 'NFKC' for Normalization Form KC
574 'KD' or 'NFKD' for Normalization Form KD
575
576If omitted, the string is put into Normalization Form D.
577
If explicitly set to undef (as C<normalization =E<gt> undef>),
no normalization is carried out (this may make tailoring easier
if normalization is not desired).
581
582see B<CAVEAT>.
583
584=item overrideCJK
585
586=item overrideHangul
587
588-- see 7.1 Derived Collation Elements, UTR #10.
589
590By default, mapping of CJK Unified Ideographs
591uses the Unicode codepoint order
592and Hangul Syllables are decomposed into Hangul Jamo.
593
594The mapping of CJK Unified Ideographs
or Hangul Syllables may be overridden.
596
597ex. CJK Unified Ideographs in the JIS codepoint order.
598
599 overrideCJK => sub {
600 my $u = shift; # get unicode codepoint
601 my $b = pack('n', $u); # to UTF-16BE
602 my $s = your_unicode_to_sjis_converter($b); # convert
603 my $n = unpack('n', $s); # convert sjis to short
604 [ $n, 1, 1 ]; # return collation element
605 },
606
607If you want to override the mapping of Hangul Syllables,
608the Normalization Forms D and KD are not appropriate
609(they will be decomposed before overriding).
610
611=item preprocess
612
613-- see 5.1 Preprocessing, UTR #10.
614
615If specified, the coderef is used to preprocess
616before the formation of sort keys.
617
618ex. dropping English articles, such as "a" or "the".
619Then, "the pen" is before "a pencil".
620
621 preprocess => sub {
622 my $str = shift;
623 $str =~ s/\b(?:an?|the)\s+//g;
624 $str;
625 },
626
627=item rearrange
628
629-- see 3.1.3 Rearrangement, UTR #10.
630
631Characters that are not coded in logical order and to be rearranged.
632By default,
633
634 rearrange => [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ],
635
636=item table
637
638-- see 3.2 Default Unicode Collation Element Table, UTR #10.
639
640You can use another element table if desired.
641The table file must be in your C<lib/Unicode/Collate> directory.
642
643By default, the file C<lib/Unicode/Collate/allkeys.txt> is used.
644
645=item undefName
646
647=item undefChar
648
649-- see 6.3.4 Reducing the Repertoire, UTR #10.
650
651Undefines the collation element as if it were unassigned in the table.
652This reduces the size of the table.
653If an unassigned character appears in the string to be collated,
654the sort key is made from its codepoint
655as a single-character collation element,
656as it is greater than any other assigned collation elements
657(in the codepoint order among the unassigned characters).
658But, it'd be better to ignore characters
659unfamiliar to you and maybe never used.
660
661=item katakana_before_hiragana
662
663=item upper_before_lower
664
665-- see 6.6 Case Comparisons; 7.3.1 Tertiary Weight Table, UTR #10.
666
667By default, lowercase is before uppercase
668and hiragana is before katakana.
669
670If the parameter is true, this is reversed.
671
672=back
673
674=head2 Other methods
675
676=over 4
677
678=item C<@sorted = $UCA-E<gt>sort(@not_sorted)>
679
680Sorts a list of strings.
681
682=item C<$result = $UCA-E<gt>cmp($a, $b)>
683
Returns 1 (when C<$a> is greater than C<$b>)
or 0 (when C<$a> is equal to C<$b>)
or -1 (when C<$a> is less than C<$b>).
687
688=item C<$sortKey = $UCA-E<gt>getSortKey($string)>
689
690-- see 4.3 Form a sort key for each string, UTR #10.
691
692Returns a sort key.
693
694You compare the sort keys using a binary comparison
695and get the result of the comparison of the strings using UCA.
696
697 $UCA->getSortKey($a) cmp $UCA->getSortKey($b)
698
699 is equivalent to
700
701 $UCA->cmp($a, $b)
702
d16e9e3d 703=item C<$position = $UCA-E<gt>index($string, $substring)>
704
705=item C<($position, $length) = $UCA-E<gt>index($string, $substring)>
706
707-- see 6.8 Searching, UTR #10.
708
709If C<$substring> matches a part of C<$string>, returns
710the position of the first occurrence of the matching part in scalar context;
711in list context, returns a two-element list of
712the position and the length of the matching part.
713
714B<Notice> that the length of the matching part may differ from
715the length of C<$substring>.
716
717B<Note> that the position and the length are counted on the string
718after the process of preprocess, normalization, and rearrangement.
719Therefore, in case the specified string is not binary equal to
720the preprocessed/normalized/rearranged string, the position and the length
may differ from those on the specified string. But it is guaranteed
722that, if matched, it returns a non-negative value as C<$position>.
723
724If C<$substring> does not match any part of C<$string>,
725returns C<-1> in scalar context and
726an empty list in list context.
727
728e.g. you say
729
730 my $UCA = Unicode::Collate->new( normalization => undef, level => 1 );
731 my $str = "Ich mu\x{00DF} studieren.";
732 my $sub = "m\x{00FC}ss";
733 my $match;
734 if(my @tmp = $UCA->index($str, $sub)){
735 $match = substr($str, $tmp[0], $tmp[1]);
736 }
737
738and get C<"mu\x{00DF}"> in C<$match> since C<"mu>E<223>C<">
739is primary equal to C<"m>E<252>C<ss">.
740
45394607 741=back
742
743=head2 EXPORT
744
745None by default.
746
747=head2 CAVEAT
748
749Use of the C<normalization> parameter requires
750the B<Unicode::Normalize> module.
751
If you do not need it (e.g. when you do not need to
handle any combining characters),
assign C<normalization =E<gt> undef> explicitly.
755
756=head1 AUTHOR
757
758SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
759
760 http://homepage1.nifty.com/nomenclator/perl/
761
762 Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved.
763
764 This program is free software; you can redistribute it and/or
765 modify it under the same terms as Perl itself.
766
767=head1 SEE ALSO
768
769=over 4
770
771=item L<Lingua::KO::Hangul::Util>
772
773utility functions for Hangul Syllables
774
775=item L<Unicode::Normalize>
776
777normalized forms of Unicode text
778
779=item Unicode Collation Algorithm - Unicode TR #10
780
781http://www.unicode.org/unicode/reports/tr10/
782
783=back
784
785=cut