From: Jarkko Hietaniemi Date: Fri, 17 Aug 2001 03:22:38 +0000 (+0000) Subject: Integrate Lingua::KO::Hangul::Util 0.02, Unicode::Normalize, X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=453946077c880e7dc58ef4fa7340353d6caec987;p=p5sagit%2Fp5-mst-13.2.git Integrate Lingua::KO::Hangul::Util 0.02, Unicode::Normalize, and Unicode::Collate 0.07, from SADAHIRO Tomoyuki. p4raw-id: //depot/perl@11701 --- diff --git a/MANIFEST b/MANIFEST index 9b9b265..4b7d052 100644 --- a/MANIFEST +++ b/MANIFEST @@ -945,6 +945,10 @@ lib/IPC/Open3.t See if IPC::Open3 works lib/IPC/SysV.t See if IPC::SysV works lib/less.pm For "use less" lib/lib_pm.PL For "use lib", produces lib/lib.pm +lib/Lingua/KO/Hangul/Util/Changes Lingua::KO::Hangul::Util +lib/Lingua/KO/Hangul/Util/README Lingua::KO::Hangul::Util +lib/Lingua/KO/Hangul/Util/t/test.t Lingua::KO::Hangul::Util +lib/Lingua/KO/Hangul/Util.pm Lingua::KO::Hangul::Util lib/locale.pm For "use locale" lib/locale.t See if locale support works lib/Locale/Codes/t/all.t See if Locale::Codes work @@ -1175,7 +1179,19 @@ lib/Time/localtime.pm By-name interface to Perl's builtin localtime lib/Time/localtime.t Test for Time::localtime lib/Time/tm.pm Internal object for Time::{gm,local}time lib/timelocal.pl Perl library supporting inverse of localtime, gmtime +lib/Unicode/Collate.pm Unicode::Collate +lib/Unicode/Collate/Changes Unicode::Collate +lib/Unicode/Collate/keys.txt Unicode::Collate +lib/Unicode/Collate/README Unicode::Collate +lib/Unicode/Collate/t/test.t Unicode::Collate +lib/Unicode/Normalize.pm Unicode::Normalize +lib/Unicode/Normalize/Changes Unicode::Normalize +lib/Unicode/Normalize/README Unicode::Normalize +lib/Unicode/Normalize/t/norm.t Unicode::Normalize +lib/Unicode/Normalize/t/test.t Unicode::Normalize lib/unicode/README Explanation why this is there is. 
+lib/Unicode/UCD.pm Unicode character database +lib/Unicode/UCD.t See if Unicode character database works lib/unicore/ArabLink.pl Unicode character database lib/unicore/ArabLnkGrp.pl Unicode character database lib/unicore/ArabShap.txt Unicode character database @@ -1498,8 +1514,6 @@ lib/unicore/UCD.html Unicode character database lib/unicore/Unicode.html Unicode character database lib/unicore/Unicode.txt Unicode character database lib/unicore/version The version of the Unicode -lib/Unicode/UCD.pm Unicode character database -lib/Unicode/UCD.t See if Unicode character database works lib/UNIVERSAL.pm Base class for ALL classes lib/User/grent.pm By-name interface to Perl's builtin getgr* lib/User/grent.t See if User::grwent works diff --git a/lib/Lingua/KO/Hangul/Util.pm b/lib/Lingua/KO/Hangul/Util.pm new file mode 100644 index 0000000..3848592 --- /dev/null +++ b/lib/Lingua/KO/Hangul/Util.pm @@ -0,0 +1,278 @@ +package Lingua::KO::Hangul::Util; + +use 5.006; +use strict; +use warnings; + +require Exporter; + +our @ISA = qw(Exporter); +our %EXPORT_TAGS = (); +our @EXPORT_OK = (); +our @EXPORT = qw( + decomposeHangul + composeHangul + getHangulName + parseHangulName +); +our $VERSION = '0.02'; + +our @JamoL = ( # Initial (HANGUL CHOSEONG) + "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", + "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H", + ); + +our @JamoV = ( # Medial (HANGUL JUNGSEONG) + "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", + "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI", + "YU", "EU", "YI", "I", + ); + +our @JamoT = ( # Final (HANGUL JONGSEONG) + "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", + "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", + "S", "SS", "NG", "J", "C", "K", "T", "P", "H", + ); + +our $BlockName = "HANGUL SYLLABLE "; + +use constant SBase => 0xAC00; +use constant LBase => 0x1100; +use constant VBase => 0x1161; +use constant TBase => 0x11A7; +use constant LCount => 19; # scalar @JamoL +use constant VCount => 21; # scalar 
@JamoV +use constant TCount => 28; # scalar @JamoT +use constant NCount => 588; # VCount * TCount +use constant SCount => 11172; # LCount * NCount +use constant SFinal => 0xD7A3; # SBase -1 + SCount + +our(%CodeL, %CodeV, %CodeT); +@CodeL{@JamoL} = 0 .. LCount-1; +@CodeV{@JamoV} = 0 .. VCount-1; +@CodeT{@JamoT} = 0 .. TCount-1; + +sub getHangulName { + my $code = shift; + return undef unless SBase <= $code && $code <= SFinal; + my $SIndex = $code - SBase; + my $LIndex = int( $SIndex / NCount); + my $VIndex = int(($SIndex % NCount) / TCount); + my $TIndex = $SIndex % TCount; + "$BlockName$JamoL[$LIndex]$JamoV[$VIndex]$JamoT[$TIndex]"; +} + +sub parseHangulName { + my $arg = shift; + return undef unless $arg =~ s/$BlockName//o; + return undef unless $arg =~ /^([^AEIOUWY]*)([AEIOUWY]+)([^AEIOUWY]*)$/; + return undef unless exists $CodeL{$1} + && exists $CodeV{$2} + && exists $CodeT{$3}; + SBase + $CodeL{$1} * NCount + $CodeV{$2} * TCount + $CodeT{$3}; +} + +sub decomposeHangul { + my $code = shift; + return unless SBase <= $code && $code <= SFinal; + my $SIndex = $code - SBase; + my $LIndex = int( $SIndex / NCount); + my $VIndex = int(($SIndex % NCount) / TCount); + my $TIndex = $SIndex % TCount; + my @ret = ( + LBase + $LIndex, + VBase + $VIndex, + $TIndex ? (TBase + $TIndex) : (), + ); + wantarray ? @ret : pack('U*', @ret); +} + +# +# To Do: +# s/(\p{JamoL}\p{JamoV})/toHangLV($1)/ge; +# s/(\p{HangLV}\p{JamoT})/toHangLVT($1)/ge; +# +sub composeHangul { + my $str = shift; + return $str unless length $str; + my(@ret); + + foreach my $ch (unpack('U*', $str)) # Makes list! The string be short! + { + push(@ret, $ch) and next unless @ret; + + # 1. check to see if $ret[-1] is L and $ch is V. + my $LIndex = $ret[-1] - LBase; + if(0 <= $LIndex && $LIndex < LCount) + { + my $VIndex = $ch - VBase; + if(0 <= $VIndex && $VIndex < VCount) + { + $ret[-1] = SBase + ($LIndex * VCount + $VIndex) * TCount; + next; # discard $ch + } + } + + # 2. 
check to see if $ret[-1] is LV and $ch is T. + my $SIndex = $ret[-1] - SBase; + if(0 <= $SIndex && $SIndex < SCount && $SIndex % TCount == 0) + { + my $TIndex = $ch - TBase; + if(0 <= $TIndex && $TIndex < TCount) + { + $ret[-1] += $TIndex; + next; # discard $ch + } + } + + # 3. just append $ch + push(@ret, $ch); + } + wantarray ? @ret : pack('U*', @ret); +} + +1; +__END__ + +=head1 NAME + +Lingua::KO::Hangul::Util - utility functions for Hangul Syllables + +=head1 SYNOPSIS + + use Lingua::KO::Hangul::Util; + + decomposeHangul(0xAC00); + # (0x1100,0x1161) or "\x{1100}\x{1161}" + + composeHangul("\x{1100}\x{1161}"); + # "\x{AC00}" + + getHangulName(0xAC00); + # "HANGUL SYLLABLE GA" + + parseHangulName("HANGUL SYLLABLE GA"); + # 0xAC00 + +=head1 DESCRIPTION + +A Hangul syllable consists of Hangul Jamo. + +Hangul Jamo are classified into three classes: + + CHOSEONG (the initial sound) as a leading consonant (L), + JUNGSEONG (the medial sound) as a vowel (V), + JONGSEONG (the final sound) as a trailing consonant (T). + +Any Hangul syllable is a composition of + + i) CHOSEONG + JUNGSEONG (L + V) + + or + + ii) CHOSEONG + JUNGSEONG + JONGSEONG (L + V + T). + +Names of Hangul Syllables have a format of C<"HANGUL SYLLABLE %s">. + +=head2 Composition and Decomposition + +=over 4 + +=item C<$string_decomposed = decomposeHangul($codepoint)> + +=item C<@codepoints = decomposeHangul($codepoint)> + +Accepts unicode codepoint integer. + +If the specified codepoint is of a Hangul syllable, +returns a list of codepoints (in a list context) +or a UTF-8 string (in a scalar context) +of its decomposition. + + decomposeHangul(0xAC00) # U+AC00 is HANGUL SYLLABLE GA. + returns "\x{1100}\x{1161}" or (0x1100, 0x1161); + + decomposeHangul(0xAE00) # U+AE00 is HANGUL SYLLABLE GEUL. + returns "\x{1100}\x{1173}\x{11AF}" or (0x1100, 0x1173, 0x11AF); + +Otherwise, returns false (empty string or empty list). 
+ + decomposeHangul(0x0041) # outside Hangul Syllables + returns empty string or empty list. + +=item C<$string_composed = composeHangul($src_string)> + +=item C<@codepoints_composed = composeHangul($src_string)> + +Any sequence of an initial Jamo C<L> and a medial Jamo C<V> +is composed into a syllable C<LV>; +then any sequence of a syllable C<LV> and a final Jamo C<T> +is composed into a syllable C<LVT>. + +Any characters other than Hangul Jamo and Hangul Syllables +are unaffected. + + composeHangul("Hangul \x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}.") + returns "Hangul \x{AC00}\x{AE00}." or + (0x48,0x61,0x6E,0x67,0x75,0x6C,0x20,0xAC00,0xAE00,0x2E); + +=back + +=head2 Hangul Syllable Name + +=over 4 + +=item C<$name = getHangulName($codepoint)> + +If the specified codepoint is of a Hangul syllable, +returns its name; otherwise returns undef. + + getHangulName(0xAC00) returns "HANGUL SYLLABLE GA"; + getHangulName(0x0041) returns undef. + +=item C<$codepoint = parseHangulName($name)> + +If the specified name is of a Hangul syllable, +returns its codepoint; otherwise returns undef. + + parseHangulName("HANGUL SYLLABLE GEUL") returns 0xAE00; + + parseHangulName("LATIN SMALL LETTER A") returns undef; + + parseHangulName("HANGUL SYLLABLE PERL") returns undef; + # Regrettably, HANGUL SYLLABLE PERL does not exist :-) + +=back + +=head2 EXPORT + +By default, + + decomposeHangul + composeHangul + getHangulName + parseHangulName + +=head1 AUTHOR + +SADAHIRO Tomoyuki + + bqw10602@nifty.com + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This program is free software; you can redistribute it and/or + modify it under the same terms as Perl itself. + +=head1 SEE ALSO + +=over 4 + +=item http://www.unicode.org/unicode/reports/tr15 + +Annex 10: Hangul, in Unicode Normalization Forms (UAX #15). 
+ +=back + +=cut diff --git a/lib/Lingua/KO/Hangul/Util/Changes b/lib/Lingua/KO/Hangul/Util/Changes new file mode 100644 index 0000000..2e43817 --- /dev/null +++ b/lib/Lingua/KO/Hangul/Util/Changes @@ -0,0 +1,11 @@ +Revision history for Perl extension Lingua::KO::Hangul::Util. + +0.02 Sat Aug 11 00:16:02 2001 + - fix SEE ALSO (the Unicode Normalization Forms is UAX #15) + - getHangulName and parseHangulName return + a list (undef) of one element in list context. + +0.01 Fri Aug 3 21:25:11 2001 + - original version; created by h2xs 1.21 with options + -A -X -n Lingua::KO::Hangul::Util + diff --git a/lib/Lingua/KO/Hangul/Util/README b/lib/Lingua/KO/Hangul/Util/README new file mode 100644 index 0000000..9fc04d8 --- /dev/null +++ b/lib/Lingua/KO/Hangul/Util/README @@ -0,0 +1,44 @@ +Lingua/KO/Hangul/Util version 0.02 +================================== + +SYNOPSIS + + use Lingua::KO::Hangul::Util; + + decomposeHangul(0xAC00); + # (0x1100,0x1161) or "\x{1100}\x{1161}" + + composeHangul("\x{1100}\x{1161}"); + # "\x{AC00}" + + getHangulName(0xAC00); + # "HANGUL SYLLABLE GA" + + parseHangulName("HANGUL SYLLABLE GA"); + # 0xAC00 + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +Perl 5.006 or later + +COPYRIGHT AND LICENCE + +SADAHIRO Tomoyuki + + bqw10602@nifty.com + + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This program is free software; you can redistribute it and/or + modify it under the same terms as Perl itself. diff --git a/lib/Lingua/KO/Hangul/Util/t/test.t b/lib/Lingua/KO/Hangul/Util/t/test.t new file mode 100644 index 0000000..d4a5df5 --- /dev/null +++ b/lib/Lingua/KO/Hangul/Util/t/test.t @@ -0,0 +1,55 @@ +# Before `make install' is performed this script should be runnable with +# `make test'. 
After `make install' it should work as `perl test.pl' + +######################### + +use Test; +use strict; +BEGIN { plan tests => 22 }; +use Lingua::KO::Hangul::Util; +ok(1); # If we made it this far, we're ok. + +######################### + +sub unpk { + join ':', map sprintf("%04X", $_), + @_ == 1 ? unpack('U*', shift) : @_; +} + +ok(getHangulName(0xAC00), "HANGUL SYLLABLE GA"); +ok(getHangulName(0xAE00), "HANGUL SYLLABLE GEUL"); +ok(getHangulName(0xC544), "HANGUL SYLLABLE A"); +ok(getHangulName(0xD7A3), "HANGUL SYLLABLE HIH"); +ok(getHangulName(0x11A3), undef); +ok(getHangulName(0x0000), undef); + +ok(unpk(decomposeHangul(0xAC00)), "1100:1161"); +ok(unpk(decomposeHangul(0xAE00)), "1100:1173:11AF"); +ok(unpk(scalar decomposeHangul(0xAC00)), "1100:1161"); +ok(unpk(scalar decomposeHangul(0xAE00)), "1100:1173:11AF"); +ok(scalar decomposeHangul(0x0041), undef); +ok(scalar decomposeHangul(0x0000), undef); + +ok(composeHangul("Hangul \x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}."), + "Hangul \x{AC00}\x{AE00}."); + +ok(parseHangulName("HANGUL SYLLABLE GA"), 0xAC00); +ok(parseHangulName("HANGUL SYLLABLE GEUL"), 0xAE00); +ok(parseHangulName("HANGUL SYLLABLE A"), 0xC544); +ok(parseHangulName("HANGUL SYLLABLE HIH"), 0xD7A3); +ok(parseHangulName("HANGUL SYLLABLE PERL"), undef); +ok(parseHangulName("LATIN LETTER SMALL A"), undef); + +my $ng; + +$ng = 0; +foreach my $i (0xAC00..0xD7A3){ + $ng ++ if $i != parseHangulName(getHangulName($i)); +} +ok($ng, 0); + +$ng = 0; +foreach my $i (0xAC00..0xD7A3){ + $ng ++ if $i != (composeHangul scalar decomposeHangul($i))[0]; +} +ok($ng, 0); diff --git a/lib/Unicode/Collate.pm b/lib/Unicode/Collate.pm new file mode 100644 index 0000000..91a9574 --- /dev/null +++ b/lib/Unicode/Collate.pm @@ -0,0 +1,646 @@ +package Unicode::Collate; + +use 5.006; +use strict; +use warnings; +use Carp; +use Lingua::KO::Hangul::Util; +require Exporter; + +our $VERSION = '0.07'; +our $PACKAGE = __PACKAGE__; + +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = (); 
+our @EXPORT_OK = (); +our @EXPORT = (); + +(our $Path = $INC{'Unicode/Collate.pm'}) =~ s/\.pm$//; +our $KeyFile = "allkeys.txt"; + +our %Combin; # combining class from Unicode::Normalize + +use constant Min2 => 0x20; # minimum weight at level 2 +use constant Min3 => 0x02; # minimum weight at level 3 +use constant UNDEFINED => 0xFF80; # special value for undefined CE + +## +## constructor +## +sub new +{ + my $class = shift; + my $self = bless { @_ }, $class; + + # alternate + $self->{alternate} = + ! exists $self->{alternate} ? 'shifted' : + ! defined $self->{alternate} ? '' : $self->{alternate}; + + # collation level + $self->{level} ||= $self->{alternate} =~ /shift/ ? 4 : 3; + + # normalization form + $self->{normalization} = 'D' if ! exists $self->{normalization}; + + eval "use Unicode::Normalize;" if defined $self->{normalization}; + + $self->{normalize} = + ! defined $self->{normalization} ? undef : + $self->{normalization} =~ /^(?:NF)?C$/ ? \&NFC : + $self->{normalization} =~ /^(?:NF)?D$/ ? \&NFD : + $self->{normalization} =~ /^(?:NF)?KC$/ ? \&NFKC : + $self->{normalization} =~ /^(?:NF)?KD$/ ? \&NFKD : + croak "$PACKAGE unknown normalization form name: $self->{normalization}"; + + *Combin = \%Unicode::Normalize::Combin if $self->{normalize} && ! %Combin; + + # backwards + $self->{backwards} ||= []; + $self->{backwards} = [ $self->{backwards} ] if ! ref $self->{backwards}; + + # rearrange + $self->{rearrange} ||= []; # maybe not U+0000 (an ASCII) + $self->{rearrange} = [ $self->{rearrange} ] if ! ref $self->{rearrange}; + + # open the table file + my $file = defined $self->{table} ? 
$self->{table} : $KeyFile; + open my $fk, "<$Path/$file" or croak "File does not exist at $Path/$file"; + + while(<$fk>){ + next if /^\s*#/; + if(/^\s*\@/){ + if(/^\@version\s*(\S*)/){ + $self->{version} ||= $1; + } + elsif(/^\@alternate\s+(.*)/){ + $self->{alternate} ||= $1; + } + elsif(/^\@backwards\s+(.*)/){ + push @{ $self->{backwards} }, $1; + } + elsif(/^\@rearrange\s+(.*)/){ + push @{ $self->{rearrange} }, _getHexArray($1); + } + next; + } + $self->parseEntry($_); + } + close $fk; + if($self->{entry}){ + $self->parseEntry($_) foreach split /\n/, $self->{entry}; + } + + # keys of $self->{rearrangeHash} are $self->{rearrange}. + $self->{rearrangeHash} = {}; + @{ $self->{rearrangeHash} }{ @{ $self->{rearrange} } } = (); + + return $self; +} + +## +## get $line, parse it, and write an entry in $self +## +sub parseEntry +{ + my $self = shift; + my $line = shift; + my($name, $ele, @key); + + return if $line !~ /^\s*[0-9A-Fa-f]/; + + # get name + $name = $1 if $line =~ s/#\s*(.*)//; + return if defined $self->{undefName} && $name =~ /$self->{undefName}/; + + # get element + my($e, $k) = split /;/, $line; + my @e = _getHexArray($e); + $ele = pack('U*', @e); + return if defined $self->{undefChar} && $ele =~ /$self->{undefChar}/; + + # get sort key + if( + defined $self->{ignoreName} && $name =~ /$self->{ignoreName}/ || + defined $self->{ignoreChar} && $ele =~ /$self->{ignoreChar}/ + ) + { + $self->{ignored}{$ele} = 1; + $self->{entries}{$ele} = 1; # true + } + else + { + foreach my $arr ($k =~ /\[(\S+)\]/g) { + my $var = $arr =~ /\*/; + push @key, $self->getCE( $var, _getHexArray($arr) ); + } + $self->{entries}{$ele} = \@key; + } + $self->{maxlength}{ord $ele} = scalar @e if @e > 1; +} + + +## +## list to collation element +## +sub getCE +{ + my $self = shift; + my $var = shift; + my @c = @_; + + $self->{alternate} eq 'blanked' ? + $var ? [0,0,0] : [ @c[0..2] ] : + $self->{alternate} eq 'non-ignorable' ? [ @c[0..2] ] : + $self->{alternate} eq 'shifted' ? + $var ? 
[0,0,0,$c[0] ] : [ @c[0..2], $c[0]+$c[1]+$c[2] ? 0xFFFF : 0 ] : + $self->{alternate} eq 'shift-trimmed' ? + $var ? [0,0,0,$c[0] ] : [ @c[0..2], 0 ] : + \@c; +} + +## +## to debug +## +sub viewSortKey +{ + my $self = shift; + my $key = $self->getSortKey(@_); + my $view = join " ", map sprintf("%04X", $_), unpack 'n*', $key; + $view =~ s/ ?0000 ?/|/g; + "[$view]"; +} + +## +## sort key +## +sub getSortKey +{ + my $self = shift; + my $code = $self->{preprocess}; + my $norm = $self->{normalize}; + my $ent = $self->{entries}; + my $ign = $self->{ignored}; + my $max = $self->{maxlength}; + my $lev = $self->{level}; + my $cjk = $self->{overrideCJK}; + my $hang = $self->{overrideHangul}; + my $rear = $self->{rearrangeHash}; + + my $str = ref $code ? &$code(shift) : shift; + $str = &$norm($str) if ref $norm; + + my(@src, @buf); + @src = unpack('U*', $str); + + # rearrangement + for(my $i = 0; $i < @src; $i++) + { + ($src[$i], $src[$i+1]) = ($src[$i+1], $src[$i]) + if $rear->{ $src[$i] }; + $i++; + } + + for(my $i = 0; $i < @src; $i++) + { + my $ch; + my $u = $src[$i]; + + # non-characters + next if $u < 0 || 0x10FFFF < $u # out of range + || 0xD800 < $u && $u < 0xDFFF; # unpaired surrogates + my $four = $u & 0xFFFF; + next if $four == 0xFFFE || $four == 0xFFFF; + + if($max->{$u}) # contract + { + for(my $j = $max->{$u}; $j >= 1; $j--) + { + next unless $i+$j-1 < @src; + $ch = pack 'U*', @src[$i .. $i+$j-1]; + $i += $j-1, last if $ent->{$ch}; + } + } + else { $ch = pack('U', $u) } + + if(%Combin && defined $ch) # with Combining Char + { + for(my $j = $i+1; $j < @src && $Combin{ $src[$j] }; $j++) + { + my $comb = pack 'U', $src[$j]; + next if ! $ent->{ $ch.$comb }; + $ch .= $comb; + splice(@src, $j, 1); + last; + } + } + + next if !defined $ch || $ign->{$ch}; # ignored + + push @buf, + $ent->{$ch} + ? @{ $ent->{$ch} } + : _isHangul($u) + ? $hang + ? &$hang($u) + : map(@{ $ent->{pack('U', $_)} }, decomposeHangul($u)) + : _isCJK($u) + ? $cjk ? 
&$cjk($u) : map($self->getCE(0,@$_), _CJK($u)) + : map($self->getCE(0,@$_), _derivCE($u)); + } + + # make sort key + my @ret = ([],[],[],[]); + foreach my $v (0..$lev-1){ + foreach my $b (@buf){ + push @{ $ret[$v] }, $b->[$v] if $b->[$v]; + } + } + foreach (@{ $self->{backwards} }){ + my $v = $_ - 1; + @{ $ret[$v] } = reverse @{ $ret[$v] }; + } + + # modification of tertiary weights + if($self->{upper_before_lower}){ + foreach (@{ $ret[2] }){ + if (0x8 <= $_ && $_ <= 0xC){ $_ -= 6 } # lower + elsif(0x2 <= $_ && $_ <= 0x6){ $_ += 6 } # upper + elsif($_ == 0x1C) { $_ += 1 } # square upper + elsif($_ == 0x1D) { $_ -= 1 } # square lower + } + } + if($self->{katakana_before_hiragana}){ + foreach (@{ $ret[2] }){ + if (0x0F <= $_ && $_ <= 0x13){ $_ -= 2 } # katakana + elsif(0x0D <= $_ && $_ <= 0x0E){ $_ += 5 } # hiragana + } + } + join "\0\0", map pack('n*', @$_), @ret; +} + + +## +## cmp +## +sub cmp +{ + my $obj = shift; + my $a = shift; + my $b = shift; + $obj->getSortKey($a) cmp $obj->getSortKey($b); +} + +## +## sort +## +sub sort +{ + my $obj = shift; + + map { $_->[1] } + sort{ $a->[0] cmp $b->[0] } + map [ $obj->getSortKey($_), $_ ], @_; +} + +## +## Derived CE +## +sub _derivCE +{ + my $code = shift; + my $a = UNDEFINED + ($code >> 15); # ok + my $b = ($code & 0x7FFF) | 0x8000; # ok +# my $a = 0xFFC2 + ($code >> 15); # ng +# my $b = $code & 0x7FFF | 0x1000; # ng + $b ? ([$a,2,1,$code],[$b,0,0,$code]) : [$a,2,1,$code]; +} + +## +## "hhhh hhhh hhhh" to (dddd, dddd, dddd) +## +sub _getHexArray +{ + my $str = shift; + map hex(), $str =~ /([0-9a-fA-F]+)/g; +} + +## +## CJK Unified Ideographs +## +sub _isCJK +{ + my $u = shift; + return 0x3400 <= $u && $u <= 0x4DB5 + || 0x4E00 <= $u && $u <= 0x9FA5 +# || 0x20000 <= $u && $u <= 0x2A6D6; +} + +## +## CJK Unified Ideographs +## +sub _CJK +{ + my $u = shift; + $u > 0xFFFF ? 
_derivCE($u) : [$u,0x20,0x02,$u]; +} + +## +## Hangul Syllables +## +sub _isHangul +{ + my $code = shift; + return 0xAC00 <= $code && $code <= 0xD7A3; +} + +1; +__END__ + +=head1 NAME + +Unicode::Collate - use UCA (Unicode Collation Algorithm) + +=head1 SYNOPSIS + + use Unicode::Collate; + + #construct + $UCA = Unicode::Collate->new(%tailoring); + + #sort + @sorted = $UCA->sort(@not_sorted); + + #compare + $result = $UCA->cmp($a, $b); # returns 1, 0, or -1. + +=head1 DESCRIPTION + +=head2 Constructor and Tailoring + + $UCA = Unicode::Collate->new( + alternate => $alternate, + backwards => $levelNumber, # or \@levelNumbers + entry => $element, + normalization => $normalization_form, + ignoreName => qr/$ignoreName/, + ignoreChar => qr/$ignoreChar/, + katakana_before_hiragana => $bool, + level => $collationLevel, + overrideCJK => \&overrideCJK, + overrideHangul => \&overrideHangul, + preprocess => \&preprocess, + rearrange => \@charList, + table => $filename, + undefName => qr/$undefName/, + undefChar => qr/$undefChar/, + upper_before_lower => $bool, + ); + # if %tailoring is false (empty), + # $UCA should do the default collation. + +=over 4 + +=item alternate + +-- see 3.2.2 Alternate Weighting, UTR #10. + + alternate => 'shifted', 'blanked', 'non-ignorable', or 'shift-trimmed'. + +By default (if specification is omitted), 'shifted' is adopted. + +=item backwards + +-- see 3.1.2 French Accents, UTR #10. + + backwards => $levelNumber or \@levelNumbers + +Weights in reverse order; ex. level 2 (diacritic ordering) in French. +If omitted, forwards at all the levels. + +=item entry + +-- see 3.1 Linguistic Features; 3.2.1 File Format, UTR #10. 
+ +Overrides a default order or adds a new element + + entry => <<'ENTRIES', # use the UCA file format +00E6 ; [.0861.0020.0002.00E6] [.08B1.0020.0002.00E6] # ligature <ae> as <a><e> +0063 0068 ; [.0893.0020.0002.0063] # "ch" in traditional Spanish +0043 0068 ; [.0893.0020.0008.0043] # "Ch" in traditional Spanish +ENTRIES + +=item ignoreName + +=item ignoreChar + +-- see Completely Ignorable, 3.2.2 Alternate Weighting, UTR #10. + +Ignores the entry in the table. +If an ignored collation element appears in the string to be collated, +it is ignored as if the element had been deleted from there. + +E.g. when 'a' and 'e' are ignored, +'element' is equal to 'lament' (or 'lmnt'). + +=item level + +-- see 4.3 Form a sort key for each string, UTR #10. + +Set the maximum level. +Any higher levels than the specified one are ignored. + + Level 1: alphabetic ordering + Level 2: diacritic ordering + Level 3: case ordering + Level 4: tie-breaking (e.g. in the case when alternate is 'shifted') + + ex.level => 2, + +=item normalization + +-- see 4.1 Normalize each input string, UTR #10. + +If specified, strings are normalized before preparation of sort keys +(the normalization is executed after preprocess). + +As a form name, one of the following names must be used. + + 'C' or 'NFC' for Normalization Form C + 'D' or 'NFD' for Normalization Form D + 'KC' or 'NFKC' for Normalization Form KC + 'KD' or 'NFKD' for Normalization Form KD + +If omitted, the string is put into Normalization Form D. + +If undefined explicitly (as C<normalization =E<gt> undef>), +no normalization is carried out (this may make tailoring easier +if any normalization is not desired). + +see B<CAVEAT>. + +=item overrideCJK + +=item overrideHangul + +-- see 7.1 Derived Collation Elements, UTR #10. + +By default, mapping of CJK Unified Ideographs +uses the Unicode codepoint order +and Hangul Syllables are decomposed into Hangul Jamo. + +The mapping of CJK Unified Ideographs +or Hangul Syllables may be overridden. + +ex. 
CJK Unified Ideographs in the JIS codepoint order. + + overrideCJK => sub { + my $u = shift; # get unicode codepoint + my $b = pack('n', $u); # to UTF-16BE + my $s = your_unicode_to_sjis_converter($b); # convert + my $n = unpack('n', $s); # convert sjis to short + [ $n, 1, 1 ]; # return collation element + }, + +If you want to override the mapping of Hangul Syllables, +the Normalization Forms D and KD are not appropriate +(they will be decomposed before overriding). + +=item preprocess + +-- see 5.1 Preprocessing, UTR #10. + +If specified, the coderef is used to preprocess +before the formation of sort keys. + +ex. dropping English articles, such as "a" or "the". +Then, "the pen" is before "a pencil". + + preprocess => sub { + my $str = shift; + $str =~ s/\b(?:an?|the)\s+//g; + $str; + }, + +=item rearrange + +-- see 3.1.3 Rearrangement, UTR #10. + +Characters that are not coded in logical order and are to be rearranged. +By default, + + rearrange => [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ], + +=item table + +-- see 3.2 Default Unicode Collation Element Table, UTR #10. + +You can use another element table if desired. +The table file must be in your C<lib/Unicode/Collate> directory. + +By default, the file C<lib/Unicode/Collate/allkeys.txt> is used. + +=item undefName + +=item undefChar + +-- see 6.3.4 Reducing the Repertoire, UTR #10. + +Undefines the collation element as if it were unassigned in the table. +This reduces the size of the table. +If an unassigned character appears in the string to be collated, +the sort key is made from its codepoint +as a single-character collation element, +as it is greater than any other assigned collation elements +(in the codepoint order among the unassigned characters). +But, it'd be better to ignore characters +unfamiliar to you and maybe never used. + +=item katakana_before_hiragana + +=item upper_before_lower + +-- see 6.6 Case Comparisons; 7.3.1 Tertiary Weight Table, UTR #10. + +By default, lowercase is before uppercase +and hiragana is before katakana. 
+ +If the parameter is true, this is reversed. + +=back + +=head2 Other methods + +=over 4 + +=item C<@sorted = $UCA-E<gt>sort(@not_sorted)> + +Sorts a list of strings. + +=item C<$result = $UCA-E<gt>cmp($a, $b)> + +Returns 1 (when C<$a> is greater than C<$b>) +or 0 (when C<$a> is equal to C<$b>) +or -1 (when C<$a> is less than C<$b>). + +=item C<$sortKey = $UCA-E<gt>getSortKey($string)> + +-- see 4.3 Form a sort key for each string, UTR #10. + +Returns a sort key. + +You compare the sort keys using a binary comparison +and get the result of the comparison of the strings using UCA. + + $UCA->getSortKey($a) cmp $UCA->getSortKey($b) + + is equivalent to + + $UCA->cmp($a, $b) + +=back + +=head2 EXPORT + +None by default. + +=head2 CAVEAT + +Use of the C<normalization> parameter requires +the B<Unicode::Normalize> module. + +If you do not need it (e.g. in the case when you need not +handle any combining characters), +assign C<normalization =E<gt> undef> explicitly. + +=head1 AUTHOR + +SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt> + + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This program is free software; you can redistribute it and/or + modify it under the same terms as Perl itself. + +=head1 SEE ALSO + +=over 4 + +=item L<Lingua::KO::Hangul::Util> + +utility functions for Hangul Syllables + +=item L<Unicode::Normalize> + +normalized forms of Unicode text + +=item Unicode Collation Algorithm - Unicode TR #10 + +http://www.unicode.org/unicode/reports/tr10/ + +=back + +=cut diff --git a/lib/Unicode/Collate/Changes b/lib/Unicode/Collate/Changes new file mode 100644 index 0000000..57e56b2 --- /dev/null +++ b/lib/Unicode/Collate/Changes @@ -0,0 +1,26 @@ +Revision history for Perl extension Unicode::Collate. + +0.07 Thu Aug 16 23:42:02 2001 + - rename the module name to Unicode::Collate. + +0.06 Thu Aug 16 23:18:36 2001 + - add description of the getSortKey method. + +0.05 Mon Aug 13 22:23:11 2001 + - bug fix: on the things of 4.2.1, UTR #10 + - getSortKey returns a string, but not an arrayref. 
+ +0.04 Mon Aug 13 22:23:11 2001 + - some bugs are fixed. + - some tailoring parameters are added. + +0.03 Mon Aug 06 06:26:35 2001 + - modify README + +0.02 Sun Aug 05 20:20:01 2001 + - some fix + +0.01 Sun Jul 29 16:16:15 2001 + - original version; created by h2xs 1.21 + with options -A -X -n Sort::UCA + diff --git a/lib/Unicode/Collate/README b/lib/Unicode/Collate/README new file mode 100644 index 0000000..bf0efff --- /dev/null +++ b/lib/Unicode/Collate/README @@ -0,0 +1,60 @@ +Unicode/Collate version 0.07 +=============================== + +use UCA (Unicode Collation Algorithm) + + See UCA - Unicode TR #10. + http://www.unicode.org/unicode/reports/tr10/ + + Fetch the following file before use and put it into the Unicode/Collate dir. + http://www.unicode.org/unicode/reports/tr10/allkeys.txt + + You can install this module using a subset "keys.txt" + contained in this distribution without the "allkeys.txt". + +SYNOPSIS + + use Unicode::Collate; + + #construct + $UCA = Unicode::Collate->new(%tailoring); + + #sort + @sorted = $UCA->sort(@not_sorted); + + #compare + $result = $UCA->cmp($a, $b); # returns 1, 0, or -1. + +INSTALLATION + +Perl 5.006 or later + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries: + +Carp +Exporter +Lingua::KO::Hangul::Util + +It's better if Unicode::Normalize has been installed +although Unicode::Collate can be used without Unicode::Normalize. + +COPYRIGHT AND LICENCE + +SADAHIRO Tomoyuki + + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This program is free software; you can redistribute it and/or + modify it under the same terms as Perl itself. 
+ diff --git a/lib/Unicode/Collate/keys.txt b/lib/Unicode/Collate/keys.txt new file mode 100644 index 0000000..22ce673 --- /dev/null +++ b/lib/Unicode/Collate/keys.txt @@ -0,0 +1,1930 @@ +#### This file is generated from allkeys-3.0.1d9.txt (unicode.org) +#### by deleting mamy many elements. +#### +#### Remaining elements include LATIN, HANGUL, HIRAGANA, KATAKANA, +#### BOPOMOFO, CJK UNIFIED IDEOGRAPHS. +#### +#### To fetch the original file, access to: +#### http://www.unicode.org/unicode/reports/tr10/allkeys.txt +#### +# allkeys-3.0.1d9.txt +# Created: 2001-Feb-22 +# Posted: 2001-Mar-29 +# +# Note: This file was originally posted with the header section +# omitted, together with the @version and @rearrange lines. This +# 2001-Mar-29 reposting corrects that omission. All of the weight +# entries are identical to the originally posted allkeys.txt for +# UTS #10, Version 8.0. +# +# Derived from: unidata-3.0.1d9.txt +# Sifter version: 3.0.1d4, 2001-Feb-22 + +@version 3.0.1d9 + +@rearrange 0E40,0E41,0E42,0E43,0E44 +@rearrange 0EC0,0EC1,0EC2,0EC3,0EC4 + +0009 ; [*0201.0020.0002.0009] # HORIZONTAL TABULATION (in 6429) +000A ; [*0202.0020.0002.000A] # LINE FEED (in 6429) +000B ; [*0203.0020.0002.000B] # VERTICAL TABULATION (in 6429) +000C ; [*0204.0020.0002.000C] # FORM FEED (in 6429) +000D ; [*0205.0020.0002.000D] # CARRIAGE RETURN (in 6429) +0020 ; [*0209.0020.0002.0020] # SPACE +0021 ; [*0237.0020.0002.0021] # EXCLAMATION MARK +0022 ; [*025C.0020.0002.0022] # QUOTATION MARK +0023 ; [*0295.0020.0002.0023] # NUMBER SIGN +0025 ; [*0296.0020.0002.0025] # PERCENT SIGN +0026 ; [*0293.0020.0002.0026] # AMPERSAND +0027 ; [*0255.0020.0002.0027] # APOSTROPHE +0028 ; [*0266.0020.0002.0028] # LEFT PARENTHESIS +0029 ; [*0267.0020.0002.0029] # RIGHT PARENTHESIS +002A ; [*028E.0020.0002.002A] # ASTERISK +002B ; [*038B.0020.0002.002B] # PLUS SIGN +002C ; [*0219.0020.0002.002C] # COMMA +002D ; [*020D.0020.0002.002D] # HYPHEN-MINUS +002E ; [*0241.0020.0002.002E] # FULL STOP +002F ; 
[*0290.0020.0002.002F] # SOLIDUS +003A ; [*0223.0020.0002.003A] # COLON +003B ; [*0221.0020.0002.003B] # SEMICOLON +003C ; [*038F.0020.0002.003C] # LESS-THAN SIGN +003D ; [*0390.0020.0002.003D] # EQUALS SIGN +003E ; [*0391.0020.0002.003E] # GREATER-THAN SIGN +003F ; [*023A.0020.0002.003F] # QUESTION MARK +0040 ; [*028D.0020.0002.0040] # COMMERCIAL AT +005C ; [*0292.0020.0002.005C] # REVERSE SOLIDUS +007B ; [*026A.0020.0002.007B] # LEFT CURLY BRACKET +007C ; [*0393.0020.0002.007C] # VERTICAL LINE +007D ; [*026B.0020.0002.007D] # RIGHT CURLY BRACKET +007E ; [*0396.0020.0002.007E] # TILDE +0085 ; [*0206.0020.0002.0085] # NEXT LINE (in 6429) +00A1 ; [*0238.0020.0002.00A1] # INVERTED EXCLAMATION MARK +00A6 ; [*0394.0020.0002.00A6] # BROKEN BAR +00A7 ; [*0288.0020.0002.00A7] # SECTION SIGN +00A9 ; [*028B.0020.0002.00A9] # COPYRIGHT SIGN +00AC ; [*0392.0020.0002.00AC] # NOT SIGN +00AD ; [*020C.0020.0002.00AD] # SOFT HYPHEN +00AE ; [*028C.0020.0002.00AE] # REGISTERED SIGN +00B0 ; [*02F6.0020.0002.00B0] # DEGREE SIGN +00B1 ; [*038C.0020.0002.00B1] # PLUS-MINUS SIGN +00B6 ; [*0289.0020.0002.00B6] # PILCROW SIGN +00B7 ; [*024B.0020.0002.00B7] # MIDDLE DOT +00BF ; [*023B.0020.0002.00BF] # INVERTED QUESTION MARK +00D7 ; [*038E.0020.0002.00D7] # MULTIPLICATION SIGN +00F7 ; [*038D.0020.0002.00F7] # DIVISION SIGN +02B9 ; [*02D5.0020.0002.02B9] # MODIFIER LETTER PRIME +02BA ; [*02D7.0020.0002.02BA] # MODIFIER LETTER DOUBLE PRIME +02C2 ; [*02D8.0020.0002.02C2] # MODIFIER LETTER LEFT ARROWHEAD +02C3 ; [*02D9.0020.0002.02C3] # MODIFIER LETTER RIGHT ARROWHEAD +02C4 ; [*02DA.0020.0002.02C4] # MODIFIER LETTER UP ARROWHEAD +02C5 ; [*02DB.0020.0002.02C5] # MODIFIER LETTER DOWN ARROWHEAD +02C6 ; [*02DC.0020.0002.02C6] # MODIFIER LETTER CIRCUMFLEX ACCENT +02C7 ; [*02DD.0020.0002.02C7] # CARON +02C8 ; [*02DE.0020.0002.02C8] # MODIFIER LETTER VERTICAL LINE +02C9 ; [*02DF.0020.0002.02C9] # MODIFIER LETTER MACRON +02CA ; [*02E0.0020.0002.02CA] # MODIFIER LETTER ACUTE ACCENT +02CB ; 
[*02E1.0020.0002.02CB] # MODIFIER LETTER GRAVE ACCENT +02CC ; [*02E2.0020.0002.02CC] # MODIFIER LETTER LOW VERTICAL LINE +02CD ; [*02E3.0020.0002.02CD] # MODIFIER LETTER LOW MACRON +02CE ; [*02E4.0020.0002.02CE] # MODIFIER LETTER LOW GRAVE ACCENT +02CF ; [*02E5.0020.0002.02CF] # MODIFIER LETTER LOW ACUTE ACCENT +02D2 ; [*02E6.0020.0002.02D2] # MODIFIER LETTER CENTRED RIGHT HALF RING +02D3 ; [*02E7.0020.0002.02D3] # MODIFIER LETTER CENTRED LEFT HALF RING +02D4 ; [*02E8.0020.0002.02D4] # MODIFIER LETTER UP TACK +02D5 ; [*02E9.0020.0002.02D5] # MODIFIER LETTER DOWN TACK +02D6 ; [*02EA.0020.0002.02D6] # MODIFIER LETTER PLUS SIGN +02D7 ; [*02EB.0020.0002.02D7] # MODIFIER LETTER MINUS SIGN +02DE ; [*02EC.0020.0002.02DE] # MODIFIER LETTER RHOTIC HOOK +02E5 ; [*02ED.0020.0002.02E5] # MODIFIER LETTER EXTRA-HIGH TONE BAR +02E6 ; [*02EE.0020.0002.02E6] # MODIFIER LETTER HIGH TONE BAR +02E7 ; [*02EF.0020.0002.02E7] # MODIFIER LETTER MID TONE BAR +02E8 ; [*02F0.0020.0002.02E8] # MODIFIER LETTER LOW TONE BAR +02E9 ; [*02F1.0020.0002.02E9] # MODIFIER LETTER EXTRA-LOW TONE BAR +02EA ; [*02F2.0020.0002.02EA] # MODIFIER LETTER YIN DEPARTING TONE MARK +02EB ; [*02F3.0020.0002.02EB] # MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; [*02F4.0020.0002.02EC] # MODIFIER LETTER VOICING +02ED ; [*02F5.0020.0002.02ED] # MODIFIER LETTER UNASPIRATED +2000 ; [*0209.0020.0004.2000] # EN QUAD; CANON +2001 ; [*0209.0020.0004.2001] # EM QUAD; CANON +2010 ; [*0211.0020.0002.2010] # HYPHEN +2012 ; [*0212.0020.0002.2012] # FIGURE DASH +2013 ; [*0213.0020.0002.2013] # EN DASH +2014 ; [*0214.0020.0002.2014] # EM DASH +2015 ; [*0215.0020.0002.2015] # HORIZONTAL BAR +2016 ; [*0395.0020.0002.2016] # DOUBLE VERTICAL LINE +2018 ; [*0256.0020.0002.2018] # LEFT SINGLE QUOTATION MARK +2019 ; [*0257.0020.0002.2019] # RIGHT SINGLE QUOTATION MARK +201A ; [*0258.0020.0002.201A] # SINGLE LOW-9 QUOTATION MARK +201B ; [*0259.0020.0002.201B] # SINGLE HIGH-REVERSED-9 QUOTATION MARK +201C ; [*025D.0020.0002.201C] # LEFT 
DOUBLE QUOTATION MARK +201D ; [*025E.0020.0002.201D] # RIGHT DOUBLE QUOTATION MARK +201E ; [*025F.0020.0002.201E] # DOUBLE LOW-9 QUOTATION MARK +201F ; [*0260.0020.0002.201F] # DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020 ; [*029A.0020.0002.2020] # DAGGER +2021 ; [*029B.0020.0002.2021] # DOUBLE DAGGER +2022 ; [*029C.0020.0002.2022] # BULLET +2023 ; [*029D.0020.0002.2023] # TRIANGULAR BULLET +2027 ; [*029E.0020.0002.2027] # HYPHENATION POINT +2028 ; [*0207.0020.0002.2028] # LINE SEPARATOR +2029 ; [*0208.0020.0002.2029] # PARAGRAPH SEPARATOR +2030 ; [*0298.0020.0002.2030] # PER MILLE SIGN +2031 ; [*0299.0020.0002.2031] # PER TEN THOUSAND SIGN +2032 ; [*02A2.0020.0002.2032] # PRIME +2035 ; [*02A3.0020.0002.2035] # REVERSED PRIME +2038 ; [*02A5.0020.0002.2038] # CARET +203B ; [*02A6.0020.0002.203B] # REFERENCE MARK +203D ; [*0240.0020.0002.203D] # INTERROBANG +203F ; [*02A7.0020.0002.203F] # UNDERTIE +2040 ; [*02A8.0020.0002.2040] # CHARACTER TIE +2041 ; [*02A9.0020.0002.2041] # CARET INSERTION POINT +2042 ; [*02AA.0020.0002.2042] # ASTERISM +2043 ; [*029F.0020.0002.2043] # HYPHEN BULLET +2044 ; [*0291.0020.0002.2044] # FRACTION SLASH +204A ; [*0294.0020.0002.204A] # TIRONIAN SIGN ET +204B ; [*028A.0020.0002.204B] # REVERSED PILCROW SIGN +204C ; [*02A0.0020.0002.204C] # BLACK LEFTWARDS BULLET +204D ; [*02A1.0020.0002.204D] # BLACK RIGHTWARDS BULLET +2200 ; [*037C.0020.0002.2200] # FOR ALL +2201 ; [*037D.0020.0002.2201] # COMPLEMENT +2202 ; [*037E.0020.0002.2202] # PARTIAL DIFFERENTIAL +2203 ; [*037F.0020.0002.2203] # THERE EXISTS +2204 ; [*037F.0054.0002.2204] # THERE DOES NOT EXIST; CANONSEQ +2205 ; [*0380.0020.0002.2205] # EMPTY SET +2206 ; [*0381.0020.0002.2206] # INCREMENT +2207 ; [*0382.0020.0002.2207] # NABLA +2208 ; [*0383.0020.0002.2208] # ELEMENT OF +2209 ; [*0383.0054.0002.2209] # NOT AN ELEMENT OF; CANONSEQ +220A ; [*0384.0020.0002.220A] # SMALL ELEMENT OF +220B ; [*0385.0020.0002.220B] # CONTAINS AS MEMBER +220C ; [*0385.0054.0002.220C] # DOES NOT CONTAIN AS 
MEMBER; CANONSEQ +220D ; [*0386.0020.0002.220D] # SMALL CONTAINS AS MEMBER +220E ; [*0387.0020.0002.220E] # END OF PROOF +220F ; [*0388.0020.0002.220F] # N-ARY PRODUCT +2210 ; [*0389.0020.0002.2210] # N-ARY COPRODUCT +2211 ; [*038A.0020.0002.2211] # N-ARY SUMMATION +2212 ; [*0397.0020.0002.2212] # MINUS SIGN +2213 ; [*0398.0020.0002.2213] # MINUS-OR-PLUS SIGN +2214 ; [*0399.0020.0002.2214] # DOT PLUS +2215 ; [*039A.0020.0002.2215] # DIVISION SLASH +2216 ; [*039B.0020.0002.2216] # SET MINUS +2217 ; [*039C.0020.0002.2217] # ASTERISK OPERATOR +2218 ; [*039D.0020.0002.2218] # RING OPERATOR +2219 ; [*039E.0020.0002.2219] # BULLET OPERATOR +221B ; [*03A0.0020.0002.221B] # CUBE ROOT +221C ; [*03A1.0020.0002.221C] # FOURTH ROOT +221D ; [*03A2.0020.0002.221D] # PROPORTIONAL TO +221E ; [*03A3.0020.0002.221E] # INFINITY +2223 ; [*03A8.0020.0002.2223] # DIVIDES +2224 ; [*03A8.0054.0002.2224] # DOES NOT DIVIDE; CANONSEQ +2225 ; [*03A9.0020.0002.2225] # PARALLEL TO +2226 ; [*03A9.0054.0002.2226] # NOT PARALLEL TO; CANONSEQ +2227 ; [*03AA.0020.0002.2227] # LOGICAL AND +2228 ; [*03AB.0020.0002.2228] # LOGICAL OR +2229 ; [*03AC.0020.0002.2229] # INTERSECTION +222A ; [*03AD.0020.0002.222A] # UNION +2234 ; [*03B3.0020.0002.2234] # THEREFORE +2235 ; [*03B4.0020.0002.2235] # BECAUSE +2236 ; [*03B5.0020.0002.2236] # RATIO +2237 ; [*03B6.0020.0002.2237] # PROPORTION +2238 ; [*03B7.0020.0002.2238] # DOT MINUS +2239 ; [*03B8.0020.0002.2239] # EXCESS +223A ; [*03B9.0020.0002.223A] # GEOMETRIC PROPORTION +223B ; [*03BA.0020.0002.223B] # HOMOTHETIC +223C ; [*03BB.0020.0002.223C] # TILDE OPERATOR +223D ; [*03BC.0020.0002.223D] # REVERSED TILDE +223E ; [*03BD.0020.0002.223E] # INVERTED LAZY S +223F ; [*03BE.0020.0002.223F] # SINE WAVE +2240 ; [*03BF.0020.0002.2240] # WREATH PRODUCT +2241 ; [*03BB.0054.0002.2241] # NOT TILDE; CANONSEQ +2242 ; [*03C0.0020.0002.2242] # MINUS TILDE +2243 ; [*03C1.0020.0002.2243] # ASYMPTOTICALLY EQUAL TO +2244 ; [*03C1.0054.0002.2244] # NOT ASYMPTOTICALLY EQUAL TO; 
CANONSEQ +2245 ; [*03C2.0020.0002.2245] # APPROXIMATELY EQUAL TO +2246 ; [*03C3.0020.0002.2246] # APPROXIMATELY BUT NOT ACTUALLY EQUAL TO +2247 ; [*03C2.0054.0002.2247] # NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO; CANONSEQ +2248 ; [*03C4.0020.0002.2248] # ALMOST EQUAL TO +2249 ; [*03C4.0054.0002.2249] # NOT ALMOST EQUAL TO; CANONSEQ +224A ; [*03C5.0020.0002.224A] # ALMOST EQUAL OR EQUAL TO +224B ; [*03C6.0020.0002.224B] # TRIPLE TILDE +224C ; [*03C7.0020.0002.224C] # ALL EQUAL TO +224D ; [*03C8.0020.0002.224D] # EQUIVALENT TO +224E ; [*03C9.0020.0002.224E] # GEOMETRICALLY EQUIVALENT TO +224F ; [*03CA.0020.0002.224F] # DIFFERENCE BETWEEN +2250 ; [*03CB.0020.0002.2250] # APPROACHES THE LIMIT +2251 ; [*03CC.0020.0002.2251] # GEOMETRICALLY EQUAL TO +2252 ; [*03CD.0020.0002.2252] # APPROXIMATELY EQUAL TO OR THE IMAGE OF +2253 ; [*03CE.0020.0002.2253] # IMAGE OF OR APPROXIMATELY EQUAL TO +2254 ; [*03CF.0020.0002.2254] # COLON EQUALS +2255 ; [*03D0.0020.0002.2255] # EQUALS COLON +2256 ; [*03D1.0020.0002.2256] # RING IN EQUAL TO +2257 ; [*03D2.0020.0002.2257] # RING EQUAL TO +2258 ; [*03D3.0020.0002.2258] # CORRESPONDS TO +2259 ; [*03D4.0020.0002.2259] # ESTIMATES +225A ; [*03D5.0020.0002.225A] # EQUIANGULAR TO +225C ; [*03D7.0020.0002.225C] # DELTA EQUAL TO +225D ; [*03D8.0020.0002.225D] # EQUAL TO BY DEFINITION +225E ; [*03D9.0020.0002.225E] # MEASURED BY +225F ; [*03DA.0020.0002.225F] # QUESTIONED EQUAL TO +2260 ; [*0390.0054.0002.2260] # NOT EQUAL TO; CANONSEQ +2261 ; [*03DB.0020.0002.2261] # IDENTICAL TO +2262 ; [*03DB.0054.0002.2262] # NOT IDENTICAL TO; CANONSEQ +2263 ; [*03DC.0020.0002.2263] # STRICTLY EQUIVALENT TO +2264 ; [*03DD.0020.0002.2264] # LESS-THAN OR EQUAL TO +2265 ; [*03DE.0020.0002.2265] # GREATER-THAN OR EQUAL TO +2266 ; [*03DF.0020.0002.2266] # LESS-THAN OVER EQUAL TO +2267 ; [*03E0.0020.0002.2267] # GREATER-THAN OVER EQUAL TO +2268 ; [*03E1.0020.0002.2268] # LESS-THAN BUT NOT EQUAL TO +2269 ; [*03E2.0020.0002.2269] # GREATER-THAN BUT NOT EQUAL TO 
+226A ; [*03E3.0020.0002.226A] # MUCH LESS-THAN +226B ; [*03E4.0020.0002.226B] # MUCH GREATER-THAN +226C ; [*03E5.0020.0002.226C] # BETWEEN +226D ; [*03C8.0054.0002.226D] # NOT EQUIVALENT TO; CANONSEQ +226E ; [*038F.0054.0002.226E] # NOT LESS-THAN; CANONSEQ +226F ; [*0391.0054.0002.226F] # NOT GREATER-THAN; CANONSEQ +2270 ; [*03DD.0054.0002.2270] # NEITHER LESS-THAN NOR EQUAL TO; CANONSEQ +2271 ; [*03DE.0054.0002.2271] # NEITHER GREATER-THAN NOR EQUAL TO; CANONSEQ +2272 ; [*03E6.0020.0002.2272] # LESS-THAN OR EQUIVALENT TO +2273 ; [*03E7.0020.0002.2273] # GREATER-THAN OR EQUIVALENT TO +2274 ; [*03E6.0054.0002.2274] # NEITHER LESS-THAN NOR EQUIVALENT TO; CANONSEQ +2275 ; [*03E7.0054.0002.2275] # NEITHER GREATER-THAN NOR EQUIVALENT TO; CANONSEQ +2276 ; [*03E8.0020.0002.2276] # LESS-THAN OR GREATER-THAN +2277 ; [*03E9.0020.0002.2277] # GREATER-THAN OR LESS-THAN +2278 ; [*03E8.0054.0002.2278] # NEITHER LESS-THAN NOR GREATER-THAN; CANONSEQ +2279 ; [*03E9.0054.0002.2279] # NEITHER GREATER-THAN NOR LESS-THAN; CANONSEQ +227A ; [*03EA.0020.0002.227A] # PRECEDES +227B ; [*03EB.0020.0002.227B] # SUCCEEDS +227C ; [*03EC.0020.0002.227C] # PRECEDES OR EQUAL TO +227D ; [*03ED.0020.0002.227D] # SUCCEEDS OR EQUAL TO +227E ; [*03EE.0020.0002.227E] # PRECEDES OR EQUIVALENT TO +227F ; [*03EF.0020.0002.227F] # SUCCEEDS OR EQUIVALENT TO +2280 ; [*03EA.0054.0002.2280] # DOES NOT PRECEDE; CANONSEQ +2281 ; [*03EB.0054.0002.2281] # DOES NOT SUCCEED; CANONSEQ +2282 ; [*03F0.0020.0002.2282] # SUBSET OF +2283 ; [*03F1.0020.0002.2283] # SUPERSET OF +2284 ; [*03F0.0054.0002.2284] # NOT A SUBSET OF; CANONSEQ +2285 ; [*03F1.0054.0002.2285] # NOT A SUPERSET OF; CANONSEQ +2286 ; [*03F2.0020.0002.2286] # SUBSET OF OR EQUAL TO +2287 ; [*03F3.0020.0002.2287] # SUPERSET OF OR EQUAL TO +2288 ; [*03F2.0054.0002.2288] # NEITHER A SUBSET OF NOR EQUAL TO; CANONSEQ +2289 ; [*03F3.0054.0002.2289] # NEITHER A SUPERSET OF NOR EQUAL TO; CANONSEQ +228A ; [*03F4.0020.0002.228A] # SUBSET OF WITH NOT EQUAL TO +228B ; 
[*03F5.0020.0002.228B] # SUPERSET OF WITH NOT EQUAL TO +228C ; [*03F6.0020.0002.228C] # MULTISET +228D ; [*03F7.0020.0002.228D] # MULTISET MULTIPLICATION +228E ; [*03F8.0020.0002.228E] # MULTISET UNION +22A2 ; [*040C.0020.0002.22A2] # RIGHT TACK +22A3 ; [*040D.0020.0002.22A3] # LEFT TACK +22A4 ; [*040E.0020.0002.22A4] # DOWN TACK +22A5 ; [*040F.0020.0002.22A5] # UP TACK +22A6 ; [*0410.0020.0002.22A6] # ASSERTION +22A7 ; [*0411.0020.0002.22A7] # MODELS +22A8 ; [*0412.0020.0002.22A8] # TRUE +22A9 ; [*0413.0020.0002.22A9] # FORCES +22AA ; [*0414.0020.0002.22AA] # TRIPLE VERTICAL BAR RIGHT TURNSTILE +22AB ; [*0415.0020.0002.22AB] # DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +22AC ; [*040C.0054.0002.22AC] # DOES NOT PROVE; CANONSEQ +22AD ; [*0412.0054.0002.22AD] # NOT TRUE; CANONSEQ +22AE ; [*0413.0054.0002.22AE] # DOES NOT FORCE; CANONSEQ +22AF ; [*0415.0054.0002.22AF] # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE; CANONSEQ +22B0 ; [*0416.0020.0002.22B0] # PRECEDES UNDER RELATION +22B1 ; [*0417.0020.0002.22B1] # SUCCEEDS UNDER RELATION +22B2 ; [*0418.0020.0002.22B2] # NORMAL SUBGROUP OF +22B3 ; [*0419.0020.0002.22B3] # CONTAINS AS NORMAL SUBGROUP +22B4 ; [*041A.0020.0002.22B4] # NORMAL SUBGROUP OF OR EQUAL TO +22B5 ; [*041B.0020.0002.22B5] # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO +22B6 ; [*041C.0020.0002.22B6] # ORIGINAL OF +22B7 ; [*041D.0020.0002.22B7] # IMAGE OF +22B8 ; [*041E.0020.0002.22B8] # MULTIMAP +22B9 ; [*041F.0020.0002.22B9] # HERMITIAN CONJUGATE MATRIX +22BA ; [*0420.0020.0002.22BA] # INTERCALATE +22BB ; [*0421.0020.0002.22BB] # XOR +22BC ; [*0422.0020.0002.22BC] # NAND +22BD ; [*0423.0020.0002.22BD] # NOR +22C0 ; [*0426.0020.0002.22C0] # N-ARY LOGICAL AND +22C1 ; [*0427.0020.0002.22C1] # N-ARY LOGICAL OR +22C2 ; [*0428.0020.0002.22C2] # N-ARY INTERSECTION +22C3 ; [*0429.0020.0002.22C3] # N-ARY UNION +22C4 ; [*042A.0020.0002.22C4] # DIAMOND OPERATOR +22C5 ; [*042B.0020.0002.22C5] # DOT OPERATOR +22C7 ; [*042D.0020.0002.22C7] # DIVISION TIMES +22C8 
; [*042E.0020.0002.22C8] # BOWTIE +22C9 ; [*042F.0020.0002.22C9] # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT +22CA ; [*0430.0020.0002.22CA] # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT +22CB ; [*0431.0020.0002.22CB] # LEFT SEMIDIRECT PRODUCT +22CC ; [*0432.0020.0002.22CC] # RIGHT SEMIDIRECT PRODUCT +22CD ; [*0433.0020.0002.22CD] # REVERSED TILDE EQUALS +22CE ; [*0434.0020.0002.22CE] # CURLY LOGICAL OR +22CF ; [*0435.0020.0002.22CF] # CURLY LOGICAL AND +22D0 ; [*0436.0020.0002.22D0] # DOUBLE SUBSET +22D1 ; [*0437.0020.0002.22D1] # DOUBLE SUPERSET +22D2 ; [*0438.0020.0002.22D2] # DOUBLE INTERSECTION +22D3 ; [*0439.0020.0002.22D3] # DOUBLE UNION +22D4 ; [*043A.0020.0002.22D4] # PITCHFORK +22D5 ; [*043B.0020.0002.22D5] # EQUAL AND PARALLEL TO +22D6 ; [*043C.0020.0002.22D6] # LESS-THAN WITH DOT +22D7 ; [*043D.0020.0002.22D7] # GREATER-THAN WITH DOT +22D8 ; [*043E.0020.0002.22D8] # VERY MUCH LESS-THAN +22D9 ; [*043F.0020.0002.22D9] # VERY MUCH GREATER-THAN +22DA ; [*0440.0020.0002.22DA] # LESS-THAN EQUAL TO OR GREATER-THAN +22DB ; [*0441.0020.0002.22DB] # GREATER-THAN EQUAL TO OR LESS-THAN +22DC ; [*0442.0020.0002.22DC] # EQUAL TO OR LESS-THAN +22DD ; [*0443.0020.0002.22DD] # EQUAL TO OR GREATER-THAN +22DE ; [*0444.0020.0002.22DE] # EQUAL TO OR PRECEDES +22DF ; [*0445.0020.0002.22DF] # EQUAL TO OR SUCCEEDS +22E0 ; [*03EC.0054.0002.22E0] # DOES NOT PRECEDE OR EQUAL; CANONSEQ +22E1 ; [*03ED.0054.0002.22E1] # DOES NOT SUCCEED OR EQUAL; CANONSEQ +22E6 ; [*0448.0020.0002.22E6] # LESS-THAN BUT NOT EQUIVALENT TO +22E7 ; [*0449.0020.0002.22E7] # GREATER-THAN BUT NOT EQUIVALENT TO +22E8 ; [*044A.0020.0002.22E8] # PRECEDES BUT NOT EQUIVALENT TO +22E9 ; [*044B.0020.0002.22E9] # SUCCEEDS BUT NOT EQUIVALENT TO +22EA ; [*0418.0054.0002.22EA] # NOT NORMAL SUBGROUP OF; CANONSEQ +22EB ; [*0419.0054.0002.22EB] # DOES NOT CONTAIN AS NORMAL SUBGROUP; CANONSEQ +22EC ; [*041A.0054.0002.22EC] # NOT NORMAL SUBGROUP OF OR EQUAL TO; CANONSEQ +22ED ; [*041B.0054.0002.22ED] # DOES NOT CONTAIN AS NORMAL 
SUBGROUP OR EQUAL; CANONSEQ +22EE ; [*044C.0020.0002.22EE] # VERTICAL ELLIPSIS +22EF ; [*044D.0020.0002.22EF] # MIDLINE HORIZONTAL ELLIPSIS +22F0 ; [*044E.0020.0002.22F0] # UP RIGHT DIAGONAL ELLIPSIS +22F1 ; [*044F.0020.0002.22F1] # DOWN RIGHT DIAGONAL ELLIPSIS +2300 ; [*0450.0020.0002.2300] # DIAMETER SIGN +2302 ; [*0452.0020.0002.2302] # HOUSE +2303 ; [*0453.0020.0002.2303] # UP ARROWHEAD +2304 ; [*0454.0020.0002.2304] # DOWN ARROWHEAD +2305 ; [*0455.0020.0002.2305] # PROJECTIVE +2306 ; [*0456.0020.0002.2306] # PERSPECTIVE +2307 ; [*0457.0020.0002.2307] # WAVY LINE +2308 ; [*0458.0020.0002.2308] # LEFT CEILING +2309 ; [*0459.0020.0002.2309] # RIGHT CEILING +230A ; [*045A.0020.0002.230A] # LEFT FLOOR +230B ; [*045B.0020.0002.230B] # RIGHT FLOOR +230C ; [*045C.0020.0002.230C] # BOTTOM RIGHT CROP +230D ; [*045D.0020.0002.230D] # BOTTOM LEFT CROP +230E ; [*045E.0020.0002.230E] # TOP RIGHT CROP +230F ; [*045F.0020.0002.230F] # TOP LEFT CROP +2310 ; [*0460.0020.0002.2310] # REVERSED NOT SIGN +2312 ; [*0462.0020.0002.2312] # ARC +2313 ; [*0463.0020.0002.2313] # SEGMENT +2314 ; [*0464.0020.0002.2314] # SECTOR +2315 ; [*0465.0020.0002.2315] # TELEPHONE RECORDER +2316 ; [*0466.0020.0002.2316] # POSITION INDICATOR +2318 ; [*0468.0020.0002.2318] # PLACE OF INTEREST SIGN +2319 ; [*0469.0020.0002.2319] # TURNED NOT SIGN +231A ; [*046A.0020.0002.231A] # WATCH +231B ; [*046B.0020.0002.231B] # HOURGLASS +2322 ; [*0472.0020.0002.2322] # FROWN +2323 ; [*0473.0020.0002.2323] # SMILE +2324 ; [*0474.0020.0002.2324] # UP ARROWHEAD BETWEEN TWO HORIZONTAL BARS +2325 ; [*0475.0020.0002.2325] # OPTION KEY +2326 ; [*0476.0020.0002.2326] # ERASE TO THE RIGHT +2328 ; [*0478.0020.0002.2328] # KEYBOARD +232B ; [*0479.0020.0002.232B] # ERASE TO THE LEFT +232C ; [*047A.0020.0002.232C] # BENZENE RING +232D ; [*047B.0020.0002.232D] # CYLINDRICITY +232E ; [*047C.0020.0002.232E] # ALL AROUND-PROFILE +232F ; [*047D.0020.0002.232F] # SYMMETRY +2330 ; [*047E.0020.0002.2330] # TOTAL RUNOUT +2331 ; 
[*047F.0020.0002.2331] # DIMENSION ORIGIN +2332 ; [*0480.0020.0002.2332] # CONICAL TAPER +2333 ; [*0481.0020.0002.2333] # SLOPE +2334 ; [*0482.0020.0002.2334] # COUNTERBORE +2335 ; [*0483.0020.0002.2335] # COUNTERSINK +237B ; [*04C9.0020.0002.237B] # NOT CHECK MARK +237F ; [*04CC.0020.0002.237F] # VERTICAL LINE WITH MIDDLE DOT +2397 ; [*04E4.0020.0002.2397] # PREVIOUS PAGE +2398 ; [*04E5.0020.0002.2398] # NEXT PAGE +25B0 ; [*05C0.0020.0002.25B0] # BLACK PARALLELOGRAM +25B1 ; [*05C1.0020.0002.25B1] # WHITE PARALLELOGRAM +25C6 ; [*05D6.0020.0002.25C6] # BLACK DIAMOND +25C7 ; [*05D7.0020.0002.25C7] # WHITE DIAMOND +25C8 ; [*05D8.0020.0002.25C8] # WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND +25C9 ; [*05D9.0020.0002.25C9] # FISHEYE +25CA ; [*05DA.0020.0002.25CA] # LOZENGE +25CE ; [*05DE.0020.0002.25CE] # BULLSEYE +25D8 ; [*05E8.0020.0002.25D8] # INVERSE BULLET +25E6 ; [*05F6.0020.0002.25E6] # WHITE BULLET +2600 ; [*0608.0020.0002.2600] # BLACK SUN WITH RAYS +2601 ; [*0609.0020.0002.2601] # CLOUD +2602 ; [*060A.0020.0002.2602] # UMBRELLA +2603 ; [*060B.0020.0002.2603] # SNOWMAN +2604 ; [*060C.0020.0002.2604] # COMET +2607 ; [*060F.0020.0002.2607] # LIGHTNING +2608 ; [*0610.0020.0002.2608] # THUNDERSTORM +2609 ; [*0611.0020.0002.2609] # SUN +260A ; [*0612.0020.0002.260A] # ASCENDING NODE +260B ; [*0613.0020.0002.260B] # DESCENDING NODE +260C ; [*0614.0020.0002.260C] # CONJUNCTION +260D ; [*0615.0020.0002.260D] # OPPOSITION +260E ; [*0616.0020.0002.260E] # BLACK TELEPHONE +260F ; [*0617.0020.0002.260F] # WHITE TELEPHONE +2613 ; [*061B.0020.0002.2613] # SALTIRE +2619 ; [*061C.0020.0002.2619] # REVERSED ROTATED FLORAL HEART BULLET +2620 ; [*0623.0020.0002.2620] # SKULL AND CROSSBONES +2621 ; [*0624.0020.0002.2621] # CAUTION SIGN +2622 ; [*0625.0020.0002.2622] # RADIOACTIVE SIGN +2623 ; [*0626.0020.0002.2623] # BIOHAZARD SIGN +2624 ; [*0627.0020.0002.2624] # CADUCEUS +2625 ; [*0628.0020.0002.2625] # ANKH +2626 ; [*0629.0020.0002.2626] # ORTHODOX CROSS +2627 ; 
[*062A.0020.0002.2627] # CHI RHO +2628 ; [*062B.0020.0002.2628] # CROSS OF LORRAINE +2629 ; [*062C.0020.0002.2629] # CROSS OF JERUSALEM +262C ; [*062F.0020.0002.262C] # ADI SHAKTI +262D ; [*0630.0020.0002.262D] # HAMMER AND SICKLE +262F ; [*0632.0020.0002.262F] # YIN YANG +2638 ; [*063B.0020.0002.2638] # WHEEL OF DHARMA +2639 ; [*063C.0020.0002.2639] # WHITE FROWNING FACE +263A ; [*063D.0020.0002.263A] # WHITE SMILING FACE +263B ; [*063E.0020.0002.263B] # BLACK SMILING FACE +263C ; [*063F.0020.0002.263C] # WHITE SUN WITH RAYS +263D ; [*0640.0020.0002.263D] # FIRST QUARTER MOON +263E ; [*0641.0020.0002.263E] # LAST QUARTER MOON +263F ; [*0642.0020.0002.263F] # MERCURY +2640 ; [*0643.0020.0002.2640] # FEMALE SIGN +2641 ; [*0644.0020.0002.2641] # EARTH +2642 ; [*0645.0020.0002.2642] # MALE SIGN +2643 ; [*0646.0020.0002.2643] # JUPITER +2644 ; [*0647.0020.0002.2644] # SATURN +2645 ; [*0648.0020.0002.2645] # URANUS +2646 ; [*0649.0020.0002.2646] # NEPTUNE +2647 ; [*064A.0020.0002.2647] # PLUTO +2648 ; [*064B.0020.0002.2648] # ARIES +2649 ; [*064C.0020.0002.2649] # TAURUS +264A ; [*064D.0020.0002.264A] # GEMINI +264B ; [*064E.0020.0002.264B] # CANCER +264C ; [*064F.0020.0002.264C] # LEO +264D ; [*0650.0020.0002.264D] # VIRGO +264E ; [*0651.0020.0002.264E] # LIBRA +264F ; [*0652.0020.0002.264F] # SCORPIUS +2650 ; [*0653.0020.0002.2650] # SAGITTARIUS +2651 ; [*0654.0020.0002.2651] # CAPRICORN +2652 ; [*0655.0020.0002.2652] # AQUARIUS +2653 ; [*0656.0020.0002.2653] # PISCES +2668 ; [*066B.0020.0002.2668] # HOT SPRINGS +2669 ; [*066C.0020.0002.2669] # QUARTER NOTE +266A ; [*066D.0020.0002.266A] # EIGHTH NOTE +266B ; [*066E.0020.0002.266B] # BEAMED EIGHTH NOTES +266C ; [*066F.0020.0002.266C] # BEAMED SIXTEENTH NOTES +2701 ; [*0672.0020.0002.2701] # UPPER BLADE SCISSORS +2702 ; [*0673.0020.0002.2702] # BLACK SCISSORS +2703 ; [*0674.0020.0002.2703] # LOWER BLADE SCISSORS +2704 ; [*0675.0020.0002.2704] # WHITE SCISSORS +2706 ; [*0676.0020.0002.2706] # TELEPHONE LOCATION SIGN 
+2707 ; [*0677.0020.0002.2707] # TAPE DRIVE +2708 ; [*0678.0020.0002.2708] # AIRPLANE +2709 ; [*0679.0020.0002.2709] # ENVELOPE +270C ; [*067A.0020.0002.270C] # VICTORY HAND +270D ; [*067B.0020.0002.270D] # WRITING HAND +270E ; [*067C.0020.0002.270E] # LOWER RIGHT PENCIL +270F ; [*067D.0020.0002.270F] # PENCIL +2710 ; [*067E.0020.0002.2710] # UPPER RIGHT PENCIL +2711 ; [*067F.0020.0002.2711] # WHITE NIB +2712 ; [*0680.0020.0002.2712] # BLACK NIB +2713 ; [*0681.0020.0002.2713] # CHECK MARK +2714 ; [*0682.0020.0002.2714] # HEAVY CHECK MARK +2715 ; [*0683.0020.0002.2715] # MULTIPLICATION X +2716 ; [*0684.0020.0002.2716] # HEAVY MULTIPLICATION X +2717 ; [*0685.0020.0002.2717] # BALLOT X +2718 ; [*0686.0020.0002.2718] # HEAVY BALLOT X +271B ; [*0689.0020.0002.271B] # OPEN CENTRE CROSS +271C ; [*068A.0020.0002.271C] # HEAVY OPEN CENTRE CROSS +271D ; [*068B.0020.0002.271D] # LATIN CROSS +271E ; [*068C.0020.0002.271E] # SHADOWED WHITE LATIN CROSS +271F ; [*068D.0020.0002.271F] # OUTLINED LATIN CROSS +2720 ; [*068E.0020.0002.2720] # MALTESE CROSS +2722 ; [*0690.0020.0002.2722] # FOUR TEARDROP-SPOKED ASTERISK +2723 ; [*0691.0020.0002.2723] # FOUR BALLOON-SPOKED ASTERISK +2724 ; [*0692.0020.0002.2724] # HEAVY FOUR BALLOON-SPOKED ASTERISK +2725 ; [*0693.0020.0002.2725] # FOUR CLUB-SPOKED ASTERISK +2731 ; [*069E.0020.0002.2731] # HEAVY ASTERISK +2732 ; [*069F.0020.0002.2732] # OPEN CENTRE ASTERISK +2733 ; [*06A0.0020.0002.2733] # EIGHT SPOKED ASTERISK +273A ; [*06A7.0020.0002.273A] # SIXTEEN POINTED ASTERISK +273B ; [*06A8.0020.0002.273B] # TEARDROP-SPOKED ASTERISK +273C ; [*06A9.0020.0002.273C] # OPEN CENTRE TEARDROP-SPOKED ASTERISK +273D ; [*06AA.0020.0002.273D] # HEAVY TEARDROP-SPOKED ASTERISK +273E ; [*06AB.0020.0002.273E] # SIX PETALLED BLACK AND WHITE FLORETTE +273F ; [*06AC.0020.0002.273F] # BLACK FLORETTE +2740 ; [*06AD.0020.0002.2740] # WHITE FLORETTE +2741 ; [*06AE.0020.0002.2741] # EIGHT PETALLED OUTLINED BLACK FLORETTE +2743 ; [*06B0.0020.0002.2743] # HEAVY 
TEARDROP-SPOKED PINWHEEL ASTERISK +2744 ; [*06B1.0020.0002.2744] # SNOWFLAKE +2745 ; [*06B2.0020.0002.2745] # TIGHT TRIFOLIATE SNOWFLAKE +2746 ; [*06B3.0020.0002.2746] # HEAVY CHEVRON SNOWFLAKE +2747 ; [*06B4.0020.0002.2747] # SPARKLE +2748 ; [*06B5.0020.0002.2748] # HEAVY SPARKLE +2749 ; [*06B6.0020.0002.2749] # BALLOON-SPOKED ASTERISK +274A ; [*06B7.0020.0002.274A] # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +274B ; [*06B8.0020.0002.274B] # HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +2756 ; [*06BE.0020.0002.2756] # BLACK DIAMOND MINUS WHITE X +2758 ; [*06BF.0020.0002.2758] # LIGHT VERTICAL BAR +2759 ; [*06C0.0020.0002.2759] # MEDIUM VERTICAL BAR +275A ; [*06C1.0020.0002.275A] # HEAVY VERTICAL BAR +275B ; [*06C2.0020.0002.275B] # HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT +275C ; [*06C3.0020.0002.275C] # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT +275D ; [*06C4.0020.0002.275D] # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT +275E ; [*06C5.0020.0002.275E] # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +2761 ; [*06C6.0020.0002.2761] # CURVED STEM PARAGRAPH SIGN ORNAMENT +2762 ; [*06C7.0020.0002.2762] # HEAVY EXCLAMATION MARK ORNAMENT +2763 ; [*06C8.0020.0002.2763] # HEAVY HEART EXCLAMATION MARK ORNAMENT +2764 ; [*06C9.0020.0002.2764] # HEAVY BLACK HEART +2765 ; [*06CA.0020.0002.2765] # ROTATED HEAVY BLACK HEART BULLET +2766 ; [*06CB.0020.0002.2766] # FLORAL HEART +2767 ; [*06CC.0020.0002.2767] # ROTATED FLORAL HEART BULLET +27A2 ; [*06DB.0020.0002.27A2] # THREE-D TOP-LIGHTED RIGHTWARDS ARROWHEAD +27A3 ; [*06DC.0020.0002.27A3] # THREE-D BOTTOM-LIGHTED RIGHTWARDS ARROWHEAD +27A4 ; [*06DD.0020.0002.27A4] # BLACK RIGHTWARDS ARROWHEAD +2FF0 ; [*07F7.0020.0002.2FF0] # IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT +2FF1 ; [*07F8.0020.0002.2FF1] # IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW +2FF2 ; [*07F9.0020.0002.2FF2] # IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT +2FF3 ; [*07FA.0020.0002.2FF3] # IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO 
MIDDLE AND BELOW +2FF4 ; [*07FB.0020.0002.2FF4] # IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND +2FF5 ; [*07FC.0020.0002.2FF5] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE +2FF6 ; [*07FD.0020.0002.2FF6] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM BELOW +2FF7 ; [*07FE.0020.0002.2FF7] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LEFT +2FF8 ; [*07FF.0020.0002.2FF8] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER LEFT +2FF9 ; [*0800.0020.0002.2FF9] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT +2FFA ; [*0801.0020.0002.2FFA] # IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT +2FFB ; [*0802.0020.0002.2FFB] # IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +3001 ; [*0220.0020.0002.3001] # IDEOGRAPHIC COMMA +3002 ; [*024A.0020.0002.3002] # IDEOGRAPHIC FULL STOP +3003 ; [*02A4.0020.0002.3003] # DITTO MARK +3010 ; [*027C.0020.0002.3010] # LEFT BLACK LENTICULAR BRACKET +3011 ; [*027D.0020.0002.3011] # RIGHT BLACK LENTICULAR BRACKET +3012 ; [*0804.0020.0002.3012] # POSTAL MARK +3013 ; [*0805.0020.0002.3013] # GETA MARK +3014 ; [*027E.0020.0002.3014] # LEFT TORTOISE SHELL BRACKET +3015 ; [*027F.0020.0002.3015] # RIGHT TORTOISE SHELL BRACKET +3016 ; [*0280.0020.0002.3016] # LEFT WHITE LENTICULAR BRACKET +3017 ; [*0281.0020.0002.3017] # RIGHT WHITE LENTICULAR BRACKET +3018 ; [*0282.0020.0002.3018] # LEFT WHITE TORTOISE SHELL BRACKET +3019 ; [*0283.0020.0002.3019] # RIGHT WHITE TORTOISE SHELL BRACKET +301C ; [*0216.0020.0002.301C] # WAVE DASH +301D ; [*0261.0020.0002.301D] # REVERSED DOUBLE PRIME QUOTATION MARK +301E ; [*0262.0020.0002.301E] # DOUBLE PRIME QUOTATION MARK +301F ; [*0263.0020.0002.301F] # LOW DOUBLE PRIME QUOTATION MARK +3020 ; [*0806.0020.0002.3020] # POSTAL MARK FACE +3030 ; [*0217.0020.0002.3030] # WAVY DASH +303E ; [*0808.0020.0002.303E] # IDEOGRAPHIC VARIATION INDICATOR +303F ; [*0809.0020.0002.303F] # IDEOGRAPHIC HALF FILL SPACE +30FB ; [*0218.0020.0002.30FB] # KATAKANA MIDDLE DOT +3190 ; 
[*080A.0020.0002.3190] # IDEOGRAPHIC ANNOTATION LINKING MARK +3191 ; [*080B.0020.0002.3191] # IDEOGRAPHIC ANNOTATION REVERSE MARK +FD3E ; [*0286.0020.0002.FD3E] # ORNATE LEFT PARENTHESIS +FD3F ; [*0287.0020.0002.FD3F] # ORNATE RIGHT PARENTHESIS +FEFF ; [.0000.0000.0000.FEFF] # ZERO WIDTH NO-BREAK SPACE +FFFC ; [*080D.0020.0002.FFFC] # OBJECT REPLACEMENT CHARACTER +FFFD ; [*080E.0020.0002.FFFD] # REPLACEMENT CHARACTER +0332 ; [.0000.0021.0002.0332] # COMBINING LOW LINE +0313 ; [.0000.0022.0002.0313] # COMBINING COMMA ABOVE +0314 ; [.0000.002A.0002.0314] # COMBINING REVERSED COMMA ABOVE +0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT +0341 ; [.0000.0032.0002.0341] # COMBINING ACUTE TONE MARK; CANON +0300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT +0340 ; [.0000.0035.0002.0340] # COMBINING GRAVE TONE MARK; CANON +0306 ; [.0000.0037.0002.0306] # COMBINING BREVE +0302 ; [.0000.003C.0002.0302] # COMBINING CIRCUMFLEX ACCENT +030C ; [.0000.0041.0002.030C] # COMBINING CARON +030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE +0308 ; [.0000.0047.0002.0308] # COMBINING DIAERESIS +030B ; [.0000.004D.0002.030B] # COMBINING DOUBLE ACUTE ACCENT +0303 ; [.0000.004E.0002.0303] # COMBINING TILDE +0307 ; [.0000.0052.0002.0307] # COMBINING DOT ABOVE +0338 ; [.0000.0054.0002.0338] # COMBINING LONG SOLIDUS OVERLAY +0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA +0328 ; [.0000.0058.0002.0328] # COMBINING OGONEK +0304 ; [.0000.005A.0002.0304] # COMBINING MACRON +0305 ; [.0000.005E.0002.0305] # COMBINING OVERLINE +0309 ; [.0000.005F.0002.0309] # COMBINING HOOK ABOVE +030D ; [.0000.0060.0002.030D] # COMBINING VERTICAL LINE ABOVE +030E ; [.0000.0061.0002.030E] # COMBINING DOUBLE VERTICAL LINE ABOVE +030F ; [.0000.0062.0002.030F] # COMBINING DOUBLE GRAVE ACCENT +0310 ; [.0000.0063.0002.0310] # COMBINING CANDRABINDU +0311 ; [.0000.0064.0002.0311] # COMBINING INVERTED BREVE +0312 ; [.0000.0065.0002.0312] # COMBINING TURNED COMMA ABOVE +0315 ; [.0000.0066.0002.0315] # 
COMBINING COMMA ABOVE RIGHT +0316 ; [.0000.0067.0002.0316] # COMBINING GRAVE ACCENT BELOW +0317 ; [.0000.0068.0002.0317] # COMBINING ACUTE ACCENT BELOW +0318 ; [.0000.0069.0002.0318] # COMBINING LEFT TACK BELOW +0319 ; [.0000.006A.0002.0319] # COMBINING RIGHT TACK BELOW +031B ; [.0000.006C.0002.031B] # COMBINING HORN +031C ; [.0000.0072.0002.031C] # COMBINING LEFT HALF RING BELOW +031D ; [.0000.0073.0002.031D] # COMBINING UP TACK BELOW +031E ; [.0000.0074.0002.031E] # COMBINING DOWN TACK BELOW +031F ; [.0000.0075.0002.031F] # COMBINING PLUS SIGN BELOW +0320 ; [.0000.0076.0002.0320] # COMBINING MINUS SIGN BELOW +0321 ; [.0000.0077.0002.0321] # COMBINING PALATALIZED HOOK BELOW +0322 ; [.0000.0078.0002.0322] # COMBINING RETROFLEX HOOK BELOW +0323 ; [.0000.0079.0002.0323] # COMBINING DOT BELOW +0324 ; [.0000.007E.0002.0324] # COMBINING DIAERESIS BELOW +0325 ; [.0000.007F.0002.0325] # COMBINING RING BELOW +0326 ; [.0000.0080.0002.0326] # COMBINING COMMA BELOW +0329 ; [.0000.0081.0002.0329] # COMBINING VERTICAL LINE BELOW +032A ; [.0000.0082.0002.032A] # COMBINING BRIDGE BELOW +032B ; [.0000.0083.0002.032B] # COMBINING INVERTED DOUBLE ARCH BELOW +032C ; [.0000.0084.0002.032C] # COMBINING CARON BELOW +032D ; [.0000.0085.0002.032D] # COMBINING CIRCUMFLEX ACCENT BELOW +032E ; [.0000.0086.0002.032E] # COMBINING BREVE BELOW +032F ; [.0000.0087.0002.032F] # COMBINING INVERTED BREVE BELOW +0330 ; [.0000.0088.0002.0330] # COMBINING TILDE BELOW +0331 ; [.0000.0089.0002.0331] # COMBINING MACRON BELOW +0333 ; [.0000.008A.0002.0333] # COMBINING DOUBLE LOW LINE +0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY +0335 ; [.0000.008C.0002.0335] # COMBINING SHORT STROKE OVERLAY +0336 ; [.0000.008D.0002.0336] # COMBINING LONG STROKE OVERLAY +0337 ; [.0000.008E.0002.0337] # COMBINING SHORT SOLIDUS OVERLAY +0339 ; [.0000.008F.0002.0339] # COMBINING RIGHT HALF RING BELOW +033A ; [.0000.0090.0002.033A] # COMBINING INVERTED BRIDGE BELOW +033C ; [.0000.0092.0002.033C] # COMBINING SEAGULL 
BELOW +033D ; [.0000.0093.0002.033D] # COMBINING X ABOVE +033E ; [.0000.0094.0002.033E] # COMBINING VERTICAL TILDE +033F ; [.0000.0095.0002.033F] # COMBINING DOUBLE OVERLINE +0346 ; [.0000.0097.0002.0346] # COMBINING BRIDGE ABOVE +0347 ; [.0000.0098.0002.0347] # COMBINING EQUALS SIGN BELOW +0348 ; [.0000.0099.0002.0348] # COMBINING DOUBLE VERTICAL LINE BELOW +034A ; [.0000.009B.0002.034A] # COMBINING NOT TILDE ABOVE +034B ; [.0000.009C.0002.034B] # COMBINING HOMOTHETIC ABOVE +034C ; [.0000.009D.0002.034C] # COMBINING ALMOST EQUAL TO ABOVE +0360 ; [.0000.00A0.0002.0360] # COMBINING DOUBLE TILDE +0361 ; [.0000.00A1.0002.0361] # COMBINING DOUBLE INVERTED BREVE +FE20 ; [.0000.00A3.0002.FE20] # COMBINING LIGATURE LEFT HALF +FE21 ; [.0000.00A4.0002.FE21] # COMBINING LIGATURE RIGHT HALF +FE22 ; [.0000.00A5.0002.FE22] # COMBINING DOUBLE TILDE LEFT HALF +FE23 ; [.0000.00A6.0002.FE23] # COMBINING DOUBLE TILDE RIGHT HALF +302A ; [.0000.0138.0002.302A] # IDEOGRAPHIC LEVEL TONE MARK +302B ; [.0000.0139.0002.302B] # IDEOGRAPHIC RISING TONE MARK +302C ; [.0000.013A.0002.302C] # IDEOGRAPHIC DEPARTING TONE MARK +302D ; [.0000.013B.0002.302D] # IDEOGRAPHIC ENTERING TONE MARK +302E ; [.0000.013C.0002.302E] # HANGUL SINGLE DOT TONE MARK +302F ; [.0000.013D.0002.302F] # HANGUL DOUBLE DOT TONE MARK +3099 ; [.0000.013E.0002.3099] # COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK +309A ; [.0000.013F.0002.309A] # COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +20D2 ; [.0000.0142.0002.20D2] # COMBINING LONG VERTICAL LINE OVERLAY +20D3 ; [.0000.0143.0002.20D3] # COMBINING SHORT VERTICAL LINE OVERLAY +20D8 ; [.0000.0148.0002.20D8] # COMBINING RING OVERLAY +20D9 ; [.0000.0149.0002.20D9] # COMBINING CLOCKWISE RING OVERLAY +20DA ; [.0000.014A.0002.20DA] # COMBINING ANTICLOCKWISE RING OVERLAY +20DB ; [.0000.014B.0002.20DB] # COMBINING THREE DOTS ABOVE +20DC ; [.0000.014C.0002.20DC] # COMBINING FOUR DOTS ABOVE +20DF ; [.0000.014F.0002.20DF] # COMBINING ENCLOSING DIAMOND +20E2 ; 
[.0000.0152.0002.20E2] # COMBINING ENCLOSING SCREEN +20E3 ; [.0000.0153.0002.20E3] # COMBINING ENCLOSING KEYCAP +02D0 ; [.081F.0020.0002.02D0] # MODIFIER LETTER TRIANGULAR COLON +02D1 ; [.0820.0020.0002.02D1] # MODIFIER LETTER HALF TRIANGULAR COLON +3005 ; [.0823.0020.0002.3005] # IDEOGRAPHIC ITERATION MARK +3031 ; [.0824.0020.0002.3031] # VERTICAL KANA REPEAT MARK +3032 ; [.0824.013E.0002.3032] # VERTICAL KANA REPEAT WITH VOICED SOUND MARK; CANONSEQ +3033 ; [.0825.0020.0002.3033] # VERTICAL KANA REPEAT MARK UPPER HALF +3034 ; [.0825.013E.0002.3034] # VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF; CANONSEQ +3035 ; [.0826.0020.0002.3035] # VERTICAL KANA REPEAT MARK LOWER HALF +309D ; [.0827.0020.0002.309D] # HIRAGANA ITERATION MARK +309E ; [.0827.013E.0002.309E] # HIRAGANA VOICED ITERATION MARK; CANONSEQ +30FC ; [.0828.0020.0002.30FC] # KATAKANA-HIRAGANA PROLONGED SOUND MARK +30FD ; [.0829.0020.0002.30FD] # KATAKANA ITERATION MARK +30FE ; [.0829.013E.0002.30FE] # KATAKANA VOICED ITERATION MARK; CANONSEQ +00A4 ; [.082A.0020.0002.00A4] # CURRENCY SIGN +00A2 ; [.082B.0020.0002.00A2] # CENT SIGN +0024 ; [.082C.0020.0002.0024] # DOLLAR SIGN +00A3 ; [.082D.0020.0002.00A3] # POUND SIGN +00A5 ; [.082E.0020.0002.00A5] # YEN SIGN +20A0 ; [.0833.0020.0002.20A0] # EURO-CURRENCY SIGN +20A1 ; [.0834.0020.0002.20A1] # COLON SIGN +20A2 ; [.0835.0020.0002.20A2] # CRUZEIRO SIGN +20A3 ; [.0836.0020.0002.20A3] # FRENCH FRANC SIGN +20A4 ; [.0837.0020.0002.20A4] # LIRA SIGN +20A5 ; [.0838.0020.0002.20A5] # MILL SIGN +20A6 ; [.0839.0020.0002.20A6] # NAIRA SIGN +20A7 ; [.083A.0020.0002.20A7] # PESETA SIGN +20A9 ; [.083B.0020.0002.20A9] # WON SIGN +20AA ; [.083C.0020.0002.20AA] # NEW SHEQEL SIGN +20AB ; [.083D.0020.0002.20AB] # DONG SIGN +20AC ; [.083E.0020.0002.20AC] # EURO SIGN +20AD ; [.083F.0020.0002.20AD] # KIP SIGN +20AE ; [.0840.0020.0002.20AE] # TUGRIK SIGN +20AF ; [.0841.0020.0002.20AF] # DRACHMA SIGN +2108 ; [.0843.0020.0002.2108] # SCRUPLE +2117 ; [.0845.0020.0002.2117] 
# SOUND RECORDING COPYRIGHT +2118 ; [.0846.0020.0002.2118] # SCRIPT CAPITAL P +211E ; [.0847.0020.0002.211E] # PRESCRIPTION TAKE +211F ; [.0848.0020.0002.211F] # RESPONSE +2123 ; [.0849.0020.0002.2123] # VERSICLE +2125 ; [.084A.0020.0002.2125] # OUNCE SIGN +2127 ; [.084B.0020.0002.2127] # INVERTED OHM SIGN +2132 ; [.084E.0020.0002.2132] # TURNED CAPITAL F +213A ; [.084F.0020.0002.213A] # ROTATED CAPITAL Q +2180 ; [.0850.0020.0002.2180] # ROMAN NUMERAL ONE THOUSAND C D +2181 ; [.0851.0020.0002.2181] # ROMAN NUMERAL FIVE THOUSAND +2182 ; [.0852.0020.0002.2182] # ROMAN NUMERAL TEN THOUSAND +2183 ; [.0853.0020.0002.2183] # ROMAN NUMERAL REVERSED ONE HUNDRED +266D ; [.0854.0020.0002.266D] # MUSIC FLAT SIGN +266E ; [.0855.0020.0002.266E] # MUSIC NATURAL SIGN +266F ; [.0856.0020.0002.266F] # MUSIC SHARP SIGN +0030 ; [.0857.0020.0002.0030] # DIGIT ZERO +3007 ; [.0857.016E.0002.3007] # IDEOGRAPHIC NUMBER ZERO +0031 ; [.0858.0020.0002.0031] # DIGIT ONE +0032 ; [.0859.0020.0002.0032] # DIGIT TWO +0033 ; [.085A.0020.0002.0033] # DIGIT THREE +0034 ; [.085B.0020.0002.0034] # DIGIT FOUR +0035 ; [.085C.0020.0002.0035] # DIGIT FIVE +0036 ; [.085D.0020.0002.0036] # DIGIT SIX +0037 ; [.085E.0020.0002.0037] # DIGIT SEVEN +0038 ; [.085F.0020.0002.0038] # DIGIT EIGHT +0039 ; [.0860.0020.0002.0039] # DIGIT NINE +0061 ; [.0861.0020.0002.0061] # LATIN SMALL LETTER A +0041 ; [.0861.0020.0008.0041] # LATIN CAPITAL LETTER A +00E1 ; [.0861.0032.0002.00E1] # LATIN SMALL LETTER A WITH ACUTE; CANONSEQ +00C1 ; [.0861.0032.0008.00C1] # LATIN CAPITAL LETTER A WITH ACUTE; CANONSEQ +00E0 ; [.0861.0035.0002.00E0] # LATIN SMALL LETTER A WITH GRAVE; CANONSEQ +00C0 ; [.0861.0035.0008.00C0] # LATIN CAPITAL LETTER A WITH GRAVE; CANONSEQ +0103 ; [.0861.0037.0002.0103] # LATIN SMALL LETTER A WITH BREVE; CANONSEQ +0102 ; [.0861.0037.0008.0102] # LATIN CAPITAL LETTER A WITH BREVE; CANONSEQ +1EAF ; [.0861.0038.0002.1EAF] # LATIN SMALL LETTER A WITH BREVE AND ACUTE; CANONSEQ +1EAE ; [.0861.0038.0008.1EAE] # LATIN 
CAPITAL LETTER A WITH BREVE AND ACUTE; CANONSEQ +1EB1 ; [.0861.0039.0002.1EB1] # LATIN SMALL LETTER A WITH BREVE AND GRAVE; CANONSEQ +1EB0 ; [.0861.0039.0008.1EB0] # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE; CANONSEQ +1EB5 ; [.0861.003A.0002.1EB5] # LATIN SMALL LETTER A WITH BREVE AND TILDE; CANONSEQ +1EB4 ; [.0861.003A.0008.1EB4] # LATIN CAPITAL LETTER A WITH BREVE AND TILDE; CANONSEQ +1EB3 ; [.0861.003B.0002.1EB3] # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE; CANONSEQ +1EB2 ; [.0861.003B.0008.1EB2] # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE; CANONSEQ +00E2 ; [.0861.003C.0002.00E2] # LATIN SMALL LETTER A WITH CIRCUMFLEX; CANONSEQ +00C2 ; [.0861.003C.0008.00C2] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX; CANONSEQ +1EA5 ; [.0861.003D.0002.1EA5] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1EA4 ; [.0861.003D.0008.1EA4] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1EA7 ; [.0861.003E.0002.1EA7] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1EA6 ; [.0861.003E.0008.1EA6] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1EAB ; [.0861.003F.0002.1EAB] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE; CANONSEQ +1EAA ; [.0861.003F.0008.1EAA] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE; CANONSEQ +1EA9 ; [.0861.0040.0002.1EA9] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +1EA8 ; [.0861.0040.0008.1EA8] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +01CE ; [.0861.0041.0002.01CE] # LATIN SMALL LETTER A WITH CARON; CANONSEQ +01CD ; [.0861.0041.0008.01CD] # LATIN CAPITAL LETTER A WITH CARON; CANONSEQ +00E5 ; [.0861.0043.0002.00E5] # LATIN SMALL LETTER A WITH RING ABOVE; CANONSEQ +00C5 ; [.0861.0043.0008.00C5] # LATIN CAPITAL LETTER A WITH RING ABOVE; CANONSEQ +212B ; [.0861.0043.0008.212B] # ANGSTROM SIGN; CANONSEQ +01FB ; [.0861.0044.0002.01FB] # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE; CANONSEQ +01FA ; [.0861.0044.0008.01FA] # LATIN CAPITAL LETTER A WITH 
RING ABOVE AND ACUTE; CANONSEQ +00E4 ; [.0861.0047.0002.00E4] # LATIN SMALL LETTER A WITH DIAERESIS; CANONSEQ +00C4 ; [.0861.0047.0008.00C4] # LATIN CAPITAL LETTER A WITH DIAERESIS; CANONSEQ +01DF ; [.0861.004B.0002.01DF] # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON; CANONSEQ +01DE ; [.0861.004B.0008.01DE] # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON; CANONSEQ +00E3 ; [.0861.004E.0002.00E3] # LATIN SMALL LETTER A WITH TILDE; CANONSEQ +00C3 ; [.0861.004E.0008.00C3] # LATIN CAPITAL LETTER A WITH TILDE; CANONSEQ +0227 ; [.0861.0052.0002.0227] # LATIN SMALL LETTER A WITH DOT ABOVE; CANONSEQ +0226 ; [.0861.0052.0008.0226] # LATIN CAPITAL LETTER A WITH DOT ABOVE; CANONSEQ +01E1 ; [.0861.0053.0002.01E1] # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON; CANONSEQ +01E0 ; [.0861.0053.0008.01E0] # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON; CANONSEQ +0105 ; [.0861.0058.0002.0105] # LATIN SMALL LETTER A WITH OGONEK; CANONSEQ +0104 ; [.0861.0058.0008.0104] # LATIN CAPITAL LETTER A WITH OGONEK; CANONSEQ +0101 ; [.0861.005A.0002.0101] # LATIN SMALL LETTER A WITH MACRON; CANONSEQ +0100 ; [.0861.005A.0008.0100] # LATIN CAPITAL LETTER A WITH MACRON; CANONSEQ +1EA3 ; [.0861.005F.0002.1EA3] # LATIN SMALL LETTER A WITH HOOK ABOVE; CANONSEQ +1EA2 ; [.0861.005F.0008.1EA2] # LATIN CAPITAL LETTER A WITH HOOK ABOVE; CANONSEQ +0201 ; [.0861.0062.0002.0201] # LATIN SMALL LETTER A WITH DOUBLE GRAVE; CANONSEQ +0200 ; [.0861.0062.0008.0200] # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE; CANONSEQ +0203 ; [.0861.0064.0002.0203] # LATIN SMALL LETTER A WITH INVERTED BREVE; CANONSEQ +0202 ; [.0861.0064.0008.0202] # LATIN CAPITAL LETTER A WITH INVERTED BREVE; CANONSEQ +1EA1 ; [.0861.0079.0002.1EA1] # LATIN SMALL LETTER A WITH DOT BELOW; CANONSEQ +1EA0 ; [.0861.0079.0008.1EA0] # LATIN CAPITAL LETTER A WITH DOT BELOW; CANONSEQ +1EB7 ; [.0861.007A.0002.1EB7] # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW; CANONSEQ +1EB6 ; [.0861.007A.0008.1EB6] # LATIN CAPITAL LETTER A WITH BREVE AND DOT 
BELOW; CANONSEQ +1EAD ; [.0861.007B.0002.1EAD] # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +1EAC ; [.0861.007B.0008.1EAC] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +1E01 ; [.0861.007F.0002.1E01] # LATIN SMALL LETTER A WITH RING BELOW; CANONSEQ +1E00 ; [.0861.007F.0008.1E00] # LATIN CAPITAL LETTER A WITH RING BELOW; CANONSEQ +00E6 ; [.0865.0020.0002.00E6] # LATIN SMALL LETTER AE +00C6 ; [.0865.0020.0008.00C6] # LATIN CAPITAL LETTER AE +01FD ; [.0865.0032.0002.01FD] # LATIN SMALL LETTER AE WITH ACUTE; CANONSEQ +01FC ; [.0865.0032.0008.01FC] # LATIN CAPITAL LETTER AE WITH ACUTE; CANONSEQ +01E3 ; [.0865.005A.0002.01E3] # LATIN SMALL LETTER AE WITH MACRON; CANONSEQ +01E2 ; [.0865.005A.0008.01E2] # LATIN CAPITAL LETTER AE WITH MACRON; CANONSEQ +0250 ; [.0869.0020.0002.0250] # LATIN SMALL LETTER TURNED A +0251 ; [.086D.0020.0002.0251] # LATIN SMALL LETTER ALPHA +0252 ; [.0871.0020.0002.0252] # LATIN SMALL LETTER TURNED ALPHA +0062 ; [.0875.0020.0002.0062] # LATIN SMALL LETTER B +0042 ; [.0875.0020.0008.0042] # LATIN CAPITAL LETTER B +1E03 ; [.0875.0052.0002.1E03] # LATIN SMALL LETTER B WITH DOT ABOVE; CANONSEQ +1E02 ; [.0875.0052.0008.1E02] # LATIN CAPITAL LETTER B WITH DOT ABOVE; CANONSEQ +1E05 ; [.0875.0079.0002.1E05] # LATIN SMALL LETTER B WITH DOT BELOW; CANONSEQ +1E04 ; [.0875.0079.0008.1E04] # LATIN CAPITAL LETTER B WITH DOT BELOW; CANONSEQ +1E07 ; [.0875.0089.0002.1E07] # LATIN SMALL LETTER B WITH LINE BELOW; CANONSEQ +1E06 ; [.0875.0089.0008.1E06] # LATIN CAPITAL LETTER B WITH LINE BELOW; CANONSEQ +0299 ; [.0879.0020.0002.0299] # LATIN LETTER SMALL CAPITAL B +0180 ; [.087D.0020.0002.0180] # LATIN SMALL LETTER B WITH STROKE +0253 ; [.0881.0020.0002.0253] # LATIN SMALL LETTER B WITH HOOK +0181 ; [.0881.0020.0008.0181] # LATIN CAPITAL LETTER B WITH HOOK +0183 ; [.0885.0020.0002.0183] # LATIN SMALL LETTER B WITH TOPBAR +0182 ; [.0885.0020.0008.0182] # LATIN CAPITAL LETTER B WITH TOPBAR +0063 ; [.0889.0020.0002.0063] # LATIN 
SMALL LETTER C +0043 ; [.0889.0020.0008.0043] # LATIN CAPITAL LETTER C +0107 ; [.0889.0032.0002.0107] # LATIN SMALL LETTER C WITH ACUTE; CANONSEQ +0106 ; [.0889.0032.0008.0106] # LATIN CAPITAL LETTER C WITH ACUTE; CANONSEQ +0109 ; [.0889.003C.0002.0109] # LATIN SMALL LETTER C WITH CIRCUMFLEX; CANONSEQ +0108 ; [.0889.003C.0008.0108] # LATIN CAPITAL LETTER C WITH CIRCUMFLEX; CANONSEQ +010D ; [.0889.0041.0002.010D] # LATIN SMALL LETTER C WITH CARON; CANONSEQ +010C ; [.0889.0041.0008.010C] # LATIN CAPITAL LETTER C WITH CARON; CANONSEQ +010B ; [.0889.0052.0002.010B] # LATIN SMALL LETTER C WITH DOT ABOVE; CANONSEQ +010A ; [.0889.0052.0008.010A] # LATIN CAPITAL LETTER C WITH DOT ABOVE; CANONSEQ +00E7 ; [.0889.0055.0002.00E7] # LATIN SMALL LETTER C WITH CEDILLA; CANONSEQ +00C7 ; [.0889.0055.0008.00C7] # LATIN CAPITAL LETTER C WITH CEDILLA; CANONSEQ +1E09 ; [.0889.0056.0002.1E09] # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE; CANONSEQ +1E08 ; [.0889.0056.0008.1E08] # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE; CANONSEQ +0188 ; [.088D.0020.0002.0188] # LATIN SMALL LETTER C WITH HOOK +0187 ; [.088D.0020.0008.0187] # LATIN CAPITAL LETTER C WITH HOOK +0255 ; [.0891.0020.0002.0255] # LATIN SMALL LETTER C WITH CURL +0064 ; [.0895.0020.0002.0064] # LATIN SMALL LETTER D +0044 ; [.0895.0020.0008.0044] # LATIN CAPITAL LETTER D +010F ; [.0895.0041.0002.010F] # LATIN SMALL LETTER D WITH CARON; CANONSEQ +010E ; [.0895.0041.0008.010E] # LATIN CAPITAL LETTER D WITH CARON; CANONSEQ +1E0B ; [.0895.0052.0002.1E0B] # LATIN SMALL LETTER D WITH DOT ABOVE; CANONSEQ +1E0A ; [.0895.0052.0008.1E0A] # LATIN CAPITAL LETTER D WITH DOT ABOVE; CANONSEQ +1E11 ; [.0895.0055.0002.1E11] # LATIN SMALL LETTER D WITH CEDILLA; CANONSEQ +1E10 ; [.0895.0055.0008.1E10] # LATIN CAPITAL LETTER D WITH CEDILLA; CANONSEQ +1E0D ; [.0895.0079.0002.1E0D] # LATIN SMALL LETTER D WITH DOT BELOW; CANONSEQ +1E0C ; [.0895.0079.0008.1E0C] # LATIN CAPITAL LETTER D WITH DOT BELOW; CANONSEQ +1E13 ; [.0895.0085.0002.1E13] # LATIN 
SMALL LETTER D WITH CIRCUMFLEX BELOW; CANONSEQ +1E12 ; [.0895.0085.0008.1E12] # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW; CANONSEQ +1E0F ; [.0895.0089.0002.1E0F] # LATIN SMALL LETTER D WITH LINE BELOW; CANONSEQ +1E0E ; [.0895.0089.0008.1E0E] # LATIN CAPITAL LETTER D WITH LINE BELOW; CANONSEQ +0111 ; [.0899.0020.0002.0111] # LATIN SMALL LETTER D WITH STROKE +0110 ; [.0899.0020.0008.0110] # LATIN CAPITAL LETTER D WITH STROKE +0256 ; [.089D.0020.0002.0256] # LATIN SMALL LETTER D WITH TAIL +0189 ; [.089D.0020.0008.0189] # LATIN CAPITAL LETTER AFRICAN D +0257 ; [.08A1.0020.0002.0257] # LATIN SMALL LETTER D WITH HOOK +018A ; [.08A1.0020.0008.018A] # LATIN CAPITAL LETTER D WITH HOOK +018C ; [.08A5.0020.0002.018C] # LATIN SMALL LETTER D WITH TOPBAR +018B ; [.08A5.0020.0008.018B] # LATIN CAPITAL LETTER D WITH TOPBAR +00F0 ; [.08A9.0020.0002.00F0] # LATIN SMALL LETTER ETH +00D0 ; [.08A9.0020.0008.00D0] # LATIN CAPITAL LETTER ETH +018D ; [.08AD.0020.0002.018D] # LATIN SMALL LETTER TURNED DELTA +0065 ; [.08B1.0020.0002.0065] # LATIN SMALL LETTER E +0045 ; [.08B1.0020.0008.0045] # LATIN CAPITAL LETTER E +00E9 ; [.08B1.0032.0002.00E9] # LATIN SMALL LETTER E WITH ACUTE; CANONSEQ +00C9 ; [.08B1.0032.0008.00C9] # LATIN CAPITAL LETTER E WITH ACUTE; CANONSEQ +00E8 ; [.08B1.0035.0002.00E8] # LATIN SMALL LETTER E WITH GRAVE; CANONSEQ +00C8 ; [.08B1.0035.0008.00C8] # LATIN CAPITAL LETTER E WITH GRAVE; CANONSEQ +0115 ; [.08B1.0037.0002.0115] # LATIN SMALL LETTER E WITH BREVE; CANONSEQ +0114 ; [.08B1.0037.0008.0114] # LATIN CAPITAL LETTER E WITH BREVE; CANONSEQ +00EA ; [.08B1.003C.0002.00EA] # LATIN SMALL LETTER E WITH CIRCUMFLEX; CANONSEQ +00CA ; [.08B1.003C.0008.00CA] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX; CANONSEQ +1EBF ; [.08B1.003D.0002.1EBF] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1EBE ; [.08B1.003D.0008.1EBE] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1EC1 ; [.08B1.003E.0002.1EC1] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE; 
CANONSEQ +1EC0 ; [.08B1.003E.0008.1EC0] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1EC5 ; [.08B1.003F.0002.1EC5] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE; CANONSEQ +1EC4 ; [.08B1.003F.0008.1EC4] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE; CANONSEQ +1EC3 ; [.08B1.0040.0002.1EC3] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +1EC2 ; [.08B1.0040.0008.1EC2] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +011B ; [.08B1.0041.0002.011B] # LATIN SMALL LETTER E WITH CARON; CANONSEQ +011A ; [.08B1.0041.0008.011A] # LATIN CAPITAL LETTER E WITH CARON; CANONSEQ +00EB ; [.08B1.0047.0002.00EB] # LATIN SMALL LETTER E WITH DIAERESIS; CANONSEQ +00CB ; [.08B1.0047.0008.00CB] # LATIN CAPITAL LETTER E WITH DIAERESIS; CANONSEQ +1EBD ; [.08B1.004E.0002.1EBD] # LATIN SMALL LETTER E WITH TILDE; CANONSEQ +1EBC ; [.08B1.004E.0008.1EBC] # LATIN CAPITAL LETTER E WITH TILDE; CANONSEQ +0117 ; [.08B1.0052.0002.0117] # LATIN SMALL LETTER E WITH DOT ABOVE; CANONSEQ +0116 ; [.08B1.0052.0008.0116] # LATIN CAPITAL LETTER E WITH DOT ABOVE; CANONSEQ +0229 ; [.08B1.0055.0002.0229] # LATIN SMALL LETTER E WITH CEDILLA; CANONSEQ +0228 ; [.08B1.0055.0008.0228] # LATIN CAPITAL LETTER E WITH CEDILLA; CANONSEQ +1E1D ; [.08B1.0057.0002.1E1D] # LATIN SMALL LETTER E WITH CEDILLA AND BREVE; CANONSEQ +1E1C ; [.08B1.0057.0008.1E1C] # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE; CANONSEQ +0119 ; [.08B1.0058.0002.0119] # LATIN SMALL LETTER E WITH OGONEK; CANONSEQ +0118 ; [.08B1.0058.0008.0118] # LATIN CAPITAL LETTER E WITH OGONEK; CANONSEQ +0113 ; [.08B1.005A.0002.0113] # LATIN SMALL LETTER E WITH MACRON; CANONSEQ +0112 ; [.08B1.005A.0008.0112] # LATIN CAPITAL LETTER E WITH MACRON; CANONSEQ +1E17 ; [.08B1.005B.0002.1E17] # LATIN SMALL LETTER E WITH MACRON AND ACUTE; CANONSEQ +1E16 ; [.08B1.005B.0008.1E16] # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE; CANONSEQ +1E15 ; [.08B1.005C.0002.1E15] # LATIN SMALL LETTER E WITH MACRON AND GRAVE; CANONSEQ 
+1E14 ; [.08B1.005C.0008.1E14] # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE; CANONSEQ +1EBB ; [.08B1.005F.0002.1EBB] # LATIN SMALL LETTER E WITH HOOK ABOVE; CANONSEQ +1EBA ; [.08B1.005F.0008.1EBA] # LATIN CAPITAL LETTER E WITH HOOK ABOVE; CANONSEQ +0205 ; [.08B1.0062.0002.0205] # LATIN SMALL LETTER E WITH DOUBLE GRAVE; CANONSEQ +0204 ; [.08B1.0062.0008.0204] # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE; CANONSEQ +0207 ; [.08B1.0064.0002.0207] # LATIN SMALL LETTER E WITH INVERTED BREVE; CANONSEQ +0206 ; [.08B1.0064.0008.0206] # LATIN CAPITAL LETTER E WITH INVERTED BREVE; CANONSEQ +1EB9 ; [.08B1.0079.0002.1EB9] # LATIN SMALL LETTER E WITH DOT BELOW; CANONSEQ +1EB8 ; [.08B1.0079.0008.1EB8] # LATIN CAPITAL LETTER E WITH DOT BELOW; CANONSEQ +1EC7 ; [.08B1.007B.0002.1EC7] # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +1EC6 ; [.08B1.007B.0008.1EC6] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +1E19 ; [.08B1.0085.0002.1E19] # LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW; CANONSEQ +1E18 ; [.08B1.0085.0008.1E18] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW; CANONSEQ +1E1B ; [.08B1.0088.0002.1E1B] # LATIN SMALL LETTER E WITH TILDE BELOW; CANONSEQ +1E1A ; [.08B1.0088.0008.1E1A] # LATIN CAPITAL LETTER E WITH TILDE BELOW; CANONSEQ +01DD ; [.08B5.0020.0002.01DD] # LATIN SMALL LETTER TURNED E +018E ; [.08B5.0020.0008.018E] # LATIN CAPITAL LETTER REVERSED E +0259 ; [.08B9.0020.0002.0259] # LATIN SMALL LETTER SCHWA +018F ; [.08B9.0020.0008.018F] # LATIN CAPITAL LETTER SCHWA +025B ; [.08BD.0020.0002.025B] # LATIN SMALL LETTER OPEN E +0190 ; [.08BD.0020.0008.0190] # LATIN CAPITAL LETTER OPEN E +0258 ; [.08C1.0020.0002.0258] # LATIN SMALL LETTER REVERSED E +025A ; [.08C5.0020.0002.025A] # LATIN SMALL LETTER SCHWA WITH HOOK +025C ; [.08C9.0020.0002.025C] # LATIN SMALL LETTER REVERSED OPEN E +025D ; [.08CD.0020.0002.025D] # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK +025E ; [.08D1.0020.0002.025E] # LATIN SMALL LETTER CLOSED REVERSED OPEN E +029A ; 
[.08D5.0020.0002.029A] # LATIN SMALL LETTER CLOSED OPEN E +0264 ; [.08D9.0020.0002.0264] # LATIN SMALL LETTER RAMS HORN +0066 ; [.08DD.0020.0002.0066] # LATIN SMALL LETTER F +0046 ; [.08DD.0020.0008.0046] # LATIN CAPITAL LETTER F +1E1F ; [.08DD.0052.0002.1E1F] # LATIN SMALL LETTER F WITH DOT ABOVE; CANONSEQ +1E1E ; [.08DD.0052.0008.1E1E] # LATIN CAPITAL LETTER F WITH DOT ABOVE; CANONSEQ +0192 ; [.08E1.0020.0002.0192] # LATIN SMALL LETTER F WITH HOOK +0191 ; [.08E1.0020.0008.0191] # LATIN CAPITAL LETTER F WITH HOOK +0067 ; [.08E5.0020.0002.0067] # LATIN SMALL LETTER G +0047 ; [.08E5.0020.0008.0047] # LATIN CAPITAL LETTER G +01F5 ; [.08E5.0032.0002.01F5] # LATIN SMALL LETTER G WITH ACUTE; CANONSEQ +01F4 ; [.08E5.0032.0008.01F4] # LATIN CAPITAL LETTER G WITH ACUTE; CANONSEQ +011F ; [.08E5.0037.0002.011F] # LATIN SMALL LETTER G WITH BREVE; CANONSEQ +011E ; [.08E5.0037.0008.011E] # LATIN CAPITAL LETTER G WITH BREVE; CANONSEQ +011D ; [.08E5.003C.0002.011D] # LATIN SMALL LETTER G WITH CIRCUMFLEX; CANONSEQ +011C ; [.08E5.003C.0008.011C] # LATIN CAPITAL LETTER G WITH CIRCUMFLEX; CANONSEQ +01E7 ; [.08E5.0041.0002.01E7] # LATIN SMALL LETTER G WITH CARON; CANONSEQ +01E6 ; [.08E5.0041.0008.01E6] # LATIN CAPITAL LETTER G WITH CARON; CANONSEQ +0121 ; [.08E5.0052.0002.0121] # LATIN SMALL LETTER G WITH DOT ABOVE; CANONSEQ +0120 ; [.08E5.0052.0008.0120] # LATIN CAPITAL LETTER G WITH DOT ABOVE; CANONSEQ +0123 ; [.08E5.0055.0002.0123] # LATIN SMALL LETTER G WITH CEDILLA; CANONSEQ +0122 ; [.08E5.0055.0008.0122] # LATIN CAPITAL LETTER G WITH CEDILLA; CANONSEQ +1E21 ; [.08E5.005A.0002.1E21] # LATIN SMALL LETTER G WITH MACRON; CANONSEQ +1E20 ; [.08E5.005A.0008.1E20] # LATIN CAPITAL LETTER G WITH MACRON; CANONSEQ +0262 ; [.08E9.0020.0002.0262] # LATIN LETTER SMALL CAPITAL G +01E5 ; [.08ED.0020.0002.01E5] # LATIN SMALL LETTER G WITH STROKE +01E4 ; [.08ED.0020.0008.01E4] # LATIN CAPITAL LETTER G WITH STROKE +0260 ; [.08F1.0020.0002.0260] # LATIN SMALL LETTER G WITH HOOK +0193 ; 
[.08F1.0020.0008.0193] # LATIN CAPITAL LETTER G WITH HOOK +029B ; [.08F5.0020.0002.029B] # LATIN LETTER SMALL CAPITAL G WITH HOOK +0263 ; [.08F9.0020.0002.0263] # LATIN SMALL LETTER GAMMA +0194 ; [.08F9.0020.0008.0194] # LATIN CAPITAL LETTER GAMMA +0261 ; [.08FD.0020.0002.0261] # LATIN SMALL LETTER SCRIPT G +01A3 ; [.0901.0020.0002.01A3] # LATIN SMALL LETTER OI +01A2 ; [.0901.0020.0008.01A2] # LATIN CAPITAL LETTER OI +0068 ; [.0905.0020.0002.0068] # LATIN SMALL LETTER H +0048 ; [.0905.0020.0008.0048] # LATIN CAPITAL LETTER H +0125 ; [.0905.003C.0002.0125] # LATIN SMALL LETTER H WITH CIRCUMFLEX; CANONSEQ +0124 ; [.0905.003C.0008.0124] # LATIN CAPITAL LETTER H WITH CIRCUMFLEX; CANONSEQ +021F ; [.0905.0041.0002.021F] # LATIN SMALL LETTER H WITH CARON; CANONSEQ +021E ; [.0905.0041.0008.021E] # LATIN CAPITAL LETTER H WITH CARON; CANONSEQ +1E27 ; [.0905.0047.0002.1E27] # LATIN SMALL LETTER H WITH DIAERESIS; CANONSEQ +1E26 ; [.0905.0047.0008.1E26] # LATIN CAPITAL LETTER H WITH DIAERESIS; CANONSEQ +1E23 ; [.0905.0052.0002.1E23] # LATIN SMALL LETTER H WITH DOT ABOVE; CANONSEQ +1E22 ; [.0905.0052.0008.1E22] # LATIN CAPITAL LETTER H WITH DOT ABOVE; CANONSEQ +1E29 ; [.0905.0055.0002.1E29] # LATIN SMALL LETTER H WITH CEDILLA; CANONSEQ +1E28 ; [.0905.0055.0008.1E28] # LATIN CAPITAL LETTER H WITH CEDILLA; CANONSEQ +1E25 ; [.0905.0079.0002.1E25] # LATIN SMALL LETTER H WITH DOT BELOW; CANONSEQ +1E24 ; [.0905.0079.0008.1E24] # LATIN CAPITAL LETTER H WITH DOT BELOW; CANONSEQ +1E2B ; [.0905.0086.0002.1E2B] # LATIN SMALL LETTER H WITH BREVE BELOW; CANONSEQ +1E2A ; [.0905.0086.0008.1E2A] # LATIN CAPITAL LETTER H WITH BREVE BELOW; CANONSEQ +1E96 ; [.0905.0089.0002.1E96] # LATIN SMALL LETTER H WITH LINE BELOW; CANONSEQ +029C ; [.0909.0020.0002.029C] # LATIN LETTER SMALL CAPITAL H +0195 ; [.090D.0020.0002.0195] # LATIN SMALL LETTER HV +01F6 ; [.090D.0020.0008.01F6] # LATIN CAPITAL LETTER HWAIR +0127 ; [.0911.0020.0002.0127] # LATIN SMALL LETTER H WITH STROKE +0126 ; [.0911.0020.0008.0126] 
# LATIN CAPITAL LETTER H WITH STROKE +0266 ; [.0915.0020.0002.0266] # LATIN SMALL LETTER H WITH HOOK +0267 ; [.0919.0020.0002.0267] # LATIN SMALL LETTER HENG WITH HOOK +02BB ; [.091D.0020.0002.02BB] # MODIFIER LETTER TURNED COMMA +02BD ; [.091E.0020.0002.02BD] # MODIFIER LETTER REVERSED COMMA +0069 ; [.091F.0020.0002.0069] # LATIN SMALL LETTER I +0049 ; [.091F.0020.0008.0049] # LATIN CAPITAL LETTER I +00ED ; [.091F.0032.0002.00ED] # LATIN SMALL LETTER I WITH ACUTE; CANONSEQ +00CD ; [.091F.0032.0008.00CD] # LATIN CAPITAL LETTER I WITH ACUTE; CANONSEQ +00EC ; [.091F.0035.0002.00EC] # LATIN SMALL LETTER I WITH GRAVE; CANONSEQ +00CC ; [.091F.0035.0008.00CC] # LATIN CAPITAL LETTER I WITH GRAVE; CANONSEQ +012D ; [.091F.0037.0002.012D] # LATIN SMALL LETTER I WITH BREVE; CANONSEQ +012C ; [.091F.0037.0008.012C] # LATIN CAPITAL LETTER I WITH BREVE; CANONSEQ +00EE ; [.091F.003C.0002.00EE] # LATIN SMALL LETTER I WITH CIRCUMFLEX; CANONSEQ +00CE ; [.091F.003C.0008.00CE] # LATIN CAPITAL LETTER I WITH CIRCUMFLEX; CANONSEQ +01D0 ; [.091F.0041.0002.01D0] # LATIN SMALL LETTER I WITH CARON; CANONSEQ +01CF ; [.091F.0041.0008.01CF] # LATIN CAPITAL LETTER I WITH CARON; CANONSEQ +00EF ; [.091F.0047.0002.00EF] # LATIN SMALL LETTER I WITH DIAERESIS; CANONSEQ +00CF ; [.091F.0047.0008.00CF] # LATIN CAPITAL LETTER I WITH DIAERESIS; CANONSEQ +1E2F ; [.091F.0048.0002.1E2F] # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE; CANONSEQ +1E2E ; [.091F.0048.0008.1E2E] # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE; CANONSEQ +0129 ; [.091F.004E.0002.0129] # LATIN SMALL LETTER I WITH TILDE; CANONSEQ +0128 ; [.091F.004E.0008.0128] # LATIN CAPITAL LETTER I WITH TILDE; CANONSEQ +0130 ; [.091F.0052.0008.0130] # LATIN CAPITAL LETTER I WITH DOT ABOVE; CANONSEQ +012F ; [.091F.0058.0002.012F] # LATIN SMALL LETTER I WITH OGONEK; CANONSEQ +012E ; [.091F.0058.0008.012E] # LATIN CAPITAL LETTER I WITH OGONEK; CANONSEQ +012B ; [.091F.005A.0002.012B] # LATIN SMALL LETTER I WITH MACRON; CANONSEQ +012A ; 
[.091F.005A.0008.012A] # LATIN CAPITAL LETTER I WITH MACRON; CANONSEQ +1EC9 ; [.091F.005F.0002.1EC9] # LATIN SMALL LETTER I WITH HOOK ABOVE; CANONSEQ +1EC8 ; [.091F.005F.0008.1EC8] # LATIN CAPITAL LETTER I WITH HOOK ABOVE; CANONSEQ +0209 ; [.091F.0062.0002.0209] # LATIN SMALL LETTER I WITH DOUBLE GRAVE; CANONSEQ +0208 ; [.091F.0062.0008.0208] # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE; CANONSEQ +020B ; [.091F.0064.0002.020B] # LATIN SMALL LETTER I WITH INVERTED BREVE; CANONSEQ +020A ; [.091F.0064.0008.020A] # LATIN CAPITAL LETTER I WITH INVERTED BREVE; CANONSEQ +1ECB ; [.091F.0079.0002.1ECB] # LATIN SMALL LETTER I WITH DOT BELOW; CANONSEQ +1ECA ; [.091F.0079.0008.1ECA] # LATIN CAPITAL LETTER I WITH DOT BELOW; CANONSEQ +1E2D ; [.091F.0088.0002.1E2D] # LATIN SMALL LETTER I WITH TILDE BELOW; CANONSEQ +1E2C ; [.091F.0088.0008.1E2C] # LATIN CAPITAL LETTER I WITH TILDE BELOW; CANONSEQ +0131 ; [.0923.0020.0002.0131] # LATIN SMALL LETTER DOTLESS I +026A ; [.0927.0020.0002.026A] # LATIN LETTER SMALL CAPITAL I +0268 ; [.092B.0020.0002.0268] # LATIN SMALL LETTER I WITH STROKE +0197 ; [.092B.0020.0008.0197] # LATIN CAPITAL LETTER I WITH STROKE +0269 ; [.092F.0020.0002.0269] # LATIN SMALL LETTER IOTA +0196 ; [.092F.0020.0008.0196] # LATIN CAPITAL LETTER IOTA +006A ; [.0933.0020.0002.006A] # LATIN SMALL LETTER J +004A ; [.0933.0020.0008.004A] # LATIN CAPITAL LETTER J +0135 ; [.0933.003C.0002.0135] # LATIN SMALL LETTER J WITH CIRCUMFLEX; CANONSEQ +0134 ; [.0933.003C.0008.0134] # LATIN CAPITAL LETTER J WITH CIRCUMFLEX; CANONSEQ +01F0 ; [.0933.0041.0002.01F0] # LATIN SMALL LETTER J WITH CARON; CANONSEQ +029D ; [.0937.0020.0002.029D] # LATIN SMALL LETTER J WITH CROSSED-TAIL +025F ; [.093B.0020.0002.025F] # LATIN SMALL LETTER DOTLESS J WITH STROKE +0284 ; [.093F.0020.0002.0284] # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK +006B ; [.0943.0020.0002.006B] # LATIN SMALL LETTER K +004B ; [.0943.0020.0008.004B] # LATIN CAPITAL LETTER K +212A ; [.0943.0020.0008.212A] # KELVIN SIGN; 
CANON +1E31 ; [.0943.0032.0002.1E31] # LATIN SMALL LETTER K WITH ACUTE; CANONSEQ +1E30 ; [.0943.0032.0008.1E30] # LATIN CAPITAL LETTER K WITH ACUTE; CANONSEQ +01E9 ; [.0943.0041.0002.01E9] # LATIN SMALL LETTER K WITH CARON; CANONSEQ +01E8 ; [.0943.0041.0008.01E8] # LATIN CAPITAL LETTER K WITH CARON; CANONSEQ +0137 ; [.0943.0055.0002.0137] # LATIN SMALL LETTER K WITH CEDILLA; CANONSEQ +0136 ; [.0943.0055.0008.0136] # LATIN CAPITAL LETTER K WITH CEDILLA; CANONSEQ +1E33 ; [.0943.0079.0002.1E33] # LATIN SMALL LETTER K WITH DOT BELOW; CANONSEQ +1E32 ; [.0943.0079.0008.1E32] # LATIN CAPITAL LETTER K WITH DOT BELOW; CANONSEQ +1E35 ; [.0943.0089.0002.1E35] # LATIN SMALL LETTER K WITH LINE BELOW; CANONSEQ +1E34 ; [.0943.0089.0008.1E34] # LATIN CAPITAL LETTER K WITH LINE BELOW; CANONSEQ +0199 ; [.0947.0020.0002.0199] # LATIN SMALL LETTER K WITH HOOK +0198 ; [.0947.0020.0008.0198] # LATIN CAPITAL LETTER K WITH HOOK +029E ; [.094B.0020.0002.029E] # LATIN SMALL LETTER TURNED K +006C ; [.094F.0020.0002.006C] # LATIN SMALL LETTER L +004C ; [.094F.0020.0008.004C] # LATIN CAPITAL LETTER L +013A ; [.094F.0032.0002.013A] # LATIN SMALL LETTER L WITH ACUTE; CANONSEQ +0139 ; [.094F.0032.0008.0139] # LATIN CAPITAL LETTER L WITH ACUTE; CANONSEQ +013E ; [.094F.0041.0002.013E] # LATIN SMALL LETTER L WITH CARON; CANONSEQ +013D ; [.094F.0041.0008.013D] # LATIN CAPITAL LETTER L WITH CARON; CANONSEQ +013C ; [.094F.0055.0002.013C] # LATIN SMALL LETTER L WITH CEDILLA; CANONSEQ +013B ; [.094F.0055.0008.013B] # LATIN CAPITAL LETTER L WITH CEDILLA; CANONSEQ +1E37 ; [.094F.0079.0002.1E37] # LATIN SMALL LETTER L WITH DOT BELOW; CANONSEQ +1E36 ; [.094F.0079.0008.1E36] # LATIN CAPITAL LETTER L WITH DOT BELOW; CANONSEQ +1E39 ; [.094F.007D.0002.1E39] # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON; CANONSEQ +1E38 ; [.094F.007D.0008.1E38] # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON; CANONSEQ +1E3D ; [.094F.0085.0002.1E3D] # LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW; CANONSEQ +1E3C ; 
[.094F.0085.0008.1E3C] # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW; CANONSEQ +1E3B ; [.094F.0089.0002.1E3B] # LATIN SMALL LETTER L WITH LINE BELOW; CANONSEQ +1E3A ; [.094F.0089.0008.1E3A] # LATIN CAPITAL LETTER L WITH LINE BELOW; CANONSEQ +029F ; [.0953.0020.0002.029F] # LATIN LETTER SMALL CAPITAL L +0142 ; [.0957.0020.0002.0142] # LATIN SMALL LETTER L WITH STROKE +0141 ; [.0957.0020.0008.0141] # LATIN CAPITAL LETTER L WITH STROKE +019A ; [.095B.0020.0002.019A] # LATIN SMALL LETTER L WITH BAR +026B ; [.095F.0020.0002.026B] # LATIN SMALL LETTER L WITH MIDDLE TILDE +026C ; [.0963.0020.0002.026C] # LATIN SMALL LETTER L WITH BELT +026D ; [.0967.0020.0002.026D] # LATIN SMALL LETTER L WITH RETROFLEX HOOK +026E ; [.096B.0020.0002.026E] # LATIN SMALL LETTER LEZH +019B ; [.096F.0020.0002.019B] # LATIN SMALL LETTER LAMBDA WITH STROKE +028E ; [.0973.0020.0002.028E] # LATIN SMALL LETTER TURNED Y +006D ; [.0977.0020.0002.006D] # LATIN SMALL LETTER M +004D ; [.0977.0020.0008.004D] # LATIN CAPITAL LETTER M +1E3F ; [.0977.0032.0002.1E3F] # LATIN SMALL LETTER M WITH ACUTE; CANONSEQ +1E3E ; [.0977.0032.0008.1E3E] # LATIN CAPITAL LETTER M WITH ACUTE; CANONSEQ +1E41 ; [.0977.0052.0002.1E41] # LATIN SMALL LETTER M WITH DOT ABOVE; CANONSEQ +1E40 ; [.0977.0052.0008.1E40] # LATIN CAPITAL LETTER M WITH DOT ABOVE; CANONSEQ +1E43 ; [.0977.0079.0002.1E43] # LATIN SMALL LETTER M WITH DOT BELOW; CANONSEQ +1E42 ; [.0977.0079.0008.1E42] # LATIN CAPITAL LETTER M WITH DOT BELOW; CANONSEQ +0271 ; [.097B.0020.0002.0271] # LATIN SMALL LETTER M WITH HOOK +006E ; [.097F.0020.0002.006E] # LATIN SMALL LETTER N +004E ; [.097F.0020.0008.004E] # LATIN CAPITAL LETTER N +0144 ; [.097F.0032.0002.0144] # LATIN SMALL LETTER N WITH ACUTE; CANONSEQ +0143 ; [.097F.0032.0008.0143] # LATIN CAPITAL LETTER N WITH ACUTE; CANONSEQ +01F9 ; [.097F.0035.0002.01F9] # LATIN SMALL LETTER N WITH GRAVE; CANONSEQ +01F8 ; [.097F.0035.0008.01F8] # LATIN CAPITAL LETTER N WITH GRAVE; CANONSEQ +0148 ; [.097F.0041.0002.0148] # LATIN 
SMALL LETTER N WITH CARON; CANONSEQ +0147 ; [.097F.0041.0008.0147] # LATIN CAPITAL LETTER N WITH CARON; CANONSEQ +00F1 ; [.097F.004E.0002.00F1] # LATIN SMALL LETTER N WITH TILDE; CANONSEQ +00D1 ; [.097F.004E.0008.00D1] # LATIN CAPITAL LETTER N WITH TILDE; CANONSEQ +1E45 ; [.097F.0052.0002.1E45] # LATIN SMALL LETTER N WITH DOT ABOVE; CANONSEQ +1E44 ; [.097F.0052.0008.1E44] # LATIN CAPITAL LETTER N WITH DOT ABOVE; CANONSEQ +0146 ; [.097F.0055.0002.0146] # LATIN SMALL LETTER N WITH CEDILLA; CANONSEQ +0145 ; [.097F.0055.0008.0145] # LATIN CAPITAL LETTER N WITH CEDILLA; CANONSEQ +1E47 ; [.097F.0079.0002.1E47] # LATIN SMALL LETTER N WITH DOT BELOW; CANONSEQ +1E46 ; [.097F.0079.0008.1E46] # LATIN CAPITAL LETTER N WITH DOT BELOW; CANONSEQ +1E4B ; [.097F.0085.0002.1E4B] # LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW; CANONSEQ +1E4A ; [.097F.0085.0008.1E4A] # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW; CANONSEQ +1E49 ; [.097F.0089.0002.1E49] # LATIN SMALL LETTER N WITH LINE BELOW; CANONSEQ +1E48 ; [.097F.0089.0008.1E48] # LATIN CAPITAL LETTER N WITH LINE BELOW; CANONSEQ +0274 ; [.0983.0020.0002.0274] # LATIN LETTER SMALL CAPITAL N +0272 ; [.0987.0020.0002.0272] # LATIN SMALL LETTER N WITH LEFT HOOK +019D ; [.0987.0020.0008.019D] # LATIN CAPITAL LETTER N WITH LEFT HOOK +019E ; [.098B.0020.0002.019E] # LATIN SMALL LETTER N WITH LONG RIGHT LEG +0273 ; [.098F.0020.0002.0273] # LATIN SMALL LETTER N WITH RETROFLEX HOOK +014B ; [.0993.0020.0002.014B] # LATIN SMALL LETTER ENG +014A ; [.0993.0020.0008.014A] # LATIN CAPITAL LETTER ENG +006F ; [.0997.0020.0002.006F] # LATIN SMALL LETTER O +004F ; [.0997.0020.0008.004F] # LATIN CAPITAL LETTER O +00F3 ; [.0997.0032.0002.00F3] # LATIN SMALL LETTER O WITH ACUTE; CANONSEQ +00D3 ; [.0997.0032.0008.00D3] # LATIN CAPITAL LETTER O WITH ACUTE; CANONSEQ +00F2 ; [.0997.0035.0002.00F2] # LATIN SMALL LETTER O WITH GRAVE; CANONSEQ +00D2 ; [.0997.0035.0008.00D2] # LATIN CAPITAL LETTER O WITH GRAVE; CANONSEQ +014F ; [.0997.0037.0002.014F] # LATIN SMALL 
LETTER O WITH BREVE; CANONSEQ +014E ; [.0997.0037.0008.014E] # LATIN CAPITAL LETTER O WITH BREVE; CANONSEQ +00F4 ; [.0997.003C.0002.00F4] # LATIN SMALL LETTER O WITH CIRCUMFLEX; CANONSEQ +00D4 ; [.0997.003C.0008.00D4] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX; CANONSEQ +1ED1 ; [.0997.003D.0002.1ED1] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1ED0 ; [.0997.003D.0008.1ED0] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE; CANONSEQ +1ED3 ; [.0997.003E.0002.1ED3] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1ED2 ; [.0997.003E.0008.1ED2] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE; CANONSEQ +1ED7 ; [.0997.003F.0002.1ED7] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE; CANONSEQ +1ED6 ; [.0997.003F.0008.1ED6] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE; CANONSEQ +1ED5 ; [.0997.0040.0002.1ED5] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +1ED4 ; [.0997.0040.0008.1ED4] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE; CANONSEQ +01D2 ; [.0997.0041.0002.01D2] # LATIN SMALL LETTER O WITH CARON; CANONSEQ +01D1 ; [.0997.0041.0008.01D1] # LATIN CAPITAL LETTER O WITH CARON; CANONSEQ +00F6 ; [.0997.0047.0002.00F6] # LATIN SMALL LETTER O WITH DIAERESIS; CANONSEQ +00D6 ; [.0997.0047.0008.00D6] # LATIN CAPITAL LETTER O WITH DIAERESIS; CANONSEQ +022B ; [.0997.004B.0002.022B] # LATIN SMALL LETTER O WITH DIAERESIS AND MACRON; CANONSEQ +022A ; [.0997.004B.0008.022A] # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON; CANONSEQ +0151 ; [.0997.004D.0002.0151] # LATIN SMALL LETTER O WITH DOUBLE ACUTE; CANONSEQ +0150 ; [.0997.004D.0008.0150] # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE; CANONSEQ +00F5 ; [.0997.004E.0002.00F5] # LATIN SMALL LETTER O WITH TILDE; CANONSEQ +00D5 ; [.0997.004E.0008.00D5] # LATIN CAPITAL LETTER O WITH TILDE; CANONSEQ +1E4D ; [.0997.004F.0002.1E4D] # LATIN SMALL LETTER O WITH TILDE AND ACUTE; CANONSEQ +1E4C ; [.0997.004F.0008.1E4C] # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE; CANONSEQ +1E4F ; 
[.0997.0050.0002.1E4F] # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS; CANONSEQ +1E4E ; [.0997.0050.0008.1E4E] # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS; CANONSEQ +022D ; [.0997.0051.0002.022D] # LATIN SMALL LETTER O WITH TILDE AND MACRON; CANONSEQ +022C ; [.0997.0051.0008.022C] # LATIN CAPITAL LETTER O WITH TILDE AND MACRON; CANONSEQ +022F ; [.0997.0052.0002.022F] # LATIN SMALL LETTER O WITH DOT ABOVE; CANONSEQ +022E ; [.0997.0052.0008.022E] # LATIN CAPITAL LETTER O WITH DOT ABOVE; CANONSEQ +0231 ; [.0997.0053.0002.0231] # LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON; CANONSEQ +0230 ; [.0997.0053.0008.0230] # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON; CANONSEQ +01EB ; [.0997.0058.0002.01EB] # LATIN SMALL LETTER O WITH OGONEK; CANONSEQ +01EA ; [.0997.0058.0008.01EA] # LATIN CAPITAL LETTER O WITH OGONEK; CANONSEQ +01ED ; [.0997.0059.0002.01ED] # LATIN SMALL LETTER O WITH OGONEK AND MACRON; CANONSEQ +01EC ; [.0997.0059.0008.01EC] # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON; CANONSEQ +014D ; [.0997.005A.0002.014D] # LATIN SMALL LETTER O WITH MACRON; CANONSEQ +014C ; [.0997.005A.0008.014C] # LATIN CAPITAL LETTER O WITH MACRON; CANONSEQ +1E53 ; [.0997.005B.0002.1E53] # LATIN SMALL LETTER O WITH MACRON AND ACUTE; CANONSEQ +1E52 ; [.0997.005B.0008.1E52] # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE; CANONSEQ +1E51 ; [.0997.005C.0002.1E51] # LATIN SMALL LETTER O WITH MACRON AND GRAVE; CANONSEQ +1E50 ; [.0997.005C.0008.1E50] # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE; CANONSEQ +1ECF ; [.0997.005F.0002.1ECF] # LATIN SMALL LETTER O WITH HOOK ABOVE; CANONSEQ +1ECE ; [.0997.005F.0008.1ECE] # LATIN CAPITAL LETTER O WITH HOOK ABOVE; CANONSEQ +020D ; [.0997.0062.0002.020D] # LATIN SMALL LETTER O WITH DOUBLE GRAVE; CANONSEQ +020C ; [.0997.0062.0008.020C] # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE; CANONSEQ +020F ; [.0997.0064.0002.020F] # LATIN SMALL LETTER O WITH INVERTED BREVE; CANONSEQ +020E ; [.0997.0064.0008.020E] # LATIN CAPITAL LETTER O WITH 
INVERTED BREVE; CANONSEQ +01A1 ; [.0997.006C.0002.01A1] # LATIN SMALL LETTER O WITH HORN; CANONSEQ +01A0 ; [.0997.006C.0008.01A0] # LATIN CAPITAL LETTER O WITH HORN; CANONSEQ +1EDB ; [.0997.006D.0002.1EDB] # LATIN SMALL LETTER O WITH HORN AND ACUTE; CANONSEQ +1EDA ; [.0997.006D.0008.1EDA] # LATIN CAPITAL LETTER O WITH HORN AND ACUTE; CANONSEQ +1EDD ; [.0997.006E.0002.1EDD] # LATIN SMALL LETTER O WITH HORN AND GRAVE; CANONSEQ +1EDC ; [.0997.006E.0008.1EDC] # LATIN CAPITAL LETTER O WITH HORN AND GRAVE; CANONSEQ +1EE1 ; [.0997.006F.0002.1EE1] # LATIN SMALL LETTER O WITH HORN AND TILDE; CANONSEQ +1EE0 ; [.0997.006F.0008.1EE0] # LATIN CAPITAL LETTER O WITH HORN AND TILDE; CANONSEQ +1EDF ; [.0997.0070.0002.1EDF] # LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE; CANONSEQ +1EDE ; [.0997.0070.0008.1EDE] # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE; CANONSEQ +1EE3 ; [.0997.0071.0002.1EE3] # LATIN SMALL LETTER O WITH HORN AND DOT BELOW; CANONSEQ +1EE2 ; [.0997.0071.0008.1EE2] # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW; CANONSEQ +1ECD ; [.0997.0079.0002.1ECD] # LATIN SMALL LETTER O WITH DOT BELOW; CANONSEQ +1ECC ; [.0997.0079.0008.1ECC] # LATIN CAPITAL LETTER O WITH DOT BELOW; CANONSEQ +1ED9 ; [.0997.007B.0002.1ED9] # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +1ED8 ; [.0997.007B.0008.1ED8] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW; CANONSEQ +0276 ; [.099B.0020.0002.0276] # LATIN LETTER SMALL CAPITAL OE +00F8 ; [.099F.0020.0002.00F8] # LATIN SMALL LETTER O WITH STROKE +00D8 ; [.099F.0020.0008.00D8] # LATIN CAPITAL LETTER O WITH STROKE +01FF ; [.099F.0032.0002.01FF] # LATIN SMALL LETTER O WITH STROKE AND ACUTE; CANONSEQ +01FE ; [.099F.0032.0008.01FE] # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE; CANONSEQ +0254 ; [.09A3.0020.0002.0254] # LATIN SMALL LETTER OPEN O +0186 ; [.09A3.0020.0008.0186] # LATIN CAPITAL LETTER OPEN O +0275 ; [.09A7.0020.0002.0275] # LATIN SMALL LETTER BARRED O +019F ; [.09A7.0020.0008.019F] # LATIN CAPITAL LETTER 
O WITH MIDDLE TILDE +0277 ; [.09AB.0020.0002.0277] # LATIN SMALL LETTER CLOSED OMEGA +0223 ; [.09AF.0020.0002.0223] # LATIN SMALL LETTER OU +0222 ; [.09AF.0020.0008.0222] # LATIN CAPITAL LETTER OU +0070 ; [.09B3.0020.0002.0070] # LATIN SMALL LETTER P +0050 ; [.09B3.0020.0008.0050] # LATIN CAPITAL LETTER P +1E55 ; [.09B3.0032.0002.1E55] # LATIN SMALL LETTER P WITH ACUTE; CANONSEQ +1E54 ; [.09B3.0032.0008.1E54] # LATIN CAPITAL LETTER P WITH ACUTE; CANONSEQ +1E57 ; [.09B3.0052.0002.1E57] # LATIN SMALL LETTER P WITH DOT ABOVE; CANONSEQ +1E56 ; [.09B3.0052.0008.1E56] # LATIN CAPITAL LETTER P WITH DOT ABOVE; CANONSEQ +01A5 ; [.09B7.0020.0002.01A5] # LATIN SMALL LETTER P WITH HOOK +01A4 ; [.09B7.0020.0008.01A4] # LATIN CAPITAL LETTER P WITH HOOK +0278 ; [.09BB.0020.0002.0278] # LATIN SMALL LETTER PHI +0071 ; [.09BF.0020.0002.0071] # LATIN SMALL LETTER Q +0051 ; [.09BF.0020.0008.0051] # LATIN CAPITAL LETTER Q +02A0 ; [.09C3.0020.0002.02A0] # LATIN SMALL LETTER Q WITH HOOK +0138 ; [.09C7.0020.0002.0138] # LATIN SMALL LETTER KRA +0072 ; [.09CB.0020.0002.0072] # LATIN SMALL LETTER R +0052 ; [.09CB.0020.0008.0052] # LATIN CAPITAL LETTER R +0155 ; [.09CB.0032.0002.0155] # LATIN SMALL LETTER R WITH ACUTE; CANONSEQ +0154 ; [.09CB.0032.0008.0154] # LATIN CAPITAL LETTER R WITH ACUTE; CANONSEQ +0159 ; [.09CB.0041.0002.0159] # LATIN SMALL LETTER R WITH CARON; CANONSEQ +0158 ; [.09CB.0041.0008.0158] # LATIN CAPITAL LETTER R WITH CARON; CANONSEQ +1E59 ; [.09CB.0052.0002.1E59] # LATIN SMALL LETTER R WITH DOT ABOVE; CANONSEQ +1E58 ; [.09CB.0052.0008.1E58] # LATIN CAPITAL LETTER R WITH DOT ABOVE; CANONSEQ +0157 ; [.09CB.0055.0002.0157] # LATIN SMALL LETTER R WITH CEDILLA; CANONSEQ +0156 ; [.09CB.0055.0008.0156] # LATIN CAPITAL LETTER R WITH CEDILLA; CANONSEQ +0211 ; [.09CB.0062.0002.0211] # LATIN SMALL LETTER R WITH DOUBLE GRAVE; CANONSEQ +0210 ; [.09CB.0062.0008.0210] # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE; CANONSEQ +0213 ; [.09CB.0064.0002.0213] # LATIN SMALL LETTER R WITH INVERTED 
BREVE; CANONSEQ +0212 ; [.09CB.0064.0008.0212] # LATIN CAPITAL LETTER R WITH INVERTED BREVE; CANONSEQ +1E5B ; [.09CB.0079.0002.1E5B] # LATIN SMALL LETTER R WITH DOT BELOW; CANONSEQ +1E5A ; [.09CB.0079.0008.1E5A] # LATIN CAPITAL LETTER R WITH DOT BELOW; CANONSEQ +1E5D ; [.09CB.007D.0002.1E5D] # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON; CANONSEQ +1E5C ; [.09CB.007D.0008.1E5C] # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON; CANONSEQ +1E5F ; [.09CB.0089.0002.1E5F] # LATIN SMALL LETTER R WITH LINE BELOW; CANONSEQ +1E5E ; [.09CB.0089.0008.1E5E] # LATIN CAPITAL LETTER R WITH LINE BELOW; CANONSEQ +0280 ; [.09CF.0020.0002.0280] # LATIN LETTER SMALL CAPITAL R +01A6 ; [.09CF.0020.0008.01A6] # LATIN LETTER YR +0279 ; [.09D3.0020.0002.0279] # LATIN SMALL LETTER TURNED R +027A ; [.09D7.0020.0002.027A] # LATIN SMALL LETTER TURNED R WITH LONG LEG +027B ; [.09DB.0020.0002.027B] # LATIN SMALL LETTER TURNED R WITH HOOK +027C ; [.09DF.0020.0002.027C] # LATIN SMALL LETTER R WITH LONG LEG +027D ; [.09E3.0020.0002.027D] # LATIN SMALL LETTER R WITH TAIL +027E ; [.09E7.0020.0002.027E] # LATIN SMALL LETTER R WITH FISHHOOK +027F ; [.09EB.0020.0002.027F] # LATIN SMALL LETTER REVERSED R WITH FISHHOOK +0281 ; [.09EF.0020.0002.0281] # LATIN LETTER SMALL CAPITAL INVERTED R +0073 ; [.09F3.0020.0002.0073] # LATIN SMALL LETTER S +0053 ; [.09F3.0020.0008.0053] # LATIN CAPITAL LETTER S +015B ; [.09F3.0032.0002.015B] # LATIN SMALL LETTER S WITH ACUTE; CANONSEQ +015A ; [.09F3.0032.0008.015A] # LATIN CAPITAL LETTER S WITH ACUTE; CANONSEQ +1E65 ; [.09F3.0033.0002.1E65] # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE; CANONSEQ +1E64 ; [.09F3.0033.0008.1E64] # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE; CANONSEQ +015D ; [.09F3.003C.0002.015D] # LATIN SMALL LETTER S WITH CIRCUMFLEX; CANONSEQ +015C ; [.09F3.003C.0008.015C] # LATIN CAPITAL LETTER S WITH CIRCUMFLEX; CANONSEQ +0161 ; [.09F3.0041.0002.0161] # LATIN SMALL LETTER S WITH CARON; CANONSEQ +0160 ; [.09F3.0041.0008.0160] # LATIN CAPITAL 
LETTER S WITH CARON; CANONSEQ +1E67 ; [.09F3.0042.0002.1E67] # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE; CANONSEQ +1E66 ; [.09F3.0042.0008.1E66] # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE; CANONSEQ +1E61 ; [.09F3.0052.0002.1E61] # LATIN SMALL LETTER S WITH DOT ABOVE; CANONSEQ +1E60 ; [.09F3.0052.0008.1E60] # LATIN CAPITAL LETTER S WITH DOT ABOVE; CANONSEQ +015F ; [.09F3.0055.0002.015F] # LATIN SMALL LETTER S WITH CEDILLA; CANONSEQ +015E ; [.09F3.0055.0008.015E] # LATIN CAPITAL LETTER S WITH CEDILLA; CANONSEQ +1E63 ; [.09F3.0079.0002.1E63] # LATIN SMALL LETTER S WITH DOT BELOW; CANONSEQ +1E62 ; [.09F3.0079.0008.1E62] # LATIN CAPITAL LETTER S WITH DOT BELOW; CANONSEQ +1E69 ; [.09F3.007C.0002.1E69] # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE; CANONSEQ +1E68 ; [.09F3.007C.0008.1E68] # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE; CANONSEQ +0219 ; [.09F3.0080.0002.0219] # LATIN SMALL LETTER S WITH COMMA BELOW; CANONSEQ +0218 ; [.09F3.0080.0008.0218] # LATIN CAPITAL LETTER S WITH COMMA BELOW; CANONSEQ +0282 ; [.09F7.0020.0002.0282] # LATIN SMALL LETTER S WITH HOOK +0283 ; [.09FB.0020.0002.0283] # LATIN SMALL LETTER ESH +01A9 ; [.09FB.0020.0008.01A9] # LATIN CAPITAL LETTER ESH +01AA ; [.09FF.0020.0002.01AA] # LATIN LETTER REVERSED ESH LOOP +0285 ; [.0A03.0020.0002.0285] # LATIN SMALL LETTER SQUAT REVERSED ESH +0286 ; [.0A07.0020.0002.0286] # LATIN SMALL LETTER ESH WITH CURL +0074 ; [.0A0B.0020.0002.0074] # LATIN SMALL LETTER T +0054 ; [.0A0B.0020.0008.0054] # LATIN CAPITAL LETTER T +0165 ; [.0A0B.0041.0002.0165] # LATIN SMALL LETTER T WITH CARON; CANONSEQ +0164 ; [.0A0B.0041.0008.0164] # LATIN CAPITAL LETTER T WITH CARON; CANONSEQ +1E97 ; [.0A0B.0047.0002.1E97] # LATIN SMALL LETTER T WITH DIAERESIS; CANONSEQ +1E6B ; [.0A0B.0052.0002.1E6B] # LATIN SMALL LETTER T WITH DOT ABOVE; CANONSEQ +1E6A ; [.0A0B.0052.0008.1E6A] # LATIN CAPITAL LETTER T WITH DOT ABOVE; CANONSEQ +0163 ; [.0A0B.0055.0002.0163] # LATIN SMALL LETTER T WITH CEDILLA; CANONSEQ 
+0162 ; [.0A0B.0055.0008.0162] # LATIN CAPITAL LETTER T WITH CEDILLA; CANONSEQ +1E6D ; [.0A0B.0079.0002.1E6D] # LATIN SMALL LETTER T WITH DOT BELOW; CANONSEQ +1E6C ; [.0A0B.0079.0008.1E6C] # LATIN CAPITAL LETTER T WITH DOT BELOW; CANONSEQ +021B ; [.0A0B.0080.0002.021B] # LATIN SMALL LETTER T WITH COMMA BELOW; CANONSEQ +021A ; [.0A0B.0080.0008.021A] # LATIN CAPITAL LETTER T WITH COMMA BELOW; CANONSEQ +1E71 ; [.0A0B.0085.0002.1E71] # LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW; CANONSEQ +1E70 ; [.0A0B.0085.0008.1E70] # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW; CANONSEQ +1E6F ; [.0A0B.0089.0002.1E6F] # LATIN SMALL LETTER T WITH LINE BELOW; CANONSEQ +1E6E ; [.0A0B.0089.0008.1E6E] # LATIN CAPITAL LETTER T WITH LINE BELOW; CANONSEQ +0167 ; [.0A0F.0020.0002.0167] # LATIN SMALL LETTER T WITH STROKE +0166 ; [.0A0F.0020.0008.0166] # LATIN CAPITAL LETTER T WITH STROKE +01AB ; [.0A13.0020.0002.01AB] # LATIN SMALL LETTER T WITH PALATAL HOOK +01AD ; [.0A17.0020.0002.01AD] # LATIN SMALL LETTER T WITH HOOK +01AC ; [.0A17.0020.0008.01AC] # LATIN CAPITAL LETTER T WITH HOOK +0288 ; [.0A1B.0020.0002.0288] # LATIN SMALL LETTER T WITH RETROFLEX HOOK +01AE ; [.0A1B.0020.0008.01AE] # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +0287 ; [.0A1F.0020.0002.0287] # LATIN SMALL LETTER TURNED T +0075 ; [.0A23.0020.0002.0075] # LATIN SMALL LETTER U +0055 ; [.0A23.0020.0008.0055] # LATIN CAPITAL LETTER U +00FA ; [.0A23.0032.0002.00FA] # LATIN SMALL LETTER U WITH ACUTE; CANONSEQ +00DA ; [.0A23.0032.0008.00DA] # LATIN CAPITAL LETTER U WITH ACUTE; CANONSEQ +00F9 ; [.0A23.0035.0002.00F9] # LATIN SMALL LETTER U WITH GRAVE; CANONSEQ +00D9 ; [.0A23.0035.0008.00D9] # LATIN CAPITAL LETTER U WITH GRAVE; CANONSEQ +016D ; [.0A23.0037.0002.016D] # LATIN SMALL LETTER U WITH BREVE; CANONSEQ +016C ; [.0A23.0037.0008.016C] # LATIN CAPITAL LETTER U WITH BREVE; CANONSEQ +00FB ; [.0A23.003C.0002.00FB] # LATIN SMALL LETTER U WITH CIRCUMFLEX; CANONSEQ +00DB ; [.0A23.003C.0008.00DB] # LATIN CAPITAL LETTER U WITH 
CIRCUMFLEX; CANONSEQ +01D4 ; [.0A23.0041.0002.01D4] # LATIN SMALL LETTER U WITH CARON; CANONSEQ +01D3 ; [.0A23.0041.0008.01D3] # LATIN CAPITAL LETTER U WITH CARON; CANONSEQ +016F ; [.0A23.0043.0002.016F] # LATIN SMALL LETTER U WITH RING ABOVE; CANONSEQ +016E ; [.0A23.0043.0008.016E] # LATIN CAPITAL LETTER U WITH RING ABOVE; CANONSEQ +00FC ; [.0A23.0047.0002.00FC] # LATIN SMALL LETTER U WITH DIAERESIS; CANONSEQ +00DC ; [.0A23.0047.0008.00DC] # LATIN CAPITAL LETTER U WITH DIAERESIS; CANONSEQ +01D8 ; [.0A23.0048.0002.01D8] # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE; CANONSEQ +01D7 ; [.0A23.0048.0008.01D7] # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE; CANONSEQ +01DC ; [.0A23.0049.0002.01DC] # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE; CANONSEQ +01DB ; [.0A23.0049.0008.01DB] # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE; CANONSEQ +01DA ; [.0A23.004A.0002.01DA] # LATIN SMALL LETTER U WITH DIAERESIS AND CARON; CANONSEQ +01D9 ; [.0A23.004A.0008.01D9] # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON; CANONSEQ +01D6 ; [.0A23.004B.0002.01D6] # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON; CANONSEQ +01D5 ; [.0A23.004B.0008.01D5] # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON; CANONSEQ +0171 ; [.0A23.004D.0002.0171] # LATIN SMALL LETTER U WITH DOUBLE ACUTE; CANONSEQ +0170 ; [.0A23.004D.0008.0170] # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE; CANONSEQ +0169 ; [.0A23.004E.0002.0169] # LATIN SMALL LETTER U WITH TILDE; CANONSEQ +0168 ; [.0A23.004E.0008.0168] # LATIN CAPITAL LETTER U WITH TILDE; CANONSEQ +1E79 ; [.0A23.004F.0002.1E79] # LATIN SMALL LETTER U WITH TILDE AND ACUTE; CANONSEQ +1E78 ; [.0A23.004F.0008.1E78] # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE; CANONSEQ +0173 ; [.0A23.0058.0002.0173] # LATIN SMALL LETTER U WITH OGONEK; CANONSEQ +0172 ; [.0A23.0058.0008.0172] # LATIN CAPITAL LETTER U WITH OGONEK; CANONSEQ +016B ; [.0A23.005A.0002.016B] # LATIN SMALL LETTER U WITH MACRON; CANONSEQ +016A ; [.0A23.005A.0008.016A] # LATIN CAPITAL LETTER U WITH 
MACRON; CANONSEQ +1E7B ; [.0A23.005D.0002.1E7B] # LATIN SMALL LETTER U WITH MACRON AND DIAERESIS; CANONSEQ +1E7A ; [.0A23.005D.0008.1E7A] # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS; CANONSEQ +1EE7 ; [.0A23.005F.0002.1EE7] # LATIN SMALL LETTER U WITH HOOK ABOVE; CANONSEQ +1EE6 ; [.0A23.005F.0008.1EE6] # LATIN CAPITAL LETTER U WITH HOOK ABOVE; CANONSEQ +0215 ; [.0A23.0062.0002.0215] # LATIN SMALL LETTER U WITH DOUBLE GRAVE; CANONSEQ +0214 ; [.0A23.0062.0008.0214] # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE; CANONSEQ +0217 ; [.0A23.0064.0002.0217] # LATIN SMALL LETTER U WITH INVERTED BREVE; CANONSEQ +0216 ; [.0A23.0064.0008.0216] # LATIN CAPITAL LETTER U WITH INVERTED BREVE; CANONSEQ +01B0 ; [.0A23.006C.0002.01B0] # LATIN SMALL LETTER U WITH HORN; CANONSEQ +01AF ; [.0A23.006C.0008.01AF] # LATIN CAPITAL LETTER U WITH HORN; CANONSEQ +1EE9 ; [.0A23.006D.0002.1EE9] # LATIN SMALL LETTER U WITH HORN AND ACUTE; CANONSEQ +1EE8 ; [.0A23.006D.0008.1EE8] # LATIN CAPITAL LETTER U WITH HORN AND ACUTE; CANONSEQ +1EEB ; [.0A23.006E.0002.1EEB] # LATIN SMALL LETTER U WITH HORN AND GRAVE; CANONSEQ +1EEA ; [.0A23.006E.0008.1EEA] # LATIN CAPITAL LETTER U WITH HORN AND GRAVE; CANONSEQ +1EEF ; [.0A23.006F.0002.1EEF] # LATIN SMALL LETTER U WITH HORN AND TILDE; CANONSEQ +1EEE ; [.0A23.006F.0008.1EEE] # LATIN CAPITAL LETTER U WITH HORN AND TILDE; CANONSEQ +1EED ; [.0A23.0070.0002.1EED] # LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE; CANONSEQ +1EEC ; [.0A23.0070.0008.1EEC] # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE; CANONSEQ +1EF1 ; [.0A23.0071.0002.1EF1] # LATIN SMALL LETTER U WITH HORN AND DOT BELOW; CANONSEQ +1EF0 ; [.0A23.0071.0008.1EF0] # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW; CANONSEQ +1EE5 ; [.0A23.0079.0002.1EE5] # LATIN SMALL LETTER U WITH DOT BELOW; CANONSEQ +1EE4 ; [.0A23.0079.0008.1EE4] # LATIN CAPITAL LETTER U WITH DOT BELOW; CANONSEQ +1E73 ; [.0A23.007E.0002.1E73] # LATIN SMALL LETTER U WITH DIAERESIS BELOW; CANONSEQ +1E72 ; [.0A23.007E.0008.1E72] # LATIN 
CAPITAL LETTER U WITH DIAERESIS BELOW; CANONSEQ +1E77 ; [.0A23.0085.0002.1E77] # LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW; CANONSEQ +1E76 ; [.0A23.0085.0008.1E76] # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW; CANONSEQ +1E75 ; [.0A23.0088.0002.1E75] # LATIN SMALL LETTER U WITH TILDE BELOW; CANONSEQ +1E74 ; [.0A23.0088.0008.1E74] # LATIN CAPITAL LETTER U WITH TILDE BELOW; CANONSEQ +0289 ; [.0A27.0020.0002.0289] # LATIN SMALL LETTER U BAR +0265 ; [.0A2B.0020.0002.0265] # LATIN SMALL LETTER TURNED H +026F ; [.0A2F.0020.0002.026F] # LATIN SMALL LETTER TURNED M +019C ; [.0A2F.0020.0008.019C] # LATIN CAPITAL LETTER TURNED M +0270 ; [.0A33.0020.0002.0270] # LATIN SMALL LETTER TURNED M WITH LONG LEG +028A ; [.0A37.0020.0002.028A] # LATIN SMALL LETTER UPSILON +01B1 ; [.0A37.0020.0008.01B1] # LATIN CAPITAL LETTER UPSILON +0076 ; [.0A3B.0020.0002.0076] # LATIN SMALL LETTER V +0056 ; [.0A3B.0020.0008.0056] # LATIN CAPITAL LETTER V +1E7D ; [.0A3B.004E.0002.1E7D] # LATIN SMALL LETTER V WITH TILDE; CANONSEQ +1E7C ; [.0A3B.004E.0008.1E7C] # LATIN CAPITAL LETTER V WITH TILDE; CANONSEQ +1E7F ; [.0A3B.0079.0002.1E7F] # LATIN SMALL LETTER V WITH DOT BELOW; CANONSEQ +1E7E ; [.0A3B.0079.0008.1E7E] # LATIN CAPITAL LETTER V WITH DOT BELOW; CANONSEQ +028B ; [.0A3F.0020.0002.028B] # LATIN SMALL LETTER V WITH HOOK +01B2 ; [.0A3F.0020.0008.01B2] # LATIN CAPITAL LETTER V WITH HOOK +028C ; [.0A43.0020.0002.028C] # LATIN SMALL LETTER TURNED V +0077 ; [.0A47.0020.0002.0077] # LATIN SMALL LETTER W +0057 ; [.0A47.0020.0008.0057] # LATIN CAPITAL LETTER W +1E83 ; [.0A47.0032.0002.1E83] # LATIN SMALL LETTER W WITH ACUTE; CANONSEQ +1E82 ; [.0A47.0032.0008.1E82] # LATIN CAPITAL LETTER W WITH ACUTE; CANONSEQ +1E81 ; [.0A47.0035.0002.1E81] # LATIN SMALL LETTER W WITH GRAVE; CANONSEQ +1E80 ; [.0A47.0035.0008.1E80] # LATIN CAPITAL LETTER W WITH GRAVE; CANONSEQ +0175 ; [.0A47.003C.0002.0175] # LATIN SMALL LETTER W WITH CIRCUMFLEX; CANONSEQ +0174 ; [.0A47.003C.0008.0174] # LATIN CAPITAL LETTER W WITH 
CIRCUMFLEX; CANONSEQ +1E98 ; [.0A47.0043.0002.1E98] # LATIN SMALL LETTER W WITH RING ABOVE; CANONSEQ +1E85 ; [.0A47.0047.0002.1E85] # LATIN SMALL LETTER W WITH DIAERESIS; CANONSEQ +1E84 ; [.0A47.0047.0008.1E84] # LATIN CAPITAL LETTER W WITH DIAERESIS; CANONSEQ +1E87 ; [.0A47.0052.0002.1E87] # LATIN SMALL LETTER W WITH DOT ABOVE; CANONSEQ +1E86 ; [.0A47.0052.0008.1E86] # LATIN CAPITAL LETTER W WITH DOT ABOVE; CANONSEQ +1E89 ; [.0A47.0079.0002.1E89] # LATIN SMALL LETTER W WITH DOT BELOW; CANONSEQ +1E88 ; [.0A47.0079.0008.1E88] # LATIN CAPITAL LETTER W WITH DOT BELOW; CANONSEQ +028D ; [.0A4B.0020.0002.028D] # LATIN SMALL LETTER TURNED W +0078 ; [.0A4F.0020.0002.0078] # LATIN SMALL LETTER X +0058 ; [.0A4F.0020.0008.0058] # LATIN CAPITAL LETTER X +1E8D ; [.0A4F.0047.0002.1E8D] # LATIN SMALL LETTER X WITH DIAERESIS; CANONSEQ +1E8C ; [.0A4F.0047.0008.1E8C] # LATIN CAPITAL LETTER X WITH DIAERESIS; CANONSEQ +1E8B ; [.0A4F.0052.0002.1E8B] # LATIN SMALL LETTER X WITH DOT ABOVE; CANONSEQ +1E8A ; [.0A4F.0052.0008.1E8A] # LATIN CAPITAL LETTER X WITH DOT ABOVE; CANONSEQ +0079 ; [.0A53.0020.0002.0079] # LATIN SMALL LETTER Y +0059 ; [.0A53.0020.0008.0059] # LATIN CAPITAL LETTER Y +00FD ; [.0A53.0032.0002.00FD] # LATIN SMALL LETTER Y WITH ACUTE; CANONSEQ +00DD ; [.0A53.0032.0008.00DD] # LATIN CAPITAL LETTER Y WITH ACUTE; CANONSEQ +1EF3 ; [.0A53.0035.0002.1EF3] # LATIN SMALL LETTER Y WITH GRAVE; CANONSEQ +1EF2 ; [.0A53.0035.0008.1EF2] # LATIN CAPITAL LETTER Y WITH GRAVE; CANONSEQ +0177 ; [.0A53.003C.0002.0177] # LATIN SMALL LETTER Y WITH CIRCUMFLEX; CANONSEQ +0176 ; [.0A53.003C.0008.0176] # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX; CANONSEQ +1E99 ; [.0A53.0043.0002.1E99] # LATIN SMALL LETTER Y WITH RING ABOVE; CANONSEQ +00FF ; [.0A53.0047.0002.00FF] # LATIN SMALL LETTER Y WITH DIAERESIS; CANONSEQ +0178 ; [.0A53.0047.0008.0178] # LATIN CAPITAL LETTER Y WITH DIAERESIS; CANONSEQ +1EF9 ; [.0A53.004E.0002.1EF9] # LATIN SMALL LETTER Y WITH TILDE; CANONSEQ +1EF8 ; [.0A53.004E.0008.1EF8] # 
LATIN CAPITAL LETTER Y WITH TILDE; CANONSEQ +1E8F ; [.0A53.0052.0002.1E8F] # LATIN SMALL LETTER Y WITH DOT ABOVE; CANONSEQ +1E8E ; [.0A53.0052.0008.1E8E] # LATIN CAPITAL LETTER Y WITH DOT ABOVE; CANONSEQ +0233 ; [.0A53.005A.0002.0233] # LATIN SMALL LETTER Y WITH MACRON; CANONSEQ +0232 ; [.0A53.005A.0008.0232] # LATIN CAPITAL LETTER Y WITH MACRON; CANONSEQ +1EF7 ; [.0A53.005F.0002.1EF7] # LATIN SMALL LETTER Y WITH HOOK ABOVE; CANONSEQ +1EF6 ; [.0A53.005F.0008.1EF6] # LATIN CAPITAL LETTER Y WITH HOOK ABOVE; CANONSEQ +1EF5 ; [.0A53.0079.0002.1EF5] # LATIN SMALL LETTER Y WITH DOT BELOW; CANONSEQ +1EF4 ; [.0A53.0079.0008.1EF4] # LATIN CAPITAL LETTER Y WITH DOT BELOW; CANONSEQ +028F ; [.0A57.0020.0002.028F] # LATIN LETTER SMALL CAPITAL Y +01B4 ; [.0A5B.0020.0002.01B4] # LATIN SMALL LETTER Y WITH HOOK +01B3 ; [.0A5B.0020.0008.01B3] # LATIN CAPITAL LETTER Y WITH HOOK +007A ; [.0A5F.0020.0002.007A] # LATIN SMALL LETTER Z +005A ; [.0A5F.0020.0008.005A] # LATIN CAPITAL LETTER Z +017A ; [.0A5F.0032.0002.017A] # LATIN SMALL LETTER Z WITH ACUTE; CANONSEQ +0179 ; [.0A5F.0032.0008.0179] # LATIN CAPITAL LETTER Z WITH ACUTE; CANONSEQ +1E91 ; [.0A5F.003C.0002.1E91] # LATIN SMALL LETTER Z WITH CIRCUMFLEX; CANONSEQ +1E90 ; [.0A5F.003C.0008.1E90] # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX; CANONSEQ +017E ; [.0A5F.0041.0002.017E] # LATIN SMALL LETTER Z WITH CARON; CANONSEQ +017D ; [.0A5F.0041.0008.017D] # LATIN CAPITAL LETTER Z WITH CARON; CANONSEQ +017C ; [.0A5F.0052.0002.017C] # LATIN SMALL LETTER Z WITH DOT ABOVE; CANONSEQ +017B ; [.0A5F.0052.0008.017B] # LATIN CAPITAL LETTER Z WITH DOT ABOVE; CANONSEQ +1E93 ; [.0A5F.0079.0002.1E93] # LATIN SMALL LETTER Z WITH DOT BELOW; CANONSEQ +1E92 ; [.0A5F.0079.0008.1E92] # LATIN CAPITAL LETTER Z WITH DOT BELOW; CANONSEQ +1E95 ; [.0A5F.0089.0002.1E95] # LATIN SMALL LETTER Z WITH LINE BELOW; CANONSEQ +1E94 ; [.0A5F.0089.0008.1E94] # LATIN CAPITAL LETTER Z WITH LINE BELOW; CANONSEQ +01B6 ; [.0A63.0020.0002.01B6] # LATIN SMALL LETTER Z WITH STROKE 
+01B5 ; [.0A63.0020.0008.01B5] # LATIN CAPITAL LETTER Z WITH STROKE +0225 ; [.0A67.0020.0002.0225] # LATIN SMALL LETTER Z WITH HOOK +0224 ; [.0A67.0020.0008.0224] # LATIN CAPITAL LETTER Z WITH HOOK +0290 ; [.0A6B.0020.0002.0290] # LATIN SMALL LETTER Z WITH RETROFLEX HOOK +0291 ; [.0A6F.0020.0002.0291] # LATIN SMALL LETTER Z WITH CURL +0292 ; [.0A73.0020.0002.0292] # LATIN SMALL LETTER EZH +01B7 ; [.0A73.0020.0008.01B7] # LATIN CAPITAL LETTER EZH +01EF ; [.0A73.0041.0002.01EF] # LATIN SMALL LETTER EZH WITH CARON; CANONSEQ +01EE ; [.0A73.0041.0008.01EE] # LATIN CAPITAL LETTER EZH WITH CARON; CANONSEQ +01B9 ; [.0A77.0020.0002.01B9] # LATIN SMALL LETTER EZH REVERSED +01B8 ; [.0A77.0020.0008.01B8] # LATIN CAPITAL LETTER EZH REVERSED +01BA ; [.0A7B.0020.0002.01BA] # LATIN SMALL LETTER EZH WITH TAIL +0293 ; [.0A7F.0020.0002.0293] # LATIN SMALL LETTER EZH WITH CURL +021D ; [.0A83.0020.0002.021D] # LATIN SMALL LETTER YOGH +021C ; [.0A83.0020.0008.021C] # LATIN CAPITAL LETTER YOGH +00FE ; [.0A87.0020.0002.00FE] # LATIN SMALL LETTER THORN +00DE ; [.0A87.0020.0008.00DE] # LATIN CAPITAL LETTER THORN +01BF ; [.0A8B.0020.0002.01BF] # LATIN LETTER WYNN +01F7 ; [.0A8B.0020.0008.01F7] # LATIN CAPITAL LETTER WYNN +01BB ; [.0A8F.0020.0002.01BB] # LATIN LETTER TWO WITH STROKE +01A8 ; [.0A93.0020.0002.01A8] # LATIN SMALL LETTER TONE TWO +01A7 ; [.0A93.0020.0008.01A7] # LATIN CAPITAL LETTER TONE TWO +01BD ; [.0A97.0020.0002.01BD] # LATIN SMALL LETTER TONE FIVE +01BC ; [.0A97.0020.0008.01BC] # LATIN CAPITAL LETTER TONE FIVE +0185 ; [.0A9B.0020.0002.0185] # LATIN SMALL LETTER TONE SIX +0184 ; [.0A9B.0020.0008.0184] # LATIN CAPITAL LETTER TONE SIX +0294 ; [.0A9F.0020.0002.0294] # LATIN LETTER GLOTTAL STOP +02C0 ; [.0AA3.0020.0002.02C0] # MODIFIER LETTER GLOTTAL STOP +02BC ; [.0AA4.0020.0002.02BC] # MODIFIER LETTER APOSTROPHE +02EE ; [.0AA5.0020.0002.02EE] # MODIFIER LETTER DOUBLE APOSTROPHE +02BE ; [.0AA6.0020.0002.02BE] # MODIFIER LETTER RIGHT HALF RING +0295 ; [.0AA7.0020.0002.0295] # 
LATIN LETTER PHARYNGEAL VOICED FRICATIVE +02BF ; [.0AAB.0020.0002.02BF] # MODIFIER LETTER LEFT HALF RING +02C1 ; [.0AAC.0020.0002.02C1] # MODIFIER LETTER REVERSED GLOTTAL STOP +01BE ; [.0AAD.0020.0002.01BE] # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE +02A1 ; [.0AB1.0020.0002.02A1] # LATIN LETTER GLOTTAL STOP WITH STROKE +02A2 ; [.0AB5.0020.0002.02A2] # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE +0296 ; [.0AB9.0020.0002.0296] # LATIN LETTER INVERTED GLOTTAL STOP +01C0 ; [.0ABD.0020.0002.01C0] # LATIN LETTER DENTAL CLICK +01C1 ; [.0AC1.0020.0002.01C1] # LATIN LETTER LATERAL CLICK +01C2 ; [.0AC5.0020.0002.01C2] # LATIN LETTER ALVEOLAR CLICK +01C3 ; [.0AC9.0020.0002.01C3] # LATIN LETTER RETROFLEX CLICK +0297 ; [.0ACD.0020.0002.0297] # LATIN LETTER STRETCHED C +0298 ; [.0AD1.0020.0002.0298] # LATIN LETTER BILABIAL CLICK +02AC ; [.0AD5.0020.0002.02AC] # LATIN LETTER BILABIAL PERCUSSIVE +02AD ; [.0AD9.0020.0002.02AD] # LATIN LETTER BIDENTAL PERCUSSIVE +2126 ; [.0AF8.0020.0008.2126] # OHM SIGN; CANON +1100 ; [.1684.0020.0002.1100] # HANGUL CHOSEONG KIYEOK +1101 ; [.1685.0020.0002.1101] # HANGUL CHOSEONG SSANGKIYEOK +1102 ; [.1686.0020.0002.1102] # HANGUL CHOSEONG NIEUN +1103 ; [.1687.0020.0002.1103] # HANGUL CHOSEONG TIKEUT +1104 ; [.1688.0020.0002.1104] # HANGUL CHOSEONG SSANGTIKEUT +1105 ; [.1689.0020.0002.1105] # HANGUL CHOSEONG RIEUL +1106 ; [.168A.0020.0002.1106] # HANGUL CHOSEONG MIEUM +1107 ; [.168B.0020.0002.1107] # HANGUL CHOSEONG PIEUP +1108 ; [.168C.0020.0002.1108] # HANGUL CHOSEONG SSANGPIEUP +1109 ; [.168D.0020.0002.1109] # HANGUL CHOSEONG SIOS +110A ; [.168E.0020.0002.110A] # HANGUL CHOSEONG SSANGSIOS +110B ; [.168F.0020.0002.110B] # HANGUL CHOSEONG IEUNG +110C ; [.1690.0020.0002.110C] # HANGUL CHOSEONG CIEUC +110D ; [.1691.0020.0002.110D] # HANGUL CHOSEONG SSANGCIEUC +110E ; [.1692.0020.0002.110E] # HANGUL CHOSEONG CHIEUCH +110F ; [.1693.0020.0002.110F] # HANGUL CHOSEONG KHIEUKH +1110 ; [.1694.0020.0002.1110] # HANGUL CHOSEONG THIEUTH +1111 ; 
[.1695.0020.0002.1111] # HANGUL CHOSEONG PHIEUPH +1112 ; [.1696.0020.0002.1112] # HANGUL CHOSEONG HIEUH +1113 ; [.1697.0020.0002.1113] # HANGUL CHOSEONG NIEUN-KIYEOK +1114 ; [.1698.0020.0002.1114] # HANGUL CHOSEONG SSANGNIEUN +1115 ; [.1699.0020.0002.1115] # HANGUL CHOSEONG NIEUN-TIKEUT +1116 ; [.169A.0020.0002.1116] # HANGUL CHOSEONG NIEUN-PIEUP +1117 ; [.169B.0020.0002.1117] # HANGUL CHOSEONG TIKEUT-KIYEOK +1118 ; [.169C.0020.0002.1118] # HANGUL CHOSEONG RIEUL-NIEUN +1119 ; [.169D.0020.0002.1119] # HANGUL CHOSEONG SSANGRIEUL +111A ; [.169E.0020.0002.111A] # HANGUL CHOSEONG RIEUL-HIEUH +111B ; [.169F.0020.0002.111B] # HANGUL CHOSEONG KAPYEOUNRIEUL +111C ; [.16A0.0020.0002.111C] # HANGUL CHOSEONG MIEUM-PIEUP +111D ; [.16A1.0020.0002.111D] # HANGUL CHOSEONG KAPYEOUNMIEUM +111E ; [.16A2.0020.0002.111E] # HANGUL CHOSEONG PIEUP-KIYEOK +111F ; [.16A3.0020.0002.111F] # HANGUL CHOSEONG PIEUP-NIEUN +1120 ; [.16A4.0020.0002.1120] # HANGUL CHOSEONG PIEUP-TIKEUT +1121 ; [.16A5.0020.0002.1121] # HANGUL CHOSEONG PIEUP-SIOS +1122 ; [.16A6.0020.0002.1122] # HANGUL CHOSEONG PIEUP-SIOS-KIYEOK +1123 ; [.16A7.0020.0002.1123] # HANGUL CHOSEONG PIEUP-SIOS-TIKEUT +1124 ; [.16A8.0020.0002.1124] # HANGUL CHOSEONG PIEUP-SIOS-PIEUP +1125 ; [.16A9.0020.0002.1125] # HANGUL CHOSEONG PIEUP-SSANGSIOS +1126 ; [.16AA.0020.0002.1126] # HANGUL CHOSEONG PIEUP-SIOS-CIEUC +1127 ; [.16AB.0020.0002.1127] # HANGUL CHOSEONG PIEUP-CIEUC +1128 ; [.16AC.0020.0002.1128] # HANGUL CHOSEONG PIEUP-CHIEUCH +1129 ; [.16AD.0020.0002.1129] # HANGUL CHOSEONG PIEUP-THIEUTH +112A ; [.16AE.0020.0002.112A] # HANGUL CHOSEONG PIEUP-PHIEUPH +112B ; [.16AF.0020.0002.112B] # HANGUL CHOSEONG KAPYEOUNPIEUP +112C ; [.16B0.0020.0002.112C] # HANGUL CHOSEONG KAPYEOUNSSANGPIEUP +112D ; [.16B1.0020.0002.112D] # HANGUL CHOSEONG SIOS-KIYEOK +112E ; [.16B2.0020.0002.112E] # HANGUL CHOSEONG SIOS-NIEUN +112F ; [.16B3.0020.0002.112F] # HANGUL CHOSEONG SIOS-TIKEUT +1130 ; [.16B4.0020.0002.1130] # HANGUL CHOSEONG SIOS-RIEUL +1131 ; 
[.16B5.0020.0002.1131] # HANGUL CHOSEONG SIOS-MIEUM +1132 ; [.16B6.0020.0002.1132] # HANGUL CHOSEONG SIOS-PIEUP +1133 ; [.16B7.0020.0002.1133] # HANGUL CHOSEONG SIOS-PIEUP-KIYEOK +1134 ; [.16B8.0020.0002.1134] # HANGUL CHOSEONG SIOS-SSANGSIOS +1135 ; [.16B9.0020.0002.1135] # HANGUL CHOSEONG SIOS-IEUNG +1136 ; [.16BA.0020.0002.1136] # HANGUL CHOSEONG SIOS-CIEUC +1137 ; [.16BB.0020.0002.1137] # HANGUL CHOSEONG SIOS-CHIEUCH +1138 ; [.16BC.0020.0002.1138] # HANGUL CHOSEONG SIOS-KHIEUKH +1139 ; [.16BD.0020.0002.1139] # HANGUL CHOSEONG SIOS-THIEUTH +113A ; [.16BE.0020.0002.113A] # HANGUL CHOSEONG SIOS-PHIEUPH +113B ; [.16BF.0020.0002.113B] # HANGUL CHOSEONG SIOS-HIEUH +113C ; [.16C0.0020.0002.113C] # HANGUL CHOSEONG CHITUEUMSIOS +113D ; [.16C1.0020.0002.113D] # HANGUL CHOSEONG CHITUEUMSSANGSIOS +113E ; [.16C2.0020.0002.113E] # HANGUL CHOSEONG CEONGCHIEUMSIOS +113F ; [.16C3.0020.0002.113F] # HANGUL CHOSEONG CEONGCHIEUMSSANGSIOS +1140 ; [.16C4.0020.0002.1140] # HANGUL CHOSEONG PANSIOS +1141 ; [.16C5.0020.0002.1141] # HANGUL CHOSEONG IEUNG-KIYEOK +1142 ; [.16C6.0020.0002.1142] # HANGUL CHOSEONG IEUNG-TIKEUT +1143 ; [.16C7.0020.0002.1143] # HANGUL CHOSEONG IEUNG-MIEUM +1144 ; [.16C8.0020.0002.1144] # HANGUL CHOSEONG IEUNG-PIEUP +1145 ; [.16C9.0020.0002.1145] # HANGUL CHOSEONG IEUNG-SIOS +1146 ; [.16CA.0020.0002.1146] # HANGUL CHOSEONG IEUNG-PANSIOS +1147 ; [.16CB.0020.0002.1147] # HANGUL CHOSEONG SSANGIEUNG +1148 ; [.16CC.0020.0002.1148] # HANGUL CHOSEONG IEUNG-CIEUC +1149 ; [.16CD.0020.0002.1149] # HANGUL CHOSEONG IEUNG-CHIEUCH +114A ; [.16CE.0020.0002.114A] # HANGUL CHOSEONG IEUNG-THIEUTH +114B ; [.16CF.0020.0002.114B] # HANGUL CHOSEONG IEUNG-PHIEUPH +114C ; [.16D0.0020.0002.114C] # HANGUL CHOSEONG YESIEUNG +114D ; [.16D1.0020.0002.114D] # HANGUL CHOSEONG CIEUC-IEUNG +114E ; [.16D2.0020.0002.114E] # HANGUL CHOSEONG CHITUEUMCIEUC +114F ; [.16D3.0020.0002.114F] # HANGUL CHOSEONG CHITUEUMSSANGCIEUC +1150 ; [.16D4.0020.0002.1150] # HANGUL CHOSEONG CEONGCHIEUMCIEUC +1151 ; 
[.16D5.0020.0002.1151] # HANGUL CHOSEONG CEONGCHIEUMSSANGCIEUC +1152 ; [.16D6.0020.0002.1152] # HANGUL CHOSEONG CHIEUCH-KHIEUKH +1153 ; [.16D7.0020.0002.1153] # HANGUL CHOSEONG CHIEUCH-HIEUH +1154 ; [.16D8.0020.0002.1154] # HANGUL CHOSEONG CHITUEUMCHIEUCH +1155 ; [.16D9.0020.0002.1155] # HANGUL CHOSEONG CEONGCHIEUMCHIEUCH +1156 ; [.16DA.0020.0002.1156] # HANGUL CHOSEONG PHIEUPH-PIEUP +1157 ; [.16DB.0020.0002.1157] # HANGUL CHOSEONG KAPYEOUNPHIEUPH +1158 ; [.16DC.0020.0002.1158] # HANGUL CHOSEONG SSANGHIEUH +1159 ; [.16DD.0020.0002.1159] # HANGUL CHOSEONG YEORINHIEUH +115F ; [.16DE.0020.0002.115F] # HANGUL CHOSEONG FILLER +1160 ; [.16DF.0020.0002.1160] # HANGUL JUNGSEONG FILLER +1161 ; [.16E0.0020.0002.1161] # HANGUL JUNGSEONG A +1162 ; [.16E1.0020.0002.1162] # HANGUL JUNGSEONG AE +1163 ; [.16E2.0020.0002.1163] # HANGUL JUNGSEONG YA +1164 ; [.16E3.0020.0002.1164] # HANGUL JUNGSEONG YAE +1165 ; [.16E4.0020.0002.1165] # HANGUL JUNGSEONG EO +1166 ; [.16E5.0020.0002.1166] # HANGUL JUNGSEONG E +1167 ; [.16E6.0020.0002.1167] # HANGUL JUNGSEONG YEO +1168 ; [.16E7.0020.0002.1168] # HANGUL JUNGSEONG YE +1169 ; [.16E8.0020.0002.1169] # HANGUL JUNGSEONG O +116A ; [.16E9.0020.0002.116A] # HANGUL JUNGSEONG WA +116B ; [.16EA.0020.0002.116B] # HANGUL JUNGSEONG WAE +116C ; [.16EB.0020.0002.116C] # HANGUL JUNGSEONG OE +116D ; [.16EC.0020.0002.116D] # HANGUL JUNGSEONG YO +116E ; [.16ED.0020.0002.116E] # HANGUL JUNGSEONG U +116F ; [.16EE.0020.0002.116F] # HANGUL JUNGSEONG WEO +1170 ; [.16EF.0020.0002.1170] # HANGUL JUNGSEONG WE +1171 ; [.16F0.0020.0002.1171] # HANGUL JUNGSEONG WI +1172 ; [.16F1.0020.0002.1172] # HANGUL JUNGSEONG YU +1173 ; [.16F2.0020.0002.1173] # HANGUL JUNGSEONG EU +1174 ; [.16F3.0020.0002.1174] # HANGUL JUNGSEONG YI +1175 ; [.16F4.0020.0002.1175] # HANGUL JUNGSEONG I +1176 ; [.16F5.0020.0002.1176] # HANGUL JUNGSEONG A-O +1177 ; [.16F6.0020.0002.1177] # HANGUL JUNGSEONG A-U +1178 ; [.16F7.0020.0002.1178] # HANGUL JUNGSEONG YA-O +1179 ; [.16F8.0020.0002.1179] # 
HANGUL JUNGSEONG YA-YO +117A ; [.16F9.0020.0002.117A] # HANGUL JUNGSEONG EO-O +117B ; [.16FA.0020.0002.117B] # HANGUL JUNGSEONG EO-U +117C ; [.16FB.0020.0002.117C] # HANGUL JUNGSEONG EO-EU +117D ; [.16FC.0020.0002.117D] # HANGUL JUNGSEONG YEO-O +117E ; [.16FD.0020.0002.117E] # HANGUL JUNGSEONG YEO-U +117F ; [.16FE.0020.0002.117F] # HANGUL JUNGSEONG O-EO +1180 ; [.16FF.0020.0002.1180] # HANGUL JUNGSEONG O-E +1181 ; [.1700.0020.0002.1181] # HANGUL JUNGSEONG O-YE +1182 ; [.1701.0020.0002.1182] # HANGUL JUNGSEONG O-O +1183 ; [.1702.0020.0002.1183] # HANGUL JUNGSEONG O-U +1184 ; [.1703.0020.0002.1184] # HANGUL JUNGSEONG YO-YA +1185 ; [.1704.0020.0002.1185] # HANGUL JUNGSEONG YO-YAE +1186 ; [.1705.0020.0002.1186] # HANGUL JUNGSEONG YO-YEO +1187 ; [.1706.0020.0002.1187] # HANGUL JUNGSEONG YO-O +1188 ; [.1707.0020.0002.1188] # HANGUL JUNGSEONG YO-I +1189 ; [.1708.0020.0002.1189] # HANGUL JUNGSEONG U-A +118A ; [.1709.0020.0002.118A] # HANGUL JUNGSEONG U-AE +118B ; [.170A.0020.0002.118B] # HANGUL JUNGSEONG U-EO-EU +118C ; [.170B.0020.0002.118C] # HANGUL JUNGSEONG U-YE +118D ; [.170C.0020.0002.118D] # HANGUL JUNGSEONG U-U +118E ; [.170D.0020.0002.118E] # HANGUL JUNGSEONG YU-A +118F ; [.170E.0020.0002.118F] # HANGUL JUNGSEONG YU-EO +1190 ; [.170F.0020.0002.1190] # HANGUL JUNGSEONG YU-E +1191 ; [.1710.0020.0002.1191] # HANGUL JUNGSEONG YU-YEO +1192 ; [.1711.0020.0002.1192] # HANGUL JUNGSEONG YU-YE +1193 ; [.1712.0020.0002.1193] # HANGUL JUNGSEONG YU-U +1194 ; [.1713.0020.0002.1194] # HANGUL JUNGSEONG YU-I +1195 ; [.1714.0020.0002.1195] # HANGUL JUNGSEONG EU-U +1196 ; [.1715.0020.0002.1196] # HANGUL JUNGSEONG EU-EU +1197 ; [.1716.0020.0002.1197] # HANGUL JUNGSEONG YI-U +1198 ; [.1717.0020.0002.1198] # HANGUL JUNGSEONG I-A +1199 ; [.1718.0020.0002.1199] # HANGUL JUNGSEONG I-YA +119A ; [.1719.0020.0002.119A] # HANGUL JUNGSEONG I-O +119B ; [.171A.0020.0002.119B] # HANGUL JUNGSEONG I-U +119C ; [.171B.0020.0002.119C] # HANGUL JUNGSEONG I-EU +119D ; [.171C.0020.0002.119D] # HANGUL 
JUNGSEONG I-ARAEA +119E ; [.171D.0020.0002.119E] # HANGUL JUNGSEONG ARAEA +119F ; [.171E.0020.0002.119F] # HANGUL JUNGSEONG ARAEA-EO +11A0 ; [.171F.0020.0002.11A0] # HANGUL JUNGSEONG ARAEA-U +11A1 ; [.1720.0020.0002.11A1] # HANGUL JUNGSEONG ARAEA-I +11A2 ; [.1721.0020.0002.11A2] # HANGUL JUNGSEONG SSANGARAEA +11A8 ; [.1722.0020.0002.11A8] # HANGUL JONGSEONG KIYEOK +11A9 ; [.1723.0020.0002.11A9] # HANGUL JONGSEONG SSANGKIYEOK +11AA ; [.1724.0020.0002.11AA] # HANGUL JONGSEONG KIYEOK-SIOS +11AB ; [.1725.0020.0002.11AB] # HANGUL JONGSEONG NIEUN +11AC ; [.1726.0020.0002.11AC] # HANGUL JONGSEONG NIEUN-CIEUC +11AD ; [.1727.0020.0002.11AD] # HANGUL JONGSEONG NIEUN-HIEUH +11AE ; [.1728.0020.0002.11AE] # HANGUL JONGSEONG TIKEUT +11AF ; [.1729.0020.0002.11AF] # HANGUL JONGSEONG RIEUL +11B0 ; [.172A.0020.0002.11B0] # HANGUL JONGSEONG RIEUL-KIYEOK +11B1 ; [.172B.0020.0002.11B1] # HANGUL JONGSEONG RIEUL-MIEUM +11B2 ; [.172C.0020.0002.11B2] # HANGUL JONGSEONG RIEUL-PIEUP +11B3 ; [.172D.0020.0002.11B3] # HANGUL JONGSEONG RIEUL-SIOS +11B4 ; [.172E.0020.0002.11B4] # HANGUL JONGSEONG RIEUL-THIEUTH +11B5 ; [.172F.0020.0002.11B5] # HANGUL JONGSEONG RIEUL-PHIEUPH +11B6 ; [.1730.0020.0002.11B6] # HANGUL JONGSEONG RIEUL-HIEUH +11B7 ; [.1731.0020.0002.11B7] # HANGUL JONGSEONG MIEUM +11B8 ; [.1732.0020.0002.11B8] # HANGUL JONGSEONG PIEUP +11B9 ; [.1733.0020.0002.11B9] # HANGUL JONGSEONG PIEUP-SIOS +11BA ; [.1734.0020.0002.11BA] # HANGUL JONGSEONG SIOS +11BB ; [.1735.0020.0002.11BB] # HANGUL JONGSEONG SSANGSIOS +11BC ; [.1736.0020.0002.11BC] # HANGUL JONGSEONG IEUNG +11BD ; [.1737.0020.0002.11BD] # HANGUL JONGSEONG CIEUC +11BE ; [.1738.0020.0002.11BE] # HANGUL JONGSEONG CHIEUCH +11BF ; [.1739.0020.0002.11BF] # HANGUL JONGSEONG KHIEUKH +11C0 ; [.173A.0020.0002.11C0] # HANGUL JONGSEONG THIEUTH +11C1 ; [.173B.0020.0002.11C1] # HANGUL JONGSEONG PHIEUPH +11C2 ; [.173C.0020.0002.11C2] # HANGUL JONGSEONG HIEUH +11C3 ; [.173D.0020.0002.11C3] # HANGUL JONGSEONG KIYEOK-RIEUL +11C4 ; 
[.173E.0020.0002.11C4] # HANGUL JONGSEONG KIYEOK-SIOS-KIYEOK +11C5 ; [.173F.0020.0002.11C5] # HANGUL JONGSEONG NIEUN-KIYEOK +11C6 ; [.1740.0020.0002.11C6] # HANGUL JONGSEONG NIEUN-TIKEUT +11C7 ; [.1741.0020.0002.11C7] # HANGUL JONGSEONG NIEUN-SIOS +11C8 ; [.1742.0020.0002.11C8] # HANGUL JONGSEONG NIEUN-PANSIOS +11C9 ; [.1743.0020.0002.11C9] # HANGUL JONGSEONG NIEUN-THIEUTH +11CA ; [.1744.0020.0002.11CA] # HANGUL JONGSEONG TIKEUT-KIYEOK +11CB ; [.1745.0020.0002.11CB] # HANGUL JONGSEONG TIKEUT-RIEUL +11CC ; [.1746.0020.0002.11CC] # HANGUL JONGSEONG RIEUL-KIYEOK-SIOS +11CD ; [.1747.0020.0002.11CD] # HANGUL JONGSEONG RIEUL-NIEUN +11CE ; [.1748.0020.0002.11CE] # HANGUL JONGSEONG RIEUL-TIKEUT +11CF ; [.1749.0020.0002.11CF] # HANGUL JONGSEONG RIEUL-TIKEUT-HIEUH +11D0 ; [.174A.0020.0002.11D0] # HANGUL JONGSEONG SSANGRIEUL +11D1 ; [.174B.0020.0002.11D1] # HANGUL JONGSEONG RIEUL-MIEUM-KIYEOK +11D2 ; [.174C.0020.0002.11D2] # HANGUL JONGSEONG RIEUL-MIEUM-SIOS +11D3 ; [.174D.0020.0002.11D3] # HANGUL JONGSEONG RIEUL-PIEUP-SIOS +11D4 ; [.174E.0020.0002.11D4] # HANGUL JONGSEONG RIEUL-PIEUP-HIEUH +11D5 ; [.174F.0020.0002.11D5] # HANGUL JONGSEONG RIEUL-KAPYEOUNPIEUP +11D6 ; [.1750.0020.0002.11D6] # HANGUL JONGSEONG RIEUL-SSANGSIOS +11D7 ; [.1751.0020.0002.11D7] # HANGUL JONGSEONG RIEUL-PANSIOS +11D8 ; [.1752.0020.0002.11D8] # HANGUL JONGSEONG RIEUL-KHIEUKH +11D9 ; [.1753.0020.0002.11D9] # HANGUL JONGSEONG RIEUL-YEORINHIEUH +11DA ; [.1754.0020.0002.11DA] # HANGUL JONGSEONG MIEUM-KIYEOK +11DB ; [.1755.0020.0002.11DB] # HANGUL JONGSEONG MIEUM-RIEUL +11DC ; [.1756.0020.0002.11DC] # HANGUL JONGSEONG MIEUM-PIEUP +11DD ; [.1757.0020.0002.11DD] # HANGUL JONGSEONG MIEUM-SIOS +11DE ; [.1758.0020.0002.11DE] # HANGUL JONGSEONG MIEUM-SSANGSIOS +11DF ; [.1759.0020.0002.11DF] # HANGUL JONGSEONG MIEUM-PANSIOS +11E0 ; [.175A.0020.0002.11E0] # HANGUL JONGSEONG MIEUM-CHIEUCH +11E1 ; [.175B.0020.0002.11E1] # HANGUL JONGSEONG MIEUM-HIEUH +11E2 ; [.175C.0020.0002.11E2] # HANGUL JONGSEONG KAPYEOUNMIEUM 
+11E3 ; [.175D.0020.0002.11E3] # HANGUL JONGSEONG PIEUP-RIEUL +11E4 ; [.175E.0020.0002.11E4] # HANGUL JONGSEONG PIEUP-PHIEUPH +11E5 ; [.175F.0020.0002.11E5] # HANGUL JONGSEONG PIEUP-HIEUH +11E6 ; [.1760.0020.0002.11E6] # HANGUL JONGSEONG KAPYEOUNPIEUP +11E7 ; [.1761.0020.0002.11E7] # HANGUL JONGSEONG SIOS-KIYEOK +11E8 ; [.1762.0020.0002.11E8] # HANGUL JONGSEONG SIOS-TIKEUT +11E9 ; [.1763.0020.0002.11E9] # HANGUL JONGSEONG SIOS-RIEUL +11EA ; [.1764.0020.0002.11EA] # HANGUL JONGSEONG SIOS-PIEUP +11EB ; [.1765.0020.0002.11EB] # HANGUL JONGSEONG PANSIOS +11EC ; [.1766.0020.0002.11EC] # HANGUL JONGSEONG IEUNG-KIYEOK +11ED ; [.1767.0020.0002.11ED] # HANGUL JONGSEONG IEUNG-SSANGKIYEOK +11EE ; [.1768.0020.0002.11EE] # HANGUL JONGSEONG SSANGIEUNG +11EF ; [.1769.0020.0002.11EF] # HANGUL JONGSEONG IEUNG-KHIEUKH +11F0 ; [.176A.0020.0002.11F0] # HANGUL JONGSEONG YESIEUNG +11F1 ; [.176B.0020.0002.11F1] # HANGUL JONGSEONG YESIEUNG-SIOS +11F2 ; [.176C.0020.0002.11F2] # HANGUL JONGSEONG YESIEUNG-PANSIOS +11F3 ; [.176D.0020.0002.11F3] # HANGUL JONGSEONG PHIEUPH-PIEUP +11F4 ; [.176E.0020.0002.11F4] # HANGUL JONGSEONG KAPYEOUNPHIEUPH +11F5 ; [.176F.0020.0002.11F5] # HANGUL JONGSEONG HIEUH-NIEUN +11F6 ; [.1770.0020.0002.11F6] # HANGUL JONGSEONG HIEUH-RIEUL +11F7 ; [.1771.0020.0002.11F7] # HANGUL JONGSEONG HIEUH-MIEUM +11F8 ; [.1772.0020.0002.11F8] # HANGUL JONGSEONG HIEUH-PIEUP +11F9 ; [.1773.0020.0002.11F9] # HANGUL JONGSEONG YEORINHIEUH +3041 ; [.1774.0020.000D.3041] # HIRAGANA LETTER SMALL A +3042 ; [.1774.0020.000E.3042] # HIRAGANA LETTER A +30A1 ; [.1774.0020.000F.30A1] # KATAKANA LETTER SMALL A +30A2 ; [.1774.0020.0011.30A2] # KATAKANA LETTER A +3043 ; [.1775.0020.000D.3043] # HIRAGANA LETTER SMALL I +3044 ; [.1775.0020.000E.3044] # HIRAGANA LETTER I +30A3 ; [.1775.0020.000F.30A3] # KATAKANA LETTER SMALL I +30A4 ; [.1775.0020.0011.30A4] # KATAKANA LETTER I +3045 ; [.1776.0020.000D.3045] # HIRAGANA LETTER SMALL U +3046 ; [.1776.0020.000E.3046] # HIRAGANA LETTER U +30A5 ; 
[.1776.0020.000F.30A5] # KATAKANA LETTER SMALL U +30A6 ; [.1776.0020.0011.30A6] # KATAKANA LETTER U +3094 ; [.1776.013E.000E.3094] # HIRAGANA LETTER VU; CANONSEQ +30F4 ; [.1776.013E.0011.30F4] # KATAKANA LETTER VU; CANONSEQ +3047 ; [.1777.0020.000D.3047] # HIRAGANA LETTER SMALL E +3048 ; [.1777.0020.000E.3048] # HIRAGANA LETTER E +30A7 ; [.1777.0020.000F.30A7] # KATAKANA LETTER SMALL E +30A8 ; [.1777.0020.0011.30A8] # KATAKANA LETTER E +3049 ; [.1778.0020.000D.3049] # HIRAGANA LETTER SMALL O +304A ; [.1778.0020.000E.304A] # HIRAGANA LETTER O +30A9 ; [.1778.0020.000F.30A9] # KATAKANA LETTER SMALL O +30AA ; [.1778.0020.0011.30AA] # KATAKANA LETTER O +304B ; [.1779.0020.000E.304B] # HIRAGANA LETTER KA +30F5 ; [.1779.0020.000F.30F5] # KATAKANA LETTER SMALL KA +30AB ; [.1779.0020.0011.30AB] # KATAKANA LETTER KA +304C ; [.1779.013E.000E.304C] # HIRAGANA LETTER GA; CANONSEQ +30AC ; [.1779.013E.0011.30AC] # KATAKANA LETTER GA; CANONSEQ +304D ; [.177A.0020.000E.304D] # HIRAGANA LETTER KI +30AD ; [.177A.0020.0011.30AD] # KATAKANA LETTER KI +304E ; [.177A.013E.000E.304E] # HIRAGANA LETTER GI; CANONSEQ +30AE ; [.177A.013E.0011.30AE] # KATAKANA LETTER GI; CANONSEQ +304F ; [.177B.0020.000E.304F] # HIRAGANA LETTER KU +30AF ; [.177B.0020.0011.30AF] # KATAKANA LETTER KU +3050 ; [.177B.013E.000E.3050] # HIRAGANA LETTER GU; CANONSEQ +30B0 ; [.177B.013E.0011.30B0] # KATAKANA LETTER GU; CANONSEQ +3051 ; [.177C.0020.000E.3051] # HIRAGANA LETTER KE +30F6 ; [.177C.0020.000F.30F6] # KATAKANA LETTER SMALL KE +30B1 ; [.177C.0020.0011.30B1] # KATAKANA LETTER KE +3052 ; [.177C.013E.000E.3052] # HIRAGANA LETTER GE; CANONSEQ +30B2 ; [.177C.013E.0011.30B2] # KATAKANA LETTER GE; CANONSEQ +3053 ; [.177D.0020.000E.3053] # HIRAGANA LETTER KO +30B3 ; [.177D.0020.0011.30B3] # KATAKANA LETTER KO +3054 ; [.177D.013E.000E.3054] # HIRAGANA LETTER GO; CANONSEQ +30B4 ; [.177D.013E.0011.30B4] # KATAKANA LETTER GO; CANONSEQ +3055 ; [.177E.0020.000E.3055] # HIRAGANA LETTER SA +30B5 ; [.177E.0020.0011.30B5] # 
KATAKANA LETTER SA +3056 ; [.177E.013E.000E.3056] # HIRAGANA LETTER ZA; CANONSEQ +30B6 ; [.177E.013E.0011.30B6] # KATAKANA LETTER ZA; CANONSEQ +3057 ; [.177F.0020.000E.3057] # HIRAGANA LETTER SI +30B7 ; [.177F.0020.0011.30B7] # KATAKANA LETTER SI +3058 ; [.177F.013E.000E.3058] # HIRAGANA LETTER ZI; CANONSEQ +30B8 ; [.177F.013E.0011.30B8] # KATAKANA LETTER ZI; CANONSEQ +3059 ; [.1780.0020.000E.3059] # HIRAGANA LETTER SU +30B9 ; [.1780.0020.0011.30B9] # KATAKANA LETTER SU +305A ; [.1780.013E.000E.305A] # HIRAGANA LETTER ZU; CANONSEQ +30BA ; [.1780.013E.0011.30BA] # KATAKANA LETTER ZU; CANONSEQ +305B ; [.1781.0020.000E.305B] # HIRAGANA LETTER SE +30BB ; [.1781.0020.0011.30BB] # KATAKANA LETTER SE +305C ; [.1781.013E.000E.305C] # HIRAGANA LETTER ZE; CANONSEQ +30BC ; [.1781.013E.0011.30BC] # KATAKANA LETTER ZE; CANONSEQ +305D ; [.1782.0020.000E.305D] # HIRAGANA LETTER SO +30BD ; [.1782.0020.0011.30BD] # KATAKANA LETTER SO +305E ; [.1782.013E.000E.305E] # HIRAGANA LETTER ZO; CANONSEQ +30BE ; [.1782.013E.0011.30BE] # KATAKANA LETTER ZO; CANONSEQ +305F ; [.1783.0020.000E.305F] # HIRAGANA LETTER TA +30BF ; [.1783.0020.0011.30BF] # KATAKANA LETTER TA +3060 ; [.1783.013E.000E.3060] # HIRAGANA LETTER DA; CANONSEQ +30C0 ; [.1783.013E.0011.30C0] # KATAKANA LETTER DA; CANONSEQ +3061 ; [.1784.0020.000E.3061] # HIRAGANA LETTER TI +30C1 ; [.1784.0020.0011.30C1] # KATAKANA LETTER TI +3062 ; [.1784.013E.000E.3062] # HIRAGANA LETTER DI; CANONSEQ +30C2 ; [.1784.013E.0011.30C2] # KATAKANA LETTER DI; CANONSEQ +3063 ; [.1785.0020.000D.3063] # HIRAGANA LETTER SMALL TU +3064 ; [.1785.0020.000E.3064] # HIRAGANA LETTER TU +30C3 ; [.1785.0020.000F.30C3] # KATAKANA LETTER SMALL TU +30C4 ; [.1785.0020.0011.30C4] # KATAKANA LETTER TU +3065 ; [.1785.013E.000E.3065] # HIRAGANA LETTER DU; CANONSEQ +30C5 ; [.1785.013E.0011.30C5] # KATAKANA LETTER DU; CANONSEQ +3066 ; [.1786.0020.000E.3066] # HIRAGANA LETTER TE +30C6 ; [.1786.0020.0011.30C6] # KATAKANA LETTER TE +3067 ; [.1786.013E.000E.3067] # 
HIRAGANA LETTER DE; CANONSEQ +30C7 ; [.1786.013E.0011.30C7] # KATAKANA LETTER DE; CANONSEQ +3068 ; [.1787.0020.000E.3068] # HIRAGANA LETTER TO +30C8 ; [.1787.0020.0011.30C8] # KATAKANA LETTER TO +3069 ; [.1787.013E.000E.3069] # HIRAGANA LETTER DO; CANONSEQ +30C9 ; [.1787.013E.0011.30C9] # KATAKANA LETTER DO; CANONSEQ +306A ; [.1788.0020.000E.306A] # HIRAGANA LETTER NA +30CA ; [.1788.0020.0011.30CA] # KATAKANA LETTER NA +306B ; [.1789.0020.000E.306B] # HIRAGANA LETTER NI +30CB ; [.1789.0020.0011.30CB] # KATAKANA LETTER NI +306C ; [.178A.0020.000E.306C] # HIRAGANA LETTER NU +30CC ; [.178A.0020.0011.30CC] # KATAKANA LETTER NU +306D ; [.178B.0020.000E.306D] # HIRAGANA LETTER NE +30CD ; [.178B.0020.0011.30CD] # KATAKANA LETTER NE +306E ; [.178C.0020.000E.306E] # HIRAGANA LETTER NO +30CE ; [.178C.0020.0011.30CE] # KATAKANA LETTER NO +306F ; [.178D.0020.000E.306F] # HIRAGANA LETTER HA +30CF ; [.178D.0020.0011.30CF] # KATAKANA LETTER HA +3070 ; [.178D.013E.000E.3070] # HIRAGANA LETTER BA; CANONSEQ +30D0 ; [.178D.013E.0011.30D0] # KATAKANA LETTER BA; CANONSEQ +3071 ; [.178D.013F.000E.3071] # HIRAGANA LETTER PA; CANONSEQ +30D1 ; [.178D.013F.0011.30D1] # KATAKANA LETTER PA; CANONSEQ +3072 ; [.178E.0020.000E.3072] # HIRAGANA LETTER HI +30D2 ; [.178E.0020.0011.30D2] # KATAKANA LETTER HI +3073 ; [.178E.013E.000E.3073] # HIRAGANA LETTER BI; CANONSEQ +30D3 ; [.178E.013E.0011.30D3] # KATAKANA LETTER BI; CANONSEQ +3074 ; [.178E.013F.000E.3074] # HIRAGANA LETTER PI; CANONSEQ +30D4 ; [.178E.013F.0011.30D4] # KATAKANA LETTER PI; CANONSEQ +3075 ; [.178F.0020.000E.3075] # HIRAGANA LETTER HU +30D5 ; [.178F.0020.0011.30D5] # KATAKANA LETTER HU +3076 ; [.178F.013E.000E.3076] # HIRAGANA LETTER BU; CANONSEQ +30D6 ; [.178F.013E.0011.30D6] # KATAKANA LETTER BU; CANONSEQ +3077 ; [.178F.013F.000E.3077] # HIRAGANA LETTER PU; CANONSEQ +30D7 ; [.178F.013F.0011.30D7] # KATAKANA LETTER PU; CANONSEQ +3078 ; [.1790.0020.000E.3078] # HIRAGANA LETTER HE +30D8 ; [.1790.0020.0011.30D8] # KATAKANA LETTER HE 
+3079 ; [.1790.013E.000E.3079] # HIRAGANA LETTER BE; CANONSEQ +30D9 ; [.1790.013E.0011.30D9] # KATAKANA LETTER BE; CANONSEQ +307A ; [.1790.013F.000E.307A] # HIRAGANA LETTER PE; CANONSEQ +30DA ; [.1790.013F.0011.30DA] # KATAKANA LETTER PE; CANONSEQ +307B ; [.1791.0020.000E.307B] # HIRAGANA LETTER HO +30DB ; [.1791.0020.0011.30DB] # KATAKANA LETTER HO +307C ; [.1791.013E.000E.307C] # HIRAGANA LETTER BO; CANONSEQ +30DC ; [.1791.013E.0011.30DC] # KATAKANA LETTER BO; CANONSEQ +307D ; [.1791.013F.000E.307D] # HIRAGANA LETTER PO; CANONSEQ +30DD ; [.1791.013F.0011.30DD] # KATAKANA LETTER PO; CANONSEQ +307E ; [.1792.0020.000E.307E] # HIRAGANA LETTER MA +30DE ; [.1792.0020.0011.30DE] # KATAKANA LETTER MA +307F ; [.1793.0020.000E.307F] # HIRAGANA LETTER MI +30DF ; [.1793.0020.0011.30DF] # KATAKANA LETTER MI +3080 ; [.1794.0020.000E.3080] # HIRAGANA LETTER MU +30E0 ; [.1794.0020.0011.30E0] # KATAKANA LETTER MU +3081 ; [.1795.0020.000E.3081] # HIRAGANA LETTER ME +30E1 ; [.1795.0020.0011.30E1] # KATAKANA LETTER ME +3082 ; [.1796.0020.000E.3082] # HIRAGANA LETTER MO +30E2 ; [.1796.0020.0011.30E2] # KATAKANA LETTER MO +3083 ; [.1797.0020.000D.3083] # HIRAGANA LETTER SMALL YA +3084 ; [.1797.0020.000E.3084] # HIRAGANA LETTER YA +30E3 ; [.1797.0020.000F.30E3] # KATAKANA LETTER SMALL YA +30E4 ; [.1797.0020.0011.30E4] # KATAKANA LETTER YA +3085 ; [.1798.0020.000D.3085] # HIRAGANA LETTER SMALL YU +3086 ; [.1798.0020.000E.3086] # HIRAGANA LETTER YU +30E5 ; [.1798.0020.000F.30E5] # KATAKANA LETTER SMALL YU +30E6 ; [.1798.0020.0011.30E6] # KATAKANA LETTER YU +3087 ; [.1799.0020.000D.3087] # HIRAGANA LETTER SMALL YO +3088 ; [.1799.0020.000E.3088] # HIRAGANA LETTER YO +30E7 ; [.1799.0020.000F.30E7] # KATAKANA LETTER SMALL YO +30E8 ; [.1799.0020.0011.30E8] # KATAKANA LETTER YO +3089 ; [.179A.0020.000E.3089] # HIRAGANA LETTER RA +30E9 ; [.179A.0020.0011.30E9] # KATAKANA LETTER RA +308A ; [.179B.0020.000E.308A] # HIRAGANA LETTER RI +30EA ; [.179B.0020.0011.30EA] # KATAKANA LETTER RI +308B ; 
[.179C.0020.000E.308B] # HIRAGANA LETTER RU +30EB ; [.179C.0020.0011.30EB] # KATAKANA LETTER RU +308C ; [.179D.0020.000E.308C] # HIRAGANA LETTER RE +30EC ; [.179D.0020.0011.30EC] # KATAKANA LETTER RE +308D ; [.179E.0020.000E.308D] # HIRAGANA LETTER RO +30ED ; [.179E.0020.0011.30ED] # KATAKANA LETTER RO +308E ; [.179F.0020.000D.308E] # HIRAGANA LETTER SMALL WA +308F ; [.179F.0020.000E.308F] # HIRAGANA LETTER WA +30EE ; [.179F.0020.000F.30EE] # KATAKANA LETTER SMALL WA +30EF ; [.179F.0020.0011.30EF] # KATAKANA LETTER WA +30F7 ; [.179F.013E.0011.30F7] # KATAKANA LETTER VA; CANONSEQ +3090 ; [.17A0.0020.000E.3090] # HIRAGANA LETTER WI +30F0 ; [.17A0.0020.0011.30F0] # KATAKANA LETTER WI +30F8 ; [.17A0.013E.0011.30F8] # KATAKANA LETTER VI; CANONSEQ +3091 ; [.17A1.0020.000E.3091] # HIRAGANA LETTER WE +30F1 ; [.17A1.0020.0011.30F1] # KATAKANA LETTER WE +30F9 ; [.17A1.013E.0011.30F9] # KATAKANA LETTER VE; CANONSEQ +3092 ; [.17A2.0020.000E.3092] # HIRAGANA LETTER WO +30F2 ; [.17A2.0020.0011.30F2] # KATAKANA LETTER WO +30FA ; [.17A2.013E.0011.30FA] # KATAKANA LETTER VO; CANONSEQ +3093 ; [.17A3.0020.000E.3093] # HIRAGANA LETTER N +30F3 ; [.17A3.0020.0011.30F3] # KATAKANA LETTER N +3105 ; [.17A4.0020.0002.3105] # BOPOMOFO LETTER B +3106 ; [.17A5.0020.0002.3106] # BOPOMOFO LETTER P +3107 ; [.17A6.0020.0002.3107] # BOPOMOFO LETTER M +3108 ; [.17A7.0020.0002.3108] # BOPOMOFO LETTER F +312A ; [.17A8.0020.0002.312A] # BOPOMOFO LETTER V +3109 ; [.17A9.0020.0002.3109] # BOPOMOFO LETTER D +310A ; [.17AA.0020.0002.310A] # BOPOMOFO LETTER T +310B ; [.17AB.0020.0002.310B] # BOPOMOFO LETTER N +310C ; [.17AC.0020.0002.310C] # BOPOMOFO LETTER L +310D ; [.17AD.0020.0002.310D] # BOPOMOFO LETTER G +310E ; [.17AE.0020.0002.310E] # BOPOMOFO LETTER K +312B ; [.17AF.0020.0002.312B] # BOPOMOFO LETTER NG +31AD ; [.17B0.0020.0002.31AD] # BOPOMOFO LETTER NGG +310F ; [.17B1.0020.0002.310F] # BOPOMOFO LETTER H +3110 ; [.17B2.0020.0002.3110] # BOPOMOFO LETTER J +3111 ; [.17B3.0020.0002.3111] # BOPOMOFO 
LETTER Q +3112 ; [.17B4.0020.0002.3112] # BOPOMOFO LETTER X +312C ; [.17B5.0020.0002.312C] # BOPOMOFO LETTER GN +3113 ; [.17B6.0020.0002.3113] # BOPOMOFO LETTER ZH +3114 ; [.17B7.0020.0002.3114] # BOPOMOFO LETTER CH +3115 ; [.17B8.0020.0002.3115] # BOPOMOFO LETTER SH +3116 ; [.17B9.0020.0002.3116] # BOPOMOFO LETTER R +3117 ; [.17BA.0020.0002.3117] # BOPOMOFO LETTER Z +3118 ; [.17BB.0020.0002.3118] # BOPOMOFO LETTER C +3119 ; [.17BC.0020.0002.3119] # BOPOMOFO LETTER S +311A ; [.17BD.0020.0002.311A] # BOPOMOFO LETTER A +311B ; [.17BE.0020.0002.311B] # BOPOMOFO LETTER O +31A6 ; [.17BF.0020.0002.31A6] # BOPOMOFO LETTER OO +311C ; [.17C0.0020.0002.311C] # BOPOMOFO LETTER E +311D ; [.17C1.0020.0002.311D] # BOPOMOFO LETTER EH +31A4 ; [.17C2.0020.0002.31A4] # BOPOMOFO LETTER EE +311E ; [.17C3.0020.0002.311E] # BOPOMOFO LETTER AI +311F ; [.17C4.0020.0002.311F] # BOPOMOFO LETTER EI +3120 ; [.17C5.0020.0002.3120] # BOPOMOFO LETTER AU +3121 ; [.17C6.0020.0002.3121] # BOPOMOFO LETTER OU +3122 ; [.17C7.0020.0002.3122] # BOPOMOFO LETTER AN +3123 ; [.17C8.0020.0002.3123] # BOPOMOFO LETTER EN +3124 ; [.17C9.0020.0002.3124] # BOPOMOFO LETTER ANG +31B2 ; [.17CA.0020.0002.31B2] # BOPOMOFO LETTER ONG +3125 ; [.17CB.0020.0002.3125] # BOPOMOFO LETTER ENG +31B0 ; [.17CC.0020.0002.31B0] # BOPOMOFO LETTER AM +31B1 ; [.17CD.0020.0002.31B1] # BOPOMOFO LETTER OM +31AC ; [.17CE.0020.0002.31AC] # BOPOMOFO LETTER IM +3126 ; [.17CF.0020.0002.3126] # BOPOMOFO LETTER ER +3127 ; [.17D0.0020.0002.3127] # BOPOMOFO LETTER I +3128 ; [.17D1.0020.0002.3128] # BOPOMOFO LETTER U +3129 ; [.17D2.0020.0002.3129] # BOPOMOFO LETTER IU diff --git a/lib/Unicode/Collate/t/test.t b/lib/Unicode/Collate/t/test.t new file mode 100644 index 0000000..be4cc4a --- /dev/null +++ b/lib/Unicode/Collate/t/test.t @@ -0,0 +1,115 @@ +# Before `make install' is performed this script should be runnable with +# `make test'. 
After `make install' it should work as `perl test.pl' + +######################### + +use Test; +BEGIN { plan tests => 20 }; +use Unicode::Collate; +ok(1); # If we made it this far, we're ok. + +######################### + +my $UCA = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, +); + +ok(ref $UCA, "Unicode::Collate"); + +ok( + join(':', $UCA->sort( + qw/ lib strict Carp ExtUtils CGI Time warnings Math overload Pod CPAN / + ) ), + join(':', + qw/ Carp CGI CPAN ExtUtils lib Math overload Pod strict Time warnings / + ), +); + +my $A_acute = pack('U', 0x00C1); +my $acute = pack('U', 0x0301); + +ok($UCA->cmp("A$acute", $A_acute), -1); + +ok($UCA->cmp("", ""), 0); +ok($UCA->cmp("", "perl"), -1); + +eval "use Unicode::Normalize"; + +if(!$@){ + my $NFD = Unicode::Collate->new( + table => 'keys.txt', + ); + ok($NFD->cmp("A$acute", $A_acute), 0); +} +else{ + ok(1,1); +} + +my $tr = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + ignoreName => qr/^(?:HANGUL|HIRAGANA|KATAKANA|BOPOMOFO)$/, + entry => <<'ENTRIES', +0063 0068 ; [.0893.0020.0002.0063] # "ch" in traditional Spanish +0043 0068 ; [.0893.0020.0008.0043] # "Ch" in traditional Spanish +ENTRIES +); + +ok( + join(':', $tr->sort( + qw/ acha aca ada acia acka / + ) ), + join(':', + qw/ aca acia acka acha ada / + ), +); + +ok( + join(':', $UCA->sort( + qw/ acha aca ada acia acka / + ) ), + join(':', + qw/ aca acha acia acka ada / + ), +); + +my $old_level = $UCA->{level}; +my $hiragana = "\x{3042}\x{3044}"; +my $katakana = "\x{30A2}\x{30A4}"; + +$UCA->{level} = 2; + +ok( $UCA->cmp("ABC","abc"), 0); +ok( $UCA->cmp($hiragana, $katakana), 0); + +$UCA->{level} = $old_level; + +$UCA->{katakana_before_hiragana} = 1; + +ok( $UCA->cmp("abc", "ABC"), -1); +ok( $UCA->cmp($hiragana, $katakana), 1); + +$UCA->{upper_before_lower} = 1; + +ok( $UCA->cmp("abc", "ABC"), 1); +ok( $UCA->cmp($hiragana, $katakana), 1); + +$UCA->{katakana_before_hiragana} = 0; + +ok( $UCA->cmp("abc", "ABC"), 
1); +ok( $UCA->cmp($hiragana, $katakana), -1); + +$UCA->{upper_before_lower} = 0; + +ok( $UCA->cmp("abc", "ABC"), -1); +ok( $UCA->cmp($hiragana, $katakana), -1); + +my $ign = Unicode::Collate->new( + table => 'keys.txt', + normalization => undef, + ignoreChar => qr/^[ae]$/, +); + +ok( $ign->cmp("element","lament"), 0); + diff --git a/lib/Unicode/Normalize.pm b/lib/Unicode/Normalize.pm new file mode 100644 index 0000000..79939b1 --- /dev/null +++ b/lib/Unicode/Normalize.pm @@ -0,0 +1,314 @@ +package Unicode::Normalize; + +use 5.006; +use strict; +use warnings; +use Carp; +use Lingua::KO::Hangul::Util; + +our $VERSION = '0.04'; +our $PACKAGE = __PACKAGE__; + +require Exporter; +our @ISA = qw(Exporter); +our @EXPORT = qw( NFC NFD NFKC NFKD ); +our @EXPORT_OK = qw( normalize ); +our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] ); + +our $Combin = do "unicore/CombiningClass.pl" + || do "unicode/CombiningClass.pl" + || croak "$PACKAGE: CombiningClass.pl not found"; + +our $Decomp = do "unicore/Decomposition.pl" + || do "unicode/Decomposition.pl" + || croak "$PACKAGE: Decomposition.pl not found"; + +our %Combin; # $codepoint => $number : combination class +our %Canon; # $codepoint => \@codepoints : canonical decomp. +our %Compat; # $codepoint => \@codepoints : compat. decomp. 
+our %Compos; # $string => $codepoint : composite +our %Exclus; # $codepoint => 1 : composition exclusions + +{ + my($f, $fh); + foreach my $d (@INC) { + use File::Spec; + $f = File::Spec->catfile($d, "unicore", "CompExcl.txt"); + last if open($fh, $f); + $f = File::Spec->catfile($d, "unicode", "CompExcl.txt"); + last if open($fh, $f); + $f = undef; + } + croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f; + while(<$fh>){ + next if /^#/ or /^$/; + s/#.*//; + $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/; + } + close $fh; +} + +while($Combin =~ /(.+)/g) +{ + my @tab = split /\t/, $1; + my $ini = hex $tab[0]; + if($tab[1] eq '') + { + $Combin{ $ini } = $tab[2]; + } + else + { + $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]); + } +} + +while($Decomp =~ /(.+)/g) +{ + my @tab = split /\t/, $1; + my $compat = $tab[2] =~ s/<[^>]+>//; + my $dec = [ _getHexArray($tab[2]) ]; # decomposition + my $com = pack('U*', @$dec); # composable sequence + my $ini = hex($tab[0]); + if($tab[1] eq '') + { + $Compat{ $ini } = $dec; + if(! $compat){ + $Canon{ $ini } = $dec; + $Compos{ $com } = $ini; + } + } + else + { + foreach my $u ($ini .. hex($tab[1])){ + $Compat{ $u } = $dec; + if(! $compat){ + $Canon{ $u } = $dec; + $Compos{ $com } = $ini; + } + } + } +} + +foreach my $key (keys %Canon) # exhaustive decomposition +{ + $Canon{$key} = [ getCanonList($key) ]; +} + +foreach my $key (keys %Compat) # exhaustive decomposition +{ + $Compat{$key} = [ getCompatList($key) ]; +} + +sub getCanonList +{ + my @src = @_; + my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src; + join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec); + # condition @src == @dec is not ok. +} + +sub getCompatList +{ + my @src = @_; + my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src; + join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec); + # condition @src == @dec is not ok. 
+} + +sub NFD($){ _decompose(shift, 0) } + +sub NFKD($){ _decompose(shift, 1) } + +sub NFC($){ _compose(NFD(shift)) } + +sub NFKC($){ _compose(NFKD(shift)) } + +sub normalize($$) +{ + my($form,$str) = @_; + $form eq 'D' || $form eq 'NFD' ? NFD($str) : + $form eq 'C' || $form eq 'NFC' ? NFC($str) : + $form eq 'KD' || $form eq 'NFKD' ? NFKD($str) : + $form eq 'KC' || $form eq 'NFKC' ? NFKC($str) : + croak $PACKAGE."::normalize: invalid form name: $form"; +} + + +## +## string _decompose(string, compat?) +## +sub _decompose +{ + my $str = $_[0]; + my $hash = $_[1] ? \%Compat : \%Canon; + my @ret; + my $retstr=""; + foreach my $u (unpack 'U*', $str){ + push @ret, + $hash->{ $u } ? @{ $hash->{ $u } } : + _isHangul($u) ? decomposeHangul($u) : $u; + } + for(my $i=0; $i<@ret;){ + $retstr .= pack('U', $ret[$i++]), next + unless $Combin{ $ret[$i] } && $i+1 < @ret && $Combin{ $ret[$i+1] }; + my @tmp; + push(@tmp, $ret[$i++]) while $i < @ret && $Combin{ $ret[$i] }; + $retstr .= pack 'U*', @tmp[ + sort { + $Combin{ $tmp[$a] } <=> $Combin{ $tmp[$b] } || $a <=> $b + } 0 .. @tmp - 1, + ]; + } + $retstr; +} + +## +## string _compose(string) +## +## S : starter; NS : not starter; +## +## composable sequence begins at S. +## S + S or (S + S) + S may be composed. +## NS + NS must not be composed. +## +sub _compose +{ + my @src = unpack('U*', composeHangul shift); # get codepoints + for(my $s = 0; $s+1 < @src; $s++){ + next unless defined $src[$s] && ! $Combin{ $src[$s] }; # S only + my($c, $blocked); + for(my $j = $s+1; $j < @src && !$blocked; $j++){ + $blocked = 1 if ! $Combin{ $src[$j] }; + + next if $j != $s + 1 && defined $src[$j-1] + && $Combin{ $src[$j-1] } && $Combin{ $src[$j] } + && $Combin{ $src[$j-1] } == $Combin{ $src[$j] }; + + if( # $c != 0, maybe. + $c = $Compos{pack('U*', @src[$s,$j])} and ! 
$Exclus{$c} + ) + { + $src[$s] = $c; $src[$j] = undef; $blocked = 0; + } + } + } + pack 'U*', grep defined(), @src; +} + +## +## "hhhh hhhh hhhh" to (dddd, dddd, dddd) +## +sub _getHexArray +{ + my $str = shift; + map hex(), $str =~ /([0-9A-Fa-f]+)/g; +} + +## +## Hangul Syllables +## +sub _isHangul +{ + my $code = shift; + return 0xAC00 <= $code && $code <= 0xD7A3; +} + +## +## for Debug +## +sub _getCombin { wantarray ? %Combin : \%Combin } +sub _getCanon { wantarray ? %Canon : \%Canon } +sub _getCompat { wantarray ? %Compat : \%Compat } +sub _getCompos { wantarray ? %Compos : \%Compos } +sub _getExclus { wantarray ? %Exclus : \%Exclus } +1; +__END__ + +=head1 NAME + +Unicode::Normalize - normalized forms of Unicode text + +=head1 SYNOPSIS + + use Unicode::Normalize; + + $string_NFD = NFD($raw_string); # Normalization Form D + $string_NFC = NFC($raw_string); # Normalization Form C + $string_NFKD = NFKD($raw_string); # Normalization Form KD + $string_NFKC = NFKC($raw_string); # Normalization Form KC + + or + + use Unicode::Normalize 'normalize'; + + $string_NFD = normalize('D', $raw_string); # Normalization Form D + $string_NFC = normalize('C', $raw_string); # Normalization Form C + $string_NFKD = normalize('KD', $raw_string); # Normalization Form KD + $string_NFKC = normalize('KC', $raw_string); # Normalization Form KC + +=head1 DESCRIPTION + +=over 4 + +=item C<$string_NFD = NFD($raw_string)> + +returns the Normalization Form D (formed by canonical decomposition). + + +=item C<$string_NFC = NFC($raw_string)> + +returns the Normalization Form C (formed by canonical decomposition +followed by canonical composition). + +=item C<$string_NFKD = NFKD($raw_string)> + +returns the Normalization Form KD (formed by compatibility decomposition). + +=item C<$string_NFKC = NFKC($raw_string)> + +returns the Normalization Form KC (formed by compatibility decomposition +followed by B<canonical> composition). 
+ +=item C<$normalized_string = normalize($form_name, $raw_string)> + +As C<$form_name>, one of the following names must be given. + + 'C' or 'NFC' for Normalization Form C + 'D' or 'NFD' for Normalization Form D + 'KC' or 'NFKC' for Normalization Form KC + 'KD' or 'NFKD' for Normalization Form KD + +=back + +=head2 EXPORT + +C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default. + +C<normalize>: on request. + +=head1 AUTHOR + +SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt> + + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This program is free software; you can redistribute it and/or + modify it under the same terms as Perl itself. + +=head1 SEE ALSO + +=over 4 + +=item L<Lingua::KO::Hangul::Util> + +utility functions for Hangul Syllables + +=item http://www.unicode.org/unicode/reports/tr15/ + +Unicode Normalization Forms - UAX #15 + +=back + +=cut diff --git a/lib/Unicode/Normalize/Changes b/lib/Unicode/Normalize/Changes new file mode 100644 index 0000000..910016c --- /dev/null +++ b/lib/Unicode/Normalize/Changes @@ -0,0 +1,16 @@ +Revision history for Perl extension Unicode::Normalize. + +0.04 Wed Aug 15 19:02:41 2001 + - fix: NFD("") and NFKD("") must return "", not undef. + +0.03 Fri Aug 10 22:44:18 2001 + - rename the module name to Unicode::Normalize. + - normalize takes two arguments. 
+ +0.02 Thu Aug 9 22:56:36 2001 + - add function normalize + +0.01 Mon Aug 6 21:45:11 2001 + - original version; created by h2xs 1.21 with options + -A -X -n Text::Unicode::Normalize + diff --git a/lib/Unicode/Normalize/README b/lib/Unicode/Normalize/README new file mode 100644 index 0000000..e1f9e96 --- /dev/null +++ b/lib/Unicode/Normalize/README @@ -0,0 +1,57 @@ +Unicode/Normalize version 0.04 +=================================== + +Unicode::Normalize - normalized forms of Unicode text + +SYNOPSIS + + use Unicode::Normalize; + + $string_NFD = NFD($string); # Normalization Form D + $string_NFC = NFC($string); # Normalization Form C + $string_NFKD = NFKD($string); # Normalization Form KD + $string_NFKC = NFKC($string); # Normalization Form KC + + or + + use Unicode::Normalize 'normalize'; + + $string_NFD = normalize('D', $string); # Normalization Form D + $string_NFC = normalize('C', $string); # Normalization Form C + $string_NFKD = normalize('KD', $string); # Normalization Form KD + $string_NFKC = normalize('KC', $string); # Normalization Form KC + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries: + +Carp +Exporter +File::Spec +Lingua::KO::Hangul::Util +$unidir/CombiningClass.pl +$unidir/Decomposition.pl +$unidir/CompExcl.txt + +# $unidir is $LIB/unicore or $LIB/unicode + +COPYRIGHT AND LICENCE + +SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt> + + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This program is free software; you can redistribute it and/or + modify it under the same terms as Perl itself. 
+ diff --git a/lib/Unicode/Normalize/t/norm.t b/lib/Unicode/Normalize/t/norm.t new file mode 100644 index 0000000..88e4e7d --- /dev/null +++ b/lib/Unicode/Normalize/t/norm.t @@ -0,0 +1,42 @@ +# Before `make install' is performed this script should be runnable with +# `make test'. After `make install' it should work as `perl test.pl' + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 15 }; +use Unicode::Normalize qw(normalize); +ok(1); # If we made it this far, we're ok. + +######################### + +ok(normalize('C', ""), ""); +ok(normalize('D', ""), ""); + +sub hexNFC { + join " ", map sprintf("%04X", $_), + unpack 'U*', normalize 'C', pack 'U*', map hex(), split ' ', shift; +} +sub hexNFD { + join " ", map sprintf("%04X", $_), + unpack 'U*', normalize 'D', pack 'U*', map hex(), split ' ', shift; +} + +ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); + +ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); + +ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); + +ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); +ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); + diff --git a/lib/Unicode/Normalize/t/test.t b/lib/Unicode/Normalize/t/test.t new file mode 100644 index 0000000..499f3ae --- /dev/null +++ b/lib/Unicode/Normalize/t/test.t @@ -0,0 +1,42 @@ +# Before `make install' is performed this script should be 
runnable with +# `make test'. After `make install' it should work as `perl test.pl' + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 15 }; +use Unicode::Normalize; +ok(1); # If we made it this far, we're ok. + +######################### + +ok(NFC(""), ""); +ok(NFD(""), ""); + +sub hexNFC { + join " ", map sprintf("%04X", $_), + unpack 'U*', NFC pack 'U*', map hex(), split ' ', shift; +} +sub hexNFD { + join " ", map sprintf("%04X", $_), + unpack 'U*', NFD pack 'U*', map hex(), split ' ', shift; +} + +ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); + +ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); +ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); + +ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); +ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); + +ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); +ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); +