ext/Encode/bin/enc2xs Encode module generator
ext/Encode/bin/piconv iconv by perl
ext/Encode/bin/ucm2table Table Generator for testing
+ext/Encode/bin/ucmlint A UCM Lint utility
ext/Encode/bin/unidump Unicode Dump like hexdump(1)
ext/Encode/encengine.c Encode extension
ext/Encode/encoding.pm Perl Pragmactic Module
ext/Encode/lib/Encode/Alias.pm Encode extension
ext/Encode/lib/Encode/CJKConstants.pm Encode extension
ext/Encode/lib/Encode/CN/HZ.pm Encode extension
-ext/Encode/lib/Encode/Config.pm Encode configuration module
+ext/Encode/lib/Encode/Config.pm Encode configuration module
ext/Encode/lib/Encode/Encoder.pm OO Encoder
ext/Encode/lib/Encode/Encoding.pm Encode extension
ext/Encode/lib/Encode/JP/2022_JP.pm Encode extension
ext/Encode/t/Aliases.t Encode extension test
ext/Encode/t/CN.t Encode extension test
ext/Encode/t/Encode.t Encode extension test
-ext/Encode/t/Encoder.t Encode::Encoder test
+ext/Encode/t/Encoder.t Encode::Encoder test
ext/Encode/t/JP.t Encode extension test
ext/Encode/t/KR.t Encode extension test
ext/Encode/t/TW.t Encode extension test
ext/Encode/t/Unicode.t Encode extension test
+ext/Encode/t/bogus.ucm Sample data for ucmlint
ext/Encode/t/encoding.t encoding extension test
ext/Encode/t/gb2312.euc test data
ext/Encode/t/gb2312.ref test data
ext/Encode/ucm/macChintrad.ucm Unicode Character Map
ext/Encode/ucm/macCroatian.ucm Unicode Character Map
ext/Encode/ucm/macCyrillic.ucm Unicode Character Map
-ext/Encode/ucm/macDevanaga.ucm Unicode Character Map
ext/Encode/ucm/macDingbats.ucm Unicode Character Map
ext/Encode/ucm/macFarsi.ucm Unicode Character Map
ext/Encode/ucm/macGreek.ucm Unicode Character Map
-ext/Encode/ucm/macGujarati.ucm Unicode Character Map
-ext/Encode/ucm/macGurmukhi.ucm Unicode Character Map
ext/Encode/ucm/macHebrew.ucm Unicode Character Map
ext/Encode/ucm/macIceland.ucm Unicode Character Map
ext/Encode/ucm/macJapanese.ucm Unicode Character Map
Nicholas Clark <nick@ccl4.org>
Nick Ing-Simmons <nick@ing-simmons.net>
Paul Marquess <paul_marquess@yahoo.co.uk>
+Philip Newton <Philip.Newton@gmx.net>
SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
Spider Boardman <spider@web.zk3.dec.com>
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.31 2002/04/08 18:08:07 dankogai Exp dankogai $
+# $Id: Changes,v 1.32 2002/04/09 20:06:15 dankogai Exp dankogai $
#
-1.31 $Date: 2002/04/08 18:08:07 $
+1.32 $Date: 2002/04/09 20:06:15 $
++ bin/ucmlint
++ t/bogus.ucm
+- ucm/macDevanaga.ucm Unicode Character Map
+- ucm/macGujarati.ucm Unicode Character Map
+- ucm/macGurmukhi.ucm Unicode Character Map
+ A utility to check integrity of .ucm files. t/bogus.ucm is a
+ ucm that is deliberately bogus. unused Indic mappings are removed
+ for the time being.
+! Encode.pm
+ resolve_alias() added as suggested by jhi. Same as
+ find_encoding("alias")->name. For convenience. This one is
+ defined in Encode.pm instead of Alias.pm.
+ Message-Id: <20020409215846.H17022@alpha.hut.fi>
+! Encode.xs
+ Memory allocation bug detected during the development of ucmlint -- fixed.
+ Message-Id: <C0DDCE16-4BE7-11D6-9204-00039301D480@dan.co.jp>
+! lib/Encode/Unicode.pm
+ valid_ucs2(0) is false but must be true.
+ 3 patches from NI-S as follows. This also has fixed the incident
+ Andy has reported.
+! lib/Encode/Alias.pm
+ find_alias() recursion prevention
+! t/Aliases.t
+ Checks for the patch above
+! t/Encode/Unicode.pm
+ An extra "F" that caused valid_ucs2() to return a bogus value -- fixed
+ Message-Id: <20020409133927.17803.1@bactrian.elixent.com>
+ Message-Id: <Pine.SOL.4.10.10204091338220.10390-100000@maxwell.phys.lafayette.edu>
+ 2 Small Patches from jhi as follows:
+! Encode.pm
+ Encode->encodings() lists in case-insensitive order (as it was)
+! bin/piconv
+ -l option prints available encodings to STDOUT instead of STDERR
+! lib/Encode/Alias.pm
+ s/defintion/definition/
+ Message-Id: <200204082306.CAA21033@alpha.hut.fi>
+! AUTHORS
+! lib/Encode/Supported.pod
+! lib/Encode/Unicode.pm
+ POD revise by Philip Newton. This adds Philip to AUTHORS list.
+ Thank you for the exact quote of Douglas Adams :)
+ Message-Id: <22s3bu4gpvhhsses64nj3afuu0lo927rv3@4ax.com>
+
+1.31 2002/04/08 18:08:07
! lib/Encode/Encoder.pm
+ t/Encoder.t
Encode::Encoder, once just a placeholder of an idea, is now much more
Typo fixes and improvements by jhi
Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al.
-1.11 $Date: 2002/04/08 18:08:07 $
+1.11 $Date: 2002/04/09 20:06:15 $
+ t/encoding.t
+ t/jperl.t
! MANIFEST
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.31 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.32 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
require DynaLoader;
# Public, encouraged API is exported by default
our @EXPORT = qw (
- encode
decode
- encode_utf8
decode_utf8
- find_encoding
+ encode
+ encode_utf8
encodings
+ find_encoding
);
our @EXPORT_OK =
qw(
+ _utf8_off
+ _utf8_on
define_encoding
from_to
- is_utf8
- is_8bit
is_16bit
- utf8_upgrade
+ is_8bit
+ is_utf8
+ resolve_alias
utf8_downgrade
- _utf8_on
- _utf8_off
+ utf8_upgrade
);
bootstrap Encode ();
return __PACKAGE__->getEncoding($name,$skip_external);
}
+# Resolve an encoding name or alias to its canonical name.
+# The argument is handed to find_encoding(); when that yields an encoding
+# object its name is returned, otherwise nothing is returned (undef in
+# scalar context, the empty list in list context).
+sub resolve_alias {
+    my $found = find_encoding(shift);
+    return unless defined $found;
+    return $found->name;
+}
+
sub encode
{
my ($name,$string,$check) = @_;
ENCODING may be either the name of an encoding or an
I<encoding object>
+But before you do so, make sure the alias is not already in use by
+checking it with C<resolve_alias()>, which returns the canonical name
+of the encoding an alias resolves to, or a false value if there is none.
+For example:
+
+ Encode::resolve_alias("latin1") eq "iso-8859-1" # true
+ Encode::resolve_alias("iso-8859-12") # false; nonexistent
+ Encode::resolve_alias($name) eq $name # true if $name is canonical
+
+C<resolve_alias()> does not need C<use Encode::Alias>; it can be
+imported with C<use Encode qw(resolve_alias)>.
+
See L<Encode::Alias> on details.
=head1 Encoding and IO
#include "encode.h"
#include "def_t.h"
-#define ENCODE_XS_PROFILE 0 /* set 1 to profile.
+#define ENCODE_XS_PROFILE 0 /* set 1 or more to profile.
t/encoding.t dumps core because of
Perl_warner and PerlIO don't work well */
switch (code) {
case ENCODE_NOSPACE:
{
- STRLEN more, sleft;
+ STRLEN more = 0; /* make sure you initialize! */
+ STRLEN sleft;
sdone += slen;
ddone += dlen;
sleft = tlen - sdone;
- if (sdone) { /* has src ever been processed ? */
+#if ENCODE_XS_PROFILE >= 2
+ Perl_warn(aTHX_
+ "more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n",
+ more, sdone, sleft, SvLEN(dst));
+#endif
+ if (sdone != 0) { /* has src ever been processed ? */
#if ENCODE_XS_USEFP == 2
more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone
- SvLEN(dst);
bin/enc2xs Encode module generator
bin/piconv iconv by perl
bin/ucm2table Table Generator for testing
+bin/ucmlint A UCM Lint utility
bin/unidump Unicode Dump like hexdump(1)
encengine.c Encode extension
encoding.pm Perl Pragmactic Module
t/KR.t Encode extension test
t/TW.t Encode extension test
t/Unicode.t Encode extension test
+t/bogus.ucm Sample data for ucmlint
t/encoding.t encoding extension test
t/gb2312.euc test data
t/gb2312.ref test data
ucm/macChintrad.ucm Unicode Character Map
ucm/macCroatian.ucm Unicode Character Map
ucm/macCyrillic.ucm Unicode Character Map
-ucm/macDevanaga.ucm Unicode Character Map
ucm/macDingbats.ucm Unicode Character Map
ucm/macFarsi.ucm Unicode Character Map
ucm/macGreek.ucm Unicode Character Map
-ucm/macGujarati.ucm Unicode Character Map
-ucm/macGurmukhi.ucm Unicode Character Map
ucm/macHebrew.ucm Unicode Character Map
ucm/macIceland.ucm Unicode Character Map
ucm/macJapanese.ucm Unicode Character Map
#!./perl
-# $Id: piconv,v 1.20 2002/04/04 19:50:52 dankogai Exp $
+# $Id: piconv,v 1.21 2002/04/09 20:06:15 dankogai Exp dankogai $
#
use 5.7.3;
use strict;
--- /dev/null
+#!/usr/local/bin/perl
+#
+# $Id: ucmlint,v 0.1 2002/04/09 20:04:30 dankogai Exp $
+#
+
+use strict;
+our $VERSION = do { my @r = (q$Revision: 0.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+
+use Getopt::Std;
+# Command-line switches, filled in by getopts(): -D, -e, -h, -f and -v
+# are all simple flags (see help() for their meaning).
+our %Opt;
+getopts("Dehfv", \%Opt);
+
+# -e asks for cross-checking against the Encode module, so load it now
+# and give up right away if it cannot be loaded.
+if ($Opt{e}){
+ eval{ require Encode; };
+ $@ and die "can't load Encode : $@";
+}
+
+# Show the usage message when -h is given or when no UCM file is named.
+$Opt{h} and help();
+@ARGV or help();
+
+# Print the usage message on STDOUT and terminate the program.
+# Exiting here keeps "-h" from printing the message and then carrying on
+# to lint files, and keeps "-h" without files from printing it twice.
+# The here-document is terminated by the first empty line.
+sub help{
+    print <<"";
+$0 -[Dehfv] [ucm files ...]
+ -D debug mode on
+ -e test with Encode module also (requires perl 5.7.3 or higher)
+ -h shows this message
+ -f forces roundtrip check even for |[123]
+ -v verbose mode
+
+    exit;
+}
+
+# Flush STDOUT after every write.
+$| = 1;
+# Per-file state, reset at the top of the main loop:
+#   %Hdr - header fields seen before CHARMAP (<key> => value)
+#   %U2E - Unicode => encoded mapping, keyed by uniparse() output
+#   %E2U - encoded => Unicode mapping, keyed by encparse() output
+my (%Hdr, %U2E, %E2U);
+# True once the CHARMAP line has been seen.
+my $in_charmap = 0;
+# Number of errors and warnings reported through nit().
+my $nerror = 0;
+my $nwarning = 0;
+
+# Report one lint finding on STDOUT, tagged with the current file name
+# ($ARGV) and the current input line number ($.).
+#   first argument  - text of the finding
+#   second argument - optional severity: 2 is a notice, 1 is a warning
+#                     (counted in $nwarning); anything else, including
+#                     no severity at all, is an error (counted in $nerror)
+sub nit($;$){
+    my ($text, $severity) = @_;
+    my $label =
+        $severity == 2 ? 'notice'
+      : $severity == 1 ? 'warning'
+      :                  'error';
+    $label eq 'warning' and $nwarning++;
+    $label eq 'error'   and $nerror++;
+    print "$ARGV:$label in line $.: $text\n";
+}
+
+# Lint every UCM file named on the command line, one after another.
+# For each file the header and mapping tables are rebuilt from scratch,
+# every line is checked as it is read, the round-trip check is run over
+# the collected tables, and a one-line error summary is printed.
+for $ARGV (@ARGV){
+    # Three-argument open: the file name is never taken as a mode or pipe.
+    open UCM, '<', $ARGV or die "$ARGV:$!";
+    %Hdr = %U2E = %E2U = ();
+    $in_charmap = $nerror = $nwarning = 0;
+    while(<UCM>){
+        chomp;
+        s/\s*#.*$//o; /^$/ and next;    # strip comments, skip empty lines
+        if ($_ eq "CHARMAP"){
+            # The header is complete: check the fields every UCM must have.
+            $in_charmap = 1;
+            for my $must (qw/code_set_name mb_cur_min mb_cur_max/){
+                exists $Hdr{$must} or nit("<$must> nonexistent");
+            }
+            $Hdr{mb_cur_min} > $Hdr{mb_cur_max}
+                and nit(sprintf("mb_cur_min(%d) > mb_cur_max(%d)",
+                                $Hdr{mb_cur_min},$Hdr{mb_cur_max}));
+            next;
+        }
+        if (!$in_charmap){
+            # Header line: "<key> value", the value optionally quoted.
+            my($hkey, $hvalue) = /^<(\S+)>\s+[\"\']?([^\"\']+)/o or next;
+            $Opt{D} and warn "$hkey => $hvalue";
+            if ($hkey eq "code_set_name"){  # name check
+                # report the offending value, not the key that is always the same
+                exists $Hdr{code_set_name}
+                    and nit("Duplicate <code_set_name>: $hvalue");
+            }
+            if ($hkey eq "code_set_alias"){ # alias check
+                $hvalue eq $Hdr{code_set_name}
+                    and nit(qq(alias "$hvalue" is already in <code_set_name>));
+            }
+            $Hdr{$hkey} = $hvalue;
+            next;
+        }
+        # Mapping line: "<Uxxxx> \xNN... |n".  Any amount of whitespace may
+        # separate the fields; lines that do not have three fields
+        # (e.g. "END CHARMAP") are skipped.
+        my $name = $Hdr{code_set_name};
+        my($unistr, $encstr, $fb) = /^(\S+)\s+(\S+)\s+(\S+)/o or next;
+        $Opt{v} and nit($_, 2);
+        my $uni = uniparse($unistr);
+        my $enc = encparse($encstr);
+        if ($fb =~ /^\|([0123])$/){
+            $fb = $1;
+        }else{
+            # Do not pick up a stale $1 here; after reporting, go on as
+            # if the entry were a plain round-trip mapping.
+            nit("malformed fallback: $fb");
+            $fb = 0;
+        }
+        $Opt{f} and $fb = 0;
+        unless ($fb == 1){ # check uni -> enc
+            if (exists $U2E{$uni}){
+                nit("dupe encode map: U$uni => $U2E{$uni} and $enc", 1);
+            }else{
+                $U2E{$uni} = $enc;
+                if ($Opt{e} and $fb != 3) {
+                    my $e = hex2enc($enc);
+                    my $u = hex2uni($uni);
+                    my $eu = Encode::encode($name, $u);
+                    $e eq $eu
+                        or nit(qq(encode('$name', $uni) != $enc));
+                }
+            }
+        }
+        unless ($fb == 3){ # check enc -> uni
+            if (exists $E2U{$enc}){
+                nit("dupe decode map: $enc => U$E2U{$enc} and U$uni", 1);
+            }else{
+                $E2U{$enc} = $uni;
+                if ($Opt{e} and $fb != 1) {
+                    my $e = hex2enc($enc);
+                    my $u = hex2uni($uni);
+                    $Opt{D} and warn "$uni, $enc";
+                    my $de = Encode::decode($name, $e);
+                    $de eq $u
+                        or nit(qq(decode('$name', $enc) != $uni));
+                }
+            }
+        }
+    }
+    $in_charmap or nit("Where is CHARMAP?");
+    checkRT();
+    # The file name is passed as an argument so that a "%" in it cannot
+    # be mistaken for a format directive.
+    printf("%s: %s error%s found\n",
+           $ARGV,
+           ($nerror == 0 ? 'no' : $nerror),
+           ($nerror > 1 ? 's' : ''));
+    # Closed only now so that $. still holds the last line number for the
+    # messages above; closing also resets $. for the next file.
+    close UCM;
+}
+
+exit;
+
+# Turn a comma-separated list of hex byte values into the corresponding
+# string of octets, one octet per list element.
+sub hex2enc{
+    my $hexlist = shift;
+    my @octets = map { hex($_) } split(",", $hexlist);
+    return pack("C*", @octets);
+}
+# Turn a comma-separated list of hex code points into the corresponding
+# character string, one character per list element.
+sub hex2uni{
+    my $hexlist = shift;
+    my $chars = "";
+    $chars .= chr(hex($_)) for split(",", $hexlist);
+    return $chars;
+}
+
+# Round-trip check over the tables collected for the current file.
+# Every Unicode key in %U2E must come back to itself through %E2U, and
+# every encoded key in %E2U must come back to itself through %U2E; each
+# mismatch is reported as an error through nit().
+sub checkRT{
+    # Unicode -> encoded -> Unicode.  This walks the keys of %U2E: keys of
+    # %E2U are encoded strings and would (almost) never be found in %U2E.
+    for my $uni (keys %U2E){
+        my $enc = $U2E{$uni};
+        defined $enc or next; # okay
+        my $back = defined $E2U{$enc} ? $E2U{$enc} : '';
+        $back eq $uni or
+            nit("RT failure: U$uni => $enc =>U$back");
+    }
+    # encoded -> Unicode -> encoded
+    for my $enc (keys %E2U){
+        my $uni = $E2U{$enc};
+        defined $uni or next; # okay
+        my $back = defined $U2E{$uni} ? $U2E{$uni} : '';
+        $back eq $enc or
+            nit("RT failure: $enc => U$uni => $back");
+    }
+}
+
+
+# Parse the Unicode field of a mapping line, i.e. one or more "<Uxxxx>"
+# items written back to back, and return the code points as a
+# comma-separated string of hex numbers ("0041" or "0041,0300").
+# A field with no "<U...>" item at all, or an item that is not made of
+# hex digits only, is reported as an error through nit().
+sub uniparse{
+    my $str = shift;
+    my @u;
+    push @u, $1 while($str =~ /\G<U(.*?)>/ig);
+    @u or nit("malformed Unicode character: $str");
+    for my $u (@u){
+        # hex digits only: letters beyond F are not part of a code point
+        $u =~ /^[0-9A-Fa-f]+$/o
+            or nit("malformed Unicode character: $u");
+    }
+    return join(',', @u);
+}
+
+# Parse the encoded field of a mapping line, i.e. a run of "\xNN" items,
+# and return the byte values as a comma-separated string of hex numbers
+# ("8E,A1").  An item that is not one or two hex digits is reported as
+# an error through nit().
+sub encparse{
+    my $str = shift;
+    my @e;
+    for my $e (split /\\x/io, $str){
+        # Only the empty field in front of the first \x is skipped; a
+        # plain truth test would also throw away the byte "\x0".
+        $e eq '' and next;
+        $e =~ /^[0-9A-Fa-f]{1,2}$/o
+            or nit("Hex $e in $str is bogus");
+        # Keep the text as written, even when bogus, so that the entry
+        # stays distinct and no stale capture from an earlier match is used.
+        push @e, $e;
+    }
+    return join(',', @e);
+}
+
+
+
+__END__
+
+UCM file looks like this.
+
+ #
+ # Comments
+ #
+ <code_set_name> "US-ascii" # Required
+ <code_set_alias> "ascii" # Optional
+ <mb_cur_min> 1 # Required; usually 1
+ <mb_cur_max> 1 # Max. # of bytes/char
+ <subchar> \x3F # Substitution char
+ #
+ CHARMAP
+ <U0000> \x00 |0 # <control>
+ <U0001> \x01 |0 # <control>
+ <U0002> \x02 |0 # <control>
+ ....
+ <U007C> \x7C |0 # VERTICAL LINE
+ <U007D> \x7D |0 # RIGHT CURLY BRACKET
+ <U007E> \x7E |0 # TILDE
+ <U007F> \x7F |0 # <control>
+ END CHARMAP
+
package Encode::Alias;
use strict;
use Encode;
-our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.27 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
require Exporter;
# Public, encouraged API is exported by default
-our @EXPORT =
+our @EXPORT =
qw (
define_alias
find_alias
# has been redefined as the euro symbol.)
define_alias( qr/^(.+)\@euro$/i => '"$1"' );
- define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i
+ define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i
=> '"iso-8859-$Encode::Alias::Latin2iso[$1]"' );
define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
- hebrew|arabic|baltic|vietnamese)$/ix =>
+ hebrew|arabic|baltic|vietnamese)$/ix =>
'"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' );
# Common names for non-latin prefered MIME names
# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
# And Microsoft has their own naming (again, surprisingly).
- # And windows-* is registered in IANA!
+ # And windows-* is registered in IANA!
define_alias( qr/\b(?:ibm|ms|windows)[-_]?(\d\d\d\d?)$/i => '"cp$1"');
# Sometimes seen with a leading zero.
define_alias( qr/^mac_(.*)$/i => '"mac$1"');
# Ououououou. gone. They are differente!
# define_alias( qr/\bmacRomanian$/i => '"macRumanian"');
-
+
# Standardize on the dashed versions.
# define_alias( qr/\butf8$/i => 'utf-8' );
define_alias( qr/\bkoi8r$/i => 'koi8-r' );
=head1 DESCRIPTION
Allows newName to be used as an alias for ENCODING. ENCODING may be
-either the name of an encoding or an encoding object (as described
+either the name of an encoding or an encoding object (as described
in L<Encode>).
Currently I<newName> can be specified in the following ways:
#
-# $Id: Encoder.pm,v 0.2 2002/04/08 18:08:07 dankogai Exp dankogai $
+# $Id: Encoder.pm,v 0.2 2002/04/08 18:08:07 dankogai Exp $
#
package Encode::Encoder;
use strict;
JIS has not endorsed the full Microsoft standard however.
The official C<Shift_JIS> includes only JIS X 0201 and JIS X 0208
subsets, while Microsoft has always been meaning C<Shift_JIS> to
-encode a wider character repertoire, see C<IANA> registration for
+encode a wider character repertoire. See C<IANA> registration for
C<Windows-31J>.
As a historical predecessor Microsoft's variant
that Microsoft shouldn't have used JIS as part of the name
in the first place.
-Unabiguous name: C<CP932>. C<IANA> name (not used?): C<Windows-31J>.
+Unambiguous name: C<CP932>. C<IANA> name (not used?): C<Windows-31J>.
Encode separately supports C<Shift_JIS> and C<cp932>.
use strict;
use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.28 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.29 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
#
# Aux. subs & constants
sub BOM32LE(){ 0xFFFe0000 }
sub valid_ucs2($){
- if ($_[0] < 0xD800){
- return $_[0] > 0;
- }else{
- return ($_[0] > 0xDFFF && $_[0] <= 0xFFFF);
- }
+ return
+ (0 <= $_[0] && $_[0] < 0xD800)
+ || ( 0xDFFF < $_[0] && $_[0] <= 0xFFFF);
}
sub issurrogate($){ 0xD800 <= $_[0] && $_[0] <= 0xDFFF }
sub new_sequence { $_[0] };
#
-# the two implementation of (en|de)code exist. *_modern use
-# array and *_classic stick with substr. *_classic is much
-# slower but more memory conservative. *_moder is default.
+# Two implementations of (en|de)code exist. *_modern uses
+# an array and *_classic sticks with substr. *_classic is much
+# slower but more memory conservative. *_modern is the default.
sub set_transcoder{
no warnings qw(redefine);
*decode = \&decode_classic;
*encode = \&encode_classic;
}else{
- require Carp;
+ require Carp;
Carp::croak __PACKAGE__, "::set_transcoder(modern|classic)";
}
}
my $ord = shift @ord;
unless ($size == 4 or valid_ucs2($ord &= $mask)){
if ($ucs2){
- $chk and
+ $chk and
poisoned2death($obj, "no surrogates allowed", $ord);
shift @ord; # skip the next one as well
$ord = FBCHAR;
unless ($size == 4 or valid_ucs2($ord)) {
unless(issurrogate($ord)){
if ($ucs2){
- $chk and
+ $chk and
poisoned2death($obj, "code point too high", $ord);
push @str, FBCHAR;
}else{
-
+
push @str, ensurrogate($ord);
}
}else{ # not supposed to happen
my $ord = unpack($endian, substr($str, 0, $size, ''));
unless ($size == 4 or valid_ucs2($ord &= $mask)){
if ($ucs2){
- $chk and
+ $chk and
poisoned2death($obj, "no surrogates allowed", $ord);
substr($str,0,$size,''); # skip the next one as well
$ord = FBCHAR;
unless ($size == 4 or valid_ucs2($ord)) {
unless(issurrogate($ord)){
if ($ucs2){
- $chk and
+ $chk and
poisoned2death($obj, "code point too high", $ord);
$str .= pack($endian, FBCHAR);
}else{
my ($size, $bom) = @_;
my $N = $size == 2 ? 'n' : 'N';
my $ord = unpack($N, $bom);
- return ($ord eq BOM_BE) ? $N :
+ return ($ord eq BOM_BE) ? $N :
($ord eq BOM16LE) ? 'v' : ($ord eq BOM32LE) ? 'V' : undef;
}
=head1 SYNOPSIS
- use Encode qw/encode decode/;
+ use Encode qw/encode decode/;
$ucs2 = encode("UCS-2BE", $utf8);
$utf8 = decode("UCS-2BE", $ucs2);
=head2 by Size
UCS-2 is a fixed-length encoding with each character taking 16 bits.
-It B<does not> support I<Surrogate Pair>. When surrogate pair is
-encountered during decode(), it fills its place with \xFFFD without
-I<CHECK> or croaks if I<CHECK>. When a character which ord value is
-larger than 0xFFFF, it uses 0xFFFD without I<CHECK> or croaks if
-<CHECK>.
+It B<does not> support I<Surrogate Pairs>. When a surrogate pair is
+encountered during decode(), its place is filled with \xFFFD if
+I<CHECK> is not set; if I<CHECK> is set, it croaks. When a character
+whose ord value is larger than 0xFFFF is encountered, 0xFFFD is used
+if I<CHECK> is not set; if I<CHECK> is set, it croaks.
-UTF-16 is almost the same as UCS-2 but it supports I<Surrogate Pair>.
+UTF-16 is almost the same as UCS-2 but it supports I<Surrogate Pairs>.
When it encounters a high surrogate (0xD800-0xDBFF), it fetches the
-following low surrogate (0xDC00-0xDFFF), C<desurrogate> them to form a
+following low surrogate (0xDC00-0xDFFF), C<desurrogate>s them to form a
character. Bogus surrogates result in death. When \x{10000} or above
-is encountered during encode(), it C<ensurrogate>s them and push the
+is encountered during encode(), it C<ensurrogate>s them and pushes the
surrogate pair to the output stream.
UTF-32 is a fixed-length encoding with each character taking 32 bits.
-Since it is 32-bit there is no need for I<Surrogate Pair>.
+Since it is 32-bit there is no need for I<Surrogate Pairs>.
=head2 by Endianness
First (and now failed) goal of Unicode was to map all character
-repartories into a fixed-length integer so programmers are happy.
+repertoires into a fixed-length integer so programmers are happy.
Since each character is either I<short> or I<long> in C, you have to
put endianness of each platform when you pass data to one another.
=over 4
-=item BOM as integer
+=item BOM as integer when fetched in network byte order
- 16 32 bits/char
--------------------------
-BE 0xFeFF 0x0000FeFF
-LE 0xFFeF 0xFFFe0000
--------------------------
+ 16 32 bits/char
+ -------------------------
+ BE 0xFeFF 0x0000FeFF
+ LE 0xFFFe 0xFFFe0000
+ -------------------------
=back
-
+
This modules handles BOM as follows.
=over 4
When BE or LE is omitted during decode(), it checks if BOM is in the
beginning of the string and if found endianness is set to what BOM
-says. if not found, dies.
+says. If not found, dies.
=item *
=back
-=head1 The Surrogate Pair
+=head1 Surrogate Pairs
-To say the least, surrogate pair was the biggest mistake by Unicode
-Consortium. I don't give a darn if they admit it or not. But
-according to late Douglas Adams in I<The Hitchhiker's Guide to the
-Galaxy> Triology, C<First the Universe was created and it was a bad
-move>. Their mistake was not this magnitude so let's forgive them.
+To say the least, surrogate pairs were the biggest mistake of the
+Unicode Consortium. But according to the late Douglas Adams in I<The
+Hitchhiker's Guide to the Galaxy> Trilogy, C<In the beginning the
+Universe was created. This has made a lot of people very angry and
+been widely regarded as a bad move>. Their mistake was not of this
+magnitude so let's forgive them.
(I don't dare make any comparison with Unicode Consortium and the
Vogons here ;) Or, comparing Encode to Babel Fish is completely
appropriate -- if you can only stick this into your ear :)
-A surrogate pair was born when Unicode Consortium had finally
-admitted that 16 bit was not big enough to hold all the world's
-character repartorie. But they have already made UCS-2 16-bit. What
+Surrogate pairs were born when the Unicode Consortium finally
+admitted that 16 bits were not enough to hold all the world's
+character repertoires. But they had already made UCS-2 16-bit. What
do we do?
Back then 0xD800-0xDFFF was not allocated. Let's split them half and
* 1024 = 1048576 more characters. Now we can store character ranges
up to \x{10ffff} even with 16-bit encodings. This pair of
half-character is now called a I<Surrogate Pair> and UTF-16 is the
-name of encoding that embraces them.
+name of the encoding that embraces them.
Here is a fomula to ensurrogate a Unicode character \x{10000} and
above;
$uni = 0x10000 + ($hi - 0xD800) * 0x400 + ($lo - 0xDC00);
-Note this move has made \x{D800}-\x{DFFF} forbidden zone but perl
-does not prohibit them for uses.
+Note this move has made \x{D800}-\x{DFFF} into a forbidden zone but
+perl does not prohibit the use of characters within this range. To perl,
+every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I<a character>.
+
+ (*) or \x{ffff_ffff_ffff_ffff} if your perl is compiled with 64-bit
+ integer support! (**)
+
+ (**) Is anything beyond \x{11_0000} still Unicode :?
=head1 SEE ALSO
L<http://www.unicode.org/unicode/faq/utf_bom.html>
+Ch. 15, pp. 403 of C<Programming Perl (3rd Edition)>
+by Larry Wall, Tom Christiansen, Jon Orwant;
+O'Reilly & Associates; ISBN 0-596-00027-8
+
=cut
--- /dev/null
+#
+# $Id: bogus.ucm,v 1.1 2002/04/09 20:06:15 dankogai Exp dankogai $
+#
+# based upon euc-jp
+#
+<code_set_name> "euc-bogus"
+<code_set_alias> "euc-bogus" # error 1
+<mb_cur_min> 3 # error 2
+<mb_cur_max> 1
+<subchar> \x3F
+#
+CHARMAP
+#
+# ASCII
+#
+<U0000> \x00 |0 # <control>
+<U0001> \x01 |0 # <control>
+<U0002> \x02 |0 # <control>
+<U0003> \x03 |0 # <control>
+<U0004> \x04 |0 # <control>
+<U0005> \x05 |0 # <control>
+<U0006> \x06 |0 # <control>
+<U0007> \x07 |0 # <control>
+<U0008> \x08 |0 # <control>
+<U0009> \x09 |0 # <control>
+<U000A> \x0A |0 # <control>
+<U000B> \x0B |0 # <control>
+<U000C> \x0C |0 # <control>
+<U000D> \x0D |0 # <control>
+<U000E> \x0E |0 # <control>
+<U000F> \x0F |0 # <control>
+<U0010> \x10 |0 # <control>
+<U0011> \x11 |0 # <control>
+<U0012> \x12 |0 # <control>
+<U0013> \x13 |0 # <control>
+<U0014> \x14 |0 # <control>
+<U0015> \x15 |0 # <control>
+<U0016> \x16 |0 # <control>
+<U0017> \x17 |0 # <control>
+<U0018> \x18 |0 # <control>
+<U0019> \x19 |0 # <control>
+<U001A> \x1A |0 # <control>
+<U001B> \x1B |0 # <control>
+<U001C> \x1C |0 # <control>
+<U001D> \x1D |0 # <control>
+<U001E> \x1E |0 # <control>
+<U001F> \x1F |0 # <control>
+<U0020> \x20 |0 # SPACE
+<U0021> \x21 |0 # EXCLAMATION MARK
+<U0022> \x22 |0 # QUOTATION MARK
+<U0023> \x23 |0 # NUMBER SIGN
+<U0024> \x24 |0 # DOLLAR SIGN
+<U0025> \x25 |0 # PERCENT SIGN
+<U0026> \x26 |0 # AMPERSAND
+<U0027> \x27 |0 # APOSTROPHE
+<U0028> \x28 |0 # LEFT PARENTHESIS
+<U0029> \x29 |0 # RIGHT PARENTHESIS
+<U002A> \x2A |0 # ASTERISK
+<U002B> \x2B |0 # PLUS SIGN
+<U002C> \x2C |0 # COMMA
+<U002D> \x2D |0 # HYPHEN-MINUS
+<U002E> \x2E |0 # FULL STOP
+<U002F> \x2F |0 # SOLIDUS
+<U0030> \x30 |0 # DIGIT ZERO
+<U0031> \x31 |0 # DIGIT ONE
+<U0032> \x32 |0 # DIGIT TWO
+<U0033> \x33 |0 # DIGIT THREE
+<U0034> \x34 |0 # DIGIT FOUR
+<U0035> \x35 |0 # DIGIT FIVE
+<U0036> \x36 |0 # DIGIT SIX
+<U0037> \x37 |0 # DIGIT SEVEN
+<U0038> \x38 |0 # DIGIT EIGHT
+<U0039> \x39 |0 # DIGIT NINE
+<U003A> \x3A |0 # COLON
+<U003B> \x3B |0 # SEMICOLON
+<U003C> \x3C |0 # LESS-THAN SIGN
+<U003D> \x3D |0 # EQUALS SIGN
+<U003E> \x3E |0 # GREATER-THAN SIGN
+<U003F> \x3F |0 # QUESTION MARK
+<U0040> \x40 |0 # COMMERCIAL AT
+<U0041> \x41 |0 # LATIN CAPITAL LETTER A
+<U0042> \x42 |0 # LATIN CAPITAL LETTER B
+<U0043> \x43 |0 # LATIN CAPITAL LETTER C
+<U0044> \x44 |0 # LATIN CAPITAL LETTER D
+<U0045> \x45 |0 # LATIN CAPITAL LETTER E
+<U0046> \x46 |0 # LATIN CAPITAL LETTER F
+<U0047> \x47 |0 # LATIN CAPITAL LETTER G
+<U0048> \x48 |0 # LATIN CAPITAL LETTER H
+<U0049> \x49 |0 # LATIN CAPITAL LETTER I
+<U004A> \x4A |0 # LATIN CAPITAL LETTER J
+<U004B> \x4B |0 # LATIN CAPITAL LETTER K
+<U004C> \x4C |0 # LATIN CAPITAL LETTER L
+<U004D> \x4D |0 # LATIN CAPITAL LETTER M
+<U004E> \x4E |0 # LATIN CAPITAL LETTER N
+<U004F> \x4F |0 # LATIN CAPITAL LETTER O
+<U0050> \x50 |0 # LATIN CAPITAL LETTER P
+<U0051> \x51 |0 # LATIN CAPITAL LETTER Q
+<U0052> \x52 |0 # LATIN CAPITAL LETTER R
+<U0053> \x53 |0 # LATIN CAPITAL LETTER S
+<U0054> \x54 |0 # LATIN CAPITAL LETTER T
+<U0055> \x55 |0 # LATIN CAPITAL LETTER U
+<U0056> \x56 |0 # LATIN CAPITAL LETTER V
+<U0057> \x57 |0 # LATIN CAPITAL LETTER W
+<U0058> \x58 |0 # LATIN CAPITAL LETTER X
+<U0059> \x59 |0 # LATIN CAPITAL LETTER Y
+<U005A> \x5A |0 # LATIN CAPITAL LETTER Z
+<U005B> \x5B |0 # LEFT SQUARE BRACKET
+<U005C> \x5C |0 # REVERSE SOLIDUS
+<U005D> \x5D |0 # RIGHT SQUARE BRACKET
+<U005E> \x5E |0 # CIRCUMFLEX ACCENT
+<U005F> \x5F |0 # LOW LINE
+<U0060> \x60 |0 # GRAVE ACCENT
+<U0061> \x61 |0 # LATIN SMALL LETTER A
+<U0062> \x62 |0 # LATIN SMALL LETTER B
+<U0063> \x63 |0 # LATIN SMALL LETTER C
+<U0064> \x64 |0 # LATIN SMALL LETTER D
+<U0065> \x65 |0 # LATIN SMALL LETTER E
+<U0066> \x66 |0 # LATIN SMALL LETTER F
+<U0067> \x67 |0 # LATIN SMALL LETTER G
+<U0068> \x68 |0 # LATIN SMALL LETTER H
+<U0069> \x69 |0 # LATIN SMALL LETTER I
+<U006A> \x6A |0 # LATIN SMALL LETTER J
+<U006B> \x6B |0 # LATIN SMALL LETTER K
+<U006C> \x6C |0 # LATIN SMALL LETTER L
+<U006D> \x6D |0 # LATIN SMALL LETTER M
+<U006E> \x6E |0 # LATIN SMALL LETTER N
+<U006F> \x6F |0 # LATIN SMALL LETTER O
+<U0070> \x70 |0 # LATIN SMALL LETTER P
+<U0071> \x71 |0 # LATIN SMALL LETTER Q
+<U0072> \x72 |0 # LATIN SMALL LETTER R
+<U0073> \x73 |0 # LATIN SMALL LETTER S
+<U0074> \x74 |0 # LATIN SMALL LETTER T
+<U0075> \x75 |0 # LATIN SMALL LETTER U
+<U0076> \x76 |0 # LATIN SMALL LETTER V
+<U0077> \x77 |0 # LATIN SMALL LETTER W
+<U0078> \x78 |0 # LATIN SMALL LETTER X
+<U0079> \x79 |0 # LATIN SMALL LETTER Y
+<U007A> \x7A |0 # LATIN SMALL LETTER Z
+<U007B> \x7B |0 # LEFT CURLY BRACKET
+<U007C> \x7C |0 # VERTICAL LINE
+<U007D> \x7D |0 # RIGHT CURLY BRACKET
+<U007E> \x7E |0 # TILDE
+<U007F> \x7F |0 # <control>
+#
+# jisx0201-1978
+#
+<UFF61> \x8E\xA1 |0 # HALFWIDTH IDEOGRAPHIC FULL STOP
+<UFF62> \x8E\xA2 |0 # HALFWIDTH LEFT CORNER BRACKET
+<UFF63> \x8E\xA3 |0 # HALFWIDTH RIGHT CORNER BRACKET
+<UFF64> \x8E\xA4 |0 # HALFWIDTH IDEOGRAPHIC COMMA
+<UFF65> \x8E\xA5 |0 # HALFWIDTH KATAKANA MIDDLE DOT
+<UFF66> \x8E\xA6 |0 # HALFWIDTH KATAKANA LETTER WO
+<UFF67> \x8E\xA7 |0 # HALFWIDTH KATAKANA LETTER SMALL A
+<UFF68> \x8E\xA8 |0 # HALFWIDTH KATAKANA LETTER SMALL I
+<UFF69> \x8E\xA9 |0 # HALFWIDTH KATAKANA LETTER SMALL U
+<UFF6A> \x8E\xAA |0 # HALFWIDTH KATAKANA LETTER SMALL E
+<UFF6B> \x8E\xAB |0 # HALFWIDTH KATAKANA LETTER SMALL O
+<UFF6C> \x8E\xAC |0 # HALFWIDTH KATAKANA LETTER SMALL YA
+<UFF6D> \x8E\xAD |0 # HALFWIDTH KATAKANA LETTER SMALL YU
+<UFF6E> \x8E\xAE |0 # HALFWIDTH KATAKANA LETTER SMALL YO
+<UFF6F> \x8E\xAF |0 # HALFWIDTH KATAKANA LETTER SMALL TU
+<UFF70> \x8E\xB0 |0 # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+<UFF71> \x8E\xB1 |0 # HALFWIDTH KATAKANA LETTER A
+<UFF72> \x8E\xB2 |0 # HALFWIDTH KATAKANA LETTER I
+<UFF73> \x8E\xB3 |0 # HALFWIDTH KATAKANA LETTER U
+<UFF74> \x8E\xB4 |0 # HALFWIDTH KATAKANA LETTER E
+<UFF75> \x8E\xB5 |0 # HALFWIDTH KATAKANA LETTER O
+<UFF76> \x8E\xB6 |0 # HALFWIDTH KATAKANA LETTER KA
+<UFF77> \x8E\xB7 |0 # HALFWIDTH KATAKANA LETTER KI
+<UFF78> \x8E\xB8 |0 # HALFWIDTH KATAKANA LETTER KU
+<UFF79> \x8E\xB9 |0 # HALFWIDTH KATAKANA LETTER KE
+<UFF7A> \x8E\xBA |0 # HALFWIDTH KATAKANA LETTER KO
+<UFF7B> \x8E\xBB |0 # HALFWIDTH KATAKANA LETTER SA
+<UFF7C> \x8E\xBC |0 # HALFWIDTH KATAKANA LETTER SI
+<UFF7D> \x8E\xBD |0 # HALFWIDTH KATAKANA LETTER SU
+<UFF7E> \x8E\xBE |0 # HALFWIDTH KATAKANA LETTER SE
+<UFF7F> \x8E\xBF |0 # HALFWIDTH KATAKANA LETTER SO
+<UFF80> \x8E\xC0 |0 # HALFWIDTH KATAKANA LETTER TA
+<UFF81> \x8E\xC1 |0 # HALFWIDTH KATAKANA LETTER TI
+<UFF82> \x8E\xC2 |0 # HALFWIDTH KATAKANA LETTER TU
+<UFF83> \x8E\xC3 |0 # HALFWIDTH KATAKANA LETTER TE
+<UFF84> \x8E\xC4 |0 # HALFWIDTH KATAKANA LETTER TO
+<UFF85> \x8E\xC5 |0 # HALFWIDTH KATAKANA LETTER NA
+<UFF86> \x8E\xC6 |0 # HALFWIDTH KATAKANA LETTER NI
+<UFF87> \x8E\xC7 |0 # HALFWIDTH KATAKANA LETTER NU
+<UFF88> \x8E\xC8 |0 # HALFWIDTH KATAKANA LETTER NE
+<UFF89> \x8E\xC9 |0 # HALFWIDTH KATAKANA LETTER NO
+<UFF8A> \x8E\xCA |0 # HALFWIDTH KATAKANA LETTER HA
+<UFF8B> \x8E\xCB |0 # HALFWIDTH KATAKANA LETTER HI
+<UFF8C> \x8E\xCC |0 # HALFWIDTH KATAKANA LETTER HU
+<UFF8D> \x8E\xCD |0 # HALFWIDTH KATAKANA LETTER HE
+<UFF8E> \x8E\xCE |0 # HALFWIDTH KATAKANA LETTER HO
+<UFF8F> \x8E\xCF |0 # HALFWIDTH KATAKANA LETTER MA
+<UFF90> \x8E\xD0 |0 # HALFWIDTH KATAKANA LETTER MI
+<UFF91> \x8E\xD1 |0 # HALFWIDTH KATAKANA LETTER MU
+<UFF92> \x8E\xD2 |0 # HALFWIDTH KATAKANA LETTER ME
+<UFF93> \x8E\xD3 |0 # HALFWIDTH KATAKANA LETTER MO
+<UFF94> \x8E\xD4 |0 # HALFWIDTH KATAKANA LETTER YA
+<UFF95> \x8E\xD5 |0 # HALFWIDTH KATAKANA LETTER YU
+<UFF96> \x8E\xD6 |0 # HALFWIDTH KATAKANA LETTER YO
+<UFF97> \x8E\xD7 |0 # HALFWIDTH KATAKANA LETTER RA
+<UFF98> \x8E\xD8 |0 # HALFWIDTH KATAKANA LETTER RI
+<UFF99> \x8E\xD9 |0 # HALFWIDTH KATAKANA LETTER RU
+<UFF9A> \x8E\xDA |0 # HALFWIDTH KATAKANA LETTER RE
+<UFF9B> \x8E\xDB |0 # HALFWIDTH KATAKANA LETTER RO
+<UFF9C> \x8E\xDC |0 # HALFWIDTH KATAKANA LETTER WA
+<UFF9D> \x8E\xDD |0 # HALFWIDTH KATAKANA LETTER N
+<UFF9E> \x8E\xDE |0 # HALFWIDTH KATAKANA VOICED SOUND MARK
+<UFF9F> \x8E\xDF |0 # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+#
+# jisx0208-1990, just a part of it
+#
+<U3000> \xA1\xA1 |0 # IDEOGRAPHIC SPACE
+<U3001> \xA1\xA2 |0 # IDEOGRAPHIC COMMA
+<U3002> \xA1\xA3 |0 # IDEOGRAPHIC FULL STOP
+<UFF0C> \xA1\xA4 |0 # FULLWIDTH COMMA
+<UFF0E> \xA1\xA5 |0 # FULLWIDTH FULL STOP
+<U30FB> \xA1\xA6 |0 # KATAKANA MIDDLE DOT
+<UFF1A> \xA1\xA7 |0 # FULLWIDTH COLON
+<UFF1B> \xA1\xA8 |0 # FULLWIDTH SEMICOLON
+<UFF1F> \xA1\xA9 |0 # FULLWIDTH QUESTION MARK
+<UFF01> \xA1\xAA |0 # FULLWIDTH EXCLAMATION MARK
+<U309B> \xA1\xAB |0 # KATAKANA-HIRAGANA VOICED SOUND MARK
+<U309C> \xA1\xAC |0 # KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+<U00B4> \xA1\xAD |0 # ACUTE ACCENT
+<UFF40> \xA1\xAE |0 # FULLWIDTH GRAVE ACCENT
+<U00A8> \xA1\xAF |0 # DIAERESIS
+<UFF3E> \xA1\xB0 |0 # FULLWIDTH CIRCUMFLEX ACCENT
+<UFFE3> \xA1\xB1 |0 # FULLWIDTH MACRON
+<UFF3F> \xA1\xB2 |0 # FULLWIDTH LOW LINE
+<U30FD> \xA1\xB3 |0 # KATAKANA ITERATION MARK
+<U30FE> \xA1\xB4 |0 # KATAKANA VOICED ITERATION MARK
+<U309D> \xA1\xB5 |0 # HIRAGANA ITERATION MARK
+<U309E> \xA1\xB6 |0 # HIRAGANA VOICED ITERATION MARK
+<U3003> \xA1\xB7 |0 # DITTO MARK
+<U4EDD> \xA1\xB8 |0 # CJK Ideograph
+<U3005> \xA1\xB9 |0 # IDEOGRAPHIC ITERATION MARK
+<U3006> \xA1\xBA |0 # IDEOGRAPHIC CLOSING MARK
+<U3007> \xA1\xBB |0 # IDEOGRAPHIC NUMBER ZERO
+<U30FC> \xA1\xBC |0 # KATAKANA-HIRAGANA PROLONGED SOUND MARK
+<U2015> \xA1\xBD |0 # HORIZONTAL BAR
+<U2010> \xA1\xBE |0 # HYPHEN
+<UFF0F> \xA1\xBF |0 # FULLWIDTH SOLIDUS
+<UFF3C> \xA1\xC0 |0 # FULLWIDTH REVERSE SOLIDUS
+<U301C> \xA1\xC1 |0 # WAVE DASH
+<U2016> \xA1\xC2 |0 # DOUBLE VERTICAL LINE
+<UFF5C> \xA1\xC3 |0 # FULLWIDTH VERTICAL LINE
+<U2026> \xA1\xC4 |0 # HORIZONTAL ELLIPSIS
+<U2025> \xA1\xC5 |0 # TWO DOT LEADER
+<U2018> \xA1\xC6 |0 # LEFT SINGLE QUOTATION MARK
+<U2019> \xA1\xC7 |0 # RIGHT SINGLE QUOTATION MARK
+<U201C> \xA1\xC8 |0 # LEFT DOUBLE QUOTATION MARK
+<U201D> \xA1\xC9 |0 # RIGHT DOUBLE QUOTATION MARK
+<UFF08> \xA1\xCA |0 # FULLWIDTH LEFT PARENTHESIS
+<UFF09> \xA1\xCB |0 # FULLWIDTH RIGHT PARENTHESIS
+<U3014> \xA1\xCC |0 # LEFT TORTOISE SHELL BRACKET
+<U3015> \xA1\xCD |0 # RIGHT TORTOISE SHELL BRACKET
+<UFF3B> \xA1\xCE |0 # FULLWIDTH LEFT SQUARE BRACKET
+<UFF3D> \xA1\xCF |0 # FULLWIDTH RIGHT SQUARE BRACKET
+<UFF5B> \xA1\xD0 |0 # FULLWIDTH LEFT CURLY BRACKET
+<UFF5D> \xA1\xD1 |0 # FULLWIDTH RIGHT CURLY BRACKET
+<U3008> \xA1\xD2 |0 # LEFT ANGLE BRACKET
+<U3009> \xA1\xD3 |0 # RIGHT ANGLE BRACKET
+<U300A> \xA1\xD4 |0 # LEFT DOUBLE ANGLE BRACKET
+<U300B> \xA1\xD5 |0 # RIGHT DOUBLE ANGLE BRACKET
+<U300C> \xA1\xD6 |0 # LEFT CORNER BRACKET
+<U300D> \xA1\xD7 |0 # RIGHT CORNER BRACKET
+<U300E> \xA1\xD8 |0 # LEFT WHITE CORNER BRACKET
+<U300F> \xA1\xD9 |0 # RIGHT WHITE CORNER BRACKET
+<U3010> \xA1\xDA |0 # LEFT BLACK LENTICULAR BRACKET
+<U3011> \xA1\xDB |0 # RIGHT BLACK LENTICULAR BRACKET
+<UFF0B> \xA1\xDC |0 # FULLWIDTH PLUS SIGN
+<U2212> \xA1\xDD |0 # MINUS SIGN
+<U00B1> \xA1\xDE |0 # PLUS-MINUS SIGN
+<U00D7> \xA1\xDF |0 # MULTIPLICATION SIGN
+<U00F7> \xA1\xE0 |0 # DIVISION SIGN
+<UFF1D> \xA1\xE1 |0 # FULLWIDTH EQUALS SIGN
+<U2260> \xA1\xE2 |0 # NOT EQUAL TO
+<UFF1C> \xA1\xE3 |0 # FULLWIDTH LESS-THAN SIGN
+<UFF1E> \xA1\xE4 |0 # FULLWIDTH GREATER-THAN SIGN
+<U2266> \xA1\xE5 |0 # LESS-THAN OVER EQUAL TO
+<U2267> \xA1\xE6 |0 # GREATER-THAN OVER EQUAL TO
+<U221E> \xA1\xE7 |0 # INFINITY
+<U2234> \xA1\xE8 |0 # THEREFORE
+<U2642> \xA1\xE9 |0 # MALE SIGN
+<U2640> \xA1\xEA |0 # FEMALE SIGN
+<U00B0> \xA1\xEB |0 # DEGREE SIGN
+<U2032> \xA1\xEC |0 # PRIME
+<U2033> \xA1\xED |0 # DOUBLE PRIME
+<U2103> \xA1\xEE |0 # DEGREE CELSIUS
+<UFFE5> \xA1\xEF |0 # FULLWIDTH YEN SIGN
+<UFF04> \xA1\xF0 |0 # FULLWIDTH DOLLAR SIGN
+<U00A2> \xA1\xF1 |0 # CENT SIGN
+<U00A3> \xA1\xF2 |0 # POUND SIGN
+<UFF05> \xA1\xF3 |0 # FULLWIDTH PERCENT SIGN
+<UFF03> \xA1\xF4 |0 # FULLWIDTH NUMBER SIGN
+<UFF06> \xA1\xF5 |0 # FULLWIDTH AMPERSAND
+<UFF0A> \xA1\xF6 |0 # FULLWIDTH ASTERISK
+<UFF20> \xA1\xF7 |0 # FULLWIDTH COMMERCIAL AT
+<U00A7> \xA1\xF8 |0 # SECTION SIGN
+<U2606> \xA1\xF9 |0 # WHITE STAR
+<U2605> \xA1\xFA |0 # BLACK STAR
+<U25CB> \xA1\xFB |0 # WHITE CIRCLE
+<U25CF> \xA1\xFC |0 # BLACK CIRCLE
+<U25CE> \xA1\xFD |0 # BULLSEYE
+<U25C7> \xA1\xFE |0 # WHITE DIAMOND
+<U25C6> \xA2\xA1 |0 # BLACK DIAMOND
+<U25A1> \xA2\xA2 |0 # WHITE SQUARE
+<U25A0> \xA2\xA3 |0 # BLACK SQUARE
+<U25B3> \xA2\xA4 |0 # WHITE UP-POINTING TRIANGLE
+<U25B2> \xA2\xA5 |0 # BLACK UP-POINTING TRIANGLE
+<U25BD> \xA2\xA6 |0 # WHITE DOWN-POINTING TRIANGLE
+<U25BC> \xA2\xA7 |0 # BLACK DOWN-POINTING TRIANGLE
+<U203B> \xA2\xA8 |0 # REFERENCE MARK
+<U3012> \xA2\xA9 |0 # POSTAL MARK
+<U2192> \xA2\xAA |0 # RIGHTWARDS ARROW
+<U2190> \xA2\xAB |0 # LEFTWARDS ARROW
+<U2191> \xA2\xAC |0 # UPWARDS ARROW
+<U2193> \xA2\xAD |0 # DOWNWARDS ARROW
+<U3013> \xA2\xAE |0 # GETA MARK
+<U2208> \xA2\xBA |0 # ELEMENT OF
+<U220B> \xA2\xBB |0 # CONTAINS AS MEMBER
+<U2286> \xA2\xBC |0 # SUBSET OF OR EQUAL TO
+<U2287> \xA2\xBD |0 # SUPERSET OF OR EQUAL TO
+<U2282> \xA2\xBE |0 # SUBSET OF
+<U2283> \xA2\xBF |0 # SUPERSET OF
+<U222A> \xA2\xC0 |0 # UNION
+<U2229> \xA2\xC1 |0 # INTERSECTION
+<U2227> \xA2\xCA |0 # LOGICAL AND
+<U2228> \xA2\xCB |0 # LOGICAL OR
+<U00AC> \xA2\xCC |0 # NOT SIGN
+<U21D2> \xA2\xCD |0 # RIGHTWARDS DOUBLE ARROW
+<U21D4> \xA2\xCE |0 # LEFT RIGHT DOUBLE ARROW
+<U2200> \xA2\xCF |0 # FOR ALL
+<U2203> \xA2\xD0 |0 # THERE EXISTS
+<U2220> \xA2\xDC |0 # ANGLE
+<U22A5> \xA2\xDD |0 # UP TACK
+<U2312> \xA2\xDE |0 # ARC
+<U2202> \xA2\xDF |0 # PARTIAL DIFFERENTIAL
+<U2207> \xA2\xE0 |0 # NABLA
+<U2261> \xA2\xE1 |0 # IDENTICAL TO
+<U2252> \xA2\xE2 |0 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
+<U226A> \xA2\xE3 |0 # MUCH LESS-THAN
+<U226B> \xA2\xE4 |0 # MUCH GREATER-THAN
+<U221A> \xA2\xE5 |0 # SQUARE ROOT
+<U223D> \xA2\xE6 |0 # REVERSED TILDE
+<U221D> \xA2\xE7 |0 # PROPORTIONAL TO
+<U2235> \xA2\xE8 |0 # BECAUSE
+<U222B> \xA2\xE9 |0 # INTEGRAL
+<U222C> \xA2\xEA |0 # DOUBLE INTEGRAL
+<U212B> \xA2\xF2 |0 # ANGSTROM SIGN
+<U2030> \xA2\xF3 |0 # PER MILLE SIGN
+<U266F> \xA2\xF4 |0 # MUSIC SHARP SIGN
+<U266D> \xA2\xF5 |0 # MUSIC FLAT SIGN
+<U266A> \xA2\xF6 |0 # EIGHTH NOTE
+<U2020> \xA2\xF7 |0 # DAGGER
+<U2021> \xA2\xF8 |0 # DOUBLE DAGGER
+<U00B6> \xA2\xF9 |0 # PILCROW SIGN
+<U25EF> \xA2\xFE |0 # LARGE CIRCLE
+#
+# jisx0212-1990, just part of it
+#
+<U02D8> \x8F\xA2\xAF |0 # BREVE
+<U02C7> \x8F\xA2\xB0 |0 # CARON (Mandarin Chinese third tone)
+<U00B8> \x8F\xA2\xB1 |0 # CEDILLA
+<U02D9> \x8F\xA2\xB2 |0 # DOT ABOVE (Mandarin Chinese light tone)
+<U02DD> \x8F\xA2\xB3 |0 # DOUBLE ACUTE ACCENT
+<U00AF> \x8F\xA2\xB4 |0 # MACRON
+<U02DB> \x8F\xA2\xB5 |0 # OGONEK
+<U02DA> \x8F\xA2\xB6 |0 # RING ABOVE
+<U007E> \x8F\xA2\xB7 |0 # TILDE -- deliberately cause error
+<U0384> \x8F\xA2\xB8 |0 # GREEK TONOS
+<U0385> \x8F\xA2\xB9 |0 # GREEK DIALYTIKA TONOS
+<U00A1> \x8F\xA2\xC2 |0 # INVERTED EXCLAMATION MARK
+<U00A6> \x8F\xA2\xC3 |0 # BROKEN BAR
+<U00BF> \x8F\xA2\xC4 |0 # INVERTED QUESTION MARK
+<U00BA> \x8F\xA2\xEB |0 # MASCULINE ORDINAL INDICATOR
+<U00AA> \x8F\xA2\xEC |0 # FEMININE ORDINAL INDICATOR
+<U00A9> \x8F\xA2\xED |0 # COPYRIGHT SIGN
+<U00AE> \x8F\xA2\xEE |0 # REGISTERED SIGN
+<U2122> \x8F\xA2\xEF |0 # TRADE MARK SIGN
+<U00A4> \x8F\xA2\xF0 |0 # CURRENCY SIGN
+<U2116> \x8F\xA2\xF1 |0 # NUMERO SIGN
+END CHARMAP
##
-# $Id: adobeStdenc.ucm,v 1.20 2002/04/04 19:50:53 dankogai Exp $
+# $Id: adobeStdenc.ucm,v 1.21 2002/04/09 20:06:15 dankogai Exp dankogai $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/stdenc.txt
<U007C> \x7C |0 # VERTICAL LINE # bar
<U007D> \x7D |0 # RIGHT CURLY BRACKET # braceright
<U007E> \x7E |0 # TILDE # asciitilde
-<U00A0> \x20 |0 # NO-BREAK SPACE # space
+<U00A0> \x20 |1 # NO-BREAK SPACE # space
<U00A1> \xA1 |0 # INVERTED EXCLAMATION MARK # exclamdown
<U00A2> \xA2 |0 # CENT SIGN # cent
<U00A3> \xA3 |0 # POUND SIGN # sterling
<U00A8> \xC8 |0 # DIAERESIS # dieresis
<U00AA> \xE3 |0 # FEMININE ORDINAL INDICATOR # ordfeminine
<U00AB> \xAB |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK # guillemotleft
-<U00AD> \x2D |0 # SOFT HYPHEN # hyphen
+<U00AD> \x2D |1 # SOFT HYPHEN # hyphen
<U00AF> \xC5 |0 # MACRON # macron
<U00B4> \xC2 |0 # ACUTE ACCENT # acute
<U00B6> \xB6 |0 # PILCROW SIGN # paragraph
<U0192> \xA6 |0 # LATIN SMALL LETTER F WITH HOOK # florin
<U02C6> \xC3 |0 # MODIFIER LETTER CIRCUMFLEX ACCENT # circumflex
<U02C7> \xCF |0 # CARON # caron
-<U02C9> \xC5 |0 # MODIFIER LETTER MACRON # macron
+<U02C9> \xC5 |1 # MODIFIER LETTER MACRON # macron
<U02D8> \xC6 |0 # BREVE # breve
<U02D9> \xC7 |0 # DOT ABOVE # dotaccent
<U02DA> \xCA |0 # RING ABOVE # ring
<U2039> \xAC |0 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK # guilsinglleft
<U203A> \xAD |0 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK # guilsinglright
<U2044> \xA4 |0 # FRACTION SLASH # fraction
-<U2215> \xA4 |0 # DIVISION SLASH # fraction
-<U2219> \xB4 |0 # BULLET OPERATOR # periodcentered
+<U2215> \xA4 |1 # DIVISION SLASH # fraction
+<U2219> \xB4 |1 # BULLET OPERATOR # periodcentered
<UFB01> \xAE |0 # LATIN SMALL LIGATURE FI # fi
<UFB02> \xAF |0 # LATIN SMALL LIGATURE FL # fl
END CHARMAP
#
-# $Id: adobeSymbol.ucm,v 1.20 2002/04/04 19:50:53 dankogai Exp $
+# $Id: adobeSymbol.ucm,v 1.21 2002/04/09 20:06:15 dankogai Exp dankogai $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/symbol.txt
<U007B> \x7B |0 # LEFT CURLY BRACKET # braceleft
<U007C> \x7C |0 # VERTICAL LINE # bar
<U007D> \x7D |0 # RIGHT CURLY BRACKET # braceright
-<U00A0> \x20 |0 # NO-BREAK SPACE # space
+<U00A0> \x20 |1 # NO-BREAK SPACE # space
<U00AC> \xD8 |0 # NOT SIGN # logicalnot
<U00B0> \xB0 |0 # DEGREE SIGN # degree
<U00B1> \xB1 |0 # PLUS-MINUS SIGN # plusminus
<U03B9> \x69 |0 # GREEK SMALL LETTER IOTA # iota
<U03BA> \x6B |0 # GREEK SMALL LETTER KAPPA # kappa
<U03BB> \x6C |0 # GREEK SMALL LETTER LAMDA # lambda
-<U03BC> \x6D |0 # GREEK SMALL LETTER MU # mu
+<U03BC> \x6D |1 # GREEK SMALL LETTER MU # mu
<U03BD> \x6E |0 # GREEK SMALL LETTER NU # nu
<U03BE> \x78 |0 # GREEK SMALL LETTER XI # xi
<U03BF> \x6F |0 # GREEK SMALL LETTER OMICRON # omicron
<U2111> \xC1 |0 # BLACK-LETTER CAPITAL I # Ifraktur
<U2118> \xC3 |0 # SCRIPT CAPITAL P # weierstrass
<U211C> \xC2 |0 # BLACK-LETTER CAPITAL R # Rfraktur
-<U2126> \x57 |0 # OHM SIGN # Omega
+<U2126> \x57 |1 # OHM SIGN # Omega
<U2135> \xC0 |0 # ALEF SYMBOL # aleph
<U2190> \xAC |0 # LEFTWARDS ARROW # arrowleft
<U2191> \xAD |0 # UPWARDS ARROW # arrowup
<U2202> \xB6 |0 # PARTIAL DIFFERENTIAL # partialdiff
<U2203> \x24 |0 # THERE EXISTS # existential
<U2205> \xC6 |0 # EMPTY SET # emptyset
-<U2206> \x44 |0 # INCREMENT # Delta
+<U2206> \x44 |1 # INCREMENT # Delta
<U2207> \xD1 |0 # NABLA # gradient
<U2208> \xCE |0 # ELEMENT OF # element
<U2209> \xCF |0 # NOT AN ELEMENT OF # notelement
<U220F> \xD5 |0 # N-ARY PRODUCT # product
<U2211> \xE5 |0 # N-ARY SUMMATION # summation
<U2212> \x2D |0 # MINUS SIGN # minus
-<U2215> \xA4 |0 # DIVISION SLASH # fraction
+<U2215> \xA4 |1 # DIVISION SLASH # fraction
<U2217> \x2A |0 # ASTERISK OPERATOR # asteriskmath
<U221A> \xD6 |0 # SQUARE ROOT # radical
<U221D> \xB5 |0 # PROPORTIONAL TO # proportional
#
-# $Id: adobeZdingbat.ucm,v 1.20 2002/04/04 19:50:53 dankogai Exp $
+# $Id: adobeZdingbat.ucm,v 1.21 2002/04/09 20:06:15 dankogai Exp dankogai $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
<U001E> \x1E |0 # <control>
<U001F> \x1F |0 # <control>
<U0020> \x20 |0 # SPACE # space
-<U00A0> \x20 |0 # NO-BREAK SPACE # space
+<U00A0> \x20 |1 # NO-BREAK SPACE # space
<U2192> \xD5 |0 # RIGHTWARDS ARROW # a161
<U2194> \xD6 |0 # LEFT RIGHT ARROW # a163
<U2195> \xD7 |0 # UP DOWN ARROW # a164
+++ /dev/null
-#
-# $Id: macDevanaga.ucm,v 1.20 2002/04/04 19:50:54 dankogai Exp $
-#
-# Original table can be obtained at
-# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/DEVANAGA.TXT
-#
-<code_set_name> "MacDevanagari"
-<code_set_alias> "MacDevanaga"
-<mb_cur_min> 1
-<mb_cur_max> 2
-<subchar> \x3F
-CHARMAP
-<U0000> \x00 |0 # <control>
-<U0001> \x01 |0 # <control>
-<U0002> \x02 |0 # <control>
-<U0003> \x03 |0 # <control>
-<U0004> \x04 |0 # <control>
-<U0005> \x05 |0 # <control>
-<U0006> \x06 |0 # <control>
-<U0007> \x07 |0 # <control>
-<U0008> \x08 |0 # <control>
-<U0009> \x09 |0 # <control>
-<U000A> \x0A |0 # <control>
-<U000B> \x0B |0 # <control>
-<U000C> \x0C |0 # <control>
-<U000D> \x0D |0 # <control>
-<U000E> \x0E |0 # <control>
-<U000F> \x0F |0 # <control>
-<U0010> \x10 |0 # <control>
-<U0011> \x11 |0 # <control>
-<U0012> \x12 |0 # <control>
-<U0013> \x13 |0 # <control>
-<U0014> \x14 |0 # <control>
-<U0015> \x15 |0 # <control>
-<U0016> \x16 |0 # <control>
-<U0017> \x17 |0 # <control>
-<U0018> \x18 |0 # <control>
-<U0019> \x19 |0 # <control>
-<U001A> \x1A |0 # <control>
-<U001B> \x1B |0 # <control>
-<U001C> \x1C |0 # <control>
-<U001D> \x1D |0 # <control>
-<U001E> \x1E |0 # <control>
-<U001F> \x1F |0 # <control>
-<U0020> \x20 |0 # SPACE
-<U0021> \x21 |0 # EXCLAMATION MARK
-<U0022> \x22 |0 # QUOTATION MARK
-<U0023> \x23 |0 # NUMBER SIGN
-<U0024> \x24 |0 # DOLLAR SIGN
-<U0025> \x25 |0 # PERCENT SIGN
-<U0026> \x26 |0 # AMPERSAND
-<U0027> \x27 |0 # APOSTROPHE
-<U0028> \x28 |0 # LEFT PARENTHESIS
-<U0029> \x29 |0 # RIGHT PARENTHESIS
-<U002A> \x2A |0 # ASTERISK
-<U002B> \x2B |0 # PLUS SIGN
-<U002C> \x2C |0 # COMMA
-<U002D> \x2D |0 # HYPHEN-MINUS
-<U002E> \x2E |0 # FULL STOP
-<U002F> \x2F |0 # SOLIDUS
-<U0030> \x30 |0 # DIGIT ZERO
-<U0031> \x31 |0 # DIGIT ONE
-<U0032> \x32 |0 # DIGIT TWO
-<U0033> \x33 |0 # DIGIT THREE
-<U0034> \x34 |0 # DIGIT FOUR
-<U0035> \x35 |0 # DIGIT FIVE
-<U0036> \x36 |0 # DIGIT SIX
-<U0037> \x37 |0 # DIGIT SEVEN
-<U0038> \x38 |0 # DIGIT EIGHT
-<U0039> \x39 |0 # DIGIT NINE
-<U003A> \x3A |0 # COLON
-<U003B> \x3B |0 # SEMICOLON
-<U003C> \x3C |0 # LESS-THAN SIGN
-<U003D> \x3D |0 # EQUALS SIGN
-<U003E> \x3E |0 # GREATER-THAN SIGN
-<U003F> \x3F |0 # QUESTION MARK
-<U0040> \x40 |0 # COMMERCIAL AT
-<U0041> \x41 |0 # LATIN CAPITAL LETTER A
-<U0042> \x42 |0 # LATIN CAPITAL LETTER B
-<U0043> \x43 |0 # LATIN CAPITAL LETTER C
-<U0044> \x44 |0 # LATIN CAPITAL LETTER D
-<U0045> \x45 |0 # LATIN CAPITAL LETTER E
-<U0046> \x46 |0 # LATIN CAPITAL LETTER F
-<U0047> \x47 |0 # LATIN CAPITAL LETTER G
-<U0048> \x48 |0 # LATIN CAPITAL LETTER H
-<U0049> \x49 |0 # LATIN CAPITAL LETTER I
-<U004A> \x4A |0 # LATIN CAPITAL LETTER J
-<U004B> \x4B |0 # LATIN CAPITAL LETTER K
-<U004C> \x4C |0 # LATIN CAPITAL LETTER L
-<U004D> \x4D |0 # LATIN CAPITAL LETTER M
-<U004E> \x4E |0 # LATIN CAPITAL LETTER N
-<U004F> \x4F |0 # LATIN CAPITAL LETTER O
-<U0050> \x50 |0 # LATIN CAPITAL LETTER P
-<U0051> \x51 |0 # LATIN CAPITAL LETTER Q
-<U0052> \x52 |0 # LATIN CAPITAL LETTER R
-<U0053> \x53 |0 # LATIN CAPITAL LETTER S
-<U0054> \x54 |0 # LATIN CAPITAL LETTER T
-<U0055> \x55 |0 # LATIN CAPITAL LETTER U
-<U0056> \x56 |0 # LATIN CAPITAL LETTER V
-<U0057> \x57 |0 # LATIN CAPITAL LETTER W
-<U0058> \x58 |0 # LATIN CAPITAL LETTER X
-<U0059> \x59 |0 # LATIN CAPITAL LETTER Y
-<U005A> \x5A |0 # LATIN CAPITAL LETTER Z
-<U005B> \x5B |0 # LEFT SQUARE BRACKET
-<U005C> \x5C |0 # REVERSE SOLIDUS
-<U005D> \x5D |0 # RIGHT SQUARE BRACKET
-<U005E> \x5E |0 # CIRCUMFLEX ACCENT
-<U005F> \x5F |0 # LOW LINE
-<U0060> \x60 |0 # GRAVE ACCENT
-<U0061> \x61 |0 # LATIN SMALL LETTER A
-<U0062> \x62 |0 # LATIN SMALL LETTER B
-<U0063> \x63 |0 # LATIN SMALL LETTER C
-<U0064> \x64 |0 # LATIN SMALL LETTER D
-<U0065> \x65 |0 # LATIN SMALL LETTER E
-<U0066> \x66 |0 # LATIN SMALL LETTER F
-<U0067> \x67 |0 # LATIN SMALL LETTER G
-<U0068> \x68 |0 # LATIN SMALL LETTER H
-<U0069> \x69 |0 # LATIN SMALL LETTER I
-<U006A> \x6A |0 # LATIN SMALL LETTER J
-<U006B> \x6B |0 # LATIN SMALL LETTER K
-<U006C> \x6C |0 # LATIN SMALL LETTER L
-<U006D> \x6D |0 # LATIN SMALL LETTER M
-<U006E> \x6E |0 # LATIN SMALL LETTER N
-<U006F> \x6F |0 # LATIN SMALL LETTER O
-<U0070> \x70 |0 # LATIN SMALL LETTER P
-<U0071> \x71 |0 # LATIN SMALL LETTER Q
-<U0072> \x72 |0 # LATIN SMALL LETTER R
-<U0073> \x73 |0 # LATIN SMALL LETTER S
-<U0074> \x74 |0 # LATIN SMALL LETTER T
-<U0075> \x75 |0 # LATIN SMALL LETTER U
-<U0076> \x76 |0 # LATIN SMALL LETTER V
-<U0077> \x77 |0 # LATIN SMALL LETTER W
-<U0078> \x78 |0 # LATIN SMALL LETTER X
-<U0079> \x79 |0 # LATIN SMALL LETTER Y
-<U007A> \x7A |0 # LATIN SMALL LETTER Z
-<U007B> \x7B |0 # LEFT CURLY BRACKET
-<U007C> \x7C |0 # VERTICAL LINE
-<U007D> \x7D |0 # RIGHT CURLY BRACKET
-<U007E> \x7E |0 # TILDE
-<U00A9> \x88 |0 # COPYRIGHT SIGN
-<U00AE> \x89 |0 # REGISTERED SIGN
-<U00D7> \x80 |0 # MULTIPLICATION SIGN
-<U0901> \xA1 |0 # DEVANAGARI SIGN CANDRABINDU
-<U0902> \xA2 |0 # DEVANAGARI SIGN ANUSVARA
-<U0903> \xA3 |0 # DEVANAGARI SIGN VISARGA
-<U0905> \xA4 |0 # DEVANAGARI LETTER A
-<U0906> \xA5 |0 # DEVANAGARI LETTER AA
-<U0907> \xA6 |0 # DEVANAGARI LETTER I
-<U0908> \xA7 |0 # DEVANAGARI LETTER II
-<U0909> \xA8 |0 # DEVANAGARI LETTER U
-<U090A> \xA9 |0 # DEVANAGARI LETTER UU
-<U090B> \xAA |0 # DEVANAGARI LETTER VOCALIC R
-<U090C> \xA6\xE9 |1 # DEVANAGARI LETTER VOCALIC L
-<U090D> \xAE |0 # DEVANAGARI LETTER CANDRA E
-<U090E> \xAB |0 # DEVANAGARI LETTER SHORT E
-<U090F> \xAC |0 # DEVANAGARI LETTER E
-<U0910> \xAD |0 # DEVANAGARI LETTER AI
-<U0911> \xB2 |0 # DEVANAGARI LETTER CANDRA O
-<U0912> \xAF |0 # DEVANAGARI LETTER SHORT O
-<U0913> \xB0 |0 # DEVANAGARI LETTER O
-<U0914> \xB1 |0 # DEVANAGARI LETTER AU
-<U0915> \xB3 |0 # DEVANAGARI LETTER KA
-<U0916> \xB4 |0 # DEVANAGARI LETTER KHA
-<U0917> \xB5 |0 # DEVANAGARI LETTER GA
-<U0918> \xB6 |0 # DEVANAGARI LETTER GHA
-<U0919> \xB7 |0 # DEVANAGARI LETTER NGA
-<U091A> \xB8 |0 # DEVANAGARI LETTER CA
-<U091B> \xB9 |0 # DEVANAGARI LETTER CHA
-<U091C> \xBA |0 # DEVANAGARI LETTER JA
-<U091D> \xBB |0 # DEVANAGARI LETTER JHA
-<U091E> \xBC |0 # DEVANAGARI LETTER NYA
-<U091F> \xBD |0 # DEVANAGARI LETTER TTA
-<U0920> \xBE |0 # DEVANAGARI LETTER TTHA
-<U0921> \xBF |0 # DEVANAGARI LETTER DDA
-<U0922> \xC0 |0 # DEVANAGARI LETTER DDHA
-<U0923> \xC1 |0 # DEVANAGARI LETTER NNA
-<U0924> \xC2 |0 # DEVANAGARI LETTER TA
-<U0925> \xC3 |0 # DEVANAGARI LETTER THA
-<U0926> \xC4 |0 # DEVANAGARI LETTER DA
-<U0927> \xC5 |0 # DEVANAGARI LETTER DHA
-<U0928> \xC6 |0 # DEVANAGARI LETTER NA
-<U0929> \xC7 |0 # DEVANAGARI LETTER NNNA
-<U092A> \xC8 |0 # DEVANAGARI LETTER PA
-<U092B> \xC9 |0 # DEVANAGARI LETTER PHA
-<U092C> \xCA |0 # DEVANAGARI LETTER BA
-<U092D> \xCB |0 # DEVANAGARI LETTER BHA
-<U092E> \xCC |0 # DEVANAGARI LETTER MA
-<U092F> \xCD |0 # DEVANAGARI LETTER YA
-<U0930> \xCF |0 # DEVANAGARI LETTER RA
-<U0931> \xD0 |0 # DEVANAGARI LETTER RRA
-<U0932> \xD1 |0 # DEVANAGARI LETTER LA
-<U0933> \xD2 |0 # DEVANAGARI LETTER LLA
-<U0934> \xD3 |0 # DEVANAGARI LETTER LLLA
-<U0935> \xD4 |0 # DEVANAGARI LETTER VA
-<U0936> \xD5 |0 # DEVANAGARI LETTER SHA
-<U0937> \xD6 |0 # DEVANAGARI LETTER SSA
-<U0938> \xD7 |0 # DEVANAGARI LETTER SA
-<U0939> \xD8 |0 # DEVANAGARI LETTER HA
-<U093C> \xE9 |0 # DEVANAGARI SIGN NUKTA
-<U093D> \xEA\xE9 |1 # DEVANAGARI SIGN AVAGRAHA
-<U093E> \xDA |0 # DEVANAGARI VOWEL SIGN AA
-<U093F> \xDB |0 # DEVANAGARI VOWEL SIGN I
-<U0940> \xDC |0 # DEVANAGARI VOWEL SIGN II
-<U0941> \xDD |0 # DEVANAGARI VOWEL SIGN U
-<U0942> \xDE |0 # DEVANAGARI VOWEL SIGN UU
-<U0943> \xDF |0 # DEVANAGARI VOWEL SIGN VOCALIC R
-<U0944> \xDF\xE9 |1 # DEVANAGARI VOWEL SIGN VOCALIC RR
-<U0945> \xE3 |0 # DEVANAGARI VOWEL SIGN CANDRA E
-<U0946> \xE0 |0 # DEVANAGARI VOWEL SIGN SHORT E
-<U0947> \xE1 |0 # DEVANAGARI VOWEL SIGN E
-<U0948> \xE2 |0 # DEVANAGARI VOWEL SIGN AI
-<U0949> \xE7 |0 # DEVANAGARI VOWEL SIGN CANDRA O
-<U094A> \xE4 |0 # DEVANAGARI VOWEL SIGN SHORT O
-<U094B> \xE5 |0 # DEVANAGARI VOWEL SIGN O
-<U094C> \xE6 |0 # DEVANAGARI VOWEL SIGN AU
-<U094D> \xE8 |0 # DEVANAGARI SIGN VIRAMA # halant
-<U094D><U200C> \xE8\xE8 |1 # DEVANAGARI SIGN VIRAMA + ZWNJ # explicit halant
-<U094D><U200D> \xE8\xE9 |1 # DEVANAGARI SIGN VIRAMA + ZWJ # soft halant
-<U0950> \xA1\xE9 |1 # DEVANAGARI OM
-<U095F> \xCE |0 # DEVANAGARI LETTER YYA
-<U0960> \xAA\xE9 |1 # DEVANAGARI LETTER VOCALIC RR
-<U0961> \xA7\xE9 |1 # DEVANAGARI LETTER VOCALIC LL
-<U0962> \xDB\xE9 |1 # DEVANAGARI VOWEL SIGN VOCALIC L
-<U0963> \xDC\xE9 |1 # DEVANAGARI VOWEL SIGN VOCALIC LL
-<U0964> \xEA |0 # DEVANAGARI DANDA
-<U0965> \x90 |0 # DEVANAGARI DOUBLE DANDA
-<U0966> \xF1 |0 # DEVANAGARI DIGIT ZERO
-<U0967> \xF2 |0 # DEVANAGARI DIGIT ONE
-<U0968> \xF3 |0 # DEVANAGARI DIGIT TWO
-<U0969> \xF4 |0 # DEVANAGARI DIGIT THREE
-<U096A> \xF5 |0 # DEVANAGARI DIGIT FOUR
-<U096B> \xF6 |0 # DEVANAGARI DIGIT FIVE
-<U096C> \xF7 |0 # DEVANAGARI DIGIT SIX
-<U096D> \xF8 |0 # DEVANAGARI DIGIT SEVEN
-<U096E> \xF9 |0 # DEVANAGARI DIGIT EIGHT
-<U096F> \xFA |0 # DEVANAGARI DIGIT NINE
-<U0970> \x91 |0 # DEVANAGARI ABBREVIATION SIGN
-<U200E> \xD9 |0 # LEFT-TO-RIGHT MARK # invisible consonant
-<U2013> \x82 |0 # EN DASH
-<U2014> \x83 |0 # EM DASH
-<U2018> \x84 |0 # LEFT SINGLE QUOTATION MARK
-<U2019> \x85 |0 # RIGHT SINGLE QUOTATION MARK
-<U2022> \x87 |0 # BULLET
-<U2026> \x86 |0 # HORIZONTAL ELLIPSIS
-<U2122> \x8A |0 # TRADE MARK SIGN
-<U2212> \x81 |0 # MINUS SIGN
-END CHARMAP
+++ /dev/null
-#
-# $Id: macGujarati.ucm,v 1.20 2002/04/04 19:50:54 dankogai Exp $
-#
-# Original table can be obtained at
-# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/GUJARATI.TXT
-#
-<code_set_name> "MacGujarati"
-<mb_cur_min> 1
-<mb_cur_max> 2
-<subchar> \x3F
-CHARMAP
-<U0000> \x00 |0 # <control>
-<U0001> \x01 |0 # <control>
-<U0002> \x02 |0 # <control>
-<U0003> \x03 |0 # <control>
-<U0004> \x04 |0 # <control>
-<U0005> \x05 |0 # <control>
-<U0006> \x06 |0 # <control>
-<U0007> \x07 |0 # <control>
-<U0008> \x08 |0 # <control>
-<U0009> \x09 |0 # <control>
-<U000A> \x0A |0 # <control>
-<U000B> \x0B |0 # <control>
-<U000C> \x0C |0 # <control>
-<U000D> \x0D |0 # <control>
-<U000E> \x0E |0 # <control>
-<U000F> \x0F |0 # <control>
-<U0010> \x10 |0 # <control>
-<U0011> \x11 |0 # <control>
-<U0012> \x12 |0 # <control>
-<U0013> \x13 |0 # <control>
-<U0014> \x14 |0 # <control>
-<U0015> \x15 |0 # <control>
-<U0016> \x16 |0 # <control>
-<U0017> \x17 |0 # <control>
-<U0018> \x18 |0 # <control>
-<U0019> \x19 |0 # <control>
-<U001A> \x1A |0 # <control>
-<U001B> \x1B |0 # <control>
-<U001C> \x1C |0 # <control>
-<U001D> \x1D |0 # <control>
-<U001E> \x1E |0 # <control>
-<U001F> \x1F |0 # <control>
-<U0020> \x20 |0 # SPACE
-<U0021> \x21 |0 # EXCLAMATION MARK
-<U0022> \x22 |0 # QUOTATION MARK
-<U0023> \x23 |0 # NUMBER SIGN
-<U0024> \x24 |0 # DOLLAR SIGN
-<U0025> \x25 |0 # PERCENT SIGN
-<U0026> \x26 |0 # AMPERSAND
-<U0027> \x27 |0 # APOSTROPHE
-<U0028> \x28 |0 # LEFT PARENTHESIS
-<U0029> \x29 |0 # RIGHT PARENTHESIS
-<U002A> \x2A |0 # ASTERISK
-<U002B> \x2B |0 # PLUS SIGN
-<U002C> \x2C |0 # COMMA
-<U002D> \x2D |0 # HYPHEN-MINUS
-<U002E> \x2E |0 # FULL STOP
-<U002F> \x2F |0 # SOLIDUS
-<U0030> \x30 |0 # DIGIT ZERO
-<U0031> \x31 |0 # DIGIT ONE
-<U0032> \x32 |0 # DIGIT TWO
-<U0033> \x33 |0 # DIGIT THREE
-<U0034> \x34 |0 # DIGIT FOUR
-<U0035> \x35 |0 # DIGIT FIVE
-<U0036> \x36 |0 # DIGIT SIX
-<U0037> \x37 |0 # DIGIT SEVEN
-<U0038> \x38 |0 # DIGIT EIGHT
-<U0039> \x39 |0 # DIGIT NINE
-<U003A> \x3A |0 # COLON
-<U003B> \x3B |0 # SEMICOLON
-<U003C> \x3C |0 # LESS-THAN SIGN
-<U003D> \x3D |0 # EQUALS SIGN
-<U003E> \x3E |0 # GREATER-THAN SIGN
-<U003F> \x3F |0 # QUESTION MARK
-<U0040> \x40 |0 # COMMERCIAL AT
-<U0041> \x41 |0 # LATIN CAPITAL LETTER A
-<U0042> \x42 |0 # LATIN CAPITAL LETTER B
-<U0043> \x43 |0 # LATIN CAPITAL LETTER C
-<U0044> \x44 |0 # LATIN CAPITAL LETTER D
-<U0045> \x45 |0 # LATIN CAPITAL LETTER E
-<U0046> \x46 |0 # LATIN CAPITAL LETTER F
-<U0047> \x47 |0 # LATIN CAPITAL LETTER G
-<U0048> \x48 |0 # LATIN CAPITAL LETTER H
-<U0049> \x49 |0 # LATIN CAPITAL LETTER I
-<U004A> \x4A |0 # LATIN CAPITAL LETTER J
-<U004B> \x4B |0 # LATIN CAPITAL LETTER K
-<U004C> \x4C |0 # LATIN CAPITAL LETTER L
-<U004D> \x4D |0 # LATIN CAPITAL LETTER M
-<U004E> \x4E |0 # LATIN CAPITAL LETTER N
-<U004F> \x4F |0 # LATIN CAPITAL LETTER O
-<U0050> \x50 |0 # LATIN CAPITAL LETTER P
-<U0051> \x51 |0 # LATIN CAPITAL LETTER Q
-<U0052> \x52 |0 # LATIN CAPITAL LETTER R
-<U0053> \x53 |0 # LATIN CAPITAL LETTER S
-<U0054> \x54 |0 # LATIN CAPITAL LETTER T
-<U0055> \x55 |0 # LATIN CAPITAL LETTER U
-<U0056> \x56 |0 # LATIN CAPITAL LETTER V
-<U0057> \x57 |0 # LATIN CAPITAL LETTER W
-<U0058> \x58 |0 # LATIN CAPITAL LETTER X
-<U0059> \x59 |0 # LATIN CAPITAL LETTER Y
-<U005A> \x5A |0 # LATIN CAPITAL LETTER Z
-<U005B> \x5B |0 # LEFT SQUARE BRACKET
-<U005C> \x5C |0 # REVERSE SOLIDUS
-<U005D> \x5D |0 # RIGHT SQUARE BRACKET
-<U005E> \x5E |0 # CIRCUMFLEX ACCENT
-<U005F> \x5F |0 # LOW LINE
-<U0060> \x60 |0 # GRAVE ACCENT
-<U0061> \x61 |0 # LATIN SMALL LETTER A
-<U0062> \x62 |0 # LATIN SMALL LETTER B
-<U0063> \x63 |0 # LATIN SMALL LETTER C
-<U0064> \x64 |0 # LATIN SMALL LETTER D
-<U0065> \x65 |0 # LATIN SMALL LETTER E
-<U0066> \x66 |0 # LATIN SMALL LETTER F
-<U0067> \x67 |0 # LATIN SMALL LETTER G
-<U0068> \x68 |0 # LATIN SMALL LETTER H
-<U0069> \x69 |0 # LATIN SMALL LETTER I
-<U006A> \x6A |0 # LATIN SMALL LETTER J
-<U006B> \x6B |0 # LATIN SMALL LETTER K
-<U006C> \x6C |0 # LATIN SMALL LETTER L
-<U006D> \x6D |0 # LATIN SMALL LETTER M
-<U006E> \x6E |0 # LATIN SMALL LETTER N
-<U006F> \x6F |0 # LATIN SMALL LETTER O
-<U0070> \x70 |0 # LATIN SMALL LETTER P
-<U0071> \x71 |0 # LATIN SMALL LETTER Q
-<U0072> \x72 |0 # LATIN SMALL LETTER R
-<U0073> \x73 |0 # LATIN SMALL LETTER S
-<U0074> \x74 |0 # LATIN SMALL LETTER T
-<U0075> \x75 |0 # LATIN SMALL LETTER U
-<U0076> \x76 |0 # LATIN SMALL LETTER V
-<U0077> \x77 |0 # LATIN SMALL LETTER W
-<U0078> \x78 |0 # LATIN SMALL LETTER X
-<U0079> \x79 |0 # LATIN SMALL LETTER Y
-<U007A> \x7A |0 # LATIN SMALL LETTER Z
-<U007B> \x7B |0 # LEFT CURLY BRACKET
-<U007C> \x7C |0 # VERTICAL LINE
-<U007D> \x7D |0 # RIGHT CURLY BRACKET
-<U007E> \x7E |0 # TILDE
-<U00A9> \x88 |0 # COPYRIGHT SIGN
-<U00AE> \x89 |0 # REGISTERED SIGN
-<U00D7> \x80 |0 # MULTIPLICATION SIGN
-<U0964> \xEA |0 # DEVANAGARI DANDA
-<U0965> \x90 |0 # DEVANAGARI DOUBLE DANDA
-<U0A81> \xA1 |0 # GUJARATI SIGN CANDRABINDU
-<U0A82> \xA2 |0 # GUJARATI SIGN ANUSVARA
-<U0A83> \xA3 |0 # GUJARATI SIGN VISARGA
-<U0A85> \xA4 |0 # GUJARATI LETTER A
-<U0A86> \xA5 |0 # GUJARATI LETTER AA
-<U0A87> \xA6 |0 # GUJARATI LETTER I
-<U0A88> \xA7 |0 # GUJARATI LETTER II
-<U0A89> \xA8 |0 # GUJARATI LETTER U
-<U0A8A> \xA9 |0 # GUJARATI LETTER UU
-<U0A8B> \xAA |0 # GUJARATI LETTER VOCALIC R
-<U0A8D> \xAE |0 # GUJARATI VOWEL CANDRA E
-<U0A8F> \xAC |0 # GUJARATI LETTER E
-<U0A90> \xAD |0 # GUJARATI LETTER AI
-<U0A91> \xB2 |0 # GUJARATI VOWEL CANDRA O
-<U0A93> \xB0 |0 # GUJARATI LETTER O
-<U0A94> \xB1 |0 # GUJARATI LETTER AU
-<U0A95> \xB3 |0 # GUJARATI LETTER KA
-<U0A96> \xB4 |0 # GUJARATI LETTER KHA
-<U0A97> \xB5 |0 # GUJARATI LETTER GA
-<U0A98> \xB6 |0 # GUJARATI LETTER GHA
-<U0A99> \xB7 |0 # GUJARATI LETTER NGA
-<U0A9A> \xB8 |0 # GUJARATI LETTER CA
-<U0A9B> \xB9 |0 # GUJARATI LETTER CHA
-<U0A9C> \xBA |0 # GUJARATI LETTER JA
-<U0A9D> \xBB |0 # GUJARATI LETTER JHA
-<U0A9E> \xBC |0 # GUJARATI LETTER NYA
-<U0A9F> \xBD |0 # GUJARATI LETTER TTA
-<U0AA0> \xBE |0 # GUJARATI LETTER TTHA
-<U0AA1> \xBF |0 # GUJARATI LETTER DDA
-<U0AA2> \xC0 |0 # GUJARATI LETTER DDHA
-<U0AA3> \xC1 |0 # GUJARATI LETTER NNA
-<U0AA4> \xC2 |0 # GUJARATI LETTER TA
-<U0AA5> \xC3 |0 # GUJARATI LETTER THA
-<U0AA6> \xC4 |0 # GUJARATI LETTER DA
-<U0AA7> \xC5 |0 # GUJARATI LETTER DHA
-<U0AA8> \xC6 |0 # GUJARATI LETTER NA
-<U0AAA> \xC8 |0 # GUJARATI LETTER PA
-<U0AAB> \xC9 |0 # GUJARATI LETTER PHA
-<U0AAC> \xCA |0 # GUJARATI LETTER BA
-<U0AAD> \xCB |0 # GUJARATI LETTER BHA
-<U0AAE> \xCC |0 # GUJARATI LETTER MA
-<U0AAF> \xCD |0 # GUJARATI LETTER YA
-<U0AB0> \xCF |0 # GUJARATI LETTER RA
-<U0AB2> \xD1 |0 # GUJARATI LETTER LA
-<U0AB3> \xD2 |0 # GUJARATI LETTER LLA
-<U0AB5> \xD4 |0 # GUJARATI LETTER VA
-<U0AB6> \xD5 |0 # GUJARATI LETTER SHA
-<U0AB7> \xD6 |0 # GUJARATI LETTER SSA
-<U0AB8> \xD7 |0 # GUJARATI LETTER SA
-<U0AB9> \xD8 |0 # GUJARATI LETTER HA
-<U0ABC> \xE9 |0 # GUJARATI SIGN NUKTA
-<U0ABE> \xDA |0 # GUJARATI VOWEL SIGN AA
-<U0ABF> \xDB |0 # GUJARATI VOWEL SIGN I
-<U0AC0> \xDC |0 # GUJARATI VOWEL SIGN II
-<U0AC1> \xDD |0 # GUJARATI VOWEL SIGN U
-<U0AC2> \xDE |0 # GUJARATI VOWEL SIGN UU
-<U0AC3> \xDF |0 # GUJARATI VOWEL SIGN VOCALIC R
-<U0AC4> \xDF\xE9 |1 # GUJARATI VOWEL SIGN VOCALIC RR
-<U0AC5> \xE3 |0 # GUJARATI VOWEL SIGN CANDRA E
-<U0AC7> \xE1 |0 # GUJARATI VOWEL SIGN E
-<U0AC8> \xE2 |0 # GUJARATI VOWEL SIGN AI
-<U0AC9> \xE7 |0 # GUJARATI VOWEL SIGN CANDRA O
-<U0ACB> \xE5 |0 # GUJARATI VOWEL SIGN O
-<U0ACC> \xE6 |0 # GUJARATI VOWEL SIGN AU
-<U0ACD> \xE8 |0 # GUJARATI SIGN VIRAMA # halant
-<U0ACD><U200C> \xE8\xE8 |1 # GUJARATI SIGN VIRAMA + ZWNJ # explicit halant
-<U0ACD><U200D> \xE8\xE9 |1 # GUJARATI SIGN VIRAMA + ZWJ # soft halant
-<U0AD0> \xA1\xE9 |1 # GUJARATI OM
-<U0AE0> \xAA\xE9 |1 # GUJARATI LETTER VOCALIC RR
-<U0AE6> \xF1 |0 # GUJARATI DIGIT ZERO
-<U0AE7> \xF2 |0 # GUJARATI DIGIT ONE
-<U0AE8> \xF3 |0 # GUJARATI DIGIT TWO
-<U0AE9> \xF4 |0 # GUJARATI DIGIT THREE
-<U0AEA> \xF5 |0 # GUJARATI DIGIT FOUR
-<U0AEB> \xF6 |0 # GUJARATI DIGIT FIVE
-<U0AEC> \xF7 |0 # GUJARATI DIGIT SIX
-<U0AED> \xF8 |0 # GUJARATI DIGIT SEVEN
-<U0AEE> \xF9 |0 # GUJARATI DIGIT EIGHT
-<U0AEF> \xFA |0 # GUJARATI DIGIT NINE
-<U200E> \xD9 |0 # LEFT-TO-RIGHT MARK # invisible consonant
-<U2013> \x82 |0 # EN DASH
-<U2014> \x83 |0 # EM DASH
-<U2018> \x84 |0 # LEFT SINGLE QUOTATION MARK
-<U2019> \x85 |0 # RIGHT SINGLE QUOTATION MARK
-<U2022> \x87 |0 # BULLET
-<U2026> \x86 |0 # HORIZONTAL ELLIPSIS
-<U2122> \x8A |0 # TRADE MARK SIGN
-<U2212> \x81 |0 # MINUS SIGN
-END CHARMAP
+++ /dev/null
-#
-# $Id: macGurmukhi.ucm,v 1.20 2002/04/04 19:50:54 dankogai Exp $
-#
-# Original table can be obtained at
-# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/GURMUKHI.TXT
-#
-<code_set_name> "MacGurmukhi"
-<mb_cur_min> 1
-<mb_cur_max> 2
-<subchar> \x3F
-CHARMAP
-<U0000> \x00 |0 # <control>
-<U0001> \x01 |0 # <control>
-<U0002> \x02 |0 # <control>
-<U0003> \x03 |0 # <control>
-<U0004> \x04 |0 # <control>
-<U0005> \x05 |0 # <control>
-<U0006> \x06 |0 # <control>
-<U0007> \x07 |0 # <control>
-<U0008> \x08 |0 # <control>
-<U0009> \x09 |0 # <control>
-<U000A> \x0A |0 # <control>
-<U000B> \x0B |0 # <control>
-<U000C> \x0C |0 # <control>
-<U000D> \x0D |0 # <control>
-<U000E> \x0E |0 # <control>
-<U000F> \x0F |0 # <control>
-<U0010> \x10 |0 # <control>
-<U0011> \x11 |0 # <control>
-<U0012> \x12 |0 # <control>
-<U0013> \x13 |0 # <control>
-<U0014> \x14 |0 # <control>
-<U0015> \x15 |0 # <control>
-<U0016> \x16 |0 # <control>
-<U0017> \x17 |0 # <control>
-<U0018> \x18 |0 # <control>
-<U0019> \x19 |0 # <control>
-<U001A> \x1A |0 # <control>
-<U001B> \x1B |0 # <control>
-<U001C> \x1C |0 # <control>
-<U001D> \x1D |0 # <control>
-<U001E> \x1E |0 # <control>
-<U001F> \x1F |0 # <control>
-<U0020> \x20 |0 # SPACE
-<U0021> \x21 |0 # EXCLAMATION MARK
-<U0022> \x22 |0 # QUOTATION MARK
-<U0023> \x23 |0 # NUMBER SIGN
-<U0024> \x24 |0 # DOLLAR SIGN
-<U0025> \x25 |0 # PERCENT SIGN
-<U0026> \x26 |0 # AMPERSAND
-<U0027> \x27 |0 # APOSTROPHE
-<U0028> \x28 |0 # LEFT PARENTHESIS
-<U0029> \x29 |0 # RIGHT PARENTHESIS
-<U002A> \x2A |0 # ASTERISK
-<U002B> \x2B |0 # PLUS SIGN
-<U002C> \x2C |0 # COMMA
-<U002D> \x2D |0 # HYPHEN-MINUS
-<U002E> \x2E |0 # FULL STOP
-<U002F> \x2F |0 # SOLIDUS
-<U0030> \x30 |0 # DIGIT ZERO
-<U0031> \x31 |0 # DIGIT ONE
-<U0032> \x32 |0 # DIGIT TWO
-<U0033> \x33 |0 # DIGIT THREE
-<U0034> \x34 |0 # DIGIT FOUR
-<U0035> \x35 |0 # DIGIT FIVE
-<U0036> \x36 |0 # DIGIT SIX
-<U0037> \x37 |0 # DIGIT SEVEN
-<U0038> \x38 |0 # DIGIT EIGHT
-<U0039> \x39 |0 # DIGIT NINE
-<U003A> \x3A |0 # COLON
-<U003B> \x3B |0 # SEMICOLON
-<U003C> \x3C |0 # LESS-THAN SIGN
-<U003D> \x3D |0 # EQUALS SIGN
-<U003E> \x3E |0 # GREATER-THAN SIGN
-<U003F> \x3F |0 # QUESTION MARK
-<U0040> \x40 |0 # COMMERCIAL AT
-<U0041> \x41 |0 # LATIN CAPITAL LETTER A
-<U0042> \x42 |0 # LATIN CAPITAL LETTER B
-<U0043> \x43 |0 # LATIN CAPITAL LETTER C
-<U0044> \x44 |0 # LATIN CAPITAL LETTER D
-<U0045> \x45 |0 # LATIN CAPITAL LETTER E
-<U0046> \x46 |0 # LATIN CAPITAL LETTER F
-<U0047> \x47 |0 # LATIN CAPITAL LETTER G
-<U0048> \x48 |0 # LATIN CAPITAL LETTER H
-<U0049> \x49 |0 # LATIN CAPITAL LETTER I
-<U004A> \x4A |0 # LATIN CAPITAL LETTER J
-<U004B> \x4B |0 # LATIN CAPITAL LETTER K
-<U004C> \x4C |0 # LATIN CAPITAL LETTER L
-<U004D> \x4D |0 # LATIN CAPITAL LETTER M
-<U004E> \x4E |0 # LATIN CAPITAL LETTER N
-<U004F> \x4F |0 # LATIN CAPITAL LETTER O
-<U0050> \x50 |0 # LATIN CAPITAL LETTER P
-<U0051> \x51 |0 # LATIN CAPITAL LETTER Q
-<U0052> \x52 |0 # LATIN CAPITAL LETTER R
-<U0053> \x53 |0 # LATIN CAPITAL LETTER S
-<U0054> \x54 |0 # LATIN CAPITAL LETTER T
-<U0055> \x55 |0 # LATIN CAPITAL LETTER U
-<U0056> \x56 |0 # LATIN CAPITAL LETTER V
-<U0057> \x57 |0 # LATIN CAPITAL LETTER W
-<U0058> \x58 |0 # LATIN CAPITAL LETTER X
-<U0059> \x59 |0 # LATIN CAPITAL LETTER Y
-<U005A> \x5A |0 # LATIN CAPITAL LETTER Z
-<U005B> \x5B |0 # LEFT SQUARE BRACKET
-<U005C> \x5C |0 # REVERSE SOLIDUS
-<U005D> \x5D |0 # RIGHT SQUARE BRACKET
-<U005E> \x5E |0 # CIRCUMFLEX ACCENT
-<U005F> \x5F |0 # LOW LINE
-<U0060> \x60 |0 # GRAVE ACCENT
-<U0061> \x61 |0 # LATIN SMALL LETTER A
-<U0062> \x62 |0 # LATIN SMALL LETTER B
-<U0063> \x63 |0 # LATIN SMALL LETTER C
-<U0064> \x64 |0 # LATIN SMALL LETTER D
-<U0065> \x65 |0 # LATIN SMALL LETTER E
-<U0066> \x66 |0 # LATIN SMALL LETTER F
-<U0067> \x67 |0 # LATIN SMALL LETTER G
-<U0068> \x68 |0 # LATIN SMALL LETTER H
-<U0069> \x69 |0 # LATIN SMALL LETTER I
-<U006A> \x6A |0 # LATIN SMALL LETTER J
-<U006B> \x6B |0 # LATIN SMALL LETTER K
-<U006C> \x6C |0 # LATIN SMALL LETTER L
-<U006D> \x6D |0 # LATIN SMALL LETTER M
-<U006E> \x6E |0 # LATIN SMALL LETTER N
-<U006F> \x6F |0 # LATIN SMALL LETTER O
-<U0070> \x70 |0 # LATIN SMALL LETTER P
-<U0071> \x71 |0 # LATIN SMALL LETTER Q
-<U0072> \x72 |0 # LATIN SMALL LETTER R
-<U0073> \x73 |0 # LATIN SMALL LETTER S
-<U0074> \x74 |0 # LATIN SMALL LETTER T
-<U0075> \x75 |0 # LATIN SMALL LETTER U
-<U0076> \x76 |0 # LATIN SMALL LETTER V
-<U0077> \x77 |0 # LATIN SMALL LETTER W
-<U0078> \x78 |0 # LATIN SMALL LETTER X
-<U0079> \x79 |0 # LATIN SMALL LETTER Y
-<U007A> \x7A |0 # LATIN SMALL LETTER Z
-<U007B> \x7B |0 # LEFT CURLY BRACKET
-<U007C> \x7C |0 # VERTICAL LINE
-<U007D> \x7D |0 # RIGHT CURLY BRACKET
-<U007E> \x7E |0 # TILDE
-<U00A9> \x88 |0 # COPYRIGHT SIGN
-<U00AE> \x89 |0 # REGISTERED SIGN
-<U00D7> \x80 |0 # MULTIPLICATION SIGN
-<U0964> \xEA |0 # DEVANAGARI DANDA
-<U0A02> \xA2 |0 # GURMUKHI SIGN BINDI
-<U0A05> \xA4 |0 # GURMUKHI LETTER A
-<U0A06> \xA5 |0 # GURMUKHI LETTER AA
-<U0A07> \xA6 |0 # GURMUKHI LETTER I
-<U0A08> \xA7 |0 # GURMUKHI LETTER II
-<U0A09> \xA8 |0 # GURMUKHI LETTER U
-<U0A0A> \xA9 |0 # GURMUKHI LETTER UU
-<U0A0F> \xAC |0 # GURMUKHI LETTER EE
-<U0A10> \xAD |0 # GURMUKHI LETTER AI
-<U0A13> \xB0 |0 # GURMUKHI LETTER OO
-<U0A14> \xB1 |0 # GURMUKHI LETTER AU
-<U0A15> \xB3 |0 # GURMUKHI LETTER KA
-<U0A16> \xB4 |0 # GURMUKHI LETTER KHA
-<U0A17> \xB5 |0 # GURMUKHI LETTER GA
-<U0A18> \xB6 |0 # GURMUKHI LETTER GHA
-<U0A19> \xB7 |0 # GURMUKHI LETTER NGA
-<U0A1A> \xB8 |0 # GURMUKHI LETTER CA
-<U0A1B> \xB9 |0 # GURMUKHI LETTER CHA
-<U0A1C> \xBA |0 # GURMUKHI LETTER JA
-<U0A1D> \xBB |0 # GURMUKHI LETTER JHA
-<U0A1E> \xBC |0 # GURMUKHI LETTER NYA
-<U0A1F> \xBD |0 # GURMUKHI LETTER TTA
-<U0A20> \xBE |0 # GURMUKHI LETTER TTHA
-<U0A21> \xBF |0 # GURMUKHI LETTER DDA
-<U0A22> \xC0 |0 # GURMUKHI LETTER DDHA
-<U0A23> \xC1 |0 # GURMUKHI LETTER NNA
-<U0A24> \xC2 |0 # GURMUKHI LETTER TA
-<U0A25> \xC3 |0 # GURMUKHI LETTER THA
-<U0A26> \xC4 |0 # GURMUKHI LETTER DA
-<U0A27> \xC5 |0 # GURMUKHI LETTER DHA
-<U0A28> \xC6 |0 # GURMUKHI LETTER NA
-<U0A2A> \xC8 |0 # GURMUKHI LETTER PA
-<U0A2B> \xC9 |0 # GURMUKHI LETTER PHA
-<U0A2C> \xCA |0 # GURMUKHI LETTER BA
-<U0A2D> \xCB |0 # GURMUKHI LETTER BHA
-<U0A2E> \xCC |0 # GURMUKHI LETTER MA
-<U0A2F> \xCD |0 # GURMUKHI LETTER YA
-<U0A30> \xCF |0 # GURMUKHI LETTER RA
-<U0A32> \xD1 |0 # GURMUKHI LETTER LA
-<U0A35> \xD4 |0 # GURMUKHI LETTER VA
-<U0A36> \xD5 |0 # GURMUKHI LETTER SHA
-<U0A38> \xD7 |0 # GURMUKHI LETTER SA
-<U0A39> \xD8 |0 # GURMUKHI LETTER HA
-<U0A3C> \xE9 |0 # GURMUKHI SIGN NUKTA
-<U0A3E> \xDA |0 # GURMUKHI VOWEL SIGN AA
-<U0A3F> \xDB |0 # GURMUKHI VOWEL SIGN I
-<U0A40> \xDC |0 # GURMUKHI VOWEL SIGN II
-<U0A41> \xDD |0 # GURMUKHI VOWEL SIGN U
-<U0A42> \xDE |0 # GURMUKHI VOWEL SIGN UU
-<U0A47> \xE1 |0 # GURMUKHI VOWEL SIGN EE
-<U0A48> \xE2 |0 # GURMUKHI VOWEL SIGN AI
-<U0A4B> \xE5 |0 # GURMUKHI VOWEL SIGN OO
-<U0A4C> \xE6 |0 # GURMUKHI VOWEL SIGN AU
-<U0A4D> \xE8 |0 # GURMUKHI SIGN VIRAMA # halant
-<U0A4D><U200C> \xE8\xE8 |1 # GURMUKHI SIGN VIRAMA + ZWNJ # explicit halant
-<U0A4D><U200D> \xE8\xE9 |1 # GURMUKHI SIGN VIRAMA + ZWJ # soft halant
-<U0A66> \xF1 |0 # GURMUKHI DIGIT ZERO
-<U0A67> \xF2 |0 # GURMUKHI DIGIT ONE
-<U0A68> \xF3 |0 # GURMUKHI DIGIT TWO
-<U0A69> \xF4 |0 # GURMUKHI DIGIT THREE
-<U0A6A> \xF5 |0 # GURMUKHI DIGIT FOUR
-<U0A6B> \xF6 |0 # GURMUKHI DIGIT FIVE
-<U0A6C> \xF7 |0 # GURMUKHI DIGIT SIX
-<U0A6D> \xF8 |0 # GURMUKHI DIGIT SEVEN
-<U0A6E> \xF9 |0 # GURMUKHI DIGIT EIGHT
-<U0A6F> \xFA |0 # GURMUKHI DIGIT NINE
-<U0A71> \x90 |0 # GURMUKHI ADDAK
-<U0A72> \x93 |0 # GURMUKHI IRI
-<U0A73> \x92 |0 # GURMUKHI URA
-<U0A74> \x94 |0 # GURMUKHI EK ONKAR
-<U200E> \xD9 |0 # LEFT-TO-RIGHT MARK # invisible consonant
-<U2013> \x82 |0 # EN DASH
-<U2014> \x83 |0 # EM DASH
-<U2018> \x84 |0 # LEFT SINGLE QUOTATION MARK
-<U2019> \x85 |0 # RIGHT SINGLE QUOTATION MARK
-<U2022> \x87 |0 # BULLET
-<U2026> \x86 |0 # HORIZONTAL ELLIPSIS
-<U2122> \x8A |0 # TRADE MARK SIGN
-<U2212> \x81 |0 # MINUS SIGN
-<UF860><U0A21><U0A3C> \x91 |3 # GURMUKHI LETTER RRA, alternate
-END CHARMAP
#
-# $Id: macROMnn.ucm,v 1.20 2002/04/04 19:50:55 dankogai Exp $
+# $Id: macROMnn.ucm,v 1.21 2002/04/09 20:06:15 dankogai Exp dankogai $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT
<U0050> \x50 |0 # LATIN CAPITAL LETTER P
<U0051> \x51 |0 # LATIN CAPITAL LETTER Q
<U0052> \x52 |0 # LATIN CAPITAL LETTER R
-<U0053+U0326> \xAF |3 # LATIN CAPITAL LETTER S + COMBINING COMMA BELOW
+<U0053><U0326> \xAF |3 # LATIN CAPITAL LETTER S + COMBINING COMMA BELOW
<U0053> \x53 |0 # LATIN CAPITAL LETTER S
-<U0054+U0326> \xDE |3 # LATIN CAPITAL LETTER T + COMBINING COMMA BELOW
+<U0054><U0326> \xDE |3 # LATIN CAPITAL LETTER T + COMBINING COMMA BELOW
<U0054> \x54 |0 # LATIN CAPITAL LETTER T
<U0055> \x55 |0 # LATIN CAPITAL LETTER U
<U0056> \x56 |0 # LATIN CAPITAL LETTER V
<U0070> \x70 |0 # LATIN SMALL LETTER P
<U0071> \x71 |0 # LATIN SMALL LETTER Q
<U0072> \x72 |0 # LATIN SMALL LETTER R
-<U0073+U0326> \xBF |3 # LATIN SMALL LETTER S + COMBINING COMMA BELOW
+<U0073><U0326> \xBF |3 # LATIN SMALL LETTER S + COMBINING COMMA BELOW
<U0073> \x73 |0 # LATIN SMALL LETTER S
-<U0074+U0326> \xDF |3 # LATIN SMALL LETTER T + COMBINING COMMA BELOW
+<U0074><U0326> \xDF |3 # LATIN SMALL LETTER T + COMBINING COMMA BELOW
<U0074> \x74 |0 # LATIN SMALL LETTER T
<U0075> \x75 |0 # LATIN SMALL LETTER U
<U0076> \x76 |0 # LATIN SMALL LETTER V