From: Rafael Garcia-Suarez Date: Sat, 23 Oct 2004 19:58:07 +0000 (+0000) Subject: Upgrade to Encode 2.07 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f9d05ba35dc7d01260b38a6dc93f199c3b1d2c39;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Encode 2.07 p4raw-id: //depot/perl@23417 --- diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index a40701f..8614dd8 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -8,11 +8,12 @@ # source code kit or CPAN is, of course, allowed.) # # This list is in alphabetical order. --- +-- Andreas J. Koenig Anton Tagunov Autrijus Tang Benjamin Goldberg +Bjoern Hoehrmann Bjoern Jacke Chris Nandor Craig A. Berry @@ -47,5 +48,6 @@ Simon Cozens Spider Boardman Steve Hay Tatsuhiko Miyagawa +Tels Vadim Konovalov Yitzchak Scott-Thoennes diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 1cffc39..56e5f1b 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,9 +1,38 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.5 2004/10/19 04:55:01 dankogai Exp dankogai $ +# $Id: Changes,v 2.7 2004/10/22 19:35:52 dankogai Exp $ # +$Revision: 2.7 $ $Date: 2004/10/22 19:35:52 $ +! lib/Encode/Encoding.pm + "Remove Carp from warnings.pm" that influences Encode, by Tels. + Message-Id: <200410161618.29779@bloodgate.com> +! Encode.xs AUTHORS t/fallback.t + Now Encode::utf8's fallbacks are compliant to Encode standard. + Thank Bjoern Hoehrmann for persistently convincing me. + Message-Id: <41a61aea.638409494@smtp.bjoern.hoehrmann.de> +! Encode.pm + POD further revised. + +2.06 2004/10/22 06:23:11 +! ucm/mac* + RT #8083 reports that MacThai mapping was obsolete + Updated all mac* encodings accordingly to the URI below. + One remaining mystery is that MacRomanian vs. MacRumanian. + MacRumanian is not found in unicode.org... + http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ +! Encode.pm t/Encode.t + Fixed RT #8081: "decode(..., bless{},'x') segfault" + Two more tests added to test that. + http://rt.cpan.org/NoAuth/Bug.html?id=8081 +! Encode.pm + POD revised accordingly to RT #7966 + http://rt.cpan.org/NoAuth/Bug.html?id=7966 +! Unicode/Unicode.pm + POD updated explaining why Encode::Unicode always croaks on error + rather than giving users choices. + http://rt.cpan.org/NoAuth/Bug.html?id=7892 -$Revision: 2.5 $ $Date: 2004/10/19 04:55:01 $ +2.05 2004/10/19 04:55:01 ! encoding.pm "unnuke" jhi's patch in bleedperl, with minor correction by dankogai. Message-ID: <41210A84.6060506@iki.fi> diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 266efc6..97b5f07 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,9 +1,9 @@ # -# $Id: Encode.pm,v 2.5 2004/10/19 04:54:43 dankogai Exp $ +# $Id: Encode.pm,v 2.7 2004/10/22 19:35:52 dankogai Exp $ # package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 2.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; sub DEBUG () { 0 } use XSLoader (); XSLoader::load(__PACKAGE__, $VERSION); @@ -140,6 +140,7 @@ sub encode($$;$) { my ($name, $string, $check) = @_; return undef unless defined $string; + return undef if ref $string; $check ||=0; my $enc = find_encoding($name); unless(defined $enc){ @@ -155,6 +156,7 @@ sub decode($$;$) { my ($name,$octets,$check) = @_; return undef unless defined $octets; + return undef if ref $octets; $check ||=0; my $enc = find_encoding($name); unless(defined $enc){ @@ -429,7 +431,8 @@ decode($valid_encoding, '') is harmless and warnless. Converts B data between two encodings. The data in $octets must be encoded as octets and not as characters in Perl's internal -format. For example, to convert ISO-8859-1 data to Microsoft's CP1250 encoding: +format. For example, to convert ISO-8859-1 data to Microsoft's CP1250 +encoding: from_to($octets, "iso-8859-1", "cp1250"); @@ -440,8 +443,8 @@ and to convert it back: Note that because the conversion happens in place, the data to be converted cannot be a string constant; it must be a scalar variable. -from_to() returns the length of the converted string in octets on success, undef -otherwise. +from_to() returns the length of the converted string in octets on +success, I on error. B: The following operations look the same but are not quite so; @@ -551,40 +554,51 @@ method. perlio_ok("euc-jp") Fortunately, all encodings that come with Encode core are PerlIO-savvy -except for hz and ISO-2022-kr. For gory details, see L and L. +except for hz and ISO-2022-kr. For gory details, see +L and L. =head1 Handling Malformed Data -The I argument is used as follows. When you omit it, -the behaviour is the same as if you had passed a value of 0 for -I. +The optional I argument is used as follows. When you omit it, +Encode::FB_DEFAULT ( == 0 ) is assumed. + +=over 2 + +=item B Not all encoding suppport this feature + +Some encodings ignore I argument. For example, +L ignores I and it always croaks on error. + +=back + +Now here is the list of I values available =over 2 =item I = Encode::FB_DEFAULT ( == 0) -If I is 0, (en|de)code will put a I -in place of a malformed character. For UCM-based encodings, -EsubcharE will be used. For Unicode, the code point C<0xFFFD> is used. -If the data is supposed to be UTF-8, an optional lexical warning -(category utf8) is given. +If I is 0, (en|de)code will put a I in +place of a malformed character. When you encode to UCM-based encodings, +EsubcharE will be used. When you decode from UCM-based +encodings, the code point C<0xFFFD> is used. If the data is supposed +to be UTF-8, an optional lexical warning (category utf8) is given. =item I = Encode::FB_CROAK ( == 1) If I is 1, methods will die on error immediately with an error message. Therefore, when I is set to 1, you should trap the -fatal error with eval{} unless you really want to let it die on error. +error with eval{} unless you really want to let it die. =item I = Encode::FB_QUIET If I is set to Encode::FB_QUIET, (en|de)code will immediately -return the portion of the data that has been processed so far when -an error occurs. The data argument will be overwritten with -everything after that point (that is, the unprocessed part of data). -This is handy when you have to call decode repeatedly in the case -where your source data may contain partial multi-byte character -sequences, for example because you are reading with a fixed-width -buffer. Here is some sample code that does exactly this: +return the portion of the data that has been processed so far when an +error occurs. The data argument will be overwritten with everything +after that point (that is, the unprocessed part of data). This is +handy when you have to call decode repeatedly in the case where your +source data may contain partial multi-byte character sequences, +(i.e. you are reading with a fixed-width buffer). Here is a sample +code that does exactly this: my $data = ''; my $utf8 = ''; while(defined(read $fh, $buffer, 256)){ @@ -615,8 +629,8 @@ where I is the Unicode ID of the character that cannot be found in the character repertoire of the encoding. HTML/XML character reference modes are about the same, in place of -C<\x{I}>, HTML uses C<&#I>; where I is a decimal digit and -XML uses C<&#xI>; where I is the hexadecimal digit. +C<\x{I}>, HTML uses C<&#I;> where I is a decimal digit and +XML uses C<&#xI;> where I is the hexadecimal digit. =item The bitmask diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 77d53af..3747b6d 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.0 2004/05/16 20:55:15 dankogai Exp $ + $Id: Encode.xs,v 2.1 2004/10/22 19:35:52 dankogai Exp $ */ #define PERL_NO_GET_CONTEXT @@ -157,24 +157,15 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, if (check & ENCODE_RETURN_ON_ERR){ goto ENCODE_SET_SRC; } - if (check & ENCODE_PERLQQ){ - SV* perlqq = - sv_2mortal(newSVpvf("\\x{%04"UVxf"}", (UV)ch)); + if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + SV* subchar = + newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" : + check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : + "&#x%" UVxf ";", (UV)ch); sdone += slen + clen; - ddone += dlen + SvCUR(perlqq); - sv_catsv(dst, perlqq); - }else if (check & ENCODE_HTMLCREF){ - SV* htmlcref = - sv_2mortal(newSVpvf("&#%" UVuf ";", (UV)ch)); - sdone += slen + clen; - ddone += dlen + SvCUR(htmlcref); - sv_catsv(dst, htmlcref); - }else if (check & ENCODE_XMLCREF){ - SV* xmlcref = - sv_2mortal(newSVpvf("&#x%" UVxf ";", (UV)ch)); - sdone += slen + clen; - ddone += dlen + SvCUR(xmlcref); - sv_catsv(dst, xmlcref); + ddone += dlen + SvCUR(subchar); + sv_catsv(dst, subchar); + SvREFCNT_dec(subchar); } else { /* fallback char */ sdone += slen + clen; @@ -200,11 +191,11 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ - SV* perlqq = - sv_2mortal(newSVpvf("\\x%02" UVXf, (UV)s[slen])); + SV* subchar = newSVpvf("\\x%02" UVXf, (UV)s[slen]); sdone += slen + 1; - ddone += dlen + SvCUR(perlqq); - sv_catsv(dst, perlqq); + ddone += dlen + SvCUR(subchar); + sv_catsv(dst, subchar); + SvREFCNT_dec(subchar); } else { sdone += slen + 1; ddone += dlen + strlen(FBCHAR_UTF8); @@ -297,7 +288,7 @@ CODE: U8 skip = UTF8SKIP(s); if ((s + skip) > e) { /* Partial character - done */ - break; + goto decode_utf8_fallback; } else if (is_utf8_char(s)) { /* Whole char is good */ @@ -313,6 +304,7 @@ CODE: /* Invalid start byte */ } /* If we get here there is something wrong with alleged UTF-8 */ + decode_utf8_fallback: if (check & ENCODE_DIE_ON_ERR){ Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", (UV)*s); XSRETURN(0); @@ -325,9 +317,9 @@ CODE: break; } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ - SV* perlqq = newSVpvf("\\x%02" UVXf, (UV)*s); - sv_catsv(dst, perlqq); - SvREFCNT_dec(perlqq); + SV* subchar = newSVpvf("\\x%02" UVXf, (UV)*s); + sv_catsv(dst, subchar); + SvREFCNT_dec(subchar); } else { sv_catpv(dst, FBCHAR_UTF8); } diff --git a/ext/Encode/META.yml b/ext/Encode/META.yml index 5a25863..32cb504 100644 --- a/ext/Encode/META.yml +++ b/ext/Encode/META.yml @@ -1,7 +1,7 @@ # http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: Encode -version: 2.05 +version: 2.07 version_from: Encode.pm installdirs: perl requires: diff --git a/ext/Encode/Unicode/Unicode.pm b/ext/Encode/Unicode/Unicode.pm index 8c661a4..bd9c188 100644 --- a/ext/Encode/Unicode/Unicode.pm +++ b/ext/Encode/Unicode/Unicode.pm @@ -4,7 +4,7 @@ use strict; use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; XSLoader::load(__PACKAGE__,$VERSION); @@ -234,6 +234,24 @@ every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I. (*) or \x{ffff_ffff_ffff_ffff} if your perl is compiled with 64-bit integer support! +=head1 Error Checking + +Unlike most encodings which accept various ways to handle errors, +Unicode encodings simply croaks. + + % perl -MEncode -e '$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \ + -e 'Encode::from_to($_, "utf16","shift_jis", 0); print' + UTF-16:Malformed LO surrogate d8d9 at /path/to/Encode.pm line 184. + % perl -MEncode -e '$a = "BOM missing"' \ + -e ' Encode::from_to($a, "utf16", "shift_jis", 0); print' + UTF-16:Unrecognised BOM 424f at /path/to/Encode.pm line 184. + +Unlike other encodings where mappings are not one-to-one against +Unicode, UTFs are supposed to map 100% against one another. So Encode +is more strict on UTFs. + +Consider that "division by zero" of Encode :) + =head1 SEE ALSO L, L, L, diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm index 6eccc7e..b52280f 100644 --- a/ext/Encode/encoding.pm +++ b/ext/Encode/encoding.pm @@ -1,4 +1,4 @@ -# $Id: encoding.pm,v 2.1 2004/10/19 04:55:01 dankogai Exp dankogai $ +# $Id: encoding.pm,v 2.1 2004/10/19 04:55:01 dankogai Exp $ package encoding; our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; diff --git a/ext/Encode/lib/Encode/Encoding.pm b/ext/Encode/lib/Encode/Encoding.pm index 92f8c96..1fad60a 100644 --- a/ext/Encode/lib/Encode/Encoding.pm +++ b/ext/Encode/lib/Encode/Encoding.pm @@ -1,7 +1,7 @@ package Encode::Encoding; # Base class for classes which implement encodings use strict; -our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; require Encode; @@ -39,14 +39,14 @@ sub encode { require Carp; my $obj = shift; my $class = ref($obj) ? ref($obj) : $obj; - Carp::croak $class, "->encode() not defined!"; + Carp::croak($class . "->encode() not defined!"); } sub decode{ require Carp; my $obj = shift; my $class = ref($obj) ? ref($obj) : $obj; - Carp::croak $class, "->encode() not defined!"; + Carp::croak($class . "->encode() not defined!"); } sub DESTROY {} diff --git a/ext/Encode/t/Encode.t b/ext/Encode/t/Encode.t index 784ea74..63e913a 100644 --- a/ext/Encode/t/Encode.t +++ b/ext/Encode/t/Encode.t @@ -25,7 +25,7 @@ my @character_set = ('0'..'9', 'A'..'Z', 'a'..'z'); my @source = qw(ascii iso8859-1 cp1250); my @destiny = qw(cp1047 cp37 posix-bc); my @ebcdic_sets = qw(cp1047 cp37 posix-bc); -plan test => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6; +plan test => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6 + 2; my $str = join('',map(chr($_),0x20..0x7E)); my $cpy = $str; ok(length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong"); @@ -142,3 +142,7 @@ $a = "\x{100}"; chop $a; ok( is_utf8($a)); # weird but true: an empty UTF-8 string +# non-string arguments +ok(decode(latin1 => bless {}, "x"), undef); +ok(encode(utf8 => bless {}, "x"), undef); + diff --git a/ext/Encode/t/fallback.t b/ext/Encode/t/fallback.t index 11b484a..e319357 100644 --- a/ext/Encode/t/fallback.t +++ b/ext/Encode/t/fallback.t @@ -17,86 +17,137 @@ BEGIN { use strict; #use Test::More qw(no_plan); -use Test::More tests => 22; +use Test::More tests => 36; use Encode q(:all); -my $original = ''; -my $nofallback = ''; -my ($fallenback, $quiet, $perlqq, $htmlcref, $xmlcref); +my $uo = ''; +my $nf = ''; +my ($af, $aq, $ap, $ah, $ax, $uf, $uq, $up, $uh, $ux); for my $i (0x20..0x7e){ - $original .= chr($i); + $uo .= chr($i); } -$fallenback = $quiet = -$perlqq = $htmlcref = $xmlcref = $nofallback = $original; +$af = $aq = $ap = $ah = $ax = +$uf = $uq = $up = $uh = $ux = +$nf = $uo; my $residue = ''; for my $i (0x80..0xff){ - $original .= chr($i); + $uo .= chr($i); $residue .= chr($i); - $fallenback .= '?'; - $perlqq .= sprintf("\\x{%04x}", $i); - $htmlcref .= sprintf("&#%d;", $i); - $xmlcref .= sprintf("&#x%x;", $i); + $af .= '?'; + $uf .= "\x{FFFD}"; + $ap .= sprintf("\\x{%04x}", $i); + $up .= sprintf("\\x%02X", $i); + $ah .= sprintf("&#%d;", $i); + $uh .= sprintf("&#%d;", $i); + $ax .= sprintf("&#x%x;", $i); + $ux .= sprintf("&#x%x;", $i); } -utf8::upgrade($original); -my $meth = find_encoding('ascii'); -my $src = $original; -my $dst = $meth->encode($src, FB_DEFAULT); -is($dst, $fallenback, "FB_DEFAULT"); -is($src, $original, "FB_DEFAULT residue"); +my $ao = $uo; +utf8::upgrade($uo); -$src = $original; -eval{ $dst = $meth->encode($src, FB_CROAK) }; -like($@, qr/does not map to ascii/o, "FB_CROAK"); -is($src, $original, "FB_CROAK residue"); +my $ascii = find_encoding('ascii'); +my $utf8 = find_encoding('utf8'); -$src = $original; -eval{ $dst = $meth->encode($src, FB_CROAK) }; -like($@, qr/does not map to ascii/o, "FB_CROAK"); -is($src, $original, "FB_CROAK residue"); +my $src = $uo; +my $dst = $ascii->encode($src, FB_DEFAULT); +is($dst, $af, "FB_DEFAULT ascii"); +is($src, $uo, "FB_DEFAULT residue ascii"); +$src = $ao; +$dst = $utf8->decode($src, FB_DEFAULT); +is($dst, $uf, "FB_DEFAULT utf8"); +is($src, $ao, "FB_DEFAULT residue utf8"); -$src = $nofallback; -eval{ $dst = $meth->encode($src, FB_CROAK) }; -is($@, '', "FB_CROAK on success"); -is($src, '', "FB_CROAK on success residue"); +$src = $uo; +eval{ $dst = $ascii->encode($src, FB_CROAK) }; +like($@, qr/does not map to ascii/o, "FB_CROAK ascii"); +is($src, $uo, "FB_CROAK residue ascii"); -$src = $original; -$dst = $meth->encode($src, FB_QUIET); -is($dst, $quiet, "FB_QUIET"); -is($src, $residue, "FB_QUIET residue"); +$src = $ao; +eval{ $dst = $utf8->decode($src, FB_CROAK) }; +like($@, qr/does not map to Unicode/o, "FB_CROAK utf8"); +is($src, $ao, "FB_CROAK residue utf8"); + +$src = $nf; +eval{ $dst = $ascii->encode($src, FB_CROAK) }; +is($@, '', "FB_CROAK on success ascii"); +is($src, '', "FB_CROAK on success residue ascii"); + +$src = $nf; +eval{ $dst = $utf8->decode($src, FB_CROAK) }; +is($@, '', "FB_CROAK on success utf8"); +is($src, '', "FB_CROAK on success residue utf8"); + +$src = $uo; +$dst = $ascii->encode($src, FB_QUIET); +is($dst, $aq, "FB_QUIET ascii"); +is($src, $residue, "FB_QUIET residue ascii"); + +$src = $ao; +$dst = $utf8->decode($src, FB_QUIET); +is($dst, $uq, "FB_QUIET utf8"); +is($src, $residue, "FB_QUIET residue utf8"); { - my $message; + my $message = ''; local $SIG{__WARN__} = sub { $message = $_[0] }; - $src = $original; - $dst = $meth->encode($src, FB_WARN); - is($dst, $quiet, "FB_WARN"); - is($src, $residue, "FB_WARN residue"); - like($message, qr/does not map to ascii/o, "FB_WARN message"); + + $src = $uo; + $dst = $ascii->encode($src, FB_WARN); + is($dst, $aq, "FB_WARN ascii"); + is($src, $residue, "FB_WARN residue ascii"); + like($message, qr/does not map to ascii/o, "FB_WARN message ascii"); $message = ''; + $src = $ao; + $dst = $utf8->decode($src, FB_WARN); + is($dst, $uq, "FB_WARN utf8"); + is($src, $residue, "FB_WARN residue utf8"); + like($message, qr/does not map to Unicode/o, "FB_WARN message utf8"); - $src = $original; - $dst = $meth->encode($src, WARN_ON_ERR); + $message = ''; + $src = $uo; + $dst = $ascii->encode($src, WARN_ON_ERR); + is($dst, $af, "WARN_ON_ERR ascii"); + is($src, '', "WARN_ON_ERR residue ascii"); + like($message, qr/does not map to ascii/o, "WARN_ON_ERR message ascii"); - is($dst, $fallenback, "WARN_ON_ERR"); - is($src, '', "WARN_ON_ERR residue"); - like($message, qr/does not map to ascii/o, "WARN_ON_ERR message"); + $message = ''; + $src = $ao; + $dst = $utf8->decode($src, WARN_ON_ERR); + is($dst, $uf, "WARN_ON_ERR utf8"); + is($src, '', "WARN_ON_ERR residue utf8"); + like($message, qr/does not map to Unicode/o, "WARN_ON_ERR message ascii"); } -$src = $original; -$dst = $meth->encode($src, FB_PERLQQ); -is($dst, $perlqq, "FB_PERLQQ"); -is($src, '', "FB_PERLQQ residue"); - -$src = $original; -$dst = $meth->encode($src, FB_HTMLCREF); -is($dst, $htmlcref, "FB_HTMLCREF"); -is($src, '', "FB_HTMLCREF residue"); - -$src = $original; -$dst = $meth->encode($src, FB_XMLCREF); -is($dst, $xmlcref, "FB_XMLCREF"); -is($src, '', "FB_XMLCREF residue"); +$src = $uo; +$dst = $ascii->encode($src, FB_PERLQQ); +is($dst, $ap, "FB_PERLQQ ascii"); +is($src, '', "FB_PERLQQ residue ascii"); + +$src = $ao; +$dst = $utf8->decode($src, FB_PERLQQ); +is($dst, $up, "FB_PERLQQ utf8"); +is($src, '', "FB_PERLQQ residue utf8"); + +$src = $uo; +$dst = $ascii->encode($src, FB_HTMLCREF); +is($dst, $ah, "FB_HTMLCREF ascii"); +is($src, '', "FB_HTMLCREF residue ascii"); + +#$src = $ao; +#$dst = $utf8->decode($src, FB_HTMLCREF); +#is($dst, $uh, "FB_HTMLCREF utf8"); +#is($src, '', "FB_HTMLCREF residue utf8"); + +$src = $uo; +$dst = $ascii->encode($src, FB_XMLCREF); +is($dst, $ax, "FB_XMLCREF ascii"); +is($src, '', "FB_XMLCREF residue ascii"); + +#$src = $ao; +#$dst = $utf8->decode($src, FB_XMLCREF); +#is($dst, $ax, "FB_XMLCREF utf8"); +#is($src, '', "FB_XMLCREF residue utf8"); diff --git a/ext/Encode/ucm/macArabic.ucm b/ext/Encode/ucm/macArabic.ucm index 2fa32ea..584fd6e 100644 --- a/ext/Encode/ucm/macArabic.ucm +++ b/ext/Encode/ucm/macArabic.ucm @@ -1,5 +1,5 @@ # -# $Id: macArabic.ucm,v 2.0 2004/05/16 20:55:26 dankogai Exp $ +# $Id: macArabic.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ARABIC.TXT @@ -213,6 +213,7 @@ CHARMAP \xEF |0 # ARABIC DAMMA \xF0 |0 # ARABIC KASRA \xF1 |0 # ARABIC SHADDA + \xF2 |0 # ARABIC SUKUN \xB0 |0 # ARABIC-INDIC DIGIT ZERO, right-left (need override) \xB1 |0 # ARABIC-INDIC DIGIT ONE, right-left (need override) \xB2 |0 # ARABIC-INDIC DIGIT TWO, right-left (need override) @@ -223,7 +224,6 @@ CHARMAP \xB7 |0 # ARABIC-INDIC DIGIT SEVEN, right-left (need override) \xB8 |0 # ARABIC-INDIC DIGIT EIGHT, right-left (need override) \xB9 |0 # ARABIC-INDIC DIGIT NINE, right-left (need override) - \xF2 |0 # ARABIC SUKUN \xA5 |0 # ARABIC PERCENT SIGN \xF4 |0 # ARABIC LETTER TTEH \xF3 |0 # ARABIC LETTER PEH diff --git a/ext/Encode/ucm/macCentEuro.ucm b/ext/Encode/ucm/macCentEuro.ucm index a885997..875a8ab 100644 --- a/ext/Encode/ucm/macCentEuro.ucm +++ b/ext/Encode/ucm/macCentEuro.ucm @@ -1,5 +1,5 @@ # -# $Id: macCentEuro.ucm,v 2.0 2004/05/16 20:55:26 dankogai Exp $ +# $Id: macCentEuro.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CENTEURO.TXT @@ -9,7 +9,6 @@ 1 1 \x3F -# CHARMAP \x00 |0 # \x01 |0 # diff --git a/ext/Encode/ucm/macChinsimp.ucm b/ext/Encode/ucm/macChinsimp.ucm index 881fee7..5def5fb 100644 --- a/ext/Encode/ucm/macChinsimp.ucm +++ b/ext/Encode/ucm/macChinsimp.ucm @@ -1,5 +1,5 @@ # -# $Id: macChinsimp.ucm,v 2.0 2004/05/16 20:55:26 dankogai Exp $ +# $Id: macChinsimp.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CHINSIMP.TXT @@ -121,7 +121,6 @@ CHARMAP \x6C |0 # LATIN SMALL LETTER L \x6D |0 # LATIN SMALL LETTER M \x6E |0 # LATIN SMALL LETTER N - \xA8\xBF |3 # LATIN SMALL LETTER N + COMBINING GRAVE ACCENT \x6F |0 # LATIN SMALL LETTER O \x70 |0 # LATIN SMALL LETTER P \x71 |0 # LATIN SMALL LETTER Q @@ -180,8 +179,9 @@ CHARMAP \xA8\xB6 |0 # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE \xA8\xB7 |0 # LATIN SMALL LETTER U WITH DIAERESIS AND CARON \xA8\xB8 |0 # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE - \xA8\xBB |0 # LATIN SMALL LETTER TURNED ALPHA (wrong glyph in Apple fonts) - \xA8\xC0 |0 # LATIN SMALL LETTER SCRIPT G (wrong glyph in Apple fonts other than Hei) + \xA8\xBF |0 # LATIN SMALL LETTER N WITH GRAVE # for Unicode 3.0 and later + \xA8\xBB |0 # LATIN SMALL LETTER TURNED ALPHA + \xA8\xC0 |0 # LATIN SMALL LETTER SCRIPT G \xA1\xA6 |0 # CARON (Mandarin Chinese third tone) \xA1\xA5 |0 # MODIFIER LETTER MACRON (Mandarin Chinese first tone) \xA6\xA1 |0 # GREEK CAPITAL LETTER ALPHA diff --git a/ext/Encode/ucm/macChintrad.ucm b/ext/Encode/ucm/macChintrad.ucm index 9cbcf52..434287b 100644 --- a/ext/Encode/ucm/macChintrad.ucm +++ b/ext/Encode/ucm/macChintrad.ucm @@ -1,5 +1,5 @@ # -# $Id: macChintrad.ucm,v 2.0 2004/05/16 20:55:26 dankogai Exp $ +# $Id: macChintrad.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CHINTRAD.TXT @@ -253,8 +253,8 @@ CHARMAP \xA1\xDC |0 # APPROXIMATELY EQUAL TO OR THE IMAGE OF \xA1\xDA |0 # NOT EQUAL TO \xA1\xDD |0 # IDENTICAL TO - \xA1\xD8 |0 # LESS THAN OVER EQUAL TO - \xA1\xD9 |0 # GREATER THAN OVER EQUAL TO + \xA1\xD8 |0 # LESS-THAN OVER EQUAL TO + \xA1\xD9 |0 # GREATER-THAN OVER EQUAL TO \xA1\xF2 |0 # CIRCLED PLUS # change from UTC mapping \xA1\xE6 |0 # UP TACK \xA1\xE9 |0 # RIGHT TRIANGLE diff --git a/ext/Encode/ucm/macDingbats.ucm b/ext/Encode/ucm/macDingbats.ucm index 2c77e72..3047a67 100644 --- a/ext/Encode/ucm/macDingbats.ucm +++ b/ext/Encode/ucm/macDingbats.ucm @@ -1,5 +1,5 @@ # -# $Id: macDingbats.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $ +# $Id: macDingbats.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/DINGBATS.TXT @@ -42,14 +42,6 @@ CHARMAP \x1E |0 # \x1F |0 # \x20 |0 # SPACE - \x80 |0 # LEFT PARENTHESIS - \x82 |3 # LEFT PARENTHESIS, alternate (flattened) - \x81 |0 # RIGHT PARENTHESIS - \x83 |3 # RIGHT PARENTHESIS, alternate (flattened) - \x8C |0 # LEFT CURLY BRACKET - \x8D |0 # RIGHT CURLY BRACKET - \x86 |0 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - \x87 |0 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK \xD5 |0 # RIGHTWARDS ARROW \xD6 |0 # LEFT RIGHT ARROW \xD7 |0 # UP DOWN ARROW @@ -168,6 +160,20 @@ CHARMAP \xA5 |0 # ROTATED HEAVY BLACK HEART BULLET \xA6 |0 # FLORAL HEART \xA7 |0 # ROTATED FLORAL HEART BULLET + \x80 |0 # MEDIUM LEFT PARENTHESIS ORNAMENT # for Unicode 3.2 and later + \x81 |0 # MEDIUM RIGHT PARENTHESIS ORNAMENT # for Unicode 3.2 and later + \x82 |0 # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT # for Unicode 3.2 and later + \x83 |0 # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT # for Unicode 3.2 and later + \x84 |0 # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later + \x85 |0 # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later + \x86 |0 # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT # for Unicode 3.2 and later + \x87 |0 # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT # for Unicode 3.2 and later + \x88 |0 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later + \x89 |0 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later + \x8A |0 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT # for Unicode 3.2 and later + \x8B |0 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT # for Unicode 3.2 and later + \x8C |0 # MEDIUM LEFT CURLY BRACKET ORNAMENT # for Unicode 3.2 and later + \x8D |0 # MEDIUM RIGHT CURLY BRACKET ORNAMENT # for Unicode 3.2 and later \xB6 |0 # DINGBAT NEGATIVE CIRCLED DIGIT ONE \xB7 |0 # DINGBAT NEGATIVE CIRCLED DIGIT TWO \xB8 |0 # DINGBAT NEGATIVE CIRCLED DIGIT THREE @@ -237,10 +243,4 @@ CHARMAP \xFC |0 # WEDGE-TAILED RIGHTWARDS ARROW \xFD |0 # HEAVY WEDGE-TAILED RIGHTWARDS ARROW \xFE |0 # OPEN-OUTLINED RIGHTWARDS ARROW - \x84 |0 # LEFT ANGLE BRACKET - \x88 |3 # LEFT ANGLE BRACKET, heavy - \x85 |0 # RIGHT ANGLE BRACKET - \x89 |3 # RIGHT ANGLE BRACKET, heavy - \x8A |0 # LEFT TORTOISE SHELL BRACKET - \x8B |0 # RIGHT TORTOISE SHELL BRACKET END CHARMAP diff --git a/ext/Encode/ucm/macGreek.ucm b/ext/Encode/ucm/macGreek.ucm index baeb061..bf88b98 100644 --- a/ext/Encode/ucm/macGreek.ucm +++ b/ext/Encode/ucm/macGreek.ucm @@ -1,5 +1,5 @@ # -# $Id: macGreek.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $ +# $Id: macGreek.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/GREEK.TXT @@ -145,7 +145,7 @@ CHARMAP \xA9 |0 # COPYRIGHT SIGN \xC7 |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK \xC2 |0 # NOT SIGN - \x9C |0 # SOFT HYPHEN + \xFF |0 # SOFT HYPHEN # before Mac OS 9.2.2, was undefined \xA8 |0 # REGISTERED SIGN \xAE |0 # DEGREE SIGN \xB1 |0 # PLUS-MINUS SIGN @@ -258,10 +258,10 @@ CHARMAP \x96 |0 # BULLET \xC9 |0 # HORIZONTAL ELLIPSIS \x98 |0 # PER MILLE SIGN + \x9C |0 # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN \x93 |0 # TRADE MARK SIGN \xC5 |0 # ALMOST EQUAL TO \xAD |0 # NOT EQUAL TO \xB2 |0 # LESS-THAN OR EQUAL TO \xB3 |0 # GREATER-THAN OR EQUAL TO - \xFF |0 # undefined1 END CHARMAP diff --git a/ext/Encode/ucm/macKorean.ucm b/ext/Encode/ucm/macKorean.ucm index 54f05b6..6cdf7ca 100644 --- a/ext/Encode/ucm/macKorean.ucm +++ b/ext/Encode/ucm/macKorean.ucm @@ -1,5 +1,5 @@ # -# $Id: macKorean.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $ +# $Id: macKorean.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/KOREAN.TXT @@ -52,21 +52,9 @@ CHARMAP \x26 |0 # AMPERSAND \x27 |0 # APOSTROPHE \x28 |0 # LEFT PARENTHESIS - \xA2\x45 |3 # LEFT PARENTHESIS, small, bold - \xA1\x4F |3 # LEFT PARENTHESIS, small, more rounded - \xA1\x55 |3 # LEFT PARENTHESIS, white, alternate - \xA1\x4D |3 # LEFT PARENTHESIS, small - \xA1\x59 |3 # LEFT PARENTHESIS, white - \xA1\x65 |3 # LEFT PARENTHESIS, white, bold \xA1\x57 |3 # LEFT PARENTHESIS, bold \xA2\x4B |3 # LEFT PARENTHESIS, more rounded \x29 |0 # RIGHT PARENTHESIS - \xA2\x46 |3 # RIGHT PARENTHESIS, small, bold - \xA1\x50 |3 # RIGHT PARENTHESIS, small, more rounded - \xA1\x56 |3 # RIGHT PARENTHESIS, white, alternate - \xA1\x4E |3 # RIGHT PARENTHESIS, small - \xA1\x5A |3 # RIGHT PARENTHESIS, white - \xA1\x66 |3 # RIGHT PARENTHESIS, white, bold \xA1\x58 |3 # RIGHT PARENTHESIS, bold \xA2\x4C |3 # RIGHT PARENTHESIS, more rounded \x2A |0 # ASTERISK @@ -74,6 +62,7 @@ CHARMAP \x2B |0 # PLUS SIGN \x2C |0 # COMMA \x2D |0 # HYPHEN-MINUS + \xA7\x67 |3 # HYPHEN-MINUS+COMBINING DIAERESIS \x2E |0 # FULL STOP \x2F |0 # SOLIDUS \x30 |0 # DIGIT ZERO @@ -146,6 +135,8 @@ CHARMAP \x3C |0 # LESS-THAN SIGN \xA1\x79 |3 # LESS-THAN SIGN, superscript \x3D |0 # EQUALS SIGN + \xA7\x65 |3 # EQUALS SIGN+COMBINING LONG VERTICAL LINE OVERLAY + \xA7\x62 |3 # EQUALS SIGN+COMBINING REVERSE SOLIDUS OVERLAY # for Unicode 3.2 and later \x3E |0 # GREATER-THAN SIGN \xA1\x78 |3 # GREATER-THAN SIGN, superscript \x3F |0 # QUESTION MARK @@ -260,7 +251,6 @@ CHARMAP \xA9\xA1 |0 # LATIN SMALL LIGATURE AE \xA9\xA3 |0 # LATIN SMALL LETTER ETH (Icelandic) \xA1\xC0 |0 # DIVISION SIGN - \xA7\x60 |3 # DIVISION SIGN + COMBINING ENCLOSING CIRCLE \xA9\xAA |0 # LATIN SMALL LETTER O WITH STROKE \xA9\xAD |0 # LATIN SMALL LETTER THORN (Icelandic) \xA9\xA2 |0 # LATIN SMALL LETTER D WITH STROKE @@ -288,7 +278,7 @@ CHARMAP \xA2\xAB |0 # DOT ABOVE (Mandarin Chinese light tone) \xA2\xAA |0 # RING ABOVE \xA2\xAD |0 # OGONEK - \xA2\xA6 |0 # SMALL TILDE # KSC: "tilde accent" + \xA2\xA6 |0 # SMALL TILDE # KSC spec: "tilde accent" \xA2\xA9 |0 # DOUBLE ACUTE ACCENT \xA5\xC1 |0 # GREEK CAPITAL LETTER ALPHA \xA5\xC2 |0 # GREEK CAPITAL LETTER BETA @@ -338,6 +328,7 @@ CHARMAP \xA5\xF6 |0 # GREEK SMALL LETTER CHI \xA5\xF7 |0 # GREEK SMALL LETTER PSI \xA5\xF8 |0 # GREEK SMALL LETTER OMEGA + \xA7\x6A |0 # GREEK PHI SYMBOL \xAC\xA7 |0 # CYRILLIC CAPITAL LETTER IO \xAC\xA1 |0 # CYRILLIC CAPITAL LETTER A \xAC\xA2 |0 # CYRILLIC CAPITAL LETTER BE @@ -446,6 +437,16 @@ CHARMAP \xA3\xFE |0 # OVERLINE # change from UTC mapping; KSC spec: "overline, macron" \xA6\x4D |0 # ASTERISM \xA6\x51 |3 # ASTERISM, large + \xA7\x87 |0 # DOUBLE QUESTION MARK # for Unicode 3.2 and later + \xA7\x85 |0 # EXCLAMATION QUESTION MARK # for Unicode 3.0 and later + \xA1\x96 |0 # BLACK LEFTWARDS BULLET (used to bracket titles) # for Unicode 3.0 or later + \xA1\x97 |0 # BLACK RIGHTWARDS BULLET (used to bracket titles) # for Unicode 3.0 or later + \xA6\x4E |0 # LOW ASTERISK # for Unicode 3.2 and later + \xA1\x6D |0 # TWO ASTERISKS ALIGNED VERTICALLY (dictionary definition importance mark) # for Unicode 3.2 or later + \xA6\x4F |3 # TWO ASTERISKS ALIGNED VERTICALLY, large, right # for Unicode 3.2 and later + \xA6\x4B |3 # TWO ASTERISKS ALIGNED VERTICALLY, bold, right # for Unicode 3.2 and later + \xA1\xA0 |3 # TWO ASTERISKS ALIGNED VERTICALLY, large # for Unicode 3.2 or later + \xA1\x9D |3 # TWO ASTERISKS ALIGNED VERTICALLY, medium large # for Unicode 3.2 or later \xA9\xF9 |0 # SUPERSCRIPT FOUR \xA1\x71 |0 # SUPERSCRIPT PLUS SIGN \xA1\x72 |0 # SUPERSCRIPT MINUS @@ -495,6 +496,8 @@ CHARMAP \xA5\xAA |0 # SMALL ROMAN NUMERAL TEN \xA1\xE7 |0 # LEFTWARDS ARROW \xAC\x89 |3 # LEFTWARDS ARROW, angle head, white, large + \xA8\x69 |3 # LEFTWARDS ARROW, umbrella + \xA8\x6B |3 # LEFTWARDS ARROW, teardrop \xAC\x5D |3 # LEFTWARDS ARROW, small bold \xAC\x66 |3 # LEFTWARDS ARROW, curved head, white \xA8\x63 |3 # LEFTWARDS ARROW, alternate, white @@ -503,21 +506,22 @@ CHARMAP \xAC\x55 |3 # LEFTWARDS ARROW, angle head, white \xA8\x42 |3 # LEFTWARDS ARROW, light \xA8\x4E |3 # LEFTWARDS ARROW, bold - \xA8\x5F |3 # LEFTWARDS ARROW, alternate + \xA8\x5F |3 # LEFTWARDS ARROW, alternate (heavy round-tipped) \xA1\xE8 |0 # UPWARDS ARROW \xAC\x8B |3 # UPWARDS ARROW, angle head, white, large + \xA8\x6D |3 # UPWARDS ARROW, teardrop \xAC\x60 |3 # UPWARDS ARROW, small bold \xAC\x68 |3 # UPWARDS ARROW, curved head, white \xA8\x65 |3 # UPWARDS ARROW, alternate, white \xAC\x64 |3 # UPWARDS ARROW, curved head \xA8\x55 |3 # UPWARDS ARROW, large - \xAC\x57 |3 # UPWARDS ARROW, angle head, whitee + \xAC\x57 |3 # UPWARDS ARROW, angle head, white \xA8\x43 |3 # UPWARDS ARROW, light \xA8\x4F |3 # UPWARDS ARROW, bold \xA8\x61 |3 # UPWARDS ARROW, alternate \xA1\xE6 |0 # RIGHTWARDS ARROW \xAC\x8A |3 # RIGHTWARDS ARROW, angle head, white, large - \xAC\x5E |3 # RIGHTWARDS ARROW, small bold + \xA8\x6C |3 # RIGHTWARDS ARROW, teardrop \xAC\x67 |3 # RIGHTWARDS ARROW, curved head, white \xA8\x64 |3 # RIGHTWARDS ARROW, alternate, white \xAC\x63 |3 # RIGHTWARDS ARROW, curved head @@ -525,9 +529,9 @@ CHARMAP \xAC\x56 |3 # RIGHTWARDS ARROW, angle head, white \xA8\x41 |3 # RIGHTWARDS ARROW, light \xA8\x4D |3 # RIGHTWARDS ARROW, bold - \xA8\x60 |3 # RIGHTWARDS ARROW, alternate \xA1\xE9 |0 # DOWNWARDS ARROW \xAC\x8C |3 # DOWNWARDS ARROW, angle head, white, large + \xA8\x6E |3 # DOWNWARDS ARROW, teardrop \xAC\x61 |3 # DOWNWARDS ARROW, small bold \xAC\x69 |3 # DOWNWARDS ARROW, curved head, white \xA8\x66 |3 # DOWNWARDS ARROW, alternate, white @@ -549,6 +553,8 @@ CHARMAP \xA8\x47 |3 # SOUTH EAST ARROW, light \xA2\xD7 |0 # SOUTH WEST ARROW \xA8\x48 |3 # SOUTH WEST ARROW, light + \xAC\x53 |0 # LEFTWARDS WAVE ARROW + \xAC\x52 |0 # RIGHTWARDS WAVE ARROW \xA8\x82 |0 # UPWARDS ARROW WITH TIP LEFTWARDS \xAC\x7A |3 # UPWARDS ARROW WITH TIP LEFTWARDS, curved, white \xAC\x85 |3 # UPWARDS ARROW WITH TIP LEFTWARDS, curved @@ -558,70 +564,76 @@ CHARMAP \xAC\x82 |3 # UPWARDS ARROW WITH TIP RIGHTWARDS, curved \xA8\x87 |3 # UPWARDS ARROW WITH TIP RIGHTWARDS, curved \xA8\x7B |0 # DOWNWARDS ARROW WITH TIP LEFTWARDS - \xAC\x76 |3 # DOWNWARDS ARROW WITH TIP LEFTWARDS, curved, white - \xAC\x81 |3 # DOWNWARDS ARROW WITH TIP LEFTWARDS, curved - \xA8\x86 |3 # DOWNWARDS ARROW WITH TIP LEFTWARDS, curved \xA8\x83 |0 # DOWNWARDS ARROW WITH TIP RIGHTWARDS - \xAC\x7B |3 # DOWNWARDS ARROW WITH TIP RIGHTWARDS, curved, white - \xAC\x86 |3 # DOWNWARDS ARROW WITH TIP RIGHTWARDS, curved - \xA8\x8B |3 # DOWNWARDS ARROW WITH TIP RIGHTWARDS, curved \xA8\x81 |0 # RIGHTWARDS ARROW WITH CORNER DOWNWARDS - \xAC\x7C |3 # ANTICLOCKWISE TOP SEMICIRCLE ARROW, curved, white - \xAC\x87 |3 # ANTICLOCKWISE TOP SEMICIRCLE ARROW, curved - \xA8\x8C |3 # ANTICLOCKWISE TOP SEMICIRCLE ARROW, curved - \xAC\x79 |3 # CLOCKWISE TOP SEMICIRCLE ARROW, curved, white - \xAC\x84 |3 # CLOCKWISE TOP SEMICIRCLE ARROW, curved - \xA8\x89 |3 # CLOCKWISE TOP SEMICIRCLE ARROW, curved - \xAC\x7D |3 # ANTICLOCKWISE OPEN CIRCLE ARROW, curved, white - \xAC\x88 |3 # ANTICLOCKWISE OPEN CIRCLE ARROW, curved - \xA8\x8D |3 # ANTICLOCKWISE OPEN CIRCLE ARROW, curved + \xAC\x50 |0 # ANTICLOCKWISE TOP SEMICIRCLE ARROW: up arrow with tip curving left and down + \xAC\x51 |0 # CLOCKWISE TOP SEMICIRCLE ARROW: up arrow with tip curving right and down \xAC\x78 |3 # CLOCKWISE OPEN CIRCLE ARROW, curved, white + \xA8\x7D |3 # CLOCKWISE OPEN CIRCLE ARROW, alternate: leftwards arrow with tip upwards \xAC\x83 |3 # CLOCKWISE OPEN CIRCLE ARROW, curved \xA8\x88 |3 # CLOCKWISE OPEN CIRCLE ARROW, curved \xA8\x92 |0 # LEFTWARDS HARPOON WITH BARB UPWARDS - \xA8\x98 |3 # LEFTWARDS HARPOON WITH BARB UPWARDS + tag: large - \xAC\x4C |3 # LEFTWARDS HARPOON WITH BARB UPWARDS + tag: alt form + \xA8\x99 |3 # LEFTWARDS HARPOON WITH BARB UPWARDS, large + \xAC\x4D |3 # LEFTWARDS HARPOON WITH BARB UPWARDS, alternate \xA8\x93 |0 # RIGHTWARDS HARPOON WITH BARB UPWARDS - \xA8\x99 |3 # RIGHTWARDS HARPOON WITH BARB UPWARDS + tag: large - \xAC\x4D |3 # RIGHTWARDS HARPOON WITH BARB UPWARDS + tag: alt form + \xA8\x98 |3 # RIGHTWARDS HARPOON WITH BARB UPWARDS, large + \xAC\x4C |3 # RIGHTWARDS HARPOON WITH BARB UPWARDS, alternate \xA8\x9E |0 # RIGHTWARDS ARROW OVER LEFTWARDS ARROW \xA8\x9F |0 # UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW \xA8\x4B |0 # LEFTWARDS DOUBLE ARROW WITH STROKE \xA8\x4A |0 # RIGHTWARDS DOUBLE ARROW WITH STROKE \xA8\x49 |0 # LEFTWARDS DOUBLE ARROW - \xAC\x45 |3 # LEFTWARDS DOUBLE ARROW, small - \xA8\x6F |3 # LEFTWARDS DOUBLE ARROW, alternate - \xA8\x71 |0 # UPWARDS DOUBLE ARROW + \xA8\x9B |3 # LEFTWARDS DOUBLE ARROW, small white tapered \xA2\xA1 |0 # RIGHTWARDS DOUBLE ARROW - \xAC\x44 |3 # RIGHTWARDS DOUBLE ARROW, small - \xA8\x70 |3 # RIGHTWARDS DOUBLE ARROW, alternate - \xA8\x72 |0 # DOWNWARDS DOUBLE ARROW + \xA8\x9A |3 # RIGHTWARDS DOUBLE ARROW, small white tapered \xA2\xA2 |0 # LEFT RIGHT DOUBLE ARROW + \xA8\x95 |3 # LEFT RIGHT DOUBLE ARROW, heavy \xA8\x4C |3 # LEFT RIGHT DOUBLE ARROW, duplicate of 0xA2A2 \xAC\x6A |0 # LEFTWARDS DASHED ARROW \xAC\x6C |0 # UPWARDS DASHED ARROW \xAC\x6B |0 # RIGHTWARDS DASHED ARROW \xAC\x6D |0 # DOWNWARDS DASHED ARROW \xAC\x72 |0 # LEFTWARDS WHITE ARROW + \xA8\x5B |3 # LEFTWARDS WHITE ARROW + COMBINING ENCLOSING CIRCLE + \xA8\x57 |3 # LEFTWARDS WHITE ARROW + COMBINING ENCLOSING SQUARE + \xAC\x47 |3 # LEFTWARDS WHITE ARROW, negative: heavy black + \xA8\x97 |3 # LEFTWARDS WHITE ARROW, heavy tapered \xAC\x6E |3 # LEFTWARDS WHITE ARROW, heavy, negative \xAD\xA6 |3 # LEFTWARDS WHITE ARROW, small \xA8\x8E |3 # LEFTWARDS WHITE ARROW, large + \xA8\x73 |3 # LEFTWARDS WHITE ARROW, negative: heavy black + \xA8\x77 |3 # LEFTWARDS WHITE ARROW, negative: medium black \xAC\x59 |3 # LEFTWARDS WHITE ARROW, triangle head, white + \xAC\x4F |3 # LEFTWARDS WHITE ARROW, negative: black, demarcated head \xAC\x74 |0 # UPWARDS WHITE ARROW + \xA8\x5D |3 # UPWARDS WHITE ARROW + COMBINING ENCLOSING CIRCLE + \xA8\x59 |3 # UPWARDS WHITE ARROW + COMBINING ENCLOSING SQUARE \xAC\x70 |3 # UPWARDS WHITE ARROW, heavy, negative \xAD\xA7 |3 # UPWARDS WHITE ARROW, small \xA8\x90 |3 # UPWARDS WHITE ARROW, large + \xA8\x75 |3 # UPWARDS WHITE ARROW, negative: heavy black + \xA8\x79 |3 # UPWARDS WHITE ARROW, negative: medium black \xAC\x5B |3 # UPWARDS WHITE ARROW, triangle head, white + \xAD\xAF |3 # UPWARDS WHITE ARROW, alternate \xAC\x73 |0 # RIGHTWARDS WHITE ARROW + \xA8\x58 |3 # RIGHTWARDS WHITE ARROW + COMBINING ENCLOSING SQUARE + \xAC\x46 |3 # RIGHTWARDS WHITE ARROW, negative: heavy black + \xA8\x96 |3 # RIGHTWARDS WHITE ARROW, heavy tapered \xAC\x6F |3 # RIGHTWARDS WHITE ARROW, heavy, negative \xAD\xA5 |3 # RIGHTWARDS WHITE ARROW, small \xA8\x8F |3 # RIGHTWARDS WHITE ARROW, large \xAC\x5A |3 # RIGHTWARDS WHITE ARROW, triangle head, white + \xAC\x4E |3 # RIGHTWARDS WHITE ARROW, negative: black, demarcated head \xAC\x75 |0 # DOWNWARDS WHITE ARROW + \xA8\x5E |3 # DOWNWARDS WHITE ARROW + COMBINING ENCLOSING CIRCLE + \xA8\x5A |3 # DOWNWARDS WHITE ARROW + COMBINING ENCLOSING SQUARE \xAC\x71 |3 # DOWNWARDS WHITE ARROW, heavy, negative \xAD\xA8 |3 # DOWNWARDS WHITE ARROW, small \xA8\x91 |3 # DOWNWARDS WHITE ARROW, large + \xA8\x76 |3 # DOWNWARDS WHITE ARROW, negative: heavy black + \xA8\x7A |3 # DOWNWARDS WHITE ARROW, negative: medium black \xAC\x5C |3 # DOWNWARDS WHITE ARROW, triangle head, white + \xAC\x41 |0 # RIGHTWARDS WHITE ARROW FROM WALL # for Unicode 3.0 and later \xA2\xA3 |0 # FOR ALL \xA1\xD3 |0 # PARTIAL DIFFERENTIAL \xA2\xA4 |0 # THERE EXISTS @@ -647,6 +659,7 @@ CHARMAP \xA7\x68 |0 # SPHERICAL ANGLE \xA4\x98 |3 # SPHERICAL ANGLE, alternate \xA7\x55 |0 # PARALLEL TO + \xA4\x9E |3 # PARALLEL TO+COMBINING EQUALS SIGN BELOW # for Unicode 3.0 and later \xA7\x56 |0 # NOT PARALLEL TO \xA1\xFC |0 # LOGICAL AND \xA1\xFD |0 # LOGICAL OR @@ -663,6 +676,7 @@ CHARMAP \xA1\xF1 |0 # BECAUSE \xA2\xFE |0 # PROPORTION \xA1\xEF |0 # REVERSED TILDE + \xA7\x79 |3 # REVERSED TILDE+COMBINING LONG STROKE OVERLAY \xA1\x75 |3 # REVERSED TILDE, superscript \xA4\x9A |0 # ASYMPTOTICALLY EQUAL TO \xA4\x99 |0 # APPROXIMATELY EQUAL TO @@ -675,6 +689,8 @@ CHARMAP \xA1\xC1 |0 # NOT EQUAL TO \xA1\x7B |3 # NOT EQUAL TO, superscript \xA1\xD5 |0 # IDENTICAL TO + \xA7\x6E |3 # IDENTICAL TO+COMBINING LONG VERTICAL LINE OVERLAY + \xA7\x63 |3 # IDENTICAL TO+COMBINING REVERSE SOLIDUS OVERLAY # for Unicode 3.2 and later \xA7\x64 |0 # NOT IDENTICAL TO \xA1\xC2 |0 # LESS-THAN OR EQUAL TO \xA1\xC3 |0 # GREATER-THAN OR EQUAL TO @@ -701,11 +717,7 @@ CHARMAP \xA7\x72 |0 # NOT A SUBSET OF \xA7\x71 |0 # NOT A SUPERSET OF \xA1\xF6 |0 # SUBSET OF OR EQUAL TO - \xA4\x8B |3 # SUBSET OF OR EQUAL TO, alternate \xA1\xF7 |0 # SUPERSET OF OR EQUAL TO - \xA4\x8D |3 # SUPERSET OF OR EQUAL TO, alternate - \xA4\x8C |0 # SUBSET OF WITH NOT EQUAL TO - \xA4\x8E |0 # SUPERSET OF WITH NOT EQUAL TO \xA7\x5D |0 # CIRCLED PLUS \xA7\x5E |0 # CIRCLED MINUS \xA7\x5F |0 # CIRCLED TIMES @@ -724,6 +736,7 @@ CHARMAP \xA1\xD2 |0 # ARC \xA7\x61 |0 # SECTOR \xA7\x7A |3 # SECTOR, alternate + \xA7\x48 |3 # SOFTWARE-FUNCTION SYMBOL, rotated (small hexagon) # for Unicode 3.0 and later \xA8\xE7 |0 # CIRCLED DIGIT ONE \xA5\x4C |3 # CIRCLED DIGIT ONE, serif, bold \xA8\xE8 |0 # CIRCLED DIGIT TWO @@ -744,45 +757,15 @@ CHARMAP \xA5\x54 |3 # CIRCLED DIGIT NINE, serif, bold \xA8\xF0 |0 # CIRCLED NUMBER TEN \xA8\xF1 |0 # CIRCLED NUMBER ELEVEN - \xA3\x5F |3 # dingbat negative circled sans number eleven - \xA6\xEF |3 # CIRCLED NUMBER ELEVEN, negative - \xA4\x73 |3 # CIRCLED NUMBER ELEVEN, negative, sans, light \xA8\xF2 |0 # CIRCLED NUMBER TWELVE - \xA3\x60 |3 # dingbat negative circled sans number twelve - \xA6\xF0 |3 # CIRCLED NUMBER TWELVE, negative - \xA4\x74 |3 # CIRCLED NUMBER TWELVE negative, sans, light \xA8\xF3 |0 # CIRCLED NUMBER THIRTEEN - \xA3\x61 |3 # dingbat negative circled sans number thirteen - \xA6\xF1 |3 # CIRCLED NUMBER THIRTEEN, negative - \xA4\x75 |3 # CIRCLED NUMBER THIRTEEN negative, sans, light \xA8\xF4 |0 # CIRCLED NUMBER FOURTEEN - \xA3\x62 |3 # dingbat negative circled sans number fourteen - \xA6\xF2 |3 # CIRCLED NUMBER FOURTEEN, negative - \xA4\x76 |3 # CIRCLED NUMBER FOURTEEN negative, sans, light \xA8\xF5 |0 # CIRCLED NUMBER FIFTEEN - \xA3\x63 |3 # dingbat negative circled sans number fifteen - \xA6\xF3 |3 # CIRCLED NUMBER FIFTEEN, negative - \xA4\x77 |3 # CIRCLED NUMBER FIFTEEN negative, sans, light \xA7\xF0 |0 # CIRCLED NUMBER SIXTEEN - \xA3\x64 |3 # dingbat negative circled sans number sixteen - \xA6\xF4 |3 # CIRCLED NUMBER SIXTEEN, negative - \xA4\x78 |3 # CIRCLED NUMBER SIXTEEN negative, sans, light \xA7\xF1 |0 # CIRCLED NUMBER SEVENTEEN - \xA3\x65 |3 # dingbat negative circled sans number seventeen - \xA6\xF5 |3 # CIRCLED NUMBER SEVENTEEN, negative - \xA4\x79 |3 # CIRCLED NUMBER SEVENTEEN negative, sans, light \xA7\xF2 |0 # CIRCLED NUMBER EIGHTEEN - \xA3\x66 |3 # dingbat negative circled sans number eighteen - \xA6\xF6 |3 # CIRCLED NUMBER EIGHTEEN, negative - \xA4\x7A |3 # CIRCLED NUMBER EIGHTEEN negative, sans, light \xA7\xF3 |0 # CIRCLED NUMBER NINETEEN - \xA3\x67 |3 # dingbat negative circled sans number nineteen - \xA6\xF7 |3 # CIRCLED NUMBER NINETEEN, negative - \xA4\x7B |3 # CIRCLED NUMBER NINETEEN negative, sans, light \xA7\xF4 |0 # CIRCLED NUMBER TWENTY - \xA3\x68 |3 # dingbat negative circled sans number twenty - \xA6\xF8 |3 # CIRCLED NUMBER TWENTY, negative - \xA4\x7C |3 # CIRCLED NUMBER TWENTY negative, sans, light \xA9\xE7 |0 # PARENTHESIZED DIGIT ONE \xA9\xE8 |0 # PARENTHESIZED DIGIT TWO \xA9\xE9 |0 # PARENTHESIZED DIGIT THREE @@ -882,6 +865,36 @@ CHARMAP \xA8\xE5 |0 # CIRCLED LATIN SMALL LETTER Y \xA8\xE6 |0 # CIRCLED LATIN SMALL LETTER Z \xA5\x4B |3 # CIRCLED DIGIT ZERO, serif, bold + \xA6\xEF |0 # NEGATIVE CIRCLED NUMBER ELEVEN # for Unicode 3.2 and later + \xA4\x73 |3 # NEGATIVE CIRCLED NUMBER ELEVEN, sans, light # for Unicode 3.2 and later + \xA3\x5F |3 # NEGATIVE CIRCLED NUMBER ELEVEN, sans serif # for Unicode 3.2 and later + \xA6\xF0 |0 # NEGATIVE CIRCLED NUMBER TWELVE # for Unicode 3.2 and later + \xA4\x74 |3 # NEGATIVE CIRCLED NUMBER TWELVE, sans, light # for Unicode 3.2 and later + \xA3\x60 |3 # NEGATIVE CIRCLED NUMBER TWELVE, sans serif # for Unicode 3.2 and later + \xA6\xF1 |0 # NEGATIVE CIRCLED NUMBER THIRTEEN # for Unicode 3.2 and later + \xA4\x75 |3 # NEGATIVE CIRCLED NUMBER THIRTEEN, sans, light # for Unicode 3.2 and later + \xA3\x61 |3 # NEGATIVE CIRCLED NUMBER THIRTEEN, sans serif # for Unicode 3.2 and later + \xA6\xF2 |0 # NEGATIVE CIRCLED NUMBER FOURTEEN # for Unicode 3.2 and later + \xA4\x76 |3 # NEGATIVE CIRCLED NUMBER FOURTEEN, sans, light # for Unicode 3.2 and later + \xA3\x62 |3 # NEGATIVE CIRCLED NUMBER FOURTEEN, sans serif # for Unicode 3.2 and later + \xA6\xF3 |0 # NEGATIVE CIRCLED NUMBER FIFTEEN # for Unicode 3.2 and later + \xA4\x77 |3 # NEGATIVE CIRCLED NUMBER FIFTEEN, sans, light # for Unicode 3.2 and later + \xA3\x63 |3 # NEGATIVE CIRCLED NUMBER FIFTEEN, sans serif # for Unicode 3.2 and later + \xA6\xF4 |0 # NEGATIVE CIRCLED NUMBER SIXTEEN # for Unicode 3.2 and later + \xA4\x78 |3 # NEGATIVE CIRCLED NUMBER SIXTEEN, sans, light # for Unicode 3.2 and later + \xA3\x64 |3 # NEGATIVE CIRCLED NUMBER SIXTEEN, sans serif # for Unicode 3.2 and later + \xA6\xF5 |0 # NEGATIVE CIRCLED NUMBER SEVENTEEN # for Unicode 3.2 and later + \xA4\x79 |3 # NEGATIVE CIRCLED NUMBER SEVENTEEN, sans, light # for Unicode 3.2 and later + \xA3\x65 |3 # NEGATIVE CIRCLED NUMBER SEVENTEEN, sans serif # for Unicode 3.2 and later + \xA6\xF6 |0 # NEGATIVE CIRCLED NUMBER EIGHTEEN # for Unicode 3.2 and later + \xA4\x7A |3 # NEGATIVE CIRCLED NUMBER EIGHTEEN, sans, light # for Unicode 3.2 and later + \xA3\x66 |3 # NEGATIVE CIRCLED NUMBER EIGHTEEN, sans serif # for Unicode 3.2 and later + \xA6\xF7 |0 # NEGATIVE CIRCLED NUMBER NINETEEN # for Unicode 3.2 and later + \xA4\x7B |3 # NEGATIVE CIRCLED NUMBER NINETEEN, sans, light # for Unicode 3.2 and later + \xA3\x67 |3 # NEGATIVE CIRCLED NUMBER NINETEEN, sans serif # for Unicode 3.2 and later + \xA6\xF8 |0 # NEGATIVE CIRCLED NUMBER TWENTY # for Unicode 3.2 and later + \xA4\x7C |3 # NEGATIVE CIRCLED NUMBER TWENTY, sans, light # for Unicode 3.2 and later + \xA3\x68 |3 # NEGATIVE CIRCLED NUMBER TWENTY, sans serif # for Unicode 3.2 and later \xA6\xA1 |0 # BOX DRAWINGS LIGHT HORIZONTAL \xA6\xAC |0 # BOX DRAWINGS HEAVY HORIZONTAL \xA6\xA2 |0 # BOX DRAWINGS LIGHT VERTICAL @@ -950,28 +963,33 @@ CHARMAP \xA6\xE3 |0 # BOX DRAWINGS RIGHT LIGHT AND LEFT VERTICAL HEAVY \xA6\xE4 |0 # BOX DRAWINGS LEFT LIGHT AND RIGHT VERTICAL HEAVY \xA6\xB6 |0 # BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL + \xA7\x8F |0 # FULL BLOCK \xA2\xC6 |0 # MEDIUM SHADE \xA1\xE1 |0 # BLACK SQUARE \xA6\x56 |3 # BLACK SQUARE + COMBINING ENCLOSING DIAMOND - \xA7\x8F |3 # BLACK SQUARE, large \xA1\xE0 |0 # WHITE SQUARE - \xA6\x64 |3 # WHITE SQUARE + COMBINING ENCLOSING SQUARE \xA6\x59 |3 # WHITE SQUARE + COMBINING ENCLOSING DIAMOND \xA7\x8D |3 # WHITE SQUARE, large \xA7\x8E |3 # WHITE SQUARE, large, bold \xA7\x8C |3 # WHITE SQUARE, bold + \xA6\x78 |0 # WHITE SQUARE WITH ROUNDED CORNERS \xA2\xC3 |0 # WHITE SQUARE CONTAINING BLACK SMALL SQUARE \xA2\xC7 |0 # SQUARE WITH HORIZONTAL FILL \xA2\xC8 |0 # SQUARE WITH VERTICAL FILL \xA2\xCB |0 # SQUARE WITH ORTHOGONAL CROSSHATCH FILL \xA2\xCA |0 # SQUARE WITH UPPER LEFT TO LOWER RIGHT FILL \xA2\xC9 |0 # SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL + \xA6\x8A |3 # SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL, alternate \xA2\xCC |0 # SQUARE WITH DIAGONAL CROSSHATCH FILL + \xA7\x4A |0 # WHITE RECTANGLE + \xA7\x49 |3 # WHITE RECTANGLE, small \xA7\x66 |0 # WHITE PARALLELOGRAM \xA1\xE3 |0 # BLACK UP-POINTING TRIANGLE \xA6\x6B |3 # BLACK UP-POINTING TRIANGLE + COMBINING ENCLOSING CIRCLE \xA1\xE2 |0 # WHITE UP-POINTING TRIANGLE \xA6\x6A |3 # WHITE UP-POINTING TRIANGLE + COMBINING ENCLOSING CIRCLE + \xA7\x45 |3 # WHITE UP-POINTING TRIANGLE, small + \xA7\x9B |3 # BLACK UP-POINTING SMALL TRIANGLE + COMBINING ENCLOSING UPWARD POINTING TRIANGLE # for Unicode 3.2 and later \xA7\x95 |0 # WHITE UP-POINTING SMALL TRIANGLE \xA2\xBA |0 # BLACK RIGHT-POINTING TRIANGLE \xA2\xB9 |0 # WHITE RIGHT-POINTING TRIANGLE @@ -988,28 +1006,40 @@ CHARMAP \xA1\xDE |0 # WHITE DIAMOND \xA6\x62 |3 # WHITE DIAMOND + COMBINING ENCLOSING SQUARE \xA6\x57 |3 # WHITE DIAMOND + COMBINING ENCLOSING DIAMOND + \xA6\x61 |3 # WHITE DIAMOND + COMBINING ENCLOSING DIAMOND + COMBINING ENCLOSING DIAMOND \xA7\x89 |3 # WHITE DIAMOND, large \xA7\x8A |3 # WHITE DIAMOND, large, bold \xA7\x88 |3 # WHITE DIAMOND, bold + \xA7\x4E |3 # WHITE DIAMOND, flattened \xA2\xC2 |0 # WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND + \xA6\x89 |3 # WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND, alternate \xA2\xC1 |0 # FISHEYE + \xA6\x82 |3 # FISHEYE + COMBINING ENCLOSING CIRCLE \xA7\x9C |0 # LOZENGE \xA1\xDB |0 # WHITE CIRCLE \xA7\x91 |3 # WHITE CIRCLE, large \xA7\x92 |3 # WHITE CIRCLE, large, bold + \xA7\x44 |3 # WHITE CIRCLE, small \xA6\x75 |0 # DOTTED CIRCLE + \xA6\x84 |0 # CIRCLE WITH VERTICAL FILL \xA1\xDD |0 # BULLSEYE - \xA6\x68 |3 # BULLSEYE, alternate + \xA6\x69 |3 # BULLSEYE + COMBINING ENCLOSING CIRCLE \xA1\xDC |0 # BLACK CIRCLE \xA7\x93 |3 # BLACK CIRCLE, large \xA2\xC4 |0 # CIRCLE WITH LEFT HALF BLACK \xA2\xC5 |0 # CIRCLE WITH RIGHT HALF BLACK \xA7\x90 |0 # WHITE BULLET \xA6\x6F |0 # LARGE CIRCLE + \xA6\x70 |3 # LARGE CIRCLE, bold + \xA7\x46 |0 # WHITE MEDIUM SQUARE # for Unicode 3.2 and later + \xA7\x9A |0 # BLACK MEDIUM SQUARE # for Unicode 3.2 and later \xA1\xDA |0 # BLACK STAR \xA1\xD9 |0 # WHITE STAR \xA2\xCF |0 # BLACK TELEPHONE \xA2\xCE |0 # WHITE TELEPHONE + \xA6\x77 |0 # BALLOT BOX (large white square) + \xA6\x71 |3 # BALLOT BOX, bold (large bold white square) + \xA6\x76 |3 # BALLOT BOX, dotted \xA2\xD0 |0 # WHITE LEFT POINTING INDEX \xA6\x5E |3 # WHITE LEFT POINTING INDEX, alternate \xAC\x8D |0 # WHITE UP POINTING INDEX @@ -1029,6 +1059,7 @@ CHARMAP \xA2\xC0 |0 # BLACK CLUB SUIT \xA2\xBB |0 # WHITE SPADE SUIT \xA2\xBE |0 # BLACK HEART SUIT + \xA7\x98 |0 # BLACK DIAMOND SUIT \xA2\xBF |0 # WHITE CLUB SUIT \xA2\xCD |0 # HOT SPRINGS \xA2\xDB |0 # QUARTER NOTE @@ -1038,9 +1069,22 @@ CHARMAP \xA6\x48 |0 # MUSIC SHARP SIGN \xA6\x6D |0 # HEAVY MULTIPLICATION X \xA6\x6C |0 # HEAVY GREEK CROSS + \xA6\x88 |0 # MALTESE CROSS + \xA6\x87 |3 # MALTESE CROSS, white + \xA6\x72 |0 # FOUR BALLOON-SPOKED ASTERISK with balloon at center + \xA6\x79 |3 # FOUR BALLOON-SPOKED ASTERISK with balloon at center, white + \xA6\x53 |0 # HEAVY ASTERISK (large 6-spokes line asterisk dingbat) \xA6\x52 |0 # HEAVY TEARDROP-SPOKED ASTERISK + \xA6\x7C |0 # BLACK FLORETTE + \xA6\x7B |3 # BLACK FLORETTE, negative (white) + \xA6\x99 |0 # WHITE FLORETTE + \xA6\x8D |0 # EIGHT PETALLED OUTLINED BLACK FLORETTE + \xA6\x54 |0 # SPARKLE (small square 8-spoke line asterisk dingbat) + \xA6\x9B |3 # HEAVY SPARKLE + COMBINING RING OVERLAY + \xA6\x83 |0 # SHADOWED WHITE CIRCLE \xA6\x73 |0 # BLACK DIAMOND MINUS WHITE X \xA6\x7A |3 # BLACK DIAMOND MINUS WHITE X, negative + \xA6\x8E |3 # BLACK DIAMOND MINUS WHITE X, alternate \xA6\xE5 |0 # DINGBAT NEGATIVE CIRCLED DIGIT ONE \xA6\xE6 |0 # DINGBAT NEGATIVE CIRCLED DIGIT TWO \xA6\xE7 |0 # DINGBAT NEGATIVE CIRCLED DIGIT THREE @@ -1071,6 +1115,73 @@ CHARMAP \xA4\x71 |3 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE, light \xA3\x5E |0 # DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN \xA4\x72 |3 # DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN, light + \xAC\x5E |0 # HEAVY WIDE-HEADED RIGHTWARDS ARROW: small bold + \xA8\x6A |0 # DRAFTING POINT RIGHTWARDS ARROW (umbrella) + \xA8\x60 |0 # HEAVY ROUND-TIPPED RIGHTWARDS ARROW + \xA8\x78 |0 # HEAVY TRIANGLE-HEADED RIGHTWARDS ARROW: medium black + \xA8\x74 |0 # BLACK RIGHTWARDS ARROW: heavy black + \xAC\x48 |0 # BLACK RIGHTWARDS ARROWHEAD + \xA8\x5C |0 # CIRCLED HEAVY WHITE RIGHTWARDS ARROW + \xAC\x43 |0 # BLACK-FEATHERED RIGHTWARDS ARROW + \xA6\x85 |3 # WHITE CONCAVE-SIDED DIAMOND (like star) + COMBINING ENCLOSING CIRCLE # for Unicode 3.2 and later + \xA8\x8D |0 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS # for Unicode 3.2 and later + \xAC\x7D |3 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS, negative # for Unicode 3.2 and later + \xAC\x88 |3 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS, triangle head # for Unicode 3.2 and later + \xA8\x85 |3 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS, alternate: rightwards arrow with tip upwards # for Unicode 3.2 and later + \xA8\x89 |0 # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS # for Unicode 3.2 and later + \xAC\x79 |3 # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS, negative # for Unicode 3.2 and later + \xAC\x84 |3 # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS, triangle head # for Unicode 3.2 and later + \xA8\x86 |0 # ARROW POINTING DOWNWARDS THEN CURVING LEFTWARDS # for Unicode 3.2 and later + \xAC\x76 |3 # ARROW POINTING DOWNWARDS THEN CURVING LEFTWARDS, negative # for Unicode 3.2 and later + \xAC\x81 |3 # ARROW POINTING DOWNWARDS THEN CURVING LEFTWARDS, triangle head # for Unicode 3.2 and later + \xA8\x8B |0 # ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS # for Unicode 3.2 and later + \xAC\x7B |3 # ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS, negative # for Unicode 3.2 and later + \xAC\x86 |3 # ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS, triangle head # for Unicode 3.2 and later + \xA8\x8C |0 # LEFT-SIDE ARC ANTICLOCKWISE ARROW # for Unicode 3.2 and later + \xAC\x7C |3 # LEFT-SIDE ARC ANTICLOCKWISE ARROW, negative # for Unicode 3.2 and later + \xAC\x87 |3 # LEFT-SIDE ARC ANTICLOCKWISE ARROW, triangle head # for Unicode 3.2 and later + \xA8\x84 |3 # LEFT-SIDE ARC ANTICLOCKWISE ARROW, alternate: leftwards arrow with tip downwards # for Unicode 3.2 and later + \xA8\x6F |0 # LEFTWARDS HARPOON WITH BARB UP ABOVE LEFTWARDS HARPOON WITH BARB DOWN # for Unicode 3.2 and later + \xAC\x45 |3 # LEFTWARDS HARPOON WITH BARB UP ABOVE LEFTWARDS HARPOON WITH BARB DOWN, alternate # for Unicode 3.2 and later + \xA8\x71 |0 # UPWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT # for Unicode 3.2 and later + \xA8\x70 |0 # RIGHTWARDS HARPOON WITH BARB UP ABOVE RIGHTWARDS HARPOON WITH BARB DOWN # for Unicode 3.2 and later + \xAC\x44 |3 # RIGHTWARDS HARPOON WITH BARB UP ABOVE RIGHTWARDS HARPOON WITH BARB DOWN, alternate # for Unicode 3.2 and later + \xA8\x72 |0 # DOWNWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT # for Unicode 3.2 and later + \xA7\x99 |0 # Z NOTATION SPOT (small black circle) # for Unicode 3.2 and later + \xA1\x59 |0 # LEFT WHITE PARENTHESIS # for Unicode 3.2 or later + \xA2\x43 |3 # LEFT WHITE PARENTHESIS (double), small, bold # for Unicode 3.2 and later + \xA2\x41 |3 # LEFT WHITE PARENTHESIS (double), small # for Unicode 3.2 and later + \xA1\x53 |3 # LEFT WHITE PARENTHESIS (double), large # for Unicode 3.2 or later + \xA1\x65 |3 # LEFT WHITE PARENTHESIS, bold, wide # for Unicode 3.2 or later + \xA1\x55 |3 # LEFT WHITE PARENTHESIS, bold # for Unicode 3.2 or later + \xA1\x51 |3 # LEFT WHITE PARENTHESIS (double), alternate # for Unicode 3.2 or later + \xA1\x5A |0 # RIGHT WHITE PARENTHESIS # for Unicode 3.2 or later + \xA2\x44 |3 # RIGHT WHITE PARENTHESIS (double), small, bold # for Unicode 3.2 and later + \xA2\x42 |3 # RIGHT WHITE PARENTHESIS (double), small # for Unicode 3.2 and later + \xA1\x54 |3 # RIGHT WHITE PARENTHESIS (double), large # for Unicode 3.2 or later + \xA1\x66 |3 # RIGHT WHITE PARENTHESIS, bold, wide # for Unicode 3.2 or later + \xA1\x56 |3 # RIGHT WHITE PARENTHESIS, bold # for Unicode 3.2 or later + \xA1\x52 |3 # RIGHT WHITE PARENTHESIS (double), alternate # for Unicode 3.2 or later + \xA1\x99 |0 # LEFT BLACK TORTOISE SHELL BRACKET # for Unicode 3.2 or later + \xA1\x9A |0 # RIGHT BLACK TORTOISE SHELL BRACKET # for Unicode 3.2 or later + \xA4\x9C |0 # REVERSED ANGLE # for Unicode 3.2 and later + \xA6\x68 |0 # CIRCLED WHITE BULLET # for Unicode 3.2 and later + \xA6\x6E |0 # CIRCLED BULLET # for Unicode 3.2 and later + \xA6\x64 |0 # SQUARED SQUARE # for Unicode 3.2 and later + \xA6\x67 |3 # SQUARED SQUARE + COMBINING ENCLOSING SQUARE # for Unicode 3.2 and later + \xA4\x7D |0 # PLUS SIGN WITH TILDE BELOW # for Unicode 3.2 and later + \xA7\x60 |0 # CIRCLED DIVISION SIGN # for Unicode 3.2 and later + \xA7\x7B |0 # PLUS SIGN ABOVE EQUALS SIGN # for Unicode 3.2 and later + \xA4\x94 |0 # LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN # for Unicode 3.2 and later + \xA4\x95 |0 # GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN # for Unicode 3.2 and later + \xA4\x96 |0 # LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL # for Unicode 3.2 and later + \xA4\x97 |0 # GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL # for Unicode 3.2 and later + \xA4\x8B |0 # SUBSET OF ABOVE EQUALS SIGN # for Unicode 3.2 and later + \xA4\x8D |0 # SUPERSET OF ABOVE EQUALS SIGN # for Unicode 3.2 and later + \xA4\x8C |0 # SUBSET OF ABOVE NOT EQUAL TO # for Unicode 3.2 and later + \xA4\x8E |0 # SUPERSET OF ABOVE NOT EQUAL TO # for Unicode 3.2 and later + \xA7\x6B |0 # SHORT UP TACK WITH UNDERBAR # for Unicode 3.2 and later + \xA4\x9F |3 # DOUBLE SOLIDUS OPERATOR+COMBINING EQUALS SIGN BELOW # for Unicode 3.2 and later \xA1\xA1 |0 # IDEOGRAPHIC SPACE \xA1\xA2 |0 # IDEOGRAPHIC COMMA # KSC spec: "comma for vertical use" \xA1\xA3 |0 # IDEOGRAPHIC FULL STOP # KSC spec: "period for vertical use" @@ -1082,7 +1193,7 @@ CHARMAP \xA1\xB5 |0 # RIGHT ANGLE BRACKET \xA1\x4C |3 # RIGHT ANGLE BRACKET, small \xA1\xB6 |0 # LEFT DOUBLE ANGLE BRACKET - \xA1\x49 |3 # LEFT DOUBLE ANGLE BRACKET , small + \xA1\x49 |3 # LEFT DOUBLE ANGLE BRACKET, small \xA1\xB7 |0 # RIGHT DOUBLE ANGLE BRACKET \xA1\x4A |3 # RIGHT DOUBLE ANGLE BRACKET, small \xA1\xB8 |0 # LEFT CORNER BRACKET @@ -1109,10 +1220,9 @@ CHARMAP \xA1\x5C |3 # RIGHT BLACK LENTICULAR BRACKET, duplicate of 0xA1BD \xA7\x42 |0 # POSTAL MARK \xA1\xEB |0 # GETA MARK # KSC spec: "bad character mark" + \xA6\x7D |3 # GETA MARK, bold (compare 0xA1EB->) \xA1\xB2 |0 # LEFT TORTOISE SHELL BRACKET - \xA1\x99 |3 # LEFT TORTOISE SHELL BRACKET, bold \xA1\xB3 |0 # RIGHT TORTOISE SHELL BRACKET - \xA1\x9A |3 # RIGHT TORTOISE SHELL BRACKET, bold \xA1\x5D |0 # LEFT WHITE LENTICULAR BRACKET \xA2\x47 |3 # LEFT WHITE LENTICULAR BRACKET, small \xA1\x5E |0 # RIGHT WHITE LENTICULAR BRACKET @@ -1418,6 +1528,26 @@ CHARMAP \xA2\xDF |0 # PARENTHESIZED HANGUL CIEUC U \xA7\x9D |0 # PARENTHESIZED IDEOGRAPH STOCK \xA7\x9E |0 # PARENTHESIZED IDEOGRAPH REPRESENT + \xA7\xF5 |0 # CIRCLED NUMBER TWENTY ONE # for Unicode 3.2 and later + \xA6\xF9 |3 # CIRCLED NUMBER TWENTY ONE, negative # for Unicode 3.2 and later + \xA7\xF6 |0 # CIRCLED NUMBER TWENTY TWO # for Unicode 3.2 and later + \xA6\xFA |3 # CIRCLED NUMBER TWENTY TWO, negative # for Unicode 3.2 and later + \xA7\xF7 |0 # CIRCLED NUMBER TWENTY THREE # for Unicode 3.2 and later + \xA6\xFB |3 # CIRCLED NUMBER TWENTY THREE, negative # for Unicode 3.2 and later + \xA7\xF8 |0 # CIRCLED NUMBER TWENTY FOUR # for Unicode 3.2 and later + \xA6\xFC |3 # CIRCLED NUMBER TWENTY FOUR, negative # for Unicode 3.2 and later + \xA7\xF9 |0 # CIRCLED NUMBER TWENTY FIVE # for Unicode 3.2 and later + \xA6\xFD |3 # CIRCLED NUMBER TWENTY FIVE, negative # for Unicode 3.2 and later + \xA7\xFA |0 # CIRCLED NUMBER TWENTY SIX # for Unicode 3.2 and later + \xA6\xFE |3 # CIRCLED NUMBER TWENTY SIX, negative # for Unicode 3.2 and later + \xA7\xFB |0 # CIRCLED NUMBER TWENTY SEVEN # for Unicode 3.2 and later + \xA5\xF9 |3 # CIRCLED NUMBER TWENTY SEVEN, negative # for Unicode 3.2 and later + \xA7\xFC |0 # CIRCLED NUMBER TWENTY EIGHT # for Unicode 3.2 and later + \xA5\xFA |3 # CIRCLED NUMBER TWENTY EIGHT, negative # for Unicode 3.2 and later + \xA7\xFD |0 # CIRCLED NUMBER TWENTY NINE # for Unicode 3.2 and later + \xA5\xFB |3 # CIRCLED NUMBER TWENTY NINE, negative # for Unicode 3.2 and later + \xA7\xFE |0 # CIRCLED NUMBER THIRTY # for Unicode 3.2 and later + \xA5\xFC |3 # CIRCLED NUMBER THIRTY, negative # for Unicode 3.2 and later \xA8\xB1 |0 # CIRCLED HANGUL KIYEOK \xA8\xB2 |0 # CIRCLED HANGUL NIEUN \xA8\xB3 |0 # CIRCLED HANGUL TIKEUT @@ -1875,7 +2005,9 @@ CHARMAP \xCB\xC2 |0 # \xFD\xD5 |0 # \xF4\xC8 |0 # + \xA7\x50 |3 # protrusion/convex, alternate \xE8\xEA |0 # + \xA7\x4F |3 # depression/concave, alternate \xF5\xF3 |0 # \xF9\xDE |0 # \xD3\xEF |0 # @@ -5282,6 +5414,7 @@ CHARMAP \xCF\xD9 |0 # \xDC\xCD |0 # \xAA\x60 |3 # ideograph repair/restore + COMBINING ENCLOSING SQUARE + \xA7\x7C |3 # "repair/restore" in enclosing triangle # for Unicode 3.2 and later \xED\xFB |0 # \xDE\xF0 |0 # \xD7\xEB |0 # @@ -8743,10 +8876,37 @@ CHARMAP \xC8\xFC |0 # HANGUL SYLLABLE HIEUH I PIEUP \xC8\xFD |0 # HANGUL SYLLABLE HIEUH I SIOS \xC8\xFE |0 # HANGUL SYLLABLE HIEUH I IEUNG - \xA7\x85 |3 # EXCLAMATION MARK and QUESTION MARK - \xA1\x51 |3 # double left parenthesis - \xA1\x52 |3 # double right parenthesisS - \xA7\x87 |3 # double QUESTION MARK + \xA6\x58 |0 # black diamond minus white square # corporate char + \xA6\x66 |3 # black diamond minus white square + COMBINING ENCLOSING SQUARE # corporate char + \xA6\x63 |0 # black square minus white diamond # corporate char + \xA6\x60 |3 # black square minus white diamond + COMBINING ENCLOSING DIAMOND # corporate char + \xA6\x9F |0 # telephone dial # corporate char + \xA6\x8F |0 # five vertical lines # corporate char + \xA6\x81 |0 # one downward-pointing black triangle over two others # corporate char + \xA6\x91 |3 # one downward-pointing black triangle over two others, negative # corporate char + \xA6\x74 |0 # two interwoven eye shapes # corporate char + \xA6\x96 |0 # narrow-leaf four-petal florette # corporate char + \xA6\x86 |3 # narrow-leaf four-petal florette, in front of black diamond # corporate char + \xA6\x9A |0 # four interleaved fisheyes # corporate char + \xA6\x42 |0 # fleur-de-lis # corporate char + \xA6\x41 |3 # fleur-de-lis, alternate # corporate char + \xA1\x6E |0 # three asterisks aligned vertically (dictionary definition importance mark) # corporate char + \xA8\x94 |0 # left right up down arrow # corporate char + \xAC\x54 |0 # downwards wave arrow # corporate char + \xAC\x42 |0 # leftwards white arrow from wall # corporate char + \xAC\x49 |0 # black leftwards arrowhead # corporate char + \xAC\x5F |0 # black-feathered leftwards arrow # corporate char + \xA8\x67 |0 # leftwards arrowhead with tail of spreading ripples # corporate char + \xA8\x68 |0 # rightwards arrowhead with tail of spreading ripples # corporate char + \xA8\x9D |0 # large white leftwards arrow with white fins # corporate char + \xA8\x9C |0 # large white rightwards arrow with white fins # corporate char + \xAC\x4B |0 # leftwards arrow with bow # corporate char + \xAC\x4A |0 # rightwards arrow with bow # corporate char + \xA7\x47 |0 # small pentagon # corporate char + \xA7\x4B |0 # trapezoid # corporate char + \xA7\x4C |0 # quadrilateral with shorter right side # corporate char + \xA7\x4D |0 # quadrilateral with shorter left side # corporate char + \xA6\x4C |3 # two asterisks aligned horizontally (annotation/comment mark) \xA5\x55 |3 # LATIN CAPITAL LETTER A with RIGHT PARENTHESIS \xA9\x41 |3 # LATIN CAPITAL LETTER A with FULL STOP \xA5\x56 |3 # LATIN CAPITAL LETTER B with RIGHT PARENTHESIS @@ -8885,7 +9045,7 @@ CHARMAP \xAA\xFB |3 # parenthesized number twenty-three \xAA\xFC |3 # parenthesized number twenty-four \xAA\xFD |3 # parenthesized number twenty-five - \xAA\xFE |3 # parenthesized number twenty-siz + \xAA\xFE |3 # parenthesized number twenty-six \xAB\xF7 |3 # parenthesized number twenty-seven \xAB\xF8 |3 # parenthesized number twenty-eight \xAB\xF9 |3 # parenthesized number twenty-nine @@ -8911,16 +9071,7 @@ CHARMAP \xAD\x66 |3 # ideographs for eighteen in enclosing square \xAD\x64 |3 # ideographs for sixteen in enclosing square \xAD\x62 |3 # ideographs for fourteen in enclosing square - \xA7\xF5 |3 # circled number twenty-one - \xA7\xF6 |3 # circled number twenty-two - \xA7\xF7 |3 # circled number twenty-three - \xA7\xF8 |3 # circled number twenty-four - \xA7\xF9 |3 # circled number twenty-five - \xA7\xFA |3 # circled number twenty-six - \xA7\xFB |3 # circled number twenty-seven - \xA7\xFC |3 # circled number twenty-eight - \xA7\xFD |3 # circled number twenty-nine - \xA7\xFE |3 # circled number twenty-ten + \xA7\x7D |3 # square hangul, horizontal LR form \xA2\xEF |3 # number ten in enclosing square, serif, bold \xA2\xF0 |3 # number eleven in enclosing square, serif, bold \xA2\xF1 |3 # number twelve in enclosing square, serif, bold @@ -8942,16 +9093,7 @@ CHARMAP \xAD\x52 |3 # ideographs for eighteen in enclosing square, negative \xAD\x50 |3 # ideographs for sixteen in enclosing square, negative \xAD\x4E |3 # ideographs for fourteen in enclosing square, negative - \xA6\xF9 |3 # circled number twenty-one, negative - \xA6\xFA |3 # circled number twenty-two, negative - \xA6\xFB |3 # circled number twenty-three, negative - \xA6\xFC |3 # circled number twenty-four, negative - \xA6\xFD |3 # circled number twenty-five, negative - \xA6\xFE |3 # circled number twenty-six, negative - \xA5\xF9 |3 # circled number twenty-seven, negative - \xA5\xFA |3 # circled number twenty-eight, negative - \xA5\xFB |3 # circled number twenty-nine, negative - \xA5\xFC |3 # circled number thirty, negative + \xA7\x81 |3 # square hangul, vertical form \xA3\x4A |3 # number ten in enclosing square, sans, shadowed \xA3\x4B |3 # number eleven in enclosing square, sans, shadowed \xA3\x4C |3 # number twelve in enclosing square, sans, shadowed @@ -8985,12 +9127,7 @@ CHARMAP \xAC\xF4 |3 # number eighteen in enclosing square, negative, light \xAC\xF5 |3 # number nineteen in enclosing square, negative, light \xAC\xF6 |3 # number twenty in enclosing square, negative, light - \xA1\x53 |3 # double left parenthesis, alternate - \xA1\x54 |3 # double right parenthesis, alternate - \xA2\x41 |3 # double left parenthesis, alternate 2 - \xA2\x42 |3 # double right parenthesis, alternate 2 - \xA2\x43 |3 # double left parenthesis, alternate 3 - \xA2\x44 |3 # double right parenthesis, alternate 3 + \xA6\x50 |3 # two asterisks aligned horizontally (annotation/comment mark), large \xCB\xD0 |0 # CJK COMPATIBILITY IDEOGRAPH-F900 \xCB\xD6 |0 # CJK COMPATIBILITY IDEOGRAPH-F901 \xCB\xE7 |0 # CJK COMPATIBILITY IDEOGRAPH-F902 @@ -9259,6 +9396,12 @@ CHARMAP \xFA\xA2 |0 # CJK COMPATIBILITY IDEOGRAPH-FA09 \xFA\xE6 |0 # CJK COMPATIBILITY IDEOGRAPH-FA0A \xFC\xA9 |0 # CJK COMPATIBILITY IDEOGRAPH-FA0B + \xA1\x4D |0 # SMALL LEFT PARENTHESIS + \xA2\x45 |3 # SMALL LEFT PARENTHESIS, bold + \xA1\x4F |3 # SMALL LEFT PARENTHESIS, more rounded + \xA1\x4E |0 # SMALL RIGHT PARENTHESIS + \xA2\x46 |3 # SMALL RIGHT PARENTHESIS, bold + \xA1\x50 |3 # SMALL RIGHT PARENTHESIS, more rounded \xA3\xA1 |0 # FULLWIDTH EXCLAMATION MARK \xA5\xDA |3 # FULLWIDTH EXCLAMATION MARK, position left \xA3\xA2 |0 # FULLWIDTH QUOTATION MARK @@ -9270,12 +9413,11 @@ CHARMAP \xA3\xA8 |0 # FULLWIDTH LEFT PARENTHESIS \xA3\xA9 |0 # FULLWIDTH RIGHT PARENTHESIS \xA3\xAA |0 # FULLWIDTH ASTERISK - \xA6\x4E |3 # FULLWIDTH ASTERISK, position low, large \xA1\x9F |3 # FULLWIDTH ASTERISK, position left, large \xA6\x4A |3 # FULLWIDTH ASTERISK, position center \xA1\x9C |3 # FULLWIDTH ASTERISK, position left \xA1\x9E |3 # FULLWIDTH ASTERISK, position low left - \xA6\x49 |3 # FULLWIDTH ASTERISK, duplicate of xA3AA + \xA6\x49 |3 # FULLWIDTH ASTERISK, duplicate of 0xA3AA \xA3\xAB |0 # FULLWIDTH PLUS SIGN \xA3\xAC |0 # FULLWIDTH COMMA \xA3\xAD |0 # FULLWIDTH HYPHEN-MINUS diff --git a/ext/Encode/ucm/macROMnn.ucm b/ext/Encode/ucm/macROMnn.ucm index 6be54cd..5f81911 100644 --- a/ext/Encode/ucm/macROMnn.ucm +++ b/ext/Encode/ucm/macROMnn.ucm @@ -1,5 +1,5 @@ # -# $Id: macROMnn.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $ +# $Id: macROMnn.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT @@ -8,7 +8,6 @@ 1 1 \x3F -# CHARMAP \x00 |0 # \x01 |0 # @@ -93,9 +92,7 @@ CHARMAP \x50 |0 # LATIN CAPITAL LETTER P \x51 |0 # LATIN CAPITAL LETTER Q \x52 |0 # LATIN CAPITAL LETTER R - \xAF |3 # LATIN CAPITAL LETTER S + COMBINING COMMA BELOW \x53 |0 # LATIN CAPITAL LETTER S - \xDE |3 # LATIN CAPITAL LETTER T + COMBINING COMMA BELOW \x54 |0 # LATIN CAPITAL LETTER T \x55 |0 # LATIN CAPITAL LETTER U \x56 |0 # LATIN CAPITAL LETTER V @@ -127,9 +124,7 @@ CHARMAP \x70 |0 # LATIN SMALL LETTER P \x71 |0 # LATIN SMALL LETTER Q \x72 |0 # LATIN SMALL LETTER R - \xBF |3 # LATIN SMALL LETTER S + COMBINING COMMA BELOW \x73 |0 # LATIN SMALL LETTER S - \xDF |3 # LATIN SMALL LETTER T + COMBINING COMMA BELOW \x74 |0 # LATIN SMALL LETTER T \x75 |0 # LATIN SMALL LETTER U \x76 |0 # LATIN SMALL LETTER V @@ -224,6 +219,10 @@ CHARMAP \xCF |0 # LATIN SMALL LIGATURE OE \xD9 |0 # LATIN CAPITAL LETTER Y WITH DIAERESIS \xC4 |0 # LATIN SMALL LETTER F WITH HOOK + \xAF |0 # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + \xBF |0 # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later + \xDE |0 # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later + \xDF |0 # LATIN SMALL LETTER T WITH COMMA BELOW; # for Unicode 3.0 and later \xF6 |0 # MODIFIER LETTER CIRCUMFLEX ACCENT \xFF |0 # CARON \xF9 |0 # BREVE diff --git a/ext/Encode/ucm/macSymbol.ucm b/ext/Encode/ucm/macSymbol.ucm index 6d64b4a..dfae00e 100644 --- a/ext/Encode/ucm/macSymbol.ucm +++ b/ext/Encode/ucm/macSymbol.ucm @@ -1,5 +1,5 @@ # -# $Id: macSymbol.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $ +# $Id: macSymbol.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/SYMBOL.TXT @@ -47,13 +47,7 @@ CHARMAP \x25 |0 # PERCENT SIGN \x26 |0 # AMPERSAND \x28 |0 # LEFT PARENTHESIS - \xE6 |3 # LEFT PARENTHESIS, fragment (top) - \xE7 |3 # LEFT PARENTHESIS, fragment (extender) - \xE8 |3 # LEFT PARENTHESIS, fragment (bottom) \x29 |0 # RIGHT PARENTHESIS - \xF6 |3 # RIGHT PARENTHESIS, fragment (top) - \xF7 |3 # RIGHT PARENTHESIS, fragment (extender) - \xF8 |3 # RIGHT PARENTHESIS, fragment (bottom) \x2B |0 # PLUS SIGN \x2C |0 # COMMA \x2E |0 # FULL STOP @@ -75,28 +69,16 @@ CHARMAP \x3E |0 # GREATER-THAN SIGN \x3F |0 # QUESTION MARK \x5B |0 # LEFT SQUARE BRACKET - \xE9 |3 # LEFT SQUARE BRACKET, fragment (top) - \xEA |3 # LEFT SQUARE BRACKET, fragment (extender) - \xEB |3 # LEFT SQUARE BRACKET, fragment (bottom) \x5D |0 # RIGHT SQUARE BRACKET - \xF9 |3 # RIGHT SQUARE BRACKET, fragment (top) - \xFA |3 # RIGHT SQUARE BRACKET, fragment (extender) - \xFB |3 # RIGHT SQUARE BRACKET, fragment (bottom) \x5F |0 # LOW LINE \x7B |0 # LEFT CURLY BRACKET - \xEC |3 # LEFT CURLY BRACKET, fragment (top) - \xED |3 # LEFT CURLY BRACKET, fragment (center) - \xEE |3 # LEFT CURLY BRACKET, fragment (bottom) \x7C |0 # VERTICAL LINE \x7D |0 # RIGHT CURLY BRACKET - \xFC |3 # RIGHT CURLY BRACKET, fragment (top) - \xFD |3 # RIGHT CURLY BRACKET, fragment (center) - \xFE |3 # RIGHT CURLY BRACKET, fragment (bottom) \xD3 |0 # COPYRIGHT SIGN # serif - \xE3 |3 # COPYRIGHT SIGN, alternate (sans serif) + \xE3 |3 # COPYRIGHT SIGN, alternate: sans serif \xD8 |0 # NOT SIGN \xD2 |0 # REGISTERED SIGN # serif - \xE2 |3 # REGISTERED SIGN, alternate (sans serif) + \xE2 |3 # REGISTERED SIGN, alternate: sans serif \xB0 |0 # DEGREE SIGN \xB1 |0 # PLUS-MINUS SIGN \xB4 |0 # MULTIPLICATION SIGN @@ -165,7 +147,7 @@ CHARMAP \xC3 |0 # SCRIPT CAPITAL P \xC2 |0 # BLACK-LETTER CAPITAL R \xD4 |0 # TRADE MARK SIGN # serif - \xE4 |3 # TRADE MARK SIGN, alternate (sans serif) + \xE4 |3 # TRADE MARK SIGN, alternate: sans serif \xC0 |0 # ALEF SYMBOL \xAC |0 # LEFTWARDS ARROW \xAD |0 # UPWARDS ARROW @@ -199,7 +181,6 @@ CHARMAP \xC7 |0 # INTERSECTION \xC8 |0 # UNION \xF2 |0 # INTEGRAL - \xF4 |3 # INTEGRAL, fragment (extender) \x5C |0 # THEREFORE \x7E |0 # TILDE OPERATOR \x40 |0 # APPROXIMATELY EQUAL TO @@ -220,15 +201,34 @@ CHARMAP \xD7 |0 # DOT OPERATOR \xF3 |0 # TOP HALF INTEGRAL \xF5 |0 # BOTTOM HALF INTEGRAL + \xE6 |0 # LEFT PARENTHESIS UPPER HOOK # for Unicode 3.2 and later + \xE7 |0 # LEFT PARENTHESIS EXTENSION # for Unicode 3.2 and later + \xE8 |0 # LEFT PARENTHESIS LOWER HOOK # for Unicode 3.2 and later + \xF6 |0 # RIGHT PARENTHESIS UPPER HOOK # for Unicode 3.2 and later + \xF7 |0 # RIGHT PARENTHESIS EXTENSION # for Unicode 3.2 and later + \xF8 |0 # RIGHT PARENTHESIS LOWER HOOK # for Unicode 3.2 and later + \xE9 |0 # LEFT SQUARE BRACKET UPPER CORNER # for Unicode 3.2 and later + \xEA |0 # LEFT SQUARE BRACKET EXTENSION # for Unicode 3.2 and later + \xEB |0 # LEFT SQUARE BRACKET LOWER CORNER # for Unicode 3.2 and later + \xF9 |0 # RIGHT SQUARE BRACKET UPPER CORNER # for Unicode 3.2 and later + \xFA |0 # RIGHT SQUARE BRACKET EXTENSION # for Unicode 3.2 and later + \xFB |0 # RIGHT SQUARE BRACKET LOWER CORNER # for Unicode 3.2 and later + \xEC |0 # LEFT CURLY BRACKET UPPER HOOK # for Unicode 3.2 and later + \xED |0 # LEFT CURLY BRACKET MIDDLE PIECE # for Unicode 3.2 and later + \xEE |0 # LEFT CURLY BRACKET LOWER HOOK # for Unicode 3.2 and later + \xEF |0 # CURLY BRACKET EXTENSION # for Unicode 3.2 and later + \xFC |0 # RIGHT CURLY BRACKET UPPER HOOK # for Unicode 3.2 and later + \xFD |0 # RIGHT CURLY BRACKET MIDDLE PIECE # for Unicode 3.2 and later + \xFE |0 # RIGHT CURLY BRACKET LOWER HOOK # for Unicode 3.2 and later + \xF4 |0 # INTEGRAL EXTENSION # for Unicode 3.2 and later + \xBE |0 # HORIZONTAL LINE EXTENSION (for arrows) # for Unicode 3.2 and later \xAA |0 # BLACK SPADE SUIT \xA7 |0 # BLACK CLUB SUIT \xA9 |0 # BLACK HEART SUIT \xA8 |0 # BLACK DIAMOND SUIT \xE1 |0 # LEFT ANGLE BRACKET \xF1 |0 # RIGHT ANGLE BRACKET - \x60 |0 # radical extender - \xBD |0 # vertical arrow extender - \xBE |0 # horizontal arrow extender - \xEF |0 # curly bracket extender + \x60 |0 # radical extender # corporate char + \xBD |0 # vertical line extension (for arrows) # corporate char \xF0 |0 # Apple logo END CHARMAP diff --git a/ext/Encode/ucm/macThai.ucm b/ext/Encode/ucm/macThai.ucm index 3940034..159204c 100644 --- a/ext/Encode/ucm/macThai.ucm +++ b/ext/Encode/ucm/macThai.ucm @@ -1,5 +1,5 @@ # -# $Id: macThai.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $ +# $Id: macThai.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $ # # Original table can be obtained at # http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/THAI.TXT @@ -256,6 +256,6 @@ CHARMAP \x8E |0 # RIGHT DOUBLE QUOTATION MARK \x91 |0 # BULLET \x82 |0 # HORIZONTAL ELLIPSIS + \xDB |0 # WORD JOINER # for Unicode 3.2 and later \xEE |0 # TRADE MARK SIGN - \xDB |0 # ZERO WIDTH NO-BREAK SPACE END CHARMAP