
# Skip the whole file when pack('U', ...) cannot produce "A" from either
# 0x41 or ord("A").  A plan of "1..0" followed by a comment is the TAP
# convention for "all tests skipped".
BEGIN {
    if ("A" ne pack('U', 0x41) && "A" ne pack('U', ord("A"))) {
        print "1..0 # Unicode::Normalize "
            . "cannot stringify a Unicode code point\n";
        exit 0;
    }
}

# When PERL_CORE is set in the environment, move into t/ (if it exists)
# and replace @INC with the relative lib directory, using MacOS path
# syntax on that platform.
BEGIN {
    if ($ENV{PERL_CORE}) {
        if (-d 't') {
            chdir('t');
        }
        @INC = ($^O eq 'MacOS') ? ('::lib') : ('../lib');
    }
}

#########################

use strict;
use warnings;
use Test;

# The plan is declared at compile time, before the module under test loads.
BEGIN { plan(tests => 13) }

use Unicode::Normalize qw(:all);

# Test 1: reaching this line means everything above loaded.
ok(1);

# Forward every argument to Unicode::Normalize::pack_U and hand back its
# result.  The tests below use this to build expected strings from lists
# of code points.
sub _pack_U {
    return Unicode::Normalize::pack_U(@_);
}
# Forward every argument to Unicode::Normalize::unpack_U and hand back
# its result.
sub _unpack_U {
    return Unicode::Normalize::unpack_U(@_);
}

#########################

# Test 2: getCombinClass() for a few code points (768 is 0x0300 and
# 1809 is 0x0711).
{
    my $pass = getCombinClass(0)    == 0
            && getCombinClass(768)  == 230
            && getCombinClass(1809) == 36
            # Unicode 3.1: only checked on perl 5.7.3 or later.
            && ($] < 5.007003 || getCombinClass(0x1D167) == 1);

    print(($pass ? "ok" : "not ok"), " 2\n");
}

# Test 3: getCanon() is expected to return undef for code points without
# a canonical decomposition and the fully decomposed string otherwise.
{
    my $pass =
           !defined(getCanon(0))
        && !defined(getCanon(41))
        && getCanon(0x00C0) eq _pack_U(0x0041, 0x0300)
        && getCanon(0x00EF) eq _pack_U(0x0069, 0x0308)
        && getCanon(0x304C) eq _pack_U(0x304B, 0x3099)
        && getCanon(0x1EA4) eq _pack_U(0x0041, 0x0302, 0x0301)
        && getCanon(0x1F82) eq _pack_U(0x03B1, 0x0313, 0x0300, 0x0345)
        && getCanon(0x1FAF) eq _pack_U(0x03A9, 0x0314, 0x0342, 0x0345)
        && getCanon(0xAC00) eq _pack_U(0x1100, 0x1161)
        && getCanon(0xAE00) eq _pack_U(0x1100, 0x1173, 0x11AF)
        # These two are expected to decompose only under getCompat().
        && !defined(getCanon(0x212C))
        && !defined(getCanon(0x3243))
        && getCanon(0xFA2D) eq _pack_U(0x9DB4);

    print(($pass ? "ok" : "not ok"), " 3\n");
}

# Test 4: getCompat() is expected to return undef for code points without
# any decomposition and the compatibility decomposition otherwise.
{
    my $pass =
           !defined(getCompat(0))
        && !defined(getCompat(41))
        && getCompat(0x00C0) eq _pack_U(0x0041, 0x0300)
        && getCompat(0x00EF) eq _pack_U(0x0069, 0x0308)
        && getCompat(0x304C) eq _pack_U(0x304B, 0x3099)
        && getCompat(0x1EA4) eq _pack_U(0x0041, 0x0302, 0x0301)
        && getCompat(0x1F82) eq _pack_U(0x03B1, 0x0313, 0x0300, 0x0345)
        && getCompat(0x1FAF) eq _pack_U(0x03A9, 0x0314, 0x0342, 0x0345)
        # Unlike getCanon(), these two are expected to have a mapping here.
        && getCompat(0x212C) eq _pack_U(0x0042)
        && getCompat(0x3243) eq _pack_U(0x0028, 0x81F3, 0x0029)
        && getCompat(0xAC00) eq _pack_U(0x1100, 0x1161)
        && getCompat(0xAE00) eq _pack_U(0x1100, 0x1173, 0x11AF)
        && getCompat(0xFA2D) eq _pack_U(0x9DB4);

    print(($pass ? "ok" : "not ok"), " 4\n");
}

# Test 5: getComposite() is expected to return undef when the pair does
# not compose and the composite code point when it does.
{
    my $pass =
           !defined(getComposite( 0,  0))
        && !defined(getComposite( 0, 41))
        && !defined(getComposite(41,  0))
        && !defined(getComposite(41, 41))
        && !defined(getComposite(12, 0x0300))
        && !defined(getComposite(0x0055, 0xFF00))
        && getComposite(0x0041, 0x0300) == 0x00C0
        && getComposite(0x0055, 0x0300) == 0x00D9
        && getComposite(0x0112, 0x0300) == 0x1E14
        && getComposite(0x1100, 0x1161) == 0xAC00
        && getComposite(0x1100, 0x1173) == 0xADF8
        && !defined(getComposite(0x1100, 0x11AF))
        && !defined(getComposite(0x1173, 0x11AF))
        && !defined(getComposite(0xAC00, 0x11A7))
        && getComposite(0xAC00, 0x11A8) == 0xAC01
        && getComposite(0xADF8, 0x11AF) == 0xAE00;

    print(($pass ? "ok" : "not ok"), " 5\n");
}

# Test 6: isExclusion() must be false for 0 and 41 and true for the
# three listed letters.
{
    my $pass = !isExclusion(0)
            && !isExclusion(41)
            && isExclusion(2392)     # DEVANAGARI LETTER QA (0x0958)
            && isExclusion(3907)     # TIBETAN LETTER GHA (0x0F43)
            && isExclusion(64334);   # HEBREW LETTER PE WITH RAFE (0xFB4E)

    print(($pass ? "ok" : "not ok"), " 6\n");
}

# Test 7: isSingleton() must be false for 0 and true for U+212B.
{
    my $pass = !isSingleton(0)
            && isSingleton(0x212B);   # ANGSTROM SIGN

    print(($pass ? "ok" : "not ok"), " 7\n");
}

# Test 8: reorder() on the empty string and on two sequences whose
# combining marks are expected to come back in a different order.
{
    my $pass =
           reorder("") eq ""
        && reorder(_pack_U(0x0041, 0x0300, 0x0315, 0x0313, 0x031b, 0x0061))
               eq _pack_U(0x0041, 0x031b, 0x0300, 0x0313, 0x0315, 0x0061)
        && reorder(_pack_U(0x00C1, 0x0300, 0x0315, 0x0313, 0x031b,
                           0x0061, 0x309A, 0x3099))
               eq _pack_U(0x00C1, 0x031b, 0x0300, 0x0313, 0x0315,
                          0x0061, 0x309A, 0x3099);

    print(($pass ? "ok" : "not ok"), " 8\n");
}

# Render a three-state result as text: "MAYBE" when the first argument is
# undefined, otherwise "YES" when it is true and "NO" when it is false.
sub answer {
    return "MAYBE" unless defined $_[0];
    return $_[0] ? "YES" : "NO";
}

# Test 9: every check function, under both its long and short form names,
# must answer "YES" for the empty string and for a plain ASCII string.
{
    my $pass =
           "YES" eq answer(checkNFD(""))
        && "YES" eq answer(checkNFC(""))
        && "YES" eq answer(checkNFKD(""))
        && "YES" eq answer(checkNFKC(""))
        && "YES" eq answer(check("NFD", ""))
        && "YES" eq answer(check("NFC", ""))
        && "YES" eq answer(check("NFKD",""))
        && "YES" eq answer(check("NFKC",""))
        # U+0000 to U+007F are prenormalized in all the normalization forms.
        && "YES" eq answer(checkNFD("AZaz\t12!#`"))
        && "YES" eq answer(checkNFC("AZaz\t12!#`"))
        && "YES" eq answer(checkNFKD("AZaz\t12!#`"))
        && "YES" eq answer(checkNFKC("AZaz\t12!#`"))
        && "YES" eq answer(check("D", "AZaz\t12!#`"))
        && "YES" eq answer(check("C", "AZaz\t12!#`"))
        && "YES" eq answer(check("KD","AZaz\t12!#`"))
        && "YES" eq answer(check("KC","AZaz\t12!#`"));

    print(($pass ? "ok" : "not ok"), " 9\n");
}

# Test 10: the check functions on non-ASCII input, covering all three
# answers that answer() can render (YES, NO and MAYBE).
{
    my $pass =
           "YES"   eq answer(checkNFD(NFD(_pack_U(0xC1, 0x1100, 0x1173, 0x11AF))))
        && "NO"    eq answer(checkNFD(_pack_U(0x20, 0xC1, 0x1100, 0x1173, 0x11AF)))
        && "MAYBE" eq answer(checkNFC(_pack_U(0x20, 0xC1, 0x1173, 0x11AF)))
        && "YES"   eq answer(checkNFC(_pack_U(0x20, 0xC1, 0xAE00, 0x1100)))
        && "MAYBE" eq answer(checkNFC(_pack_U(0x20, 0xC1, 0xAE00, 0x1100, 0x300)))
        && "YES"   eq answer(checkNFC(_pack_U(0x20, 0xC1, 0xFF71, 0x2025)))
        && "NO"    eq answer(check("NFC", _pack_U(0x20, 0xC1, 0x212B, 0x300)))
        && "NO"    eq answer(checkNFKD(_pack_U(0x20, 0xC1, 0xFF71, 0x2025)))
        && "NO"    eq answer(checkNFKC(_pack_U(0x20, 0xC1, 0xAE00, 0x2025)));

    print(($pass ? "ok" : "not ok"), " 10\n");
}

# Tests 11-13: $1 and $2 from the match below are passed straight to NFC()
# and normalize().  They are deliberately not copied into lexicals first,
# because passing the capture variables themselves is what is under test.
"012ABC" =~ /(\d+)(\w+)/;

print((("012" eq NFC($1) && "ABC" eq NFC($2))
        ? "ok" : "not ok"), " 11\n");

print((("012" eq normalize('C', $1) && "ABC" eq normalize('C', $2))
        ? "ok" : "not ok"), " 12\n");

# s/^NF// in normalize() must not prevent using $1, $&, etc.
print((("012" eq normalize('NFC', $1) && "ABC" eq normalize('NFC', $2))
        ? "ok" : "not ok"), " 13\n");
