Commit | Line | Data |
646ca15d |
1 | #!./perl |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
5 | @INC = qw(. ../lib); # ../lib needed for test.deparse |
6 | require "test.pl"; |
7 | } |
8 | |
8a064bd6 |
# Planned assertions: 1 chr(ord()) check + 4 single-byte boundary checks
# + 4 negative-argument checks + 4 negative-argument checks under
# "use bytes" + 21 UTF-8 encoding checks.
plan(tests => 34);
646ca15d |
10 | |
11 | # Note that t/op/ord.t already tests for chr() <-> ord() roundtripping. |
12 | |
# Go through ord("A") instead of a hard-coded number so that the check
# does not assume ASCII.
is(chr(ord("A")), "A");

# Lowest and highest values of the 7-bit and 8-bit ranges, each paired
# with the one-character string chr() is expected to return.
for my $case ([0, "\x00"], [127, "\x7F"], [128, "\x80"], [255, "\xFF"]) {
    my ($ordinal, $expected_char) = @$case;
    is(chr($ordinal), $expected_char);
}
21 | |
8a064bd6 |
# Outside "use bytes", every negative argument (integral or fractional)
# is expected to produce U+FFFD, the Unicode replacement character.
for my $negative (-0.1, -1, -2, -3.0) {
    is(chr($negative), "\x{FFFD}");
}
{
    use bytes; # Backward compatibility.

    # Within the lexical scope of "use bytes" the same arguments are
    # expected to produce a single byte instead of U+FFFD.  The loop must
    # stay inside this block so that chr() is compiled under the pragma.
    for my $case ([-0.1, "\x00"], [-1, "\xFF"], [-2, "\xFE"], [-3.0, "\xFD"]) {
        my ($negative, $expected_byte) = @$case;
        is(chr($negative), $expected_byte);
    }
}
646ca15d |
33 | |
4c5ed6e2 |
34 | # Check UTF-8 (not UTF-EBCDIC). |
35 | SKIP: { |
36 | skip "no UTF-8 on EBCDIC", 21 if chr(193) eq 'A'; |
646ca15d |
37 | |
8a064bd6 |
# hexes(CODE_POINT)
#
# Returns the UTF-8 encoding of chr(CODE_POINT) as a string of two-digit
# lowercase hex bytes separated by single spaces, e.g. "c4 80" for 0x100.
sub hexes {
    # Surrogates and code points beyond Unicode would otherwise warn.
    no warnings 'utf8';

    my ($code_point) = @_;

    # "U0" makes unpack work on the UTF-8 bytes of the string; "(H2)*"
    # then yields one two-digit hex string per byte.
    my @octets = unpack("U0 (H2)*", chr($code_point));

    return join(" ", @octets);
}
646ca15d |
42 | |
# Code points at interesting steps in UTF-8, each paired with the
# space-separated hex bytes that hexes() is expected to return for it.
my @utf8_steps = (
    [      0x100, "c4 80"          ],
    [      0x7FF, "df bf"          ],
    [      0x800, "e0 a0 80"       ],
    [      0xFFF, "e0 bf bf"       ],
    [     0x1000, "e1 80 80"       ],
    [     0xCFFF, "ec bf bf"       ],
    [     0xD000, "ed 80 80"       ],
    [     0xD7FF, "ed 9f bf"       ],
    [     0xD800, "ed a0 80"       ], # not strict utf-8 (surrogate area begin)
    [     0xDFFF, "ed bf bf"       ], # not strict utf-8 (surrogate area end)
    [     0xE000, "ee 80 80"       ],
    [     0xFFFF, "ef bf bf"       ],
    [    0x10000, "f0 90 80 80"    ],
    [    0x3FFFF, "f0 bf bf bf"    ],
    [    0x40000, "f1 80 80 80"    ],
    [    0xFFFFF, "f3 bf bf bf"    ],
    [   0x100000, "f4 80 80 80"    ],
    [   0x10FFFF, "f4 8f bf bf"    ], # Unicode (4.1) last code point
    [   0x110000, "f4 90 80 80"    ],
    [   0x1FFFFF, "f7 bf bf bf"    ], # last four byte encoding
    [   0x200000, "f8 88 80 80 80" ],
);

# One assertion per table row, in table order.
for my $step (@utf8_steps) {
    my ($code_point, $expected_bytes) = @$step;
    is(hexes($code_point), $expected_bytes);
}
65 | } |