Commit | Line | Data |
f96ec2a2 |
1 | #!./perl |
2 | |
3 | BEGIN { |
4 | chdir 't' if -d 't'; |
20822f61 |
5 | @INC = '../lib'; |
8ae6c9f9 |
6 | require './test.pl'; |
f96ec2a2 |
7 | } |
8 | |
4765795a |
9 | # NOTE! |
10 | # |
11 | # Think carefully before adding tests here. In general this should be |
12 | # used only for about three categories of tests: |
13 | # |
14 | # (1) tests that absolutely require 'use utf8', and since that in general |
15 | # shouldn't be needed as the utf8 is being obsoleted, this should |
16 | # have rather few tests. If you want to test Unicode and regexes, |
17 | # you probably want to go to op/regexp or op/pat; if you want to test |
18 | # split, go to op/split; pack, op/pack; appending or joining, |
19 | # op/append or op/join, and so forth |
20 | # |
21 | # (2) tests that have to do with Unicode tokenizing (though it's likely |
22 | # that all the other Unicode tests sprinkled around the t/**/*.t are |
23 | # going to catch that) |
24 | # |
25 | # (3) complicated tests that simultaneously stress so many Unicode features |
26 | # that deciding into which other test script the tests should go to |
27 | # is hard -- maybe consider breaking up the complicated test |
28 | # |
29 | # |
30 | |
435e7af6 |
31 | plan tests => 31; |
31067593 |
32 | |
7bbb0251 |
33 | { |
da450f52 |
34 | # bug id 20001009.001 |
35 | |
89491803 |
36 | my ($a, $b); |
37 | |
38 | { use bytes; $a = "\xc3\xa4" } |
4765795a |
39 | { use utf8; $b = "\xe4" } |
89491803 |
40 | |
4765795a |
41 | my $test = 68; |
31067593 |
42 | |
4765795a |
43 | ok($a ne $b); |
da450f52 |
44 | |
4765795a |
45 | { use utf8; ok($a ne $b) } |
da450f52 |
46 | } |
47 | |
60ff4832 |
48 | |
49 | { |
50 | # bug id 20000730.004 |
51 | |
60ff4832 |
52 | my $smiley = "\x{263a}"; |
53 | |
4765795a |
54 | for my $s ("\x{263a}", |
55 | $smiley, |
60ff4832 |
56 | |
4765795a |
57 | "" . $smiley, |
58 | "" . "\x{263a}", |
60ff4832 |
59 | |
4765795a |
60 | $smiley . "", |
61 | "\x{263a}" . "", |
60ff4832 |
62 | ) { |
63 | my $length_chars = length($s); |
64 | my $length_bytes; |
65 | { use bytes; $length_bytes = length($s) } |
66 | my @regex_chars = $s =~ m/(.)/g; |
67 | my $regex_chars = @regex_chars; |
68 | my @split_chars = split //, $s; |
69 | my $split_chars = @split_chars; |
4765795a |
70 | ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq |
71 | "1/1/1/3"); |
60ff4832 |
72 | } |
73 | |
4765795a |
74 | for my $s ("\x{263a}" . "\x{263a}", |
75 | $smiley . $smiley, |
60ff4832 |
76 | |
4765795a |
77 | "\x{263a}\x{263a}", |
78 | "$smiley$smiley", |
60ff4832 |
79 | |
4765795a |
80 | "\x{263a}" x 2, |
81 | $smiley x 2, |
60ff4832 |
82 | ) { |
83 | my $length_chars = length($s); |
84 | my $length_bytes; |
85 | { use bytes; $length_bytes = length($s) } |
86 | my @regex_chars = $s =~ m/(.)/g; |
87 | my $regex_chars = @regex_chars; |
88 | my @split_chars = split //, $s; |
89 | my $split_chars = @split_chars; |
4765795a |
90 | ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq |
91 | "2/2/2/6"); |
60ff4832 |
92 | } |
93 | } |
ffc61ed2 |
94 | |
ffc61ed2 |
95 | |
96 | { |
f9a63242 |
97 | my $w = 0; |
98 | local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ }; |
99 | my $x = eval q/"\\/ . "\x{100}" . q/"/;; |
100 | |
4765795a |
101 | ok($w == 0 && $x eq "\x{100}"); |
f9a63242 |
102 | } |
103 | |
8ae6c9f9 |
104 | { |
435e7af6 |
105 | use warnings; |
8ae6c9f9 |
106 | my $progfile = 'utf' . $$; |
435e7af6 |
107 | END {unlink_all $progfile} |
108 | |
109 | # If I'm right 60 is '>' in ASCII, ' ' in EBCDIC |
110 | # 173 is not punctuation in either ASCII or EBCDIC |
111 | my (@char); |
112 | foreach (60, 173, 257, 65532) { |
113 | my $char = chr $_; |
114 | utf8::encode($char); |
115 | # I don't want to use map {ord} and I've no need to hardcode the UTF |
116 | # version |
117 | my $charsubst = $char; |
118 | $charsubst =~ s/(.)/ord ($1) . ','/ge; |
119 | chop $charsubst; |
120 | push @char, [$_, $char, $charsubst]; |
121 | } |
122 | foreach ( |
123 | ['check our detection program works', |
124 | '@a = ("'.chr(60).'\x2A", ""); display @a', qr/^>60,42<><$/], |
125 | ['check literal 8 bit input', |
126 | '$a = "' . chr (173) . '"; display $a', qr/^>173<$/], |
127 | ['check no utf8; makes no change', |
128 | 'no utf8; $a = "' . chr (173) . '"; display $a', qr/^>173<$/], |
129 | # Now we do the real byte sequences that are valid UTF8 |
130 | (map { |
131 | ["the utf8 sequence for chr $_->[0]", |
132 | qq(\$a = "$_->[1]"; display \$a), qr/^>$_->[2]<$/], |
133 | ["no utf8; for the utf8 sequence for chr $_->[0]", |
134 | qq(no utf8; \$a = "$_->[1]"; display \$a), qr/^>$_->[2]<$/], |
135 | ["use utf8; for the utf8 sequence for chr $_->[0]", |
136 | qq(use utf8; \$a = "$_->[1]"; display \$a), qr/^>$_->[0]<$/], |
137 | } @char), |
138 | # Interpolation of hex characters needs to take place now, as we're |
139 | # testing feeding malformed utf8 into perl. Bug now fixed was an |
140 | # "out of memory" error. We really need the "" [rather than qq() |
141 | # or q()] to get the best explosion. |
142 | ["!Feed malformed utf8 into perl.", <<"BANG", |
8ae6c9f9 |
143 | use utf8; %a = ("\xE1\xA0"=>"sterling"); |
435e7af6 |
144 | print 'start'; printf '%x,', ord \$_ foreach keys %a; print "end\n"; |
8ae6c9f9 |
145 | BANG |
435e7af6 |
146 | qr/^Malformed UTF-8 character \(2 bytes, need 3\).*start\d+,end$/s |
147 | ], |
148 | ) { |
149 | my ($why, $prog, $expect) = @$_; |
150 | open P, ">$progfile" or die "Can't open '$progfile': $!"; |
151 | print P q( |
152 | sub display { |
153 | print '>' . join (',', map {ord} split //, $_) . '<' |
154 | foreach @_; |
155 | } |
156 | ); |
157 | print P $prog; |
158 | close P or die "Can't close '$progfile': $!"; |
159 | if ($why =~ s/^!//) { |
160 | print "# Possible delay...\n"; |
161 | } else { |
162 | print "# $prog\n"; |
163 | } |
164 | my $result = runperl ( stderr => 1, progfile => $progfile ); |
165 | like ($result, $expect, $why); |
166 | } |
8ae6c9f9 |
167 | } |