Commit | Line | Data |
d0ea2801 |
1 | #!./perl -T |
2 | # tests whether tainting works with UTF-8 |
3 | |
4 | BEGIN { |
d0ea2801 |
5 | chdir 't' if -d 't'; |
6 | @INC = qw(../lib); |
7 | } |
8 | |
9 | use strict; |
10 | use Config; |
11 | |
d2a59f97 |
12 | # How to identify taint when you see it |
13 | sub any_tainted (@) { |
14 | not eval { join("",@_), kill 0; 1 }; |
15 | } |
16 | sub tainted ($) { |
17 | any_tainted @_; |
d0ea2801 |
18 | } |
19 | |
d2a59f97 |
20 | require './test.pl'; |
d020f5c4 |
21 | plan(tests => 3*10 + 3*8 + 2*16 + 2); |
d0ea2801 |
22 | |
23 | my $arg = $ENV{PATH}; # a tainted value |
24 | use constant UTF8 => "\x{1234}"; |
25 | |
1651fc44 |
26 | *is_utf8 = \&utf8::is_utf8; |
78ea37eb |
27 | |
d0ea2801 |
28 | for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { |
29 | my $encode = $ary->[0]; |
30 | my $string = $ary->[1]; |
31 | |
32 | my $taint = $arg; substr($taint, 0) = $ary->[1]; |
33 | |
d2a59f97 |
34 | is(tainted($taint), tainted($arg), "tainted: $encode, before test"); |
d0ea2801 |
35 | |
36 | my $lconcat = $taint; |
37 | $lconcat .= UTF8; |
d2a59f97 |
38 | is($lconcat, $string.UTF8, "compare: $encode, concat left"); |
d0ea2801 |
39 | |
d2a59f97 |
40 | is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left"); |
d0ea2801 |
41 | |
42 | my $rconcat = UTF8; |
43 | $rconcat .= $taint; |
d2a59f97 |
44 | is($rconcat, UTF8.$string, "compare: $encode, concat right"); |
d0ea2801 |
45 | |
d2a59f97 |
46 | is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right"); |
d0ea2801 |
47 | |
48 | my $ljoin = join('!', $taint, UTF8); |
d2a59f97 |
49 | is($ljoin, join('!', $string, UTF8), "compare: $encode, join left"); |
d0ea2801 |
50 | |
d2a59f97 |
51 | is(tainted($ljoin), tainted($arg), "tainted: $encode, join left"); |
d0ea2801 |
52 | |
53 | my $rjoin = join('!', UTF8, $taint); |
d2a59f97 |
54 | is($rjoin, join('!', UTF8, $string), "compare: $encode, join right"); |
d0ea2801 |
55 | |
d2a59f97 |
56 | is(tainted($rjoin), tainted($arg), "tainted: $encode, join right"); |
d0ea2801 |
57 | |
d2a59f97 |
58 | is(tainted($taint), tainted($arg), "tainted: $encode, after test"); |
d0ea2801 |
59 | } |
78ea37eb |
60 | |
61 | |
62 | for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) { |
63 | my $encode = $ary->[0]; |
64 | |
65 | my $utf8 = pack('U*') . $ary->[1]; |
f337b084 |
66 | my $byte = unpack('U0a*', $utf8); |
78ea37eb |
67 | |
68 | my $taint = $arg; substr($taint, 0) = $utf8; |
69 | utf8::encode($taint); |
70 | |
d2a59f97 |
71 | is($taint, $byte, "compare: $encode, encode utf8"); |
78ea37eb |
72 | |
d2a59f97 |
73 | is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8"); |
78ea37eb |
74 | |
d2a59f97 |
75 | ok(!is_utf8($taint), "is_utf8: $encode, encode utf8"); |
78ea37eb |
76 | |
d2a59f97 |
77 | is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8"); |
78ea37eb |
78 | |
79 | my $taint = $arg; substr($taint, 0) = $byte; |
80 | utf8::decode($taint); |
81 | |
d2a59f97 |
82 | is($taint, $utf8, "compare: $encode, decode byte"); |
78ea37eb |
83 | |
d2a59f97 |
84 | is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte"); |
78ea37eb |
85 | |
d2a59f97 |
86 | is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte"); |
78ea37eb |
87 | |
d2a59f97 |
88 | is(tainted($taint), tainted($arg), "tainted: $encode, decode byte"); |
78ea37eb |
89 | } |
90 | |
91 | |
92 | for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) { |
93 | my $encode = $ary->[0]; |
94 | |
95 | my $up = pack('U*') . $ary->[1]; |
f337b084 |
96 | my $down = pack("a*", $ary->[1]); |
78ea37eb |
97 | |
98 | my $taint = $arg; substr($taint, 0) = $up; |
99 | utf8::upgrade($taint); |
100 | |
d2a59f97 |
101 | is($taint, $up, "compare: $encode, upgrade up"); |
78ea37eb |
102 | |
d2a59f97 |
103 | is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up"); |
78ea37eb |
104 | |
d2a59f97 |
105 | ok(is_utf8($taint), "is_utf8: $encode, upgrade up"); |
78ea37eb |
106 | |
d2a59f97 |
107 | is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up"); |
78ea37eb |
108 | |
109 | my $taint = $arg; substr($taint, 0) = $down; |
110 | utf8::upgrade($taint); |
111 | |
d2a59f97 |
112 | is($taint, $up, "compare: $encode, upgrade down"); |
78ea37eb |
113 | |
d2a59f97 |
114 | is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down"); |
78ea37eb |
115 | |
d2a59f97 |
116 | ok(is_utf8($taint), "is_utf8: $encode, upgrade down"); |
78ea37eb |
117 | |
d2a59f97 |
118 | is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down"); |
78ea37eb |
119 | |
120 | my $taint = $arg; substr($taint, 0) = $up; |
121 | utf8::downgrade($taint); |
122 | |
d2a59f97 |
123 | is($taint, $down, "compare: $encode, downgrade up"); |
78ea37eb |
124 | |
d2a59f97 |
125 | is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up"); |
78ea37eb |
126 | |
d2a59f97 |
127 | ok(!is_utf8($taint), "is_utf8: $encode, downgrade up"); |
78ea37eb |
128 | |
d2a59f97 |
129 | is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up"); |
78ea37eb |
130 | |
131 | my $taint = $arg; substr($taint, 0) = $down; |
132 | utf8::downgrade($taint); |
133 | |
d2a59f97 |
134 | is($taint, $down, "compare: $encode, downgrade down"); |
78ea37eb |
135 | |
d2a59f97 |
136 | is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down"); |
78ea37eb |
137 | |
d2a59f97 |
138 | ok(!is_utf8($taint), "is_utf8: $encode, downgrade down"); |
78ea37eb |
139 | |
d2a59f97 |
140 | is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down"); |
78ea37eb |
141 | } |
142 | |
5316d14d |
143 | { |
5316d14d |
144 | fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,', |
145 | 'ok', {switches => ["-T", "-l"]}, |
146 | "matching a regexp is taint agnostic"); |
e7b79d50 |
147 | |
148 | fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,', |
149 | 'ok', {switches => ["-T", "-l"]}, |
150 | "therefore swash_init should be taint agnostic"); |
151 | } |