2 # tests whether tainting works with UTF-8
12 # How to identify taint when you see it
14 not eval { join("",@_), kill 0; 1 };
21 plan(tests => 3*10 + 3*8 + 2*16 + 2);
23 my $arg = $ENV{PATH}; # a tainted value
24 use constant UTF8 => "\x{1234}";
28 return 0xB6 != unpack('C', chr(0xB6).$s);
31 for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
32 my $encode = $ary->[0];
33 my $string = $ary->[1];
35 my $taint = $arg; substr($taint, 0) = $ary->[1];
37 is(tainted($taint), tainted($arg), "tainted: $encode, before test");
41 is($lconcat, $string.UTF8, "compare: $encode, concat left");
43 is(tainted($lconcat), tainted($arg), "tainted: $encode, concat left");
47 is($rconcat, UTF8.$string, "compare: $encode, concat right");
49 is(tainted($rconcat), tainted($arg), "tainted: $encode, concat right");
51 my $ljoin = join('!', $taint, UTF8);
52 is($ljoin, join('!', $string, UTF8), "compare: $encode, join left");
54 is(tainted($ljoin), tainted($arg), "tainted: $encode, join left");
56 my $rjoin = join('!', UTF8, $taint);
57 is($rjoin, join('!', UTF8, $string), "compare: $encode, join right");
59 is(tainted($rjoin), tainted($arg), "tainted: $encode, join right");
61 is(tainted($taint), tainted($arg), "tainted: $encode, after test");
65 for my $ary ([ascii => 'perl'], [latin1 => "\xB6"], [utf8 => "\x{100}"]) {
66 my $encode = $ary->[0];
68 my $utf8 = pack('U*') . $ary->[1];
69 my $byte = unpack('U0a*', $utf8);
71 my $taint = $arg; substr($taint, 0) = $utf8;
74 is($taint, $byte, "compare: $encode, encode utf8");
76 is(pack('a*',$taint), pack('a*',$byte), "bytecmp: $encode, encode utf8");
78 ok(!is_utf8($taint), "is_utf8: $encode, encode utf8");
80 is(tainted($taint), tainted($arg), "tainted: $encode, encode utf8");
82 my $taint = $arg; substr($taint, 0) = $byte;
85 is($taint, $utf8, "compare: $encode, decode byte");
87 is(pack('a*',$taint), pack('a*',$utf8), "bytecmp: $encode, decode byte");
89 is(is_utf8($taint), ($encode ne 'ascii'), "is_utf8: $encode, decode byte");
91 is(tainted($taint), tainted($arg), "tainted: $encode, decode byte");
95 for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) {
96 my $encode = $ary->[0];
98 my $up = pack('U*') . $ary->[1];
99 my $down = pack("a*", $ary->[1]);
101 my $taint = $arg; substr($taint, 0) = $up;
102 utf8::upgrade($taint);
104 is($taint, $up, "compare: $encode, upgrade up");
106 is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade up");
108 ok(is_utf8($taint), "is_utf8: $encode, upgrade up");
110 is(tainted($taint), tainted($arg), "tainted: $encode, upgrade up");
112 my $taint = $arg; substr($taint, 0) = $down;
113 utf8::upgrade($taint);
115 is($taint, $up, "compare: $encode, upgrade down");
117 is(pack('a*',$taint), pack('a*',$up), "bytecmp: $encode, upgrade down");
119 ok(is_utf8($taint), "is_utf8: $encode, upgrade down");
121 is(tainted($taint), tainted($arg), "tainted: $encode, upgrade down");
123 my $taint = $arg; substr($taint, 0) = $up;
124 utf8::downgrade($taint);
126 is($taint, $down, "compare: $encode, downgrade up");
128 is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade up");
130 ok(!is_utf8($taint), "is_utf8: $encode, downgrade up");
132 is(tainted($taint), tainted($arg), "tainted: $encode, downgrade up");
134 my $taint = $arg; substr($taint, 0) = $down;
135 utf8::downgrade($taint);
137 is($taint, $down, "compare: $encode, downgrade down");
139 is(pack('a*',$taint), pack('a*',$down), "bytecmp: $encode, downgrade down");
141 ok(!is_utf8($taint), "is_utf8: $encode, downgrade down");
143 is(tainted($taint), tainted($arg), "tainted: $encode, downgrade down");
148 fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,',
149 'ok', {switches => ["-T", "-l"]},
150 "matching a regexp is taint agnostic");
152 if ($@ =~ /^Insecure directory in/) {
154 skip ("Can't run taint checks with $@", 2);
157 fresh_perl_is('$a = substr $^X, 0, 0; /$a\x{100}/i || print q,ok,',
158 'ok', {switches => ["-T", "-l"]},
159 "therefore swash_init should be taint agnostic");