Major utf8 test reorganisation and rewrite.
[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t
CommitLineData
f96ec2a2 1#!./perl
2
3BEGIN { # compile-time setup: locate the library directory and bail out early where the tests cannot run
4 chdir 't' if -d 't'; # move into t/ when that directory exists
20822f61 5 @INC = '../lib'; # replace the whole module search path with the single relative entry ../lib
f96ec2a2 6 $ENV{PERL5LIB} = '../lib'; # export the same path through the environment
f70c35af 7 if ( ord("\t") != 9 ) { # skip on ebcdic platforms (detected by TAB not having ordinal 9)
8 print "1..0 # Skip utf8 tests on ebcdic platform.\n";
9 exit;
10 }
f96ec2a2 11}
12
4765795a 13# NOTE!
14#
15# Think carefully before adding tests here. In general this should be
16# used only for about three categories of tests:
17#
18# (1) tests that absolutely require 'use utf8'; since that pragma should
19# not in general be needed (it is being obsoleted), there should be
20# rather few of these. If you want to test Unicode and regexes,
21# you probably want to go to op/regexp or op/pat; if you want to test
22# split, go to op/split; pack, op/pack; appending or joining,
23# op/append or op/join, and so forth.
24#
25# (2) tests that have to do with Unicode tokenizing (though it's likely
26# that all the other Unicode tests sprinkled around the t/**/*.t are
27# going to catch that)
28#
29# (3) complicated tests that simultaneously stress so many Unicode features
30# that deciding which other test script they should go into
31# is hard -- maybe consider breaking up the complicated test
32#
33#
34
35use Test;
36plan tests => 15; # ok() calls in the blocks visible below: 2 + 6 + 6 + 1 = 15; update when adding tests
31067593 37
7bbb0251 38{
da450f52 39 # bug id 20001009.001: a two-byte string (0xC3 0xA4, the UTF-8 encoding of U+00E4) must not compare equal to the one-character string "\xe4", whether or not the comparison itself runs under 'use utf8'
40
89491803 41 my ($a, $b);
42
43 { use bytes; $a = "\xc3\xa4" } # assigned under 'use bytes': two elements, 0xC3 and 0xA4
4765795a 44 { use utf8; $b = "\xe4" } # assigned under 'use utf8': one element, 0xE4
89491803 45
31067593 47
4765795a 48 ok($a ne $b); # comparison outside any pragma scope
da450f52 49
4765795a 50 { use utf8; ok($a ne $b) } # same comparison inside the scope of 'use utf8'
da450f52 51}
52
60ff4832 53
54{
55 # bug id 20000730.004: strings built from U+263A in several different ways must all report the same character count via length(), a /(.)/g match and split //, plus the expected length under 'use bytes'
56
60ff4832 57 my $smiley = "\x{263a}"; # the single character reused in the variable-based cases below
58
4765795a 59 for my $s ("\x{263a}", # one-character cases: literal and variable,
60 $smiley,
60ff4832 61
4765795a 62 "" . $smiley, # empty string prepended,
63 "" . "\x{263a}",
60ff4832 64
4765795a 65 $smiley . "", # empty string appended
66 "\x{263a}" . "",
60ff4832 67 ) {
68 my $length_chars = length($s); # length with no 'use bytes' in effect
69 my $length_bytes;
70 { use bytes; $length_bytes = length($s) } # length measured inside the scope of 'use bytes'
71 my @regex_chars = $s =~ m/(.)/g; # one list element per (.) match
72 my $regex_chars = @regex_chars; # array in scalar context: number of matches
73 my @split_chars = split //, $s; # split on the empty pattern
74 my $split_chars = @split_chars; # number of pieces produced by split
4765795a 75 ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
76 "1/1/1/3"); # expect 1 from length, regex and split, and 3 under 'use bytes'
60ff4832 77 }
78
4765795a 79 for my $s ("\x{263a}" . "\x{263a}", # two-character cases: concatenation,
80 $smiley . $smiley,
60ff4832 81
4765795a 82 "\x{263a}\x{263a}", # interpolation inside one double-quoted string,
83 "$smiley$smiley",
60ff4832 84
4765795a 85 "\x{263a}" x 2, # repetition operator
86 $smiley x 2,
60ff4832 87 ) {
88 my $length_chars = length($s); # same four measurements as in the first loop
89 my $length_bytes;
90 { use bytes; $length_bytes = length($s) }
91 my @regex_chars = $s =~ m/(.)/g;
92 my $regex_chars = @regex_chars;
93 my @split_chars = split //, $s;
94 my $split_chars = @split_chars;
4765795a 95 ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
96 "2/2/2/6"); # expect 2 from length, regex and split, and 6 under 'use bytes'
60ff4832 97 }
98}
ffc61ed2 99
ffc61ed2 100
101{ # evaluating a double-quoted literal that contains a backslash followed by a raw U+0100 character must yield that character and raise no warning
f9a63242 102 my $w = 0; # number of warnings seen while this block runs
103 local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ }; # print each warning as a comment line and count it
104 my $x = eval q/"\\/ . "\x{100}" . q/"/;; # eval source is: double quote, backslash, U+0100, double quote
105
4765795a 106 ok($w == 0 && $x eq "\x{100}"); # passes only with zero warnings and a result equal to U+0100
f9a63242 107}
108