[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t

#!./perl 

BEGIN {
    # Work from inside t/ when that directory exists, so the relative
    # library path below resolves the same way however we were started.
    if ( -d 't' ) {
        chdir 't';
    }

    # Use only the library tree that sits next to the test directory, and
    # export the same location through the environment.
    @INC = ('../lib');
    $ENV{PERL5LIB} = '../lib';

    # TAB has ordinal 9 on ASCII platforms; any other value is taken to
    # mean EBCDIC, where the whole script is skipped.
    unless ( ord("\t") == 9 ) {
        print "1..0 # Skip utf8 tests on ebcdic platform.\n";
        exit;
    }
}

# NOTE!
#
# Think carefully before adding tests here.  In general this should be
# used only for about three categories of tests:
#
# (1) tests that absolutely require 'use utf8'; since that pragma in general
#     shouldn't be needed (the utf8 pragma is being obsoleted), this category
#     should have rather few tests.  If you want to test Unicode and regexes,
#     you probably want to go to op/regexp or op/pat; if you want to test
#     split, go to op/split; pack, op/pack; appending or joining,
#     op/append or op/join, and so forth.
#
# (2) tests that have to do with Unicode tokenizing (though it's likely
#     that all the other Unicode tests sprinkled around the t/**/*.t are
#     going to catch that)
#
# (3) complicated tests that simultaneously stress so many Unicode features
#     that deciding into which other test script the tests should go to
#     is hard -- maybe consider breaking up the complicated test
#
#

use Test;
# 15 tests in total: 2 for bug 20001009.001, 6 + 6 for the two loops of
# bug 20000730.004, and 1 for the eval of a backslash followed by a wide
# character.  Keep this count in step with the ok() calls below.
plan tests => 15;

{
    # bug id 20001009.001
    #
    # $a is assigned the two bytes 0xC3 0xA4 under 'use bytes' (these are
    # the UTF-8 encoding of U+00E4), while $b is assigned the single
    # character 0xE4 under 'use utf8'.  The two strings must compare as
    # different, both outside and inside the scope of 'use utf8'.

    my ($a, $b);

    { use bytes; $a = "\xc3\xa4" }
    { use utf8;  $b = "\xe4"     }

    # Comparison with no pragma in effect.
    ok($a ne $b);

    # Same comparison, this time compiled under 'use utf8'.
    { use utf8; ok($a ne $b) }
}


{
    # bug id 20000730.004
    #
    # U+263A, however the string holding it was produced (literal, variable,
    # concatenation, interpolation, repetition), must be seen as one
    # character by length(), by a /(.)/g match and by split //, and as three
    # bytes by length() under 'use bytes'.

    my $smiley = "\x{263a}";

    # Returns the "chars/regex/split/bytes" signature of its argument.
    # The argument is read through $_[0] so the very scalar handed in by
    # the loop is measured, not a copy of it.
    my $signature = sub {
        my $chars = length $_[0];
        my $bytes = do { use bytes; length $_[0] };

        my @by_regex = $_[0] =~ m/(.)/g;
        # Assign split to an array: a list assignment with fewer targets
        # would make split apply an implicit LIMIT.
        my @by_split = split //, $_[0];

        return join '/', $chars, scalar @by_regex, scalar @by_split, $bytes;
    };

    # Strings containing exactly one smiley.
    for my $one ("\x{263a}",
                 $smiley,

                 "" . $smiley,
                 "" . "\x{263a}",

                 $smiley    . "",
                 "\x{263a}" . "",
                 ) {
        ok($signature->($one) eq "1/1/1/3");
    }

    # Strings containing exactly two smileys.
    for my $two ("\x{263a}" . "\x{263a}",
                 $smiley    . $smiley,

                 "\x{263a}\x{263a}",
                 "$smiley$smiley",

                 "\x{263a}" x 2,
                 $smiley    x 2,
                 ) {
        ok($signature->($two) eq "2/2/2/6");
    }
}


{
    # Compile and run source text that is a double-quoted string literal
    # containing a backslash immediately followed by the character U+0100.
    # It must evaluate to that character alone and raise no warning.

    my $warnings = 0;

    # Echo any warning as a '#' diagnostic line and count it.
    local $SIG{__WARN__} = sub {
        print "#($_[0])\n";
        $warnings++;
    };

    # q/"\\/ yields a double quote plus ONE backslash; then comes the wide
    # character and the closing double quote.
    my $code   = q/"\\/ . "\x{100}" . q/"/;
    my $result = eval $code;

    ok($warnings == 0 && $result eq "\x{100}");
}
Commit	Line	Data
f96ec2a2	1	#!./perl
	2
	3	BEGIN {
	4	chdir 't' if -d 't';
20822f61	5	@INC = '../lib';
f96ec2a2	6	$ENV{PERL5LIB} = '../lib';
f70c35af	7	if ( ord("\t") != 9 ) { # skip on ebcdic platforms
	8	print "1..0 # Skip utf8 tests on ebcdic platform.\n";
	9	exit;
	10	}
f96ec2a2	11	}
f96ec2a2	12
4765795a	13	# NOTE!
	14	#
	15	# Think carefully before adding tests here. In general this should be
	16	# used only for about three categories of tests:
	17	#
	18	# (1) tests that absolutely require 'use utf8', and since that in general
	19	# shouldn't be needed as the utf8 is being obsoleted, this should
	20	# have rather few tests. If you want to test Unicode and regexes,
	21	# you probably want to go to op/regexp or op/pat; if you want to test
	22	# split, go to op/split; pack, op/pack; appending or joining,
	23	# op/append or op/join, and so forth
	24	#
	25	# (2) tests that have to do with Unicode tokenizing (though it's likely
	26	# that all the other Unicode tests sprinkled around the t/**/*.t are
	27	# going to catch that)
	28	#
	29	# (3) complicated tests that simultaneously stress so many Unicode features
	30	# that deciding into which other test script the tests should go to
	31	# is hard -- maybe consider breaking up the complicated test
	32	#
	33	#
	34
	35	use Test;
	36	plan tests => 15;
31067593	37
7bbb0251	38	{
da450f52	39	# bug id 20001009.001
da450f52	40
89491803	41	my ($a, $b);
	42
	43	{ use bytes; $a = "\xc3\xa4" }
4765795a	44	{ use utf8; $b = "\xe4" }
89491803	45
4765795a	46	my $test = 68;
31067593	47
4765795a	48	ok($a ne $b);
da450f52	49
4765795a	50	{ use utf8; ok($a ne $b) }
da450f52	51	}
da450f52	52
60ff4832	53
	54	{
	55	# bug id 20000730.004
	56
60ff4832	57	my $smiley = "\x{263a}";
60ff4832	58
4765795a	59	for my $s ("\x{263a}",
4765795a	60	$smiley,
60ff4832	61
4765795a	62	"" . $smiley,
4765795a	63	"" . "\x{263a}",
60ff4832	64
4765795a	65	$smiley . "",
4765795a	66	"\x{263a}" . "",
60ff4832	67	) {
	68	my $length_chars = length($s);
	69	my $length_bytes;
	70	{ use bytes; $length_bytes = length($s) }
	71	my @regex_chars = $s =~ m/(.)/g;
	72	my $regex_chars = @regex_chars;
	73	my @split_chars = split //, $s;
	74	my $split_chars = @split_chars;
4765795a	75	ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
4765795a	76	"1/1/1/3");
60ff4832	77	}
60ff4832	78
4765795a	79	for my $s ("\x{263a}" . "\x{263a}",
4765795a	80	$smiley . $smiley,
60ff4832	81
4765795a	82	"\x{263a}\x{263a}",
4765795a	83	"$smiley$smiley",
60ff4832	84
4765795a	85	"\x{263a}" x 2,
4765795a	86	$smiley x 2,
60ff4832	87	) {
	88	my $length_chars = length($s);
	89	my $length_bytes;
	90	{ use bytes; $length_bytes = length($s) }
	91	my @regex_chars = $s =~ m/(.)/g;
	92	my $regex_chars = @regex_chars;
	93	my @split_chars = split //, $s;
	94	my $split_chars = @split_chars;
4765795a	95	ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
4765795a	96	"2/2/2/6");
60ff4832	97	}
60ff4832	98	}
ffc61ed2	99
ffc61ed2	100
ffc61ed2	101	{
f9a63242	102	my $w = 0;
	103	local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ };
	104	my $x = eval q/"\\/ . "\x{100}" . q/"/;;
	105
4765795a	106	ok($w == 0 && $x eq "\x{100}");
f9a63242	107	}
f9a63242	108