[p5sagit/p5-mst-13.2.git] / lib / utf8.t

#!./perl 

# Test bootstrap: runs at compile time, before the rest of the script.
BEGIN {
    # Move into t/ when such a directory exists in the current directory.
    chdir 't' if -d 't';
    # Replace (not extend) the module search path with ../lib.
    @INC = '../lib';
    # Presumably supplies plan/ok/like/runperl used below -- confirm
    # against test.pl, which is not part of this file.
    require './test.pl';
}

# NOTE!
#
# Think carefully before adding tests here.  In general this should be
# used only for about three categories of tests:
#
# (1) tests that absolutely require 'use utf8'; since that pragma should
#     rarely be needed now that it is being obsoleted, there should be
#     rather few of these.  If you want to test Unicode and regexes,
#     you probably want to go to op/regexp or op/pat; if you want to test
#     split, go to op/split; pack, op/pack; appending or joining,
#     op/append or op/join, and so forth.
#
# (2) tests that have to do with Unicode tokenizing (though it's likely
#     that all the other Unicode tests sprinkled around the t/**/*.t are
#     going to catch that)
#
# (3) complicated tests that simultaneously stress so many Unicode features
#     that deciding into which other test script the tests should go to
#     is hard -- maybe consider breaking up the complicated test
#
#

# Planned count: 2 (byte string vs character string comparison)
# + 6 + 6 (the two smiley length loops) + 1 (eval of a backslashed
# wide character) + 1 (program file containing malformed UTF-8).
plan tests => 16;

{
    # bug id 20001009.001
    #
    # $a is assigned the two-byte literal "\xc3\xa4" inside a 'use bytes'
    # scope, and $b the one-character literal "\xe4" inside a 'use utf8'
    # scope.  The two must compare as different strings both outside and
    # inside the scope of 'use utf8'.

    my ($a, $b);

    { use bytes; $a = "\xc3\xa4" }
    { use utf8;  $b = "\xe4"     }

    ok($a ne $b, 'two-byte string ne one-char string outside use utf8');

    { use utf8; ok($a ne $b, 'two-byte string ne one-char string under use utf8') }
}


{
    # bug id 20000730.004
    #
    # However a string holding U+263A is constructed (literal, variable,
    # concatenation, interpolation, repetition), every way of counting it
    # must agree: each smiley is one character to length(), to a (.) regex
    # and to split //, and three bytes to length() under 'use bytes'.

    my $smiley = "\x{263a}";

    # Returns "chars/regex/split/bytes" for the string passed in: its
    # length in characters, the number of (.) matches, the number of
    # fields produced by split //, and its length under 'use bytes'.
    # $_[0] is read directly so the caller's value is measured as-is
    # rather than a copy of it.
    my $counts = sub {
        my $length_chars = length($_[0]);
        my $length_bytes;
        { use bytes; $length_bytes = length($_[0]) }
        my @regex_chars = $_[0] =~ m/(.)/g;
        my @split_chars = split //, $_[0];
        return join '/', $length_chars, scalar(@regex_chars),
                         scalar(@split_chars), $length_bytes;
    };

    for my $s ("\x{263a}",
               $smiley,

               "" . $smiley,
               "" . "\x{263a}",

               $smiley    . "",
               "\x{263a}" . "",
               ) {
        ok($counts->($s) eq "1/1/1/3", 'one smiley: 1 char, 3 bytes');
    }

    for my $s ("\x{263a}" . "\x{263a}",
               $smiley    . $smiley,

               "\x{263a}\x{263a}",
               "$smiley$smiley",

               "\x{263a}" x 2,
               $smiley    x 2,
               ) {
        ok($counts->($s) eq "2/2/2/6", 'two smileys: 2 chars, 6 bytes');
    }
}


{
    # Assemble the source text of a double-quoted string whose body is a
    # backslash followed by U+0100, eval it, and require that no warning
    # was raised and that the result is exactly that one character.
    # Any warning is echoed as a '#' comment line and counted.
    my $warn_count = 0;
    local $SIG{__WARN__} = sub { print "#($_[0])\n"; $warn_count++ };

    my $source = q/"\\/ . "\x{100}" . q/"/;
    my $value  = eval $source;

    ok($warn_count == 0 && $value eq "\x{100}");
}

{
    # Write a two-line program whose hash key is the malformed UTF-8
    # sequence "\xE1\xA0" to a scratch file, run it in a child perl, and
    # check that the child reports the malformed character and still
    # prints both 'start' and 'end'.
    my $progfile = 'utf' . $$;
    END {unlink $progfile}
    open my $fh, '>', $progfile or die "Can't open '$progfile': $!";
    # Interpolation of hex characters needs to take place now, as we're
    # testing feeding malformed utf8 into perl. Bug now fixed was an
    # "out of memory" error. We really need the "" [rather than qq() or q()]
    # to get the best explosion.
    #
    # Because the heredoc interpolates, $_ is escaped below so the child
    # program receives a literal $_ instead of whatever $_ happens to hold
    # in this process at the time of the print.
    print {$fh} <<"BANG" or die "Can't write to '$progfile': $!";
    use utf8; %a = ("\xE1\xA0"=>"sterling");
    print 'start'; printf '%x,', ord \$_ foreach keys %a; print "end\n";
BANG
    # The child opens the file by name, so the buffered program text must
    # be flushed to disk (and the handle released) before it is run.
    close $fh or die "Can't close '$progfile': $!";
    print "# Possible delay...\n";
    my $result = runperl ( verbose => 1, stderr => 1, progfile => $progfile );
    # NOTE(review): the child prints the ordinal with %x but the pattern
    # only accepts decimal digits -- presumably the ordinal of the
    # malformed key is expected to contain no a-f digits; confirm.
    like ($result,
          qr/^Malformed UTF-8 character \(2 bytes, need 3\).*start\d+,end$/s);
}
Commit	Line	Data
f96ec2a2	1	#!./perl
	2
	3	BEGIN {
	4	chdir 't' if -d 't';
20822f61	5	@INC = '../lib';
8ae6c9f9	6	require './test.pl';
f96ec2a2	7	}
f96ec2a2	8
4765795a	9	# NOTE!
	10	#
	11	# Think carefully before adding tests here. In general this should be
	12	# used only for about three categories of tests:
	13	#
	14	# (1) tests that absolutely require 'use utf8', and since that in general
	15	# shouldn't be needed as the utf8 is being obsoleted, this should
	16	# have rather few tests. If you want to test Unicode and regexes,
	17	# you probably want to go to op/regexp or op/pat; if you want to test
	18	# split, go to op/split; pack, op/pack; appending or joining,
	19	# op/append or op/join, and so forth
	20	#
	21	# (2) tests that have to do with Unicode tokenizing (though it's likely
	22	# that all the other Unicode tests sprinkled around the t/*/.t are
	23	# going to catch that)
	24	#
	25	# (3) complicated tests that simultaneously stress so many Unicode features
	26	# that deciding into which other test script the tests should go to
	27	# is hard -- maybe consider breaking up the complicated test
	28	#
	29	#
	30
8ae6c9f9	31	plan tests => 16;
31067593	32
7bbb0251	33	{
da450f52	34	# bug id 20001009.001
da450f52	35
89491803	36	my ($a, $b);
	37
	38	{ use bytes; $a = "\xc3\xa4" }
4765795a	39	{ use utf8; $b = "\xe4" }
89491803	40
4765795a	41	my $test = 68;
31067593	42
4765795a	43	ok($a ne $b);
da450f52	44
4765795a	45	{ use utf8; ok($a ne $b) }
da450f52	46	}
da450f52	47
60ff4832	48
	49	{
	50	# bug id 20000730.004
	51
60ff4832	52	my $smiley = "\x{263a}";
60ff4832	53
4765795a	54	for my $s ("\x{263a}",
4765795a	55	$smiley,
60ff4832	56
4765795a	57	"" . $smiley,
4765795a	58	"" . "\x{263a}",
60ff4832	59
4765795a	60	$smiley . "",
4765795a	61	"\x{263a}" . "",
60ff4832	62	) {
	63	my $length_chars = length($s);
	64	my $length_bytes;
	65	{ use bytes; $length_bytes = length($s) }
	66	my @regex_chars = $s =~ m/(.)/g;
	67	my $regex_chars = @regex_chars;
	68	my @split_chars = split //, $s;
	69	my $split_chars = @split_chars;
4765795a	70	ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
4765795a	71	"1/1/1/3");
60ff4832	72	}
60ff4832	73
4765795a	74	for my $s ("\x{263a}" . "\x{263a}",
4765795a	75	$smiley . $smiley,
60ff4832	76
4765795a	77	"\x{263a}\x{263a}",
4765795a	78	"$smiley$smiley",
60ff4832	79
4765795a	80	"\x{263a}" x 2,
4765795a	81	$smiley x 2,
60ff4832	82	) {
	83	my $length_chars = length($s);
	84	my $length_bytes;
	85	{ use bytes; $length_bytes = length($s) }
	86	my @regex_chars = $s =~ m/(.)/g;
	87	my $regex_chars = @regex_chars;
	88	my @split_chars = split //, $s;
	89	my $split_chars = @split_chars;
4765795a	90	ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
4765795a	91	"2/2/2/6");
60ff4832	92	}
60ff4832	93	}
ffc61ed2	94
ffc61ed2	95
ffc61ed2	96	{
f9a63242	97	my $w = 0;
	98	local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ };
	99	my $x = eval q/"\\/ . "\x{100}" . q/"/;;
	100
4765795a	101	ok($w == 0 && $x eq "\x{100}");
f9a63242	102	}
f9a63242	103
8ae6c9f9	104	{
	105	my $progfile = 'utf' . $$;
	106	END {unlink $progfile}
	107	open P, ">$progfile" or die "Can't open '$progfile': $!";
	108	# Interpolation of hex characters needs to take place now, as we're
	109	# testing feeding malformed utf8 into perl. Bug now fixed was an
	110	# "out of memory" error. We really need the "" [rather than qq() or q()]
	111	# to get the best explosion.
	112	print P <<"BANG";
	113	use utf8; %a = ("\xE1\xA0"=>"sterling");
	114	print 'start'; printf '%x,', ord $_ foreach keys %a; print "end\n";
	115	BANG
	116	print "# Possible delay...\n";
	117	my $result = runperl ( verbose => 1, stderr => 1, progfile => $progfile );
	118	like ($result,
	119	qr/^Malformed UTF-8 character \(2 bytes, need 3\).*start\d+,end$/s);
	120	}