[p5sagit/p5-mst-13.2.git] / lib / charnames.t

#!./perl

BEGIN {
    unless(grep /blib/, @INC) {
	chdir 't' if -d 't';
	@INC = '../lib';
    }
}

$| = 1;
print "1..25\n";

use charnames ':full';

print "not " unless "Here\N{EXCLAMATION MARK}?" eq "Here!?";
print "ok 1\n";

{
  use bytes;			# TEST -utf8 can switch utf8 on

  print "# \$res=$res \$\@='$@'\nnot "
    if $res = eval <<'EOE'
use charnames ":full";
"Here: \N{CYRILLIC SMALL LETTER BE}!";
1
EOE
      or $@ !~ /above 0xFF/;
  print "ok 2\n";
  # print "# \$res=$res \$\@='$@'\n";

  print "# \$res=$res \$\@='$@'\nnot "
    if $res = eval <<'EOE'
use charnames 'cyrillic';
"Here: \N{Be}!";
1
EOE
      or $@ !~ /CYRILLIC CAPITAL LETTER BE.*above 0xFF/;
  print "ok 3\n";
}

# If octal representation of unicode char is \0xyzt, then the utf8 is \3xy\2zt
if (ord('A') == 65) { # as on ASCII or UTF-8 machines
    $encoded_be = "\320\261";
    $encoded_alpha = "\316\261";
    $encoded_bet = "\327\221";
    $encoded_deseng = "\360\220\221\215";
}
else { # EBCDIC where UTF-EBCDIC may be used (this may be 1047 specific since
       # UTF-EBCDIC is codepage specific)
    $encoded_be = "\270\102\130";
    $encoded_alpha = "\264\130";
    $encoded_bet = "\270\125\130";
    $encoded_deseng = "\336\102\103\124";
}

sub to_bytes {
    pack"a*", shift;
}

{
  use charnames ':full';

  print "not " unless to_bytes("\N{CYRILLIC SMALL LETTER BE}") eq $encoded_be;
  print "ok 4\n";

  use charnames qw(cyrillic greek :short);

  print "not " unless to_bytes("\N{be},\N{alpha},\N{hebrew:bet}")
    eq "$encoded_be,$encoded_alpha,$encoded_bet";
  print "ok 5\n";
}

{
    use charnames ':full';
    print "not " unless "\x{263a}" eq "\N{WHITE SMILING FACE}";
    print "ok 6\n";
    print "not " unless length("\x{263a}") == 1;
    print "ok 7\n";
    print "not " unless length("\N{WHITE SMILING FACE}") == 1;
    print "ok 8\n";
    print "not " unless sprintf("%vx", "\x{263a}") eq "263a";
    print "ok 9\n";
    print "not " unless sprintf("%vx", "\N{WHITE SMILING FACE}") eq "263a";
    print "ok 10\n";
    print "not " unless sprintf("%vx", "\xFF\N{WHITE SMILING FACE}") eq "ff.263a";
    print "ok 11\n";
    print "not " unless sprintf("%vx", "\x{ff}\N{WHITE SMILING FACE}") eq "ff.263a";
    print "ok 12\n";
}

{
   use charnames qw(:full);
   use utf8;
   
    my $x = "\x{221b}";
    my $named = "\N{CUBE ROOT}";

    print "not " unless ord($x) == ord($named);
    print "ok 13\n";
}

{
   use charnames qw(:full);
   use utf8;
   print "not " unless "\x{100}\N{CENT SIGN}" eq "\x{100}"."\N{CENT SIGN}";
   print "ok 14\n";
}

{
  use charnames ':full';

  print "not "
      unless to_bytes("\N{DESERET SMALL LETTER ENG}") eq $encoded_deseng;
  print "ok 15\n";
}

{
  # 20001114.001	

  no utf8; # naked Latin-1

  if (ord("Ä") == 0xc4) { # Try to do this only on Latin-1.
      use charnames ':full';
      my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
      print "not " unless $text eq "\xc4" && ord($text) == 0xc4;
      print "ok 16\n";
  } else {
      print "ok 16 # Skip: not Latin-1\n";
  }
}

{
    print "not " unless charnames::viacode(0x1234) eq "ETHIOPIC SYLLABLE SEE";
    print "ok 17\n";

    print "not " if defined charnames::viacode(0x0590); # unused Hebrew
    print "ok 18\n";
}

{
    print "not " unless
	sprintf "%04X\n", charnames::vianame("GOTHIC LETTER AHSA") eq "10330";
    print "ok 19\n";

    print "not " if
	defined charnames::vianame("NONE SUCH");
    print "ok 20\n";
}

{
    # check that caching at least hasn't broken anything

    print "not " unless charnames::viacode(0x1234) eq "ETHIOPIC SYLLABLE SEE";
    print "ok 21\n";

    print "not " unless
	sprintf "%04X\n", charnames::vianame("GOTHIC LETTER AHSA") eq "10330";
    print "ok 22\n";

}

print "not " unless "\N{HORIZONTAL TABULATION}" eq "\t";
print "ok 23\n";

print "not " unless "\N{ESCAPE}" eq "\e";
print "ok 24\n";

print "not " unless "\N{NULL}" eq "\c@";
print "ok 25\n";

# TODO: when Unicode 3.2 comes along some names will change
# HORIZONTAL TABULATION -> CHARACTER TABULATION (since ISO 6429
# has been updated), and some names will have shorter aliases
# LINEFEED (LF).  Update the tests, and also update the charnames
# pragma to support the 3.1 names, and the shorter aliases.
Commit	Line	Data
423cee85	1	#!./perl
	2
	3	BEGIN {
	4	unless(grep /blib/, @INC) {
	5	chdir 't' if -d 't';
20822f61	6	@INC = '../lib';
423cee85	7	}
	8	}
	9
	10	$\| = 1;
301a3cda	11	print "1..25\n";
423cee85	12
	13	use charnames ':full';
	14
93979888	15	print "not " unless "Here\N{EXCLAMATION MARK}?" eq "Here!?";
423cee85	16	print "ok 1\n";
423cee85	17
c82a54e6	18	{
5d9a6404	19	use bytes; # TEST -utf8 can switch utf8 on
c82a54e6	20
	21	print "# \$res=$res \$\@='$@'\nnot "
	22	if $res = eval <<'EOE'
423cee85	23	use charnames ":full";
4a2d328f	24	"Here: \N{CYRILLIC SMALL LETTER BE}!";
423cee85	25	1
423cee85	26	EOE
c82a54e6	27	or $@ !~ /above 0xFF/;
	28	print "ok 2\n";
	29	# print "# \$res=$res \$\@='$@'\n";
423cee85	30
c82a54e6	31	print "# \$res=$res \$\@='$@'\nnot "
c82a54e6	32	if $res = eval <<'EOE'
423cee85	33	use charnames 'cyrillic';
4a2d328f	34	"Here: \N{Be}!";
423cee85	35	1
423cee85	36	EOE
c82a54e6	37	or $@ !~ /CYRILLIC CAPITAL LETTER BE.*above 0xFF/;
	38	print "ok 3\n";
	39	}
423cee85	40
423cee85	41	# If octal representation of unicode char is \0xyzt, then the utf8 is \3xy\2zt
210db7fc	42	if (ord('A') == 65) { # as on ASCII or UTF-8 machines
	43	$encoded_be = "\320\261";
	44	$encoded_alpha = "\316\261";
	45	$encoded_bet = "\327\221";
	46	$encoded_deseng = "\360\220\221\215";
	47	}
	48	else { # EBCDIC where UTF-EBCDIC may be used (this may be 1047 specific since
	49	# UTF-EBCDIC is codepage specific)
	50	$encoded_be = "\270\102\130";
	51	$encoded_alpha = "\264\130";
	52	$encoded_bet = "\270\125\130";
	53	$encoded_deseng = "\336\102\103\124";
	54	}
c5cc3500	55
c5cc3500	56	sub to_bytes {
f9a63242	57	pack"a*", shift;
c5cc3500	58	}
c5cc3500	59
423cee85	60	{
423cee85	61	use charnames ':full';
423cee85	62
c5cc3500	63	print "not " unless to_bytes("\N{CYRILLIC SMALL LETTER BE}") eq $encoded_be;
423cee85	64	print "ok 4\n";
	65
	66	use charnames qw(cyrillic greek :short);
	67
c5cc3500	68	print "not " unless to_bytes("\N{be},\N{alpha},\N{hebrew:bet}")
423cee85	69	eq "$encoded_be,$encoded_alpha,$encoded_bet";
	70	print "ok 5\n";
	71	}
e1992b6d	72
	73	{
	74	use charnames ':full';
	75	print "not " unless "\x{263a}" eq "\N{WHITE SMILING FACE}";
	76	print "ok 6\n";
	77	print "not " unless length("\x{263a}") == 1;
	78	print "ok 7\n";
	79	print "not " unless length("\N{WHITE SMILING FACE}") == 1;
	80	print "ok 8\n";
	81	print "not " unless sprintf("%vx", "\x{263a}") eq "263a";
	82	print "ok 9\n";
	83	print "not " unless sprintf("%vx", "\N{WHITE SMILING FACE}") eq "263a";
	84	print "ok 10\n";
f08d6ad9	85	print "not " unless sprintf("%vx", "\xFF\N{WHITE SMILING FACE}") eq "ff.263a";
	86	print "ok 11\n";
	87	print "not " unless sprintf("%vx", "\x{ff}\N{WHITE SMILING FACE}") eq "ff.263a";
	88	print "ok 12\n";
e1992b6d	89	}
c00525d4	90
	91	{
	92	use charnames qw(:full);
55eda711	93	use utf8;
c00525d4	94
	95	my $x = "\x{221b}";
	96	my $named = "\N{CUBE ROOT}";
	97
	98	print "not " unless ord($x) == ord($named);
	99	print "ok 13\n";
	100	}
	101
f9a63242	102	{
f9a63242	103	use charnames qw(:full);
55eda711	104	use utf8;
f9a63242	105	print "not " unless "\x{100}\N{CENT SIGN}" eq "\x{100}"."\N{CENT SIGN}";
	106	print "ok 14\n";
	107	}
	108
b896c7a5	109	{
	110	use charnames ':full';
	111
	112	print "not "
	113	unless to_bytes("\N{DESERET SMALL LETTER ENG}") eq $encoded_deseng;
	114	print "ok 15\n";
4765795a	115	}
b896c7a5	116
4765795a	117	{
	118	# 20001114.001
	119
4c53e876	120	no utf8; # naked Latin-1
3ba0e062	121
4765795a	122	if (ord("Ä") == 0xc4) { # Try to do this only on Latin-1.
	123	use charnames ':full';
	124	my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
	125	print "not " unless $text eq "\xc4" && ord($text) == 0xc4;
	126	print "ok 16\n";
	127	} else {
	128	print "ok 16 # Skip: not Latin-1\n";
	129	}
b896c7a5	130	}
b896c7a5	131
daf0d493	132	{
	133	print "not " unless charnames::viacode(0x1234) eq "ETHIOPIC SYLLABLE SEE";
	134	print "ok 17\n";
	135
	136	print "not " if defined charnames::viacode(0x0590); # unused Hebrew
	137	print "ok 18\n";
	138	}
	139
	140	{
	141	print "not " unless
	142	sprintf "%04X\n", charnames::vianame("GOTHIC LETTER AHSA") eq "10330";
	143	print "ok 19\n";
	144
	145	print "not " if
	146	defined charnames::vianame("NONE SUCH");
	147	print "ok 20\n";
	148	}
4e2cda5d	149
	150	{
	151	# check that caching at least hasn't broken anything
	152
	153	print "not " unless charnames::viacode(0x1234) eq "ETHIOPIC SYLLABLE SEE";
	154	print "ok 21\n";
	155
	156	print "not " unless
	157	sprintf "%04X\n", charnames::vianame("GOTHIC LETTER AHSA") eq "10330";
	158	print "ok 22\n";
	159
	160	}
301a3cda	161
	162	print "not " unless "\N{HORIZONTAL TABULATION}" eq "\t";
	163	print "ok 23\n";
	164
	165	print "not " unless "\N{ESCAPE}" eq "\e";
	166	print "ok 24\n";
	167
	168	print "not " unless "\N{NULL}" eq "\c@";
	169	print "ok 25\n";
	170
	171	# TODO: when Unicode 3.2 comes along some names will change
	172	# HORIZONTAL TABULATION -> CHARACTER TABULATION (since ISO 6429
	173	# has been updated), and some names will have shorter aliases
	174	# LINEFEED (LF). Update the tests, and also update the charnames
	175	# pragma to support the 3.1 names, and the shorter aliases.