t/test.pl Simple testing library
t/uni/case.pl See if Unicode casing works
t/uni/chomp.t See if Unicode chomp works
+t/uni/chr.t See if Unicode chr works
t/uni/class.t See if Unicode classes work (\p)
t/uni/fold.t See if Unicode folding works
t/uni/lower.t See if Unicode casing works
*tmps++ = (char)value;
*tmps = '\0';
(void)SvPOK_only(TARG);
+
if (PL_encoding && !IN_BYTES) {
sv_recode_to_utf8(TARG, PL_encoding);
tmps = SvPVX(TARG);
if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
- memEQ(tmps, "\xef\xbf\xbd\0", 4)) {
- SvGROW(TARG, 3);
+ UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
+ SvGROW(TARG, 2);
tmps = SvPVX(TARG);
- SvCUR_set(TARG, 2);
- *tmps++ = (U8)UTF8_EIGHT_BIT_HI(value);
- *tmps++ = (U8)UTF8_EIGHT_BIT_LO(value);
+ SvCUR_set(TARG, 1);
+ *tmps++ = (char)value;
*tmps = '\0';
- SvUTF8_on(TARG);
+ SvUTF8_off(TARG);
}
}
+
XPUSHs(TARG);
RETURN;
}
is(chr(-3.0), "\xFD");
}
-# Check UTF-8.
+# Check UTF-8 (not UTF-EBCDIC).
+SKIP: {
+ skip "no UTF-8 on EBCDIC", 21 if chr(193) eq 'A';
sub hexes {
no warnings 'utf8'; # avoid surrogate and beyond Unicode warnings
}
# The following code points are some interesting steps in UTF-8.
-is(hexes( 0x100), "c4 80");
-is(hexes( 0x7FF), "df bf");
-is(hexes( 0x800), "e0 a0 80");
-is(hexes( 0xFFF), "e0 bf bf");
-is(hexes( 0x1000), "e1 80 80");
-is(hexes( 0xCFFF), "ec bf bf");
-is(hexes( 0xD000), "ed 80 80");
-is(hexes( 0xD7FF), "ed 9f bf");
-is(hexes( 0xD800), "ed a0 80"); # not strict utf-8 (surrogate area begin)
-is(hexes( 0xDFFF), "ed bf bf"); # not strict utf-8 (surrogate area end)
-is(hexes( 0xE000), "ee 80 80");
-is(hexes( 0xFFFF), "ef bf bf");
-is(hexes( 0x10000), "f0 90 80 80");
-is(hexes( 0x3FFFF), "f0 bf bf bf");
-is(hexes( 0x40000), "f1 80 80 80");
-is(hexes( 0xFFFFF), "f3 bf bf bf");
-is(hexes(0x100000), "f4 80 80 80");
-is(hexes(0x10FFFF), "f4 8f bf bf"); # Unicode (4.1) last code point
-is(hexes(0x110000), "f4 90 80 80");
-is(hexes(0x1FFFFF), "f7 bf bf bf"); # last four byte encoding
-is(hexes(0x200000), "f8 88 80 80 80");
-
+ is(hexes( 0x100), "c4 80");
+ is(hexes( 0x7FF), "df bf");
+ is(hexes( 0x800), "e0 a0 80");
+ is(hexes( 0xFFF), "e0 bf bf");
+ is(hexes( 0x1000), "e1 80 80");
+ is(hexes( 0xCFFF), "ec bf bf");
+ is(hexes( 0xD000), "ed 80 80");
+ is(hexes( 0xD7FF), "ed 9f bf");
+ is(hexes( 0xD800), "ed a0 80"); # not strict utf-8 (surrogate area begin)
+ is(hexes( 0xDFFF), "ed bf bf"); # not strict utf-8 (surrogate area end)
+ is(hexes( 0xE000), "ee 80 80");
+ is(hexes( 0xFFFF), "ef bf bf");
+ is(hexes( 0x10000), "f0 90 80 80");
+ is(hexes( 0x3FFFF), "f0 bf bf bf");
+ is(hexes( 0x40000), "f1 80 80 80");
+ is(hexes( 0xFFFFF), "f3 bf bf bf");
+ is(hexes(0x100000), "f4 80 80 80");
+ is(hexes(0x10FFFF), "f4 8f bf bf"); # Unicode (4.1) last code point
+ is(hexes(0x110000), "f4 90 80 80");
+ is(hexes(0x1FFFFF), "f7 bf bf bf"); # last four byte encoding
+ is(hexes(0x200000), "f8 88 80 80 80");
+}
require "test.pl";
}
-plan tests => 7;
+plan tests => 35;
# compile time evaluation
$x = "\x{1234}";
is(ord($x), 0x1234, 'runtime ord \x{....}');
+{
+ no warnings 'utf8'; # avoid Unicode warnings
+
+# The following code points are some interesting steps.
+ is(ord(chr( 0x100)), 0x100, '0x0100');
+ is(ord(chr( 0x3FF)), 0x3FF, 'last two-byte char in UTF-EBCDIC');
+ is(ord(chr( 0x400)), 0x400, 'first three-byte char in UTF-EBCDIC');
+ is(ord(chr( 0x7FF)), 0x7FF, 'last two-byte char in UTF-8');
+ is(ord(chr( 0x800)), 0x800, 'first three-byte char in UTF-8');
+ is(ord(chr( 0xFFF)), 0xFFF, '0x0FFF');
+ is(ord(chr( 0x1000)), 0x1000, '0x1000');
+ is(ord(chr( 0x3FFF)), 0x3FFF, 'last three-byte char in UTF-EBCDIC');
+ is(ord(chr( 0x4000)), 0x4000, 'first four-byte char in UTF-EBCDIC');
+ is(ord(chr( 0xCFFF)), 0xCFFF, '0xCFFF');
+ is(ord(chr( 0xD000)), 0xD000, '0xD000');
+ is(ord(chr( 0xD7FF)), 0xD7FF, '0xD7FF');
+ is(ord(chr( 0xD800)), 0xD800, 'surrogate begin (not strict utf-8)');
+ is(ord(chr( 0xDFFF)), 0xDFFF, 'surrogate end (not strict utf-8)');
+ is(ord(chr( 0xE000)), 0xE000, '0xE000');
+ is(ord(chr( 0xFDD0)), 0xFDD0, 'first additional noncharacter in BMP');
+ is(ord(chr( 0xFDEF)), 0xFDEF, 'last additional noncharacter in BMP');
+ is(ord(chr( 0xFFFE)), 0xFFFE, '0xFFFE');
+ is(ord(chr( 0xFFFF)), 0xFFFF, 'last three-byte char in UTF-8');
+ is(ord(chr( 0x10000)), 0x10000, 'first four-byte char in UTF-8');
+ is(ord(chr( 0x3FFFF)), 0x3FFFF, 'last four-byte char in UTF-EBCDIC');
+ is(ord(chr( 0x40000)), 0x40000, 'first five-byte char in UTF-EBCDIC');
+ is(ord(chr( 0xFFFFF)), 0xFFFFF, '0xFFFFF');
+ is(ord(chr(0x100000)), 0x100000, '0x100000');
+ is(ord(chr(0x10FFFF)), 0x10FFFF, 'Unicode last code point');
+ is(ord(chr(0x110000)), 0x110000, '0x110000');
+ is(ord(chr(0x1FFFFF)), 0x1FFFFF, 'last four-byte char in UTF-8');
+ is(ord(chr(0x200000)), 0x200000, 'first five-byte char in UTF-8');
+}
--- /dev/null
+
+BEGIN {
+ if ($ENV{'PERL_CORE'}){
+ chdir 't';
+ @INC = '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ unless (PerlIO::Layer->find('perlio')){
+ print "1..0 # Skip: PerlIO required\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE_MINITEST}) {
+ print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use Test::More tests => 6;
+use Encode;
+
+use encoding 'johab';
+
+ok(chr(0x7f) eq "\x7f");
+ok(chr(0x80) eq "\x80");
+ok(chr(0xff) eq "\xff");
+
+for my $i (127, 128, 255) {
+ ok(chr($i) eq pack('C', $i));
+}
+
+__END__