From: Nick Ing-Simmons <nik@tiuk.ti.com>
Date: Mon, 19 Mar 2001 19:27:57 +0000 (+0000)
Subject: More EBCDIC fixes.
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c4d5f83add3e03ac76c328ed8a29701d939174ce;p=p5sagit%2Fp5-mst-13.2.git

More EBCDIC fixes.

p4raw-id: //depot/perlio@9246
---

diff --git a/doop.c b/doop.c
index e4a516a..f2bda8b 100644
--- a/doop.c
+++ b/doop.c
@@ -316,9 +316,11 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
     isutf8 = SvUTF8(sv);
     if (!isutf8) {
 	U8 *t = s, *e = s + len;
-	while (t < e)
-	    if ((hibit = !UTF8_IS_INVARIANT(*t++)))
+	while (t < e) {
+	    U8 ch = *t++;
+	    if ((hibit = !NATIVE_IS_INVARIANT(ch)))
 		break;
+	}
 	if (hibit)
 	    s = bytes_to_utf8(s, &len);
     }
@@ -408,9 +410,11 @@ S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
     s = (U8*)SvPV(sv, len);
     if (!SvUTF8(sv)) {
 	U8 *t = s, *e = s + len;
-	while (t < e)
-	    if ((hibit = !UTF8_IS_INVARIANT(*t++)))
+	while (t < e) {
+	    U8 ch = *t++;
+	    if ((hibit = !NATIVE_IS_INVARIANT(ch)))
 		break;
+	}
 	if (hibit)
 	    start = s = bytes_to_utf8(s, &len);
     }
@@ -453,9 +457,11 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
     isutf8 = SvUTF8(sv);
     if (!isutf8) {
 	U8 *t = s, *e = s + len;
-	while (t < e)
-	    if ((hibit = !UTF8_IS_INVARIANT(*t++)))
+	while (t < e) {
+	    U8 ch = *t++;
+	    if ((hibit = !NATIVE_IS_INVARIANT(ch)))
 		break;
+	}
 	if (hibit)
 	    s = bytes_to_utf8(s, &len);
     }
diff --git a/sv.c b/sv.c
index 18c5ac9..1b36744 100644
--- a/sv.c
+++ b/sv.c
@@ -2978,7 +2978,8 @@ Perl_sv_utf8_upgrade(pTHX_ register SV *sv)
     e = (U8 *) SvEND(sv);
     t = s;
     while (t < e) {
-	if ((hibit = !UTF8_IS_INVARIANT(*t++)))
+	U8 ch = *t++;
+	if ((hibit = !NATIVE_IS_INVARIANT(ch)))
 	    break;
     }
     if (hibit) {
@@ -2991,12 +2992,6 @@ Perl_sv_utf8_upgrade(pTHX_ register SV *sv)
 	    Safefree(s); /* No longer using what was there before. */
 	SvLEN(sv) = len; /* No longer know the real size. */
     }
-#ifdef EBCDIC
-    else {
-	for (t = s; t < e; t++)
-	    *t = NATIVE_TO_ASCII(*t);
-    }
-#endif
     /* Mark as UTF-8 even if no hibit - saves scanning loop */
     SvUTF8_on(sv);
     return SvCUR(sv);
@@ -3112,7 +3107,8 @@ Perl_sv_utf8_decode(pTHX_ register SV *sv)
 	    return FALSE;
         e = (U8 *) SvEND(sv);
         while (c < e) {
-            if (!UTF8_IS_INVARIANT(*c++)) {
+	    U8 ch = *c++;
+            if (!UTF8_IS_INVARIANT(ch)) {
 		SvUTF8_on(sv);
 		break;
 	    }
@@ -7127,7 +7123,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
 
 	case 'c':
 	    uv = args ? va_arg(*args, int) : SvIVx(argsv);
-	    if ((uv > 255 || (!UTF8_IS_INVARIANT(uv) && SvUTF8(sv))) && !IN_BYTE) {
+	    if ((uv > 255 || (!UNI_IS_INVARIANT(uv) || SvUTF8(sv))) && !IN_BYTE) {
 		eptr = (char*)utf8buf;
 		elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf;
 		is_utf = TRUE;
diff --git a/t/camel-III/vstring.t b/t/camel-III/vstring.t
index 7360ae7..6dec4dd 100644
--- a/t/camel-III/vstring.t
+++ b/t/camel-III/vstring.t
@@ -5,14 +5,12 @@ BEGIN {
 }
 use Test;
 plan test => 5;
-# Error messages may have wide chars, say that is okay - if we can.
-eval { binmode STDOUT,":utf8" };
 
 # Chapter 2 pp67/68
 my $vs = v1.20.300.4000;
 ok($vs,"\x{1}\x{14}\x{12c}\x{fa0}","v-string ne \\x{}");
 ok($vs,chr(1).chr(20).chr(300).chr(4000),"v-string ne chr()");
-ok('foo',v102.111.111,"v-string ne ''");
+ok('foo',((chr(193) eq 'A') ? v134.150.150 : v102.111.111),"v-string ne ''");
 
 # Chapter 15, pp403
 
diff --git a/t/op/each.t b/t/op/each.t
index 2e80dcd..daddc9c 100755
--- a/t/op/each.t
+++ b/t/op/each.t
@@ -2,11 +2,11 @@
 
 BEGIN {
     chdir 't' if -d 't';
-    @INC = '.'; 
+    @INC = '.';
     push @INC, '../lib';
-}    
+}
 
-print "1..26\n";
+print "1..27\n";
 
 $h{'abc'} = 'ABC';
 $h{'def'} = 'DEF';
@@ -163,15 +163,20 @@ print "ok 23\n";
 print "#$u{$_}\n" for keys %u; # Used to core dump before change #8056.
 print "ok 24\n";
 
+use bytes ();
+
 $d = pack("U*", 0xe3, 0x81, 0x82);
+$ol = bytes::length($d);
+print "not " unless $ol > 3;
+print "ok 25\n";
 %u = ($d => "downgrade");
 for (keys %u) {
     use bytes;
     print "not " if length ne 3 or $_ ne "\xe3\x81\x82";
-    print "ok 25\n";
+    print "ok 26\n";
 }
 {
     use bytes;
-    print "not " if length($d) ne 6;
-    print "ok 26\n";
+    print "not " if length($d) != $ol;
+    print "ok 27\n";
 }
diff --git a/t/op/length.t b/t/op/length.t
index df80fcd..c4445e3 100644
--- a/t/op/length.t
+++ b/t/op/length.t
@@ -34,52 +34,84 @@ print "ok 3\n";
 
 {
     my $a = pack("U", 0x80);
-    
+
     print "not " unless length($a) == 1;
     print "ok 6\n";
     $test++;
-    
+
     use bytes;
-    print "not " unless $a eq "\xc2\x80" && length($a) == 2;
+    if (ord('A') == 193)
+     {
+      printf "#%vx for 0x80\n",$a;
+      print "not " unless $a eq "\x8a\x67" && length($a) == 2;
+     }
+    else
+     {
+      print "not " unless $a eq "\xc2\x80" && length($a) == 2;
+     }
     print "ok 7\n";
     $test++;
 }
 
 {
     my $a = "\x{100}";
-    
+
     print "not " unless length($a) == 1;
     print "ok 8\n";
     $test++;
-    
+
     use bytes;
-    print "not " unless $a eq "\xc4\x80" && length($a) == 2;
+    if (ord('A') == 193)
+     {
+      printf "#%vx for 0x100\n",$a;
+      print "not " unless $a eq "\x8c\x41" && length($a) == 2;
+     }
+    else
+     {
+      print "not " unless $a eq "\xc4\x80" && length($a) == 2;
+     }
     print "ok 9\n";
     $test++;
 }
 
 {
     my $a = "\x{100}\x{80}";
-    
+
     print "not " unless length($a) == 2;
     print "ok 10\n";
     $test++;
-    
+
     use bytes;
-    print "not " unless $a eq "\xc4\x80\xc2\x80" && length($a) == 4;
+    if (ord('A') == 193)
+     {
+      printf "#%vx for 0x100 0x80\n",$a;
+      print "not " unless $a eq "\x8c\x41\x8a\x67" && length($a) == 4;
+     }
+    else
+     {
+      print "not " unless $a eq "\xc4\x80\xc2\x80" && length($a) == 4;
+     }
     print "ok 11\n";
     $test++;
 }
 
 {
     my $a = "\x{80}\x{100}";
-    
+
     print "not " unless length($a) == 2;
     print "ok 12\n";
     $test++;
-    
+
     use bytes;
-    print "not " unless $a eq "\xc2\x80\xc4\x80" && length($a) == 4;
+    if (ord('A') == 193)
+     {
+      printf "#%vx for 0x80 0x100\n",$a;
+      print "not " unless $a eq "\x8a\x67\x8c\x41" && length($a) == 4;
+     }
+    else
+     {
+      print "not " unless $a eq "\xc2\x80\xc4\x80" && length($a) == 4;
+     }
     print "ok 13\n";
     $test++;
 }
diff --git a/t/op/pack.t b/t/op/pack.t
index 4c16991..5323bc3 100755
--- a/t/op/pack.t
+++ b/t/op/pack.t
@@ -43,7 +43,7 @@ $sum = 103 if ($Config{ebcdic} eq 'define');
 print +($x = unpack("%32B*", "Now is the time for all good blurfl")) == $sum
 	? "ok 7\n" : "not ok 7 $x\n";
 
-open(BIN, "./perl") || open(BIN, "./perl.exe") 
+open(BIN, "./perl") || open(BIN, "./perl.exe")
     || die "Can't open ../perl or ../perl.exe: $!\n";
 sysread BIN, $foo, 8192;
 close BIN;
@@ -119,10 +119,10 @@ print( ((unpack("i",pack("i",-1))) == -1 ? "ok " : "not ok "),$test++,"\n");
 # 31..36: test the pack lengths of s S i I l L
 print "not " unless length(pack("s", 0)) == 2;
 print "ok ", $test++, "\n";
- 
+
 print "not " unless length(pack("S", 0)) == 2;
 print "ok ", $test++, "\n";
- 
+
 print "not " unless length(pack("i", 0)) >= 4;
 print "ok ", $test++, "\n";
 
@@ -171,7 +171,7 @@ foreach my $t (@templates) {
 # binary values of the uuencoded version would not be portable between
 # character sets.  Uuencoding is meant for encoding binary data, not
 # text data.
- 
+
 $in = pack 'C*', 0 .. 255;
 
 # just to be anal, we do some random tr/`/ /
@@ -205,7 +205,7 @@ print "ok ", $test++, "\n";
 
 $uu = <<'EOUU';
 M'XL("%C<Q#4" TI!4%4 \RHM+E%(S,LOR4@M4@A(+<I1*"U-SD])+>(" &1F
-&8%P:    
+&8%P:
 EOUU
 
 print "not " unless unpack('u', $uu) eq $in;
@@ -407,15 +407,16 @@ $z = pack <<EOP,'string','etc';
 EOP
 print 'not ' unless $z eq "\000\006string\003etc"; print "ok $test\n"; $test++;
 
-print 'not ' unless "1.20.300.4000" eq sprintf "%vd", pack("U*",1,20,300,4000); 
+print 'not ' unless "1.20.300.4000" eq sprintf "%vd", pack("U*",1,20,300,4000);
 print "ok $test\n"; $test++;
-print 'not ' unless "1.20.300.4000" eq 
-                    sprintf "%vd", pack("  U*",1,20,300,4000); 
+print 'not ' unless "1.20.300.4000" eq
+                    sprintf "%vd", pack("  U*",1,20,300,4000);
 print "ok $test\n"; $test++;
-print 'not ' unless v1.20.300.4000 ne 
-                    sprintf "%vd", pack("C0U*",1,20,300,4000); 
+print 'not ' unless v1.20.300.4000 ne
+                    sprintf "%vd", pack("C0U*",1,20,300,4000);
 print "ok $test\n"; $test++;
 
 # 160
-print "not " unless join(" ", unpack("C*", chr(0x1e2))) eq "199 162";
+print "not " unless join(" ", unpack("C*", chr(0x1e2)))
+        eq ((ord(A) == 193) ? "156 67" : "199 162");
 print "ok $test\n"; $test++;
diff --git a/t/op/ver.t b/t/op/ver.t
index b9ba589..e248a48 100755
--- a/t/op/ver.t
+++ b/t/op/ver.t
@@ -114,7 +114,7 @@ print "not " unless sprintf("%*vb", "##", v1.22.333.4444)
 print "ok $test\n";  ++$test;
 
 print "not " unless sprintf("%vd", join("", map { chr }
-					    unpack "U*", v2001.2002.2003))
+					    unpack 'U*', pack('U*',2001,2002,2003)))
 		    eq '2001.2002.2003';
 print "ok $test\n";  ++$test;
 
diff --git a/toke.c b/toke.c
index 53159f3..ea0f650 100644
--- a/toke.c
+++ b/toke.c
@@ -1457,7 +1457,7 @@ S_scan_const(pTHX_ char *start)
 		/* We need to map to chars to ASCII before doing the tests
 		   to cover EBCDIC
 		*/
-		if (!UTF8_IS_INVARIANT(uv)) {
+		if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
 		    if (!has_utf8 && uv > 255) {
 		        /* Might need to recode whatever we have
 			 * accumulated so far if it contains any
@@ -1469,7 +1469,7 @@ S_scan_const(pTHX_ char *start)
 			int hicount = 0;
 			U8 *c;
 			for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
-			    if (!UTF8_IS_INVARIANT(*c)) {
+			    if (!NATIVE_IS_INVARIANT(*c)) {
 			        hicount++;
 			    }
 			}
@@ -1481,7 +1481,7 @@ S_scan_const(pTHX_ char *start)
 			    dst = src+hicount;
 			    d  += hicount;
 			    while (src >= (U8 *)SvPVX(sv)) {
-			        if (!UTF8_IS_INVARIANT(*src)) {
+			        if (!NATIVE_IS_INVARIANT(*src)) {
 				    U8 ch = NATIVE_TO_ASCII(*src);
 				    *dst-- = UTF8_EIGHT_BIT_LO(ch);
 				    *dst-- = UTF8_EIGHT_BIT_HI(ch);
@@ -1510,7 +1510,7 @@ S_scan_const(pTHX_ char *start)
 		    }
 		}
 		else {
-		    *d++ = NATIVE_TO_NEED(has_utf8,uv);
+		    *d++ = (char) uv;
 		}
 		continue;
 
@@ -1603,7 +1603,6 @@ S_scan_const(pTHX_ char *start)
 	} /* end if (backslash) */
 
     default_action:
-       /* The 'has_utf8' here is very dubious */
        if (!UTF8_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
            STRLEN len = (STRLEN) -1;
            UV uv;
@@ -7230,7 +7229,7 @@ vstring:
 	    while (isDIGIT(*pos) || *pos == '_')
 		pos++;
 	    if (!isALPHA(*pos)) {
-		UV rev, revmax = 0;
+		UV rev;
 		U8 tmpbuf[UTF8_MAXLEN+1];
 		U8 *tmpend;
 		s++;				/* get past 'v' */
@@ -7260,9 +7259,9 @@ vstring:
 		    }
 		    /* Append native character for the rev point */
 		    tmpend = uvchr_to_utf8(tmpbuf, rev);
-		    if (rev > revmax)
-			revmax = rev;
 		    sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
+		    if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+			SvUTF8_on(sv);
 		    if (*pos == '.' && isDIGIT(pos[1]))
 			s = ++pos;
 		    else {
@@ -7272,14 +7271,8 @@ vstring:
 		    while (isDIGIT(*pos) || *pos == '_')
 			pos++;
 		}
-
 		SvPOK_on(sv);
 		SvREADONLY_on(sv);
-		/* if (revmax > 127) { */
-		    SvUTF8_on(sv); /*
-		    if (revmax < 256)
-		      sv_utf8_downgrade(sv, TRUE);
-		} */
 	    }
 	}
 	break;
diff --git a/utf8.c b/utf8.c
index 81fb44d..01afa01 100644
--- a/utf8.c
+++ b/utf8.c
@@ -46,8 +46,8 @@ is the recommended Unicode-aware way of saying
 U8 *
 Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
 {
-    if (UTF8_IS_INVARIANT(uv)) {
-	*d++ = uv;
+    if (UNI_IS_INVARIANT(uv)) {
+	*d++ = UTF_TO_NATIVE(uv);
 	return d;
     }
 #if defined(EBCDIC) || 1 /* always for testing */
@@ -151,9 +151,7 @@ is the recommended wide native character-aware way of saying
 U8 *
 Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv)
 {
-    if (uv < 0x100)
-	uv = NATIVE_TO_ASCII(uv);
-    return Perl_uvuni_to_utf8(aTHX_ d, uv);
+    return Perl_uvuni_to_utf8(aTHX_ d, NATIVE_TO_UNI(uv));
 }
 
 
@@ -293,7 +291,7 @@ Perl_utf8n_to_uvuni(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
     if (UTF8_IS_INVARIANT(uv)) {
 	if (retlen)
 	    *retlen = 1;
-	return (UV) (*s);
+	return (UV) (NATIVE_TO_UTF(*s));
     }
 
     if (UTF8_IS_CONTINUATION(uv) &&
@@ -478,9 +476,7 @@ UV
 Perl_utf8n_to_uvchr(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags)
 {
     UV uv = Perl_utf8n_to_uvuni(aTHX_ s, curlen, retlen, flags);
-    if (uv < 0x100)
-        return (UV) ASCII_TO_NATIVE(uv);
-    return uv;
+    return UNI_TO_NATIVE(uv);
 }
 
 /*
@@ -550,7 +546,7 @@ Perl_utf8_length(pTHX_ U8* s, U8* e)
 	U8 t = UTF8SKIP(s);
 
 	if (e - s < t)
-	    Perl_croak(aTHX_ "panic: utf8_length: unaligned end");
+	    Perl_croak(aTHX_ "panic: utf8_length: s=%p (%02X) e=%p l=%d - unaligned end",s,*s,e,t);
 	s += t;
 	len++;
     }
@@ -713,19 +709,16 @@ Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8)
 
     *is_utf8 = 0;		
 
-#ifndef EBCDIC
-    /* Can use as-is if no high chars */
-    if (!count)
-	return start;
-#endif
-
     Newz(801, d, (*len) - count + 1, U8);
     s = start; start = d;
     while (s < send) {
 	U8 c = *s++;
-	if (!UTF8_IS_INVARIANT(c))
-	    c = UTF8_ACCUMULATE(c, *s++);
-	*d++ = ASCII_TO_NATIVE(c);
+	if (!UTF8_IS_INVARIANT(c)) {
+	    /* Then it is two-byte encoded */
+	    c = UTF8_ACCUMULATE(NATIVE_TO_UTF(c), *s++);
+	    c = ASCII_TO_NATIVE(c);
+	}
+	*d++ = c;
     }
     *d = '\0';
     *len = d - start;
@@ -755,8 +748,8 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len)
 
     while (s < send) {
         UV uv = NATIVE_TO_ASCII(*s++);
-        if (UTF8_IS_INVARIANT(uv))
-            *d++ = uv;
+        if (UNI_IS_INVARIANT(uv))
+            *d++ = UTF_TO_NATIVE(uv);
         else {
             *d++ = UTF8_EIGHT_BIT_HI(uv);
             *d++ = UTF8_EIGHT_BIT_LO(uv);
diff --git a/utf8.h b/utf8.h
index a606397..46bc828 100644
--- a/utf8.h
+++ b/utf8.h
@@ -64,7 +64,9 @@ END_EXTERN_C
 
  */
 
-#define UTF8_IS_INVARIANT(c) 		(((UV)c) <  0x80)
+#define UNI_IS_INVARIANT(c)		(((UV)c) <  0x80)
+#define UTF8_IS_INVARIANT(c)		UNI_IS_INVARIANT(NATIVE_TO_UTF(c))
+#define NATIVE_IS_INVARIANT(c)		UNI_IS_INVARIANT(NATIVE_TO_ASCII(c))
 #define UTF8_IS_START(c)		(((U8)c) >= 0xc0 && (((U8)c) <= 0xfd))
 #define UTF8_IS_CONTINUATION(c)		(((U8)c) >= 0x80 && (((U8)c) <= 0xbf))
 #define UTF8_IS_CONTINUED(c) 		(((U8)c) &  0x80)
diff --git a/utfebcdic.h b/utfebcdic.h
index 0eef54b..ef67cb2 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -15,17 +15,18 @@ START_EXTERN_C
 #ifdef DOINIT
 /* Indexed by encoded byte this table gives the length of the sequence.
    Adapted from the shadow flags table in tr16.
-   The entries marked 9 are continuation bytes.
+   The entries marked 9 in tr6 are continuation bytes and are marked
+   as length 1 here so that we can recover.
 */
 EXTCONST unsigned char PL_utf8skip[] = {
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,9,9,9,9,9,9,9,9,9,9,1,1,1,1,1,
-1,9,9,9,9,9,9,9,9,9,1,1,1,1,1,1,
-1,1,9,9,9,9,9,9,9,9,9,1,1,1,1,1,
-9,9,9,9,2,2,2,2,2,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
 2,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2,
@@ -221,7 +222,7 @@ END_EXTERN_C
 #define UTF_TO_NATIVE(ch)        PL_utf2e[(U8)(ch)]
 /* Transform in wide UV char space */
 #define NATIVE_TO_UNI(ch)        (((ch) > 255) ? (ch) : NATIVE_TO_ASCII(ch))
-#define UNI_TO_NATIVE(ch)        (((ch) > 255) ? (ch) : (UV) ASCII_TO_NATIVE(ch))
+#define UNI_TO_NATIVE(ch)        (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
 /* Transform in invariant..byte space */
 #define NATIVE_TO_NEED(enc,ch)   ((enc) ? UTF_TO_NATIVE(NATIVE_TO_ASCII(ch)) : (ch))
 #define ASCII_TO_NEED(enc,ch)    ((enc) ? UTF_TO_NATIVE(ch) : ASCII_TO_NATIVE(ch))
@@ -267,8 +268,11 @@ END_EXTERN_C
 		      (uv) < 0x400000       ? 5 : \
 		      (uv) < 0x4000000      ? 6 : 7 )
 
+
+#define UNI_IS_INVARIANT(c)		((c) <  0xA0)
 /* UTF-EBCDIC sematic macros - transform back into UTF-8-Mod and then compare */
-#define UTF8_IS_INVARIANT(c) 		(NATIVE_TO_UTF(c) <  0xA0)
+#define NATIVE_IS_INVARIANT(c)		UNI_IS_INVARIANT(NATIVE_TO_ASCII(c))
+#define UTF8_IS_INVARIANT(c)		UNI_IS_INVARIANT(NATIVE_TO_UTF(c))
 #define UTF8_IS_START(c)		(NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xE0) != 0xA0)
 #define UTF8_IS_CONTINUATION(c)		(NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xE0) == 0xA0)
 #define UTF8_IS_CONTINUED(c) 		(NATIVE_TO_UTF(c) >= 0xA0)