X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=utfebcdic.h;h=2c56006ff67a6a972df61d212ca91c4c1bf3f9bb;hb=c4fbe2471f42249bd57e1c071c99349d2331aea5;hp=0eef54b00187346e730a8cee11300af58475723e;hpb=753838417b30f18133bff09e0b7fa58f5a0db60a;p=p5sagit%2Fp5-mst-13.2.git diff --git a/utfebcdic.h b/utfebcdic.h index 0eef54b..2c56006 100644 --- a/utfebcdic.h +++ b/utfebcdic.h @@ -15,17 +15,18 @@ START_EXTERN_C #ifdef DOINIT /* Indexed by encoded byte this table gives the length of the sequence. Adapted from the shadow flags table in tr16. - The entries marked 9 are continuation bytes. + The entries marked 9 in tr6 are continuation bytes and are marked + as length 1 here so that we can recover. */ EXTCONST unsigned char PL_utf8skip[] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,9,9,9,9,9,9,9,9,9,9,1,1,1,1,1, -1,9,9,9,9,9,9,9,9,9,1,1,1,1,1,1, -1,1,9,9,9,9,9,9,9,9,9,1,1,1,1,1, -9,9,9,9,2,2,2,2,2,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1, 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 2,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2, @@ -221,7 +222,7 @@ END_EXTERN_C #define UTF_TO_NATIVE(ch) PL_utf2e[(U8)(ch)] /* Transform in wide UV char space */ #define NATIVE_TO_UNI(ch) (((ch) > 255) ? (ch) : NATIVE_TO_ASCII(ch)) -#define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : (UV) ASCII_TO_NATIVE(ch)) +#define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch)) /* Transform in invariant..byte space */ #define NATIVE_TO_NEED(enc,ch) ((enc) ? UTF_TO_NATIVE(NATIVE_TO_ASCII(ch)) : (ch)) #define ASCII_TO_NEED(enc,ch) ((enc) ? UTF_TO_NATIVE(ch) : ASCII_TO_NATIVE(ch)) @@ -233,10 +234,10 @@ END_EXTERN_C * unnecessarily. */ -#define isIDFIRST_lazy_if(p,c) ((IN_BYTE || (!c || UTF8_IS_INVARIANT(*p))) \ +#define isIDFIRST_lazy_if(p,c) ((IN_BYTES || (!c || UTF8_IS_INVARIANT(*p))) \ ? isIDFIRST(*(p)) \ : isIDFIRST_utf8((U8*)p)) -#define isALNUM_lazy_if(p,c) ((IN_BYTE || (!c || UTF8_IS_INVARIANT(*p))) \ +#define isALNUM_lazy_if(p,c) ((IN_BYTES || (!c || UTF8_IS_INVARIANT(*p))) \ ? isALNUM(*(p)) \ : isALNUM_utf8((U8*)p)) @@ -267,10 +268,13 @@ END_EXTERN_C (uv) < 0x400000 ? 5 : \ (uv) < 0x4000000 ? 6 : 7 ) + +#define UNI_IS_INVARIANT(c) ((c) < 0xA0) /* UTF-EBCDIC sematic macros - transform back into UTF-8-Mod and then compare */ -#define UTF8_IS_INVARIANT(c) (NATIVE_TO_UTF(c) < 0xA0) +#define NATIVE_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_ASCII(c)) +#define UTF8_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_UTF(c)) #define UTF8_IS_START(c) (NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xE0) != 0xA0) -#define UTF8_IS_CONTINUATION(c) (NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xE0) == 0xA0) +#define UTF8_IS_CONTINUATION(c) ((NATIVE_TO_UTF(c) & 0xE0) == 0xA0) #define UTF8_IS_CONTINUED(c) (NATIVE_TO_UTF(c) >= 0xA0) #define UTF8_IS_DOWNGRADEABLE_START(c) (NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xF8) == 0xC0)