X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=utfebcdic.h;h=7931940e4680318b3954848103199785980a0a4f;hb=0d658bf5a06395c253c09769a32f6face7d329cb;hp=efcd00db7eab134d93f7505e46df711b50c69476;hpb=1d72bdf6104ef56ab17c3abedf522be0125851c7;p=p5sagit%2Fp5-mst-13.2.git diff --git a/utfebcdic.h b/utfebcdic.h index efcd00d..7931940 100644 --- a/utfebcdic.h +++ b/utfebcdic.h @@ -1,6 +1,6 @@ /* utfebcdic.h * - * Copyright (c) 2001, Larry Wall, Nick Ing-Simmons + * Copyright (c) 2001-2002, Larry Wall, Nick Ing-Simmons * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -15,17 +15,18 @@ START_EXTERN_C #ifdef DOINIT /* Indexed by encoded byte this table gives the length of the sequence. Adapted from the shadow flags table in tr16. - The entries marked 9 are continuation bytes. + The entries marked 9 in tr16 are continuation bytes and are marked + as length 1 here so that we can recover. 
*/ EXTCONST unsigned char PL_utf8skip[] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,9,9,9,9,9,9,9,9,9,9,1,1,1,1,1, -1,9,9,9,9,9,9,9,9,9,1,1,1,1,1,1, -1,1,9,9,9,9,9,9,9,9,9,1,1,1,1,1, -9,9,9,9,2,2,2,2,2,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1, 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 2,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2, @@ -100,7 +101,7 @@ EXTCONST unsigned char PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) * 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF }; -EXTCONST unsigned char PL_e2a[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) */ +EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */ 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07, @@ -221,7 +222,7 @@ END_EXTERN_C #define UTF_TO_NATIVE(ch) PL_utf2e[(U8)(ch)] /* Transform in wide UV char space */ #define NATIVE_TO_UNI(ch) (((ch) > 255) ? (ch) : NATIVE_TO_ASCII(ch)) -#define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : (UV) ASCII_TO_NATIVE(ch)) +#define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch)) /* Transform in invariant..byte space */ #define NATIVE_TO_NEED(enc,ch) ((enc) ? UTF_TO_NATIVE(NATIVE_TO_ASCII(ch)) : (ch)) #define ASCII_TO_NEED(enc,ch) ((enc) ? UTF_TO_NATIVE(ch) : ASCII_TO_NATIVE(ch)) @@ -233,10 +234,10 @@ END_EXTERN_C * unnecessarily. */ -#define isIDFIRST_lazy_if(p,c) ((IN_BYTE || (!c || UTF8_INVARIANT(*p)) \ +#define isIDFIRST_lazy_if(p,c) ((IN_BYTES || (!c || UTF8_IS_INVARIANT(*p))) \ ? 
isIDFIRST(*(p)) \ : isIDFIRST_utf8((U8*)p)) -#define isALNUM_lazy_if(p,c) ((IN_BYTE || (!c || UTF8_INVARIANT(*p)) \ +#define isALNUM_lazy_if(p,c) ((IN_BYTES || (!c || UTF8_IS_INVARIANT(*p))) \ ? isALNUM(*(p)) \ : isALNUM_utf8((U8*)p)) @@ -267,10 +268,13 @@ END_EXTERN_C (uv) < 0x400000 ? 5 : \ (uv) < 0x4000000 ? 6 : 7 ) + +#define UNI_IS_INVARIANT(c) ((c) < 0xA0) /* UTF-EBCDIC sematic macros - transform back into UTF-8-Mod and then compare */ -#define UTF8_IS_INVARIANT(c) (NATIVE_TO_UTF(c) < 0xA0) +#define NATIVE_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_ASCII(c)) +#define UTF8_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_UTF(c)) #define UTF8_IS_START(c) (NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xE0) != 0xA0) -#define UTF8_IS_CONTINUATION(c) (NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xE0) == 0xA0) +#define UTF8_IS_CONTINUATION(c) ((NATIVE_TO_UTF(c) & 0xE0) == 0xA0) #define UTF8_IS_CONTINUED(c) (NATIVE_TO_UTF(c) >= 0xA0) #define UTF8_IS_DOWNGRADEABLE_START(c) (NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xF8) == 0xC0) @@ -280,7 +284,7 @@ END_EXTERN_C #define UTF_CONTINUATION_MASK ((U8)0x1f) #define UTF_ACCUMULATION_SHIFT 5 -#define UTF8_ACCUMULATE(old, new) (((old) << UTF_ACCUMULATION_SHIFT)|(NATIVE_TO_UTF(new) & UTFE_CONTINUATION_MASK)) +#define UTF8_ACCUMULATE(old, new) (((old) << UTF_ACCUMULATION_SHIFT)|(NATIVE_TO_UTF(new) & UTF_CONTINUATION_MASK)) /* UTF-EBCDIC encode a downgradeable value */ #define UTF8_EIGHT_BIT_HI(c) UTF_TO_NATIVE((((U8)(c))>>UTF_ACCUMULATION_SHIFT)|UTF_START_MARK(2))