From: Karl Williamson Date: Sun, 8 Nov 2009 00:36:55 +0000 (-0700) Subject: Define specially handled chars; and clean-up ebcdic vs unicode X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5cd46e1f8d2d8792e0bfe33c11fc9ce2dee1c278;p=p5sagit%2Fp5-mst-13.2.git Define specially handled chars; and clean-up ebcdic vs unicode --- diff --git a/utf8.h b/utf8.h index 659319e..e70559e 100644 --- a/utf8.h +++ b/utf8.h @@ -112,7 +112,7 @@ encoded character. #define UNI_IS_INVARIANT(c) (((UV)c) < 0x80) #define UTF8_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_UTF(c)) -#define NATIVE_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_ASCII(c)) +#define NATIVE_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE8_TO_UNI(c)) #define UTF8_IS_START(c) (((U8)c) >= 0xc0 && (((U8)c) <= 0xfd)) #define UTF8_IS_CONTINUATION(c) (((U8)c) >= 0x80 && (((U8)c) <= 0xbf)) #define UTF8_IS_CONTINUED(c) (((U8)c) & 0x80) @@ -235,35 +235,28 @@ encoded character. #define UTF8_IS_ASCII(c) UTF8_IS_INVARIANT(c) -#define UNICODE_LATIN_SMALL_LETTER_SHARP_S 0x00DF #define UNICODE_GREEK_CAPITAL_LETTER_SIGMA 0x03A3 #define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 #define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3 -#define EBCDIC_LATIN_SMALL_LETTER_SHARP_S 0x0059 - #define UNI_DISPLAY_ISPRINT 0x0001 #define UNI_DISPLAY_BACKSLASH 0x0002 #define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH) #define UNI_DISPLAY_REGEX (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH) -#ifdef EBCDIC -# define ANYOF_FOLD_SHARP_S(node, input, end) \ - (ANYOF_BITMAP_TEST(node, EBCDIC_LATIN_SMALL_LETTER_SHARP_S) && \ - (ANYOF_FLAGS(node) & ANYOF_UNICODE) && \ - (ANYOF_FLAGS(node) & ANYOF_FOLD) && \ - ((end) > (input) + 1) && \ - toLOWER((input)[0]) == 's' && \ - toLOWER((input)[1]) == 's') -#else -# define ANYOF_FOLD_SHARP_S(node, input, end) \ - (ANYOF_BITMAP_TEST(node, UNICODE_LATIN_SMALL_LETTER_SHARP_S) && \ +#ifndef EBCDIC +# define LATIN_SMALL_LETTER_SHARP_S 0x00DF +# define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0x00FF +# define MICRO_SIGN 0x00B5 +#endif + +#define ANYOF_FOLD_SHARP_S(node, input, end) \ + (ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \ (ANYOF_FLAGS(node) & ANYOF_UNICODE) && \ (ANYOF_FLAGS(node) & ANYOF_FOLD) && \ ((end) > (input) + 1) && \ toLOWER((input)[0]) == 's' && \ toLOWER((input)[1]) == 's') -#endif #define SHARP_S_SKIP 2 #ifdef EBCDIC diff --git a/utfebcdic.h b/utfebcdic.h index e61b4a7..8a6176c 100644 --- a/utfebcdic.h +++ b/utfebcdic.h @@ -293,6 +293,10 @@ EXTCONST unsigned char PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) * 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF }; +#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF +#define LATIN_SMALL_LETTER_SHARP_S 0x59 +#define MICRO_SIGN 0xA0 + EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */ 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, @@ -333,6 +337,10 @@ EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */ 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF }; +#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF +#define LATIN_SMALL_LETTER_SHARP_S 0x59 +#define MICRO_SIGN 0xA0 + EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */ 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, @@ -373,6 +381,11 @@ EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */ 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF }; + +#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF +#define LATIN_SMALL_LETTER_SHARP_S 0x59 +#define MICRO_SIGN 0xA0 + EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */ 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, @@ -409,7 +422,7 @@ END_EXTERN_C /* Native to iso-8859-1 */ #define NATIVE_TO_ASCII(ch) PL_e2a[(U8)(ch)] -#define NATIVE8_TO_UNI(ch) NATIVE_TO_ASCII(ch) /* synonym */ +#define NATIVE8_TO_UNI(ch) NATIVE_TO_ASCII(ch) /* a clearer synonym */ #define ASCII_TO_NATIVE(ch) PL_a2e[(U8)(ch)] /* Transform after encoding */ #define NATIVE_TO_UTF(ch) PL_e2utf[(U8)(ch)]