/* #define IN_UTF8 (PL_curcop->op_private & HINT_UTF8) */
#define IN_BYTE (PL_curcop->op_private & HINT_BYTE)
+#ifdef USE_BYTES_DOWNGRADES
+#define DO_UTF8(sv) (SvUTF8(sv) && !(IN_BYTE && sv_utf8_downgrade(sv,0)))
+#else
#define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTE)
+#endif
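
/* Usage sketch (editorial, not part of the patch): a hypothetical
 * helper showing how DO_UTF8 is typically consulted to choose between
 * character and byte semantics.  Assumes the usual perl API
 * (SvPV, utf8_length). */
STRLEN
my_sv_char_length(pTHX_ SV *sv)
{
    STRLEN len;
    char *s = SvPV(sv, len);
    if (DO_UTF8(sv))        /* char semantics unless "use bytes" is in scope */
        return utf8_length((U8*)s, (U8*)(s + len));
    return len;             /* byte semantics: raw octet count */
}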
#define UTF8_ALLOW_EMPTY 0x0001
#define UTF8_ALLOW_CONTINUATION 0x0002
#define UTF8_QUAD_MAX UINT64_C(0x1000000000)
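/* Editorial note: 0x1000000000 is 2**36.  A 7-byte sequence carries six
 * continuation bytes of 6 bits each (6 * 6 = 36), so code points below
 * 2**36 fit in at most 7 bytes; anything larger falls through to the
 * 13-byte form (0xFF start byte plus 12 * 6 = 72 payload bits, enough
 * for a full 64-bit UV). */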
/*

The following table is from Unicode 3.1.

 Code Points		1st Byte  2nd Byte  3rd Byte  4th Byte

   U+0000..U+007F	00..7F
   U+0080..U+07FF	C2..DF    80..BF
   U+0800..U+0FFF	E0        A0..BF    80..BF
   U+1000..U+FFFF	E1..EF    80..BF    80..BF
  U+10000..U+3FFFF	F0        90..BF    80..BF    80..BF
  U+40000..U+FFFFF	F1..F3    80..BF    80..BF    80..BF
 U+100000..U+10FFFF	F4        80..8F    80..BF    80..BF

 */
#ifdef HAS_QUAD
#define UNISKIP(uv) ( (uv) < 0x80          ? 1 : \
                      (uv) < 0x800         ? 2 : \
                      (uv) < 0x10000       ? 3 : \
                      (uv) < 0x200000      ? 4 : \
                      (uv) < 0x4000000     ? 5 : \
                      (uv) < 0x80000000    ? 6 : \
                      (uv) < UTF8_QUAD_MAX ? 7 : 13 )
#else
/* No, I'm not even going to *TRY* putting #ifdef inside a #define */
#define UNISKIP(uv) ( (uv) < 0x80          ? 1 : \
                      (uv) < 0x800         ? 2 : \
                      (uv) < 0x10000       ? 3 : \
                      (uv) < 0x200000      ? 4 : \
                      (uv) < 0x4000000     ? 5 : \
                      (uv) < 0x80000000    ? 6 : 7 )
#endif
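
/* Editorial sanity-check sketch (not part of the patch; assumes
 * <assert.h>) showing the byte counts UNISKIP reports: */
assert(UNISKIP(0x7F)     == 1);  /* ASCII: a single byte              */
assert(UNISKIP(0x80)     == 2);  /* first two-byte code point         */
assert(UNISKIP(0x263A)   == 3);  /* BMP character, WHITE SMILING FACE */
assert(UNISKIP(0x10FFFF) == 4);  /* top of the Unicode range          */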
#define isIDFIRST_lazy(p) isIDFIRST_lazy_if(p,1)
#define isALNUM_lazy(p) isALNUM_lazy_if(p,1)
+
+/* EBCDIC-happy ways of converting native code to UTF8 */
+
+#ifdef EBCDIC
+#define NATIVE_TO_ASCII(ch) PL_e2a[(ch)]
+#define ASCII_TO_NATIVE(ch) PL_a2e[(ch)]
+#define UNI_TO_NATIVE(ch) (((ch) > 0xFF) ? (ch) : (UV) PL_a2e[(ch)])
+#define NATIVE_TO_UNI(ch) (((ch) > 0xFF) ? (ch) : (UV) PL_e2a[(ch)])
+#else
+#define NATIVE_TO_ASCII(ch) (ch)
+#define ASCII_TO_NATIVE(ch) (ch)
+#define UNI_TO_NATIVE(ch) (ch)
+#define NATIVE_TO_UNI(ch) (ch)
+#endif
+
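/* Editorial illustration (not part of the patch).  On an ASCII build
 * all four macros are identity maps; on an EBCDIC build (assuming code
 * page 1047, where the native 'A' is 0xC1 and PL_e2a[0xC1] == 0x41): */
UV uni = NATIVE_TO_UNI((UV)'A');   /* 0x41 on either kind of build */
UV nat = UNI_TO_NATIVE(uni);       /* back to the native 'A'       */
/* Code points above 0xFF pass through unchanged, since the 256-entry
 * translation tables only cover single bytes; the comparison must be
 * "> 0xFF", as 0x100 would index one past the end of the tables. */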