char * const s0 = STRING(scan), *s, *t;
char * const s1 = s0 + STR_LEN(scan) - 1;
char * const s2 = s1 - 4;
+#ifdef EBCDIC /* RD tunifold greek 0390 and 03B0 */
+ const char t0[] = "\xaf\x49\xaf\x42";
+#else
const char t0[] = "\xcc\x88\xcc\x81";
+#endif
const char * const t1 = t0 + 3;
for (s = s0 + 2;
s < s2 && (t = ninstr(s, s1, t0, t1));
s = t + 4) {
+#ifdef EBCDIC
+ if (((U8)t[-1] == 0x68 && (U8)t[-2] == 0xB4) ||
+ ((U8)t[-1] == 0x46 && (U8)t[-2] == 0xB5))
+#else
if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) ||
((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
+#endif
*min -= 4;
}
}
STRLEN foldlen;
const UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
+#ifdef EBCDIC /* RD t/uni/fold ff and 6b */
+ if (RExC_precomp[0] == ':' &&
+ RExC_precomp[1] == '[' &&
+ (f == 0xDF || f == 0x92)) {
+ f = NATIVE_TO_UNI(f);
+ }
+#endif
/* If folding and foldable and a single
* character, insert also the folded version
* to the charclass. */
if (f != value) {
+#ifdef EBCDIC /* RD tunifold ligatures s,t fb05, fb06 */
+ if ((RExC_precomp[0] == ':' &&
+ RExC_precomp[1] == '[' &&
+ (f == 0xA2 &&
+ (value == 0xFB05 || value == 0xFB06))) ?
+ foldlen == ((STRLEN)UNISKIP(f) - 1) :
+ foldlen == (STRLEN)UNISKIP(f) )
+#else
if (foldlen == (STRLEN)UNISKIP(f))
+#endif
Perl_sv_catpvf(aTHX_ listsv,
"%04"UVxf"\n", f);
else {
to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
-
+#ifdef EBCDIC
+ ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
+ ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
+#else
ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
- uniflags);
+ uniflags);
ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
- uniflags);
+ uniflags);
+#endif
}
else {
ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
UV uv;
#ifdef EBCDIC
UV literal_endpoint = 0;
+ bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
#endif
const char * const leaveit = /* set of acceptably-backslashed characters */
I32 min; /* first character in range */
I32 max; /* last character in range */
- if (has_utf8) {
+#ifdef EBCDIC
+ UV uvmax = 0;
+#endif
+
+ if (has_utf8
+#ifdef EBCDIC
+ && !native_range
+#endif
+ ) {
char * const c = (char*)utf8_hop((U8*)d, -1);
char *e = d++;
while (e-- > c)
}
i = d - SvPVX_const(sv); /* remember current offset */
+#ifdef EBCDIC
+ SvGROW(sv,
+ SvLEN(sv) + (has_utf8 ?
+ (512 - UTF_CONTINUATION_MARK +
+ UNISKIP(0x100))
+ : 256));
+ /* How many two-byte within 0..255: 128 in UTF-8,
+ * 96 in UTF-8-mod. */
+#else
SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
+#endif
d = SvPVX(sv) + i; /* refresh d after realloc */
- d -= 2; /* eat the first char and the - */
-
- min = (U8)*d; /* first char in range */
- max = (U8)d[1]; /* last char in range */
+#ifdef EBCDIC
+ if (has_utf8) {
+ int j;
+ for (j = 0; j <= 1; j++) {
+ char * const c = (char*)utf8_hop((U8*)d, -1);
+ const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
+ if (j)
+ min = (U8)uv;
+ else if (uv < 256)
+ max = (U8)uv;
+ else {
+ max = (U8)0xff; /* only to \xff */
+ uvmax = uv; /* \x{100} to uvmax */
+ }
+ d = c; /* eat endpoint chars */
+ }
+ }
+ else {
+#endif
+ d -= 2; /* eat the first char and the - */
+ min = (U8)*d; /* first char in range */
+ max = (U8)d[1]; /* last char in range */
+#ifdef EBCDIC
+ }
+#endif
if (min > max) {
Perl_croak(aTHX_
else
#endif
for (i = min; i <= max; i++)
- *d++ = (char)i;
+#ifdef EBCDIC
+ if (has_utf8) {
+ const U8 ch = (U8)NATIVE_TO_UTF(i);
+ if (UNI_IS_INVARIANT(ch))
+ *d++ = (U8)i;
+ else {
+ *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
+ *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
+ }
+ }
+ else
+#endif
+ *d++ = (char)i;
+
+#ifdef EBCDIC
+ if (uvmax) {
+ d = (char*)uvchr_to_utf8((U8*)d, 0x100);
+ if (uvmax > 0x101)
+ *d++ = (char)UTF_TO_NATIVE(0xff);
+ if (uvmax > 0x100)
+ d = (char*)uvchr_to_utf8((U8*)d, uvmax);
+ }
+#endif
/* mark the range as done, and continue */
dorange = FALSE;
if (didrange) {
Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
}
- if (has_utf8) {
+ if (has_utf8
+#ifdef EBCDIC
+ && !native_range
+#endif
+ ) {
*d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
s++;
continue;
didrange = FALSE;
#ifdef EBCDIC
literal_endpoint = 0;
+ native_range = TRUE;
#endif
}
}
if ((isALPHA(*s) || isDIGIT(*s)) &&
ckWARN(WARN_MISC))
Perl_warner(aTHX_ packWARN(WARN_MISC),
- "Unrecognized escape \\%c passed through",
- *s);
+ "Unrecognized escape \\%c passed through",
+ *s);
/* default action is to copy the quoted character */
goto default_action;
}
(PL_lex_repl ? OPpTRANS_FROM_UTF
: OPpTRANS_TO_UTF);
}
+#ifdef EBCDIC
+ if (uv > 255 && !dorange)
+ native_range = FALSE;
+#endif
}
else {
*d++ = (char)uv;
SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
d = SvPVX(sv) + (d - odest);
}
+#ifdef EBCDIC
+ if (!dorange)
+ native_range = FALSE; /* \N{} is guessed to be Unicode */
+#endif
Copy(str, d, len, char);
d += len;
SvREFCNT_dec(res);
}
d = (char*)uvchr_to_utf8((U8*)d, nextuv);
has_utf8 = TRUE;
+#ifdef EBCDIC
+ if (uv > 255 && !dorange)
+ native_range = FALSE;
+#endif
}
else {
*d++ = NATIVE_TO_NEED(has_utf8,*s++);
filter_add(utf16rev_textfilter, NULL);
Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
utf16_to_utf8_reversed(s, news,
- PL_bufend - (char*)s - 1,
+ PL_bufend - (char*)s,
&newlen);
sv_setpvn(PL_linestr, (const char*)news, newlen);
#ifdef PERL_MAD
goto utf16be;
}
}
+#ifdef EBCDIC
+ case 0xDD:
+ if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+ s += 4; /* UTF-8 */
+ }
+ break;
+#endif
+
default:
if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
/* Leading bytes