UV uv;
#ifdef EBCDIC
UV literal_endpoint = 0;
+ bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
#endif
const char * const leaveit = /* set of acceptably-backslashed characters */
- PL_lex_inpat
- ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
- : "";
+ (const char *)
+ (PL_lex_inpat
+ ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+ : "");
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
/* If we are doing a trans and we know we want UTF8 set expectation */
I32 min; /* first character in range */
I32 max; /* last character in range */
- if (has_utf8) {
+#ifdef EBCDIC
+ UV uvmax = 0;
+#endif
+
+ if (has_utf8
+#ifdef EBCDIC
+ && !native_range
+#endif
+ ) {
char * const c = (char*)utf8_hop((U8*)d, -1);
char *e = d++;
while (e-- > c)
}
i = d - SvPVX_const(sv); /* remember current offset */
+#ifdef EBCDIC
+ SvGROW(sv,
+ SvLEN(sv) + (has_utf8 ?
+ (512 - UTF_CONTINUATION_MARK +
+ UNISKIP(0x100))
+ : 256));
+ /* How many two-byte within 0..255: 128 in UTF-8,
+ * 96 in UTF-8-mod. */
+#else
SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
+#endif
d = SvPVX(sv) + i; /* refresh d after realloc */
- d -= 2; /* eat the first char and the - */
-
- min = (U8)*d; /* first char in range */
- max = (U8)d[1]; /* last char in range */
+#ifdef EBCDIC
+ if (has_utf8) {
+ int j;
+ for (j = 0; j <= 1; j++) {
+ char * const c = (char*)utf8_hop((U8*)d, -1);
+ const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
+ if (j)
+ min = (U8)uv;
+ else if (uv < 256)
+ max = (U8)uv;
+ else {
+ max = (U8)0xff; /* only to \xff */
+ uvmax = uv; /* \x{100} to uvmax */
+ }
+ d = c; /* eat endpoint chars */
+ }
+ }
+ else {
+#endif
+ d -= 2; /* eat the first char and the - */
+ min = (U8)*d; /* first char in range */
+ max = (U8)d[1]; /* last char in range */
+#ifdef EBCDIC
+ }
+#endif
if (min > max) {
Perl_croak(aTHX_
else
#endif
for (i = min; i <= max; i++)
- *d++ = (char)i;
+#ifdef EBCDIC
+ if (has_utf8) {
+ const U8 ch = (U8)NATIVE_TO_UTF(i);
+ if (UNI_IS_INVARIANT(ch))
+ *d++ = (U8)i;
+ else {
+ *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
+ *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
+ }
+ }
+ else
+#endif
+ *d++ = (char)i;
+
+#ifdef EBCDIC
+ if (uvmax) {
+ d = (char*)uvchr_to_utf8((U8*)d, 0x100);
+ if (uvmax > 0x101)
+ *d++ = (char)UTF_TO_NATIVE(0xff);
+ if (uvmax > 0x100)
+ d = (char*)uvchr_to_utf8((U8*)d, uvmax);
+ }
+#endif
/* mark the range as done, and continue */
dorange = FALSE;
if (didrange) {
Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
}
- if (has_utf8) {
+ if (has_utf8
+#ifdef EBCDIC
+ && !native_range
+#endif
+ ) {
*d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
s++;
continue;
didrange = FALSE;
#ifdef EBCDIC
literal_endpoint = 0;
+ native_range = TRUE;
#endif
}
}
/* check for embedded arrays
(@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
- else if (*s == '@' && s[1]
- && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
- break;
+ else if (*s == '@' && s[1]) {
+ if (isALNUM_lazy_if(s+1,UTF))
+ break;
+ if (strchr(":'{$", s[1]))
+ break;
+ if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
+ break; /* in regexp, neither @+ nor @- are interpolated */
+ }
/* check for embedded scalars. only stop if we're sure it's a
variable.
if ((isALPHA(*s) || isDIGIT(*s)) &&
ckWARN(WARN_MISC))
Perl_warner(aTHX_ packWARN(WARN_MISC),
- "Unrecognized escape \\%c passed through",
- *s);
+ "Unrecognized escape \\%c passed through",
+ *s);
/* default action is to copy the quoted character */
goto default_action;
}
(PL_lex_repl ? OPpTRANS_FROM_UTF
: OPpTRANS_TO_UTF);
}
+#ifdef EBCDIC
+ if (uv > 255 && !dorange)
+ native_range = FALSE;
+#endif
}
else {
*d++ = (char)uv;
SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
d = SvPVX(sv) + (d - odest);
}
+#ifdef EBCDIC
+ if (!dorange)
+ native_range = FALSE; /* \N{} is guessed to be Unicode */
+#endif
Copy(str, d, len, char);
d += len;
SvREFCNT_dec(res);
}
d = (char*)uvchr_to_utf8((U8*)d, nextuv);
has_utf8 = TRUE;
+#ifdef EBCDIC
+ if (uv > 255 && !dorange)
+ native_range = FALSE;
+#endif
}
else {
*d++ = NATIVE_TO_NEED(has_utf8,*s++);
/* return the substring (via yylval) only if we parsed anything */
if (s > PL_bufptr) {
if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
- sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
+ sv = new_constant(start, s - start,
+ (const char *)(PL_lex_inpat ? "qr" : "q"),
sv, NULL,
- ( PL_lex_inwhat == OP_TRANS
- ? "tr"
- : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
- ? "s"
- : "qq")));
+ (const char *)
+ (( PL_lex_inwhat == OP_TRANS
+ ? "tr"
+ : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
+ ? "s"
+ : "qq"))));
yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
} else
SvREFCNT_dec(sv);
if (s[1]) {
if (strchr("wds]",s[1]))
weight += 100;
- else if (seen['\''] || seen['"'])
+ else if (seen[(U8)'\''] || seen[(U8)'"'])
weight += 1;
else if (strchr("rnftbxcav",s[1]))
weight += 40;
len = start - SvPVX(PL_linestr);
#endif
s = PEEKSPACE(s);
-#ifdef PERLMAD
+#ifdef PERL_MAD
start = SvPVX(PL_linestr) + len;
#endif
PL_bufptr = start;
STRLEN len;
bool bof = FALSE;
+ /* orig_keyword, gvp, and gv are initialized here because
+ * jump to the label just_a_word_zero can bypass their
+ * initialization later. */
+ I32 orig_keyword = 0;
+ GV *gv = NULL;
+ GV **gvp = NULL;
+
DEBUG_T( {
SV* tmp = newSVpvs("");
PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
PL_last_uni = 0;
PL_last_lop = 0;
if (PL_lex_brackets) {
- yyerror(PL_lex_formbrack
- ? "Format not terminated"
- : "Missing right curly or square bracket");
+ yyerror((const char *)
+ (PL_lex_formbrack
+ ? "Format not terminated"
+ : "Missing right curly or square bracket"));
}
DEBUG_T( { PerlIO_printf(Perl_debug_log,
"### Tokener got EOF\n");
if (PL_madskills)
PL_faketokens = 1;
#endif
- sv_setpv(PL_linestr,PL_minus_p
- ? ";}continue{print;}" : ";}");
+ sv_setpv(PL_linestr,
+ (const char *)
+ (PL_minus_p
+ ? ";}continue{print;}" : ";}"));
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
context messages from yyerror().
*/
PL_bufptr = s;
- yyerror( *s
- ? Perl_form(aTHX_ "Invalid separator character "
- "%c%c%c in attribute list", q, *s, q)
- : "Unterminated attribute list" );
+ yyerror( (const char *)
+ (*s
+ ? Perl_form(aTHX_ "Invalid separator character "
+ "%c%c%c in attribute list", q, *s, q)
+ : "Unterminated attribute list" ) );
if (attrs)
op_free(attrs);
OPERATOR(':');
keylookup: {
I32 tmp;
- I32 orig_keyword = 0;
- GV *gv = NULL;
- GV **gvp = NULL;
+
+ orig_keyword = 0;
+ gv = NULL;
+ gvp = NULL;
PL_bufptr = s;
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
while (*proto == ';')
proto++;
if (*proto == '&' && *s == '{') {
- sv_setpv(PL_subname, PL_curstash ?
- "__ANON__" : "__ANON__::__ANON__");
+ sv_setpv(PL_subname,
+ (const char *)
+ (PL_curstash ?
+ "__ANON__" : "__ANON__::__ANON__"));
PREBLOCK(LSTOPSUB);
}
}
#endif
if (!have_name) {
sv_setpv(PL_subname,
- PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
+ (const char *)
+ (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
TOKEN(ANONSUB);
}
#ifndef PERL_MAD
if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
SV *msg;
- why2 = strEQ(key,"charnames")
- ? "(possibly a missing \"use charnames ...\")"
- : "";
+ why2 = (const char *)
+ (strEQ(key,"charnames")
+ ? "(possibly a missing \"use charnames ...\")"
+ : "");
msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
(type ? type: "undef"), why2);
s++;
if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
- const char * const brack = (*s == '[') ? "[...]" : "{...}";
+ const char * const brack =
+ (const char *)
+ ((*s == '[') ? "[...]" : "{...}");
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
"Ambiguous use of %c{%s%s} resolved to %c%s%s",
funny, dest, brack, funny, dest, brack);
dVAR;
PMOP *pm;
char *s = scan_str(start,!!PL_madskills,FALSE);
- const char * const valid_flags = (type == OP_QR) ? "iomsx" : "iogcmsx";
+ const char * const valid_flags =
+ (const char *)((type == OP_QR) ? "iomsx" : "iogcmsx");
#ifdef PERL_MAD
char *modstart;
#endif
if (!s) {
const char * const delimiter = skipspace(start);
- Perl_croak(aTHX_ *delimiter == '?'
- ? "Search pattern not terminated or ternary operator parsed as search pattern"
- : "Search pattern not terminated" );
+ Perl_croak(aTHX_
+ (const char *)
+ (*delimiter == '?'
+ ? "Search pattern not terminated or ternary operator parsed as search pattern"
+ : "Search pattern not terminated" ));
}
pm = (PMOP*)newPMOP(type, 0);
PL_multi_end = 0;
pm->op_pmflags |= PMf_EVAL;
while (es-- > 0)
- sv_catpv(repl, es ? "eval " : "do ");
+ sv_catpv(repl, (const char *)(es ? "eval " : "do "));
sv_catpvs(repl, "{");
sv_catsv(repl, PL_lex_repl);
if (strchr(SvPVX(PL_lex_repl), '#'))
#ifdef PERL_MAD
found_newline = 0;
#endif
- if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
+ if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
herewas = newSVpvn(s,PL_bufend-s);
}
else {
{
dVAR;
SV *sv; /* scalar value: string */
- char *tmps; /* temp string, used for delimiter matching */
+ const char *tmps; /* temp string, used for delimiter matching */
register char *s = start; /* current position in the buffer */
register char term; /* terminating character */
register char *to; /* current position in the sv's data */
if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
(PL_hints & HINT_NEW_INTEGER) )
- sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
+ sv = new_constant(PL_tokenbuf,
+ d - PL_tokenbuf,
+ (const char *)
(floatit ? "float" : "integer"),
sv, NULL, NULL);
break;
goto utf16be;
}
}
+#ifdef EBCDIC
+ case 0xDD:
+ if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+ s += 4; /* UTF-8 */
+ }
+ break;
+#endif
+
default:
if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
/* Leading bytes