# define PL_nextval (PL_parser->nextval)
#endif
+/* This can't be done with embed.fnc, because struct yy_parser contains a
+ member named pending_ident, which clashes with the generated #define */
static int
S_pending_ident(pTHX);
static const char ident_too_long[] = "Identifier too long";
-static const char commaless_variable_list[] = "comma-less variable list";
-
-#ifndef PERL_NO_UTF16_FILTER
-static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
-static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
-#endif
#ifdef PERL_MAD
# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
#endif
+static int
+S_deprecate_commaless_var_list(pTHX) { /* Common tail shared by the lexer
+ * branches that call it: sets PL_expect to XTERM, raises the "comma-less
+ * variable list" deprecation through deprecate(), and returns REPORT(',')
+ * so that the caller hands back a comma token.  The visible call sites
+ * reach it only when PL_expect == XOPERATOR and PL_lex_formbrack is
+ * non-zero and equal to PL_lex_brackets. */
+ PL_expect = XTERM;
+ deprecate("comma-less variable list");
+ return REPORT(','); /* grandfather non-comma-format format */
+}
+
/*
* S_ao
*
}
/*
- * Perl_deprecate
- */
-
-void
-Perl_deprecate(pTHX_ const char *const s)
-{
- PERL_ARGS_ASSERT_DEPRECATE;
-
- if (ckWARN(WARN_DEPRECATED))
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
-}
-
-void
-Perl_deprecate_old(pTHX_ const char *const s)
-{
- /* This function should NOT be called for any new deprecated warnings */
- /* Use Perl_deprecate instead */
- /* */
- /* It is here to maintain backward compatibility with the pre-5.8 */
- /* warnings category hierarchy. The "deprecated" category used to */
- /* live under the "syntax" category. It is now a top-level category */
- /* in its own right. */
-
- PERL_ARGS_ASSERT_DEPRECATE_OLD;
-
- if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
- "Use of %s is deprecated", s);
-}
-
-/*
* experimental text filters for win32 carriage-returns, utf16-to-utf8 and
* utf16-to-utf8-reversed.
*/
curoff = s - SvPVX(PL_linestr);
#endif
- if ((s = filter_gets(PL_linestr, PL_rsfp,
- (prevlen = SvCUR(PL_linestr)))) == NULL)
+ if ((s = filter_gets(PL_linestr, (prevlen = SvCUR(PL_linestr))))
+ == NULL)
{
#ifdef PERL_MAD
if (PL_madskills && curoff != startoff) {
if ((t = strchr(s, '(')) && t < PL_bufptr)
return;
- if (ckWARN_d(WARN_AMBIGUOUS)){
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Warning: Use of \"%.*s\" without parentheses is ambiguous",
- (int)(s - PL_last_uni), PL_last_uni);
- }
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Warning: Use of \"%.*s\" without parentheses is ambiguous",
+ (int)(s - PL_last_uni), PL_last_uni);
}
/*
{
dVAR;
SV * const sv = newSVpvn_utf8(start, len,
- UTF && !IN_BYTES
+ !IN_BYTES
+ && UTF
+ && !is_ascii_string((const U8*)start, len)
&& is_utf8_string((const U8*)start, len));
return sv;
}
if (!PL_lex_inpat) /* not a regexp, so $ must be var */
break;
if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
- if (s[1] == '\\' && ckWARN(WARN_AMBIGUOUS)) {
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of $\\ in regex");
+ if (s[1] == '\\') {
+ Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Possible unintended interpolation of $\\ in regex");
}
break; /* in regexp, $ might be tail anchor */
}
if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
{
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
*--s = '$';
break;
}
/* FALL THROUGH */
default:
{
- if ((isALPHA(*s) || isDIGIT(*s)) &&
- ckWARN(WARN_MISC))
- Perl_warner(aTHX_ packWARN(WARN_MISC),
- "Unrecognized escape \\%c passed through",
- *s);
+ if ((isALPHA(*s) || isDIGIT(*s)))
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
+ "Unrecognized escape \\%c passed through",
+ *s);
/* default action is to copy the quoted character */
goto default_action;
}
bare_package:
start_force(PL_curforce);
NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
- newSVpvn(tmpbuf,len));
+ S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
if (PL_madskills)
curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
const int old_len = SvCUR(buf_sv);
/* ensure buf_sv is large enough */
- SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
+ SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
correct_length)) <= 0) {
if (PerlIO_error(PL_rsfp))
return 0 ; /* end of file */
}
SvCUR_set(buf_sv, old_len + len) ;
+ SvPVX(buf_sv)[old_len + len] = '\0';
} else {
/* Want a line */
if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
}
STATIC char *
-S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
+S_filter_gets(pTHX_ register SV *sv, STRLEN append)
{
dVAR;
return NULL ;
}
else
- return (sv_gets(sv, fp, append));
+ return (sv_gets(sv, PL_rsfp, append));
}
STATIC HV *
}
do {
bof = PL_rsfp ? TRUE : FALSE;
- if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
+ if ((s = filter_gets(PL_linestr, 0)) == NULL) {
fake_eof:
#ifdef PERL_MAD
PL_realtokenstart = -1;
while (s < PL_bufend && isSPACE(*s))
s++;
if (s < PL_bufend) {
- Newxz(newargv,PL_origargc+3,char*);
+ Newx(newargv,PL_origargc+3,char*);
newargv[1] = s;
while (s < PL_bufend && !isSPACE(*s))
s++;
const char *d1 = d;
do {
- if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') {
+ bool baduni = FALSE;
+ if (*d1 == 'C') {
+ const char *d2 = d1 + 1;
+ if (parse_unicode_opts((const char **)&d2)
+ != PL_unicode)
+ baduni = TRUE;
+ }
+ if (baduni || *d1 == 'M' || *d1 == 'm') {
const char * const m = d1;
while (*d1 && !isSPACE(*d1))
d1++;
pl_yylval.ival = 0;
OPERATOR(ASSIGNOP);
case '!':
- if (PL_expect == XSTATE && s[1] == '!' && s[2] == '!') {
- s += 3;
- LOP(OP_DIE,XTERM);
- }
s++;
{
const char tmp = *s++;
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
- PL_expect = XTERM;
- deprecate_old(commaless_variable_list);
- return REPORT(','); /* grandfather non-comma-format format */
+ return deprecate_commaless_var_list();
}
}
AOPERATOR(DORDOR);
}
case '?': /* may either be conditional or pattern */
- if (PL_expect == XSTATE && s[1] == '?' && s[2] == '?') {
- s += 3;
- LOP(OP_WARN,XTERM);
- }
if (PL_expect == XOPERATOR) {
char tmp = *s++;
if(tmp == '?') {
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
- PL_expect = XTERM;
- deprecate_old(commaless_variable_list);
- return REPORT(','); /* grandfather non-comma-format format */
+ return deprecate_commaless_var_list();
}
else
no_op("String",s);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
- PL_expect = XTERM;
- deprecate_old(commaless_variable_list);
- return REPORT(','); /* grandfather non-comma-format format */
+ return deprecate_commaless_var_list();
}
else
no_op("String",s);
case '\\':
s++;
- if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
- *s, *s);
+ if (PL_lex_inwhat && isDIGIT(*s))
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
+ *s, *s);
if (PL_expect == XOPERATOR)
no_op("Backslash",s);
OPERATOR(REFGEN);
/* Is this a label? */
if (!tmp && PL_expect == XSTATE
&& d < PL_bufend && *d == ':' && *(d + 1) != ':') {
+ tmp = keyword(PL_tokenbuf, len, 0);
+ if (tmp)
+ Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
s = d + 1;
pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
CLINE;
TOKEN(LABEL);
}
-
- /* Check for keywords */
- tmp = keyword(PL_tokenbuf, len, 0);
+ else
+ /* Check for keywords */
+ tmp = keyword(PL_tokenbuf, len, 0);
/* Is this a word before a => operator? */
if (*d == '=' && d[1] == '>') {
}
else { /* no override */
tmp = -tmp;
- if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
- Perl_warner(aTHX_ packWARN(WARN_MISC),
- "dump() better written as CORE::dump()");
+ if (tmp == KEY_dump) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
+ "dump() better written as CORE::dump()");
}
gv = NULL;
gvp = 0;
- if (hgv && tmp != KEY_x && tmp != KEY_CORE
- && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous call resolved as CORE::%s(), %s",
- GvENAME(hgv), "qualify as such or use &");
+ if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
+ Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous call resolved as CORE::%s(), %s",
+ GvENAME(hgv), "qualify as such or use &");
}
}
/* Not a method, so call it a subroutine (if defined) */
if (cv) {
- if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of -%s resolved as -&%s()",
- PL_tokenbuf, PL_tokenbuf);
+ if (lastchar == '-')
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous use of -%s resolved as -&%s()",
+ PL_tokenbuf, PL_tokenbuf);
/* Check for a constant sub */
if ((sv = gv_const_sv(gv))) {
its_constant:
/* Call it a bare word */
- bareword:
if (PL_hints & HINT_STRICT_SUBS)
pl_yylval.opval->op_private |= OPpCONST_STRICT;
else {
+ bareword:
+ /* after "print" and similar functions (corresponding to
+ * "F? L" in opcode.pl), whatever wasn't already parsed as
+ * a filehandle should be subject to "strict subs".
+ * Likewise for the optional indirect-object argument to system
+ * or exec, which can't be a bareword */
+ if ((PL_last_lop_op == OP_PRINT
+ || PL_last_lop_op == OP_PRTF
+ || PL_last_lop_op == OP_SAY
+ || PL_last_lop_op == OP_SYSTEM
+ || PL_last_lop_op == OP_EXEC)
+ && (PL_hints & HINT_STRICT_SUBS))
+ pl_yylval.opval->op_private |= OPpCONST_STRICT;
if (lastchar != '-') {
if (ckWARN(WARN_RESERVED)) {
d = PL_tokenbuf;
}
safe_bareword:
- if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
- && ckWARN_d(WARN_AMBIGUOUS)) {
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Operator or semicolon missing before %c%s",
- lastchar, PL_tokenbuf);
- Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of %c resolved as operator %c",
- lastchar, lastchar);
+ if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Operator or semicolon missing before %c%s",
+ lastchar, PL_tokenbuf);
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous use of %c resolved as operator %c",
+ lastchar, lastchar);
}
TOKEN(WORD);
}
sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
PL_realtokenstart = -1;
}
- while ((s = filter_gets(PL_endwhite, PL_rsfp,
- SvCUR(PL_endwhite))) != NULL) ;
+ while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
+ != NULL) ;
}
#endif
PL_rsfp = NULL;
case KEY_package:
s = force_word(s,WORD,FALSE,TRUE,FALSE);
+ s = force_version(s, FALSE);
OPERATOR(PACKAGE);
case KEY_pipe:
and @foo isn't a variable we can find in the symbol
table.
*/
- if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
+ if (ckWARN(WARN_AMBIGUOUS) &&
+ pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
- && ckWARN(WARN_AMBIGUOUS)
/* DO NOT warn for @- and @+ */
&& !( PL_tokenbuf[2] == '\0' &&
( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
{
/* Downgraded from fatal to warning 20000522 mjd */
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of %s in string",
- PL_tokenbuf);
+ "Possible unintended interpolation of %s in string",
+ PL_tokenbuf);
}
}
name[4] == 'i' &&
name[5] == 'f')
{ /* elseif */
- if(ckWARN_d(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
}
goto unknown;
}
#endif
/* issue a warning if /c is specified,but /g is not */
- if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
- && ckWARN(WARN_REGEXP))
+ if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
{
- Perl_warner(aTHX_ packWARN(WARN_REGEXP),
- "Use of /c modifier is meaningless without /g" );
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
+ "Use of /c modifier is meaningless without /g" );
}
PL_lex_op = (OP*)pm;
PL_thismad = 0;
}
#endif
- if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
- Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
+ if ((pm->op_pmflags & PMf_CONTINUE)) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
}
if (es) {
else
term = '"';
if (!isALNUM_lazy_if(s,UTF))
- deprecate_old("bare << to mean <<\"\"");
+ deprecate("bare << to mean <<\"\"");
for (; isALNUM_lazy_if(s,UTF); s++) {
if (d < e)
*d++ = *s;
}
#endif
if (!outer ||
- !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
+ !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart
+ = filter_gets(PL_linestr, 0))) {
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
missingterm(PL_tokenbuf);
}
}
#endif
if (!PL_rsfp ||
- !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
+ !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart
+ = filter_gets(PL_linestr, 0))) {
sv_free(sv);
CopLINE_set(PL_curcop, (line_t)PL_multi_start);
return NULL;
}
if (*s == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
"Misplaced _ in number");
lastub = s++;
}
/* _ are ignored -- but warned about if consecutive */
case '_':
- if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ if (lastub && s == lastub + 1)
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
break;
&& !(PL_hints & HINT_NEW_BINARY)) {
overflowed = TRUE;
n = (NV) u;
- if (ckWARN_d(WARN_OVERFLOW))
- Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in %s number",
- base);
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
+ "Integer overflow in %s number",
+ base);
} else
u = x | b; /* add the digit to the end */
}
/* final misplaced underbar check */
if (s[-1] == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
}
sv = newSV(0);
if (overflowed) {
- if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
- Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
- "%s number > %s non-portable",
- Base, max);
+ if (n > 4294967295.0)
+ Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
+ "%s number > %s non-portable",
+ Base, max);
sv_setnv(sv, n);
}
else {
#if UVSIZE > 4
- if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
- Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
- "%s number > %s non-portable",
- Base, max);
+ if (u > 0xffffffff)
+ Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
+ "%s number > %s non-portable",
+ Base, max);
#endif
sv_setuv(sv, u);
}
if -w is on
*/
if (*s == '_') {
- if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ if (lastub && s == lastub + 1)
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
}
else {
/* final misplaced underbar check */
if (lastub && s == lastub + 1) {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
}
/* read a decimal portion if there is one. avoid
*d++ = *s++;
if (*s == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s;
}
if (d >= e)
Perl_croak(aTHX_ number_too_long);
if (*s == '_') {
- if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ if (lastub && s == lastub + 1)
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s;
}
else
}
/* fractional part ending in underbar? */
if (s[-1] == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
}
if (*s == '.' && isDIGIT(s[1])) {
/* oops, it's really a v-string, but without the "v" */
/* stray preinitial _ */
if (*s == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
}
/* stray initial _ */
if (*s == '_') {
- if (ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
}
}
else {
if (((lastub && s == lastub + 1) ||
- (!isDIGIT(s[1]) && s[1] != '_'))
- && ckWARN(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Misplaced _ in number");
+ (!isDIGIT(s[1]) && s[1] != '_')))
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "Misplaced _ in number");
lastub = s++;
}
}
PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
}
#endif
- s = filter_gets(PL_linestr, PL_rsfp, 0);
+ s = filter_gets(PL_linestr, 0);
#ifdef PERL_MAD
tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
#else
PL_multi_end = 0;
}
if (PL_in_eval & EVAL_WARNONLY) {
- if (ckWARN_d(WARN_SYNTAX))
- Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
}
else
qerror(msg);
#ifndef PERL_NO_UTF16_FILTER
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
s += 2;
- utf16le:
if (PL_bufend > (char*)s) {
- U8 *news;
- I32 newlen;
-
- filter_add(utf16rev_textfilter, NULL);
- Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- utf16_to_utf8_reversed(s, news,
- PL_bufend - (char*)s - 1,
- &newlen);
- sv_setpvn(PL_linestr, (const char*)news, newlen);
-#ifdef PERL_MAD
- s = (U8*)SvPVX(PL_linestr);
- Copy(news, s, newlen, U8);
- s[newlen] = '\0';
-#endif
- Safefree(news);
- SvUTF8_on(PL_linestr);
- s = (U8*)SvPVX(PL_linestr);
-#ifdef PERL_MAD
- /* FIXME - is this a general bug fix? */
- s[newlen] = '\0';
-#endif
- PL_bufend = SvPVX(PL_linestr) + newlen;
+ s = add_utf16_textfilter(s, TRUE);
}
#else
Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
#ifndef PERL_NO_UTF16_FILTER
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
s += 2;
- utf16be:
if (PL_bufend > (char *)s) {
- U8 *news;
- I32 newlen;
-
- filter_add(utf16_textfilter, NULL);
- Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- utf16_to_utf8(s, news,
- PL_bufend - (char*)s,
- &newlen);
- sv_setpvn(PL_linestr, (const char*)news, newlen);
- Safefree(news);
- SvUTF8_on(PL_linestr);
- s = (U8*)SvPVX(PL_linestr);
- PL_bufend = SvPVX(PL_linestr) + newlen;
+ s = add_utf16_textfilter(s, FALSE);
}
#else
Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
* 00 xx 00 xx
* are a good indicator of UTF-16BE. */
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
- goto utf16be;
+ s = add_utf16_textfilter(s, FALSE);
}
}
#ifdef EBCDIC
* xx 00 xx 00
* are a good indicator of UTF-16LE. */
if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
- goto utf16le;
+ s = add_utf16_textfilter(s, TRUE);
}
}
return (char*)s;
#ifndef PERL_NO_UTF16_FILTER
static I32
-utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
+S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) /* Line-mode source
+ * filter that converts UTF-16 input to UTF-8.  All state lives on the
+ * filter SV returned by FILTER_DATA(idx): its own string buffer holds
+ * converted UTF-8 that has not been handed out yet, IoTOP_GV() holds the
+ * raw UTF-16 octets still to be converted, IoLINES() is true for the
+ * "reversed" (little-endian) variant, and IoPAGE() carries the status
+ * (positive while input may remain, 0 once EOF was reported, negative
+ * after an error).  Each successful call appends one line, or at EOF
+ * whatever remains, to sv and returns SvCUR(sv); a negative status from
+ * FILTER_READ() is recorded in IoPAGE() and returned unchanged.  Croaks if
+ * maxlen is non-zero or if called again after an error. */
{
dVAR;
- const STRLEN old = SvCUR(sv);
- const I32 count = FILTER_READ(idx+1, sv, maxlen);
+ SV *const filter = FILTER_DATA(idx);
+ /* We re-use this each time round, throwing the contents away before we
+ return. */
+ SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
+ SV *const utf8_buffer = filter;
+ IV status = IoPAGE(filter);
+ const bool reverse = IoLINES(filter);
+
+ /* As we're automatically added, at the lowest level, and hence only called
+ from this file, we can be sure that we're not called in block mode. Hence
+ don't bother writing code to deal with block mode. */
+ if (maxlen) {
+ Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
+ }
+ if (status < 0) {
+ Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
+ }
DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16_textfilter(%p): %d %d (%d)\n",
- FPTR2DPTR(void *, utf16_textfilter),
- idx, maxlen, (int) count));
- if (count) {
- U8* tmps;
+ "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+ FPTR2DPTR(void *, S_utf16_textfilter),
+ reverse ? 'l' : 'b', idx, maxlen, status,
+ (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
+
+ while (1) { /* each pass: hand out a buffered line if one exists, otherwise convert more input */
+ STRLEN chars;
+ STRLEN have;
I32 newlen;
- Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- Copy(SvPVX_const(sv), tmps, old, char);
- utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
- SvCUR(sv) - old, &newlen);
- sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
+ U8 *end;
+ /* First, look in our buffer of existing UTF-8 data: */
+ char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
+
+ if (nl) {
+ ++nl;
+ } else if (status == 0) {
+ /* EOF */
+ IoPAGE(filter) = 0;
+ nl = SvEND(utf8_buffer); /* no newline left: hand out everything that remains */
+ }
+ if (nl) {
+ sv_catpvn(sv, SvPVX(utf8_buffer), nl - SvPVX(utf8_buffer));
+ /* Everything else in this code works just fine if SVp_POK isn't
+ set. This, however, needs it, and we need it to work, else
+ we loop infinitely because the buffer is never consumed. */
+ sv_chop(utf8_buffer, nl);
+ break;
+ }
+
+ /* OK, not a complete line there, so need to read some more UTF-16.
+ Read an extra octet if the buffer currently has an odd number. */
+ while (1) { /* stop at EOF, or once the last complete 16-bit unit is not a high surrogate (0xD800-0xDBFF) */
+ if (status <= 0)
+ break;
+ if (SvCUR(utf16_buffer) >= 2) {
+ /* Location of the high octet of the last complete code point.
+ Gosh, UTF-16 is a pain. All the benefits of variable length,
+ *coupled* with all the benefits of partial reads and
+ endianness. */
+ const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
+ + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
+
+ if (*last_hi < 0xd8 || *last_hi > 0xdb) {
+ break;
+ }
+
+ /* We have the first half of a surrogate. Read more. */
+ DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
+ }
+
+ status = FILTER_READ(idx + 1, utf16_buffer,
+ 160 + (SvCUR(utf16_buffer) & 1)); /* NOTE(review): 160 looks like an arbitrary chunk size - confirm */
+ DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
+ DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
+ if (status < 0) {
+ /* Error */
+ IoPAGE(filter) = status;
+ return status;
+ }
+ }
+
+ chars = SvCUR(utf16_buffer) >> 1;
+ have = SvCUR(utf8_buffer);
+ SvGROW(utf8_buffer, have + chars * 3 + 1); /* 3 octets per 16-bit unit, plus one for the NUL stored through end below */
+
+ if (reverse) {
+ end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
+ (U8*)SvPVX_const(utf8_buffer) + have,
+ chars * 2, &newlen);
+ } else {
+ end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
+ (U8*)SvPVX_const(utf8_buffer) + have,
+ chars * 2, &newlen);
+ }
+ SvCUR_set(utf8_buffer, have + newlen);
+ *end = '\0';
+
+ /* No need to keep this SV "well-formed" with a '\0' after the end, as
+ it's private to us, and utf16_to_utf8{,reversed} take a
+ (pointer,length) pair, rather than a NUL-terminated string. */
+ if(SvCUR(utf16_buffer) & 1) { /* the odd trailing octet was not converted: carry it to the front for the next read */
+ *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
+ SvCUR_set(utf16_buffer, 1);
+ } else {
+ SvCUR_set(utf16_buffer, 0);
+ }
}
- DEBUG_P({sv_dump(sv);});
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
+ status,
+ (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
+ DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
return SvCUR(sv);
}
-static I32
-utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
+static U8 *
+S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) /* Installs
+ * S_utf16_textfilter through filter_add() and primes it: the octets from
+ * s up to PL_bufend become the filter's pending UTF-16 input, reversed is
+ * stored in IoLINES(), and IoPAGE() is set to 1.  PL_linestr is then
+ * emptied and refilled by a single FILTER_READ(), flagged as UTF-8, and
+ * PL_bufend is moved to its new end.  Returns the start of PL_linestr's
+ * buffer. */
{
- dVAR;
- const STRLEN old = SvCUR(sv);
- const I32 count = FILTER_READ(idx+1, sv, maxlen);
- DEBUG_P(PerlIO_printf(Perl_debug_log,
- "utf16rev_textfilter(%p): %d %d (%d)\n",
- FPTR2DPTR(void *, utf16rev_textfilter),
- idx, maxlen, (int) count));
- if (count) {
- U8* tmps;
- I32 newlen;
- Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- Copy(SvPVX_const(sv), tmps, old, char);
- utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
- SvCUR(sv) - old, &newlen);
- sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
+ SV *filter = filter_add(S_utf16_textfilter, NULL);
+
+ IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); /* copy of the not-yet-decoded octets, read back by the filter via IoTOP_GV() */
+ sv_setpvs(filter, ""); /* the filter SV itself doubles as the (initially empty) UTF-8 buffer */
+ IoLINES(filter) = reversed; /* true makes the filter call utf16_to_utf8_reversed() */
+ IoPAGE(filter) = 1; /* Not EOF */
+
+ /* Sadly, we have to return a valid pointer, come what may, so we have to
+ ignore any error return from this. */
+ SvCUR_set(PL_linestr, 0);
+ if (FILTER_READ(0, PL_linestr, 0)) {
+ SvUTF8_on(PL_linestr);
+ } else { /* same action on both paths: the read result is deliberately ignored */
+ SvUTF8_on(PL_linestr);
}
- DEBUG_P({ sv_dump(sv); });
- return count;
+ PL_bufend = SvEND(PL_linestr);
+ return (U8*)SvPVX(PL_linestr);
}
#endif
const UV orev = rev;
rev += (*end - '0') * mult;
mult *= 10;
- if (orev > rev && ckWARN_d(WARN_OVERFLOW))
- Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in decimal number");
+ if (orev > rev)
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
+ "Integer overflow in decimal number");
}
}
#ifdef EBCDIC