X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=8c019c510d961b3cf1e98348af9397145ba6e3c4;hb=482aa6ffeb230a1cea9d05e9eb425b3d7fc09217;hp=951c1ca84cfe93b0ccd331a677e5881ac50c1d77;hpb=7bf7986364dc86acb86c5d83e7512b8dbdcb0165;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index 951c1ca..8c019c5 100644 --- a/toke.c +++ b/toke.c @@ -96,16 +96,12 @@ # define PL_nextval (PL_parser->nextval) #endif +/* This can't be done with embed.fnc, because struct yy_parser contains a + member named pending_ident, which clashes with the generated #define */ static int S_pending_ident(pTHX); static const char ident_too_long[] = "Identifier too long"; -static const char commaless_variable_list[] = "comma-less variable list"; - -#ifndef PERL_NO_UTF16_FILTER -static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen); -static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); -#endif #ifdef PERL_MAD # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; } @@ -124,16 +120,14 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) #endif +/* The maximum number of characters preceding the unrecognized one to display */ +#define UNRECOGNIZED_PRECEDE_COUNT 10 + /* In variables named $^X, these are the legal values for X. * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) -/* On MacOS, respect nonbreaking spaces */ -#ifdef MACOS_TRADITIONAL -#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t') -#else #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') -#endif /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement @@ -453,6 +447,13 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s) #endif +static int +S_deprecate_commaless_var_list(pTHX) { + PL_expect = XTERM; + deprecate("comma-less variable list"); + return REPORT(','); /* grandfather non-comma-format format */ +} + /* * S_ao * @@ -587,37 +588,6 @@ S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) } /* - * Perl_deprecate - */ - -void -Perl_deprecate(pTHX_ const char *const s) -{ - PERL_ARGS_ASSERT_DEPRECATE; - - if (ckWARN(WARN_DEPRECATED)) - Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s); -} - -void -Perl_deprecate_old(pTHX_ const char *const s) -{ - /* This function should NOT be called for any new deprecated warnings */ - /* Use Perl_deprecate instead */ - /* */ - /* It is here to maintain backward compatibility with the pre-5.8 */ - /* warnings category hierarchy. The "deprecated" category used to */ - /* live under the "syntax" category. It is now a top-level category */ - /* in its own right. */ - - PERL_ARGS_ASSERT_DEPRECATE_OLD; - - if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), - "Use of %s is deprecated", s); -} - -/* * experimental text filters for win32 carriage-returns, utf16-to-utf8 and * utf16-to-utf8-reversed. */ @@ -1084,8 +1054,8 @@ S_skipspace(pTHX_ register char *s) curoff = s - SvPVX(PL_linestr); #endif - if ((s = filter_gets(PL_linestr, PL_rsfp, - (prevlen = SvCUR(PL_linestr)))) == NULL) + if ((s = filter_gets(PL_linestr, (prevlen = SvCUR(PL_linestr)))) + == NULL) { #ifdef PERL_MAD if (PL_madskills && curoff != startoff) { @@ -1221,11 +1191,9 @@ S_check_uni(pTHX) if ((t = strchr(s, '(')) && t < PL_bufptr) return; - if (ckWARN_d(WARN_AMBIGUOUS)){ - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Warning: Use of \"%.*s\" without parentheses is ambiguous", - (int)(s - PL_last_uni), PL_last_uni); - } + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Warning: Use of \"%.*s\" without parentheses is ambiguous", + (int)(s - PL_last_uni), PL_last_uni); } /* @@ -1386,7 +1354,9 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) { dVAR; SV * const sv = newSVpvn_utf8(start, len, - UTF && !IN_BYTES + !IN_BYTES + && UTF + && !is_ascii_string((const U8*)start, len) && is_utf8_string((const U8*)start, len)); return sv; } @@ -2193,9 +2163,9 @@ S_scan_const(pTHX_ char *start) if (!PL_lex_inpat) /* not a regexp, so $ must be var */ break; if (s + 1 < send && !strchr("()| \r\n\t", s[1])) { - if (s[1] == '\\' && ckWARN(WARN_AMBIGUOUS)) { - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of $\\ in regex"); + if (s[1] == '\\') { + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Possible unintended interpolation of $\\ in regex"); } break; /* in regexp, $ might be tail anchor */ } @@ -2211,8 +2181,7 @@ S_scan_const(pTHX_ char *start) if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); *--s = '$'; break; } @@ -2240,11 +2209,10 @@ S_scan_const(pTHX_ char *start) /* FALL THROUGH */ default: { - if ((isALPHA(*s) || isDIGIT(*s)) && - ckWARN(WARN_MISC)) - Perl_warner(aTHX_ packWARN(WARN_MISC), - "Unrecognized escape \\%c passed through", - *s); + if ((isALPHA(*s) || isDIGIT(*s))) + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), + "Unrecognized escape \\%c passed through", + *s); /* default action is to copy the quoted character */ goto default_action; } @@ -2826,7 +2794,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) bare_package: start_force(PL_curforce); NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, - newSVpvn(tmpbuf,len)); + S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; if (PL_madskills) curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start)); @@ -2949,7 +2917,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) const int old_len = SvCUR(buf_sv); /* ensure buf_sv is large enough */ - SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ; + SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, correct_length)) <= 0) { if (PerlIO_error(PL_rsfp)) @@ -2958,6 +2926,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) return 0 ; /* end of file */ } SvCUR_set(buf_sv, old_len + len) ; + SvPVX(buf_sv)[old_len + len] = '\0'; } else { /* Want a line */ if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { @@ -2988,7 +2957,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) } STATIC char * -S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) +S_filter_gets(pTHX_ register SV *sv, STRLEN append) { dVAR; @@ -3008,7 +2977,7 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) return NULL ; } else - return (sv_gets(sv, fp, append)); + return (sv_gets(sv, PL_rsfp, append)); } STATIC HV * @@ -3657,8 +3626,17 @@ Perl_yylex(pTHX) default: if (isIDFIRST_lazy_if(s,UTF)) goto keylookup; - len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); - Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1); + { + unsigned char c = *s; + len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); + if (len > UNRECOGNIZED_PRECEDE_COUNT) { + d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; + } else { + d = PL_linestart; + } + *s = '\0'; + Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1); + } case 4: case 26: goto fake_eof; /* emulate EOF on ^D or ^Z */ @@ -3762,7 +3740,7 @@ Perl_yylex(pTHX) } do { bof = PL_rsfp ? TRUE : FALSE; - if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) { + if ((s = filter_gets(PL_linestr, 0)) == NULL) { fake_eof: #ifdef PERL_MAD PL_realtokenstart = -1; @@ -3948,7 +3926,6 @@ Perl_yylex(pTHX) *s = '#'; /* Don't try to parse shebang line */ } #endif /* ALTERNATE_SHEBANG */ -#ifndef MACOS_TRADITIONAL if (!d && *s == '#' && ipathend > ipath && @@ -3964,7 +3941,7 @@ Perl_yylex(pTHX) while (s < PL_bufend && isSPACE(*s)) s++; if (s < PL_bufend) { - Newxz(newargv,PL_origargc+3,char*); + Newx(newargv,PL_origargc+3,char*); newargv[1] = s; while (s < PL_bufend && !isSPACE(*s)) s++; @@ -3979,7 +3956,6 @@ Perl_yylex(pTHX) PERL_FPU_POST_EXEC Perl_croak(aTHX_ "Can't exec %s", ipath); } -#endif if (d) { while (*d && !isSPACE(*d)) d++; @@ -3994,7 +3970,14 @@ Perl_yylex(pTHX) const char *d1 = d; do { - if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') { + bool baduni = FALSE; + if (*d1 == 'C') { + const char *d2 = d1 + 1; + if (parse_unicode_opts((const char **)&d2) + != PL_unicode) + baduni = TRUE; + } + if (baduni || *d1 == 'M' || *d1 == 'm') { const char * const m = d1; while (*d1 && !isSPACE(*d1)) d1++; @@ -4042,9 +4025,6 @@ Perl_yylex(pTHX) "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); #endif case ' ': case '\t': case '\f': case 013: -#ifdef MACOS_TRADITIONAL - case '\312': -#endif #ifdef PERL_MAD PL_realtokenstart = -1; if (!PL_thiswhite) @@ -4287,7 +4267,10 @@ Perl_yylex(pTHX) BOop(OP_BIT_XOR); case '[': PL_lex_brackets++; - /* FALL THROUGH */ + { + const char tmp = *s++; + OPERATOR(tmp); + } case '~': if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) @@ -4337,6 +4320,7 @@ Perl_yylex(pTHX) case KEY_or: case KEY_and: case KEY_for: + case KEY_foreach: case KEY_unless: case KEY_if: case KEY_while: @@ -4372,11 +4356,6 @@ Perl_yylex(pTHX) if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { sv_free(sv); if (PL_in_my == KEY_our) { -#ifdef USE_ITHREADS - GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval)); -#else - /* skip to avoid loading attributes.pm */ -#endif deprecate(":unique"); } else @@ -4391,7 +4370,7 @@ Perl_yylex(pTHX) } else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { sv_free(sv); - CvLOCKED_on(PL_compcv); + deprecate(":locked"); } else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { sv_free(sv); @@ -4804,10 +4783,6 @@ Perl_yylex(pTHX) pl_yylval.ival = 0; OPERATOR(ASSIGNOP); case '!': - if (PL_expect == XSTATE && s[1] == '!' && s[2] == '!') { - s += 3; - LOP(OP_DIE,XTERM); - } s++; { const char tmp = *s++; @@ -4877,9 +4852,7 @@ Perl_yylex(pTHX) if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - PL_expect = XTERM; - deprecate_old(commaless_variable_list); - return REPORT(','); /* grandfather non-comma-format format */ + return deprecate_commaless_var_list(); } } @@ -5059,10 +5032,6 @@ Perl_yylex(pTHX) AOPERATOR(DORDOR); } case '?': /* may either be conditional or pattern */ - if (PL_expect == XSTATE && s[1] == '?' && s[2] == '?') { - s += 3; - LOP(OP_WARN,XTERM); - } if (PL_expect == XOPERATOR) { char tmp = *s++; if(tmp == '?') { @@ -5139,9 +5108,7 @@ Perl_yylex(pTHX) DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - PL_expect = XTERM; - deprecate_old(commaless_variable_list); - return REPORT(','); /* grandfather non-comma-format format */ + return deprecate_commaless_var_list(); } else no_op("String",s); @@ -5156,9 +5123,7 @@ Perl_yylex(pTHX) DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - PL_expect = XTERM; - deprecate_old(commaless_variable_list); - return REPORT(','); /* grandfather non-comma-format format */ + return deprecate_commaless_var_list(); } else no_op("String",s); @@ -5188,9 +5153,9 @@ Perl_yylex(pTHX) case '\\': s++; - if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", - *s, *s); + if (PL_lex_inwhat && isDIGIT(*s)) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", + *s, *s); if (PL_expect == XOPERATOR) no_op("Backslash",s); OPERATOR(REFGEN); @@ -5278,14 +5243,17 @@ Perl_yylex(pTHX) /* Is this a label? */ if (!tmp && PL_expect == XSTATE && d < PL_bufend && *d == ':' && *(d + 1) != ':') { + tmp = keyword(PL_tokenbuf, len, 0); + if (tmp) + Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf); s = d + 1; pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf); CLINE; TOKEN(LABEL); } - - /* Check for keywords */ - tmp = keyword(PL_tokenbuf, len, 0); + else + /* Check for keywords */ + tmp = keyword(PL_tokenbuf, len, 0); /* Is this a word before a => operator? */ if (*d == '=' && d[1] == '>') { @@ -5330,17 +5298,16 @@ Perl_yylex(pTHX) } else { /* no override */ tmp = -tmp; - if (tmp == KEY_dump && ckWARN(WARN_MISC)) { - Perl_warner(aTHX_ packWARN(WARN_MISC), - "dump() better written as CORE::dump()"); + if (tmp == KEY_dump) { + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), + "dump() better written as CORE::dump()"); } gv = NULL; gvp = 0; - if (hgv && tmp != KEY_x && tmp != KEY_CORE - && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */ - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous call resolved as CORE::%s(), %s", - GvENAME(hgv), "qualify as such or use &"); + if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */ + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous call resolved as CORE::%s(), %s", + GvENAME(hgv), "qualify as such or use &"); } } @@ -5580,10 +5547,10 @@ Perl_yylex(pTHX) /* Not a method, so call it a subroutine (if defined) */ if (cv) { - if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS)) - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of -%s resolved as -&%s()", - PL_tokenbuf, PL_tokenbuf); + if (lastchar == '-') + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of -%s resolved as -&%s()", + PL_tokenbuf, PL_tokenbuf); /* Check for a constant sub */ if ((sv = gv_const_sv(gv))) { its_constant: @@ -5696,10 +5663,22 @@ Perl_yylex(pTHX) /* Call it a bare word */ - bareword: if (PL_hints & HINT_STRICT_SUBS) pl_yylval.opval->op_private |= OPpCONST_STRICT; else { + bareword: + /* after "print" and similar functions (corresponding to + * "F? L" in opcode.pl), whatever wasn't already parsed as + * a filehandle should be subject to "strict subs". + * Likewise for the optional indirect-object argument to system + * or exec, which can't be a bareword */ + if ((PL_last_lop_op == OP_PRINT + || PL_last_lop_op == OP_PRTF + || PL_last_lop_op == OP_SAY + || PL_last_lop_op == OP_SYSTEM + || PL_last_lop_op == OP_EXEC) + && (PL_hints & HINT_STRICT_SUBS)) + pl_yylval.opval->op_private |= OPpCONST_STRICT; if (lastchar != '-') { if (ckWARN(WARN_RESERVED)) { d = PL_tokenbuf; @@ -5713,14 +5692,13 @@ Perl_yylex(pTHX) } safe_bareword: - if ((lastchar == '*' || lastchar == '%' || lastchar == '&') - && ckWARN_d(WARN_AMBIGUOUS)) { - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Operator or semicolon missing before %c%s", - lastchar, PL_tokenbuf); - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of %c resolved as operator %c", - lastchar, lastchar); + if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Operator or semicolon missing before %c%s", + lastchar, PL_tokenbuf); + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of %c resolved as operator %c", + lastchar, lastchar); } TOKEN(WORD); } @@ -5831,8 +5809,8 @@ Perl_yylex(pTHX) sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart); PL_realtokenstart = -1; } - while ((s = filter_gets(PL_endwhite, PL_rsfp, - SvCUR(PL_endwhite))) != NULL) ; + while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite))) + != NULL) ; } #endif PL_rsfp = NULL; @@ -6393,6 +6371,7 @@ Perl_yylex(pTHX) case KEY_package: s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_version(s, FALSE); OPERATOR(PACKAGE); case KEY_pipe: @@ -7150,11 +7129,11 @@ S_pending_ident(pTHX) and @foo isn't a variable we can find in the symbol table. */ - if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { + if (ckWARN(WARN_AMBIGUOUS) && + pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0, SVt_PVAV); if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) - && ckWARN(WARN_AMBIGUOUS) /* DO NOT warn for @- and @+ */ && !( PL_tokenbuf[2] == '\0' && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' )) @@ -7162,8 +7141,8 @@ S_pending_ident(pTHX) { /* Downgraded from fatal to warning 20000522 mjd */ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of %s in string", - PL_tokenbuf); + "Possible unintended interpolation of %s in string", + PL_tokenbuf); } } @@ -8704,8 +8683,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords) name[4] == 'i' && name[5] == 'f') { /* elseif */ - if(ckWARN_d(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif"); + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif"); } goto unknown; @@ -11030,11 +11008,10 @@ S_scan_pat(pTHX_ char *start, I32 type) } #endif /* issue a warning if /c is specified,but /g is not */ - if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL) - && ckWARN(WARN_REGEXP)) + if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) { - Perl_warner(aTHX_ packWARN(WARN_REGEXP), - "Use of /c modifier is meaningless without /g" ); + Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), + "Use of /c modifier is meaningless without /g" ); } PL_lex_op = (OP*)pm; @@ -11116,8 +11093,8 @@ S_scan_subst(pTHX_ char *start) PL_thismad = 0; } #endif - if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) { - Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); + if ((pm->op_pmflags & PMf_CONTINUE)) { + Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); } if (es) { @@ -11285,7 +11262,7 @@ S_scan_heredoc(pTHX_ register char *s) else term = '"'; if (!isALNUM_lazy_if(s,UTF)) - deprecate_old("bare << to mean <<\"\""); + deprecate("bare << to mean <<\"\""); for (; isALNUM_lazy_if(s,UTF); s++) { if (d < e) *d++ = *s; @@ -11446,7 +11423,8 @@ S_scan_heredoc(pTHX_ register char *s) } #endif if (!outer || - !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { + !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart + = filter_gets(PL_linestr, 0))) { CopLINE_set(PL_curcop, (line_t)PL_multi_start); missingterm(PL_tokenbuf); } @@ -11958,7 +11936,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } #endif if (!PL_rsfp || - !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { + !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart + = filter_gets(PL_linestr, 0))) { sv_free(sv); CopLINE_set(PL_curcop, (line_t)PL_multi_start); return NULL; @@ -12129,8 +12108,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s++; } @@ -12153,9 +12131,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* _ are ignored -- but warned about if consecutive */ case '_': - if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + if (lastub && s == lastub + 1) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; break; @@ -12197,10 +12175,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) && !(PL_hints & HINT_NEW_BINARY)) { overflowed = TRUE; n = (NV) u; - if (ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), - "Integer overflow in %s number", - base); + Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), + "Integer overflow in %s number", + base); } else u = x | b; /* add the digit to the end */ } @@ -12227,24 +12204,23 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* final misplaced underbar check */ if (s[-1] == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } sv = newSV(0); if (overflowed) { - if (n > 4294967295.0 && ckWARN(WARN_PORTABLE)) - Perl_warner(aTHX_ packWARN(WARN_PORTABLE), - "%s number > %s non-portable", - Base, max); + if (n > 4294967295.0) + Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), + "%s number > %s non-portable", + Base, max); sv_setnv(sv, n); } else { #if UVSIZE > 4 - if (u > 0xffffffff && ckWARN(WARN_PORTABLE)) - Perl_warner(aTHX_ packWARN(WARN_PORTABLE), - "%s number > %s non-portable", - Base, max); + if (u > 0xffffffff) + Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), + "%s number > %s non-portable", + Base, max); #endif sv_setuv(sv, u); } @@ -12273,9 +12249,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if -w is on */ if (*s == '_') { - if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + if (lastub && s == lastub + 1) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } else { @@ -12289,8 +12265,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* final misplaced underbar check */ if (lastub && s == lastub + 1) { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } /* read a decimal portion if there is one. avoid @@ -12302,9 +12277,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) *d++ = *s++; if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s; } @@ -12315,9 +12289,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if (d >= e) Perl_croak(aTHX_ number_too_long); if (*s == '_') { - if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + if (lastub && s == lastub + 1) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s; } else @@ -12325,9 +12299,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } /* fractional part ending in underbar? */ if (s[-1] == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); } if (*s == '.' && isDIGIT(s[1])) { /* oops, it's really a v-string, but without the "v" */ @@ -12346,9 +12319,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* stray preinitial _ */ if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } @@ -12358,9 +12330,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* stray initial _ */ if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } @@ -12373,10 +12344,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } else { if (((lastub && s == lastub + 1) || - (!isDIGIT(s[1]) && s[1] != '_')) - && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + (!isDIGIT(s[1]) && s[1] != '_'))) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } } @@ -12518,7 +12488,7 @@ S_scan_formline(pTHX_ register char *s) PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart); } #endif - s = filter_gets(PL_linestr, PL_rsfp, 0); + s = filter_gets(PL_linestr, 0); #ifdef PERL_MAD tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr); #else @@ -12700,8 +12670,7 @@ Perl_yyerror(pTHX_ const char *const s) PL_multi_end = 0; } if (PL_in_eval & EVAL_WARNONLY) { - if (ckWARN_d(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); } else qerror(msg); @@ -12738,30 +12707,8 @@ S_swallow_bom(pTHX_ U8 *s) #ifndef PERL_NO_UTF16_FILTER if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n"); s += 2; - utf16le: if (PL_bufend > (char*)s) { - U8 *news; - I32 newlen; - - filter_add(utf16rev_textfilter, NULL); - Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - utf16_to_utf8_reversed(s, news, - PL_bufend - (char*)s - 1, - &newlen); - sv_setpvn(PL_linestr, (const char*)news, newlen); -#ifdef PERL_MAD - s = (U8*)SvPVX(PL_linestr); - Copy(news, s, newlen, U8); - s[newlen] = '\0'; -#endif - Safefree(news); - SvUTF8_on(PL_linestr); - s = (U8*)SvPVX(PL_linestr); -#ifdef PERL_MAD - /* FIXME - is this a general bug fix? */ - s[newlen] = '\0'; -#endif - PL_bufend = SvPVX(PL_linestr) + newlen; + s = add_utf16_textfilter(s, TRUE); } #else Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE"); @@ -12773,21 +12720,8 @@ S_swallow_bom(pTHX_ U8 *s) #ifndef PERL_NO_UTF16_FILTER if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); s += 2; - utf16be: if (PL_bufend > (char *)s) { - U8 *news; - I32 newlen; - - filter_add(utf16_textfilter, NULL); - Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - utf16_to_utf8(s, news, - PL_bufend - (char*)s, - &newlen); - sv_setpvn(PL_linestr, (const char*)news, newlen); - Safefree(news); - SvUTF8_on(PL_linestr); - s = (U8*)SvPVX(PL_linestr); - PL_bufend = SvPVX(PL_linestr) + newlen; + s = add_utf16_textfilter(s, FALSE); } #else Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE"); @@ -12813,7 +12747,7 @@ S_swallow_bom(pTHX_ U8 *s) * 00 xx 00 xx * are a good indicator of UTF-16BE. */ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); - goto utf16be; + s = add_utf16_textfilter(s, FALSE); } } #ifdef EBCDIC @@ -12831,7 +12765,7 @@ S_swallow_bom(pTHX_ U8 *s) * xx 00 xx 00 * are a good indicator of UTF-16LE. */ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); - goto utf16le; + s = add_utf16_textfilter(s, TRUE); } } return (char*)s; @@ -12840,49 +12774,142 @@ S_swallow_bom(pTHX_ U8 *s) #ifndef PERL_NO_UTF16_FILTER static I32 -utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) +S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) { dVAR; - const STRLEN old = SvCUR(sv); - const I32 count = FILTER_READ(idx+1, sv, maxlen); + SV *const filter = FILTER_DATA(idx); + /* We re-use this each time round, throwing the contents away before we + return. */ + SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); + SV *const utf8_buffer = filter; + IV status = IoPAGE(filter); + const bool reverse = IoLINES(filter); + + /* As we're automatically added, at the lowest level, and hence only called + from this file, we can be sure that we're not called in block mode. Hence + don't bother writing code to deal with block mode. */ + if (maxlen) { + Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); + } + if (status < 0) { + Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status); + } DEBUG_P(PerlIO_printf(Perl_debug_log, - "utf16_textfilter(%p): %d %d (%d)\n", - FPTR2DPTR(void *, utf16_textfilter), - idx, maxlen, (int) count)); - if (count) { - U8* tmps; + "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", + FPTR2DPTR(void *, S_utf16_textfilter), + reverse ? 'l' : 'b', idx, maxlen, status, + (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); + + while (1) { + STRLEN chars; + STRLEN have; I32 newlen; - Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8); - Copy(SvPVX_const(sv), tmps, old, char); - utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old, - SvCUR(sv) - old, &newlen); - sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); + U8 *end; + /* First, look in our buffer of existing UTF-8 data: */ + char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); + + if (nl) { + ++nl; + } else if (status == 0) { + /* EOF */ + IoPAGE(filter) = 0; + nl = SvEND(utf8_buffer); + } + if (nl) { + sv_catpvn(sv, SvPVX(utf8_buffer), nl - SvPVX(utf8_buffer)); + /* Everything else in this code works just fine if SVp_POK isn't + set. This, however, needs it, and we need it to work, else + we loop infinitely because the buffer is never consumed. */ + sv_chop(utf8_buffer, nl); + break; + } + + /* OK, not a complete line there, so need to read some more UTF-16. + Read an extra octect if the buffer currently has an odd number. */ + while (1) { + if (status <= 0) + break; + if (SvCUR(utf16_buffer) >= 2) { + /* Location of the high octet of the last complete code point. + Gosh, UTF-16 is a pain. All the benefits of variable length, + *coupled* with all the benefits of partial reads and + endianness. */ + const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) + + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); + + if (*last_hi < 0xd8 || *last_hi > 0xdb) { + break; + } + + /* We have the first half of a surrogate. Read more. */ + DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); + } + + status = FILTER_READ(idx + 1, utf16_buffer, + 160 + (SvCUR(utf16_buffer) & 1)); + DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer))); + DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); + if (status < 0) { + /* Error */ + IoPAGE(filter) = status; + return status; + } + } + + chars = SvCUR(utf16_buffer) >> 1; + have = SvCUR(utf8_buffer); + SvGROW(utf8_buffer, have + chars * 3 + 1); + + if (reverse) { + end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), + (U8*)SvPVX_const(utf8_buffer) + have, + chars * 2, &newlen); + } else { + end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), + (U8*)SvPVX_const(utf8_buffer) + have, + chars * 2, &newlen); + } + SvCUR_set(utf8_buffer, have + newlen); + *end = '\0'; + + /* No need to keep this SV "well-formed" with a '\0' after the end, as + it's private to us, and utf16_to_utf8{,reversed} take a + (pointer,length) pair, rather than a NUL-terminated string. */ + if(SvCUR(utf16_buffer) & 1) { + *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; + SvCUR_set(utf16_buffer, 1); + } else { + SvCUR_set(utf16_buffer, 0); + } } - DEBUG_P({sv_dump(sv);}); + DEBUG_P(PerlIO_printf(Perl_debug_log, + "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", + status, + (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); + DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); return SvCUR(sv); } -static I32 -utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen) +static U8 * +S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) { - dVAR; - const STRLEN old = SvCUR(sv); - const I32 count = FILTER_READ(idx+1, sv, maxlen); - DEBUG_P(PerlIO_printf(Perl_debug_log, - "utf16rev_textfilter(%p): %d %d (%d)\n", - FPTR2DPTR(void *, utf16rev_textfilter), - idx, maxlen, (int) count)); - if (count) { - U8* tmps; - I32 newlen; - Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8); - Copy(SvPVX_const(sv), tmps, old, char); - utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old, - SvCUR(sv) - old, &newlen); - sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); + SV *filter = filter_add(S_utf16_textfilter, NULL); + + IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); + sv_setpvs(filter, ""); + IoLINES(filter) = reversed; + IoPAGE(filter) = 1; /* Not EOF */ + + /* Sadly, we have to return a valid pointer, come what may, so we have to + ignore any error return from this. */ + SvCUR_set(PL_linestr, 0); + if (FILTER_READ(0, PL_linestr, 0)) { + SvUTF8_on(PL_linestr); + } else { + SvUTF8_on(PL_linestr); } - DEBUG_P({ sv_dump(sv); }); - return count; + PL_bufend = SvEND(PL_linestr); + return (U8*)SvPVX(PL_linestr); } #endif @@ -12944,9 +12971,9 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) const UV orev = rev; rev += (*end - '0') * mult; mult *= 10; - if (orev > rev && ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), - "Integer overflow in decimal number"); + if (orev > rev) + Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), + "Integer overflow in decimal number"); } } #ifdef EBCDIC