X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=8c019c510d961b3cf1e98348af9397145ba6e3c4;hb=482aa6ffeb230a1cea9d05e9eb425b3d7fc09217;hp=2ae8b7577bb85ec460834d9745d028f7b7e1464f;hpb=15f169a1089447c7f805778ce6caca1f591d50fc;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index 2ae8b75..8c019c5 100644 --- a/toke.c +++ b/toke.c @@ -1,7 +1,7 @@ /* toke.c * - * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, - * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others + * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, + * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -9,7 +9,9 @@ */ /* - * "It all comes from here, the stench and the peril." --Frodo + * 'It all comes from here, the stench and the peril.' --Frodo + * + * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"] */ /* @@ -94,16 +96,12 @@ # define PL_nextval (PL_parser->nextval) #endif +/* This can't be done with embed.fnc, because struct yy_parser contains a + member named pending_ident, which clashes with the generated #define */ static int S_pending_ident(pTHX); static const char ident_too_long[] = "Identifier too long"; -static const char commaless_variable_list[] = "comma-less variable list"; - -#ifndef PERL_NO_UTF16_FILTER -static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen); -static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); -#endif #ifdef PERL_MAD # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; } @@ -122,16 +120,14 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) #endif +/* The maximum number of characters preceding the unrecognized one to display */ +#define 
UNRECOGNIZED_PRECEDE_COUNT 10 + /* In variables named $^X, these are the legal values for X. * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) -/* On MacOS, respect nonbreaking spaces */ -#ifdef MACOS_TRADITIONAL -#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t') -#else #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') -#endif /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement @@ -368,6 +364,7 @@ static struct debug_tokens { { WHEN, TOKENTYPE_IVAL, "WHEN" }, { WHILE, TOKENTYPE_IVAL, "WHILE" }, { WORD, TOKENTYPE_OPVAL, "WORD" }, + { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" }, { 0, TOKENTYPE_NONE, NULL } }; @@ -377,6 +374,9 @@ STATIC int S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) { dVAR; + + PERL_ARGS_ASSERT_TOKEREPORT; + if (DEBUG_T_TEST) { const char *name = NULL; enum token_type type = TOKENTYPE_NONE; @@ -438,12 +438,22 @@ STATIC void S_printbuf(pTHX_ const char *const fmt, const char *const s) { SV* const tmp = newSVpvs(""); + + PERL_ARGS_ASSERT_PRINTBUF; + PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); SvREFCNT_dec(tmp); } #endif +static int +S_deprecate_commaless_var_list(pTHX) { + PL_expect = XTERM; + deprecate("comma-less variable list"); + return REPORT(','); /* grandfather non-comma-format format */ +} + /* * S_ao * @@ -488,6 +498,8 @@ S_no_op(pTHX_ const char *const what, char *s) char * const oldbp = PL_bufptr; const bool is_first = (PL_oldbufptr == PL_linestart); + PERL_ARGS_ASSERT_NO_OP; + if (!s) s = oldbp; else @@ -535,13 +547,7 @@ S_missingterm(pTHX_ char *s) if (nl) *nl = '\0'; } - else if ( -#ifdef EBCDIC - iscntrl(PL_multi_close) -#else - PL_multi_close < 32 || PL_multi_close == 127 -#endif - ) { + else if (isCNTRL(PL_multi_close)) { *tmpbuf = '^'; tmpbuf[1] = (char)toCTRL(PL_multi_close); tmpbuf[2] = '\0'; @@ -572,6 +578,9 @@ S_feature_is_enabled(pTHX_ const char *const name, 
STRLEN namelen) dVAR; HV * const hinthv = GvHV(PL_hintgv); char he_name[8 + MAX_FEATURE_LEN] = "feature_"; + + PERL_ARGS_ASSERT_FEATURE_IS_ENABLED; + assert(namelen <= MAX_FEATURE_LEN); memcpy(&he_name[8], name, namelen); @@ -579,33 +588,6 @@ S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) } /* - * Perl_deprecate - */ - -void -Perl_deprecate(pTHX_ const char *const s) -{ - if (ckWARN(WARN_DEPRECATED)) - Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s); -} - -void -Perl_deprecate_old(pTHX_ const char *const s) -{ - /* This function should NOT be called for any new deprecated warnings */ - /* Use Perl_deprecate instead */ - /* */ - /* It is here to maintain backward compatibility with the pre-5.8 */ - /* warnings category hierarchy. The "deprecated" category used to */ - /* live under the "syntax" category. It is now a top-level category */ - /* in its own right. */ - - if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), - "Use of %s is deprecated", s); -} - -/* * experimental text filters for win32 carriage-returns, utf16-to-utf8 and * utf16-to-utf8-reversed. */ @@ -616,6 +598,9 @@ strip_return(SV *sv) { register const char *s = SvPVX_const(sv); register const char * const e = s + SvCUR(sv); + + PERL_ARGS_ASSERT_STRIP_RETURN; + /* outer loop optimized to do nothing if there are no CR-LFs */ while (s < e) { if (*s++ == '\r' && *s == '\n') { @@ -692,12 +677,13 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter) #else parser->nexttoke = 0; #endif + parser->error_count = oparser ? oparser->error_count : 0; parser->copline = NOLINE; parser->lex_state = LEX_NORMAL; parser->expect = XSTATE; parser->rsfp = rsfp; parser->rsfp_filters = (new_filter || !oparser) ? 
newAV() - : (AV*)SvREFCNT_inc(oparser->rsfp_filters); + : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters)); Newx(parser->lex_brackstack, 120, char); Newx(parser->lex_casestack, 12, char); @@ -734,13 +720,15 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter) void Perl_parser_free(pTHX_ const yy_parser *parser) { + PERL_ARGS_ASSERT_PARSER_FREE; + PL_curcop = parser->saved_curcop; SvREFCNT_dec(parser->linestr); if (parser->rsfp == PerlIO_stdin()) PerlIO_clearerr(parser->rsfp); - else if (parser->rsfp && parser->old_parser - && parser->rsfp != parser->old_parser->rsfp) + else if (parser->rsfp && (!parser->old_parser || + (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) PerlIO_close(parser->rsfp); SvREFCNT_dec(parser->rsfp_filters); @@ -783,6 +771,8 @@ S_incline(pTHX_ const char *s) const char *n; const char *e; + PERL_ARGS_ASSERT_INCLINE; + CopLINE_inc(PL_curcop); if (*s++ != '#') return; @@ -804,6 +794,8 @@ S_incline(pTHX_ const char *s) n = s; while (isDIGIT(*s)) s++; + if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') + return; while (SPACE_OR_TAB(*s)) s++; if (*s == '"' && (t = strchr(s+1, '"'))) { @@ -878,8 +870,8 @@ S_incline(pTHX_ const char *s) gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE); /* adjust ${"::_mad_key == '^') { (*where)->mad_key = slot; - sv_free((SV*)((*where)->mad_val)); + sv_free(MUTABLE_SV(((*where)->mad_val))); (*where)->mad_val = (void*)sv; } else @@ -1324,7 +1326,7 @@ S_force_next(pTHX_ I32 type) #ifdef DEBUGGING if (DEBUG_T_TEST) { PerlIO_printf(Perl_debug_log, "### forced token:\n"); - tokereport(THING, &NEXTVAL_NEXTTOKE); + tokereport(type, &NEXTVAL_NEXTTOKE); } #endif #ifdef PERL_MAD @@ -1352,7 +1354,9 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) { dVAR; SV * const sv = newSVpvn_utf8(start, len, - UTF && !IN_BYTES + !IN_BYTES + && UTF + && !is_ascii_string((const U8*)start, len) && is_utf8_string((const U8*)start, len)); return sv; } @@ -1381,6 +1385,8 @@ 
S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow register char *s; STRLEN len; + PERL_ARGS_ASSERT_FORCE_WORD; + start = SKIPSPACE1(start); s = start; if (isIDFIRST_lazy_if(s,UTF) || @@ -1425,6 +1431,9 @@ STATIC void S_force_ident(pTHX_ register const char *s, int kind) { dVAR; + + PERL_ARGS_ASSERT_FORCE_IDENT; + if (*s) { const STRLEN len = strlen(s); OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len)); @@ -1457,6 +1466,9 @@ Perl_str_to_version(pTHX_ SV *sv) const char *start = SvPV_const(sv,len); const char * const end = start + len; const bool utf = SvUTF8(sv) ? TRUE : FALSE; + + PERL_ARGS_ASSERT_STR_TO_VERSION; + while (start < end) { STRLEN skip; UV n; @@ -1491,6 +1503,8 @@ S_force_version(pTHX_ char *s, int guessing) I32 startoff = s - SvPVX(PL_linestr); #endif + PERL_ARGS_ASSERT_FORCE_VERSION; + s = SKIPSPACE1(s); d = s; @@ -1561,6 +1575,8 @@ S_tokeq(pTHX_ SV *sv) STRLEN len = 0; SV *pv = sv; + PERL_ARGS_ASSERT_TOKEQ; + if (!SvLEN(sv)) goto finish; @@ -1798,7 +1814,7 @@ S_sublex_done(pTHX) PL_thiswhite = 0; } if (PL_thistoken) - sv_setpvn(PL_thistoken,"",0); + sv_setpvs(PL_thistoken,""); else PL_realtokenstart = -1; } @@ -1886,7 +1902,9 @@ S_sublex_done(pTHX) handle \cV (control characters) handle printf-style backslashes (\f, \r, \n, etc) } (end switch) + continue } (end if backslash) + handle regular character } (end while character to read) */ @@ -1896,19 +1914,40 @@ S_scan_const(pTHX_ char *start) { dVAR; register char *send = PL_bufend; /* end of the constant */ - SV *sv = newSV(send - start); /* sv for the constant */ + SV *sv = newSV(send - start); /* sv for the constant. See + note below on sizing. */ register char *s = start; /* start of the constant */ register char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ bool didrange = FALSE; /* did we just finish a range? 
*/ I32 has_utf8 = FALSE; /* Output constant is UTF8 */ - I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */ + I32 this_utf8 = UTF; /* Is the source string assumed + to be UTF8? But, this can + show as true when the source + isn't utf8, as for example + when it is entirely composed + of hex constants */ + + /* Note on sizing: The scanned constant is placed into sv, which is + * initialized by newSV() assuming one byte of output for every byte of + * input. This routine expects newSV() to allocate an extra byte for a + * trailing NUL, which this routine will append if it gets to the end of + * the input. There may be more bytes of input than output (eg., \N{LATIN + * CAPITAL LETTER A}), or more output than input if the constant ends up + * recoded to utf8, but each time a construct is found that might increase + * the needed size, SvGROW() is called. Its size parameter each time is + * based on the best guess estimate at the time, namely the length used so + * far, plus the length the current construct will occupy, plus room for + * the trailing NUL, plus one byte for every input byte still unscanned */ + UV uv; #ifdef EBCDIC UV literal_endpoint = 0; bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. 
*/ #endif + PERL_ARGS_ASSERT_SCAN_CONST; + if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { /* If we are doing a trans and we know we want UTF8 set expectation */ has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF); @@ -2123,8 +2162,13 @@ S_scan_const(pTHX_ char *start) else if (*s == '$') { if (!PL_lex_inpat) /* not a regexp, so $ must be var */ break; - if (s + 1 < send && !strchr("()| \r\n\t", s[1])) + if (s + 1 < send && !strchr("()| \r\n\t", s[1])) { + if (s[1] == '\\') { + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Possible unintended interpolation of $\\ in regex"); + } break; /* in regexp, $ might be tail anchor */ + } } /* End of else if chain - OP_TRANS rejoin rest */ @@ -2137,8 +2181,7 @@ S_scan_const(pTHX_ char *start) if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); *--s = '$'; break; } @@ -2166,27 +2209,26 @@ S_scan_const(pTHX_ char *start) /* FALL THROUGH */ default: { - if ((isALPHA(*s) || isDIGIT(*s)) && - ckWARN(WARN_MISC)) - Perl_warner(aTHX_ packWARN(WARN_MISC), - "Unrecognized escape \\%c passed through", - *s); + if ((isALPHA(*s) || isDIGIT(*s))) + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), + "Unrecognized escape \\%c passed through", + *s); /* default action is to copy the quoted character */ goto default_action; } - /* \132 indicates an octal constant */ + /* eg. \132 indicates the octal constant 0x132 */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { I32 flags = 0; STRLEN len = 3; - uv = grok_oct(s, &len, &flags, NULL); + uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL)); s += len; } goto NUM_ESCAPE_INSERT; - /* \x24 indicates a hex constant */ + /* eg. 
\x24 indicates the hex constant 0x24 */ case 'x': ++s; if (*s == '{') { @@ -2201,67 +2243,47 @@ S_scan_const(pTHX_ char *start) continue; } len = e - s; - uv = grok_hex(s, &len, &flags, NULL); + uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); s = e + 1; } else { { STRLEN len = 2; I32 flags = PERL_SCAN_DISALLOW_PREFIX; - uv = grok_hex(s, &len, &flags, NULL); + uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); s += len; } } NUM_ESCAPE_INSERT: - /* Insert oct or hex escaped character. - * There will always enough room in sv since such - * escapes will be longer than any UTF-8 sequence - * they can end up as. */ + /* Insert oct, hex, or \N{U+...} escaped character. There will + * always be enough room in sv since such escapes will be + * longer than any UTF-8 sequence they can end up as, except if + * they force us to recode the rest of the string into utf8 */ - /* We need to map to chars to ASCII before doing the tests - to cover EBCDIC - */ - if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) { + /* Here uv is the ordinal of the next character being added in + * unicode (converted from native). (It has to be done before + * here because \N is interpreted as unicode, and oct and hex + * as native.) */ + if (!UNI_IS_INVARIANT(uv)) { if (!has_utf8 && uv > 255) { - /* Might need to recode whatever we have - * accumulated so far if it contains any - * hibit chars. - * - * (Can't we keep track of that and avoid - * this rescan? 
--jhi) - */ - int hicount = 0; - U8 *c; - for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) { - if (!NATIVE_IS_INVARIANT(*c)) { - hicount++; - } - } - if (hicount) { - const STRLEN offset = d - SvPVX_const(sv); - U8 *src, *dst; - d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset; - src = (U8 *)d - 1; - dst = src+hicount; - d += hicount; - while (src >= (const U8 *)SvPVX_const(sv)) { - if (!NATIVE_IS_INVARIANT(*src)) { - const U8 ch = NATIVE_TO_ASCII(*src); - *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch); - *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch); - } - else { - *dst-- = *src; - } - src--; - } - } + /* Might need to recode whatever we have accumulated so + * far if it contains any chars variant in utf8 or + * utf-ebcdic. */ + + SvCUR_set(sv, d - SvPVX_const(sv)); + SvPOK_on(sv); + *d = '\0'; + /* See Note on sizing above. */ + sv_utf8_upgrade_flags_grow(sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + UNISKIP(uv) + (STRLEN)(send - s) + 1); + d = SvPVX(sv) + SvCUR(sv); + has_utf8 = TRUE; } - if (has_utf8 || uv > 255) { - d = (char*)uvchr_to_utf8((U8*)d, uv); - has_utf8 = TRUE; + if (has_utf8) { + d = (char*)uvuni_to_utf8((U8*)d, uv); if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { PL_sublex_info.sub_op->op_private |= @@ -2282,7 +2304,8 @@ S_scan_const(pTHX_ char *start) } continue; - /* \N{LATIN SMALL LETTER A} is a named character */ + /* \N{LATIN SMALL LETTER A} is a named character, and so is + * \N{U+0041} */ case 'N': ++s; if (*s == '{') { @@ -2297,7 +2320,8 @@ S_scan_const(pTHX_ char *start) goto cont_scan; } if (e > s + 2 && s[1] == 'U' && s[2] == '+') { - /* \N{U+...} */ + /* \N{U+...} The ... 
is a unicode value even on EBCDIC + * machines */ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | PERL_SCAN_DISALLOW_PREFIX; s += 3; @@ -2335,22 +2359,24 @@ S_scan_const(pTHX_ char *start) } } #endif + /* If destination is not in utf8 but this new character is, + * recode the dest to utf8 */ if (!has_utf8 && SvUTF8(res)) { - const char * const ostart = SvPVX_const(sv); - SvCUR_set(sv, d - ostart); + SvCUR_set(sv, d - SvPVX_const(sv)); SvPOK_on(sv); *d = '\0'; - sv_utf8_upgrade(sv); - /* this just broke our allocation above... */ - SvGROW(sv, (STRLEN)(send - start)); + /* See Note on sizing above. */ + sv_utf8_upgrade_flags_grow(sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + len + (STRLEN)(send - s) + 1); d = SvPVX(sv) + SvCUR(sv); has_utf8 = TRUE; - } - if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ - const char * const odest = SvPVX_const(sv); + } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ - SvGROW(sv, (SvLEN(sv) + len - (e - s + 4))); - d = SvPVX(sv) + (d - odest); + /* See Note on sizing above. (NOTE: SvCUR() is not set + * correctly here). */ + const STRLEN off = d - SvPVX_const(sv); + d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off; } #ifdef EBCDIC if (!dorange) @@ -2415,20 +2441,41 @@ S_scan_const(pTHX_ char *start) #endif default_action: - /* If we started with encoded form, or already know we want it - and then encode the next character */ - if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) { + /* If we started with encoded form, or already know we want it, + then encode the next character */ + if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { STRLEN len = 1; + + + /* One might think that it is wasted effort in the case of the + * source being utf8 (this_utf8 == TRUE) to take the next character + * in the source, convert it to an unsigned value, and then convert + * it back again. But the source has not been validated here. 
The + * routine that does the conversion checks for errors like + * malformed utf8 */ + const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s); const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv)); - s += len; - if (need > len) { - /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */ + if (!has_utf8) { + SvCUR_set(sv, d - SvPVX_const(sv)); + SvPOK_on(sv); + *d = '\0'; + /* See Note on sizing above. */ + sv_utf8_upgrade_flags_grow(sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + need + (STRLEN)(send - s) + 1); + d = SvPVX(sv) + SvCUR(sv); + has_utf8 = TRUE; + } else if (need > len) { + /* encoded value larger than old, may need extra space (NOTE: + * SvCUR() is not set correctly here). See Note on sizing + * above. */ const STRLEN off = d - SvPVX_const(sv); - d = SvGROW(sv, SvLEN(sv) + (need-len)) + off; + d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off; } + s += len; + d = (char*)uvchr_to_utf8((U8*)d, nextuv); - has_utf8 = TRUE; #ifdef EBCDIC if (uv > 255 && !dorange) native_range = FALSE; @@ -2517,6 +2564,9 @@ STATIC int S_intuit_more(pTHX_ register char *s) { dVAR; + + PERL_ARGS_ASSERT_INTUIT_MORE; + if (PL_lex_brackets) return TRUE; if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) @@ -2682,6 +2732,8 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) int soff; #endif + PERL_ARGS_ASSERT_INTUIT_METHOD; + if (gv) { if (SvTYPE(gv) == SVt_PVGV && GvIO(gv)) return 0; @@ -2742,7 +2794,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) bare_package: start_force(PL_curforce); NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, - newSVpvn(tmpbuf,len)); + S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; if (PL_madskills) curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start)); @@ -2808,6 +2860,8 @@ Perl_filter_del(pTHX_ filter_t funcp) dVAR; SV *datasv; + PERL_ARGS_ASSERT_FILTER_DEL; + #ifdef DEBUGGING DEBUG_P(PerlIO_printf(Perl_debug_log, 
"filter_del func %p", FPTR2DPTR(void*, funcp))); @@ -2848,6 +2902,8 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) #endif : maxlen; + PERL_ARGS_ASSERT_FILTER_READ; + if (!PL_parser || !PL_rsfp_filters) return -1; if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ @@ -2861,7 +2917,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) const int old_len = SvCUR(buf_sv); /* ensure buf_sv is large enough */ - SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ; + SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, correct_length)) <= 0) { if (PerlIO_error(PL_rsfp)) @@ -2870,6 +2926,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) return 0 ; /* end of file */ } SvCUR_set(buf_sv, old_len + len) ; + SvPVX(buf_sv)[old_len + len] = '\0'; } else { /* Want a line */ if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { @@ -2900,9 +2957,12 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) } STATIC char * -S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) +S_filter_gets(pTHX_ register SV *sv, STRLEN append) { dVAR; + + PERL_ARGS_ASSERT_FILTER_GETS; + #ifdef PERL_CR_FILTER if (!PL_rsfp_filters) { filter_add(S_cr_textfilter,NULL); @@ -2917,7 +2977,7 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) return NULL ; } else - return (sv_gets(sv, fp, append)); + return (sv_gets(sv, PL_rsfp, append)); } STATIC HV * @@ -2926,6 +2986,8 @@ S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) dVAR; GV *gv; + PERL_ARGS_ASSERT_FIND_IN_MY_STASH; + if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) return PL_curstash; @@ -3160,6 +3222,9 @@ Perl_madlex(pTHX) STATIC char * S_tokenize_use(pTHX_ int is_use, char *s) { dVAR; + + PERL_ARGS_ASSERT_TOKENIZE_USE; + if (PL_expect != XSTATE) yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", is_use ? 
"use" : "no")); @@ -3266,7 +3331,7 @@ Perl_yylex(pTHX) PL_thismad = PL_nexttoke[PL_lasttoke].next_mad; PL_nexttoke[PL_lasttoke].next_mad = 0; if (PL_thismad && PL_thismad->mad_key == '_') { - PL_thiswhite = (SV*)PL_thismad->mad_val; + PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val); PL_thismad->mad_val = 0; mad_free(PL_thismad); PL_thismad = 0; @@ -3561,8 +3626,17 @@ Perl_yylex(pTHX) default: if (isIDFIRST_lazy_if(s,UTF)) goto keylookup; - len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); - Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1); + { + unsigned char c = *s; + len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); + if (len > UNRECOGNIZED_PRECEDE_COUNT) { + d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; + } else { + d = PL_linestart; + } + *s = '\0'; + Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1); + } case 4: case 26: goto fake_eof; /* emulate EOF on ^D or ^Z */ @@ -3619,9 +3693,12 @@ Perl_yylex(pTHX) ++svp; sv_catpvs(PL_linestr, ";"); } - sv_free((SV*)PL_preambleav); + sv_free(MUTABLE_SV(PL_preambleav)); PL_preambleav = NULL; } + if (PL_minus_E) + sv_catpvs(PL_linestr, + "use feature ':5." STRINGIFY(PERL_VERSION) "';"); if (PL_minus_n || PL_minus_p) { sv_catpvs(PL_linestr, "LINE: while (<>) {"); if (PL_minus_l) @@ -3653,20 +3730,17 @@ Perl_yylex(pTHX) sv_catpvs(PL_linestr,"our @F=split(' ');"); } } - if (PL_minus_E) - sv_catpvs(PL_linestr, - "use feature ':5." 
STRINGIFY(PERL_VERSION) "';"); sv_catpvs(PL_linestr, "\n"); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; - if (PERLDB_LINE && PL_curstash != PL_debstash) + if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash) update_debugger_info(PL_linestr, NULL, 0); goto retry; } do { bof = PL_rsfp ? TRUE : FALSE; - if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) { + if ((s = filter_gets(PL_linestr, 0)) == NULL) { fake_eof: #ifdef PERL_MAD PL_realtokenstart = -1; @@ -3696,7 +3770,7 @@ Perl_yylex(pTHX) } PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_last_lop = PL_last_uni = NULL; - sv_setpvn(PL_linestr,"",0); + sv_setpvs(PL_linestr,""); TOKEN(';'); /* not infinite loop because rsfp is NULL now */ } /* If it looks like the start of a BOM or raw UTF-16, @@ -3732,7 +3806,7 @@ Perl_yylex(pTHX) sv_catsv(PL_thiswhite, PL_linestr); #endif if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) { - sv_setpvn(PL_linestr, "", 0); + sv_setpvs(PL_linestr, ""); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; @@ -3742,7 +3816,7 @@ Perl_yylex(pTHX) incline(s); } while (PL_doextract); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; - if (PERLDB_LINE && PL_curstash != PL_debstash) + if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash) update_debugger_info(PL_linestr, NULL, 0); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; @@ -3852,7 +3926,6 @@ Perl_yylex(pTHX) *s = '#'; /* Don't try to parse shebang line */ } #endif /* ALTERNATE_SHEBANG */ -#ifndef MACOS_TRADITIONAL if (!d && *s == '#' && ipathend > ipath && @@ -3868,7 +3941,7 @@ Perl_yylex(pTHX) while (s < PL_bufend && isSPACE(*s)) s++; if (s < PL_bufend) { - Newxz(newargv,PL_origargc+3,char*); + 
Newx(newargv,PL_origargc+3,char*); newargv[1] = s; while (s < PL_bufend && !isSPACE(*s)) s++; @@ -3883,7 +3956,6 @@ Perl_yylex(pTHX) PERL_FPU_POST_EXEC Perl_croak(aTHX_ "Can't exec %s", ipath); } -#endif if (d) { while (*d && !isSPACE(*d)) d++; @@ -3898,7 +3970,14 @@ Perl_yylex(pTHX) const char *d1 = d; do { - if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') { + bool baduni = FALSE; + if (*d1 == 'C') { + const char *d2 = d1 + 1; + if (parse_unicode_opts((const char **)&d2) + != PL_unicode) + baduni = TRUE; + } + if (baduni || *d1 == 'M' || *d1 == 'm') { const char * const m = d1; while (*d1 && !isSPACE(*d1)) d1++; @@ -3915,17 +3994,17 @@ Perl_yylex(pTHX) } while (argc && argv[0][0] == '-' && argv[0][1]); init_argv_symbols(argc,argv); } - if ((PERLDB_LINE && !oldpdb) || + if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) /* if we have already added "LINE: while (<>) {", we must not do it again */ { - sv_setpvn(PL_linestr, "", 0); + sv_setpvs(PL_linestr, ""); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; PL_preambled = FALSE; - if (PERLDB_LINE) + if (PERLDB_LINE || PERLDB_SAVESRC) (void)gv_fetchfile(PL_origfilename); goto retry; } @@ -3946,9 +4025,6 @@ Perl_yylex(pTHX) "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); #endif case ' ': case '\t': case '\f': case 013: -#ifdef MACOS_TRADITIONAL - case '\312': -#endif #ifdef PERL_MAD PL_realtokenstart = -1; if (!PL_thiswhite) @@ -4019,7 +4095,7 @@ Perl_yylex(pTHX) if (!PL_thiswhite) PL_thiswhite = newSVpvs(""); if (CopLINE(PL_curcop) == 1) { - sv_setpvn(PL_thiswhite, "", 0); + sv_setpvs(PL_thiswhite, ""); PL_faketokens = 0; } sv_catpvn(PL_thiswhite, s, d - s); @@ -4191,7 +4267,10 @@ Perl_yylex(pTHX) BOop(OP_BIT_XOR); case '[': PL_lex_brackets++; - /* FALL THROUGH */ + { + const char tmp = *s++; + OPERATOR(tmp); + } case '~': if (s[1] == '~' 
&& (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) @@ -4241,6 +4320,7 @@ Perl_yylex(pTHX) case KEY_or: case KEY_and: case KEY_for: + case KEY_foreach: case KEY_unless: case KEY_if: case KEY_while: @@ -4276,11 +4356,6 @@ Perl_yylex(pTHX) if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { sv_free(sv); if (PL_in_my == KEY_our) { -#ifdef USE_ITHREADS - GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval)); -#else - /* skip to avoid loading attributes.pm */ -#endif deprecate(":unique"); } else @@ -4295,7 +4370,7 @@ Perl_yylex(pTHX) } else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { sv_free(sv); - CvLOCKED_on(PL_compcv); + deprecate(":locked"); } else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { sv_free(sv); @@ -4583,7 +4658,7 @@ Perl_yylex(pTHX) if (PL_madskills) { if (!PL_thiswhite) PL_thiswhite = newSVpvs(""); - sv_catpvn(PL_thiswhite,"}",1); + sv_catpvs(PL_thiswhite,"}"); } #endif return yylex(); /* ignore fake brackets */ @@ -4620,7 +4695,7 @@ Perl_yylex(pTHX) && isIDFIRST_lazy_if(s,UTF)) { CopLINE_dec(PL_curcop); - Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi); + Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); CopLINE_inc(PL_curcop); } BAop(OP_BIT_AND); @@ -4777,9 +4852,7 @@ Perl_yylex(pTHX) if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - PL_expect = XTERM; - deprecate_old(commaless_variable_list); - return REPORT(','); /* grandfather non-comma-format format */ + return deprecate_commaless_var_list(); } } @@ -4959,10 +5032,10 @@ Perl_yylex(pTHX) AOPERATOR(DORDOR); } case '?': /* may either be conditional or pattern */ - if(PL_expect == XOPERATOR) { + if (PL_expect == XOPERATOR) { char tmp = *s++; if(tmp == '?') { - OPERATOR('?'); + OPERATOR('?'); } else { tmp = *s++; @@ -5001,6 +5074,10 @@ Perl_yylex(pTHX) PL_expect = XSTATE; goto rightbracket; } + if (PL_expect == XSTATE && s[1] == '.' 
&& s[2] == '.') { + s += 3; + OPERATOR(YADAYADA); + } if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { char tmp = *s++; if (*s == tmp) { @@ -5031,9 +5108,7 @@ Perl_yylex(pTHX) DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - PL_expect = XTERM; - deprecate_old(commaless_variable_list); - return REPORT(','); /* grandfather non-comma-format format */ + return deprecate_commaless_var_list(); } else no_op("String",s); @@ -5048,9 +5123,7 @@ Perl_yylex(pTHX) DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - PL_expect = XTERM; - deprecate_old(commaless_variable_list); - return REPORT(','); /* grandfather non-comma-format format */ + return deprecate_commaless_var_list(); } else no_op("String",s); @@ -5080,9 +5153,9 @@ Perl_yylex(pTHX) case '\\': s++; - if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", - *s, *s); + if (PL_lex_inwhat && isDIGIT(*s)) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", + *s, *s); if (PL_expect == XOPERATOR) no_op("Backslash",s); OPERATOR(REFGEN); @@ -5170,14 +5243,17 @@ Perl_yylex(pTHX) /* Is this a label? */ if (!tmp && PL_expect == XSTATE && d < PL_bufend && *d == ':' && *(d + 1) != ':') { + tmp = keyword(PL_tokenbuf, len, 0); + if (tmp) + Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf); s = d + 1; pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf); CLINE; TOKEN(LABEL); } - - /* Check for keywords */ - tmp = keyword(PL_tokenbuf, len, 0); + else + /* Check for keywords */ + tmp = keyword(PL_tokenbuf, len, 0); /* Is this a word before a => operator? 
*/ if (*d == '=' && d[1] == '>') { @@ -5222,17 +5298,16 @@ Perl_yylex(pTHX) } else { /* no override */ tmp = -tmp; - if (tmp == KEY_dump && ckWARN(WARN_MISC)) { - Perl_warner(aTHX_ packWARN(WARN_MISC), - "dump() better written as CORE::dump()"); + if (tmp == KEY_dump) { + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), + "dump() better written as CORE::dump()"); } gv = NULL; gvp = 0; - if (hgv && tmp != KEY_x && tmp != KEY_CORE - && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */ - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous call resolved as CORE::%s(), %s", - GvENAME(hgv), "qualify as such or use &"); + if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */ + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous call resolved as CORE::%s(), %s", + GvENAME(hgv), "qualify as such or use &"); } } @@ -5276,7 +5351,7 @@ Perl_yylex(pTHX) if (PL_expect == XOPERATOR) { if (PL_bufptr == PL_linestart) { CopLINE_dec(PL_curcop); - Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi); + Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); CopLINE_inc(PL_curcop); } else @@ -5358,7 +5433,7 @@ Perl_yylex(pTHX) /* Real typeglob, so get the real subroutine: */ ? GvCVu(gv) /* A proxy for a subroutine in this package? */ - : SvOK(gv) ? (CV *) gv : NULL) + : SvOK(gv) ? MUTABLE_CV(gv) : NULL) : NULL; /* See if it's the indirect object for a list operator. 
*/ @@ -5472,10 +5547,10 @@ Perl_yylex(pTHX) /* Not a method, so call it a subroutine (if defined) */ if (cv) { - if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS)) - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of -%s resolved as -&%s()", - PL_tokenbuf, PL_tokenbuf); + if (lastchar == '-') + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of -%s resolved as -&%s()", + PL_tokenbuf, PL_tokenbuf); /* Check for a constant sub */ if ((sv = gv_const_sv(gv))) { its_constant: @@ -5507,7 +5582,7 @@ Perl_yylex(pTHX) SvPOK(cv)) { STRLEN protolen; - const char *proto = SvPV_const((SV*)cv, protolen); + const char *proto = SvPV_const(MUTABLE_SV(cv), protolen); if (!protolen) TERM(FUNC0SUB); if ((*proto == '$' || *proto == '_') && proto[1] == '\0') @@ -5592,6 +5667,18 @@ Perl_yylex(pTHX) pl_yylval.opval->op_private |= OPpCONST_STRICT; else { bareword: + /* after "print" and similar functions (corresponding to + * "F? L" in opcode.pl), whatever wasn't already parsed as + * a filehandle should be subject to "strict subs". 
+ * Likewise for the optional indirect-object argument to system + * or exec, which can't be a bareword */ + if ((PL_last_lop_op == OP_PRINT + || PL_last_lop_op == OP_PRTF + || PL_last_lop_op == OP_SAY + || PL_last_lop_op == OP_SYSTEM + || PL_last_lop_op == OP_EXEC) + && (PL_hints & HINT_STRICT_SUBS)) + pl_yylval.opval->op_private |= OPpCONST_STRICT; if (lastchar != '-') { if (ckWARN(WARN_RESERVED)) { d = PL_tokenbuf; @@ -5605,14 +5692,13 @@ Perl_yylex(pTHX) } safe_bareword: - if ((lastchar == '*' || lastchar == '%' || lastchar == '&') - && ckWARN_d(WARN_AMBIGUOUS)) { - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Operator or semicolon missing before %c%s", - lastchar, PL_tokenbuf); - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of %c resolved as operator %c", - lastchar, lastchar); + if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Operator or semicolon missing before %c%s", + lastchar, PL_tokenbuf); + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of %c resolved as operator %c", + lastchar, lastchar); } TOKEN(WORD); } @@ -5723,8 +5809,8 @@ Perl_yylex(pTHX) sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart); PL_realtokenstart = -1; } - while ((s = filter_gets(PL_endwhite, PL_rsfp, - SvCUR(PL_endwhite))) != NULL) ; + while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite))) + != NULL) ; } #endif PL_rsfp = NULL; @@ -6285,6 +6371,7 @@ Perl_yylex(pTHX) case KEY_package: s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_version(s, FALSE); OPERATOR(PACKAGE); case KEY_pipe: @@ -6604,7 +6691,7 @@ Perl_yylex(pTHX) (*s == ':' && s[1] == ':')) { #ifdef PERL_MAD - SV *nametoke; + SV *nametoke = NULL; #endif PL_expect = XBLOCK; @@ -6643,7 +6730,7 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "Missing name in \"my sub\""); PL_expect = XTERMBLOCK; attrful = XATTRTERM; - sv_setpvn(PL_subname,"?",1); + sv_setpvs(PL_subname,"?"); have_name = FALSE; } @@ -6665,6 +6752,12 @@ 
Perl_yylex(pTHX) if (*s == '(') { char *p; bool bad_proto = FALSE; + bool in_brackets = FALSE; + char greedy_proto = ' '; + bool proto_after_greedy_proto = FALSE; + bool must_be_last = FALSE; + bool underscore = FALSE; + bool seen_underscore = FALSE; const bool warnsyntax = ckWARN(WARN_SYNTAX); s = scan_str(s,!!PL_madskills,FALSE); @@ -6676,14 +6769,47 @@ Perl_yylex(pTHX) for (p = d; *p; ++p) { if (!isSPACE(*p)) { d[tmp++] = *p; - if (warnsyntax && !strchr("$@%*;[]&\\_", *p)) - bad_proto = TRUE; + + if (warnsyntax) { + if (must_be_last) + proto_after_greedy_proto = TRUE; + if (!strchr("$@%*;[]&\\_", *p)) { + bad_proto = TRUE; + } + else { + if ( underscore ) { + if ( *p != ';' ) + bad_proto = TRUE; + underscore = FALSE; + } + if ( *p == '[' ) { + in_brackets = TRUE; + } + else if ( *p == ']' ) { + in_brackets = FALSE; + } + else if ( (*p == '@' || *p == '%') && + ( tmp < 2 || d[tmp-2] != '\\' ) && + !in_brackets ) { + must_be_last = TRUE; + greedy_proto = *p; + } + else if ( *p == '_' ) { + underscore = seen_underscore = TRUE; + } + } + } } } d[tmp] = '\0'; + if (proto_after_greedy_proto) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "Prototype after '%c' for %"SVf" : %s", + greedy_proto, SVfARG(PL_subname), d); if (bad_proto) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Illegal character in prototype for %"SVf" : %s", + "Illegal character %sin prototype for %"SVf" : %s", + seen_underscore ? "after '_' " : "", SVfARG(PL_subname), d); SvCUR_set(PL_lex_stuff, tmp); have_proto = TRUE; @@ -7003,11 +7129,11 @@ S_pending_ident(pTHX) and @foo isn't a variable we can find in the symbol table. */ - if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { + if (ckWARN(WARN_AMBIGUOUS) && + pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0, SVt_PVAV); if ((!gv || ((PL_tokenbuf[0] == '@') ? 
!GvAV(gv) : !GvHV(gv))) - && ckWARN(WARN_AMBIGUOUS) /* DO NOT warn for @- and @+ */ && !( PL_tokenbuf[2] == '\0' && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' )) @@ -7015,8 +7141,8 @@ S_pending_ident(pTHX) { /* Downgraded from fatal to warning 20000522 mjd */ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of %s in string", - PL_tokenbuf); + "Possible unintended interpolation of %s in string", + PL_tokenbuf); } } @@ -7057,6 +7183,9 @@ I32 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords) { dVAR; + + PERL_ARGS_ASSERT_KEYWORD; + switch (len) { case 1: /* 5 tokens of length 1 */ @@ -8554,8 +8683,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords) name[4] == 'i' && name[5] == 'f') { /* elseif */ - if(ckWARN_d(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif"); + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif"); } goto unknown; @@ -10445,6 +10573,8 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) { dVAR; + PERL_ARGS_ASSERT_CHECKCOMMA; + if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ if (ckWARN(WARN_SYNTAX)) { int level = 1; @@ -10507,6 +10637,8 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, SV *cv, *typesv; const char *why1 = "", *why2 = "", *why3 = ""; + PERL_ARGS_ASSERT_NEW_CONSTANT; + if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { SV *msg; @@ -10601,6 +10733,9 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag dVAR; register char *d = dest; register char * const e = d + destlen - 3; /* two-character token, ending NUL */ + + PERL_ARGS_ASSERT_SCAN_WORD; + for (;;) { if (d >= e) Perl_croak(aTHX_ ident_too_long); @@ -10644,6 +10779,8 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL register char *d = dest; register char * const e = d + destlen + 3; /* two-character token, ending NUL */ + 
PERL_ARGS_ASSERT_SCAN_IDENT; + if (isSPACE(*s)) s = PEEKSPACE(s); if (isDIGIT(*s)) { @@ -10798,6 +10935,8 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL void Perl_pmflag(pTHX_ U32* pmfl, int ch) { + PERL_ARGS_ASSERT_PMFLAG; + PERL_UNUSED_CONTEXT; if (ch<256) { const char c = (char)ch; @@ -10823,6 +10962,7 @@ S_scan_pat(pTHX_ char *start, I32 type) char *modstart; #endif + PERL_ARGS_ASSERT_SCAN_PAT; if (!s) { const char * const delimiter = skipspace(start); @@ -10843,10 +10983,10 @@ S_scan_pat(pTHX_ char *start, I32 type) matches. */ assert(type != OP_TRANS); if (PL_curstash) { - MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab); + MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); U32 elements; if (!mg) { - mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0, + mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, 0); } elements = mg->mg_len / sizeof(PMOP**); @@ -10868,11 +11008,10 @@ S_scan_pat(pTHX_ char *start, I32 type) } #endif /* issue a warning if /c is specified,but /g is not */ - if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL) - && ckWARN(WARN_REGEXP)) + if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) { - Perl_warner(aTHX_ packWARN(WARN_REGEXP), - "Use of /c modifier is meaningless without /g" ); + Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), + "Use of /c modifier is meaningless without /g" ); } PL_lex_op = (OP*)pm; @@ -10892,6 +11031,8 @@ S_scan_subst(pTHX_ char *start) char *modstart; #endif + PERL_ARGS_ASSERT_SCAN_SUBST; + pl_yylval.ival = OP_NULL; s = scan_str(start,!!PL_madskills,FALSE); @@ -10952,8 +11093,8 @@ S_scan_subst(pTHX_ char *start) PL_thismad = 0; } #endif - if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) { - Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); + if ((pm->op_pmflags & PMf_CONTINUE)) { + Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is 
meaningless in s///" ); } if (es) { @@ -10991,13 +11132,15 @@ S_scan_trans(pTHX_ char *start) register char* s; OP *o; short *tbl; - I32 squash; - I32 del; - I32 complement; + U8 squash; + U8 del; + U8 complement; #ifdef PERL_MAD char *modstart; #endif + PERL_ARGS_ASSERT_SCAN_TRANS; + pl_yylval.ival = OP_NULL; s = scan_str(start,!!PL_madskills,FALSE); @@ -11095,6 +11238,8 @@ S_scan_heredoc(pTHX_ register char *s) PL_realtokenstart = -1; #endif + PERL_ARGS_ASSERT_SCAN_HEREDOC; + s += 2; d = PL_tokenbuf; e = PL_tokenbuf + sizeof PL_tokenbuf - 1; @@ -11117,7 +11262,7 @@ S_scan_heredoc(pTHX_ register char *s) else term = '"'; if (!isALNUM_lazy_if(s,UTF)) - deprecate_old("bare << to mean <<\"\""); + deprecate("bare << to mean <<\"\""); for (; isALNUM_lazy_if(s,UTF); s++) { if (d < e) *d++ = *s; @@ -11266,7 +11411,7 @@ S_scan_heredoc(pTHX_ register char *s) PL_last_lop = PL_last_uni = NULL; } else - sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */ + sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ while (s >= PL_bufend) { /* multiple line string? 
*/ #ifdef PERL_MAD if (PL_madskills) { @@ -11278,7 +11423,8 @@ S_scan_heredoc(pTHX_ register char *s) } #endif if (!outer || - !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { + !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart + = filter_gets(PL_linestr, 0))) { CopLINE_set(PL_curcop, (line_t)PL_multi_start); missingterm(PL_tokenbuf); } @@ -11303,7 +11449,7 @@ S_scan_heredoc(pTHX_ register char *s) else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') PL_bufend[-1] = '\n'; #endif - if (PERLDB_LINE && PL_curstash != PL_debstash) + if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash) update_debugger_info(PL_linestr, NULL, 0); if (*s == term && memEQ(s,PL_tokenbuf,len)) { STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr); @@ -11358,10 +11504,11 @@ S_scan_inputsymbol(pTHX_ char *start) register char *s = start; /* current position in buffer */ char *end; I32 len; - char *d = PL_tokenbuf; /* start of temp holding space */ const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */ + PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; + end = strchr(s, '\n'); if (!end) end = PL_bufend; @@ -11558,6 +11705,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) char *tstart; #endif + PERL_ARGS_ASSERT_SCAN_STR; + /* skip space before the delimiter */ if (isSPACE(*s)) { s = PEEKSPACE(s); @@ -11787,7 +11936,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } #endif if (!PL_rsfp || - !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { + !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart + = filter_gets(PL_linestr, 0))) { sv_free(sv); CopLINE_set(PL_curcop, (line_t)PL_multi_start); return NULL; @@ -11799,7 +11949,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) CopLINE_inc(PL_curcop); /* update debugger info */ - if (PERLDB_LINE && PL_curstash != PL_debstash) + if ((PERLDB_LINE || PERLDB_SAVESRC) && 
PL_curstash != PL_debstash) update_debugger_info(PL_linestr, NULL, 0); /* having changed the buffer, we must update PL_bufend */ @@ -11898,6 +12048,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) const char *lastub = NULL; /* position of last underbar */ static char const number_too_long[] = "Number too long"; + PERL_ARGS_ASSERT_SCAN_NUM; + /* We use the first character to decide what type of number this is */ switch (*s) { @@ -11956,8 +12108,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s++; } @@ -11980,9 +12131,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* _ are ignored -- but warned about if consecutive */ case '_': - if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + if (lastub && s == lastub + 1) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; break; @@ -12024,10 +12175,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) && !(PL_hints & HINT_NEW_BINARY)) { overflowed = TRUE; n = (NV) u; - if (ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), - "Integer overflow in %s number", - base); + Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), + "Integer overflow in %s number", + base); } else u = x | b; /* add the digit to the end */ } @@ -12054,24 +12204,23 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* final misplaced underbar check */ if (s[-1] == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } sv = newSV(0); if (overflowed) { - if (n > 4294967295.0 && ckWARN(WARN_PORTABLE)) - Perl_warner(aTHX_ packWARN(WARN_PORTABLE), - "%s number > %s non-portable", - Base, max); + if (n > 
4294967295.0) + Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), + "%s number > %s non-portable", + Base, max); sv_setnv(sv, n); } else { #if UVSIZE > 4 - if (u > 0xffffffff && ckWARN(WARN_PORTABLE)) - Perl_warner(aTHX_ packWARN(WARN_PORTABLE), - "%s number > %s non-portable", - Base, max); + if (u > 0xffffffff) + Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), + "%s number > %s non-portable", + Base, max); #endif sv_setuv(sv, u); } @@ -12100,9 +12249,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if -w is on */ if (*s == '_') { - if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + if (lastub && s == lastub + 1) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } else { @@ -12116,8 +12265,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* final misplaced underbar check */ if (lastub && s == lastub + 1) { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } /* read a decimal portion if there is one. avoid @@ -12129,9 +12277,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) *d++ = *s++; if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s; } @@ -12142,9 +12289,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if (d >= e) Perl_croak(aTHX_ number_too_long); if (*s == '_') { - if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + if (lastub && s == lastub + 1) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s; } else @@ -12152,9 +12299,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } /* fractional part ending in underbar? 
*/ if (s[-1] == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); } if (*s == '.' && isDIGIT(s[1])) { /* oops, it's really a v-string, but without the "v" */ @@ -12173,9 +12319,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* stray preinitial _ */ if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } @@ -12185,9 +12330,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* stray initial _ */ if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } @@ -12200,10 +12344,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } else { if (((lastub && s == lastub + 1) || - (!isDIGIT(s[1]) && s[1] != '_')) - && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + (!isDIGIT(s[1]) && s[1] != '_'))) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } } @@ -12280,14 +12423,16 @@ S_scan_formline(pTHX_ register char *s) bool eofmt = FALSE; #ifdef PERL_MAD char *tokenstart = s; - SV* savewhite; - + SV* savewhite = NULL; + if (PL_madskills) { savewhite = PL_thiswhite; PL_thiswhite = 0; } #endif + PERL_ARGS_ASSERT_SCAN_FORMLINE; + while (!needargs) { if (*s == '.') { t = s+1; @@ -12343,7 +12488,7 @@ S_scan_formline(pTHX_ register char *s) PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart); } #endif - s = filter_gets(PL_linestr, PL_rsfp, 0); + s = filter_gets(PL_linestr, 0); #ifdef PERL_MAD tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr); #else @@ -12414,12 +12559,12 @@ Perl_start_subparse(pTHX_ I32 
is_format, U32 flags) save_item(PL_subname); SAVESPTR(PL_compcv); - PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV); + PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV)); CvFLAGS(PL_compcv) |= flags; PL_subline = CopLINE(PL_curcop); CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); - CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv); + CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; return oldsavestack_ix; @@ -12428,10 +12573,13 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) #ifdef __SC__ #pragma segment Perl_yylex #endif -int -Perl_yywarn(pTHX_ const char *const s) +static int +S_yywarn(pTHX_ const char *const s) { dVAR; + + PERL_ARGS_ASSERT_YYWARN; + PL_in_eval |= EVAL_WARNONLY; yyerror(s); PL_in_eval &= ~EVAL_WARNONLY; @@ -12448,6 +12596,8 @@ Perl_yyerror(pTHX_ const char *const s) SV *msg; int yychar = PL_parser->yychar; + PERL_ARGS_ASSERT_YYERROR; + if (!yychar || (yychar == ';' && !PL_rsfp)) where = "at EOF"; else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr && @@ -12520,8 +12670,7 @@ Perl_yyerror(pTHX_ const char *const s) PL_multi_end = 0; } if (PL_in_eval & EVAL_WARNONLY) { - if (ckWARN_d(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); } else qerror(msg); @@ -12546,6 +12695,9 @@ S_swallow_bom(pTHX_ U8 *s) { dVAR; const STRLEN slen = SvCUR(PL_linestr); + + PERL_ARGS_ASSERT_SWALLOW_BOM; + switch (s[0]) { case 0xFF: if (s[1] == 0xFE) { @@ -12555,30 +12707,8 @@ S_swallow_bom(pTHX_ U8 *s) #ifndef PERL_NO_UTF16_FILTER if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n"); s += 2; - utf16le: if (PL_bufend > (char*)s) { - U8 *news; - I32 newlen; - - filter_add(utf16rev_textfilter, NULL); - Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - utf16_to_utf8_reversed(s, news, - PL_bufend - (char*)s - 
1, - &newlen); - sv_setpvn(PL_linestr, (const char*)news, newlen); -#ifdef PERL_MAD - s = (U8*)SvPVX(PL_linestr); - Copy(news, s, newlen, U8); - s[newlen] = '\0'; -#endif - Safefree(news); - SvUTF8_on(PL_linestr); - s = (U8*)SvPVX(PL_linestr); -#ifdef PERL_MAD - /* FIXME - is this a general bug fix? */ - s[newlen] = '\0'; -#endif - PL_bufend = SvPVX(PL_linestr) + newlen; + s = add_utf16_textfilter(s, TRUE); } #else Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE"); @@ -12590,21 +12720,8 @@ S_swallow_bom(pTHX_ U8 *s) #ifndef PERL_NO_UTF16_FILTER if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); s += 2; - utf16be: if (PL_bufend > (char *)s) { - U8 *news; - I32 newlen; - - filter_add(utf16_textfilter, NULL); - Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - utf16_to_utf8(s, news, - PL_bufend - (char*)s, - &newlen); - sv_setpvn(PL_linestr, (const char*)news, newlen); - Safefree(news); - SvUTF8_on(PL_linestr); - s = (U8*)SvPVX(PL_linestr); - PL_bufend = SvPVX(PL_linestr) + newlen; + s = add_utf16_textfilter(s, FALSE); } #else Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE"); @@ -12630,7 +12747,7 @@ S_swallow_bom(pTHX_ U8 *s) * 00 xx 00 xx * are a good indicator of UTF-16BE. */ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); - goto utf16be; + s = add_utf16_textfilter(s, FALSE); } } #ifdef EBCDIC @@ -12648,7 +12765,7 @@ S_swallow_bom(pTHX_ U8 *s) * xx 00 xx 00 * are a good indicator of UTF-16LE. 
*/ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); - goto utf16le; + s = add_utf16_textfilter(s, TRUE); } } return (char*)s; @@ -12657,49 +12774,142 @@ S_swallow_bom(pTHX_ U8 *s) #ifndef PERL_NO_UTF16_FILTER static I32 -utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) +S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) { dVAR; - const STRLEN old = SvCUR(sv); - const I32 count = FILTER_READ(idx+1, sv, maxlen); + SV *const filter = FILTER_DATA(idx); + /* We re-use this each time round, throwing the contents away before we + return. */ + SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); + SV *const utf8_buffer = filter; + IV status = IoPAGE(filter); + const bool reverse = IoLINES(filter); + + /* As we're automatically added, at the lowest level, and hence only called + from this file, we can be sure that we're not called in block mode. Hence + don't bother writing code to deal with block mode. */ + if (maxlen) { + Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); + } + if (status < 0) { + Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status); + } DEBUG_P(PerlIO_printf(Perl_debug_log, - "utf16_textfilter(%p): %d %d (%d)\n", - FPTR2DPTR(void *, utf16_textfilter), - idx, maxlen, (int) count)); - if (count) { - U8* tmps; + "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", + FPTR2DPTR(void *, S_utf16_textfilter), + reverse ? 
'l' : 'b', idx, maxlen, status, + (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); + + while (1) { + STRLEN chars; + STRLEN have; I32 newlen; - Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8); - Copy(SvPVX_const(sv), tmps, old, char); - utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old, - SvCUR(sv) - old, &newlen); - sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); + U8 *end; + /* First, look in our buffer of existing UTF-8 data: */ + char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); + + if (nl) { + ++nl; + } else if (status == 0) { + /* EOF */ + IoPAGE(filter) = 0; + nl = SvEND(utf8_buffer); + } + if (nl) { + sv_catpvn(sv, SvPVX(utf8_buffer), nl - SvPVX(utf8_buffer)); + /* Everything else in this code works just fine if SVp_POK isn't + set. This, however, needs it, and we need it to work, else + we loop infinitely because the buffer is never consumed. */ + sv_chop(utf8_buffer, nl); + break; + } + + /* OK, not a complete line there, so need to read some more UTF-16. + Read an extra octet if the buffer currently has an odd number. */ + while (1) { + if (status <= 0) + break; + if (SvCUR(utf16_buffer) >= 2) { + /* Location of the high octet of the last complete code point. + Gosh, UTF-16 is a pain. All the benefits of variable length, + *coupled* with all the benefits of partial reads and + endianness. */ + const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) + + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); + + if (*last_hi < 0xd8 || *last_hi > 0xdb) { + break; + } + + /* We have the first half of a surrogate. Read more. 
*/ + DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); + } + + status = FILTER_READ(idx + 1, utf16_buffer, + 160 + (SvCUR(utf16_buffer) & 1)); + DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer))); + DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); + if (status < 0) { + /* Error */ + IoPAGE(filter) = status; + return status; + } + } + + chars = SvCUR(utf16_buffer) >> 1; + have = SvCUR(utf8_buffer); + SvGROW(utf8_buffer, have + chars * 3 + 1); + + if (reverse) { + end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), + (U8*)SvPVX_const(utf8_buffer) + have, + chars * 2, &newlen); + } else { + end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), + (U8*)SvPVX_const(utf8_buffer) + have, + chars * 2, &newlen); + } + SvCUR_set(utf8_buffer, have + newlen); + *end = '\0'; + + /* No need to keep this SV "well-formed" with a '\0' after the end, as + it's private to us, and utf16_to_utf8{,reversed} take a + (pointer,length) pair, rather than a NUL-terminated string. 
*/ + if(SvCUR(utf16_buffer) & 1) { + *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; + SvCUR_set(utf16_buffer, 1); + } else { + SvCUR_set(utf16_buffer, 0); + } } - DEBUG_P({sv_dump(sv);}); + DEBUG_P(PerlIO_printf(Perl_debug_log, + "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", + status, + (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); + DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); return SvCUR(sv); } -static I32 -utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen) +static U8 * +S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) { - dVAR; - const STRLEN old = SvCUR(sv); - const I32 count = FILTER_READ(idx+1, sv, maxlen); - DEBUG_P(PerlIO_printf(Perl_debug_log, - "utf16rev_textfilter(%p): %d %d (%d)\n", - FPTR2DPTR(void *, utf16rev_textfilter), - idx, maxlen, (int) count)); - if (count) { - U8* tmps; - I32 newlen; - Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8); - Copy(SvPVX_const(sv), tmps, old, char); - utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old, - SvCUR(sv) - old, &newlen); - sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); + SV *filter = filter_add(S_utf16_textfilter, NULL); + + IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); + sv_setpvs(filter, ""); + IoLINES(filter) = reversed; + IoPAGE(filter) = 1; /* Not EOF */ + + /* Sadly, we have to return a valid pointer, come what may, so we have to + ignore any error return from this. 
*/ + SvCUR_set(PL_linestr, 0); + if (FILTER_READ(0, PL_linestr, 0)) { + SvUTF8_on(PL_linestr); + } else { + SvUTF8_on(PL_linestr); } - DEBUG_P({ sv_dump(sv); }); - return count; + PL_bufend = SvEND(PL_linestr); + return (U8*)SvPVX(PL_linestr); } #endif @@ -12724,6 +12934,9 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) dVAR; const char *pos = s; const char *start = s; + + PERL_ARGS_ASSERT_SCAN_VSTRING; + if (*pos == 'v') pos++; /* get past 'v' */ while (pos < e && (isDIGIT(*pos) || *pos == '_')) pos++; @@ -12745,7 +12958,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) if (*s == 'v') s++; /* get past 'v' */ - sv_setpvn(sv, "", 0); + sv_setpvs(sv, ""); for (;;) { /* this is atoi() that tolerates underscores */ @@ -12758,9 +12971,9 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) const UV orev = rev; rev += (*end - '0') * mult; mult *= 10; - if (orev > rev && ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), - "Integer overflow in decimal number"); + if (orev > rev) + Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), + "Integer overflow in decimal number"); } } #ifdef EBCDIC