X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=b8abbd854f001057679312ed03b534e0853cf490;hb=484c818fbcf400d897228be2cf2b34b67be8a340;hp=580ca9153abd9f2ca0bd301d688611f466a54ca9;hpb=af41e527c8e130b652c27870cfc5eef3f1e00711;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index 580ca91..b8abbd8 100644 --- a/toke.c +++ b/toke.c @@ -96,16 +96,12 @@ # define PL_nextval (PL_parser->nextval) #endif +/* This can't be done with embed.fnc, because struct yy_parser contains a + member named pending_ident, which clashes with the generated #define */ static int S_pending_ident(pTHX); static const char ident_too_long[] = "Identifier too long"; -static const char commaless_variable_list[] = "comma-less variable list"; - -#ifndef PERL_NO_UTF16_FILTER -static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen); -static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); -#endif #ifdef PERL_MAD # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; } @@ -124,16 +120,14 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) #endif +/* The maximum number of characters preceding the unrecognized one to display */ +#define UNRECOGNIZED_PRECEDE_COUNT 10 + /* In variables named $^X, these are the legal values for X. * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) -/* On MacOS, respect nonbreaking spaces */ -#ifdef MACOS_TRADITIONAL -#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t') -#else #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') -#endif /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement @@ -349,6 +343,8 @@ static struct debug_tokens { { OROP, TOKENTYPE_IVAL, "OROP" }, { OROR, TOKENTYPE_NONE, "OROR" }, { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, + { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, + { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, { POSTINC, TOKENTYPE_NONE, "POSTINC" }, @@ -453,6 +449,13 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s) #endif +static int +S_deprecate_commaless_var_list(pTHX) { + PL_expect = XTERM; + deprecate("comma-less variable list"); + return REPORT(','); /* grandfather non-comma-format format */ +} + /* * S_ao * @@ -587,37 +590,6 @@ S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) } /* - * Perl_deprecate - */ - -void -Perl_deprecate(pTHX_ const char *const s) -{ - PERL_ARGS_ASSERT_DEPRECATE; - - if (ckWARN(WARN_DEPRECATED)) - Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s); -} - -void -Perl_deprecate_old(pTHX_ const char *const s) -{ - /* This function should NOT be called for any new deprecated warnings */ - /* Use Perl_deprecate instead */ - /* */ - /* It is here to maintain backward compatibility with the pre-5.8 */ - /* warnings category hierarchy. The "deprecated" category used to */ - /* live under the "syntax" category. It is now a top-level category */ - /* in its own right. */ - - PERL_ARGS_ASSERT_DEPRECATE_OLD; - - if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), - "Use of %s is deprecated", s); -} - -/* * experimental text filters for win32 carriage-returns, utf16-to-utf8 and * utf16-to-utf8-reversed. */ @@ -824,6 +796,8 @@ S_incline(pTHX_ const char *s) n = s; while (isDIGIT(*s)) s++; + if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') + return; while (SPACE_OR_TAB(*s)) s++; if (*s == '"' && (t = strchr(s+1, '"'))) { @@ -1082,8 +1056,8 @@ S_skipspace(pTHX_ register char *s) curoff = s - SvPVX(PL_linestr); #endif - if ((s = filter_gets(PL_linestr, PL_rsfp, - (prevlen = SvCUR(PL_linestr)))) == NULL) + if ((s = filter_gets(PL_linestr, (prevlen = SvCUR(PL_linestr)))) + == NULL) { #ifdef PERL_MAD if (PL_madskills && curoff != startoff) { @@ -1219,11 +1193,9 @@ S_check_uni(pTHX) if ((t = strchr(s, '(')) && t < PL_bufptr) return; - if (ckWARN_d(WARN_AMBIGUOUS)){ - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Warning: Use of \"%.*s\" without parentheses is ambiguous", - (int)(s - PL_last_uni), PL_last_uni); - } + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Warning: Use of \"%.*s\" without parentheses is ambiguous", + (int)(s - PL_last_uni), PL_last_uni); } /* @@ -1384,7 +1356,9 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) { dVAR; SV * const sv = newSVpvn_utf8(start, len, - UTF && !IN_BYTES + !IN_BYTES + && UTF + && !is_ascii_string((const U8*)start, len) && is_utf8_string((const U8*)start, len)); return sv; } @@ -1930,7 +1904,9 @@ S_sublex_done(pTHX) handle \cV (control characters) handle printf-style backslashes (\f, \r, \n, etc) } (end switch) + continue } (end if backslash) + handle regular character } (end while character to read) */ @@ -1940,13 +1916,32 @@ S_scan_const(pTHX_ char *start) { dVAR; register char *send = PL_bufend; /* end of the constant */ - SV *sv = newSV(send - start); /* sv for the constant */ + SV *sv = newSV(send - start); /* sv for the constant. See + note below on sizing. */ register char *s = start; /* start of the constant */ register char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ bool didrange = FALSE; /* did we just finish a range? */ I32 has_utf8 = FALSE; /* Output constant is UTF8 */ - I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */ + I32 this_utf8 = UTF; /* Is the source string assumed + to be UTF8? But, this can + show as true when the source + isn't utf8, as for example + when it is entirely composed + of hex constants */ + + /* Note on sizing: The scanned constant is placed into sv, which is + * initialized by newSV() assuming one byte of output for every byte of + * input. This routine expects newSV() to allocate an extra byte for a + * trailing NUL, which this routine will append if it gets to the end of + * the input. There may be more bytes of input than output (eg., \N{LATIN + * CAPITAL LETTER A}), or more output than input if the constant ends up + * recoded to utf8, but each time a construct is found that might increase + * the needed size, SvGROW() is called. Its size parameter each time is + * based on the best guess estimate at the time, namely the length used so + * far, plus the length the current construct will occupy, plus room for + * the trailing NUL, plus one byte for every input byte still unscanned */ + UV uv; #ifdef EBCDIC UV literal_endpoint = 0; @@ -2170,9 +2165,9 @@ S_scan_const(pTHX_ char *start) if (!PL_lex_inpat) /* not a regexp, so $ must be var */ break; if (s + 1 < send && !strchr("()| \r\n\t", s[1])) { - if (s[1] == '\\' && ckWARN(WARN_AMBIGUOUS)) { - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of $\\ in regex"); + if (s[1] == '\\') { + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Possible unintended interpolation of $\\ in regex"); } break; /* in regexp, $ might be tail anchor */ } @@ -2188,8 +2183,7 @@ S_scan_const(pTHX_ char *start) if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); *--s = '$'; break; } @@ -2217,27 +2211,26 @@ S_scan_const(pTHX_ char *start) /* FALL THROUGH */ default: { - if ((isALPHA(*s) || isDIGIT(*s)) && - ckWARN(WARN_MISC)) - Perl_warner(aTHX_ packWARN(WARN_MISC), - "Unrecognized escape \\%c passed through", - *s); + if ((isALPHA(*s) || isDIGIT(*s))) + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), + "Unrecognized escape \\%c passed through", + *s); /* default action is to copy the quoted character */ goto default_action; } - /* \132 indicates an octal constant */ + /* eg. \132 indicates the octal constant 0x132 */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { I32 flags = 0; STRLEN len = 3; - uv = grok_oct(s, &len, &flags, NULL); + uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL)); s += len; } goto NUM_ESCAPE_INSERT; - /* \x24 indicates a hex constant */ + /* eg. \x24 indicates the hex constant 0x24 */ case 'x': ++s; if (*s == '{') { @@ -2252,67 +2245,47 @@ S_scan_const(pTHX_ char *start) continue; } len = e - s; - uv = grok_hex(s, &len, &flags, NULL); + uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); s = e + 1; } else { { STRLEN len = 2; I32 flags = PERL_SCAN_DISALLOW_PREFIX; - uv = grok_hex(s, &len, &flags, NULL); + uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); s += len; } } NUM_ESCAPE_INSERT: - /* Insert oct or hex escaped character. - * There will always enough room in sv since such - * escapes will be longer than any UTF-8 sequence - * they can end up as. */ + /* Insert oct, hex, or \N{U+...} escaped character. There will + * always be enough room in sv since such escapes will be + * longer than any UTF-8 sequence they can end up as, except if + * they force us to recode the rest of the string into utf8 */ - /* We need to map to chars to ASCII before doing the tests - to cover EBCDIC - */ - if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) { + /* Here uv is the ordinal of the next character being added in + * unicode (converted from native). (It has to be done before + * here because \N is interpreted as unicode, and oct and hex + * as native.) */ + if (!UNI_IS_INVARIANT(uv)) { if (!has_utf8 && uv > 255) { - /* Might need to recode whatever we have - * accumulated so far if it contains any - * hibit chars. - * - * (Can't we keep track of that and avoid - * this rescan? --jhi) - */ - int hicount = 0; - U8 *c; - for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) { - if (!NATIVE_IS_INVARIANT(*c)) { - hicount++; - } - } - if (hicount) { - const STRLEN offset = d - SvPVX_const(sv); - U8 *src, *dst; - d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset; - src = (U8 *)d - 1; - dst = src+hicount; - d += hicount; - while (src >= (const U8 *)SvPVX_const(sv)) { - if (!NATIVE_IS_INVARIANT(*src)) { - const U8 ch = NATIVE_TO_ASCII(*src); - *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch); - *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch); - } - else { - *dst-- = *src; - } - src--; - } - } + /* Might need to recode whatever we have accumulated so + * far if it contains any chars variant in utf8 or + * utf-ebcdic. */ + + SvCUR_set(sv, d - SvPVX_const(sv)); + SvPOK_on(sv); + *d = '\0'; + /* See Note on sizing above. */ + sv_utf8_upgrade_flags_grow(sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + UNISKIP(uv) + (STRLEN)(send - s) + 1); + d = SvPVX(sv) + SvCUR(sv); + has_utf8 = TRUE; } - if (has_utf8 || uv > 255) { - d = (char*)uvchr_to_utf8((U8*)d, uv); - has_utf8 = TRUE; + if (has_utf8) { + d = (char*)uvuni_to_utf8((U8*)d, uv); if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { PL_sublex_info.sub_op->op_private |= @@ -2333,7 +2306,8 @@ S_scan_const(pTHX_ char *start) } continue; - /* \N{LATIN SMALL LETTER A} is a named character */ + /* \N{LATIN SMALL LETTER A} is a named character, and so is + * \N{U+0041} */ case 'N': ++s; if (*s == '{') { @@ -2348,7 +2322,8 @@ S_scan_const(pTHX_ char *start) goto cont_scan; } if (e > s + 2 && s[1] == 'U' && s[2] == '+') { - /* \N{U+...} */ + /* \N{U+...} The ... is a unicode value even on EBCDIC + * machines */ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | PERL_SCAN_DISALLOW_PREFIX; s += 3; @@ -2386,22 +2361,24 @@ S_scan_const(pTHX_ char *start) } } #endif + /* If destination is not in utf8 but this new character is, + * recode the dest to utf8 */ if (!has_utf8 && SvUTF8(res)) { - const char * const ostart = SvPVX_const(sv); - SvCUR_set(sv, d - ostart); + SvCUR_set(sv, d - SvPVX_const(sv)); SvPOK_on(sv); *d = '\0'; - sv_utf8_upgrade(sv); - /* this just broke our allocation above... */ - SvGROW(sv, (STRLEN)(send - start)); + /* See Note on sizing above. */ + sv_utf8_upgrade_flags_grow(sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + len + (STRLEN)(send - s) + 1); d = SvPVX(sv) + SvCUR(sv); has_utf8 = TRUE; - } - if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ - const char * const odest = SvPVX_const(sv); + } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ - SvGROW(sv, (SvLEN(sv) + len - (e - s + 4))); - d = SvPVX(sv) + (d - odest); + /* See Note on sizing above. (NOTE: SvCUR() is not set + * correctly here). */ + const STRLEN off = d - SvPVX_const(sv); + d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off; } #ifdef EBCDIC if (!dorange) @@ -2466,20 +2443,41 @@ S_scan_const(pTHX_ char *start) #endif default_action: - /* If we started with encoded form, or already know we want it - and then encode the next character */ - if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) { + /* If we started with encoded form, or already know we want it, + then encode the next character */ + if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) { STRLEN len = 1; + + + /* One might think that it is wasted effort in the case of the + * source being utf8 (this_utf8 == TRUE) to take the next character + * in the source, convert it to an unsigned value, and then convert + * it back again. But the source has not been validated here. The + * routine that does the conversion checks for errors like + * malformed utf8 */ + const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s); const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv)); - s += len; - if (need > len) { - /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */ + if (!has_utf8) { + SvCUR_set(sv, d - SvPVX_const(sv)); + SvPOK_on(sv); + *d = '\0'; + /* See Note on sizing above. */ + sv_utf8_upgrade_flags_grow(sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + need + (STRLEN)(send - s) + 1); + d = SvPVX(sv) + SvCUR(sv); + has_utf8 = TRUE; + } else if (need > len) { + /* encoded value larger than old, may need extra space (NOTE: + * SvCUR() is not set correctly here). See Note on sizing + * above. */ const STRLEN off = d - SvPVX_const(sv); - d = SvGROW(sv, SvLEN(sv) + (need-len)) + off; + d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off; } + s += len; + d = (char*)uvchr_to_utf8((U8*)d, nextuv); - has_utf8 = TRUE; #ifdef EBCDIC if (uv > 255 && !dorange) native_range = FALSE; @@ -2798,7 +2796,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) bare_package: start_force(PL_curforce); NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, - newSVpvn(tmpbuf,len)); + S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; if (PL_madskills) curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start)); @@ -2921,7 +2919,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) const int old_len = SvCUR(buf_sv); /* ensure buf_sv is large enough */ - SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ; + SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, correct_length)) <= 0) { if (PerlIO_error(PL_rsfp)) @@ -2930,6 +2928,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) return 0 ; /* end of file */ } SvCUR_set(buf_sv, old_len + len) ; + SvPVX(buf_sv)[old_len + len] = '\0'; } else { /* Want a line */ if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { @@ -2960,7 +2959,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) } STATIC char * -S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) +S_filter_gets(pTHX_ register SV *sv, STRLEN append) { dVAR; @@ -2980,7 +2979,7 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) return NULL ; } else - return (sv_gets(sv, fp, append)); + return (sv_gets(sv, PL_rsfp, append)); } STATIC HV * @@ -3629,8 +3628,17 @@ Perl_yylex(pTHX) default: if (isIDFIRST_lazy_if(s,UTF)) goto keylookup; - len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); - Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1); + { + unsigned char c = *s; + len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); + if (len > UNRECOGNIZED_PRECEDE_COUNT) { + d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; + } else { + d = PL_linestart; + } + *s = '\0'; + Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1); + } case 4: case 26: goto fake_eof; /* emulate EOF on ^D or ^Z */ @@ -3734,7 +3742,7 @@ Perl_yylex(pTHX) } do { bof = PL_rsfp ? TRUE : FALSE; - if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) { + if ((s = filter_gets(PL_linestr, 0)) == NULL) { fake_eof: #ifdef PERL_MAD PL_realtokenstart = -1; @@ -3920,7 +3928,6 @@ Perl_yylex(pTHX) *s = '#'; /* Don't try to parse shebang line */ } #endif /* ALTERNATE_SHEBANG */ -#ifndef MACOS_TRADITIONAL if (!d && *s == '#' && ipathend > ipath && @@ -3936,7 +3943,7 @@ Perl_yylex(pTHX) while (s < PL_bufend && isSPACE(*s)) s++; if (s < PL_bufend) { - Newxz(newargv,PL_origargc+3,char*); + Newx(newargv,PL_origargc+3,char*); newargv[1] = s; while (s < PL_bufend && !isSPACE(*s)) s++; @@ -3951,7 +3958,6 @@ Perl_yylex(pTHX) PERL_FPU_POST_EXEC Perl_croak(aTHX_ "Can't exec %s", ipath); } -#endif if (d) { while (*d && !isSPACE(*d)) d++; @@ -3966,7 +3972,14 @@ Perl_yylex(pTHX) const char *d1 = d; do { - if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') { + bool baduni = FALSE; + if (*d1 == 'C') { + const char *d2 = d1 + 1; + if (parse_unicode_opts((const char **)&d2) + != PL_unicode) + baduni = TRUE; + } + if (baduni || *d1 == 'M' || *d1 == 'm') { const char * const m = d1; while (*d1 && !isSPACE(*d1)) d1++; @@ -4014,9 +4027,6 @@ Perl_yylex(pTHX) "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); #endif case ' ': case '\t': case '\f': case 013: -#ifdef MACOS_TRADITIONAL - case '\312': -#endif #ifdef PERL_MAD PL_realtokenstart = -1; if (!PL_thiswhite) @@ -4259,7 +4269,10 @@ Perl_yylex(pTHX) BOop(OP_BIT_XOR); case '[': PL_lex_brackets++; - /* FALL THROUGH */ + { + const char tmp = *s++; + OPERATOR(tmp); + } case '~': if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) @@ -4287,6 +4300,9 @@ Perl_yylex(pTHX) if (!PL_in_my || PL_lex_state != LEX_NORMAL) break; PL_bufptr = s; /* update in case we back off */ + if (*s == '=') { + deprecate(":= for an empty attribute list"); + } goto grabattrs; case XATTRBLOCK: PL_expect = XBLOCK; @@ -4309,6 +4325,7 @@ Perl_yylex(pTHX) case KEY_or: case KEY_and: case KEY_for: + case KEY_foreach: case KEY_unless: case KEY_if: case KEY_while: @@ -4344,11 +4361,6 @@ Perl_yylex(pTHX) if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { sv_free(sv); if (PL_in_my == KEY_our) { -#ifdef USE_ITHREADS - GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval)); -#else - /* skip to avoid loading attributes.pm */ -#endif deprecate(":unique"); } else @@ -4363,7 +4375,7 @@ Perl_yylex(pTHX) } else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { sv_free(sv); - CvLOCKED_on(PL_compcv); + deprecate(":locked"); } else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { sv_free(sv); @@ -4776,10 +4788,6 @@ Perl_yylex(pTHX) pl_yylval.ival = 0; OPERATOR(ASSIGNOP); case '!': - if (PL_expect == XSTATE && s[1] == '!' && s[2] == '!') { - s += 3; - LOP(OP_DIE,XTERM); - } s++; { const char tmp = *s++; @@ -4849,9 +4857,7 @@ Perl_yylex(pTHX) if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - PL_expect = XTERM; - deprecate_old(commaless_variable_list); - return REPORT(','); /* grandfather non-comma-format format */ + return deprecate_commaless_var_list(); } } @@ -5031,10 +5037,6 @@ Perl_yylex(pTHX) AOPERATOR(DORDOR); } case '?': /* may either be conditional or pattern */ - if (PL_expect == XSTATE && s[1] == '?' && s[2] == '?') { - s += 3; - LOP(OP_WARN,XTERM); - } if (PL_expect == XOPERATOR) { char tmp = *s++; if(tmp == '?') { @@ -5111,9 +5113,7 @@ Perl_yylex(pTHX) DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - PL_expect = XTERM; - deprecate_old(commaless_variable_list); - return REPORT(','); /* grandfather non-comma-format format */ + return deprecate_commaless_var_list(); } else no_op("String",s); @@ -5128,9 +5128,7 @@ Perl_yylex(pTHX) DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - PL_expect = XTERM; - deprecate_old(commaless_variable_list); - return REPORT(','); /* grandfather non-comma-format format */ + return deprecate_commaless_var_list(); } else no_op("String",s); @@ -5160,9 +5158,9 @@ Perl_yylex(pTHX) case '\\': s++; - if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", - *s, *s); + if (PL_lex_inwhat && isDIGIT(*s)) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", + *s, *s); if (PL_expect == XOPERATOR) no_op("Backslash",s); OPERATOR(REFGEN); @@ -5224,6 +5222,7 @@ Perl_yylex(pTHX) case 'z': case 'Z': keylookup: { + bool anydelim; I32 tmp; orig_keyword = 0; @@ -5234,31 +5233,19 @@ Perl_yylex(pTHX) s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); /* Some keywords can be followed by any delimiter, including ':' */ - tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) || + anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) || (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') || (PL_tokenbuf[0] == 'q' && strchr("qwxr", PL_tokenbuf[1]))))); /* x::* is just a word, unless x is "CORE" */ - if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE")) + if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE")) goto just_a_word; d = s; while (d < PL_bufend && isSPACE(*d)) d++; /* no comments skipped here, or s### is misparsed */ - /* Is this a label? */ - if (!tmp && PL_expect == XSTATE - && d < PL_bufend && *d == ':' && *(d + 1) != ':') { - s = d + 1; - pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf); - CLINE; - TOKEN(LABEL); - } - - /* Check for keywords */ - tmp = keyword(PL_tokenbuf, len, 0); - /* Is this a word before a => operator? */ if (*d == '=' && d[1] == '>') { CLINE; @@ -5269,6 +5256,47 @@ Perl_yylex(pTHX) TERM(WORD); } + /* Check for plugged-in keyword */ + { + OP *o; + int result; + char *saved_bufptr = PL_bufptr; + PL_bufptr = s; + result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o); + s = PL_bufptr; + if (result == KEYWORD_PLUGIN_DECLINE) { + /* not a plugged-in keyword */ + PL_bufptr = saved_bufptr; + } else if (result == KEYWORD_PLUGIN_STMT) { + pl_yylval.opval = o; + CLINE; + PL_expect = XSTATE; + return REPORT(PLUGSTMT); + } else if (result == KEYWORD_PLUGIN_EXPR) { + pl_yylval.opval = o; + CLINE; + PL_expect = XOPERATOR; + return REPORT(PLUGEXPR); + } else { + Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", + PL_tokenbuf); + } + } + + /* Check for built-in keyword */ + tmp = keyword(PL_tokenbuf, len, 0); + + /* Is this a label? */ + if (!anydelim && PL_expect == XSTATE + && d < PL_bufend && *d == ':' && *(d + 1) != ':') { + if (tmp) + Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf); + s = d + 1; + pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf); + CLINE; + TOKEN(LABEL); + } + if (tmp < 0) { /* second-class keyword? */ GV *ogv = NULL; /* override (winner) */ GV *hgv = NULL; /* hidden (loser) */ @@ -5302,17 +5330,16 @@ Perl_yylex(pTHX) } else { /* no override */ tmp = -tmp; - if (tmp == KEY_dump && ckWARN(WARN_MISC)) { - Perl_warner(aTHX_ packWARN(WARN_MISC), - "dump() better written as CORE::dump()"); + if (tmp == KEY_dump) { + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), + "dump() better written as CORE::dump()"); } gv = NULL; gvp = 0; - if (hgv && tmp != KEY_x && tmp != KEY_CORE - && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */ - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous call resolved as CORE::%s(), %s", - GvENAME(hgv), "qualify as such or use &"); + if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */ + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous call resolved as CORE::%s(), %s", + GvENAME(hgv), "qualify as such or use &"); } } @@ -5334,6 +5361,7 @@ Perl_yylex(pTHX) SV *sv; int pkgname = 0; const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); + OP *rv2cv_op; CV *cv; #ifdef PERL_MAD SV *nextPL_nextwhite = 0; @@ -5427,19 +5455,29 @@ Perl_yylex(pTHX) if (len) goto safe_bareword; - /* Do the explicit type check so that we don't need to force - the initialisation of the symbol table to have a real GV. - Beware - gv may not really be a PVGV, cv may not really be - a PVCV, (because of the space optimisations that gv_init - understands) But they're true if for this symbol there is - respectively a typeglob and a subroutine. - */ - cv = gv ? ((SvTYPE(gv) == SVt_PVGV) - /* Real typeglob, so get the real subroutine: */ - ? GvCVu(gv) - /* A proxy for a subroutine in this package? */ - : SvOK(gv) ? MUTABLE_CV(gv) : NULL) - : NULL; + cv = NULL; + { + OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv)); + const_op->op_private = OPpCONST_BARE; + rv2cv_op = newCVREF(0, const_op); + } + if (rv2cv_op->op_type == OP_RV2CV && + (rv2cv_op->op_flags & OPf_KIDS)) { + OP *rv_op = cUNOPx(rv2cv_op)->op_first; + switch (rv_op->op_type) { + case OP_CONST: { + SV *sv = cSVOPx_sv(rv_op); + if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV) + cv = (CV*)SvRV(sv); + } break; + case OP_GV: { + GV *gv = cGVOPx_gv(rv_op); + CV *maybe_cv = GvCVu(gv); + if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV) + cv = maybe_cv; + } break; + } + } /* See if it's the indirect object for a list operator. */ @@ -5462,8 +5500,10 @@ Perl_yylex(pTHX) /* Two barewords in a row may indicate method call. */ if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && - (tmp = intuit_method(s, gv, cv))) + (tmp = intuit_method(s, gv, cv))) { + op_free(rv2cv_op); return REPORT(tmp); + } /* If not a declared subroutine, it's an indirect object. */ /* (But it's an indir obj regardless for sort.) */ @@ -5471,7 +5511,7 @@ Perl_yylex(pTHX) if ( ( !immediate_paren && (PL_last_lop_op == OP_SORT || - ((!gv || !cv) && + (!cv && (PL_last_lop_op != OP_MAPSTART && PL_last_lop_op != OP_GREPSTART)))) || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0' @@ -5494,6 +5534,7 @@ Perl_yylex(pTHX) /* Is this a word before a => operator? */ if (*s == '=' && s[1] == '>' && !pkgname) { + op_free(rv2cv_op); CLINE; sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) @@ -5508,7 +5549,7 @@ Perl_yylex(pTHX) d = s + 1; while (SPACE_OR_TAB(*d)) d++; - if (*d == ')' && (sv = gv_const_sv(gv))) { + if (*d == ')' && (sv = cv_const_sv(cv))) { s = d + 1; goto its_constant; } @@ -5529,6 +5570,7 @@ Perl_yylex(pTHX) PL_thistoken = newSVpvs(""); } #endif + op_free(rv2cv_op); force_next(WORD); pl_yylval.ival = 0; TOKEN('&'); @@ -5536,7 +5578,8 @@ Perl_yylex(pTHX) /* If followed by var or block, call it a method (unless sub) */ - if ((*s == '$' || *s == '{') && (!gv || !cv)) { + if ((*s == '$' || *s == '{') && !cv) { + op_free(rv2cv_op); PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_METHOD; PREBLOCK(METHOD); @@ -5546,36 +5589,30 @@ Perl_yylex(pTHX) if (!orig_keyword && (isIDFIRST_lazy_if(s,UTF) || *s == '$') - && (tmp = intuit_method(s, gv, cv))) + && (tmp = intuit_method(s, gv, cv))) { + op_free(rv2cv_op); return REPORT(tmp); + } /* Not a method, so call it a subroutine (if defined) */ if (cv) { - if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS)) - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of -%s resolved as -&%s()", - PL_tokenbuf, PL_tokenbuf); + if (lastchar == '-') + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of -%s resolved as -&%s()", + PL_tokenbuf, PL_tokenbuf); /* Check for a constant sub */ - if ((sv = gv_const_sv(gv))) { + if ((sv = cv_const_sv(cv))) { its_constant: + op_free(rv2cv_op); SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv); ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv); pl_yylval.opval->op_private = 0; TOKEN(WORD); } - /* Resolve to GV now. */ - if (SvTYPE(gv) != SVt_PVGV) { - gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV); - assert (SvTYPE(gv) == SVt_PVGV); - /* cv must have been some sort of placeholder, so - now needs replacing with a real code reference. */ - cv = GvCV(gv); - } - op_free(pl_yylval.opval); - pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv)); + pl_yylval.opval = rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -5643,7 +5680,7 @@ Perl_yylex(pTHX) if (probable_sub) { gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV); op_free(pl_yylval.opval); - pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv)); + pl_yylval.opval = rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -5668,10 +5705,22 @@ Perl_yylex(pTHX) /* Call it a bare word */ - bareword: if (PL_hints & HINT_STRICT_SUBS) pl_yylval.opval->op_private |= OPpCONST_STRICT; else { + bareword: + /* after "print" and similar functions (corresponding to + * "F? L" in opcode.pl), whatever wasn't already parsed as + * a filehandle should be subject to "strict subs". + * Likewise for the optional indirect-object argument to system + * or exec, which can't be a bareword */ + if ((PL_last_lop_op == OP_PRINT + || PL_last_lop_op == OP_PRTF + || PL_last_lop_op == OP_SAY + || PL_last_lop_op == OP_SYSTEM + || PL_last_lop_op == OP_EXEC) + && (PL_hints & HINT_STRICT_SUBS)) + pl_yylval.opval->op_private |= OPpCONST_STRICT; if (lastchar != '-') { if (ckWARN(WARN_RESERVED)) { d = PL_tokenbuf; @@ -5683,16 +5732,16 @@ Perl_yylex(pTHX) } } } + op_free(rv2cv_op); safe_bareword: - if ((lastchar == '*' || lastchar == '%' || lastchar == '&') - && ckWARN_d(WARN_AMBIGUOUS)) { - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Operator or semicolon missing before %c%s", - lastchar, PL_tokenbuf); - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of %c resolved as operator %c", - lastchar, lastchar); + if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Operator or semicolon missing before %c%s", + lastchar, PL_tokenbuf); + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of %c resolved as operator %c", + lastchar, lastchar); } TOKEN(WORD); } @@ -5803,8 +5852,8 @@ Perl_yylex(pTHX) sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart); PL_realtokenstart = -1; } - while ((s = filter_gets(PL_endwhite, PL_rsfp, - SvCUR(PL_endwhite))) != NULL) ; + while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite))) + != NULL) ; } #endif PL_rsfp = NULL; @@ -6365,6 +6414,7 @@ Perl_yylex(pTHX) case KEY_package: s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_version(s, FALSE); OPERATOR(PACKAGE); case KEY_pipe: @@ -7044,7 +7094,7 @@ S_pending_ident(pTHX) yyerror(Perl_form(aTHX_ "No package name allowed for " "variable %s in \"our\"", PL_tokenbuf)); - tmp = allocmy(PL_tokenbuf); + tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0); } else { if (has_colon) @@ -7052,7 +7102,7 @@ S_pending_ident(pTHX) PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf)); pl_yylval.opval = newOP(OP_PADANY, 0); - pl_yylval.opval->op_targ = allocmy(PL_tokenbuf); + pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0); return PRIVATEREF; } } @@ -7071,7 +7121,7 @@ S_pending_ident(pTHX) if (!has_colon) { if (!PL_in_my) - tmp = pad_findmy(PL_tokenbuf); + tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0); if (tmp != NOT_IN_PAD) { /* might be an "our" variable" */ if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { @@ -7122,11 +7172,11 @@ S_pending_ident(pTHX) and @foo isn't a variable we can find in the symbol table. */ - if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { + if (ckWARN(WARN_AMBIGUOUS) && + pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0, SVt_PVAV); if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) - && ckWARN(WARN_AMBIGUOUS) /* DO NOT warn for @- and @+ */ && !( PL_tokenbuf[2] == '\0' && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' )) @@ -7134,8 +7184,8 @@ S_pending_ident(pTHX) { /* Downgraded from fatal to warning 20000522 mjd */ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of %s in string", - PL_tokenbuf); + "Possible unintended interpolation of %s in string", + PL_tokenbuf); } } @@ -8676,8 +8726,7 @@ Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords) name[4] == 'i' && name[5] == 'f') { /* elseif */ - if(ckWARN_d(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif"); + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif"); } goto unknown; @@ -10926,21 +10975,28 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL return s; } +static U32 +S_pmflag(U32 pmfl, const char ch) { + switch (ch) { + CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl); + case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break; + case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break; + case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break; + case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break; + } + return pmfl; +} + void Perl_pmflag(pTHX_ U32* pmfl, int ch) { PERL_ARGS_ASSERT_PMFLAG; - PERL_UNUSED_CONTEXT; + Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), + "Perl_pmflag() is deprecated, and will be removed from the XS API"); + if (ch<256) { - const char c = (char)ch; - switch (c) { - CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl); - case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; - case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; - case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; - case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break; - } + *pmfl = S_pmflag(*pmfl, (char)ch); } } @@ -10994,7 +11050,7 @@ S_scan_pat(pTHX_ char *start, I32 type) modstart = s; #endif while (*s && strchr(valid_flags, *s)) - pmflag(&pm->op_pmflags,*s++); + pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++); #ifdef PERL_MAD if (PL_madskills && modstart != s) { SV* tmptoken = newSVpvn(modstart, s - modstart); @@ -11002,11 +11058,10 @@ S_scan_pat(pTHX_ char *start, I32 type) } #endif /* issue a warning if /c is specified,but /g is not */ - if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL) - && ckWARN(WARN_REGEXP)) + if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) { - Perl_warner(aTHX_ packWARN(WARN_REGEXP), - "Use of /c modifier is meaningless without /g" ); + Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), + "Use of /c modifier is meaningless without /g" ); } PL_lex_op = (OP*)pm; @@ -11075,7 +11130,7 @@ S_scan_subst(pTHX_ char *start) es++; } else if (strchr(S_PAT_MODS, *s)) - pmflag(&pm->op_pmflags,*s++); + pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++); else break; } @@ -11088,8 +11143,8 @@ S_scan_subst(pTHX_ char *start) PL_thismad = 0; } #endif - if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) { - Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); + if ((pm->op_pmflags & PMf_CONTINUE)) { + Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" ); } if (es) { @@ -11257,7 +11312,7 @@ S_scan_heredoc(pTHX_ register char *s) else term = '"'; if (!isALNUM_lazy_if(s,UTF)) - deprecate_old("bare << to mean <<\"\""); + deprecate("bare << to mean <<\"\""); for (; isALNUM_lazy_if(s,UTF); s++) { if (d < e) *d++ = *s; @@ -11418,7 +11473,8 @@ S_scan_heredoc(pTHX_ register char *s) } #endif if (!outer || - !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { + !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart + = filter_gets(PL_linestr, 0))) { CopLINE_set(PL_curcop, (line_t)PL_multi_start); missingterm(PL_tokenbuf); } @@ -11573,7 +11629,7 @@ S_scan_inputsymbol(pTHX_ char *start) /* try to find it in the pad for this block, otherwise find add symbol table ops */ - const PADOFFSET tmp = pad_findmy(d); + const PADOFFSET tmp = pad_findmy(d, len, 0); if (tmp != NOT_IN_PAD) { if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { HV * const stash = PAD_COMPNAME_OURSTASH(tmp); @@ -11930,7 +11986,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } #endif if (!PL_rsfp || - !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { + !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart + = filter_gets(PL_linestr, 0))) { sv_free(sv); CopLINE_set(PL_curcop, (line_t)PL_multi_start); return NULL; @@ -12101,8 +12158,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s++; } @@ -12125,9 +12181,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* _ are ignored -- but warned about if consecutive */ case '_': - if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + if (lastub && s == lastub + 1) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; break; @@ -12169,10 +12225,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) && !(PL_hints & HINT_NEW_BINARY)) { overflowed = TRUE; n = (NV) u; - if (ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), - "Integer overflow in %s number", - base); + Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), + "Integer overflow in %s number", + base); } else u = x | b; /* add the digit to the end */ } @@ -12199,24 +12254,23 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* final misplaced underbar check */ if (s[-1] == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } sv = newSV(0); if (overflowed) { - if (n > 4294967295.0 && ckWARN(WARN_PORTABLE)) - Perl_warner(aTHX_ packWARN(WARN_PORTABLE), - "%s number > %s non-portable", - Base, max); + if (n > 4294967295.0) + Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), + "%s number > %s non-portable", + Base, max); sv_setnv(sv, n); } else { #if UVSIZE > 4 - if (u > 0xffffffff && ckWARN(WARN_PORTABLE)) - Perl_warner(aTHX_ packWARN(WARN_PORTABLE), - "%s number > %s non-portable", - Base, max); + if (u > 0xffffffff) + Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), + "%s number > %s non-portable", + Base, max); #endif sv_setuv(sv, u); } @@ -12245,9 +12299,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if -w is on */ if (*s == '_') { - if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + if (lastub && s == lastub + 1) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } else { @@ -12261,8 +12315,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* final misplaced underbar check */ if (lastub && s == lastub + 1) { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } /* read a decimal portion if there is one. avoid @@ -12274,9 +12327,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) *d++ = *s++; if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s; } @@ -12287,9 +12339,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if (d >= e) Perl_croak(aTHX_ number_too_long); if (*s == '_') { - if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + if (lastub && s == lastub + 1) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s; } else @@ -12297,9 +12349,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } /* fractional part ending in underbar? */ if (s[-1] == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); } if (*s == '.' && isDIGIT(s[1])) { /* oops, it's really a v-string, but without the "v" */ @@ -12318,9 +12369,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* stray preinitial _ */ if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } @@ -12330,9 +12380,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* stray initial _ */ if (*s == '_') { - if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } @@ -12345,10 +12394,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } else { if (((lastub && s == lastub + 1) || - (!isDIGIT(s[1]) && s[1] != '_')) - && ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Misplaced _ in number"); + (!isDIGIT(s[1]) && s[1] != '_'))) + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), + "Misplaced _ in number"); lastub = s++; } } @@ -12490,7 +12538,7 @@ S_scan_formline(pTHX_ register char *s) PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart); } #endif - s = filter_gets(PL_linestr, PL_rsfp, 0); + s = filter_gets(PL_linestr, 0); #ifdef PERL_MAD tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr); #else @@ -12672,8 +12720,7 @@ Perl_yyerror(pTHX_ const char *const s) PL_multi_end = 0; } if (PL_in_eval & EVAL_WARNONLY) { - if (ckWARN_d(WARN_SYNTAX)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg)); } else qerror(msg); @@ -12710,30 +12757,8 @@ S_swallow_bom(pTHX_ U8 *s) #ifndef PERL_NO_UTF16_FILTER if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n"); s += 2; - utf16le: if (PL_bufend > (char*)s) { - U8 *news; - I32 newlen; - - filter_add(utf16rev_textfilter, NULL); - Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - utf16_to_utf8_reversed(s, news, - PL_bufend - (char*)s - 1, - &newlen); - sv_setpvn(PL_linestr, (const char*)news, newlen); -#ifdef PERL_MAD - s = (U8*)SvPVX(PL_linestr); - Copy(news, s, newlen, U8); - s[newlen] = '\0'; -#endif - Safefree(news); - SvUTF8_on(PL_linestr); - s = (U8*)SvPVX(PL_linestr); -#ifdef PERL_MAD - /* FIXME - is this a general bug fix? */ - s[newlen] = '\0'; -#endif - PL_bufend = SvPVX(PL_linestr) + newlen; + s = add_utf16_textfilter(s, TRUE); } #else Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE"); @@ -12745,21 +12770,8 @@ S_swallow_bom(pTHX_ U8 *s) #ifndef PERL_NO_UTF16_FILTER if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); s += 2; - utf16be: if (PL_bufend > (char *)s) { - U8 *news; - I32 newlen; - - filter_add(utf16_textfilter, NULL); - Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - utf16_to_utf8(s, news, - PL_bufend - (char*)s, - &newlen); - sv_setpvn(PL_linestr, (const char*)news, newlen); - Safefree(news); - SvUTF8_on(PL_linestr); - s = (U8*)SvPVX(PL_linestr); - PL_bufend = SvPVX(PL_linestr) + newlen; + s = add_utf16_textfilter(s, FALSE); } #else Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE"); @@ -12785,7 +12797,7 @@ S_swallow_bom(pTHX_ U8 *s) * 00 xx 00 xx * are a good indicator of UTF-16BE. */ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); - goto utf16be; + s = add_utf16_textfilter(s, FALSE); } } #ifdef EBCDIC @@ -12803,7 +12815,7 @@ S_swallow_bom(pTHX_ U8 *s) * xx 00 xx 00 * are a good indicator of UTF-16LE. */ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); - goto utf16le; + s = add_utf16_textfilter(s, TRUE); } } return (char*)s; @@ -12812,49 +12824,146 @@ S_swallow_bom(pTHX_ U8 *s) #ifndef PERL_NO_UTF16_FILTER static I32 -utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) +S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) { dVAR; - const STRLEN old = SvCUR(sv); - const I32 count = FILTER_READ(idx+1, sv, maxlen); + SV *const filter = FILTER_DATA(idx); + /* We re-use this each time round, throwing the contents away before we + return. */ + SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter)); + SV *const utf8_buffer = filter; + IV status = IoPAGE(filter); + const bool reverse = (bool) IoLINES(filter); + I32 retval; + + /* As we're automatically added, at the lowest level, and hence only called + from this file, we can be sure that we're not called in block mode. Hence + don't bother writing code to deal with block mode. */ + if (maxlen) { + Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); + } + if (status < 0) { + Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status); + } DEBUG_P(PerlIO_printf(Perl_debug_log, - "utf16_textfilter(%p): %d %d (%d)\n", - FPTR2DPTR(void *, utf16_textfilter), - idx, maxlen, (int) count)); - if (count) { - U8* tmps; + "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", + FPTR2DPTR(void *, S_utf16_textfilter), + reverse ? 'l' : 'b', idx, maxlen, status, + (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); + + while (1) { + STRLEN chars; + STRLEN have; I32 newlen; - Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8); - Copy(SvPVX_const(sv), tmps, old, char); - utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old, - SvCUR(sv) - old, &newlen); - sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); - } - DEBUG_P({sv_dump(sv);}); - return SvCUR(sv); + U8 *end; + /* First, look in our buffer of existing UTF-8 data: */ + char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); + + if (nl) { + ++nl; + } else if (status == 0) { + /* EOF */ + IoPAGE(filter) = 0; + nl = SvEND(utf8_buffer); + } + if (nl) { + STRLEN got = nl - SvPVX(utf8_buffer); + /* Did we have anything to append? */ + retval = got != 0; + sv_catpvn(sv, SvPVX(utf8_buffer), got); + /* Everything else in this code works just fine if SVp_POK isn't + set. This, however, needs it, and we need it to work, else + we loop infinitely because the buffer is never consumed. */ + sv_chop(utf8_buffer, nl); + break; + } + + /* OK, not a complete line there, so need to read some more UTF-16. + Read an extra octect if the buffer currently has an odd number. */ + while (1) { + if (status <= 0) + break; + if (SvCUR(utf16_buffer) >= 2) { + /* Location of the high octet of the last complete code point. + Gosh, UTF-16 is a pain. All the benefits of variable length, + *coupled* with all the benefits of partial reads and + endianness. */ + const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) + + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); + + if (*last_hi < 0xd8 || *last_hi > 0xdb) { + break; + } + + /* We have the first half of a surrogate. Read more. */ + DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); + } + + status = FILTER_READ(idx + 1, utf16_buffer, + 160 + (SvCUR(utf16_buffer) & 1)); + DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer))); + DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); + if (status < 0) { + /* Error */ + IoPAGE(filter) = status; + return status; + } + } + + chars = SvCUR(utf16_buffer) >> 1; + have = SvCUR(utf8_buffer); + SvGROW(utf8_buffer, have + chars * 3 + 1); + + if (reverse) { + end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), + (U8*)SvPVX_const(utf8_buffer) + have, + chars * 2, &newlen); + } else { + end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), + (U8*)SvPVX_const(utf8_buffer) + have, + chars * 2, &newlen); + } + SvCUR_set(utf8_buffer, have + newlen); + *end = '\0'; + + /* No need to keep this SV "well-formed" with a '\0' after the end, as + it's private to us, and utf16_to_utf8{,reversed} take a + (pointer,length) pair, rather than a NUL-terminated string. */ + if(SvCUR(utf16_buffer) & 1) { + *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; + SvCUR_set(utf16_buffer, 1); + } else { + SvCUR_set(utf16_buffer, 0); + } + } + DEBUG_P(PerlIO_printf(Perl_debug_log, + "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n", + status, + (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); + DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); + return retval; } -static I32 -utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen) +static U8 * +S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) { - dVAR; - const STRLEN old = SvCUR(sv); - const I32 count = FILTER_READ(idx+1, sv, maxlen); - DEBUG_P(PerlIO_printf(Perl_debug_log, - "utf16rev_textfilter(%p): %d %d (%d)\n", - FPTR2DPTR(void *, utf16rev_textfilter), - idx, maxlen, (int) count)); - if (count) { - U8* tmps; - I32 newlen; - Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8); - Copy(SvPVX_const(sv), tmps, old, char); - utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old, - SvCUR(sv) - old, &newlen); - sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); + SV *filter = filter_add(S_utf16_textfilter, NULL); + + IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s)); + sv_setpvs(filter, ""); + IoLINES(filter) = reversed; + IoPAGE(filter) = 1; /* Not EOF */ + + /* Sadly, we have to return a valid pointer, come what may, so we have to + ignore any error return from this. */ + SvCUR_set(PL_linestr, 0); + if (FILTER_READ(0, PL_linestr, 0)) { + SvUTF8_on(PL_linestr); + } else { + SvUTF8_on(PL_linestr); } - DEBUG_P({ sv_dump(sv); }); - return count; + PL_bufend = SvEND(PL_linestr); + return (U8*)SvPVX(PL_linestr); } #endif @@ -12916,9 +13025,9 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) const UV orev = rev; rev += (*end - '0') * mult; mult *= 10; - if (orev > rev && ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), - "Integer overflow in decimal number"); + if (orev > rev) + Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), + "Integer overflow in decimal number"); } } #ifdef EBCDIC @@ -12946,6 +13055,18 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) return (char *)s; } +int +Perl_keyword_plugin_standard(pTHX_ + char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) +{ + PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; + PERL_UNUSED_CONTEXT; + PERL_UNUSED_ARG(keyword_ptr); + PERL_UNUSED_ARG(keyword_len); + PERL_UNUSED_ARG(op_ptr); + return KEYWORD_PLUGIN_DECLINE; +} + /* * Local variables: * c-indentation-style: bsd