X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=de163fb5de960e68be78b2514be1c81bb823cd1e;hb=fc49d04a36bc948b50da6259c0a6c10f8acb6dd0;hp=258c927dbbcb07afe3e12854b9c88853c6330dae;hpb=77a135fea310715f77af2560dd03830df874b5fa;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index 258c927..de163fb 100644 --- a/toke.c +++ b/toke.c @@ -124,16 +124,14 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) #endif +/* The maximum number of characters preceding the unrecognized one to display */ +#define UNRECOGNIZED_PRECEDE_COUNT 10 + /* In variables named $^X, these are the legal values for X. * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) -/* On MacOS, respect nonbreaking spaces */ -#ifdef MACOS_TRADITIONAL -#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t') -#else #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') -#endif /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement @@ -1386,7 +1384,9 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) { dVAR; SV * const sv = newSVpvn_utf8(start, len, - UTF && !IN_BYTES + !IN_BYTES + && UTF + && !is_ascii_string((const U8*)start, len) && is_utf8_string((const U8*)start, len)); return sv; } @@ -2306,9 +2306,10 @@ S_scan_const(pTHX_ char *start) SvCUR_set(sv, d - SvPVX_const(sv)); SvPOK_on(sv); *d = '\0'; - sv_utf8_upgrade(sv); /* See Note on sizing above. */ - SvGROW(sv, SvCUR(sv) + UNISKIP(uv) + (STRLEN)(send - s) + 1); + sv_utf8_upgrade_flags_grow(sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + UNISKIP(uv) + (STRLEN)(send - s) + 1); d = SvPVX(sv) + SvCUR(sv); has_utf8 = TRUE; } @@ -2396,9 +2397,10 @@ S_scan_const(pTHX_ char *start) SvCUR_set(sv, d - SvPVX_const(sv)); SvPOK_on(sv); *d = '\0'; - sv_utf8_upgrade(sv); /* See Note on sizing above. */ - SvGROW(sv, SvCUR(sv) + len + (STRLEN)(send - s) + 1); + sv_utf8_upgrade_flags_grow(sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + len + (STRLEN)(send - s) + 1); d = SvPVX(sv) + SvCUR(sv); has_utf8 = TRUE; } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ @@ -2490,10 +2492,10 @@ S_scan_const(pTHX_ char *start) SvCUR_set(sv, d - SvPVX_const(sv)); SvPOK_on(sv); *d = '\0'; - sv_utf8_upgrade(sv); - /* See Note on sizing above. */ - SvGROW(sv, SvCUR(sv) + need + (STRLEN)(send - s) + 1); + sv_utf8_upgrade_flags_grow(sv, + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + need + (STRLEN)(send - s) + 1); d = SvPVX(sv) + SvCUR(sv); has_utf8 = TRUE; } else if (need > len) { @@ -2824,7 +2826,7 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) bare_package: start_force(PL_curforce); NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, - newSVpvn(tmpbuf,len)); + S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; if (PL_madskills) curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start)); @@ -3655,8 +3657,17 @@ Perl_yylex(pTHX) default: if (isIDFIRST_lazy_if(s,UTF)) goto keylookup; - len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); - Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1); + { + unsigned char c = *s; + len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); + if (len > UNRECOGNIZED_PRECEDE_COUNT) { + d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; + } else { + d = PL_linestart; + } + *s = '\0'; + Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1); + } case 4: case 26: goto fake_eof; /* emulate EOF on ^D or ^Z */ @@ -3946,7 +3957,6 @@ Perl_yylex(pTHX) *s = '#'; /* Don't try to parse shebang line */ } #endif /* ALTERNATE_SHEBANG */ -#ifndef MACOS_TRADITIONAL if (!d && *s == '#' && ipathend > ipath && @@ -3962,7 +3972,7 @@ Perl_yylex(pTHX) while (s < PL_bufend && isSPACE(*s)) s++; if (s < PL_bufend) { - Newxz(newargv,PL_origargc+3,char*); + Newx(newargv,PL_origargc+3,char*); newargv[1] = s; while (s < PL_bufend && !isSPACE(*s)) s++; @@ -3977,7 +3987,6 @@ Perl_yylex(pTHX) PERL_FPU_POST_EXEC Perl_croak(aTHX_ "Can't exec %s", ipath); } -#endif if (d) { while (*d && !isSPACE(*d)) d++; @@ -3992,7 +4001,14 @@ Perl_yylex(pTHX) const char *d1 = d; do { - if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') { + bool baduni = FALSE; + if (*d1 == 'C') { + const char *d2 = d1 + 1; + if (parse_unicode_opts((const char **)&d2) + != PL_unicode) + baduni = TRUE; + } + if (baduni || *d1 == 'M' || *d1 == 'm') { const char * const m = d1; while (*d1 && !isSPACE(*d1)) d1++; @@ -4040,9 +4056,6 @@ Perl_yylex(pTHX) "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); #endif case ' ': case '\t': case '\f': case 013: -#ifdef MACOS_TRADITIONAL - case '\312': -#endif #ifdef PERL_MAD PL_realtokenstart = -1; if (!PL_thiswhite) @@ -4285,7 +4298,10 @@ Perl_yylex(pTHX) BOop(OP_BIT_XOR); case '[': PL_lex_brackets++; - /* FALL THROUGH */ + { + const char tmp = *s++; + OPERATOR(tmp); + } case '~': if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) @@ -4335,6 +4351,7 @@ Perl_yylex(pTHX) case KEY_or: case KEY_and: case KEY_for: + case KEY_foreach: case KEY_unless: case KEY_if: case KEY_while: @@ -4370,11 +4387,6 @@ Perl_yylex(pTHX) if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { sv_free(sv); if (PL_in_my == KEY_our) { -#ifdef USE_ITHREADS - GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval)); -#else - /* skip to avoid loading attributes.pm */ -#endif deprecate(":unique"); } else @@ -4389,7 +4401,7 @@ Perl_yylex(pTHX) } else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { sv_free(sv); - CvLOCKED_on(PL_compcv); + deprecate(":locked"); } else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { sv_free(sv); @@ -4802,10 +4814,6 @@ Perl_yylex(pTHX) pl_yylval.ival = 0; OPERATOR(ASSIGNOP); case '!': - if (PL_expect == XSTATE && s[1] == '!' && s[2] == '!') { - s += 3; - LOP(OP_DIE,XTERM); - } s++; { const char tmp = *s++; @@ -5057,10 +5065,6 @@ Perl_yylex(pTHX) AOPERATOR(DORDOR); } case '?': /* may either be conditional or pattern */ - if (PL_expect == XSTATE && s[1] == '?' && s[2] == '?') { - s += 3; - LOP(OP_WARN,XTERM); - } if (PL_expect == XOPERATOR) { char tmp = *s++; if(tmp == '?') { @@ -5276,14 +5280,17 @@ Perl_yylex(pTHX) /* Is this a label? */ if (!tmp && PL_expect == XSTATE && d < PL_bufend && *d == ':' && *(d + 1) != ':') { + tmp = keyword(PL_tokenbuf, len, 0); + if (tmp) + Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf); s = d + 1; pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf); CLINE; TOKEN(LABEL); } - - /* Check for keywords */ - tmp = keyword(PL_tokenbuf, len, 0); + else + /* Check for keywords */ + tmp = keyword(PL_tokenbuf, len, 0); /* Is this a word before a => operator? */ if (*d == '=' && d[1] == '>') { @@ -5694,10 +5701,22 @@ Perl_yylex(pTHX) /* Call it a bare word */ - bareword: if (PL_hints & HINT_STRICT_SUBS) pl_yylval.opval->op_private |= OPpCONST_STRICT; else { + bareword: + /* after "print" and similar functions (corresponding to + * "F? L" in opcode.pl), whatever wasn't already parsed as + * a filehandle should be subject to "strict subs". + * Likewise for the optional indirect-object argument to system + * or exec, which can't be a bareword */ + if ((PL_last_lop_op == OP_PRINT + || PL_last_lop_op == OP_PRTF + || PL_last_lop_op == OP_SAY + || PL_last_lop_op == OP_SYSTEM + || PL_last_lop_op == OP_EXEC) + && (PL_hints & HINT_STRICT_SUBS)) + pl_yylval.opval->op_private |= OPpCONST_STRICT; if (lastchar != '-') { if (ckWARN(WARN_RESERVED)) { d = PL_tokenbuf;