X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=32073a58420620d5502b857993d77b55bb0ad2c8;hb=a7ffa9b9a1a8caeff31a83d25b70b5aca6ba0d12;hp=98210a010f518b7d00ca2f002a55e548367b45cb;hpb=6e3aabd614ca77aec0b07c2811e8d104cc5690f3;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index 98210a0..32073a5 100644 --- a/toke.c +++ b/toke.c @@ -80,15 +80,19 @@ static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen); #endif #ifdef USE_PURE_BISON -YYSTYPE* yylval_pointer = NULL; -int* yychar_pointer = NULL; +#ifndef YYMAXLEVEL +#define YYMAXLEVEL 100 +#endif +YYSTYPE* yylval_pointer[YYMAXLEVEL]; +int* yychar_pointer[YYMAXLEVEL]; +int yyactlevel = 0; # undef yylval # undef yychar -# define yylval (*yylval_pointer) -# define yychar (*yychar_pointer) -# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer -# undef yylex -# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer) +# define yylval (*yylval_pointer[yyactlevel]) +# define yychar (*yychar_pointer[yyactlevel]) +# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel] +# undef yylex +# define yylex() Perl_yylex(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]) #endif #include "keywords.h" @@ -209,10 +213,8 @@ S_no_op(pTHX_ char *what, char *s) if (!s) s = oldbp; - else { - assert(s >= oldbp); + else PL_bufptr = s; - } yywarn(Perl_form(aTHX_ "%s found where operator expected", what)); if (is_first) Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n"); @@ -223,8 +225,10 @@ S_no_op(pTHX_ char *what, char *s) Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n", t - PL_oldoldbufptr, PL_oldoldbufptr); } - else + else { + assert(s >= oldbp); Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp); + } PL_bufptr = oldbp; } @@ -357,7 +361,6 @@ Perl_lex_start(pTHX_ SV *line) SAVEVPTR(PL_nextval[toke]); } SAVEI32(PL_nexttoke); - PL_nexttoke = 0; } SAVECOPLINE(PL_curcop); SAVEPPTR(PL_bufptr); @@ -391,6 +394,7 @@ Perl_lex_start(pTHX_ SV *line) PL_lex_stuff = Nullsv; PL_lex_repl = Nullsv; PL_lex_inpat = 0; + PL_nexttoke = 0; PL_lex_inwhat = 0; PL_sublex_info.sub_inwhat = 0; PL_linestr = line; @@ -809,10 +813,10 @@ Perl_str_to_version(pTHX_ SV *sv) bool utf = SvUTF8(sv) ? TRUE : FALSE; char *end = start + len; while (start < end) { - I32 skip; + STRLEN skip; UV n; if (utf) - n = utf8_to_uv((U8*)start, &skip); + n = utf8_to_uv_chk((U8*)start, len, &skip, 0); else { n = *(U8*)start; skip = 1; @@ -844,7 +848,7 @@ S_force_version(pTHX_ char *s) for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++); if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) { SV *ver; - s = scan_num(s); + s = scan_num(s, &yylval); version = yylval.opval; ver = cSVOPx(version)->op_sv; if (SvPOK(ver) && !SvNIOK(ver)) { @@ -1184,7 +1188,6 @@ S_scan_const(pTHX_ char *start) bool dorange = FALSE; /* are we in a translit range? */ bool didrange = FALSE; /* did we just finish a range? */ bool has_utf = FALSE; /* embedded \x{} */ - I32 len; /* ? */ UV uv; I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) @@ -1219,7 +1222,7 @@ S_scan_const(pTHX_ char *start) if (min > max) { Perl_croak(aTHX_ "Invalid [] range \"%c-%c\" in transliteration operator", - min, max); + (char)min, (char)max); } #ifndef ASCIIish @@ -1305,9 +1308,11 @@ S_scan_const(pTHX_ char *start) *d++ = *s++; } - /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */ + /* check for embedded arrays + (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-) + */ else if (*s == '@' && s[1] - && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1]))) + && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1]))) break; /* check for embedded scalars. only stop if we're sure it's a @@ -1323,20 +1328,23 @@ S_scan_const(pTHX_ char *start) /* (now in tr/// code again) */ if (*s & 0x80 && thisutf) { - (void)utf8_to_uv((U8*)s, &len); - if (len == 1) { - /* illegal UTF8, make it valid */ - char *old_pvx = SvPVX(sv); - /* need space for one extra char (NOTE: SvCUR() not set here) */ - d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx); - d = (char*)uv_to_utf8((U8*)d, (U8)*s++); - } - else { - while (len--) - *d++ = *s++; - } - has_utf = TRUE; - continue; + STRLEN len; + UV uv; + + uv = utf8_to_uv_chk((U8*)s, send - s, &len, 1); + if (len == 1) { + /* illegal UTF8, make it valid */ + char *old_pvx = SvPVX(sv); + /* need space for one extra char (NOTE: SvCUR() not set here) */ + d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx); + d = (char*)uv_to_utf8((U8*)d, (U8)*s++); + } + else { + while (len--) + *d++ = *s++; + } + has_utf = TRUE; + continue; } /* backslashes */ @@ -1392,9 +1400,11 @@ S_scan_const(pTHX_ char *start) /* \132 indicates an octal constant */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - len = 0; /* disallow underscores */ - uv = (UV)scan_oct(s, 3, &len); - s += len; + { + STRLEN len = 0; /* disallow underscores */ + uv = (UV)scan_oct(s, 3, &len); + s += len; + } goto NUM_ESCAPE_INSERT; /* \x24 indicates a hex constant */ @@ -1406,14 +1416,18 @@ S_scan_const(pTHX_ char *start) yyerror("Missing right brace on \\x{}"); e = s; } - len = 1; /* allow underscores */ - uv = (UV)scan_hex(s + 1, e - s - 1, &len); - s = e + 1; + { + STRLEN len = 1; /* allow underscores */ + uv = (UV)scan_hex(s + 1, e - s - 1, &len); + } + s = e + 1; } else { - len = 0; /* disallow underscores */ - uv = (UV)scan_hex(s, 2, &len); - s += len; + { + STRLEN len = 0; /* disallow underscores */ + uv = (UV)scan_hex(s, 2, &len); + s += len; + } } NUM_ESCAPE_INSERT: @@ -1489,7 +1503,10 @@ S_scan_const(pTHX_ char *start) char *ostart = SvPVX(sv); SvCUR_set(sv, d - ostart); SvPOK_on(sv); + *d = '\0'; sv_utf8_upgrade(sv); + /* this just broke our allocation above... */ + SvGROW(sv, send - start); d = SvPVX(sv) + SvCUR(sv); has_utf = TRUE; } @@ -1519,8 +1536,10 @@ S_scan_const(pTHX_ char *start) *d = toCTRL(*d); d++; #else - len = *s++; - *d++ = toCTRL(len); + { + U8 c = *s++; + *d++ = toCTRL(c); + } #endif continue; @@ -2063,6 +2082,29 @@ Perl_yylex(pTHX) #endif { dTHR; + int r; + +#ifdef USE_PURE_BISON + yylval_pointer[yyactlevel] = lvalp; + yychar_pointer[yyactlevel] = lcharp; + yyactlevel++; + if (yyactlevel >= YYMAXLEVEL) + Perl_croak(aTHX_ "panic: YYMAXLEVEL"); +#endif + + r = S_syylex(aTHX); + +#ifdef USE_PURE_BISON + yyactlevel--; +#endif + + return r; +} + +STATIC int +S_syylex(pTHX) /* need to be separate from yylex for reentrancy */ +{ + dTHR; register char *s; register char *d; register I32 tmp; @@ -2070,11 +2112,6 @@ Perl_yylex(pTHX) GV *gv = Nullgv; GV **gvp = 0; -#ifdef USE_PURE_BISON - yylval_pointer = lvalp; - yychar_pointer = lcharp; -#endif - /* check if there's an identifier for us to look at */ if (PL_pending_ident) { /* pit holds the identifier we read and pending_ident is reset */ @@ -2482,7 +2519,8 @@ Perl_yylex(pTHX) do { bool bof; bof = PL_rsfp && (PerlIO_tell(PL_rsfp) == 0); /* *Before* read! */ - if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) { + s = filter_gets(PL_linestr, PL_rsfp, 0); + if (s == Nullch) { fake_eof: if (PL_rsfp) { if (PL_preprocess && !PL_in_eval) @@ -2505,6 +2543,9 @@ Perl_yylex(pTHX) PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); sv_setpv(PL_linestr,""); TOKEN(';'); /* not infinite loop because rsfp is NULL now */ + } else if (bof) { + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + s = swallow_bom((U8*)s); } if (PL_doextract) { if (*s == '#' && s[1] == '!' && instr(s,"perl")) @@ -2518,8 +2559,6 @@ Perl_yylex(pTHX) PL_doextract = FALSE; } } - if (bof) - s = swallow_bom((U8*)s); incline(s); } while (PL_doextract); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; @@ -3515,7 +3554,7 @@ Perl_yylex(pTHX) /* FALL THROUGH */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - s = scan_num(s); + s = scan_num(s, &yylval); if (PL_expect == XOPERATOR) no_op("Number",s); TERM(THING); @@ -3585,7 +3624,7 @@ Perl_yylex(pTHX) while (isDIGIT(*start) || *start == '_') start++; if (*start == '.' && isDIGIT(start[1])) { - s = scan_num(s); + s = scan_num(s, &yylval); TERM(THING); } /* avoid v123abc() or $h{v1}, allow C */ @@ -3596,7 +3635,7 @@ Perl_yylex(pTHX) gv = gv_fetchpv(s, FALSE, SVt_PVCV); *start = c; if (!gv) { - s = scan_num(s); + s = scan_num(s, &yylval); TERM(THING); } } @@ -3978,11 +4017,11 @@ Perl_yylex(pTHX) /* Mark this internal pseudo-handle as clean */ IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; if (PL_preprocess) - IoTYPE(GvIOp(gv)) = '|'; + IoTYPE(GvIOp(gv)) = IoTYPE_PIPE; else if ((PerlIO*)PL_rsfp == PerlIO_stdin()) - IoTYPE(GvIOp(gv)) = '-'; + IoTYPE(GvIOp(gv)) = IoTYPE_STD; else - IoTYPE(GvIOp(gv)) = '<'; + IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY; #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS) /* if the script was opened in binmode, we need to revert * it to text mode for compatibility; but only iff it has CRs @@ -3991,7 +4030,7 @@ Perl_yylex(pTHX) && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r') { Off_t loc = 0; - if (IoTYPE(GvIOp(gv)) == '<') { + if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) { loc = PerlIO_tell(PL_rsfp); (void)PerlIO_seek(PL_rsfp, 0L, 0); } @@ -5090,12 +5129,12 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"cos")) return -KEY_cos; break; case 4: - if (strEQ(d,"chop")) return KEY_chop; + if (strEQ(d,"chop")) return -KEY_chop; break; case 5: if (strEQ(d,"close")) return -KEY_close; if (strEQ(d,"chdir")) return -KEY_chdir; - if (strEQ(d,"chomp")) return KEY_chomp; + if (strEQ(d,"chomp")) return -KEY_chomp; if (strEQ(d,"chmod")) return -KEY_chmod; if (strEQ(d,"chown")) return -KEY_chown; if (strEQ(d,"crypt")) return -KEY_crypt; @@ -5140,7 +5179,6 @@ Perl_keyword(pTHX_ register char *d, I32 len) } break; case 'E': - if (strEQ(d,"EQ")) { deprecate(d); return -KEY_eq;} if (strEQ(d,"END")) return KEY_END; break; case 'e': @@ -5206,12 +5244,6 @@ Perl_keyword(pTHX_ register char *d, I32 len) break; } break; - case 'G': - if (len == 2) { - if (strEQ(d,"GT")) { deprecate(d); return -KEY_gt;} - if (strEQ(d,"GE")) { deprecate(d); return -KEY_ge;} - } - break; case 'g': if (strnEQ(d,"get",3)) { d += 3; @@ -5311,12 +5343,6 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"kill")) return -KEY_kill; } break; - case 'L': - if (len == 2) { - if (strEQ(d,"LT")) { deprecate(d); return -KEY_lt;} - if (strEQ(d,"LE")) { deprecate(d); return -KEY_le;} - } - break; case 'l': switch (len) { case 2: @@ -5368,9 +5394,6 @@ Perl_keyword(pTHX_ register char *d, I32 len) break; } break; - case 'N': - if (strEQ(d,"NE")) { deprecate(d); return -KEY_ne;} - break; case 'n': if (strEQ(d,"next")) return KEY_next; if (strEQ(d,"ne")) return -KEY_ne; @@ -5756,13 +5779,12 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, SAVETMPS; PUSHMARK(SP) ; - EXTEND(sp, 4); + EXTEND(sp, 3); if (pv) PUSHs(pv); PUSHs(sv); if (pv) PUSHs(typesv); - PUSHs(cv); PUTBACK; call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); @@ -6722,7 +6744,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) */ char * -Perl_scan_num(pTHX_ char *start) +Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) { register char *s = start; /* current position in buffer */ register char *d; /* destination in temp buffer */ @@ -7135,9 +7157,9 @@ vstring: /* make the op for the constant and return */ if (sv) - yylval.opval = newSVOP(OP_CONST, 0, sv); + lvalp->opval = newSVOP(OP_CONST, 0, sv); else - yylval.opval = Nullop; + lvalp->opval = Nullop; return s; } @@ -7369,7 +7391,7 @@ Perl_yyerror(pTHX_ char *s) qerror(msg); if (PL_error_count >= 10) { if (PL_in_eval && SvCUR(ERRSV)) - Perl_croak(aTHX_ "%_%s has too many errors.\n", + Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", ERRSV, CopFILE(PL_curcop)); else Perl_croak(aTHX_ "%s has too many errors.\n", @@ -7389,18 +7411,26 @@ S_swallow_bom(pTHX_ U8 *s) case 0xFF: if (s[1] == 0xFE) { /* UTF-16 little-endian */ -#ifndef PERL_NO_UTF16_FILTER - U8 *news; -#endif if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ Perl_croak(aTHX_ "Unsupported script encoding"); #ifndef PERL_NO_UTF16_FILTER + DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n")); s += 2; - filter_add(utf16rev_textfilter, NULL); - New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - PL_bufend = (char*)utf16_to_utf8((U16*)s, news, - PL_bufend - (char*)s); - s = news; + if (PL_bufend > (char*)s) { + U8 *news; + I32 newlen; + + filter_add(utf16rev_textfilter, NULL); + New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); + PL_bufend = (char*)utf16_to_utf8_reversed(s, news, + PL_bufend - (char*)s - 1, + &newlen); + Copy(news, s, newlen, U8); + SvCUR_set(PL_linestr, newlen); + PL_bufend = SvPVX(PL_linestr) + newlen; + news[newlen++] = '\0'; + Safefree(news); + } #else Perl_croak(aTHX_ "Unsupported script encoding"); #endif @@ -7409,12 +7439,23 @@ S_swallow_bom(pTHX_ U8 *s) case 0xFE: if (s[1] == 0xFF) { /* UTF-16 big-endian */ #ifndef PERL_NO_UTF16_FILTER - U8 *news; - filter_add(utf16_textfilter, NULL); - New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - PL_bufend = (char*)utf16_to_utf8((U16*)s, news, - PL_bufend - (char*)s); - s = news; + DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n")); + s += 2; + if (PL_bufend > (char *)s) { + U8 *news; + I32 newlen; + + filter_add(utf16_textfilter, NULL); + New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); + PL_bufend = (char*)utf16_to_utf8(s, news, + PL_bufend - (char*)s, + &newlen); + Copy(news, s, newlen, U8); + SvCUR_set(PL_linestr, newlen); + PL_bufend = SvPVX(PL_linestr) + newlen; + news[newlen++] = '\0'; + Safefree(news); + } #else Perl_croak(aTHX_ "Unsupported script encoding"); #endif @@ -7422,6 +7463,7 @@ S_swallow_bom(pTHX_ U8 *s) break; case 0xEF: if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) { + DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n")); s += 3; /* UTF-8 */ } break; @@ -7464,8 +7506,13 @@ utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen) if (count) { U8* tmps; U8* tend; + I32 newlen; New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); - tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv)); + if (!*SvPV_nolen(sv)) + /* Game over, but don't feed an odd-length string to utf16_to_utf8 */ + return count; + + tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen); sv_usepvn(sv, (char*)tmps, tend - tmps); } return count; @@ -7478,8 +7525,13 @@ utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen) if (count) { U8* tmps; U8* tend; + I32 newlen; + if (!*SvPV_nolen(sv)) + /* Game over, but don't feed an odd-length string to utf16_to_utf8 */ + return count; + New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); - tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv)); + tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen); sv_usepvn(sv, (char*)tmps, tend - tmps); } return count;