X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=458e2585135fe406760dbe57806a6b3edba4a03e;hb=76ced9add7b621dfc9d4ecb534aeea8e131a418a;hp=b3c667407cd1edb8f0a6646bcddc01203d8d6ff1;hpb=22d4bb9ccb8701e68f9243547d7e3a3c55f70908;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index b3c6674..458e258 100644 --- a/toke.c +++ b/toke.c @@ -80,15 +80,19 @@ static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen); #endif #ifdef USE_PURE_BISON -YYSTYPE* yylval_pointer = NULL; -int* yychar_pointer = NULL; +# ifndef YYMAXLEVEL +# define YYMAXLEVEL 100 +# endif +YYSTYPE* yylval_pointer[YYMAXLEVEL]; +int* yychar_pointer[YYMAXLEVEL]; +int yyactlevel = 0; # undef yylval # undef yychar -# define yylval (*yylval_pointer) -# define yychar (*yychar_pointer) -# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer -# undef yylex -# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer) +# define yylval (*yylval_pointer[yyactlevel]) +# define yychar (*yychar_pointer[yyactlevel]) +# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel] +# undef yylex +# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]) #endif #include "keywords.h" @@ -809,10 +813,10 @@ Perl_str_to_version(pTHX_ SV *sv) bool utf = SvUTF8(sv) ? TRUE : FALSE; char *end = start + len; while (start < end) { - I32 skip; + STRLEN skip; UV n; if (utf) - n = utf8_to_uv_chk((U8*)start, &skip, 0); + n = utf8_to_uv((U8*)start, len, &skip, 0); else { n = *(U8*)start; skip = 1; @@ -844,7 +848,7 @@ S_force_version(pTHX_ char *s) for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++); if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) { SV *ver; - s = scan_num(s); + s = scan_num(s, &yylval); version = yylval.opval; ver = cSVOPx(version)->op_sv; if (SvPOK(ver) && !SvNIOK(ver)) { @@ -1183,14 +1187,13 @@ S_scan_const(pTHX_ char *start) register char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ bool didrange = FALSE; /* did we just finish a range? */ - bool has_utf = FALSE; /* embedded \x{} */ - I32 len; /* ? */ + bool has_utf8 = FALSE; /* embedded \x{} */ UV uv; I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) : UTF; - I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) + I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF)) : UTF; @@ -1324,21 +1327,24 @@ S_scan_const(pTHX_ char *start) /* (now in tr/// code again) */ - if (*s & 0x80 && thisutf) { - (void)utf8_to_uv_chk((U8*)s, &len, 0); - if (len == 1) { - /* illegal UTF8, make it valid */ - char *old_pvx = SvPVX(sv); - /* need space for one extra char (NOTE: SvCUR() not set here) */ - d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx); - d = (char*)uv_to_utf8((U8*)d, (U8)*s++); - } - else { - while (len--) - *d++ = *s++; - } - has_utf = TRUE; - continue; + if (*s & 0x80 && this_utf8) { + STRLEN len; + UV uv; + + uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY); + if (len == 1) { + /* Illegal UTF8 (a high-bit byte), make it valid. */ + char *old_pvx = SvPVX(sv); + /* need space for one extra char (NOTE: SvCUR() not set here) */ + d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx); + d = (char*)uv_to_utf8((U8*)d, (U8)*s++); + } + else { + while (len--) + *d++ = *s++; + } + has_utf8 = TRUE; + continue; } /* backslashes */ @@ -1394,9 +1400,11 @@ S_scan_const(pTHX_ char *start) /* \132 indicates an octal constant */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - len = 0; /* disallow underscores */ - uv = (UV)scan_oct(s, 3, &len); - s += len; + { + STRLEN len = 0; /* disallow underscores */ + uv = (UV)scan_oct(s, 3, &len); + s += len; + } goto NUM_ESCAPE_INSERT; /* \x24 indicates a hex constant */ @@ -1408,14 +1416,19 @@ S_scan_const(pTHX_ char *start) yyerror("Missing right brace on \\x{}"); e = s; } - len = 1; /* allow underscores */ - uv = (UV)scan_hex(s + 1, e - s - 1, &len); - s = e + 1; + else { + STRLEN len = 1; /* allow underscores */ + uv = (UV)scan_hex(s + 1, e - s - 1, &len); + has_utf8 = TRUE; + } + s = e + 1; } else { - len = 0; /* disallow underscores */ - uv = (UV)scan_hex(s, 2, &len); - s += len; + { + STRLEN len = 0; /* disallow underscores */ + uv = (UV)scan_hex(s, 2, &len); + s += len; + } } NUM_ESCAPE_INSERT: @@ -1423,8 +1436,8 @@ S_scan_const(pTHX_ char *start) * There will always enough room in sv since such escapes will * be longer than any utf8 sequence they can end up as */ - if (uv > 127) { - if (!thisutf && !has_utf && uv > 255) { + if (uv > 127 || has_utf8) { + if (!this_utf8 && !has_utf8 && uv > 255) { /* might need to recode whatever we have accumulated so far * if it contains any hibit chars */ @@ -1456,9 +1469,9 @@ S_scan_const(pTHX_ char *start) } } - if (thisutf || uv > 255) { + if (has_utf8 || uv > 255) { d = (char*)uv_to_utf8((U8*)d, uv); - has_utf = TRUE; + this_utf8 = TRUE; } else { *d++ = (char)uv; @@ -1487,7 +1500,7 @@ S_scan_const(pTHX_ char *start) res = new_constant( Nullch, 0, "charnames", res, Nullsv, "\\N{...}" ); str = SvPV(res,len); - if (!has_utf && SvUTF8(res)) { + if (!has_utf8 && SvUTF8(res)) { char *ostart = SvPVX(sv); SvCUR_set(sv, d - ostart); SvPOK_on(sv); @@ -1496,7 +1509,7 @@ S_scan_const(pTHX_ char *start) /* this just broke our allocation above... */ SvGROW(sv, send - start); d = SvPVX(sv) + SvCUR(sv); - has_utf = TRUE; + has_utf8 = TRUE; } if (len > e - s + 4) { char *odest = SvPVX(sv); @@ -1524,8 +1537,10 @@ S_scan_const(pTHX_ char *start) *d = toCTRL(*d); d++; #else - len = *s++; - *d++ = toCTRL(len); + { + U8 c = *s++; + *d++ = toCTRL(c); + } #endif continue; @@ -1573,7 +1588,7 @@ S_scan_const(pTHX_ char *start) *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); SvPOK_on(sv); - if (has_utf) + if (has_utf8) SvUTF8_on(sv); /* shrink the sv if we allocated more than we used */ @@ -2057,9 +2072,34 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len) if we already built the token before, use it. */ +#ifdef USE_PURE_BISON +#ifdef __SC__ +#pragma segment Perl_yylex_r +#endif +int +Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp) +{ + dTHR; + int r; + + yylval_pointer[yyactlevel] = lvalp; + yychar_pointer[yyactlevel] = lcharp; + yyactlevel++; + if (yyactlevel >= YYMAXLEVEL) + Perl_croak(aTHX_ "panic: YYMAXLEVEL"); + + r = Perl_yylex(aTHX); + + yyactlevel--; + + return r; +} +#endif + #ifdef __SC__ #pragma segment Perl_yylex #endif + int #ifdef USE_PURE_BISON Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp) @@ -2075,11 +2115,6 @@ Perl_yylex(pTHX) GV *gv = Nullgv; GV **gvp = 0; -#ifdef USE_PURE_BISON - yylval_pointer = lvalp; - yychar_pointer = lcharp; -#endif - /* check if there's an identifier for us to look at */ if (PL_pending_ident) { /* pit holds the identifier we read and pending_ident is reset */ @@ -2485,8 +2520,32 @@ Perl_yylex(pTHX) goto retry; } do { - bool bof; - bof = PL_rsfp && (PerlIO_tell(PL_rsfp) == 0); /* *Before* read! */ + bool bof = PL_rsfp ? TRUE : FALSE; + if (bof) { +#ifdef PERLIO_IS_STDIO +# ifdef __GNU_LIBRARY__ +# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */ +# define FTELL_FOR_PIPE_IS_BROKEN +# endif +# else +# ifdef __GLIBC__ +# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */ +# define FTELL_FOR_PIPE_IS_BROKEN +# endif +# endif +# endif +#endif +#ifdef FTELL_FOR_PIPE_IS_BROKEN + /* This loses the possibility to detect the bof + * situation on perl -P when the libc5 is being used. + * Workaround? Maybe attach some extra state to PL_rsfp? + */ + if (!PL_preprocess) + bof = PerlIO_tell(PL_rsfp) == 0; +#else + bof = PerlIO_tell(PL_rsfp) == 0; +#endif + } s = filter_gets(PL_linestr, PL_rsfp, 0); if (s == Nullch) { fake_eof: @@ -3522,7 +3581,7 @@ Perl_yylex(pTHX) /* FALL THROUGH */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - s = scan_num(s); + s = scan_num(s, &yylval); if (PL_expect == XOPERATOR) no_op("Number",s); TERM(THING); @@ -3592,7 +3651,7 @@ Perl_yylex(pTHX) while (isDIGIT(*start) || *start == '_') start++; if (*start == '.' && isDIGIT(start[1])) { - s = scan_num(s); + s = scan_num(s, &yylval); TERM(THING); } /* avoid v123abc() or $h{v1}, allow C */ @@ -3603,7 +3662,7 @@ Perl_yylex(pTHX) gv = gv_fetchpv(s, FALSE, SVt_PVCV); *start = c; if (!gv) { - s = scan_num(s); + s = scan_num(s, &yylval); TERM(THING); } } @@ -5163,7 +5222,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"exit")) return -KEY_exit; if (strEQ(d,"eval")) return KEY_eval; if (strEQ(d,"exec")) return -KEY_exec; - if (strEQ(d,"each")) return KEY_each; + if (strEQ(d,"each")) return -KEY_each; break; case 5: if (strEQ(d,"elsif")) return KEY_elsif; @@ -5307,7 +5366,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) break; case 'k': if (len == 4) { - if (strEQ(d,"keys")) return KEY_keys; + if (strEQ(d,"keys")) return -KEY_keys; if (strEQ(d,"kill")) return -KEY_kill; } break; @@ -5389,11 +5448,11 @@ Perl_keyword(pTHX_ register char *d, I32 len) case 'p': switch (len) { case 3: - if (strEQ(d,"pop")) return KEY_pop; + if (strEQ(d,"pop")) return -KEY_pop; if (strEQ(d,"pos")) return KEY_pos; break; case 4: - if (strEQ(d,"push")) return KEY_push; + if (strEQ(d,"push")) return -KEY_push; if (strEQ(d,"pack")) return -KEY_pack; if (strEQ(d,"pipe")) return -KEY_pipe; break; @@ -5500,7 +5559,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) case 'h': switch (len) { case 5: - if (strEQ(d,"shift")) return KEY_shift; + if (strEQ(d,"shift")) return -KEY_shift; break; case 6: if (strEQ(d,"shmctl")) return -KEY_shmctl; @@ -5529,7 +5588,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) case 'p': if (strEQ(d,"split")) return KEY_split; if (strEQ(d,"sprintf")) return -KEY_sprintf; - if (strEQ(d,"splice")) return KEY_splice; + if (strEQ(d,"splice")) return -KEY_splice; break; case 'q': if (strEQ(d,"sqrt")) return -KEY_sqrt; @@ -5609,7 +5668,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"unlink")) return -KEY_unlink; break; case 7: - if (strEQ(d,"unshift")) return KEY_unshift; + if (strEQ(d,"unshift")) return -KEY_unshift; if (strEQ(d,"ucfirst")) return -KEY_ucfirst; break; } @@ -5714,14 +5773,23 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { SV *msg; - why1 = "%^H is not consistent"; why2 = strEQ(key,"charnames") - ? " (missing \"use charnames ...\"?)" + ? "(possibly a missing \"use charnames ...\")" : ""; - why3 = ""; + msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s", + (type ? type: "undef"), why2); + + /* This is convoluted and evil ("goto considered harmful") + * but I do not understand the intricacies of all the different + * failure modes of %^H in here. The goal here is to make + * the most probable error message user-friendly. --jhi */ + + goto msgdone; + report: - msg = Perl_newSVpvf(aTHX_ "constant(%s): %s%s%s", + msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s", (type ? type: "undef"), why1, why2, why3); + msgdone: yyerror(SvPVX(msg)); SvREFCNT_dec(msg); return sv; @@ -6519,7 +6587,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) register char term; /* terminating character */ register char *to; /* current position in the sv's data */ I32 brackets = 1; /* bracket nesting level */ - bool has_utf = FALSE; /* is there any utf8 content? */ + bool has_utf8 = FALSE; /* is there any utf8 content? */ /* skip space before the delimiter */ if (isSPACE(*s)) @@ -6531,7 +6599,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* after skipping whitespace, the next character is the terminator */ term = *s; if ((term & 0x80) && UTF) - has_utf = TRUE; + has_utf8 = TRUE; /* mark where we are */ PL_multi_start = CopLINE(PL_curcop); @@ -6577,8 +6645,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) have found the terminator */ else if (*s == term) break; - else if (!has_utf && (*s & 0x80) && UTF) - has_utf = TRUE; + else if (!has_utf8 && (*s & 0x80) && UTF) + has_utf8 = TRUE; *to = *s; } } @@ -6606,8 +6674,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) break; else if (*s == PL_multi_open) brackets++; - else if (!has_utf && (*s & 0x80) && UTF) - has_utf = TRUE; + else if (!has_utf8 && (*s & 0x80) && UTF) + has_utf8 = TRUE; *to = *s; } } @@ -6667,7 +6735,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) if (keep_delims) sv_catpvn(sv, s, 1); - if (has_utf) + if (has_utf8) SvUTF8_on(sv); PL_multi_end = CopLINE(PL_curcop); s++; @@ -6712,7 +6780,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) */ char * -Perl_scan_num(pTHX_ char *start) +Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) { register char *s = start; /* current position in buffer */ register char *d; /* destination in temp buffer */ @@ -7125,9 +7193,9 @@ vstring: /* make the op for the constant and return */ if (sv) - yylval.opval = newSVOP(OP_CONST, 0, sv); + lvalp->opval = newSVOP(OP_CONST, 0, sv); else - yylval.opval = Nullop; + lvalp->opval = Nullop; return s; }