X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=28c71ff4528ca9dfd4e0f542749ab7e3994d27dd;hb=5bd07a3d26012a115fab327912ac8788755e1251;hp=62850846a7732867fdd5dab05c2a77480a801e6d;hpb=d8ae67563df20ade634a53f13f534f31c6a0d46d;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index 6285084..28c71ff 100644 --- a/toke.c +++ b/toke.c @@ -126,31 +126,40 @@ int yyactlevel = -1; * Also see LOP and lop() below. */ -#define TOKEN(retval) return (PL_bufptr = s,(int)retval) -#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval) -#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval)) -#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval) -#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval) -#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval) -#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval) -#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX) -#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP) -#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0) -#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1) -#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP)) -#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP)) -#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP)) -#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP)) -#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP) -#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP)) -#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP)) -#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP) -#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP) +#ifdef DEBUGGING /* Serve -DT. */ +# define REPORT(x,retval) tokereport(x,s,(int)retval) +# define REPORT2(x,retval) tokereport(x,s, yylval.ival) +#else +# define REPORT(x,retval) 1 +# define REPORT2(x,retval) 1 +#endif + +#define TOKEN(retval) return (REPORT2("token",retval), PL_bufptr = s,(int)retval) +#define OPERATOR(retval) return (REPORT2("operator",retval), PL_expect = XTERM, PL_bufptr = s,(int)retval) +#define AOPERATOR(retval) return ao((REPORT2("aop",retval), PL_expect = XTERM, PL_bufptr = s,(int)retval)) +#define PREBLOCK(retval) return (REPORT2("preblock",retval), PL_expect = XBLOCK,PL_bufptr = s,(int)retval) +#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval), PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval) +#define PREREF(retval) return (REPORT2("preref",retval), PL_expect = XREF,PL_bufptr = s,(int)retval) +#define TERM(retval) return (CLINE, REPORT2("term",retval), PL_expect = XOPERATOR, PL_bufptr = s,(int)retval) +#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f), PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX) +#define FTST(f) return(yylval.ival=f, REPORT("ftst",f), PL_expect = XTERM,PL_bufptr = s,(int)UNIOP) +#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f), PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0) +#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f), PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1) +#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f), PL_expect = XTERM,PL_bufptr = s,(int)BITOROP)) +#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f), PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP)) +#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f), PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP)) +#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f), PL_expect = XTERM,PL_bufptr = s,(int)POWOP)) +#define PMop(f) return(yylval.ival=f, REPORT("matchop",f), PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP) +#define Aop(f) return ao((yylval.ival=f, REPORT("add",f), PL_expect = XTERM,PL_bufptr = s,(int)ADDOP)) +#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f), PL_expect = XTERM,PL_bufptr = s,(int)MULOP)) +#define Eop(f) return(yylval.ival=f, REPORT("eq",f), PL_expect = XTERM,PL_bufptr = s,(int)EQOP) +#define Rop(f) return(yylval.ival=f, REPORT("rel",f), PL_expect = XTERM,PL_bufptr = s,(int)RELOP) /* This bit of chicanery makes a unary function followed by * a parenthesis into a function with one argument, highest precedence. */ #define UNI(f) return(yylval.ival = f, \ + REPORT("uni",f), \ PL_expect = XTERM, \ PL_bufptr = s, \ PL_last_uni = PL_oldbufptr, \ @@ -158,6 +167,7 @@ int yyactlevel = -1; (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) ) #define UNIBRACK(f) return(yylval.ival = f, \ + REPORT("uni",f), \ PL_bufptr = s, \ PL_last_uni = PL_oldbufptr, \ (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) ) @@ -165,6 +175,24 @@ int yyactlevel = -1; /* grandfather return to old style */ #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP) +STATIC void +S_tokereport(pTHX_ char *thing, char* s, I32 rv) +{ + SV *report; + DEBUG_T({ + report = newSVpv(thing, 0); + Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv); + + if (s - PL_bufptr > 0) + sv_catpvn(report, PL_bufptr, s - PL_bufptr); + else { + if (PL_oldbufptr && *PL_oldbufptr) + sv_catpv(report, PL_tokenbuf); + } + PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report)); + }) +} + /* * S_ao * @@ -677,6 +705,7 @@ S_lop(pTHX_ I32 f, int x, char *s) { yylval.ival = f; CLINE; + REPORT("lop", f); PL_expect = x; PL_bufptr = s; PL_last_lop = PL_oldbufptr; @@ -1045,8 +1074,11 @@ STATIC I32 S_sublex_done(pTHX) { if (!PL_lex_starts++) { + SV *sv = newSVpvn("",0); + if (SvUTF8(PL_linestr)) + SvUTF8_on(sv); PL_expect = XOPERATOR; - yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0)); + yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); return THING; } @@ -1173,7 +1205,8 @@ S_scan_const(pTHX_ char *start) register char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ bool didrange = FALSE; /* did we just finish a range? */ - bool has_utf8 = FALSE; /* embedded \x{} */ + bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr)); + /* the constant is UTF8 */ UV uv; I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) @@ -1313,8 +1346,6 @@ S_scan_const(pTHX_ char *start) /* backslashes */ if (*s == '\\' && s+1 < send) { - bool to_be_utf8 = FALSE; - s++; /* some backslashes we leave behind */ @@ -1357,8 +1388,7 @@ S_scan_const(pTHX_ char *start) "Unrecognized escape \\%c passed through", *s); /* default action is to copy the quoted character */ - *d++ = *s++; - continue; + goto default_action; } /* \132 indicates an octal constant */ @@ -1383,8 +1413,6 @@ S_scan_const(pTHX_ char *start) else { STRLEN len = 1; /* allow underscores */ uv = (UV)scan_hex(s + 1, e - s - 1, &len); - if (PL_hints & HINT_UTF8) - to_be_utf8 = TRUE; } s = e + 1; } @@ -1408,7 +1436,7 @@ S_scan_const(pTHX_ char *start) * repertoire. --jhi */ if (uv > 127) { - if (!has_utf8 && (to_be_utf8 || uv > 255)) { + if (!has_utf8 && uv > 255) { /* Might need to recode whatever we have * accumulated so far if it contains any * hibit chars. @@ -1447,9 +1475,16 @@ S_scan_const(pTHX_ char *start) } } - if (to_be_utf8 || has_utf8 || uv > 255) { + if (has_utf8 || uv > 255) { d = (char*)uv_to_utf8((U8*)d, uv); has_utf8 = TRUE; + if (PL_lex_inwhat == OP_TRANS && + PL_sublex_info.sub_op) { + PL_sublex_info.sub_op->op_private |= + (PL_lex_repl ? OPpTRANS_FROM_UTF + : OPpTRANS_TO_UTF); + utf = TRUE; + } } else { *d++ = (char)uv; @@ -1477,6 +1512,8 @@ S_scan_const(pTHX_ char *start) res = newSVpvn(s + 1, e - s - 1); res = new_constant( Nullch, 0, "charnames", res, Nullsv, "\\N{...}" ); + if (has_utf8) + sv_utf8_upgrade(res); str = SvPV(res,len); if (!has_utf8 && SvUTF8(res)) { char *ostart = SvPVX(sv); @@ -1559,8 +1596,7 @@ S_scan_const(pTHX_ char *start) continue; } /* end if (backslash) */ - /* (now in tr/// code again) */ - + default_action: if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) { STRLEN len = (STRLEN) -1; UV uv; @@ -1579,10 +1615,15 @@ S_scan_const(pTHX_ char *start) *d++ = *s++; } has_utf8 = TRUE; + if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { + PL_sublex_info.sub_op->op_private |= + (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); + utf = TRUE; + } continue; } - *d++ = *s++; + *d++ = *s++; } /* while loop to process each character */ /* terminate the string and set up the sv */ @@ -2531,7 +2572,32 @@ Perl_yylex(pTHX) } do { bof = PL_rsfp ? TRUE : FALSE; - if (bof) { + if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) { + fake_eof: + if (PL_rsfp) { + if (PL_preprocess && !PL_in_eval) + (void)PerlProc_pclose(PL_rsfp); + else if ((PerlIO *)PL_rsfp == PerlIO_stdin()) + PerlIO_clearerr(PL_rsfp); + else + (void)PerlIO_close(PL_rsfp); + PL_rsfp = Nullfp; + PL_doextract = FALSE; + } + if (!PL_in_eval && (PL_minus_n || PL_minus_p)) { + sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : ""); + sv_catpv(PL_linestr,";}"); + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_minus_n = PL_minus_p = 0; + goto retry; + } + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + sv_setpv(PL_linestr,""); + TOKEN(';'); /* not infinite loop because rsfp is NULL now */ + } + /* if it looks like the start of a BOM, check if it in fact is */ + else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) { #ifdef PERLIO_IS_STDIO # ifdef __GNU_LIBRARY__ # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */ @@ -2551,38 +2617,14 @@ Perl_yylex(pTHX) * Workaround? Maybe attach some extra state to PL_rsfp? */ if (!PL_preprocess) - bof = PerlIO_tell(PL_rsfp) == 0; + bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr); #else - bof = PerlIO_tell(PL_rsfp) == 0; + bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr); #endif - } - s = filter_gets(PL_linestr, PL_rsfp, 0); - if (s == Nullch) { - fake_eof: - if (PL_rsfp) { - if (PL_preprocess && !PL_in_eval) - (void)PerlProc_pclose(PL_rsfp); - else if ((PerlIO *)PL_rsfp == PerlIO_stdin()) - PerlIO_clearerr(PL_rsfp); - else - (void)PerlIO_close(PL_rsfp); - PL_rsfp = Nullfp; - PL_doextract = FALSE; - } - if (!PL_in_eval && (PL_minus_n || PL_minus_p)) { - sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : ""); - sv_catpv(PL_linestr,";}"); - PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + if (bof) { PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_minus_n = PL_minus_p = 0; - goto retry; + s = swallow_bom((U8*)s); } - PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); - sv_setpv(PL_linestr,""); - TOKEN(';'); /* not infinite loop because rsfp is NULL now */ - } else if (bof) { - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - s = swallow_bom((U8*)s); } if (PL_doextract) { if (*s == '#' && s[1] == '!' && instr(s,"perl")) @@ -2861,7 +2903,7 @@ Perl_yylex(pTHX) if (ftst) { PL_last_lop_op = ftst; DEBUG_T( { PerlIO_printf(Perl_debug_log, - "### Saw file test %c\n", ftst); + "### Saw file test %c\n", (int)ftst); } ) FTST(ftst); } @@ -2869,7 +2911,8 @@ Perl_yylex(pTHX) /* Assume it was a minus followed by a one-letter named * subroutine call (or a -bareword), then. */ DEBUG_T( { PerlIO_printf(Perl_debug_log, - "### %c looked like a file test but was not\n", ftst); + "### %c looked like a file test but was not\n", + (int)ftst); } ) s -= 2; } @@ -3024,9 +3067,21 @@ Perl_yylex(pTHX) PL_lex_stuff = Nullsv; } else { - attrs = append_elem(OP_LIST, attrs, - newSVOP(OP_CONST, 0, - newSVpvn(s, len))); + if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len)) + CvLVALUE_on(PL_compcv); + else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len)) + CvLOCKED_on(PL_compcv); + else if (!PL_in_my && len == 6 && strnEQ(s, "method", len)) + CvMETHOD_on(PL_compcv); + /* After we've set the flags, it could be argued that + we don't need to do the attributes.pm-based setting + process, and shouldn't bother appending recognized + flags. To experiment with that, uncomment the + following "else": */ + /* else */ + attrs = append_elem(OP_LIST, attrs, + newSVOP(OP_CONST, 0, + newSVpvn(s, len))); } s = skipspace(d); if (*s == ':' && s[1] != ':') @@ -4115,10 +4170,6 @@ Perl_yylex(pTHX) (void)PerlIO_seek(PL_rsfp, 0L, 0); } if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { -#if defined(__BORLANDC__) - /* XXX see note in do_binmode() */ - ((FILE*)PL_rsfp)->flags |= _F_BIN; -#endif if (loc > 0) PerlIO_seek(PL_rsfp, loc, 0); } @@ -4698,7 +4749,11 @@ Perl_yylex(pTHX) TOKEN('('); case KEY_qq: + case KEY_qu: s = scan_str(s,FALSE,FALSE); + if (tmp == KEY_qu && + is_utf8_string((U8*)SvPVX(PL_lex_stuff), SvCUR(PL_lex_stuff))) + SvUTF8_on(PL_lex_stuff); if (!s) missingterm((char*)0); yylval.ival = OP_STRINGIFY; @@ -5535,6 +5590,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"q")) return KEY_q; if (strEQ(d,"qr")) return KEY_qr; if (strEQ(d,"qq")) return KEY_qq; + if (strEQ(d,"qu")) return KEY_qu; if (strEQ(d,"qw")) return KEY_qw; if (strEQ(d,"qx")) return KEY_qx; } @@ -7191,10 +7247,9 @@ vstring: while (isDIGIT(*pos) || *pos == '_') pos++; if (!isALPHA(*pos)) { - UV rev; + UV rev, revmax = 0; U8 tmpbuf[UTF8_MAXLEN+1]; U8 *tmpend; - bool utf8 = FALSE; s++; /* get past 'v' */ sv = NEWSV(92,5); @@ -7221,7 +7276,8 @@ vstring: } } tmpend = uv_to_utf8(tmpbuf, rev); - utf8 = utf8 || rev > 127; + if (rev > revmax) + revmax = rev; sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); if (*pos == '.' && isDIGIT(pos[1])) s = ++pos; @@ -7235,9 +7291,9 @@ vstring: SvPOK_on(sv); SvREADONLY_on(sv); - if (utf8) { + if (revmax > 127) { SvUTF8_on(sv); - if (!UTF||IN_BYTE) + if (revmax < 256) sv_utf8_downgrade(sv, TRUE); } }