X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=23ae908f3069e21bfa28a1ed473e1a2faa1f2b79;hb=79ed0f4335cfe1640742a00ebb11ecae8eaf178e;hp=2e862b47f2970dc25a3cd0dc4ed1067bc2d73b8f;hpb=da6eedaa8c807f963938cbb14de34b87260db473;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index 2e862b4..23ae908 100644 --- a/toke.c +++ b/toke.c @@ -1,6 +1,7 @@ /* toke.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + * 2000, 2001, 2002, 2003, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -41,11 +42,7 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); #ifdef USE_UTF8_SCRIPTS # define UTF (!IN_BYTES) #else -# ifdef EBCDIC /* For now 'use utf8' does not affect tokenizer on EBCDIC */ -# define UTF (PL_linestr && DO_UTF8(PL_linestr)) -# else -# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) -# endif +# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) #endif /* In variables named $^X, these are the legal values for X. 
@@ -153,7 +150,7 @@ int yyactlevel = -1; #define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval) #define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval) #define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX) -#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP) +#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP) #define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0) #define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1) #define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP)) @@ -168,14 +165,18 @@ int yyactlevel = -1; /* This bit of chicanery makes a unary function followed by * a parenthesis into a function with one argument, highest precedence. + * The UNIDOR macro is for unary functions that can be followed by the // + * operator (such as C<shift // 0>). */ -#define UNI(f) return(yylval.ival = f, \ +#define UNI2(f,x) return(yylval.ival = f, \ REPORT("uni",f) \ - PL_expect = XTERM, \ + PL_expect = x, \ PL_bufptr = s, \ PL_last_uni = PL_oldbufptr, \ PL_last_lop_op = f, \ (*s == '(' || (s = skipspace(s), *s == '(') ? 
(int)FUNC1 : (int)UNIOP) ) +#define UNI(f) UNI2(f,XTERM) +#define UNIDOR(f) UNI2(f,XTERMORDORDOR) #define UNIBRACK(f) return(yylval.ival = f, \ REPORT("uni",f) \ @@ -211,8 +212,8 @@ S_tokereport(pTHX_ char *thing, char* s, I32 rv) /* * S_ao * - * This subroutine detects &&= and ||= and turns an ANDAND or OROR - * into an OP_ANDASSIGN or OP_ORASSIGN + * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR + * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN */ STATIC int @@ -224,6 +225,8 @@ S_ao(pTHX_ int toketype) yylval.ival = OP_ANDASSIGN; else if (toketype == OROR) yylval.ival = OP_ORASSIGN; + else if (toketype == DORDOR) + yylval.ival = OP_DORASSIGN; toketype = ASSIGNOP; } return toketype; @@ -302,7 +305,7 @@ S_missingterm(pTHX_ char *s) s = tmpbuf; } else { - *tmpbuf = PL_multi_close; + *tmpbuf = (char)PL_multi_close; tmpbuf[1] = '\0'; s = tmpbuf; } @@ -422,8 +425,8 @@ Perl_lex_start(pTHX_ SV *line) SAVEPPTR(PL_last_uni); SAVEPPTR(PL_linestart); SAVESPTR(PL_linestr); - SAVEPPTR(PL_lex_brackstack); - SAVEPPTR(PL_lex_casestack); + SAVEGENERICPV(PL_lex_brackstack); + SAVEGENERICPV(PL_lex_casestack); SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp); SAVESPTR(PL_lex_stuff); SAVEI32(PL_lex_defer); @@ -438,8 +441,6 @@ Perl_lex_start(pTHX_ SV *line) PL_lex_brackets = 0; New(899, PL_lex_brackstack, 120, char); New(899, PL_lex_casestack, 12, char); - SAVEFREEPV(PL_lex_brackstack); - SAVEFREEPV(PL_lex_casestack); PL_lex_casemods = 0; *PL_lex_casestack = '\0'; PL_lex_dojoin = 0; @@ -454,7 +455,7 @@ Perl_lex_start(pTHX_ SV *line) if (SvREADONLY(PL_linestr)) PL_linestr = sv_2mortal(newSVsv(PL_linestr)); s = SvPV(PL_linestr, len); - if (len && s[len-1] != ';') { + if (!len || s[len-1] != ';') { if (!(SvFLAGS(PL_linestr) & SVs_TEMP)) PL_linestr = sv_2mortal(newSVsv(PL_linestr)); sv_catpvn(PL_linestr, "\n;", 2); @@ -711,7 +712,7 @@ S_lop(pTHX_ I32 f, int x, char *s) PL_expect = x; PL_bufptr = s; PL_last_lop = PL_oldbufptr; - PL_last_lop_op = f; + 
PL_last_lop_op = (OPCODE)f; if (PL_nexttoke) return LSTOP; if (*s == '(') @@ -999,6 +1000,9 @@ S_sublex_start(pTHX) } yylval.opval = (OP*)newSVOP(op_type, 0, sv); PL_lex_stuff = Nullsv; + /* Allow <FH> // "foo" */ + if (op_type == OP_READLINE) + PL_expect = XTERMORDORDOR; return THING; } @@ -1047,8 +1051,8 @@ S_sublex_push(pTHX) SAVEPPTR(PL_last_uni); SAVEPPTR(PL_linestart); SAVESPTR(PL_linestr); - SAVEPPTR(PL_lex_brackstack); - SAVEPPTR(PL_lex_casestack); + SAVEGENERICPV(PL_lex_brackstack); + SAVEGENERICPV(PL_lex_casestack); PL_linestr = PL_lex_stuff; PL_lex_stuff = Nullsv; @@ -1063,13 +1067,11 @@ S_sublex_push(pTHX) PL_lex_brackets = 0; New(899, PL_lex_brackstack, 120, char); New(899, PL_lex_casestack, 12, char); - SAVEFREEPV(PL_lex_brackstack); - SAVEFREEPV(PL_lex_casestack); PL_lex_casemods = 0; *PL_lex_casestack = '\0'; PL_lex_starts = 0; PL_lex_state = LEX_INTERPCONCAT; - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); PL_lex_inwhat = PL_sublex_info.sub_inwhat; if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) @@ -1268,7 +1270,7 @@ S_scan_const(pTHX_ char *start) if (min > max) { Perl_croak(aTHX_ - "Invalid [] range \"%c-%c\" in transliteration operator", + "Invalid range \"%c-%c\" in transliteration operator", (char)min, (char)max); } @@ -1288,7 +1290,7 @@ S_scan_const(pTHX_ char *start) else #endif for (i = min; i <= max; i++) - *d++ = i; + *d++ = (char)i; /* mark the range as done, and continue */ dorange = FALSE; @@ -1496,8 +1498,8 @@ S_scan_const(pTHX_ char *start) while (src >= (U8 *)SvPVX(sv)) { if (!NATIVE_IS_INVARIANT(*src)) { U8 ch = NATIVE_TO_ASCII(*src); - *dst-- = UTF8_EIGHT_BIT_LO(ch); - *dst-- = UTF8_EIGHT_BIT_HI(ch); + *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch); + *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch); } else { *dst-- = *src; @@ -1540,6 +1542,16 @@ S_scan_const(pTHX_ char *start) e = s - 1; goto cont_scan; } + if (e > s + 2 && s[1] == 'U' && s[2] == '+') { + /* \N{U+...} */ + I32 
flags = PERL_SCAN_ALLOW_UNDERSCORES | + PERL_SCAN_DISALLOW_PREFIX; + s += 3; + len = e - s; + uv = grok_hex(s, &len, &flags, NULL); + s = e + 1; + goto NUM_ESCAPE_INSERT; + } res = newSVpvn(s + 1, e - s - 1); res = new_constant( Nullch, 0, "charnames", res, Nullsv, "\\N{...}" ); @@ -1573,11 +1585,11 @@ S_scan_const(pTHX_ char *start) *d = '\0'; sv_utf8_upgrade(sv); /* this just broke our allocation above... */ - SvGROW(sv, send - start); + SvGROW(sv, (STRLEN)(send - start)); d = SvPVX(sv) + SvCUR(sv); has_utf8 = TRUE; } - if (len > e - s + 4) { /* I _guess_ 4 is \N{} --jhi */ + if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ char *odest = SvPVX(sv); SvGROW(sv, (SvLEN(sv) + len - (e - s + 4))); @@ -1596,7 +1608,7 @@ S_scan_const(pTHX_ char *start) /* \c is a control character */ case 'c': s++; - { + if (s < send) { U8 c = *s++; #ifdef EBCDIC if (isLOWER(c)) @@ -1604,6 +1616,9 @@ S_scan_const(pTHX_ char *start) #endif *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c)); } + else { + yyerror("Missing control char name in \\c"); + } continue; /* printf-style backslashes, formfeeds, newlines, etc */ @@ -1659,17 +1674,18 @@ S_scan_const(pTHX_ char *start) *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); if (SvCUR(sv) >= SvLEN(sv)) - Perl_croak(aTHX_ "panic: constant overflowed allocated space"); + Perl_croak(aTHX_ "panic: constant overflowed allocated space"); SvPOK_on(sv); if (PL_encoding && !has_utf8) { - sv_recode_to_utf8(sv, PL_encoding); - has_utf8 = TRUE; + sv_recode_to_utf8(sv, PL_encoding); + if (SvUTF8(sv)) + has_utf8 = TRUE; } if (has_utf8) { SvUTF8_on(sv); if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { - PL_sublex_info.sub_op->op_private |= + PL_sublex_info.sub_op->op_private |= (PL_lex_repl ? 
OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); } } @@ -1864,7 +1880,7 @@ S_intuit_more(pTHX_ register char *s) * Method if it's "foo $bar" * Not a method if it's really "print foo $bar" * Method if it's really "foo package::" (interpreted as package->foo) - * Not a method if bar is known to be a subroutne ("sub bar; foo bar") + * Not a method if bar is known to be a subroutine ("sub bar; foo bar") * Not a method if bar is a filehandle or package, but is quoted with * => */ @@ -1948,7 +1964,7 @@ S_incl_perldb(pTHX) if (pdb) return pdb; - SETERRNO(0,SS$_NORMAL); + SETERRNO(0,SS_NORMAL); return "BEGIN { require 'perl5db.pl' }"; } return ""; @@ -2039,7 +2055,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) int old_len = SvCUR(buf_sv) ; /* ensure buf_sv is large enough */ - SvGROW(buf_sv, old_len + maxlen) ; + SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ; if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){ if (PerlIO_error(PL_rsfp)) return -1; /* error */ @@ -2126,7 +2142,7 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len) #ifdef DEBUGGING static char* exp_name[] = { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", - "ATTRTERM", "TERMBLOCK" + "ATTRTERM", "TERMBLOCK", "TERMORDORDOR" }; #endif @@ -2189,6 +2205,7 @@ Perl_yylex(pTHX) GV *gv = Nullgv; GV **gvp = 0; bool bof = FALSE; + I32 orig_keyword = 0; /* check if there's an identifier for us to look at */ if (PL_pending_ident) @@ -2250,39 +2267,40 @@ Perl_yylex(pTHX) DEBUG_T({ PerlIO_printf(Perl_debug_log, "### Saw case modifier at '%s'\n", PL_bufptr); }); s = PL_bufptr + 1; - if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) - tmp = *s, *s = s[2], s[2] = tmp; /* misordered... 
*/ - if (strchr("LU", *s) && - (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) - { - PL_lex_casestack[--PL_lex_casemods] = '\0'; - return ')'; + if (s[1] == '\\' && s[2] == 'E') { + PL_bufptr = s + 3; + PL_lex_state = LEX_INTERPCONCAT; + return yylex(); } - if (PL_lex_casemods > 10) { - char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char); - if (newlb != PL_lex_casestack) { - SAVEFREEPV(newlb); - PL_lex_casestack = newlb; + else { + if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) + tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ + if (strchr("LU", *s) && + (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) { + PL_lex_casestack[--PL_lex_casemods] = '\0'; + return ')'; } + if (PL_lex_casemods > 10) + Renew(PL_lex_casestack, PL_lex_casemods + 2, char); + PL_lex_casestack[PL_lex_casemods++] = *s; + PL_lex_casestack[PL_lex_casemods] = '\0'; + PL_lex_state = LEX_INTERPCONCAT; + PL_nextval[PL_nexttoke].ival = 0; + force_next('('); + if (*s == 'l') + PL_nextval[PL_nexttoke].ival = OP_LCFIRST; + else if (*s == 'u') + PL_nextval[PL_nexttoke].ival = OP_UCFIRST; + else if (*s == 'L') + PL_nextval[PL_nexttoke].ival = OP_LC; + else if (*s == 'U') + PL_nextval[PL_nexttoke].ival = OP_UC; + else if (*s == 'Q') + PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA; + else + Perl_croak(aTHX_ "panic: yylex"); + PL_bufptr = s + 1; } - PL_lex_casestack[PL_lex_casemods++] = *s; - PL_lex_casestack[PL_lex_casemods] = '\0'; - PL_lex_state = LEX_INTERPCONCAT; - PL_nextval[PL_nexttoke].ival = 0; - force_next('('); - if (*s == 'l') - PL_nextval[PL_nexttoke].ival = OP_LCFIRST; - else if (*s == 'u') - PL_nextval[PL_nexttoke].ival = OP_UCFIRST; - else if (*s == 'L') - PL_nextval[PL_nexttoke].ival = OP_LC; - else if (*s == 'U') - PL_nextval[PL_nexttoke].ival = OP_UC; - else if (*s == 'Q') - PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA; - else - Perl_croak(aTHX_ "panic: yylex"); - PL_bufptr = s + 1; force_next(FUNC); if (PL_lex_starts) { s = 
PL_bufptr; @@ -2307,13 +2325,7 @@ Perl_yylex(pTHX) if (PL_lex_dojoin) { PL_nextval[PL_nexttoke].ival = 0; force_next(','); -#ifdef USE_5005THREADS - PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0); - PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\""); - force_next(PRIVATEREF); -#else force_ident("\"", '$'); -#endif /* USE_5005THREADS */ PL_nextval[PL_nexttoke].ival = 0; force_next('$'); PL_nextval[PL_nexttoke].ival = 0; @@ -2535,7 +2547,7 @@ Perl_yylex(pTHX) if (!PL_preprocess) bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr); #else - bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr); + bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr); #endif if (bof) { PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); @@ -2608,6 +2620,19 @@ Perl_yylex(pTHX) sv_setpvn(x, ipath, ipathend - ipath); SvSETMAGIC(x); } + else { + STRLEN blen; + STRLEN llen; + char *bstart = SvPV(CopFILESV(PL_curcop),blen); + char *lstart = SvPV(x,llen); + if (llen < blen) { + bstart += blen - llen; + if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { + sv_setpvn(x, ipath, ipathend - ipath); + SvSETMAGIC(x); + } + } + } TAINT_NOT; /* $^X is always tainted, but that's OK */ } #endif /* ARG_ZERO_IS_SCRIPT */ @@ -2830,7 +2855,7 @@ Perl_yylex(pTHX) break; } if (ftst) { - PL_last_lop_op = ftst; + PL_last_lop_op = (OPCODE)ftst; DEBUG_T( { PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)ftst); } ); @@ -2960,6 +2985,7 @@ Perl_yylex(pTHX) switch (tmp) { case KEY_or: case KEY_and: + case KEY_err: case KEY_for: case KEY_unless: case KEY_if: @@ -3000,6 +3026,8 @@ Perl_yylex(pTHX) CvLOCKED_on(PL_compcv); else if (!PL_in_my && len == 6 && strnEQ(s, "method", len)) CvMETHOD_on(PL_compcv); + else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len)) + CvASSERTION_on(PL_compcv); #ifdef USE_ITHREADS else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "unique", len)) @@ -3027,7 +3055,7 @@ Perl_yylex(pTHX) break; /* require real whitespace or :'s */ } tmp = (PL_expect == XOPERATOR 
? '=' : '{'); /*'}(' for vi */ - if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) { + if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) { char q = ((*s == '\'') ? '"' : '\''); /* If here for an expression, and parsed no attrs, back off. */ if (tmp == '=' && !attrs) { @@ -3061,6 +3089,7 @@ Perl_yylex(pTHX) PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ else PL_expect = XTERM; + s = skipspace(s); TOKEN('('); case ';': CLINE; @@ -3089,11 +3118,7 @@ Perl_yylex(pTHX) leftbracket: s++; if (PL_lex_brackets > 100) { - char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char); - if (newlb != PL_lex_brackstack) { - SAVEFREEPV(newlb); - PL_lex_brackstack = newlb; - } + Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); } switch (PL_expect) { case XTERM: @@ -3186,12 +3211,17 @@ Perl_yylex(pTHX) || ((*t == 'q' || *t == 'x') && ++t < PL_bufend && !isALNUM(*t)))) { + /* skip q//-like construct */ char *tmps; char open, close, term; I32 brackets = 1; while (t < PL_bufend && isSPACE(*t)) t++; + /* check for q => */ + if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { + OPERATOR(HASHBRACK); + } term = *t; open = term; if (term && (tmps = strchr("([{< )]}> )]}>",term))) @@ -3204,7 +3234,7 @@ Perl_yylex(pTHX) else if (*t == open) break; } - else + else { for (t++; t < PL_bufend; t++) { if (*t == '\\' && t+1 < PL_bufend) t++; @@ -3213,8 +3243,13 @@ Perl_yylex(pTHX) else if (*t == open) brackets++; } + } + t++; } - t++; + else + /* skip plain q word */ + while (t < PL_bufend && isALNUM_lazy_if(t,UTF)) + t += UTF8SKIP(t); } else if (isALNUM_lazy_if(t,UTF)) { t += UTF8SKIP(t); @@ -3523,8 +3558,11 @@ Perl_yylex(pTHX) PL_expect = XTERM; /* e.g. print $fh 3 */ else if (*s == '.' && isDIGIT(s[1])) PL_expect = XTERM; /* e.g. print $fh .3 */ - else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=') - PL_expect = XTERM; /* e.g. print $fh -1 */ + else if (strchr("?-+", *s) && !isSPACE(s[1]) && s[1] != '=') + PL_expect = XTERM; /* e.g. 
print $fh -1 */ + else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/') + PL_expect = XTERM; /* e.g. print $fh /.../ + XXX except DORDOR operator */ else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=') PL_expect = XTERM; /* print $fh <<"EOF" */ } @@ -3566,22 +3604,40 @@ Perl_yylex(pTHX) PL_pending_ident = '@'; TERM('@'); - case '/': /* may either be division or pattern */ - case '?': /* may either be conditional or pattern */ - if (PL_expect != XOPERATOR) { - /* Disable warning on "study /blah/" */ - if (PL_oldoldbufptr == PL_last_uni - && (*PL_last_uni != 's' || s - PL_last_uni < 5 - || memNE(PL_last_uni, "study", 5) - || isALNUM_lazy_if(PL_last_uni+5,UTF))) - check_uni(); - s = scan_pat(s,OP_MATCH); - TERM(sublex_start()); - } - tmp = *s++; - if (tmp == '/') - Mop(OP_DIVIDE); - OPERATOR(tmp); + case '/': /* may be division, defined-or, or pattern */ + if (PL_expect == XTERMORDORDOR && s[1] == '/') { + s += 2; + AOPERATOR(DORDOR); + } + case '?': /* may either be conditional or pattern */ + if(PL_expect == XOPERATOR) { + tmp = *s++; + if(tmp == '?') { + OPERATOR('?'); + } + else { + tmp = *s++; + if(tmp == '/') { + /* A // operator. 
*/ + AOPERATOR(DORDOR); + } + else { + s--; + Mop(OP_DIVIDE); + } + } + } + else { + /* Disable warning on "study /blah/" */ + if (PL_oldoldbufptr == PL_last_uni + && (*PL_last_uni != 's' || s - PL_last_uni < 5 + || memNE(PL_last_uni, "study", 5) + || isALNUM_lazy_if(PL_last_uni+5,UTF) + )) + check_uni(); + s = scan_pat(s,OP_MATCH); + TERM(sublex_start()); + } case '.': if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack @@ -3701,7 +3757,9 @@ Perl_yylex(pTHX) TERM(THING); } /* avoid v123abc() or $h{v1}, allow C<print v10;> */ - else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) { + else if (!isALPHA(*start) && (PL_expect == XTERM + || PL_expect == XREF || PL_expect == XSTATE + || PL_expect == XTERMORDORDOR)) { char c = *start; GV *gv; *start = '\0'; @@ -3750,6 +3808,7 @@ Perl_yylex(pTHX) case 'z': case 'Z': keylookup: { + orig_keyword = 0; gv = Nullgv; gvp = 0; @@ -3814,6 +3873,7 @@ Perl_yylex(pTHX) } } if (ogv) { + orig_keyword = tmp; tmp = 0; /* overridden by import or by GLOBAL */ } else if (gv && !gvp @@ -3907,6 +3967,10 @@ Perl_yylex(pTHX) CLINE; yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); yylval.opval->op_private = OPpCONST_BARE; + /* UTF-8 package name? */ + if (UTF && !IN_BYTES && + is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) + SvUTF8_on(sv); /* And if "Foo::", then that's what it certainly is. */ @@ -3985,7 +4049,9 @@ Perl_yylex(pTHX) /* If followed by a bareword, see if it looks like indir obj. 
*/ - if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp = intuit_method(s,gv))) + if (!orig_keyword + && (isIDFIRST_lazy_if(s,UTF) || *s == '$') + && (tmp = intuit_method(s,gv))) return tmp; /* Not a method, so call it a subroutine (if defined) */ @@ -4041,7 +4107,7 @@ Perl_yylex(pTHX) if (ckWARN(WARN_RESERVED)) { if (lastchar != '-') { for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ; - if (!*d && strNE(PL_tokenbuf,"main")) + if (!*d && !gv_stashpv(PL_tokenbuf,FALSE)) Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, PL_tokenbuf); } @@ -4134,8 +4200,29 @@ Perl_yylex(pTHX) } #endif #ifdef PERLIO_LAYERS - if (UTF && !IN_BYTES) - PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); + if (!IN_BYTES) { + if (UTF) + PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); + else if (PL_encoding) { + SV *name; + dSP; + ENTER; + SAVETMPS; + PUSHMARK(sp); + EXTEND(SP, 1); + XPUSHs(PL_encoding); + PUTBACK; + call_method("name", G_SCALAR); + SPAGAIN; + name = POPs; + PUTBACK; + PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, + Perl_form(aTHX_ ":encoding(%"SVf")", + name)); + FREETMPS; + LEAVE; + } + } #endif PL_rsfp = Nullfp; } @@ -4293,6 +4380,9 @@ Perl_yylex(pTHX) case KEY_eof: UNI(OP_EOF); + case KEY_err: + OPERATOR(DOROP); + case KEY_exp: UNI(OP_EXP); @@ -4376,7 +4466,7 @@ Perl_yylex(pTHX) UNI(OP_GMTIME); case KEY_getc: - UNI(OP_GETC); + UNIDOR(OP_GETC); case KEY_getppid: FUN0(OP_GETPPID); @@ -4591,10 +4681,14 @@ Perl_yylex(pTHX) char *t; for (d = s; isALNUM_lazy_if(d,UTF); d++) ; t = skipspace(d); - if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)) + if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) + /* [perl #16184] */ + && !(t[0] == '=' && t[1] == '>') + ) { Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), "Precedence problem: open %.*s should be open(%.*s)", - d-s,s, d-s,s); + d - s, s, d - s, s); + } } LOP(OP_OPEN,XTERM); @@ -4626,10 +4720,10 @@ Perl_yylex(pTHX) LOP(OP_PUSH,XTERM); case KEY_pop: - UNI(OP_POP); + UNIDOR(OP_POP); case KEY_pos: - UNI(OP_POS); 
+ UNIDOR(OP_POS); case KEY_pack: LOP(OP_PACK,XTERM); @@ -4769,7 +4863,7 @@ Perl_yylex(pTHX) case KEY_readline: set_csh(); - UNI(OP_READLINE); + UNIDOR(OP_READLINE); case KEY_readpipe: set_csh(); @@ -4785,7 +4879,7 @@ Perl_yylex(pTHX) LOP(OP_REVERSE,XTERM); case KEY_readlink: - UNI(OP_READLINK); + UNIDOR(OP_READLINK); case KEY_ref: UNI(OP_REF); @@ -4852,7 +4946,7 @@ Perl_yylex(pTHX) LOP(OP_SSOCKOPT,XTERM); case KEY_shift: - UNI(OP_SHIFT); + UNIDOR(OP_SHIFT); case KEY_shmctl: LOP(OP_SHMCTL,XTERM); @@ -4983,8 +5077,8 @@ Perl_yylex(pTHX) d[tmp] = '\0'; if (bad_proto && ckWARN(WARN_SYNTAX)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Illegal character in prototype for %s : %s", - SvPVX(PL_subname), d); + "Illegal character in prototype for %"SVf" : %s", + PL_subname, d); SvCUR(PL_lex_stuff) = tmp; have_proto = TRUE; @@ -4995,6 +5089,8 @@ Perl_yylex(pTHX) if (*s == ':' && s[1] != ':') PL_expect = attrful; + else if (!have_name && *s != '{' && key == KEY_sub) + Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); if (have_proto) { PL_nextval[PL_nexttoke].opval = @@ -5082,7 +5178,7 @@ Perl_yylex(pTHX) LOP(OP_UNLINK,XTERM); case KEY_undef: - UNI(OP_UNDEF); + UNIDOR(OP_UNDEF); case KEY_unpack: LOP(OP_UNPACK,XTERM); @@ -5091,7 +5187,7 @@ Perl_yylex(pTHX) LOP(OP_UTIME,XTERM); case KEY_umask: - UNI(OP_UMASK); + UNIDOR(OP_UMASK); case KEY_unshift: LOP(OP_UNSHIFT,XTERM); @@ -5178,7 +5274,7 @@ static int S_pending_ident(pTHX) { register char *d; - register I32 tmp; + register I32 tmp = 0; /* pit holds the identifier we read and pending_ident is reset */ char pit = PL_pending_ident; PL_pending_ident = 0; @@ -5198,14 +5294,14 @@ S_pending_ident(pTHX) yyerror(Perl_form(aTHX_ "No package name allowed for " "variable %s in \"our\"", PL_tokenbuf)); - tmp = pad_allocmy(PL_tokenbuf); + tmp = allocmy(PL_tokenbuf); } else { if (strchr(PL_tokenbuf,':')) yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf)); yylval.opval = newOP(OP_PADANY, 0); - yylval.opval->op_targ = 
pad_allocmy(PL_tokenbuf); + yylval.opval->op_targ = allocmy(PL_tokenbuf); return PRIVATEREF; } } @@ -5223,23 +5319,13 @@ S_pending_ident(pTHX) */ if (!strchr(PL_tokenbuf,':')) { -#ifdef USE_5005THREADS - /* Check for single character per-thread SVs */ - if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0' - && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */ - && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD) - { - yylval.opval = newOP(OP_THREADSV, 0); - yylval.opval->op_targ = tmp; - return PRIVATEREF; - } -#endif /* USE_5005THREADS */ - if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) { - SV *namesv = AvARRAY(PL_comppad_name)[tmp]; + if (!PL_in_my) + tmp = pad_findmy(PL_tokenbuf); + if (tmp != NOT_IN_PAD) { /* might be an "our" variable" */ - if (SvFLAGS(namesv) & SVpad_OUR) { + if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) { /* build ops for a bareword */ - SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0); + SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0); sv_catpvn(sym, "::", 2); sv_catpv(sym, PL_tokenbuf+1); yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); @@ -5415,6 +5501,7 @@ Perl_keyword(pTHX_ register char *d, I32 len) break; case 3: if (strEQ(d,"eof")) return -KEY_eof; + if (strEQ(d,"err")) return -KEY_err; if (strEQ(d,"exp")) return -KEY_exp; break; case 4: @@ -6222,8 +6309,10 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des } if (*s == '}') { s++; - if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) + if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { PL_lex_state = LEX_INTERPEND; + PL_expect = XREF; + } if (funny == '#') funny = '@'; if (PL_lex_state == LEX_NORMAL) { @@ -6247,7 +6336,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des } void -Perl_pmflag(pTHX_ U16 *pmfl, int ch) +Perl_pmflag(pTHX_ U32* pmfl, int ch) { if (ch == 'i') *pmfl |= PMf_FOLD; @@ -6523,7 +6612,7 @@ S_scan_heredoc(pTHX_ register char *s) CopLINE_inc(PL_curcop); } if (s >= 
bufend) { - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); missingterm(PL_tokenbuf); } sv_setpvn(herewas,bufptr,d-bufptr+1); @@ -6543,7 +6632,7 @@ S_scan_heredoc(pTHX_ register char *s) CopLINE_inc(PL_curcop); } if (s >= PL_bufend) { - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); missingterm(PL_tokenbuf); } sv_setpvn(tmpstr,d+1,s-d); @@ -6561,7 +6650,7 @@ S_scan_heredoc(pTHX_ register char *s) while (s >= PL_bufend) { /* multiple line string? */ if (!outer || !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); missingterm(PL_tokenbuf); } CopLINE_inc(PL_curcop); @@ -6610,8 +6699,12 @@ retval: Renew(SvPVX(tmpstr), SvLEN(tmpstr), char); } SvREFCNT_dec(herewas); - if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) - SvUTF8_on(tmpstr); + if (!IN_BYTES) { + if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) + SvUTF8_on(tmpstr); + else if (PL_encoding) + sv_recode_to_utf8(tmpstr, PL_encoding); + } PL_lex_stuff = tmpstr; yylval.ival = op_type; return s; @@ -6717,9 +6810,9 @@ S_scan_inputsymbol(pTHX_ char *start) add symbol table ops */ if ((tmp = pad_findmy(d)) != NOT_IN_PAD) { - SV *namesv = AvARRAY(PL_comppad_name)[tmp]; - if (SvFLAGS(namesv) & SVpad_OUR) { - SV *sym = sv_2mortal(newSVpv(HvNAME(GvSTASH(namesv)),0)); + if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) { + SV *sym = sv_2mortal( + newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0)); sv_catpvn(sym, "::", 2); sv_catpv(sym, d+1); d = SvPVX(sym); @@ -6830,6 +6923,10 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) register char *to; /* current position in the sv's data */ I32 brackets = 1; /* bracket nesting level */ bool has_utf8 = FALSE; /* is there any utf8 content? */ + I32 termcode; /* terminating char. 
code */ + U8 termstr[UTF8_MAXLEN]; /* terminating string */ + STRLEN termlen; /* length of terminating string */ + char *last = NULL; /* last position for nesting bracket */ /* skip space before the delimiter */ if (isSPACE(*s)) @@ -6840,8 +6937,16 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* after skipping whitespace, the next character is the terminator */ term = *s; - if (!UTF8_IS_INVARIANT((U8)term) && UTF) - has_utf8 = TRUE; + if (!UTF) { + termcode = termstr[0] = term; + termlen = 1; + } + else { + termcode = utf8_to_uvchr((U8*)s, &termlen); + Copy(s, termstr, termlen, U8); + if (!UTF8_IS_INVARIANT(term)) + has_utf8 = TRUE; + } /* mark where we are */ PL_multi_start = CopLINE(PL_curcop); @@ -6849,21 +6954,92 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* find corresponding closing delimiter */ if (term && (tmps = strchr("([{< )]}> )]}>",term))) - term = tmps[5]; + termcode = termstr[0] = term = tmps[5]; + PL_multi_close = term; /* create a new SV to hold the contents. 87 is leak category, I'm assuming. 79 is the SV's initial length. What a random number. 
*/ sv = NEWSV(87,79); sv_upgrade(sv, SVt_PVIV); - SvIVX(sv) = term; + SvIVX(sv) = termcode; (void)SvPOK_only(sv); /* validate pointer */ /* move past delimiter and try to read a complete string */ if (keep_delims) - sv_catpvn(sv, s, 1); - s++; + sv_catpvn(sv, s, termlen); + s += termlen; for (;;) { + if (PL_encoding && !UTF) { + bool cont = TRUE; + + while (cont) { + int offset = s - SvPVX(PL_linestr); + bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, + &offset, (char*)termstr, termlen); + char *ns = SvPVX(PL_linestr) + offset; + char *svlast = SvEND(sv) - 1; + + for (; s < ns; s++) { + if (*s == '\n' && !PL_rsfp) + CopLINE_inc(PL_curcop); + } + if (!found) + goto read_more_line; + else { + /* handle quoted delimiters */ + if (*(svlast-1) == '\\') { + char *t; + for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';) + t--; + if ((svlast-1 - t) % 2) { + if (!keep_quoted) { + *(svlast-1) = term; + *svlast = '\0'; + SvCUR_set(sv, SvCUR(sv) - 1); + } + continue; + } + } + if (PL_multi_open == PL_multi_close) { + cont = FALSE; + } + else { + char *t, *w; + if (!last) + last = SvPVX(sv); + for (w = t = last; t < svlast; w++, t++) { + /* At here, all closes are "was quoted" one, + so we don't check PL_multi_close. 
*/ + if (*t == '\\') { + if (!keep_quoted && *(t+1) == PL_multi_open) + t++; + else + *w++ = *t++; + } + else if (*t == PL_multi_open) + brackets++; + + *w = *t; + } + if (w < t) { + *w++ = term; + *w = '\0'; + SvCUR_set(sv, w - SvPVX(sv)); + } + last = w; + if (--brackets <= 0) + cont = FALSE; + } + } + } + if (!keep_delims) { + SvCUR_set(sv, SvCUR(sv) - 1); + *SvEND(sv) = '\0'; + } + break; + } + /* extend sv if need be */ SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); /* set 'to' to the next character in the sv's string */ @@ -6885,8 +7061,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } /* terminate when run out of buffer (the for() condition), or have found the terminator */ - else if (*s == term) - break; + else if (*s == term) { + if (termlen == 1) + break; + if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) + break; + } else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; *to = *s; @@ -6948,13 +7128,14 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) to[-1] = '\n'; #endif + read_more_line: /* if we're out of file, or a read fails, bail and reset the current line marker so we can report where the unterminated string began */ if (!PL_rsfp || !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { sv_free(sv); - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); return Nullch; } /* we read a line, so increment our line counter */ @@ -6978,12 +7159,15 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* at this point, we have successfully read the delimited string */ - if (keep_delims) - sv_catpvn(sv, s, 1); - if (has_utf8) + if (!PL_encoding || UTF) { + if (keep_delims) + sv_catpvn(sv, s, termlen); + s += termlen; + } + if (has_utf8 || PL_encoding) SvUTF8_on(sv); + PL_multi_end = CopLINE(PL_curcop); - s++; /* if we allocated too much space, give some back */ if (SvCUR(sv) + 5 < SvLEN(sv)) { @@ 
-7383,7 +7567,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) case 'v': vstring: sv = NEWSV(92,5); /* preallocate storage space */ - s = new_vstring(s,sv); + s = scan_vstring(s,sv); break; } @@ -7470,6 +7654,12 @@ S_scan_formline(pTHX_ register char *s) } else PL_lex_state = LEX_FORMLINE; + if (!IN_BYTES) { + if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff))) + SvUTF8_on(stuff); + else if (PL_encoding) + sv_recode_to_utf8(stuff, PL_encoding); + } PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff); force_next(THING); PL_nextval[PL_nexttoke].ival = OP_FORMLINE; @@ -7497,52 +7687,22 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) { I32 oldsavestack_ix = PL_savestack_ix; CV* outsidecv = PL_compcv; - AV* comppadlist; if (PL_compcv) { assert(SvTYPE(PL_compcv) == SVt_PVCV); } SAVEI32(PL_subline); save_item(PL_subname); - SAVEI32(PL_padix); - SAVECOMPPAD(); - SAVESPTR(PL_comppad_name); SAVESPTR(PL_compcv); - SAVEI32(PL_comppad_name_fill); - SAVEI32(PL_min_intro_pending); - SAVEI32(PL_max_intro_pending); - SAVEI32(PL_pad_reset_pending); PL_compcv = (CV*)NEWSV(1104,0); sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV); CvFLAGS(PL_compcv) |= flags; - PL_comppad = newAV(); - av_push(PL_comppad, Nullsv); - PL_curpad = AvARRAY(PL_comppad); - PL_comppad_name = newAV(); - PL_comppad_name_fill = 0; - PL_min_intro_pending = 0; - PL_padix = 0; PL_subline = CopLINE(PL_curcop); -#ifdef USE_5005THREADS - av_store(PL_comppad_name, 0, newSVpvn("@_", 2)); - PL_curpad[0] = (SV*)newAV(); - SvPADMY_on(PL_curpad[0]); /* XXX Needed? 
*/ -#endif /* USE_5005THREADS */ - - comppadlist = newAV(); - AvREAL_off(comppadlist); - av_store(comppadlist, 0, (SV*)PL_comppad_name); - av_store(comppadlist, 1, (SV*)PL_comppad); - - CvPADLIST(PL_compcv) = comppadlist; + CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv); -#ifdef USE_5005THREADS - CvOWNER(PL_compcv) = 0; - New(666, CvMUTEXP(PL_compcv), 1, perl_mutex); - MUTEX_INIT(CvMUTEXP(PL_compcv)); -#endif /* USE_5005THREADS */ + CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; return oldsavestack_ix; } @@ -7571,15 +7731,33 @@ Perl_yyerror(pTHX_ char *s) where = "at EOF"; else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) { + /* + Only for NetWare: + The code below is removed for NetWare because it abends/crashes on NetWare + when the script has error such as not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ +#ifndef NETWARE while (isSPACE(*PL_oldoldbufptr)) PL_oldoldbufptr++; +#endif context = PL_oldoldbufptr; contlen = PL_bufptr - PL_oldoldbufptr; } else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) { + /* + Only for NetWare: + The code below is removed for NetWare because it abends/crashes on NetWare + when the script has error such as not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ +#ifndef NETWARE while (isSPACE(*PL_oldbufptr)) PL_oldbufptr++; +#endif context = PL_oldbufptr; contlen = PL_bufptr - PL_oldbufptr; }