X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=23ae908f3069e21bfa28a1ed473e1a2faa1f2b79;hb=79ed0f4335cfe1640742a00ebb11ecae8eaf178e;hp=d95b0a72bb98ce31f4ef57b76d2a593688783adb;hpb=d0063567ae6829f18fa94be9ac4f0b3986e32f5a;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index d95b0a7..23ae908 100644 --- a/toke.c +++ b/toke.c @@ -1,6 +1,7 @@ /* toke.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + * 2000, 2001, 2002, 2003, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -424,8 +425,8 @@ Perl_lex_start(pTHX_ SV *line) SAVEPPTR(PL_last_uni); SAVEPPTR(PL_linestart); SAVESPTR(PL_linestr); - SAVEPPTR(PL_lex_brackstack); - SAVEPPTR(PL_lex_casestack); + SAVEGENERICPV(PL_lex_brackstack); + SAVEGENERICPV(PL_lex_casestack); SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp); SAVESPTR(PL_lex_stuff); SAVEI32(PL_lex_defer); @@ -440,8 +441,6 @@ Perl_lex_start(pTHX_ SV *line) PL_lex_brackets = 0; New(899, PL_lex_brackstack, 120, char); New(899, PL_lex_casestack, 12, char); - SAVEFREEPV(PL_lex_brackstack); - SAVEFREEPV(PL_lex_casestack); PL_lex_casemods = 0; *PL_lex_casestack = '\0'; PL_lex_dojoin = 0; @@ -1052,8 +1051,8 @@ S_sublex_push(pTHX) SAVEPPTR(PL_last_uni); SAVEPPTR(PL_linestart); SAVESPTR(PL_linestr); - SAVEPPTR(PL_lex_brackstack); - SAVEPPTR(PL_lex_casestack); + SAVEGENERICPV(PL_lex_brackstack); + SAVEGENERICPV(PL_lex_casestack); PL_linestr = PL_lex_stuff; PL_lex_stuff = Nullsv; @@ -1068,8 +1067,6 @@ S_sublex_push(pTHX) PL_lex_brackets = 0; New(899, PL_lex_brackstack, 120, char); New(899, PL_lex_casestack, 12, char); - SAVEFREEPV(PL_lex_brackstack); - SAVEFREEPV(PL_lex_casestack); PL_lex_casemods = 0; *PL_lex_casestack = '\0'; PL_lex_starts = 0; @@ -1611,7 +1608,7 @@ S_scan_const(pTHX_ char *start) /* \c is a control character */ case 'c': s++; - { + if (s < send) { U8 c = *s++; #ifdef EBCDIC if (isLOWER(c)) @@ -1619,6 +1616,9 @@ S_scan_const(pTHX_ char *start) #endif *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c)); } + else { + yyerror("Missing control char name in \\c"); + } continue; /* printf-style backslashes, formfeeds, newlines, etc */ @@ -2142,7 +2142,7 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len) #ifdef DEBUGGING static char* exp_name[] = { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", - "ATTRTERM", "TERMBLOCK" + "ATTRTERM", "TERMBLOCK", "TERMORDORDOR" }; #endif @@ -2205,6 +2205,7 @@ Perl_yylex(pTHX) GV *gv = Nullgv; GV **gvp = 0; bool bof = FALSE; + I32 orig_keyword = 0; /* check if there's an identifier for us to look at */ if (PL_pending_ident) @@ -2266,39 +2267,40 @@ Perl_yylex(pTHX) DEBUG_T({ PerlIO_printf(Perl_debug_log, "### Saw case modifier at '%s'\n", PL_bufptr); }); s = PL_bufptr + 1; - if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) - tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ - if (strchr("LU", *s) && - (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) - { - PL_lex_casestack[--PL_lex_casemods] = '\0'; - return ')'; + if (s[1] == '\\' && s[2] == 'E') { + PL_bufptr = s + 3; + PL_lex_state = LEX_INTERPCONCAT; + return yylex(); } - if (PL_lex_casemods > 10) { - char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char); - if (newlb != PL_lex_casestack) { - SAVEFREEPV(newlb); - PL_lex_casestack = newlb; + else { + if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) + tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ + if (strchr("LU", *s) && + (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) { + PL_lex_casestack[--PL_lex_casemods] = '\0'; + return ')'; } + if (PL_lex_casemods > 10) + Renew(PL_lex_casestack, PL_lex_casemods + 2, char); + PL_lex_casestack[PL_lex_casemods++] = *s; + PL_lex_casestack[PL_lex_casemods] = '\0'; + PL_lex_state = LEX_INTERPCONCAT; + PL_nextval[PL_nexttoke].ival = 0; + force_next('('); + if (*s == 'l') + PL_nextval[PL_nexttoke].ival = OP_LCFIRST; + else if (*s == 'u') + PL_nextval[PL_nexttoke].ival = OP_UCFIRST; + else if (*s == 'L') + PL_nextval[PL_nexttoke].ival = OP_LC; + else if (*s == 'U') + PL_nextval[PL_nexttoke].ival = OP_UC; + else if (*s == 'Q') + PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA; + else + Perl_croak(aTHX_ "panic: yylex"); + PL_bufptr = s + 1; } - PL_lex_casestack[PL_lex_casemods++] = *s; - PL_lex_casestack[PL_lex_casemods] = '\0'; - PL_lex_state = LEX_INTERPCONCAT; - PL_nextval[PL_nexttoke].ival = 0; - force_next('('); - if (*s == 'l') - PL_nextval[PL_nexttoke].ival = OP_LCFIRST; - else if (*s == 'u') - PL_nextval[PL_nexttoke].ival = OP_UCFIRST; - else if (*s == 'L') - PL_nextval[PL_nexttoke].ival = OP_LC; - else if (*s == 'U') - PL_nextval[PL_nexttoke].ival = OP_UC; - else if (*s == 'Q') - PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA; - else - Perl_croak(aTHX_ "panic: yylex"); - PL_bufptr = s + 1; force_next(FUNC); if (PL_lex_starts) { s = PL_bufptr; @@ -3024,6 +3026,8 @@ Perl_yylex(pTHX) CvLOCKED_on(PL_compcv); else if (!PL_in_my && len == 6 && strnEQ(s, "method", len)) CvMETHOD_on(PL_compcv); + else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len)) + CvASSERTION_on(PL_compcv); #ifdef USE_ITHREADS else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "unique", len)) @@ -3085,6 +3089,7 @@ Perl_yylex(pTHX) PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ else PL_expect = XTERM; + s = skipspace(s); TOKEN('('); case ';': CLINE; @@ -3113,11 +3118,7 @@ Perl_yylex(pTHX) leftbracket: s++; if (PL_lex_brackets > 100) { - char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char); - if (newlb != PL_lex_brackstack) { - SAVEFREEPV(newlb); - PL_lex_brackstack = newlb; - } + Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); } switch (PL_expect) { case XTERM: @@ -3210,12 +3211,17 @@ Perl_yylex(pTHX) || ((*t == 'q' || *t == 'x') && ++t < PL_bufend && !isALNUM(*t)))) { + /* skip q//-like construct */ char *tmps; char open, close, term; I32 brackets = 1; while (t < PL_bufend && isSPACE(*t)) t++; + /* check for q => */ + if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { + OPERATOR(HASHBRACK); + } term = *t; open = term; if (term && (tmps = strchr("([{< )]}> )]}>",term))) @@ -3228,7 +3234,7 @@ Perl_yylex(pTHX) else if (*t == open) break; } - else + else { for (t++; t < PL_bufend; t++) { if (*t == '\\' && t+1 < PL_bufend) t++; @@ -3237,8 +3243,13 @@ Perl_yylex(pTHX) else if (*t == open) brackets++; } + } + t++; } - t++; + else + /* skip plain q word */ + while (t < PL_bufend && isALNUM_lazy_if(t,UTF)) + t += UTF8SKIP(t); } else if (isALNUM_lazy_if(t,UTF)) { t += UTF8SKIP(t); @@ -3797,7 +3808,7 @@ Perl_yylex(pTHX) case 'z': case 'Z': keylookup: { - I32 orig_keyword = 0; + orig_keyword = 0; gv = Nullgv; gvp = 0; @@ -4189,8 +4200,29 @@ Perl_yylex(pTHX) } #endif #ifdef PERLIO_LAYERS - if (UTF && !IN_BYTES) - PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); + if (!IN_BYTES) { + if (UTF) + PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); + else if (PL_encoding) { + SV *name; + dSP; + ENTER; + SAVETMPS; + PUSHMARK(sp); + EXTEND(SP, 1); + XPUSHs(PL_encoding); + PUTBACK; + call_method("name", G_SCALAR); + SPAGAIN; + name = POPs; + PUTBACK; + PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, + Perl_form(aTHX_ ":encoding(%"SVf")", + name)); + FREETMPS; + LEAVE; + } + } #endif PL_rsfp = Nullfp; } @@ -5045,8 +5077,8 @@ Perl_yylex(pTHX) d[tmp] = '\0'; if (bad_proto && ckWARN(WARN_SYNTAX)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Illegal character in prototype for %s : %s", - SvPVX(PL_subname), d); + "Illegal character in prototype for %"SVf" : %s", + PL_subname, d); SvCUR(PL_lex_stuff) = tmp; have_proto = TRUE; @@ -5057,6 +5089,8 @@ Perl_yylex(pTHX) if (*s == ':' && s[1] != ':') PL_expect = attrful; + else if (!have_name && *s != '{' && key == KEY_sub) + Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); if (have_proto) { PL_nextval[PL_nexttoke].opval = @@ -5240,7 +5274,7 @@ static int S_pending_ident(pTHX) { register char *d; - register I32 tmp; + register I32 tmp = 0; /* pit holds the identifier we read and pending_ident is reset */ char pit = PL_pending_ident; PL_pending_ident = 0; @@ -5285,7 +5319,9 @@ S_pending_ident(pTHX) */ if (!strchr(PL_tokenbuf,':')) { - if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) { + if (!PL_in_my) + tmp = pad_findmy(PL_tokenbuf); + if (tmp != NOT_IN_PAD) { /* might be an "our" variable" */ if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) { /* build ops for a bareword */ @@ -6273,8 +6309,10 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des } if (*s == '}') { s++; - if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) + if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { PL_lex_state = LEX_INTERPEND; + PL_expect = XREF; + } if (funny == '#') funny = '@'; if (PL_lex_state == LEX_NORMAL) { @@ -6661,8 +6699,12 @@ retval: Renew(SvPVX(tmpstr), SvLEN(tmpstr), char); } SvREFCNT_dec(herewas); - if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) - SvUTF8_on(tmpstr); + if (!IN_BYTES) { + if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) + SvUTF8_on(tmpstr); + else if (PL_encoding) + sv_recode_to_utf8(tmpstr, PL_encoding); + } PL_lex_stuff = tmpstr; yylval.ival = op_type; return s; @@ -6881,6 +6923,10 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) register char *to; /* current position in the sv's data */ I32 brackets = 1; /* bracket nesting level */ bool has_utf8 = FALSE; /* is there any utf8 content? */ + I32 termcode; /* terminating char. code */ + U8 termstr[UTF8_MAXLEN]; /* terminating string */ + STRLEN termlen; /* length of terminating string */ + char *last = NULL; /* last position for nesting bracket */ /* skip space before the delimiter */ if (isSPACE(*s)) @@ -6891,8 +6937,16 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* after skipping whitespace, the next character is the terminator */ term = *s; - if (!UTF8_IS_INVARIANT((U8)term) && UTF) - has_utf8 = TRUE; + if (!UTF) { + termcode = termstr[0] = term; + termlen = 1; + } + else { + termcode = utf8_to_uvchr((U8*)s, &termlen); + Copy(s, termstr, termlen, U8); + if (!UTF8_IS_INVARIANT(term)) + has_utf8 = TRUE; + } /* mark where we are */ PL_multi_start = CopLINE(PL_curcop); @@ -6900,21 +6954,92 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* find corresponding closing delimiter */ if (term && (tmps = strchr("([{< )]}> )]}>",term))) - term = tmps[5]; + termcode = termstr[0] = term = tmps[5]; + PL_multi_close = term; /* create a new SV to hold the contents. 87 is leak category, I'm assuming. 79 is the SV's initial length. What a random number. */ sv = NEWSV(87,79); sv_upgrade(sv, SVt_PVIV); - SvIVX(sv) = term; + SvIVX(sv) = termcode; (void)SvPOK_only(sv); /* validate pointer */ /* move past delimiter and try to read a complete string */ if (keep_delims) - sv_catpvn(sv, s, 1); - s++; + sv_catpvn(sv, s, termlen); + s += termlen; for (;;) { + if (PL_encoding && !UTF) { + bool cont = TRUE; + + while (cont) { + int offset = s - SvPVX(PL_linestr); + bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, + &offset, (char*)termstr, termlen); + char *ns = SvPVX(PL_linestr) + offset; + char *svlast = SvEND(sv) - 1; + + for (; s < ns; s++) { + if (*s == '\n' && !PL_rsfp) + CopLINE_inc(PL_curcop); + } + if (!found) + goto read_more_line; + else { + /* handle quoted delimiters */ + if (*(svlast-1) == '\\') { + char *t; + for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';) + t--; + if ((svlast-1 - t) % 2) { + if (!keep_quoted) { + *(svlast-1) = term; + *svlast = '\0'; + SvCUR_set(sv, SvCUR(sv) - 1); + } + continue; + } + } + if (PL_multi_open == PL_multi_close) { + cont = FALSE; + } + else { + char *t, *w; + if (!last) + last = SvPVX(sv); + for (w = t = last; t < svlast; w++, t++) { + /* At here, all closes are "was quoted" one, + so we don't check PL_multi_close. */ + if (*t == '\\') { + if (!keep_quoted && *(t+1) == PL_multi_open) + t++; + else + *w++ = *t++; + } + else if (*t == PL_multi_open) + brackets++; + + *w = *t; + } + if (w < t) { + *w++ = term; + *w = '\0'; + SvCUR_set(sv, w - SvPVX(sv)); + } + last = w; + if (--brackets <= 0) + cont = FALSE; + } + } + } + if (!keep_delims) { + SvCUR_set(sv, SvCUR(sv) - 1); + *SvEND(sv) = '\0'; + } + break; + } + /* extend sv if need be */ SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); /* set 'to' to the next character in the sv's string */ @@ -6936,8 +7061,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } /* terminate when run out of buffer (the for() condition), or have found the terminator */ - else if (*s == term) - break; + else if (*s == term) { + if (termlen == 1) + break; + if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) + break; + } else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; *to = *s; @@ -6999,6 +7128,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) to[-1] = '\n'; #endif + read_more_line: /* if we're out of file, or a read fails, bail and reset the current line marker so we can report where the unterminated string began */ @@ -7029,15 +7159,15 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* at this point, we have successfully read the delimited string */ - if (keep_delims) - sv_catpvn(sv, s, 1); - if (has_utf8) + if (!PL_encoding || UTF) { + if (keep_delims) + sv_catpvn(sv, s, termlen); + s += termlen; + } + if (has_utf8 || PL_encoding) SvUTF8_on(sv); - else if (PL_encoding) - sv_recode_to_utf8(sv, PL_encoding); PL_multi_end = CopLINE(PL_curcop); - s++; /* if we allocated too much space, give some back */ if (SvCUR(sv) + 5 < SvLEN(sv)) { @@ -7524,6 +7654,12 @@ S_scan_formline(pTHX_ register char *s) } else PL_lex_state = LEX_FORMLINE; + if (!IN_BYTES) { + if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff))) + SvUTF8_on(stuff); + else if (PL_encoding) + sv_recode_to_utf8(stuff, PL_encoding); + } PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff); force_next(THING); PL_nextval[PL_nexttoke].ival = OP_FORMLINE; @@ -7566,6 +7702,7 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) PL_subline = CopLINE(PL_curcop); CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv); + CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; return oldsavestack_ix; }