X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=toke.c;h=d57dc0cef994884983f82d9826ba6ecd9250bb40;hb=0384d2fffd4e9480d75fd2afa397b45866ec8ac8;hp=e7834c4bb74c956da6d9a3055295bc4343483dbf;hpb=6e909404a2579c635b9578c0b6ce5416a5ed7b0b;p=p5sagit%2Fp5-mst-13.2.git diff --git a/toke.c b/toke.c index e7834c4..d57dc0c 100644 --- a/toke.c +++ b/toke.c @@ -1,6 +1,7 @@ /* toke.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + * 2000, 2001, 2002, 2003, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -2141,7 +2142,7 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len) #ifdef DEBUGGING static char* exp_name[] = { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", - "ATTRTERM", "TERMBLOCK" + "ATTRTERM", "TERMBLOCK", "TERMORDORDOR" }; #endif @@ -2726,6 +2727,14 @@ Perl_yylex(pTHX) } d = moreswitches(d); } while (d); + if (PL_doswitches && !switches_done) { + int argc = PL_origargc; + char **argv = PL_origargv; + do { + argc--,argv++; + } while (argc && argv[0][0] == '-' && argv[0][1]); + init_argv_symbols(argc,argv); + } if ((PERLDB_LINE && !oldpdb) || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) /* if we have already added "LINE: while (<>) {", @@ -2939,8 +2948,6 @@ Perl_yylex(pTHX) PL_tokenbuf[0] = '%'; s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); if (!PL_tokenbuf[1]) { - if (s == PL_bufend) - yyerror("Final % should be \\% or %name"); PREREF('%'); } PL_pending_ident = '%'; @@ -3025,6 +3032,8 @@ Perl_yylex(pTHX) CvLOCKED_on(PL_compcv); else if (!PL_in_my && len == 6 && strnEQ(s, "method", len)) CvMETHOD_on(PL_compcv); + else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len)) + CvASSERTION_on(PL_compcv); #ifdef USE_ITHREADS else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "unique", len)) @@ -3086,6 +3095,7 @@ Perl_yylex(pTHX) PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ else PL_expect = XTERM; + s = skipspace(s); TOKEN('('); case ';': CLINE; @@ -3207,12 +3217,17 @@ Perl_yylex(pTHX) || ((*t == 'q' || *t == 'x') && ++t < PL_bufend && !isALNUM(*t)))) { + /* skip q//-like construct */ char *tmps; char open, close, term; I32 brackets = 1; while (t < PL_bufend && isSPACE(*t)) t++; + /* check for q => */ + if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { + OPERATOR(HASHBRACK); + } term = *t; open = term; if (term && (tmps = strchr("([{< )]}> )]}>",term))) @@ -3225,7 +3240,7 @@ Perl_yylex(pTHX) else if (*t == open) break; } - else + else { for (t++; t < PL_bufend; t++) { if (*t == '\\' && t+1 < PL_bufend) t++; @@ -3234,8 +3249,13 @@ Perl_yylex(pTHX) else if (*t == open) brackets++; } + } + t++; } - t++; + else + /* skip plain q word */ + while (t < PL_bufend && isALNUM_lazy_if(t,UTF)) + t += UTF8SKIP(t); } else if (isALNUM_lazy_if(t,UTF)) { t += UTF8SKIP(t); @@ -3561,8 +3581,6 @@ Perl_yylex(pTHX) PL_tokenbuf[0] = '@'; s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); if (!PL_tokenbuf[1]) { - if (s == PL_bufend) - yyerror("Final @ should be \\@ or @name"); PREREF('@'); } if (PL_lex_state == LEX_NORMAL) @@ -4072,6 +4090,8 @@ Perl_yylex(pTHX) TERM(FUNC0SUB); if (strEQ(proto, "$")) OPERATOR(UNIOPSUB); + while (*proto == ';') + proto++; if (*proto == '&' && *s == '{') { sv_setpv(PL_subname, PL_curstash ? "__ANON__" : "__ANON__::__ANON__"); @@ -4186,8 +4206,29 @@ Perl_yylex(pTHX) } #endif #ifdef PERLIO_LAYERS - if (UTF && !IN_BYTES) - PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); + if (!IN_BYTES) { + if (UTF) + PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); + else if (PL_encoding) { + SV *name; + dSP; + ENTER; + SAVETMPS; + PUSHMARK(sp); + EXTEND(SP, 1); + XPUSHs(PL_encoding); + PUTBACK; + call_method("name", G_SCALAR); + SPAGAIN; + name = POPs; + PUTBACK; + PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, + Perl_form(aTHX_ ":encoding(%"SVf")", + name)); + FREETMPS; + LEAVE; + } + } #endif PL_rsfp = Nullfp; } @@ -5239,7 +5280,7 @@ static int S_pending_ident(pTHX) { register char *d; - register I32 tmp; + register I32 tmp = 0; /* pit holds the identifier we read and pending_ident is reset */ char pit = PL_pending_ident; PL_pending_ident = 0; @@ -6274,8 +6315,10 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des } if (*s == '}') { s++; - if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) + if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { PL_lex_state = LEX_INTERPEND; + PL_expect = XREF; + } if (funny == '#') funny = '@'; if (PL_lex_state == LEX_NORMAL) { @@ -6662,8 +6705,12 @@ retval: Renew(SvPVX(tmpstr), SvLEN(tmpstr), char); } SvREFCNT_dec(herewas); - if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) - SvUTF8_on(tmpstr); + if (!IN_BYTES) { + if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) + SvUTF8_on(tmpstr); + else if (PL_encoding) + sv_recode_to_utf8(tmpstr, PL_encoding); + } PL_lex_stuff = tmpstr; yylval.ival = op_type; return s; @@ -6882,6 +6929,10 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) register char *to; /* current position in the sv's data */ I32 brackets = 1; /* bracket nesting level */ bool has_utf8 = FALSE; /* is there any utf8 content? */ + I32 termcode; /* terminating char. code */ + U8 termstr[UTF8_MAXLEN]; /* terminating string */ + STRLEN termlen; /* length of terminating string */ + char *last = NULL; /* last position for nesting bracket */ /* skip space before the delimiter */ if (isSPACE(*s)) @@ -6892,8 +6943,16 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* after skipping whitespace, the next character is the terminator */ term = *s; - if (!UTF8_IS_INVARIANT((U8)term) && UTF) - has_utf8 = TRUE; + if (!UTF) { + termcode = termstr[0] = term; + termlen = 1; + } + else { + termcode = utf8_to_uvchr((U8*)s, &termlen); + Copy(s, termstr, termlen, U8); + if (!UTF8_IS_INVARIANT(term)) + has_utf8 = TRUE; + } /* mark where we are */ PL_multi_start = CopLINE(PL_curcop); @@ -6901,21 +6960,92 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* find corresponding closing delimiter */ if (term && (tmps = strchr("([{< )]}> )]}>",term))) - term = tmps[5]; + termcode = termstr[0] = term = tmps[5]; + PL_multi_close = term; /* create a new SV to hold the contents. 87 is leak category, I'm assuming. 79 is the SV's initial length. What a random number. */ sv = NEWSV(87,79); sv_upgrade(sv, SVt_PVIV); - SvIVX(sv) = term; + SvIVX(sv) = termcode; (void)SvPOK_only(sv); /* validate pointer */ /* move past delimiter and try to read a complete string */ if (keep_delims) - sv_catpvn(sv, s, 1); - s++; + sv_catpvn(sv, s, termlen); + s += termlen; for (;;) { + if (PL_encoding && !UTF) { + bool cont = TRUE; + + while (cont) { + int offset = s - SvPVX(PL_linestr); + bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, + &offset, (char*)termstr, termlen); + char *ns = SvPVX(PL_linestr) + offset; + char *svlast = SvEND(sv) - 1; + + for (; s < ns; s++) { + if (*s == '\n' && !PL_rsfp) + CopLINE_inc(PL_curcop); + } + if (!found) + goto read_more_line; + else { + /* handle quoted delimiters */ + if (*(svlast-1) == '\\') { + char *t; + for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';) + t--; + if ((svlast-1 - t) % 2) { + if (!keep_quoted) { + *(svlast-1) = term; + *svlast = '\0'; + SvCUR_set(sv, SvCUR(sv) - 1); + } + continue; + } + } + if (PL_multi_open == PL_multi_close) { + cont = FALSE; + } + else { + char *t, *w; + if (!last) + last = SvPVX(sv); + for (w = t = last; t < svlast; w++, t++) { + /* At here, all closes are "was quoted" one, + so we don't check PL_multi_close. */ + if (*t == '\\') { + if (!keep_quoted && *(t+1) == PL_multi_open) + t++; + else + *w++ = *t++; + } + else if (*t == PL_multi_open) + brackets++; + + *w = *t; + } + if (w < t) { + *w++ = term; + *w = '\0'; + SvCUR_set(sv, w - SvPVX(sv)); + } + last = w; + if (--brackets <= 0) + cont = FALSE; + } + } + } + if (!keep_delims) { + SvCUR_set(sv, SvCUR(sv) - 1); + *SvEND(sv) = '\0'; + } + break; + } + /* extend sv if need be */ SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); /* set 'to' to the next character in the sv's string */ @@ -6937,8 +7067,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } /* terminate when run out of buffer (the for() condition), or have found the terminator */ - else if (*s == term) - break; + else if (*s == term) { + if (termlen == 1) + break; + if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) + break; + } else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; *to = *s; @@ -7000,6 +7134,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) to[-1] = '\n'; #endif + read_more_line: /* if we're out of file, or a read fails, bail and reset the current line marker so we can report where the unterminated string began */ @@ -7030,15 +7165,15 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* at this point, we have successfully read the delimited string */ - if (keep_delims) - sv_catpvn(sv, s, 1); - if (has_utf8) + if (!PL_encoding || UTF) { + if (keep_delims) + sv_catpvn(sv, s, termlen); + s += termlen; + } + if (has_utf8 || PL_encoding) SvUTF8_on(sv); - else if (PL_encoding) - sv_recode_to_utf8(sv, PL_encoding); PL_multi_end = CopLINE(PL_curcop); - s++; /* if we allocated too much space, give some back */ if (SvCUR(sv) + 5 < SvLEN(sv)) { @@ -7525,6 +7660,12 @@ S_scan_formline(pTHX_ register char *s) } else PL_lex_state = LEX_FORMLINE; + if (!IN_BYTES) { + if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff))) + SvUTF8_on(stuff); + else if (PL_encoding) + sv_recode_to_utf8(stuff, PL_encoding); + } PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff); force_next(THING); PL_nextval[PL_nexttoke].ival = OP_FORMLINE;