3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
40 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
42 # define CURMAD(slot,sv)
43 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
46 #define XFAKEBRACK 128
49 #ifdef USE_UTF8_SCRIPTS
50 # define UTF (!IN_BYTES)
52 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
55 /* In variables named $^X, these are the legal values for X.
56 * 1999-02-27 mjd-perl-patch@plover.com */
57 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
59 /* On MacOS, respect nonbreaking spaces */
60 #ifdef MACOS_TRADITIONAL
61 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
63 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
66 /* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
68 * can get by with a single comparison (if the compiler is smart enough).
71 /* #define LEX_NOTPARSING 11 is done in perl.h. */
73 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
74 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
79 /* at end of code, eg "$x" followed by: */
80 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
83 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85 #define LEX_INTERPCONST 2 /* NOT USED */
86 #define LEX_FORMLINE 1 /* expecting a format line */
87 #define LEX_KNOWNEXT 0 /* next token known; just return it */
91 static const char* const lex_state_names[] = {
110 #include "keywords.h"
112 /* CLINE is a macro that ensures PL_copline has a sane value */
117 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
120 # define SKIPSPACE0(s) skipspace0(s)
121 # define SKIPSPACE1(s) skipspace1(s)
122 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123 # define PEEKSPACE(s) skipspace2(s,0)
125 # define SKIPSPACE0(s) skipspace(s)
126 # define SKIPSPACE1(s) skipspace(s)
127 # define SKIPSPACE2(s,tsv) skipspace(s)
128 # define PEEKSPACE(s) skipspace(s)
132 * Convenience functions to return different tokens and prime the
133 * lexer for the next token. They all take an argument.
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
145 * FUN1 : not used, except for not, which isn't a UNIOP
146 * BOop : bitwise or or xor
148 * SHop : shift operator
149 * PWop : power operator
150 * PMop : pattern-matching operator
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
154 * Rop : relational operator <= != gt
156 * Also see LOP and lop() below.
159 #ifdef DEBUGGING /* Serve -DT. */
160 # define REPORT(retval) tokereport((I32)retval)
162 # define REPORT(retval) (retval)
165 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
186 /* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
191 #define UNI2(f,x) { \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
198 return REPORT( (int)FUNC1 ); \
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
202 #define UNI(f) UNI2(f,XTERM)
203 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
205 #define UNIBRACK(f) { \
208 PL_last_uni = PL_oldbufptr; \
210 return REPORT( (int)FUNC1 ); \
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
215 /* grandfather return to old style */
216 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
220 /* how to interpret the yylval associated with the token */
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
230 static struct debug_tokens {
232 enum token_type type;
234 } const debug_tokens[] =
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
301 { 0, TOKENTYPE_NONE, NULL }
304 /* dump the returned token in rv, plus any optional arg in yylval */
307 S_tokereport(pTHX_ I32 rv)
311 const char *name = NULL;
312 enum token_type type = TOKENTYPE_NONE;
313 const struct debug_tokens *p;
314 SV* const report = newSVpvs("<== ");
316 for (p = debug_tokens; p->token; p++) {
317 if (p->token == (int)rv) {
324 Perl_sv_catpv(aTHX_ report, name);
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
328 sv_catpvs(report, "EOF");
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
345 case TOKENTYPE_OPVAL:
347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
348 PL_op_name[yylval.opval->op_type]);
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
356 sv_catpvs(report, "(opval=null)");
359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
365 /* print the buffer with suitable escapes */
368 S_printbuf(pTHX_ const char* fmt, const char* s)
370 SV* const tmp = newSVpvs("");
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
385 S_ao(pTHX_ int toketype)
388 if (*PL_bufptr == '=') {
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
415 S_no_op(pTHX_ const char *what, char *s)
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
426 if (ckWARN_d(WARN_SYNTAX)) {
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
432 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
450 * Complain about missing quote/regexp/heredoc terminator.
451 * If it's called with NULL then it cauterizes the line buffer.
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
458 S_missingterm(pTHX_ char *s)
464 char * const nl = strrchr(s,'\n');
470 iscntrl(PL_multi_close)
472 PL_multi_close < 32 || PL_multi_close == 127
476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
481 *tmpbuf = (char)PL_multi_close;
485 q = strchr(s,'"') ? '\'' : '"';
486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
489 #define FEATURE_IS_ENABLED(name) \
490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
497 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
500 HV * const hinthv = GvHV(PL_hintgv);
501 char he_name[32] = "feature_";
502 (void) my_strlcpy(&he_name[8], name, 24);
504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
512 Perl_deprecate(pTHX_ const char *s)
514 if (ckWARN(WARN_DEPRECATED))
515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
519 Perl_deprecate_old(pTHX_ const char *s)
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
531 "Use of %s is deprecated", s);
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
539 #ifdef PERL_CR_FILTER
543 register const char *s = SvPVX_const(sv);
544 register const char * const e = s + SvCUR(sv);
545 /* outer loop optimized to do nothing if there are no CR-LFs */
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
552 if (*s == '\r' && s[1] == '\n')
563 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
566 if (count > 0 && !maxlen)
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
579 Perl_lex_start(pTHX_ SV *line)
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
590 SAVEVPTR(PL_lex_inpat);
591 SAVEI32(PL_lex_inwhat);
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
601 SAVEI32(PL_lasttoke);
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
614 SAVEI32(PL_curforce);
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
622 SAVEI32(PL_nexttoke);
625 SAVECOPLINE(PL_curcop);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
639 SAVEI32(PL_sublex_info.sub_inwhat);
640 SAVESPTR(PL_lex_repl);
642 SAVEINT(PL_lex_expect);
644 PL_lex_state = LEX_NORMAL;
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
651 *PL_lex_casestack = '\0';
663 PL_sublex_info.sub_inwhat = 0;
665 s = SvPV_const(PL_linestr, len);
666 if (SvREADONLY(PL_linestr) || !len || s[len-1] != ';') {
667 PL_linestr = sv_2mortal(len ? newSVsv(PL_linestr) : newSVpvn(s, 0));
668 if (!len || s[len-1] != ';')
669 sv_catpvs(PL_linestr, "\n;");
671 SvTEMP_off(PL_linestr);
672 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
673 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
674 PL_last_lop = PL_last_uni = NULL;
680 * Finalizer for lexing operations. Must be called when the parser is
681 * done with the lexer.
688 PL_doextract = FALSE;
693 * This subroutine has nothing to do with tilting, whether at windmills
694 * or pinball tables. Its name is short for "increment line". It
695 * increments the current line number in CopLINE(PL_curcop) and checks
696 * to see whether the line starts with a comment of the form
697 * # line 500 "foo.pm"
698 * If so, it sets the current line number and file to the values in the comment.
702 S_incline(pTHX_ char *s)
710 CopLINE_inc(PL_curcop);
713 while (SPACE_OR_TAB(*s))
715 if (strnEQ(s, "line", 4))
719 if (SPACE_OR_TAB(*s))
723 while (SPACE_OR_TAB(*s))
731 while (SPACE_OR_TAB(*s))
733 if (*s == '"' && (t = strchr(s+1, '"'))) {
743 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
745 if (*e != '\n' && *e != '\0')
746 return; /* false alarm */
752 const char * const cf = CopFILE(PL_curcop);
753 STRLEN tmplen = cf ? strlen(cf) : 0;
754 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
755 /* must copy *{"::_<(eval N)[oldfilename:L]"}
756 * to *{"::_<newfilename"} */
757 char smallbuf[256], smallbuf2[256];
758 char *tmpbuf, *tmpbuf2;
760 STRLEN tmplen2 = strlen(s);
761 if (tmplen + 3 < sizeof smallbuf)
764 Newx(tmpbuf, tmplen + 3, char);
765 if (tmplen2 + 3 < sizeof smallbuf2)
768 Newx(tmpbuf2, tmplen2 + 3, char);
769 tmpbuf[0] = tmpbuf2[0] = '_';
770 tmpbuf[1] = tmpbuf2[1] = '<';
771 memcpy(tmpbuf + 2, cf, ++tmplen);
772 memcpy(tmpbuf2 + 2, s, ++tmplen2);
774 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
776 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
778 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
779 /* adjust ${"::_<newfilename"} to store the new file name */
780 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
781 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
782 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
784 if (tmpbuf != smallbuf) Safefree(tmpbuf);
785 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
788 CopFILE_free(PL_curcop);
789 CopFILE_set(PL_curcop, s);
792 CopLINE_set(PL_curcop, atoi(n)-1);
796 /* skip space before PL_thistoken */
799 S_skipspace0(pTHX_ register char *s)
806 PL_thiswhite = newSVpvs("");
807 sv_catsv(PL_thiswhite, PL_skipwhite);
808 sv_free(PL_skipwhite);
811 PL_realtokenstart = s - SvPVX(PL_linestr);
815 /* skip space after PL_thistoken */
818 S_skipspace1(pTHX_ register char *s)
820 const char *start = s;
821 I32 startoff = start - SvPVX(PL_linestr);
826 start = SvPVX(PL_linestr) + startoff;
827 if (!PL_thistoken && PL_realtokenstart >= 0) {
828 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
829 PL_thistoken = newSVpvn(tstart, start - tstart);
831 PL_realtokenstart = -1;
834 PL_nextwhite = newSVpvs("");
835 sv_catsv(PL_nextwhite, PL_skipwhite);
836 sv_free(PL_skipwhite);
843 S_skipspace2(pTHX_ register char *s, SV **svp)
846 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
847 const I32 startoff = s - SvPVX(PL_linestr);
850 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
851 if (!PL_madskills || !svp)
853 start = SvPVX(PL_linestr) + startoff;
854 if (!PL_thistoken && PL_realtokenstart >= 0) {
855 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
856 PL_thistoken = newSVpvn(tstart, start - tstart);
857 PL_realtokenstart = -1;
862 sv_setsv(*svp, PL_skipwhite);
863 sv_free(PL_skipwhite);
873 * Called to gobble the appropriate amount and type of whitespace.
874 * Skips comments as well.
878 S_skipspace(pTHX_ register char *s)
883 int startoff = s - SvPVX(PL_linestr);
886 sv_free(PL_skipwhite);
891 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
892 while (s < PL_bufend && SPACE_OR_TAB(*s))
902 SSize_t oldprevlen, oldoldprevlen;
903 SSize_t oldloplen = 0, oldunilen = 0;
904 while (s < PL_bufend && isSPACE(*s)) {
905 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
910 if (s < PL_bufend && *s == '#') {
911 while (s < PL_bufend && *s != '\n')
915 if (PL_in_eval && !PL_rsfp) {
922 /* only continue to recharge the buffer if we're at the end
923 * of the buffer, we're not reading from a source filter, and
924 * we're in normal lexing mode
926 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
927 PL_lex_state == LEX_FORMLINE)
934 /* try to recharge the buffer */
936 curoff = s - SvPVX(PL_linestr);
939 if ((s = filter_gets(PL_linestr, PL_rsfp,
940 (prevlen = SvCUR(PL_linestr)))) == NULL)
943 if (PL_madskills && curoff != startoff) {
945 PL_skipwhite = newSVpvs("");
946 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
950 /* mustn't throw out old stuff yet if madpropping */
951 SvCUR(PL_linestr) = curoff;
952 s = SvPVX(PL_linestr) + curoff;
954 if (curoff && s[-1] == '\n')
958 /* end of file. Add on the -p or -n magic */
959 /* XXX these shouldn't really be added here, can't set PL_faketokens */
963 ";}continue{print or die qq(-p destination: $!\\n);}");
966 ";}continue{print or die qq(-p destination: $!\\n);}");
968 PL_minus_n = PL_minus_p = 0;
970 else if (PL_minus_n) {
972 sv_catpvn(PL_linestr, ";}", 2);
974 sv_setpvn(PL_linestr, ";}", 2);
980 sv_catpvn(PL_linestr,";", 1);
982 sv_setpvn(PL_linestr,";", 1);
985 /* reset variables for next time we lex */
986 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
992 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
993 PL_last_lop = PL_last_uni = NULL;
995 /* Close the filehandle. Could be from -P preprocessor,
996 * STDIN, or a regular file. If we were reading code from
997 * STDIN (because the commandline held no -e or filename)
998 * then we don't close it, we reset it so the code can
999 * read from STDIN too.
1002 if (PL_preprocess && !PL_in_eval)
1003 (void)PerlProc_pclose(PL_rsfp);
1004 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1005 PerlIO_clearerr(PL_rsfp);
1007 (void)PerlIO_close(PL_rsfp);
1012 /* not at end of file, so we only read another line */
1013 /* make corresponding updates to old pointers, for yyerror() */
1014 oldprevlen = PL_oldbufptr - PL_bufend;
1015 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1017 oldunilen = PL_last_uni - PL_bufend;
1019 oldloplen = PL_last_lop - PL_bufend;
1020 PL_linestart = PL_bufptr = s + prevlen;
1021 PL_bufend = s + SvCUR(PL_linestr);
1023 PL_oldbufptr = s + oldprevlen;
1024 PL_oldoldbufptr = s + oldoldprevlen;
1026 PL_last_uni = s + oldunilen;
1028 PL_last_lop = s + oldloplen;
1031 /* debugger active and we're not compiling the debugger code,
1032 * so store the line into the debugger's array of lines
1034 if (PERLDB_LINE && PL_curstash != PL_debstash) {
1035 SV * const sv = newSV(0);
1037 sv_upgrade(sv, SVt_PVMG);
1038 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
1041 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
1049 PL_skipwhite = newSVpvs("");
1050 curoff = s - SvPVX(PL_linestr);
1051 if (curoff - startoff)
1052 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1061 * Check the unary operators to ensure there's no ambiguity in how they're
1062 * used. An ambiguous piece of code would be:
1064 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1065 * the +5 is its argument.
1075 if (PL_oldoldbufptr != PL_last_uni)
1077 while (isSPACE(*PL_last_uni))
1080 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1082 if ((t = strchr(s, '(')) && t < PL_bufptr)
1085 if (ckWARN_d(WARN_AMBIGUOUS)){
1086 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1087 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1088 (int)(s - PL_last_uni), PL_last_uni);
1093 * LOP : macro to build a list operator. Its behaviour has been replaced
1094 * with a subroutine, S_lop() for which LOP is just another name.
1097 #define LOP(f,x) return lop(f,x,s)
1101 * Build a list operator (or something that might be one). The rules:
1102 * - if we have a next token, then it's a list operator [why?]
1103 * - if the next thing is an opening paren, then it's a function
1104 * - else it's a list operator
1108 S_lop(pTHX_ I32 f, int x, char *s)
1115 PL_last_lop = PL_oldbufptr;
1116 PL_last_lop_op = (OPCODE)f;
1119 return REPORT(LSTOP);
1122 return REPORT(LSTOP);
1125 return REPORT(FUNC);
1128 return REPORT(FUNC);
1130 return REPORT(LSTOP);
1136 * Sets up for an eventual force_next(). start_force(0) basically does
1137 * an unshift, while start_force(-1) does a push. yylex removes items
1142 S_start_force(pTHX_ int where)
1146 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1147 where = PL_lasttoke;
1148 assert(PL_curforce < 0 || PL_curforce == where);
1149 if (PL_curforce != where) {
1150 for (i = PL_lasttoke; i > where; --i) {
1151 PL_nexttoke[i] = PL_nexttoke[i-1];
1155 if (PL_curforce < 0) /* in case of duplicate start_force() */
1156 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1157 PL_curforce = where;
1160 curmad('^', newSVpvs(""));
1161 CURMAD('_', PL_nextwhite);
1166 S_curmad(pTHX_ char slot, SV *sv)
1172 if (PL_curforce < 0)
1173 where = &PL_thismad;
1175 where = &PL_nexttoke[PL_curforce].next_mad;
1178 sv_setpvn(sv, "", 0);
1181 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1183 else if (PL_encoding) {
1184 sv_recode_to_utf8(sv, PL_encoding);
1189 /* keep a slot open for the head of the list? */
1190 if (slot != '_' && *where && (*where)->mad_key == '^') {
1191 (*where)->mad_key = slot;
1192 sv_free((*where)->mad_val);
1193 (*where)->mad_val = (void*)sv;
1196 addmad(newMADsv(slot, sv), where, 0);
1199 # define start_force(where) NOOP
1200 # define curmad(slot, sv) NOOP
1205 * When the lexer realizes it knows the next token (for instance,
1206 * it is reordering tokens for the parser) then it can call S_force_next
1207 * to know what token to return the next time the lexer is called. Caller
1208 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1209 * and possibly PL_expect to ensure the lexer handles the token correctly.
1213 S_force_next(pTHX_ I32 type)
1217 if (PL_curforce < 0)
1218 start_force(PL_lasttoke);
1219 PL_nexttoke[PL_curforce].next_type = type;
1220 if (PL_lex_state != LEX_KNOWNEXT)
1221 PL_lex_defer = PL_lex_state;
1222 PL_lex_state = LEX_KNOWNEXT;
1223 PL_lex_expect = PL_expect;
1226 PL_nexttype[PL_nexttoke] = type;
1228 if (PL_lex_state != LEX_KNOWNEXT) {
1229 PL_lex_defer = PL_lex_state;
1230 PL_lex_expect = PL_expect;
1231 PL_lex_state = LEX_KNOWNEXT;
1237 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1240 SV * const sv = newSVpvn(start,len);
1241 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1248 * When the lexer knows the next thing is a word (for instance, it has
1249 * just seen -> and it knows that the next char is a word char, then
1250 * it calls S_force_word to stick the next word into the PL_next lookahead.
1253 * char *start : buffer position (must be within PL_linestr)
1254 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1255 * int check_keyword : if true, Perl checks to make sure the word isn't
1256 * a keyword (do this if the word is a label, e.g. goto FOO)
1257 * int allow_pack : if true, : characters will also be allowed (require,
1258 * use, etc. do this)
1259 * int allow_initial_tick : used by the "sub" lexer only.
1263 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1269 start = SKIPSPACE1(start);
1271 if (isIDFIRST_lazy_if(s,UTF) ||
1272 (allow_pack && *s == ':') ||
1273 (allow_initial_tick && *s == '\'') )
1275 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1276 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1278 start_force(PL_curforce);
1280 curmad('X', newSVpvn(start,s-start));
1281 if (token == METHOD) {
1286 PL_expect = XOPERATOR;
1289 NEXTVAL_NEXTTOKE.opval
1290 = (OP*)newSVOP(OP_CONST,0,
1291 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1292 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1300 * Called when the lexer wants $foo *foo &foo etc, but the program
1301 * text only contains the "foo" portion. The first argument is a pointer
1302 * to the "foo", and the second argument is the type symbol to prefix.
1303 * Forces the next token to be a "WORD".
1304 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1308 S_force_ident(pTHX_ register const char *s, int kind)
1312 const STRLEN len = strlen(s);
1313 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1314 start_force(PL_curforce);
1315 NEXTVAL_NEXTTOKE.opval = o;
1318 o->op_private = OPpCONST_ENTERED;
1319 /* XXX see note in pp_entereval() for why we forgo typo
1320 warnings if the symbol must be introduced in an eval.
1322 gv_fetchpvn_flags(s, len,
1323 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1325 kind == '$' ? SVt_PV :
1326 kind == '@' ? SVt_PVAV :
1327 kind == '%' ? SVt_PVHV :
1335 Perl_str_to_version(pTHX_ SV *sv)
1340 const char *start = SvPV_const(sv,len);
1341 const char * const end = start + len;
1342 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1343 while (start < end) {
1347 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1352 retval += ((NV)n)/nshift;
1361 * Forces the next token to be a version number.
1362 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1363 * and if "guessing" is TRUE, then no new token is created (and the caller
1364 * must use an alternative parsing method).
1368 S_force_version(pTHX_ char *s, int guessing)
1374 I32 startoff = s - SvPVX(PL_linestr);
1383 while (isDIGIT(*d) || *d == '_' || *d == '.')
1387 start_force(PL_curforce);
1388 curmad('X', newSVpvn(s,d-s));
1391 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1393 s = scan_num(s, &yylval);
1394 version = yylval.opval;
1395 ver = cSVOPx(version)->op_sv;
1396 if (SvPOK(ver) && !SvNIOK(ver)) {
1397 SvUPGRADE(ver, SVt_PVNV);
1398 SvNV_set(ver, str_to_version(ver));
1399 SvNOK_on(ver); /* hint that it is a version */
1402 else if (guessing) {
1405 sv_free(PL_nextwhite); /* let next token collect whitespace */
1407 s = SvPVX(PL_linestr) + startoff;
1415 if (PL_madskills && !version) {
1416 sv_free(PL_nextwhite); /* let next token collect whitespace */
1418 s = SvPVX(PL_linestr) + startoff;
1421 /* NOTE: The parser sees the package name and the VERSION swapped */
1422 start_force(PL_curforce);
1423 NEXTVAL_NEXTTOKE.opval = version;
1431 * Tokenize a quoted string passed in as an SV. It finds the next
1432 * chunk, up to end of string or a backslash. It may make a new
1433 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1438 S_tokeq(pTHX_ SV *sv)
1442 register char *send;
1450 s = SvPV_force(sv, len);
1451 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1454 while (s < send && *s != '\\')
1459 if ( PL_hints & HINT_NEW_STRING ) {
1460 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1466 if (s + 1 < send && (s[1] == '\\'))
1467 s++; /* all that, just for this */
1472 SvCUR_set(sv, d - SvPVX_const(sv));
1474 if ( PL_hints & HINT_NEW_STRING )
1475 return new_constant(NULL, 0, "q", sv, pv, "q");
1480 * Now come three functions related to double-quote context,
1481 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1482 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1483 * interact with PL_lex_state, and create fake ( ... ) argument lists
1484 * to handle functions and concatenation.
1485 * They assume that whoever calls them will be setting up a fake
1486 * join call, because each subthing puts a ',' after it. This lets
1489 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1491 * (I'm not sure whether the spurious commas at the end of lcfirst's
1492 * arguments and join's arguments are created or not).
1497 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1499 * Pattern matching will set PL_lex_op to the pattern-matching op to
1500 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1502 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1504 * Everything else becomes a FUNC.
1506 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1507 * had an OP_CONST or OP_READLINE). This just sets us up for a
1508 * call to S_sublex_push().
1512 S_sublex_start(pTHX)
1515 register const I32 op_type = yylval.ival;
1517 if (op_type == OP_NULL) {
1518 yylval.opval = PL_lex_op;
1522 if (op_type == OP_CONST || op_type == OP_READLINE) {
1523 SV *sv = tokeq(PL_lex_stuff);
1525 if (SvTYPE(sv) == SVt_PVIV) {
1526 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1528 const char * const p = SvPV_const(sv, len);
1529 SV * const nsv = newSVpvn(p, len);
1535 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1536 PL_lex_stuff = NULL;
1537 /* Allow <FH> // "foo" */
1538 if (op_type == OP_READLINE)
1539 PL_expect = XTERMORDORDOR;
1542 else if (op_type == OP_BACKTICK && PL_lex_op) {
1543 /* readpipe() vas overriden */
1544 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1545 yylval.opval = PL_lex_op;
1547 PL_lex_stuff = NULL;
1551 PL_sublex_info.super_state = PL_lex_state;
1552 PL_sublex_info.sub_inwhat = op_type;
1553 PL_sublex_info.sub_op = PL_lex_op;
1554 PL_lex_state = LEX_INTERPPUSH;
1558 yylval.opval = PL_lex_op;
1568 * Create a new scope to save the lexing state. The scope will be
1569 * ended in S_sublex_done. Returns a '(', starting the function arguments
1570 * to the uc, lc, etc. found before.
1571 * Sets PL_lex_state to LEX_INTERPCONCAT.
1580 PL_lex_state = PL_sublex_info.super_state;
1581 SAVEI32(PL_lex_dojoin);
1582 SAVEI32(PL_lex_brackets);
1583 SAVEI32(PL_lex_casemods);
1584 SAVEI32(PL_lex_starts);
1585 SAVEI32(PL_lex_state);
1586 SAVEVPTR(PL_lex_inpat);
1587 SAVEI32(PL_lex_inwhat);
1588 SAVECOPLINE(PL_curcop);
1589 SAVEPPTR(PL_bufptr);
1590 SAVEPPTR(PL_bufend);
1591 SAVEPPTR(PL_oldbufptr);
1592 SAVEPPTR(PL_oldoldbufptr);
1593 SAVEPPTR(PL_last_lop);
1594 SAVEPPTR(PL_last_uni);
1595 SAVEPPTR(PL_linestart);
1596 SAVESPTR(PL_linestr);
1597 SAVEGENERICPV(PL_lex_brackstack);
1598 SAVEGENERICPV(PL_lex_casestack);
1600 PL_linestr = PL_lex_stuff;
1601 PL_lex_stuff = NULL;
1603 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1604 = SvPVX(PL_linestr);
1605 PL_bufend += SvCUR(PL_linestr);
1606 PL_last_lop = PL_last_uni = NULL;
1607 SAVEFREESV(PL_linestr);
1609 PL_lex_dojoin = FALSE;
1610 PL_lex_brackets = 0;
1611 Newx(PL_lex_brackstack, 120, char);
1612 Newx(PL_lex_casestack, 12, char);
1613 PL_lex_casemods = 0;
1614 *PL_lex_casestack = '\0';
1616 PL_lex_state = LEX_INTERPCONCAT;
1617 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1619 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1620 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1621 PL_lex_inpat = PL_sublex_info.sub_op;
1623 PL_lex_inpat = NULL;
1630 * Restores lexer state after a S_sublex_push.
1637 if (!PL_lex_starts++) {
1638 SV * const sv = newSVpvs("");
1639 if (SvUTF8(PL_linestr))
1641 PL_expect = XOPERATOR;
1642 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1646 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1647 PL_lex_state = LEX_INTERPCASEMOD;
1651 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1652 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1653 PL_linestr = PL_lex_repl;
1655 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1656 PL_bufend += SvCUR(PL_linestr);
1657 PL_last_lop = PL_last_uni = NULL;
1658 SAVEFREESV(PL_linestr);
1659 PL_lex_dojoin = FALSE;
1660 PL_lex_brackets = 0;
1661 PL_lex_casemods = 0;
1662 *PL_lex_casestack = '\0';
1664 if (SvEVALED(PL_lex_repl)) {
1665 PL_lex_state = LEX_INTERPNORMAL;
1667 /* we don't clear PL_lex_repl here, so that we can check later
1668 whether this is an evalled subst; that means we rely on the
1669 logic to ensure sublex_done() is called again only via the
1670 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1673 PL_lex_state = LEX_INTERPCONCAT;
1683 PL_endwhite = newSVpvs("");
1684 sv_catsv(PL_endwhite, PL_thiswhite);
1688 sv_setpvn(PL_thistoken,"",0);
1690 PL_realtokenstart = -1;
1694 PL_bufend = SvPVX(PL_linestr);
1695 PL_bufend += SvCUR(PL_linestr);
1696 PL_expect = XOPERATOR;
1697 PL_sublex_info.sub_inwhat = 0;
1705 Extracts a pattern, double-quoted string, or transliteration. This
1708 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1709 processing a pattern (PL_lex_inpat is true), a transliteration
1710 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1712 Returns a pointer to the character scanned up to. If this is
1713 advanced from the start pointer supplied (i.e. if anything was
1714 successfully parsed), will leave an OP for the substring scanned
1715 in yylval. Caller must intuit reason for not parsing further
1716 by looking at the next characters herself.
1720 double-quoted style: \r and \n
1721 regexp special ones: \D \s
1724 case and quoting: \U \Q \E
1725 stops on @ and $, but not for $ as tail anchor
1727 In transliterations:
1728 characters are VERY literal, except for - not at the start or end
1729 of the string, which indicates a range. If the range is in bytes,
1730 scan_const expands the range to the full set of intermediate
1731 characters. If the range is in utf8, the hyphen is replaced with
1732 a certain range mark which will be handled by pmtrans() in op.c.
1734 In double-quoted strings:
1736 double-quoted style: \r and \n
1738 deprecated backrefs: \1 (in substitution replacements)
1739 case and quoting: \U \Q \E
1742 scan_const does *not* construct ops to handle interpolated strings.
1743 It stops processing as soon as it finds an embedded $ or @ variable
1744 and leaves it to the caller to work out what's going on.
1746 embedded arrays (whether in pattern or not) could be:
1747 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1749 $ in double-quoted strings must be the symbol of an embedded scalar.
1751 $ in pattern could be $foo or could be tail anchor. Assumption:
1752 it's a tail anchor if $ is the last thing in the string, or if it's
1753 followed by one of "()| \r\n\t"
1755 \1 (backreferences) are turned into $1
1757 The structure of the code is
1758 while (there's a character to process) {
1759 handle transliteration ranges
1760 skip regexp comments /(?#comment)/ and codes /(?{code})/
1761 skip #-initiated comments in //x patterns
1762 check for embedded arrays
1763 check for embedded scalars
1765 leave intact backslashes from leaveit (below)
1766 deprecate \1 in substitution replacements
1767 handle string-changing backslashes \l \U \Q \E, etc.
1768 switch (what was escaped) {
1769 handle \- in a transliteration (becomes a literal -)
1770 handle \132 (octal characters)
1771 handle \x15 and \x{1234} (hex characters)
1772 handle \N{name} (named characters)
1773 handle \cV (control characters)
1774 handle printf-style backslashes (\f, \r, \n, etc)
1776 } (end if backslash)
1777 } (end while character to read)
1782 S_scan_const(pTHX_ char *start)
1785 register char *send = PL_bufend; /* end of the constant */
1786 SV *sv = newSV(send - start); /* sv for the constant */
1787 register char *s = start; /* start of the constant */
1788 register char *d = SvPVX(sv); /* destination for copies */
1789 bool dorange = FALSE; /* are we in a translit range? */
1790 bool didrange = FALSE; /* did we just finish a range? */
1791 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1792 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1795 UV literal_endpoint = 0;
1796 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1799 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1800 /* If we are doing a trans and we know we want UTF8 set expectation */
1801 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1802 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1806 while (s < send || dorange) {
1807 /* get transliterations out of the way (they're most literal) */
1808 if (PL_lex_inwhat == OP_TRANS) {
1809 /* expand a range A-Z to the full set of characters. AIE! */
1811 I32 i; /* current expanded character */
1812 I32 min; /* first character in range */
1813 I32 max; /* last character in range */
1824 char * const c = (char*)utf8_hop((U8*)d, -1);
1828 *c = (char)UTF_TO_NATIVE(0xff);
1829 /* mark the range as done, and continue */
1835 i = d - SvPVX_const(sv); /* remember current offset */
1838 SvLEN(sv) + (has_utf8 ?
1839 (512 - UTF_CONTINUATION_MARK +
1842 /* How many two-byte within 0..255: 128 in UTF-8,
1843 * 96 in UTF-8-mod. */
1845 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1847 d = SvPVX(sv) + i; /* refresh d after realloc */
1851 for (j = 0; j <= 1; j++) {
1852 char * const c = (char*)utf8_hop((U8*)d, -1);
1853 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1859 max = (U8)0xff; /* only to \xff */
1860 uvmax = uv; /* \x{100} to uvmax */
1862 d = c; /* eat endpoint chars */
1867 d -= 2; /* eat the first char and the - */
1868 min = (U8)*d; /* first char in range */
1869 max = (U8)d[1]; /* last char in range */
1876 "Invalid range \"%c-%c\" in transliteration operator",
1877 (char)min, (char)max);
1881 if (literal_endpoint == 2 &&
1882 ((isLOWER(min) && isLOWER(max)) ||
1883 (isUPPER(min) && isUPPER(max)))) {
1885 for (i = min; i <= max; i++)
1887 *d++ = NATIVE_TO_NEED(has_utf8,i);
1889 for (i = min; i <= max; i++)
1891 *d++ = NATIVE_TO_NEED(has_utf8,i);
1896 for (i = min; i <= max; i++)
1899 const U8 ch = (U8)NATIVE_TO_UTF(i);
1900 if (UNI_IS_INVARIANT(ch))
1903 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1904 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1913 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1915 *d++ = (char)UTF_TO_NATIVE(0xff);
1917 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1921 /* mark the range as done, and continue */
1925 literal_endpoint = 0;
1930 /* range begins (ignore - as first or last char) */
1931 else if (*s == '-' && s+1 < send && s != start) {
1933 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1940 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1950 literal_endpoint = 0;
1951 native_range = TRUE;
1956 /* if we get here, we're not doing a transliteration */
1958 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1959 except for the last char, which will be done separately. */
1960 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1962 while (s+1 < send && *s != ')')
1963 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1965 else if (s[2] == '{' /* This should match regcomp.c */
1966 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1969 char *regparse = s + (s[2] == '{' ? 3 : 4);
1972 while (count && (c = *regparse)) {
1973 if (c == '\\' && regparse[1])
1981 if (*regparse != ')')
1982 regparse--; /* Leave one char for continuation. */
1983 while (s < regparse)
1984 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1988 /* likewise skip #-initiated comments in //x patterns */
1989 else if (*s == '#' && PL_lex_inpat &&
1990 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1991 while (s+1 < send && *s != '\n')
1992 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1995 /* check for embedded arrays
1996 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1998 else if (*s == '@' && s[1]) {
1999 if (isALNUM_lazy_if(s+1,UTF))
2001 if (strchr(":'{$", s[1]))
2003 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2004 break; /* in regexp, neither @+ nor @- are interpolated */
2007 /* check for embedded scalars. only stop if we're sure it's a
2010 else if (*s == '$') {
2011 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2013 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2014 break; /* in regexp, $ might be tail anchor */
2017 /* End of else if chain - OP_TRANS rejoin rest */
2020 if (*s == '\\' && s+1 < send) {
2023 /* deprecate \1 in strings and substitution replacements */
2024 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2025 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2027 if (ckWARN(WARN_SYNTAX))
2028 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2033 /* string-change backslash escapes */
2034 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2038 /* skip any other backslash escapes in a pattern */
2039 else if (PL_lex_inpat) {
2040 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2041 goto default_action;
2044 /* if we get here, it's either a quoted -, or a digit */
2047 /* quoted - in transliterations */
2049 if (PL_lex_inwhat == OP_TRANS) {
2056 if ((isALPHA(*s) || isDIGIT(*s)) &&
2058 Perl_warner(aTHX_ packWARN(WARN_MISC),
2059 "Unrecognized escape \\%c passed through",
2061 /* default action is to copy the quoted character */
2062 goto default_action;
2065 /* \132 indicates an octal constant */
2066 case '0': case '1': case '2': case '3':
2067 case '4': case '5': case '6': case '7':
2071 uv = grok_oct(s, &len, &flags, NULL);
2074 goto NUM_ESCAPE_INSERT;
2076 /* \x24 indicates a hex constant */
2080 char* const e = strchr(s, '}');
2081 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2082 PERL_SCAN_DISALLOW_PREFIX;
2087 yyerror("Missing right brace on \\x{}");
2091 uv = grok_hex(s, &len, &flags, NULL);
2097 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2098 uv = grok_hex(s, &len, &flags, NULL);
2104 /* Insert oct or hex escaped character.
2105 * There will always enough room in sv since such
2106 * escapes will be longer than any UTF-8 sequence
2107 * they can end up as. */
2109 /* We need to map to chars to ASCII before doing the tests
2112 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2113 if (!has_utf8 && uv > 255) {
2114 /* Might need to recode whatever we have
2115 * accumulated so far if it contains any
2118 * (Can't we keep track of that and avoid
2119 * this rescan? --jhi)
2123 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2124 if (!NATIVE_IS_INVARIANT(*c)) {
2129 const STRLEN offset = d - SvPVX_const(sv);
2131 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2135 while (src >= (const U8 *)SvPVX_const(sv)) {
2136 if (!NATIVE_IS_INVARIANT(*src)) {
2137 const U8 ch = NATIVE_TO_ASCII(*src);
2138 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2139 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2149 if (has_utf8 || uv > 255) {
2150 d = (char*)uvchr_to_utf8((U8*)d, uv);
2152 if (PL_lex_inwhat == OP_TRANS &&
2153 PL_sublex_info.sub_op) {
2154 PL_sublex_info.sub_op->op_private |=
2155 (PL_lex_repl ? OPpTRANS_FROM_UTF
2159 if (uv > 255 && !dorange)
2160 native_range = FALSE;
2172 /* \N{LATIN SMALL LETTER A} is a named character */
2176 char* e = strchr(s, '}');
2183 yyerror("Missing right brace on \\N{}");
2187 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2189 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2190 PERL_SCAN_DISALLOW_PREFIX;
2193 uv = grok_hex(s, &len, &flags, NULL);
2194 if ( e > s && len != (STRLEN)(e - s) ) {
2198 goto NUM_ESCAPE_INSERT;
2200 res = newSVpvn(s + 1, e - s - 1);
2201 type = newSVpvn(s - 2,e - s + 3);
2202 res = new_constant( NULL, 0, "charnames",
2203 res, NULL, SvPVX(type) );
2206 sv_utf8_upgrade(res);
2207 str = SvPV_const(res,len);
2208 #ifdef EBCDIC_NEVER_MIND
2209 /* charnames uses pack U and that has been
2210 * recently changed to do the below uni->native
2211 * mapping, so this would be redundant (and wrong,
2212 * the code point would be doubly converted).
2213 * But leave this in just in case the pack U change
2214 * gets revoked, but the semantics is still
2215 * desireable for charnames. --jhi */
2217 UV uv = utf8_to_uvchr((const U8*)str, 0);
2220 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2222 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2223 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2224 str = SvPV_const(res, len);
2228 if (!has_utf8 && SvUTF8(res)) {
2229 const char * const ostart = SvPVX_const(sv);
2230 SvCUR_set(sv, d - ostart);
2233 sv_utf8_upgrade(sv);
2234 /* this just broke our allocation above... */
2235 SvGROW(sv, (STRLEN)(send - start));
2236 d = SvPVX(sv) + SvCUR(sv);
2239 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2240 const char * const odest = SvPVX_const(sv);
2242 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2243 d = SvPVX(sv) + (d - odest);
2247 native_range = FALSE; /* \N{} is guessed to be Unicode */
2249 Copy(str, d, len, char);
2256 yyerror("Missing braces on \\N{}");
2259 /* \c is a control character */
2268 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2271 yyerror("Missing control char name in \\c");
2275 /* printf-style backslashes, formfeeds, newlines, etc */
2277 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2280 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2283 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2286 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2289 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2292 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2295 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2301 } /* end if (backslash) */
2308 /* If we started with encoded form, or already know we want it
2309 and then encode the next character */
2310 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2312 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2313 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2316 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2317 const STRLEN off = d - SvPVX_const(sv);
2318 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2320 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2323 if (uv > 255 && !dorange)
2324 native_range = FALSE;
2328 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2330 } /* while loop to process each character */
2332 /* terminate the string and set up the sv */
2334 SvCUR_set(sv, d - SvPVX_const(sv));
2335 if (SvCUR(sv) >= SvLEN(sv))
2336 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2339 if (PL_encoding && !has_utf8) {
2340 sv_recode_to_utf8(sv, PL_encoding);
2346 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2347 PL_sublex_info.sub_op->op_private |=
2348 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2352 /* shrink the sv if we allocated more than we used */
2353 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2354 SvPV_shrink_to_cur(sv);
2357 /* return the substring (via yylval) only if we parsed anything */
2358 if (s > PL_bufptr) {
2359 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2360 sv = new_constant(start, s - start,
2361 (const char *)(PL_lex_inpat ? "qr" : "q"),
2364 (( PL_lex_inwhat == OP_TRANS
2366 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2369 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2376 * Returns TRUE if there's more to the expression (e.g., a subscript),
2379 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2381 * ->[ and ->{ return TRUE
2382 * { and [ outside a pattern are always subscripts, so return TRUE
2383 * if we're outside a pattern and it's not { or [, then return FALSE
2384 * if we're in a pattern and the first char is a {
2385 * {4,5} (any digits around the comma) returns FALSE
2386 * if we're in a pattern and the first char is a [
2388 * [SOMETHING] has a funky algorithm to decide whether it's a
2389 * character class or not. It has to deal with things like
2390 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2391 * anything else returns TRUE
2394 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2397 S_intuit_more(pTHX_ register char *s)
2400 if (PL_lex_brackets)
2402 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2404 if (*s != '{' && *s != '[')
2409 /* In a pattern, so maybe we have {n,m}. */
2426 /* On the other hand, maybe we have a character class */
2429 if (*s == ']' || *s == '^')
2432 /* this is terrifying, and it works */
2433 int weight = 2; /* let's weigh the evidence */
2435 unsigned char un_char = 255, last_un_char;
2436 const char * const send = strchr(s,']');
2437 char tmpbuf[sizeof PL_tokenbuf * 4];
2439 if (!send) /* has to be an expression */
2442 Zero(seen,256,char);
2445 else if (isDIGIT(*s)) {
2447 if (isDIGIT(s[1]) && s[2] == ']')
2453 for (; s < send; s++) {
2454 last_un_char = un_char;
2455 un_char = (unsigned char)*s;
2460 weight -= seen[un_char] * 10;
2461 if (isALNUM_lazy_if(s+1,UTF)) {
2463 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2464 len = (int)strlen(tmpbuf);
2465 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2470 else if (*s == '$' && s[1] &&
2471 strchr("[#!%*<>()-=",s[1])) {
2472 if (/*{*/ strchr("])} =",s[2]))
2481 if (strchr("wds]",s[1]))
2483 else if (seen[(U8)'\''] || seen[(U8)'"'])
2485 else if (strchr("rnftbxcav",s[1]))
2487 else if (isDIGIT(s[1])) {
2489 while (s[1] && isDIGIT(s[1]))
2499 if (strchr("aA01! ",last_un_char))
2501 if (strchr("zZ79~",s[1]))
2503 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2504 weight -= 5; /* cope with negative subscript */
2507 if (!isALNUM(last_un_char)
2508 && !(last_un_char == '$' || last_un_char == '@'
2509 || last_un_char == '&')
2510 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2515 if (keyword(tmpbuf, d - tmpbuf, 0))
2518 if (un_char == last_un_char + 1)
2520 weight -= seen[un_char];
2525 if (weight >= 0) /* probably a character class */
2535 * Does all the checking to disambiguate
2537 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2538 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2540 * First argument is the stuff after the first token, e.g. "bar".
2542 * Not a method if bar is a filehandle.
2543 * Not a method if foo is a subroutine prototyped to take a filehandle.
2544 * Not a method if it's really "Foo $bar"
2545 * Method if it's "foo $bar"
2546 * Not a method if it's really "print foo $bar"
2547 * Method if it's really "foo package::" (interpreted as package->foo)
2548 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2549 * Not a method if bar is a filehandle or package, but is quoted with
2554 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2557 char *s = start + (*start == '$');
2558 char tmpbuf[sizeof PL_tokenbuf];
2566 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2570 const char *proto = SvPVX_const(cv);
2581 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2582 /* start is the beginning of the possible filehandle/object,
2583 * and s is the end of it
2584 * tmpbuf is a copy of it
2587 if (*start == '$') {
2588 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2591 len = start - SvPVX(PL_linestr);
2595 start = SvPVX(PL_linestr) + len;
2599 return *s == '(' ? FUNCMETH : METHOD;
2601 if (!keyword(tmpbuf, len, 0)) {
2602 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2606 soff = s - SvPVX(PL_linestr);
2610 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2611 if (indirgv && GvCVu(indirgv))
2613 /* filehandle or package name makes it a method */
2614 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2616 soff = s - SvPVX(PL_linestr);
2619 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2620 return 0; /* no assumptions -- "=>" quotes bearword */
2622 start_force(PL_curforce);
2623 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2624 newSVpvn(tmpbuf,len));
2625 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2627 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2632 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2634 return *s == '(' ? FUNCMETH : METHOD;
2642 * Return a string of Perl code to load the debugger. If PERL5DB
2643 * is set, it will return the contents of that, otherwise a
2644 * compile-time require of perl5db.pl.
2652 const char * const pdb = PerlEnv_getenv("PERL5DB");
2656 SETERRNO(0,SS_NORMAL);
2657 return "BEGIN { require 'perl5db.pl' }";
2663 /* Encoded script support. filter_add() effectively inserts a
2664 * 'pre-processing' function into the current source input stream.
2665 * Note that the filter function only applies to the current source file
2666 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2668 * The datasv parameter (which may be NULL) can be used to pass
2669 * private data to this instance of the filter. The filter function
2670 * can recover the SV using the FILTER_DATA macro and use it to
2671 * store private buffers and state information.
2673 * The supplied datasv parameter is upgraded to a PVIO type
2674 * and the IoDIRP/IoANY field is used to store the function pointer,
2675 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2676 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2677 * private use must be set using malloc'd pointers.
2681 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2687 if (!PL_rsfp_filters)
2688 PL_rsfp_filters = newAV();
2691 SvUPGRADE(datasv, SVt_PVIO);
2692 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2693 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2694 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2695 FPTR2DPTR(void *, IoANY(datasv)),
2696 SvPV_nolen(datasv)));
2697 av_unshift(PL_rsfp_filters, 1);
2698 av_store(PL_rsfp_filters, 0, datasv) ;
2703 /* Delete most recently added instance of this filter function. */
2705 Perl_filter_del(pTHX_ filter_t funcp)
2711 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2712 FPTR2DPTR(void*, funcp)));
2714 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2716 /* if filter is on top of stack (usual case) just pop it off */
2717 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2718 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2719 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2720 IoANY(datasv) = (void *)NULL;
2721 sv_free(av_pop(PL_rsfp_filters));
2725 /* we need to search for the correct entry and clear it */
2726 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2730 /* Invoke the idxth filter function for the current rsfp. */
2731 /* maxlen 0 = read one text line */
2733 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2738 /* This API is bad. It should have been using unsigned int for maxlen.
2739 Not sure if we want to change the API, but if not we should sanity
2740 check the value here. */
2741 const unsigned int correct_length
2750 if (!PL_rsfp_filters)
2752 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2753 /* Provide a default input filter to make life easy. */
2754 /* Note that we append to the line. This is handy. */
2755 DEBUG_P(PerlIO_printf(Perl_debug_log,
2756 "filter_read %d: from rsfp\n", idx));
2757 if (correct_length) {
2760 const int old_len = SvCUR(buf_sv);
2762 /* ensure buf_sv is large enough */
2763 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2764 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2765 correct_length)) <= 0) {
2766 if (PerlIO_error(PL_rsfp))
2767 return -1; /* error */
2769 return 0 ; /* end of file */
2771 SvCUR_set(buf_sv, old_len + len) ;
2774 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2775 if (PerlIO_error(PL_rsfp))
2776 return -1; /* error */
2778 return 0 ; /* end of file */
2781 return SvCUR(buf_sv);
2783 /* Skip this filter slot if filter has been deleted */
2784 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2785 DEBUG_P(PerlIO_printf(Perl_debug_log,
2786 "filter_read %d: skipped (filter deleted)\n",
2788 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2790 /* Get function pointer hidden within datasv */
2791 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2792 DEBUG_P(PerlIO_printf(Perl_debug_log,
2793 "filter_read %d: via function %p (%s)\n",
2794 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2795 /* Call function. The function is expected to */
2796 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2797 /* Return: <0:error, =0:eof, >0:not eof */
2798 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2802 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2805 #ifdef PERL_CR_FILTER
2806 if (!PL_rsfp_filters) {
2807 filter_add(S_cr_textfilter,NULL);
2810 if (PL_rsfp_filters) {
2812 SvCUR_set(sv, 0); /* start with empty line */
2813 if (FILTER_READ(0, sv, 0) > 0)
2814 return ( SvPVX(sv) ) ;
2819 return (sv_gets(sv, fp, append));
2823 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2828 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2832 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2833 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2835 return GvHV(gv); /* Foo:: */
2838 /* use constant CLASS => 'MyClass' */
2839 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2840 if (gv && GvCV(gv)) {
2841 SV * const sv = cv_const_sv(GvCV(gv));
2843 pkgname = SvPV_nolen_const(sv);
2846 return gv_stashpv(pkgname, FALSE);
2850 * S_readpipe_override
2851 * Check whether readpipe() is overriden, and generates the appropriate
2852 * optree, provided sublex_start() is called afterwards.
2855 S_readpipe_override(pTHX)
2858 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2859 yylval.ival = OP_BACKTICK;
2861 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2863 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2864 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2865 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2867 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2868 append_elem(OP_LIST,
2869 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2870 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2880 * The intent of this yylex wrapper is to minimize the changes to the
2881 * tokener when we aren't interested in collecting madprops. It remains
2882 * to be seen how successful this strategy will be...
2889 char *s = PL_bufptr;
2891 /* make sure PL_thiswhite is initialized */
2895 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2896 if (PL_pending_ident)
2897 return S_pending_ident(aTHX);
2899 /* previous token ate up our whitespace? */
2900 if (!PL_lasttoke && PL_nextwhite) {
2901 PL_thiswhite = PL_nextwhite;
2905 /* isolate the token, and figure out where it is without whitespace */
2906 PL_realtokenstart = -1;
2910 assert(PL_curforce < 0);
2912 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2913 if (!PL_thistoken) {
2914 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2915 PL_thistoken = newSVpvs("");
2917 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2918 PL_thistoken = newSVpvn(tstart, s - tstart);
2921 if (PL_thismad) /* install head */
2922 CURMAD('X', PL_thistoken);
2925 /* last whitespace of a sublex? */
2926 if (optype == ')' && PL_endwhite) {
2927 CURMAD('X', PL_endwhite);
2932 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
2933 if (!PL_thiswhite && !PL_endwhite && !optype) {
2934 sv_free(PL_thistoken);
2939 /* put off final whitespace till peg */
2940 if (optype == ';' && !PL_rsfp) {
2941 PL_nextwhite = PL_thiswhite;
2944 else if (PL_thisopen) {
2945 CURMAD('q', PL_thisopen);
2947 sv_free(PL_thistoken);
2951 /* Store actual token text as madprop X */
2952 CURMAD('X', PL_thistoken);
2956 /* add preceding whitespace as madprop _ */
2957 CURMAD('_', PL_thiswhite);
2961 /* add quoted material as madprop = */
2962 CURMAD('=', PL_thisstuff);
2966 /* add terminating quote as madprop Q */
2967 CURMAD('Q', PL_thisclose);
2971 /* special processing based on optype */
2975 /* opval doesn't need a TOKEN since it can already store mp */
2986 append_madprops(PL_thismad, yylval.opval, 0);
2994 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3003 /* remember any fake bracket that lexer is about to discard */
3004 if (PL_lex_brackets == 1 &&
3005 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3008 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3011 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3012 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3015 break; /* don't bother looking for trailing comment */
3024 /* attach a trailing comment to its statement instead of next token */
3028 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3030 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3032 if (*s == '\n' || *s == '#') {
3033 while (s < PL_bufend && *s != '\n')
3037 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3038 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3055 /* Create new token struct. Note: opvals return early above. */
3056 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3063 S_tokenize_use(pTHX_ int is_use, char *s) {
3065 if (PL_expect != XSTATE)
3066 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3067 is_use ? "use" : "no"));
3069 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3070 s = force_version(s, TRUE);
3071 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3072 start_force(PL_curforce);
3073 NEXTVAL_NEXTTOKE.opval = NULL;
3076 else if (*s == 'v') {
3077 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3078 s = force_version(s, FALSE);
3082 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3083 s = force_version(s, FALSE);
3085 yylval.ival = is_use;
3089 static const char* const exp_name[] =
3090 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3091 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3098 Works out what to call the token just pulled out of the input
3099 stream. The yacc parser takes care of taking the ops we return and
3100 stitching them into a tree.
3106 if read an identifier
3107 if we're in a my declaration
3108 croak if they tried to say my($foo::bar)
3109 build the ops for a my() declaration
3110 if it's an access to a my() variable
3111 are we in a sort block?
3112 croak if my($a); $a <=> $b
3113 build ops for access to a my() variable
3114 if in a dq string, and they've said @foo and we can't find @foo
3116 build ops for a bareword
3117 if we already built the token before, use it.
3122 #pragma segment Perl_yylex
3128 register char *s = PL_bufptr;
3133 /* orig_keyword, gvp, and gv are initialized here because
3134 * jump to the label just_a_word_zero can bypass their
3135 * initialization later. */
3136 I32 orig_keyword = 0;
3141 SV* tmp = newSVpvs("");
3142 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3143 (IV)CopLINE(PL_curcop),
3144 lex_state_names[PL_lex_state],
3145 exp_name[PL_expect],
3146 pv_display(tmp, s, strlen(s), 0, 60));
3149 /* check if there's an identifier for us to look at */
3150 if (PL_pending_ident)
3151 return REPORT(S_pending_ident(aTHX));
3153 /* no identifier pending identification */
3155 switch (PL_lex_state) {
3157 case LEX_NORMAL: /* Some compilers will produce faster */
3158 case LEX_INTERPNORMAL: /* code if we comment these out. */
3162 /* when we've already built the next token, just pull it out of the queue */
3166 yylval = PL_nexttoke[PL_lasttoke].next_val;
3168 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3169 PL_nexttoke[PL_lasttoke].next_mad = 0;
3170 if (PL_thismad && PL_thismad->mad_key == '_') {
3171 PL_thiswhite = (SV*)PL_thismad->mad_val;
3172 PL_thismad->mad_val = 0;
3173 mad_free(PL_thismad);
3178 PL_lex_state = PL_lex_defer;
3179 PL_expect = PL_lex_expect;
3180 PL_lex_defer = LEX_NORMAL;
3181 if (!PL_nexttoke[PL_lasttoke].next_type)
3186 yylval = PL_nextval[PL_nexttoke];
3188 PL_lex_state = PL_lex_defer;
3189 PL_expect = PL_lex_expect;
3190 PL_lex_defer = LEX_NORMAL;
3194 /* FIXME - can these be merged? */
3195 return(PL_nexttoke[PL_lasttoke].next_type);
3197 return REPORT(PL_nexttype[PL_nexttoke]);
3200 /* interpolated case modifiers like \L \U, including \Q and \E.
3201 when we get here, PL_bufptr is at the \
3203 case LEX_INTERPCASEMOD:
3205 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3206 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3208 /* handle \E or end of string */
3209 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3211 if (PL_lex_casemods) {
3212 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3213 PL_lex_casestack[PL_lex_casemods] = '\0';
3215 if (PL_bufptr != PL_bufend
3216 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3218 PL_lex_state = LEX_INTERPCONCAT;
3221 PL_thistoken = newSVpvs("\\E");
3227 while (PL_bufptr != PL_bufend &&
3228 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3230 PL_thiswhite = newSVpvs("");
3231 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3235 if (PL_bufptr != PL_bufend)
3238 PL_lex_state = LEX_INTERPCONCAT;
3242 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3243 "### Saw case modifier\n"); });
3245 if (s[1] == '\\' && s[2] == 'E') {
3248 PL_thiswhite = newSVpvs("");
3249 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3252 PL_lex_state = LEX_INTERPCONCAT;
3257 if (!PL_madskills) /* when just compiling don't need correct */
3258 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3259 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3260 if ((*s == 'L' || *s == 'U') &&
3261 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3262 PL_lex_casestack[--PL_lex_casemods] = '\0';
3265 if (PL_lex_casemods > 10)
3266 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3267 PL_lex_casestack[PL_lex_casemods++] = *s;
3268 PL_lex_casestack[PL_lex_casemods] = '\0';
3269 PL_lex_state = LEX_INTERPCONCAT;
3270 start_force(PL_curforce);
3271 NEXTVAL_NEXTTOKE.ival = 0;
3273 start_force(PL_curforce);
3275 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3277 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3279 NEXTVAL_NEXTTOKE.ival = OP_LC;
3281 NEXTVAL_NEXTTOKE.ival = OP_UC;
3283 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3285 Perl_croak(aTHX_ "panic: yylex");
3287 SV* const tmpsv = newSVpvs("");
3288 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3294 if (PL_lex_starts) {
3300 sv_free(PL_thistoken);
3301 PL_thistoken = newSVpvs("");
3304 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3305 if (PL_lex_casemods == 1 && PL_lex_inpat)
3314 case LEX_INTERPPUSH:
3315 return REPORT(sublex_push());
3317 case LEX_INTERPSTART:
3318 if (PL_bufptr == PL_bufend)
3319 return REPORT(sublex_done());
3320 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3321 "### Interpolated variable\n"); });
3323 PL_lex_dojoin = (*PL_bufptr == '@');
3324 PL_lex_state = LEX_INTERPNORMAL;
3325 if (PL_lex_dojoin) {
3326 start_force(PL_curforce);
3327 NEXTVAL_NEXTTOKE.ival = 0;
3329 start_force(PL_curforce);
3330 force_ident("\"", '$');
3331 start_force(PL_curforce);
3332 NEXTVAL_NEXTTOKE.ival = 0;
3334 start_force(PL_curforce);
3335 NEXTVAL_NEXTTOKE.ival = 0;
3337 start_force(PL_curforce);
3338 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3341 if (PL_lex_starts++) {
3346 sv_free(PL_thistoken);
3347 PL_thistoken = newSVpvs("");
3350 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3351 if (!PL_lex_casemods && PL_lex_inpat)
3358 case LEX_INTERPENDMAYBE:
3359 if (intuit_more(PL_bufptr)) {
3360 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3366 if (PL_lex_dojoin) {
3367 PL_lex_dojoin = FALSE;
3368 PL_lex_state = LEX_INTERPCONCAT;
3372 sv_free(PL_thistoken);
3373 PL_thistoken = newSVpvs("");
3378 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3379 && SvEVALED(PL_lex_repl))
3381 if (PL_bufptr != PL_bufend)
3382 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3386 case LEX_INTERPCONCAT:
3388 if (PL_lex_brackets)
3389 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3391 if (PL_bufptr == PL_bufend)
3392 return REPORT(sublex_done());
3394 if (SvIVX(PL_linestr) == '\'') {
3395 SV *sv = newSVsv(PL_linestr);
3398 else if ( PL_hints & HINT_NEW_RE )
3399 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3400 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3404 s = scan_const(PL_bufptr);
3406 PL_lex_state = LEX_INTERPCASEMOD;
3408 PL_lex_state = LEX_INTERPSTART;
3411 if (s != PL_bufptr) {
3412 start_force(PL_curforce);
3414 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3416 NEXTVAL_NEXTTOKE = yylval;
3419 if (PL_lex_starts++) {
3423 sv_free(PL_thistoken);
3424 PL_thistoken = newSVpvs("");
3427 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3428 if (!PL_lex_casemods && PL_lex_inpat)
3441 PL_lex_state = LEX_NORMAL;
3442 s = scan_formline(PL_bufptr);
3443 if (!PL_lex_formbrack)
3449 PL_oldoldbufptr = PL_oldbufptr;
3455 sv_free(PL_thistoken);
3458 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3462 if (isIDFIRST_lazy_if(s,UTF))
3464 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3467 goto fake_eof; /* emulate EOF on ^D or ^Z */
3476 if (PL_lex_brackets) {
3477 yyerror((const char *)
3479 ? "Format not terminated"
3480 : "Missing right curly or square bracket"));
3482 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3483 "### Tokener got EOF\n");
3487 if (s++ < PL_bufend)
3488 goto retry; /* ignore stray nulls */
3491 if (!PL_in_eval && !PL_preambled) {
3492 PL_preambled = TRUE;
3497 sv_setpv(PL_linestr,incl_perldb());
3498 if (SvCUR(PL_linestr))
3499 sv_catpvs(PL_linestr,";");
3501 while(AvFILLp(PL_preambleav) >= 0) {
3502 SV *tmpsv = av_shift(PL_preambleav);
3503 sv_catsv(PL_linestr, tmpsv);
3504 sv_catpvs(PL_linestr, ";");
3507 sv_free((SV*)PL_preambleav);
3508 PL_preambleav = NULL;
3510 if (PL_minus_n || PL_minus_p) {
3511 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3513 sv_catpvs(PL_linestr,"chomp;");
3516 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3517 || *PL_splitstr == '"')
3518 && strchr(PL_splitstr + 1, *PL_splitstr))
3519 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3521 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3522 bytes can be used as quoting characters. :-) */
3523 const char *splits = PL_splitstr;
3524 sv_catpvs(PL_linestr, "our @F=split(q\0");
3527 if (*splits == '\\')
3528 sv_catpvn(PL_linestr, splits, 1);
3529 sv_catpvn(PL_linestr, splits, 1);
3530 } while (*splits++);
3531 /* This loop will embed the trailing NUL of
3532 PL_linestr as the last thing it does before
3534 sv_catpvs(PL_linestr, ");");
3538 sv_catpvs(PL_linestr,"our @F=split(' ');");
3542 sv_catpvs(PL_linestr,"use feature ':5.10';");
3543 sv_catpvs(PL_linestr, "\n");
3544 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3545 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3546 PL_last_lop = PL_last_uni = NULL;
3547 if (PERLDB_LINE && PL_curstash != PL_debstash) {
3548 SV * const sv = newSV(0);
3550 sv_upgrade(sv, SVt_PVMG);
3551 sv_setsv(sv,PL_linestr);
3554 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
3559 bof = PL_rsfp ? TRUE : FALSE;
3560 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3563 PL_realtokenstart = -1;
3566 if (PL_preprocess && !PL_in_eval)
3567 (void)PerlProc_pclose(PL_rsfp);
3568 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3569 PerlIO_clearerr(PL_rsfp);
3571 (void)PerlIO_close(PL_rsfp);
3573 PL_doextract = FALSE;
3575 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3580 sv_setpv(PL_linestr,
3583 ? ";}continue{print;}" : ";}"));
3584 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3585 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3586 PL_last_lop = PL_last_uni = NULL;
3587 PL_minus_n = PL_minus_p = 0;
3590 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3591 PL_last_lop = PL_last_uni = NULL;
3592 sv_setpvn(PL_linestr,"",0);
3593 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3595 /* If it looks like the start of a BOM or raw UTF-16,
3596 * check if it in fact is. */
3602 #ifdef PERLIO_IS_STDIO
3603 # ifdef __GNU_LIBRARY__
3604 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3605 # define FTELL_FOR_PIPE_IS_BROKEN
3609 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3610 # define FTELL_FOR_PIPE_IS_BROKEN
3615 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3616 /* This loses the possibility to detect the bof
3617 * situation on perl -P when the libc5 is being used.
3618 * Workaround? Maybe attach some extra state to PL_rsfp?
3621 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3623 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3626 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3627 s = swallow_bom((U8*)s);
3631 /* Incest with pod. */
3634 sv_catsv(PL_thiswhite, PL_linestr);
3636 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3637 sv_setpvn(PL_linestr, "", 0);
3638 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3639 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3640 PL_last_lop = PL_last_uni = NULL;
3641 PL_doextract = FALSE;
3645 } while (PL_doextract);
3646 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3647 if (PERLDB_LINE && PL_curstash != PL_debstash) {
3648 SV * const sv = newSV(0);
3650 sv_upgrade(sv, SVt_PVMG);
3651 sv_setsv(sv,PL_linestr);
3654 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
3656 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3657 PL_last_lop = PL_last_uni = NULL;
3658 if (CopLINE(PL_curcop) == 1) {
3659 while (s < PL_bufend && isSPACE(*s))
3661 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3665 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3669 if (*s == '#' && *(s+1) == '!')
3671 #ifdef ALTERNATE_SHEBANG
3673 static char const as[] = ALTERNATE_SHEBANG;
3674 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3675 d = s + (sizeof(as) - 1);
3677 #endif /* ALTERNATE_SHEBANG */
3686 while (*d && !isSPACE(*d))
3690 #ifdef ARG_ZERO_IS_SCRIPT
3691 if (ipathend > ipath) {
3693 * HP-UX (at least) sets argv[0] to the script name,
3694 * which makes $^X incorrect. And Digital UNIX and Linux,
3695 * at least, set argv[0] to the basename of the Perl
3696 * interpreter. So, having found "#!", we'll set it right.
3698 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3700 assert(SvPOK(x) || SvGMAGICAL(x));
3701 if (sv_eq(x, CopFILESV(PL_curcop))) {
3702 sv_setpvn(x, ipath, ipathend - ipath);
3708 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3709 const char * const lstart = SvPV_const(x,llen);
3711 bstart += blen - llen;
3712 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3713 sv_setpvn(x, ipath, ipathend - ipath);
3718 TAINT_NOT; /* $^X is always tainted, but that's OK */
3720 #endif /* ARG_ZERO_IS_SCRIPT */
3725 d = instr(s,"perl -");
3727 d = instr(s,"perl");
3729 /* avoid getting into infinite loops when shebang
3730 * line contains "Perl" rather than "perl" */
3732 for (d = ipathend-4; d >= ipath; --d) {
3733 if ((*d == 'p' || *d == 'P')
3734 && !ibcmp(d, "perl", 4))
3744 #ifdef ALTERNATE_SHEBANG
3746 * If the ALTERNATE_SHEBANG on this system starts with a
3747 * character that can be part of a Perl expression, then if
3748 * we see it but not "perl", we're probably looking at the
3749 * start of Perl code, not a request to hand off to some
3750 * other interpreter. Similarly, if "perl" is there, but
3751 * not in the first 'word' of the line, we assume the line
3752 * contains the start of the Perl program.
3754 if (d && *s != '#') {
3755 const char *c = ipath;
3756 while (*c && !strchr("; \t\r\n\f\v#", *c))
3759 d = NULL; /* "perl" not in first word; ignore */
3761 *s = '#'; /* Don't try to parse shebang line */
3763 #endif /* ALTERNATE_SHEBANG */
3764 #ifndef MACOS_TRADITIONAL
3769 !instr(s,"indir") &&
3770 instr(PL_origargv[0],"perl"))
3777 while (s < PL_bufend && isSPACE(*s))
3779 if (s < PL_bufend) {
3780 Newxz(newargv,PL_origargc+3,char*);
3782 while (s < PL_bufend && !isSPACE(*s))
3785 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3788 newargv = PL_origargv;
3791 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3793 Perl_croak(aTHX_ "Can't exec %s", ipath);
3797 while (*d && !isSPACE(*d))
3799 while (SPACE_OR_TAB(*d))
3803 const bool switches_done = PL_doswitches;
3804 const U32 oldpdb = PL_perldb;
3805 const bool oldn = PL_minus_n;
3806 const bool oldp = PL_minus_p;
3809 if (*d == 'M' || *d == 'm' || *d == 'C') {
3810 const char * const m = d;
3811 while (*d && !isSPACE(*d))
3813 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3816 d = moreswitches(d);
3818 if (PL_doswitches && !switches_done) {
3819 int argc = PL_origargc;
3820 char **argv = PL_origargv;
3823 } while (argc && argv[0][0] == '-' && argv[0][1]);
3824 init_argv_symbols(argc,argv);
3826 if ((PERLDB_LINE && !oldpdb) ||
3827 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3828 /* if we have already added "LINE: while (<>) {",
3829 we must not do it again */
3831 sv_setpvn(PL_linestr, "", 0);
3832 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3833 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3834 PL_last_lop = PL_last_uni = NULL;
3835 PL_preambled = FALSE;
3837 (void)gv_fetchfile(PL_origfilename);
3844 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3846 PL_lex_state = LEX_FORMLINE;
3851 #ifdef PERL_STRICT_CR
3852 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3854 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3856 case ' ': case '\t': case '\f': case 013:
3857 #ifdef MACOS_TRADITIONAL
3861 PL_realtokenstart = -1;
3870 PL_realtokenstart = -1;
3874 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3875 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3876 /* handle eval qq[#line 1 "foo"\n ...] */
3877 CopLINE_dec(PL_curcop);
3880 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3882 if (!PL_in_eval || PL_rsfp)
3887 while (d < PL_bufend && *d != '\n')
3891 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3892 Perl_croak(aTHX_ "panic: input overflow");
3895 PL_thiswhite = newSVpvn(s, d - s);
3900 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3902 PL_lex_state = LEX_FORMLINE;
3908 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3909 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3912 TOKEN(PEG); /* make sure any #! line is accessible */
3917 /* if (PL_madskills && PL_lex_formbrack) { */
3919 while (d < PL_bufend && *d != '\n')
3923 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3924 Perl_croak(aTHX_ "panic: input overflow");
3925 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3927 PL_thiswhite = newSVpvs("");
3928 if (CopLINE(PL_curcop) == 1) {
3929 sv_setpvn(PL_thiswhite, "", 0);
3932 sv_catpvn(PL_thiswhite, s, d - s);
3946 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3954 while (s < PL_bufend && SPACE_OR_TAB(*s))
3957 if (strnEQ(s,"=>",2)) {
3958 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3959 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
3960 OPERATOR('-'); /* unary minus */
3962 PL_last_uni = PL_oldbufptr;
3964 case 'r': ftst = OP_FTEREAD; break;
3965 case 'w': ftst = OP_FTEWRITE; break;
3966 case 'x': ftst = OP_FTEEXEC; break;
3967 case 'o': ftst = OP_FTEOWNED; break;
3968 case 'R': ftst = OP_FTRREAD; break;
3969 case 'W': ftst = OP_FTRWRITE; break;
3970 case 'X': ftst = OP_FTREXEC; break;
3971 case 'O': ftst = OP_FTROWNED; break;
3972 case 'e': ftst = OP_FTIS; break;
3973 case 'z': ftst = OP_FTZERO; break;
3974 case 's': ftst = OP_FTSIZE; break;
3975 case 'f': ftst = OP_FTFILE; break;
3976 case 'd': ftst = OP_FTDIR; break;
3977 case 'l': ftst = OP_FTLINK; break;
3978 case 'p': ftst = OP_FTPIPE; break;
3979 case 'S': ftst = OP_FTSOCK; break;
3980 case 'u': ftst = OP_FTSUID; break;
3981 case 'g': ftst = OP_FTSGID; break;
3982 case 'k': ftst = OP_FTSVTX; break;
3983 case 'b': ftst = OP_FTBLK; break;
3984 case 'c': ftst = OP_FTCHR; break;
3985 case 't': ftst = OP_FTTTY; break;
3986 case 'T': ftst = OP_FTTEXT; break;
3987 case 'B': ftst = OP_FTBINARY; break;
3988 case 'M': case 'A': case 'C':
3989 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
3991 case 'M': ftst = OP_FTMTIME; break;
3992 case 'A': ftst = OP_FTATIME; break;
3993 case 'C': ftst = OP_FTCTIME; break;
4001 PL_last_lop_op = (OPCODE)ftst;
4002 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4003 "### Saw file test %c\n", (int)tmp);
4008 /* Assume it was a minus followed by a one-letter named
4009 * subroutine call (or a -bareword), then. */
4010 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4011 "### '-%c' looked like a file test but was not\n",
4018 const char tmp = *s++;
4021 if (PL_expect == XOPERATOR)
4026 else if (*s == '>') {
4029 if (isIDFIRST_lazy_if(s,UTF)) {
4030 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4038 if (PL_expect == XOPERATOR)
4041 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4043 OPERATOR('-'); /* unary minus */
4049 const char tmp = *s++;
4052 if (PL_expect == XOPERATOR)
4057 if (PL_expect == XOPERATOR)
4060 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4067 if (PL_expect != XOPERATOR) {
4068 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4069 PL_expect = XOPERATOR;
4070 force_ident(PL_tokenbuf, '*');
4083 if (PL_expect == XOPERATOR) {
4087 PL_tokenbuf[0] = '%';
4088 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
4089 if (!PL_tokenbuf[1]) {
4092 PL_pending_ident = '%';
4103 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
4104 && FEATURE_IS_ENABLED("~~"))
4111 const char tmp = *s++;
4117 goto just_a_word_zero_gv;
4120 switch (PL_expect) {
4126 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4128 PL_bufptr = s; /* update in case we back off */
4134 PL_expect = XTERMBLOCK;
4137 stuffstart = s - SvPVX(PL_linestr) - 1;
4141 while (isIDFIRST_lazy_if(s,UTF)) {
4144 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4145 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4146 if (tmp < 0) tmp = -tmp;
4161 sv = newSVpvn(s, len);
4163 d = scan_str(d,TRUE,TRUE);
4165 /* MUST advance bufptr here to avoid bogus
4166 "at end of line" context messages from yyerror().
4168 PL_bufptr = s + len;
4169 yyerror("Unterminated attribute parameter in attribute list");
4173 return REPORT(0); /* EOF indicator */
4177 sv_catsv(sv, PL_lex_stuff);
4178 attrs = append_elem(OP_LIST, attrs,
4179 newSVOP(OP_CONST, 0, sv));
4180 SvREFCNT_dec(PL_lex_stuff);
4181 PL_lex_stuff = NULL;
4184 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4186 if (PL_in_my == KEY_our) {
4188 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4190 /* skip to avoid loading attributes.pm */
4192 deprecate(":unique");
4195 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4198 /* NOTE: any CV attrs applied here need to be part of
4199 the CVf_BUILTIN_ATTRS define in cv.h! */
4200 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4202 CvLVALUE_on(PL_compcv);
4204 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4206 CvLOCKED_on(PL_compcv);
4208 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4210 CvMETHOD_on(PL_compcv);
4212 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4214 CvASSERTION_on(PL_compcv);
4216 /* After we've set the flags, it could be argued that
4217 we don't need to do the attributes.pm-based setting
4218 process, and shouldn't bother appending recognized
4219 flags. To experiment with that, uncomment the
4220 following "else". (Note that's already been
4221 uncommented. That keeps the above-applied built-in
4222 attributes from being intercepted (and possibly
4223 rejected) by a package's attribute routines, but is
4224 justified by the performance win for the common case
4225 of applying only built-in attributes.) */
4227 attrs = append_elem(OP_LIST, attrs,
4228 newSVOP(OP_CONST, 0,
4232 if (*s == ':' && s[1] != ':')
4235 break; /* require real whitespace or :'s */
4236 /* XXX losing whitespace on sequential attributes here */
4240 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4241 if (*s != ';' && *s != '}' && *s != tmp
4242 && (tmp != '=' || *s != ')')) {
4243 const char q = ((*s == '\'') ? '"' : '\'');
4244 /* If here for an expression, and parsed no attrs, back
4246 if (tmp == '=' && !attrs) {
4250 /* MUST advance bufptr here to avoid bogus "at end of line"
4251 context messages from yyerror().
4254 yyerror( (const char *)
4256 ? Perl_form(aTHX_ "Invalid separator character "
4257 "%c%c%c in attribute list", q, *s, q)
4258 : "Unterminated attribute list" ) );
4266 start_force(PL_curforce);
4267 NEXTVAL_NEXTTOKE.opval = attrs;
4268 CURMAD('_', PL_nextwhite);
4273 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4274 (s - SvPVX(PL_linestr)) - stuffstart);
4282 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4283 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4291 const char tmp = *s++;
4296 const char tmp = *s++;
4304 if (PL_lex_brackets <= 0)
4305 yyerror("Unmatched right square bracket");
4308 if (PL_lex_state == LEX_INTERPNORMAL) {
4309 if (PL_lex_brackets == 0) {
4310 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4311 PL_lex_state = LEX_INTERPEND;
4318 if (PL_lex_brackets > 100) {
4319 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4321 switch (PL_expect) {
4323 if (PL_lex_formbrack) {
4327 if (PL_oldoldbufptr == PL_last_lop)
4328 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4330 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4331 OPERATOR(HASHBRACK);
4333 while (s < PL_bufend && SPACE_OR_TAB(*s))
4336 PL_tokenbuf[0] = '\0';
4337 if (d < PL_bufend && *d == '-') {
4338 PL_tokenbuf[0] = '-';
4340 while (d < PL_bufend && SPACE_OR_TAB(*d))
4343 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4344 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4346 while (d < PL_bufend && SPACE_OR_TAB(*d))
4349 const char minus = (PL_tokenbuf[0] == '-');
4350 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4358 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4363 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4368 if (PL_oldoldbufptr == PL_last_lop)
4369 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4371 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4374 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4376 /* This hack is to get the ${} in the message. */
4378 yyerror("syntax error");
4381 OPERATOR(HASHBRACK);
4383 /* This hack serves to disambiguate a pair of curlies
4384 * as being a block or an anon hash. Normally, expectation
4385 * determines that, but in cases where we're not in a
4386 * position to expect anything in particular (like inside
4387 * eval"") we have to resolve the ambiguity. This code
4388 * covers the case where the first term in the curlies is a
4389 * quoted string. Most other cases need to be explicitly
4390 * disambiguated by prepending a "+" before the opening
4391 * curly in order to force resolution as an anon hash.
4393 * XXX should probably propagate the outer expectation
4394 * into eval"" to rely less on this hack, but that could
4395 * potentially break current behavior of eval"".
4399 if (*s == '\'' || *s == '"' || *s == '`') {
4400 /* common case: get past first string, handling escapes */
4401 for (t++; t < PL_bufend && *t != *s;)
4402 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4406 else if (*s == 'q') {
4409 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4412 /* skip q//-like construct */
4414 char open, close, term;
4417 while (t < PL_bufend && isSPACE(*t))
4419 /* check for q => */
4420 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4421 OPERATOR(HASHBRACK);
4425 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4429 for (t++; t < PL_bufend; t++) {
4430 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4432 else if (*t == open)
4436 for (t++; t < PL_bufend; t++) {
4437 if (*t == '\\' && t+1 < PL_bufend)
4439 else if (*t == close && --brackets <= 0)
4441 else if (*t == open)
4448 /* skip plain q word */
4449 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4452 else if (isALNUM_lazy_if(t,UTF)) {
4454 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4457 while (t < PL_bufend && isSPACE(*t))
4459 /* if comma follows first term, call it an anon hash */
4460 /* XXX it could be a comma expression with loop modifiers */
4461 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4462 || (*t == '=' && t[1] == '>')))
4463 OPERATOR(HASHBRACK);
4464 if (PL_expect == XREF)
4467 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4473 yylval.ival = CopLINE(PL_curcop);
4474 if (isSPACE(*s) || *s == '#')
4475 PL_copline = NOLINE; /* invalidate current command line number */
4480 if (PL_lex_brackets <= 0)
4481 yyerror("Unmatched right curly bracket");
4483 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4484 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4485 PL_lex_formbrack = 0;
4486 if (PL_lex_state == LEX_INTERPNORMAL) {
4487 if (PL_lex_brackets == 0) {
4488 if (PL_expect & XFAKEBRACK) {
4489 PL_expect &= XENUMMASK;
4490 PL_lex_state = LEX_INTERPEND;
4495 PL_thiswhite = newSVpvs("");
4496 sv_catpvn(PL_thiswhite,"}",1);
4499 return yylex(); /* ignore fake brackets */
4501 if (*s == '-' && s[1] == '>')
4502 PL_lex_state = LEX_INTERPENDMAYBE;
4503 else if (*s != '[' && *s != '{')
4504 PL_lex_state = LEX_INTERPEND;
4507 if (PL_expect & XFAKEBRACK) {
4508 PL_expect &= XENUMMASK;
4510 return yylex(); /* ignore fake brackets */
4512 start_force(PL_curforce);
4514 curmad('X', newSVpvn(s-1,1));
4515 CURMAD('_', PL_thiswhite);
4520 PL_thistoken = newSVpvs("");
4528 if (PL_expect == XOPERATOR) {
4529 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4530 && isIDFIRST_lazy_if(s,UTF))
4532 CopLINE_dec(PL_curcop);
4533 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4534 CopLINE_inc(PL_curcop);
4539 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4541 PL_expect = XOPERATOR;
4542 force_ident(PL_tokenbuf, '&');
4546 yylval.ival = (OPpENTERSUB_AMPER<<8);
4558 const char tmp = *s++;
4565 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4566 && strchr("+-*/%.^&|<",tmp))
4567 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4568 "Reversed %c= operator",(int)tmp);
4570 if (PL_expect == XSTATE && isALPHA(tmp) &&
4571 (s == PL_linestart+1 || s[-2] == '\n') )
4573 if (PL_in_eval && !PL_rsfp) {
4578 if (strnEQ(s,"=cut",4)) {
4594 PL_thiswhite = newSVpvs("");
4595 sv_catpvn(PL_thiswhite, PL_linestart,
4596 PL_bufend - PL_linestart);
4600 PL_doextract = TRUE;
4604 if (PL_lex_brackets < PL_lex_formbrack) {
4606 #ifdef PERL_STRICT_CR
4607 while (SPACE_OR_TAB(*t))
4609 while (SPACE_OR_TAB(*t) || *t == '\r')
4612 if (*t == '\n' || *t == '#') {
4623 const char tmp = *s++;
4625 /* was this !=~ where !~ was meant?
4626 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4628 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4629 const char *t = s+1;
4631 while (t < PL_bufend && isSPACE(*t))
4634 if (*t == '/' || *t == '?' ||
4635 ((*t == 'm' || *t == 's' || *t == 'y')
4636 && !isALNUM(t[1])) ||
4637 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4638 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4639 "!=~ should be !~");
4649 if (PL_expect != XOPERATOR) {
4650 if (s[1] != '<' && !strchr(s,'>'))
4653 s = scan_heredoc(s);
4655 s = scan_inputsymbol(s);
4656 TERM(sublex_start());
4662 SHop(OP_LEFT_SHIFT);
4676 const char tmp = *s++;
4678 SHop(OP_RIGHT_SHIFT);
4679 else if (tmp == '=')
4688 if (PL_expect == XOPERATOR) {
4689 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4691 deprecate_old(commaless_variable_list);
4692 return REPORT(','); /* grandfather non-comma-format format */
4696 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4697 PL_tokenbuf[0] = '@';
4698 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4699 sizeof PL_tokenbuf - 1, FALSE);
4700 if (PL_expect == XOPERATOR)
4701 no_op("Array length", s);
4702 if (!PL_tokenbuf[1])
4704 PL_expect = XOPERATOR;
4705 PL_pending_ident = '#';
4709 PL_tokenbuf[0] = '$';
4710 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4711 sizeof PL_tokenbuf - 1, FALSE);
4712 if (PL_expect == XOPERATOR)
4714 if (!PL_tokenbuf[1]) {
4716 yyerror("Final $ should be \\$ or $name");
4720 /* This kludge not intended to be bulletproof. */
4721 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4722 yylval.opval = newSVOP(OP_CONST, 0,
4723 newSViv(CopARYBASE_get(&PL_compiling)));
4724 yylval.opval->op_private = OPpCONST_ARYBASE;
4730 const char tmp = *s;
4731 if (PL_lex_state == LEX_NORMAL)
4734 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4735 && intuit_more(s)) {
4737 PL_tokenbuf[0] = '@';
4738 if (ckWARN(WARN_SYNTAX)) {
4741 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4744 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4745 while (t < PL_bufend && *t != ']')
4747 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4748 "Multidimensional syntax %.*s not supported",
4749 (int)((t - PL_bufptr) + 1), PL_bufptr);
4753 else if (*s == '{') {
4755 PL_tokenbuf[0] = '%';
4756 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4757 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4759 char tmpbuf[sizeof PL_tokenbuf];
4762 } while (isSPACE(*t));
4763 if (isIDFIRST_lazy_if(t,UTF)) {
4765 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4769 if (*t == ';' && get_cv(tmpbuf, FALSE))
4770 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4771 "You need to quote \"%s\"",
4778 PL_expect = XOPERATOR;
4779 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4780 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4781 if (!islop || PL_last_lop_op == OP_GREPSTART)
4782 PL_expect = XOPERATOR;
4783 else if (strchr("$@\"'`q", *s))
4784 PL_expect = XTERM; /* e.g. print $fh "foo" */
4785 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4786 PL_expect = XTERM; /* e.g. print $fh &sub */
4787 else if (isIDFIRST_lazy_if(s,UTF)) {
4788 char tmpbuf[sizeof PL_tokenbuf];
4790 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4791 if ((t2 = keyword(tmpbuf, len, 0))) {
4792 /* binary operators exclude handle interpretations */
4804 PL_expect = XTERM; /* e.g. print $fh length() */
4809 PL_expect = XTERM; /* e.g. print $fh subr() */
4812 else if (isDIGIT(*s))
4813 PL_expect = XTERM; /* e.g. print $fh 3 */
4814 else if (*s == '.' && isDIGIT(s[1]))
4815 PL_expect = XTERM; /* e.g. print $fh .3 */
4816 else if ((*s == '?' || *s == '-' || *s == '+')
4817 && !isSPACE(s[1]) && s[1] != '=')
4818 PL_expect = XTERM; /* e.g. print $fh -1 */
4819 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4821 PL_expect = XTERM; /* e.g. print $fh /.../
4822 XXX except DORDOR operator
4824 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4826 PL_expect = XTERM; /* print $fh <<"EOF" */
4829 PL_pending_ident = '$';
4833 if (PL_expect == XOPERATOR)
4835 PL_tokenbuf[0] = '@';
4836 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4837 if (!PL_tokenbuf[1]) {
4840 if (PL_lex_state == LEX_NORMAL)
4842 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4844 PL_tokenbuf[0] = '%';
4846 /* Warn about @ where they meant $. */
4847 if (*s == '[' || *s == '{') {
4848 if (ckWARN(WARN_SYNTAX)) {
4849 const char *t = s + 1;
4850 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4852 if (*t == '}' || *t == ']') {
4854 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4855 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4856 "Scalar value %.*s better written as $%.*s",
4857 (int)(t-PL_bufptr), PL_bufptr,
4858 (int)(t-PL_bufptr-1), PL_bufptr+1);
4863 PL_pending_ident = '@';
4866 case '/': /* may be division, defined-or, or pattern */
4867 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4871 case '?': /* may either be conditional or pattern */
4872 if(PL_expect == XOPERATOR) {
4880 /* A // operator. */
4890 /* Disable warning on "study /blah/" */
4891 if (PL_oldoldbufptr == PL_last_uni
4892 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4893 || memNE(PL_last_uni, "study", 5)
4894 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4897 s = scan_pat(s,OP_MATCH);
4898 TERM(sublex_start());
4902 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4903 #ifdef PERL_STRICT_CR
4906 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4908 && (s == PL_linestart || s[-1] == '\n') )
4910 PL_lex_formbrack = 0;
4914 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4920 yylval.ival = OPf_SPECIAL;
4926 if (PL_expect != XOPERATOR)
4931 case '0': case '1': case '2': case '3': case '4':
4932 case '5': case '6': case '7': case '8': case '9':
4933 s = scan_num(s, &yylval);
4934 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
4935 if (PL_expect == XOPERATOR)
4940 s = scan_str(s,!!PL_madskills,FALSE);
4941 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4942 if (PL_expect == XOPERATOR) {
4943 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4945 deprecate_old(commaless_variable_list);
4946 return REPORT(','); /* grandfather non-comma-format format */
4953 yylval.ival = OP_CONST;
4954 TERM(sublex_start());
4957 s = scan_str(s,!!PL_madskills,FALSE);
4958 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4959 if (PL_expect == XOPERATOR) {
4960 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4962 deprecate_old(commaless_variable_list);
4963 return REPORT(','); /* grandfather non-comma-format format */
4970 yylval.ival = OP_CONST;
4971 /* FIXME. I think that this can be const if char *d is replaced by
4972 more localised variables. */
4973 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4974 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4975 yylval.ival = OP_STRINGIFY;
4979 TERM(sublex_start());
4982 s = scan_str(s,!!PL_madskills,FALSE);
4983 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
4984 if (PL_expect == XOPERATOR)
4985 no_op("Backticks",s);
4988 readpipe_override();
4989 TERM(sublex_start());
4993 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4994 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4996 if (PL_expect == XOPERATOR)
4997 no_op("Backslash",s);
5001 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5002 char *start = s + 2;
5003 while (isDIGIT(*start) || *start == '_')
5005 if (*start == '.' && isDIGIT(start[1])) {
5006 s = scan_num(s, &yylval);
5009 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5010 else if (!isALPHA(*start) && (PL_expect == XTERM
5011 || PL_expect == XREF || PL_expect == XSTATE
5012 || PL_expect == XTERMORDORDOR)) {
5013 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5014 const char c = *start;
5017 gv = gv_fetchpv(s, 0, SVt_PVCV);
5020 s = scan_num(s, &yylval);
5027 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5069 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5071 /* Some keywords can be followed by any delimiter, including ':' */
5072 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5073 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5074 (PL_tokenbuf[0] == 'q' &&
5075 strchr("qwxr", PL_tokenbuf[1])))));
5077 /* x::* is just a word, unless x is "CORE" */
5078 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5082 while (d < PL_bufend && isSPACE(*d))
5083 d++; /* no comments skipped here, or s### is misparsed */
5085 /* Is this a label? */
5086 if (!tmp && PL_expect == XSTATE
5087 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5089 yylval.pval = savepv(PL_tokenbuf);
5094 /* Check for keywords */
5095 tmp = keyword(PL_tokenbuf, len, 0);
5097 /* Is this a word before a => operator? */
5098 if (*d == '=' && d[1] == '>') {
5101 = (OP*)newSVOP(OP_CONST, 0,
5102 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5103 yylval.opval->op_private = OPpCONST_BARE;
5107 if (tmp < 0) { /* second-class keyword? */
5108 GV *ogv = NULL; /* override (winner) */
5109 GV *hgv = NULL; /* hidden (loser) */
5110 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5112 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5115 if (GvIMPORTED_CV(gv))
5117 else if (! CvMETHOD(cv))
5121 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5122 (gv = *gvp) != (GV*)&PL_sv_undef &&
5123 GvCVu(gv) && GvIMPORTED_CV(gv))
5130 tmp = 0; /* overridden by import or by GLOBAL */
5133 && -tmp==KEY_lock /* XXX generalizable kludge */
5135 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
5137 tmp = 0; /* any sub overrides "weak" keyword */
5139 else { /* no override */
5141 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5142 Perl_warner(aTHX_ packWARN(WARN_MISC),
5143 "dump() better written as CORE::dump()");
5147 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5148 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5149 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5150 "Ambiguous call resolved as CORE::%s(), %s",
5151 GvENAME(hgv), "qualify as such or use &");
5158 default: /* not a keyword */
5159 /* Trade off - by using this evil construction we can pull the
5160 variable gv into the block labelled keylookup. If not, then
5161 we have to give it function scope so that the goto from the
5162 earlier ':' case doesn't bypass the initialisation. */
5164 just_a_word_zero_gv:
5172 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5175 SV *nextPL_nextwhite = 0;
5179 /* Get the rest if it looks like a package qualifier */
5181 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5183 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5186 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5187 *s == '\'' ? "'" : "::");
5192 if (PL_expect == XOPERATOR) {
5193 if (PL_bufptr == PL_linestart) {
5194 CopLINE_dec(PL_curcop);
5195 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5196 CopLINE_inc(PL_curcop);
5199 no_op("Bareword",s);
5202 /* Look for a subroutine with this name in current package,
5203 unless name is "Foo::", in which case Foo is a bearword
5204 (and a package name). */
5206 if (len > 2 && !PL_madskills &&
5207 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5209 if (ckWARN(WARN_BAREWORD)
5210 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5211 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5212 "Bareword \"%s\" refers to nonexistent package",
5215 PL_tokenbuf[len] = '\0';
5221 /* Mustn't actually add anything to a symbol table.
5222 But also don't want to "initialise" any placeholder
5223 constants that might already be there into full
5224 blown PVGVs with attached PVCV. */
5225 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5226 GV_NOADD_NOINIT, SVt_PVCV);
5231 /* if we saw a global override before, get the right name */
5234 sv = newSVpvs("CORE::GLOBAL::");
5235 sv_catpv(sv,PL_tokenbuf);
5238 /* If len is 0, newSVpv does strlen(), which is correct.
5239 If len is non-zero, then it will be the true length,
5240 and so the scalar will be created correctly. */
5241 sv = newSVpv(PL_tokenbuf,len);
5244 if (PL_madskills && !PL_thistoken) {
5245 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5246 PL_thistoken = newSVpv(start,s - start);
5247 PL_realtokenstart = s - SvPVX(PL_linestr);
5251 /* Presume this is going to be a bareword of some sort. */
5254 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5255 yylval.opval->op_private = OPpCONST_BARE;
5256 /* UTF-8 package name? */
5257 if (UTF && !IN_BYTES &&
5258 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5261 /* And if "Foo::", then that's what it certainly is. */
5266 /* Do the explicit type check so that we don't need to force
5267 the initialisation of the symbol table to have a real GV.
5268 Beware - gv may not really be a PVGV, cv may not really be
5269 a PVCV, (because of the space optimisations that gv_init
5270 understands) But they're true if for this symbol there is
5271 respectively a typeglob and a subroutine.
5273 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5274 /* Real typeglob, so get the real subroutine: */
5276 /* A proxy for a subroutine in this package? */
5277 : SvOK(gv) ? (CV *) gv : NULL)
5280 /* See if it's the indirect object for a list operator. */
5282 if (PL_oldoldbufptr &&
5283 PL_oldoldbufptr < PL_bufptr &&
5284 (PL_oldoldbufptr == PL_last_lop
5285 || PL_oldoldbufptr == PL_last_uni) &&
5286 /* NO SKIPSPACE BEFORE HERE! */
5287 (PL_expect == XREF ||
5288 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5290 bool immediate_paren = *s == '(';
5292 /* (Now we can afford to cross potential line boundary.) */
5293 s = SKIPSPACE2(s,nextPL_nextwhite);
5295 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5298 /* Two barewords in a row may indicate method call. */
5300 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5301 (tmp = intuit_method(s, gv, cv)))
5304 /* If not a declared subroutine, it's an indirect object. */
5305 /* (But it's an indir obj regardless for sort.) */
5306 /* Also, if "_" follows a filetest operator, it's a bareword */
5309 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5311 (PL_last_lop_op != OP_MAPSTART &&
5312 PL_last_lop_op != OP_GREPSTART))))
5313 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5314 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5317 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5322 PL_expect = XOPERATOR;
5325 s = SKIPSPACE2(s,nextPL_nextwhite);
5326 PL_nextwhite = nextPL_nextwhite;
5331 /* Is this a word before a => operator? */
5332 if (*s == '=' && s[1] == '>' && !pkgname) {
5334 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5335 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5336 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5340 /* If followed by a paren, it's certainly a subroutine. */
5345 while (SPACE_OR_TAB(*d))
5347 if (*d == ')' && (sv = gv_const_sv(gv))) {
5351 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5352 sv_catpvn(PL_thistoken, par, s - par);
5354 sv_free(PL_nextwhite);
5364 PL_nextwhite = PL_thiswhite;
5367 start_force(PL_curforce);
5369 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5370 PL_expect = XOPERATOR;
5373 PL_nextwhite = nextPL_nextwhite;
5374 curmad('X', PL_thistoken);
5375 PL_thistoken = newSVpvs("");
5383 /* If followed by var or block, call it a method (unless sub) */
5385 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5386 PL_last_lop = PL_oldbufptr;
5387 PL_last_lop_op = OP_METHOD;
5391 /* If followed by a bareword, see if it looks like indir obj. */
5394 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5395 && (tmp = intuit_method(s, gv, cv)))
5398 /* Not a method, so call it a subroutine (if defined) */
5401 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5402 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5403 "Ambiguous use of -%s resolved as -&%s()",
5404 PL_tokenbuf, PL_tokenbuf);
5405 /* Check for a constant sub */
5406 if ((sv = gv_const_sv(gv))) {
5408 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5409 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5410 yylval.opval->op_private = 0;
5414 /* Resolve to GV now. */
5415 if (SvTYPE(gv) != SVt_PVGV) {
5416 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5417 assert (SvTYPE(gv) == SVt_PVGV);
5418 /* cv must have been some sort of placeholder, so
5419 now needs replacing with a real code reference. */
5423 op_free(yylval.opval);
5424 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5425 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5426 PL_last_lop = PL_oldbufptr;
5427 PL_last_lop_op = OP_ENTERSUB;
5428 /* Is there a prototype? */
5436 const char *proto = SvPV_const((SV*)cv, protolen);
5439 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5441 while (*proto == ';')
5443 if (*proto == '&' && *s == '{') {
5444 sv_setpv(PL_subname,
5447 "__ANON__" : "__ANON__::__ANON__"));
5454 PL_nextwhite = PL_thiswhite;
5457 start_force(PL_curforce);
5458 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5461 PL_nextwhite = nextPL_nextwhite;
5462 curmad('X', PL_thistoken);
5463 PL_thistoken = newSVpvs("");
5470 /* Guess harder when madskills require "best effort". */
5471 if (PL_madskills && (!gv || !GvCVu(gv))) {
5472 int probable_sub = 0;
5473 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5475 else if (isALPHA(*s)) {
5479 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5480 if (!keyword(tmpbuf, tmplen, 0))
5483 while (d < PL_bufend && isSPACE(*d))
5485 if (*d == '=' && d[1] == '>')
5490 gv = gv_fetchpv(PL_tokenbuf, TRUE, SVt_PVCV);
5491 op_free(yylval.opval);
5492 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5493 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5494 PL_last_lop = PL_oldbufptr;
5495 PL_last_lop_op = OP_ENTERSUB;
5496 PL_nextwhite = PL_thiswhite;
5498 start_force(PL_curforce);
5499 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5501 PL_nextwhite = nextPL_nextwhite;
5502 curmad('X', PL_thistoken);
5503 PL_thistoken = newSVpvs("");
5508 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5515 /* Call it a bare word */
5517 if (PL_hints & HINT_STRICT_SUBS)
5518 yylval.opval->op_private |= OPpCONST_STRICT;
5521 if (lastchar != '-') {
5522 if (ckWARN(WARN_RESERVED)) {
5526 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
5527 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5534 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5535 && ckWARN_d(WARN_AMBIGUOUS)) {
5536 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5537 "Operator or semicolon missing before %c%s",
5538 lastchar, PL_tokenbuf);
5539 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5540 "Ambiguous use of %c resolved as operator %c",
5541 lastchar, lastchar);
5547 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5548 newSVpv(CopFILE(PL_curcop),0));
5552 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5553 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5556 case KEY___PACKAGE__:
5557 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5559 ? newSVhek(HvNAME_HEK(PL_curstash))
5566 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5567 const char *pname = "main";
5568 if (PL_tokenbuf[2] == 'D')
5569 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5570 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5574 GvIOp(gv) = newIO();
5575 IoIFP(GvIOp(gv)) = PL_rsfp;
5576 #if defined(HAS_FCNTL) && defined(F_SETFD)
5578 const int fd = PerlIO_fileno(PL_rsfp);
5579 fcntl(fd,F_SETFD,fd >= 3);
5582 /* Mark this internal pseudo-handle as clean */
5583 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5585 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5586 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5587 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5589 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5590 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5591 /* if the script was opened in binmode, we need to revert
5592 * it to text mode for compatibility; but only iff it has CRs
5593 * XXX this is a questionable hack at best. */
5594 if (PL_bufend-PL_bufptr > 2
5595 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5598 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5599 loc = PerlIO_tell(PL_rsfp);
5600 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5603 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5605 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5606 #endif /* NETWARE */
5607 #ifdef PERLIO_IS_STDIO /* really? */
5608 # if defined(__BORLANDC__)
5609 /* XXX see note in do_binmode() */
5610 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5614 PerlIO_seek(PL_rsfp, loc, 0);
5618 #ifdef PERLIO_LAYERS
5621 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5622 else if (PL_encoding) {
5629 XPUSHs(PL_encoding);
5631 call_method("name", G_SCALAR);
5635 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5636 Perl_form(aTHX_ ":encoding(%"SVf")",
5645 if (PL_realtokenstart >= 0) {
5646 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5648 PL_endwhite = newSVpvs("");
5649 sv_catsv(PL_endwhite, PL_thiswhite);
5651 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5652 PL_realtokenstart = -1;
5654 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5655 SvCUR(PL_endwhite))) != Nullch) ;
5670 if (PL_expect == XSTATE) {
5677 if (*s == ':' && s[1] == ':') {
5680 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5681 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5682 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5685 else if (tmp == KEY_require || tmp == KEY_do)
5686 /* that's a way to remember we saw "CORE::" */
5699 LOP(OP_ACCEPT,XTERM);
5705 LOP(OP_ATAN2,XTERM);
5711 LOP(OP_BINMODE,XTERM);
5714 LOP(OP_BLESS,XTERM);
5723 /* When 'use switch' is in effect, continue has a dual
5724 life as a control operator. */
5726 if (!FEATURE_IS_ENABLED("switch"))
5729 /* We have to disambiguate the two senses of
5730 "continue". If the next token is a '{' then
5731 treat it as the start of a continue block;
5732 otherwise treat it as a control operator.
5744 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5761 if (!PL_cryptseen) {
5762 PL_cryptseen = TRUE;
5766 LOP(OP_CRYPT,XTERM);
5769 LOP(OP_CHMOD,XTERM);
5772 LOP(OP_CHOWN,XTERM);
5775 LOP(OP_CONNECT,XTERM);
5794 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5795 if (orig_keyword == KEY_do) {
5804 PL_hints |= HINT_BLOCK_SCOPE;
5814 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5815 LOP(OP_DBMOPEN,XTERM);
5821 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5828 yylval.ival = CopLINE(PL_curcop);
5844 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5845 UNIBRACK(OP_ENTEREVAL);
5863 case KEY_endhostent:
5869 case KEY_endservent:
5872 case KEY_endprotoent:
5883 yylval.ival = CopLINE(PL_curcop);
5885 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5888 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5891 if ((PL_bufend - p) >= 3 &&
5892 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5894 else if ((PL_bufend - p) >= 4 &&
5895 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5898 if (isIDFIRST_lazy_if(p,UTF)) {
5899 p = scan_ident(p, PL_bufend,
5900 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5904 Perl_croak(aTHX_ "Missing $ on loop variable");
5906 s = SvPVX(PL_linestr) + soff;
5912 LOP(OP_FORMLINE,XTERM);
5918 LOP(OP_FCNTL,XTERM);
5924 LOP(OP_FLOCK,XTERM);
5933 LOP(OP_GREPSTART, XREF);
5936 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5951 case KEY_getpriority:
5952 LOP(OP_GETPRIORITY,XTERM);
5954 case KEY_getprotobyname:
5957 case KEY_getprotobynumber:
5958 LOP(OP_GPBYNUMBER,XTERM);
5960 case KEY_getprotoent:
5972 case KEY_getpeername:
5973 UNI(OP_GETPEERNAME);
5975 case KEY_gethostbyname:
5978 case KEY_gethostbyaddr:
5979 LOP(OP_GHBYADDR,XTERM);
5981 case KEY_gethostent:
5984 case KEY_getnetbyname:
5987 case KEY_getnetbyaddr:
5988 LOP(OP_GNBYADDR,XTERM);
5993 case KEY_getservbyname:
5994 LOP(OP_GSBYNAME,XTERM);
5996 case KEY_getservbyport:
5997 LOP(OP_GSBYPORT,XTERM);
5999 case KEY_getservent:
6002 case KEY_getsockname:
6003 UNI(OP_GETSOCKNAME);
6005 case KEY_getsockopt:
6006 LOP(OP_GSOCKOPT,XTERM);
6021 yylval.ival = CopLINE(PL_curcop);
6032 yylval.ival = CopLINE(PL_curcop);
6036 LOP(OP_INDEX,XTERM);
6042 LOP(OP_IOCTL,XTERM);
6054 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6086 LOP(OP_LISTEN,XTERM);
6095 s = scan_pat(s,OP_MATCH);
6096 TERM(sublex_start());
6099 LOP(OP_MAPSTART, XREF);
6102 LOP(OP_MKDIR,XTERM);
6105 LOP(OP_MSGCTL,XTERM);
6108 LOP(OP_MSGGET,XTERM);
6111 LOP(OP_MSGRCV,XTERM);
6114 LOP(OP_MSGSND,XTERM);
6121 if (isIDFIRST_lazy_if(s,UTF)) {
6125 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6126 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6128 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6129 if (!PL_in_my_stash) {
6132 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6136 if (PL_madskills) { /* just add type to declarator token */
6137 sv_catsv(PL_thistoken, PL_nextwhite);
6139 sv_catpvn(PL_thistoken, start, s - start);
6147 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6154 s = tokenize_use(0, s);
6158 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6165 if (isIDFIRST_lazy_if(s,UTF)) {
6167 for (d = s; isALNUM_lazy_if(d,UTF);)
6169 for (t=d; isSPACE(*t);)
6171 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6173 && !(t[0] == '=' && t[1] == '>')
6175 int parms_len = (int)(d-s);
6176 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6177 "Precedence problem: open %.*s should be open(%.*s)",
6178 parms_len, s, parms_len, s);
6184 yylval.ival = OP_OR;
6194 LOP(OP_OPEN_DIR,XTERM);
6197 checkcomma(s,PL_tokenbuf,"filehandle");
6201 checkcomma(s,PL_tokenbuf,"filehandle");
6220 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6224 LOP(OP_PIPE_OP,XTERM);
6227 s = scan_str(s,!!PL_madskills,FALSE);
6230 yylval.ival = OP_CONST;
6231 TERM(sublex_start());
6237 s = scan_str(s,!!PL_madskills,FALSE);
6240 PL_expect = XOPERATOR;
6242 if (SvCUR(PL_lex_stuff)) {
6245 d = SvPV_force(PL_lex_stuff, len);
6247 for (; isSPACE(*d) && len; --len, ++d)
6252 if (!warned && ckWARN(WARN_QW)) {
6253 for (; !isSPACE(*d) && len; --len, ++d) {
6255 Perl_warner(aTHX_ packWARN(WARN_QW),
6256 "Possible attempt to separate words with commas");
6259 else if (*d == '#') {
6260 Perl_warner(aTHX_ packWARN(WARN_QW),
6261 "Possible attempt to put comments in qw() list");
6267 for (; !isSPACE(*d) && len; --len, ++d)
6270 sv = newSVpvn(b, d-b);
6271 if (DO_UTF8(PL_lex_stuff))
6273 words = append_elem(OP_LIST, words,
6274 newSVOP(OP_CONST, 0, tokeq(sv)));
6278 start_force(PL_curforce);
6279 NEXTVAL_NEXTTOKE.opval = words;
6284 SvREFCNT_dec(PL_lex_stuff);
6285 PL_lex_stuff = NULL;
6291 s = scan_str(s,!!PL_madskills,FALSE);
6294 yylval.ival = OP_STRINGIFY;
6295 if (SvIVX(PL_lex_stuff) == '\'')
6296 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6297 TERM(sublex_start());
6300 s = scan_pat(s,OP_QR);
6301 TERM(sublex_start());
6304 s = scan_str(s,!!PL_madskills,FALSE);
6307 readpipe_override();
6308 TERM(sublex_start());
6316 s = force_version(s, FALSE);
6318 else if (*s != 'v' || !isDIGIT(s[1])
6319 || (s = force_version(s, TRUE), *s == 'v'))
6321 *PL_tokenbuf = '\0';
6322 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6323 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6324 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
6326 yyerror("<> should be quotes");
6328 if (orig_keyword == KEY_require) {
6336 PL_last_uni = PL_oldbufptr;
6337 PL_last_lop_op = OP_REQUIRE;
6339 return REPORT( (int)REQUIRE );
6345 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6349 LOP(OP_RENAME,XTERM);
6358 LOP(OP_RINDEX,XTERM);
6368 UNIDOR(OP_READLINE);
6381 LOP(OP_REVERSE,XTERM);
6384 UNIDOR(OP_READLINK);
6392 TERM(sublex_start());
6394 TOKEN(1); /* force error */
6397 checkcomma(s,PL_tokenbuf,"filehandle");
6407 LOP(OP_SELECT,XTERM);
6413 LOP(OP_SEMCTL,XTERM);
6416 LOP(OP_SEMGET,XTERM);
6419 LOP(OP_SEMOP,XTERM);
6425 LOP(OP_SETPGRP,XTERM);
6427 case KEY_setpriority:
6428 LOP(OP_SETPRIORITY,XTERM);
6430 case KEY_sethostent:
6436 case KEY_setservent:
6439 case KEY_setprotoent:
6449 LOP(OP_SEEKDIR,XTERM);
6451 case KEY_setsockopt:
6452 LOP(OP_SSOCKOPT,XTERM);
6458 LOP(OP_SHMCTL,XTERM);
6461 LOP(OP_SHMGET,XTERM);
6464 LOP(OP_SHMREAD,XTERM);
6467 LOP(OP_SHMWRITE,XTERM);
6470 LOP(OP_SHUTDOWN,XTERM);
6479 LOP(OP_SOCKET,XTERM);
6481 case KEY_socketpair:
6482 LOP(OP_SOCKPAIR,XTERM);
6485 checkcomma(s,PL_tokenbuf,"subroutine name");
6487 if (*s == ';' || *s == ')') /* probably a close */
6488 Perl_croak(aTHX_ "sort is now a reserved word");
6490 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6494 LOP(OP_SPLIT,XTERM);
6497 LOP(OP_SPRINTF,XTERM);
6500 LOP(OP_SPLICE,XTERM);
6515 LOP(OP_SUBSTR,XTERM);
6521 char tmpbuf[sizeof PL_tokenbuf];
6522 SSize_t tboffset = 0;
6523 expectation attrful;
6524 bool have_name, have_proto;
6525 const int key = tmp;
6530 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6531 SV *subtoken = newSVpvn(tstart, s - tstart);
6535 s = SKIPSPACE2(s,tmpwhite);
6540 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6541 (*s == ':' && s[1] == ':'))
6548 attrful = XATTRBLOCK;
6549 /* remember buffer pos'n for later force_word */
6550 tboffset = s - PL_oldbufptr;
6551 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6554 nametoke = newSVpvn(s, d - s);
6556 if (strchr(tmpbuf, ':'))
6557 sv_setpv(PL_subname, tmpbuf);
6559 sv_setsv(PL_subname,PL_curstname);
6560 sv_catpvs(PL_subname,"::");
6561 sv_catpvn(PL_subname,tmpbuf,len);
6568 CURMAD('X', nametoke);
6569 CURMAD('_', tmpwhite);
6570 (void) force_word(PL_oldbufptr + tboffset, WORD,
6573 s = SKIPSPACE2(d,tmpwhite);
6580 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6581 PL_expect = XTERMBLOCK;
6582 attrful = XATTRTERM;
6583 sv_setpvn(PL_subname,"?",1);
6587 if (key == KEY_format) {
6589 PL_lex_formbrack = PL_lex_brackets + 1;
6591 PL_thistoken = subtoken;
6595 (void) force_word(PL_oldbufptr + tboffset, WORD,
6601 /* Look for a prototype */
6604 bool bad_proto = FALSE;
6605 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6607 s = scan_str(s,!!PL_madskills,FALSE);
6609 Perl_croak(aTHX_ "Prototype not terminated");
6610 /* strip spaces and check for bad characters */
6611 d = SvPVX(PL_lex_stuff);
6613 for (p = d; *p; ++p) {
6616 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6622 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6623 "Illegal character in prototype for %"SVf" : %s",
6624 (void*)PL_subname, d);
6625 SvCUR_set(PL_lex_stuff, tmp);
6630 CURMAD('q', PL_thisopen);
6631 CURMAD('_', tmpwhite);
6632 CURMAD('=', PL_thisstuff);
6633 CURMAD('Q', PL_thisclose);
6634 NEXTVAL_NEXTTOKE.opval =
6635 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6636 PL_lex_stuff = Nullsv;
6639 s = SKIPSPACE2(s,tmpwhite);
6647 if (*s == ':' && s[1] != ':')
6648 PL_expect = attrful;
6649 else if (*s != '{' && key == KEY_sub) {
6651 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6653 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, (void*)PL_subname);
6660 curmad('^', newSVpvs(""));
6661 CURMAD('_', tmpwhite);
6665 PL_thistoken = subtoken;
6668 NEXTVAL_NEXTTOKE.opval =
6669 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6670 PL_lex_stuff = NULL;
6675 sv_setpv(PL_subname,
6677 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6681 (void) force_word(PL_oldbufptr + tboffset, WORD,
6691 LOP(OP_SYSTEM,XREF);
6694 LOP(OP_SYMLINK,XTERM);
6697 LOP(OP_SYSCALL,XTERM);
6700 LOP(OP_SYSOPEN,XTERM);
6703 LOP(OP_SYSSEEK,XTERM);
6706 LOP(OP_SYSREAD,XTERM);
6709 LOP(OP_SYSWRITE,XTERM);
6713 TERM(sublex_start());
6734 LOP(OP_TRUNCATE,XTERM);
6746 yylval.ival = CopLINE(PL_curcop);
6750 yylval.ival = CopLINE(PL_curcop);
6754 LOP(OP_UNLINK,XTERM);
6760 LOP(OP_UNPACK,XTERM);
6763 LOP(OP_UTIME,XTERM);
6769 LOP(OP_UNSHIFT,XTERM);
6772 s = tokenize_use(1, s);
6782 yylval.ival = CopLINE(PL_curcop);
6786 yylval.ival = CopLINE(PL_curcop);
6790 PL_hints |= HINT_BLOCK_SCOPE;
6797 LOP(OP_WAITPID,XTERM);
6806 ctl_l[0] = toCTRL('L');
6808 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6811 /* Make sure $^L is defined */
6812 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6817 if (PL_expect == XOPERATOR)
6823 yylval.ival = OP_XOR;
6828 TERM(sublex_start());
6833 #pragma segment Main
6837 S_pending_ident(pTHX)
6842 /* pit holds the identifier we read and pending_ident is reset */
6843 char pit = PL_pending_ident;
6844 PL_pending_ident = 0;
6846 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6847 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6848 "### Pending identifier '%s'\n", PL_tokenbuf); });
6850 /* if we're in a my(), we can't allow dynamics here.
6851 $foo'bar has already been turned into $foo::bar, so
6852 just check for colons.
6854 if it's a legal name, the OP is a PADANY.
6857 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6858 if (strchr(PL_tokenbuf,':'))
6859 yyerror(Perl_form(aTHX_ "No package name allowed for "
6860 "variable %s in \"our\"",
6862 tmp = allocmy(PL_tokenbuf);
6865 if (strchr(PL_tokenbuf,':'))
6866 yyerror(Perl_form(aTHX_ PL_no_myglob,
6867 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6869 yylval.opval = newOP(OP_PADANY, 0);
6870 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6876 build the ops for accesses to a my() variable.
6878 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6879 then used in a comparison. This catches most, but not
6880 all cases. For instance, it catches
6881 sort { my($a); $a <=> $b }
6883 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6884 (although why you'd do that is anyone's guess).
6887 if (!strchr(PL_tokenbuf,':')) {
6889 tmp = pad_findmy(PL_tokenbuf);
6890 if (tmp != NOT_IN_PAD) {
6891 /* might be an "our" variable" */
6892 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6893 /* build ops for a bareword */
6894 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6895 HEK * const stashname = HvNAME_HEK(stash);
6896 SV * const sym = newSVhek(stashname);
6897 sv_catpvs(sym, "::");
6898 sv_catpv(sym, PL_tokenbuf+1);
6899 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6900 yylval.opval->op_private = OPpCONST_ENTERED;
6903 ? (GV_ADDMULTI | GV_ADDINEVAL)
6906 ((PL_tokenbuf[0] == '$') ? SVt_PV
6907 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6912 /* if it's a sort block and they're naming $a or $b */
6913 if (PL_last_lop_op == OP_SORT &&
6914 PL_tokenbuf[0] == '$' &&
6915 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6918 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6919 d < PL_bufend && *d != '\n';
6922 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6923 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6929 yylval.opval = newOP(OP_PADANY, 0);
6930 yylval.opval->op_targ = tmp;
6936 Whine if they've said @foo in a doublequoted string,
6937 and @foo isn't a variable we can find in the symbol
6940 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
6941 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
6942 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
6943 && ckWARN(WARN_AMBIGUOUS))
6945 /* Downgraded from fatal to warning 20000522 mjd */
6946 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6947 "Possible unintended interpolation of %s in string",
6952 /* build ops for a bareword */
6953 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
6954 yylval.opval->op_private = OPpCONST_ENTERED;
6957 /* If the identifier refers to a stash, don't autovivify it.
6958 * Change 24660 had the side effect of causing symbol table
6959 * hashes to always be defined, even if they were freshly
6960 * created and the only reference in the entire program was
6961 * the single statement with the defined %foo::bar:: test.
6962 * It appears that all code in the wild doing this actually
6963 * wants to know whether sub-packages have been loaded, so
6964 * by avoiding auto-vivifying symbol tables, we ensure that
6965 * defined %foo::bar:: continues to be false, and the existing
6966 * tests still give the expected answers, even though what
6967 * they're actually testing has now changed subtly.
6969 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
6971 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
6972 ((PL_tokenbuf[0] == '$') ? SVt_PV
6973 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6979 * The following code was generated by perl_keyword.pl.
6983 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
6988 case 1: /* 5 tokens of length 1 */
7020 case 2: /* 18 tokens of length 2 */
7166 case 3: /* 29 tokens of length 3 */
7170 if (name[1] == 'N' &&
7233 if (name[1] == 'i' &&
7255 return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7273 if (name[1] == 'o' &&
7282 if (name[1] == 'e' &&
7291 if (name[1] == 'n' &&
7300 if (name[1] == 'o' &&
7309 if (name[1] == 'a' &&
7318 if (name[1] == 'o' &&
7380 if (name[1] == 'e' &&
7394 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7420 if (name[1] == 'i' &&
7429 if (name[1] == 's' &&
7438 if (name[1] == 'e' &&
7447 if (name[1] == 'o' &&
7459 case 4: /* 41 tokens of length 4 */
7463 if (name[1] == 'O' &&
7473 if (name[1] == 'N' &&
7483 if (name[1] == 'i' &&
7493 if (name[1] == 'h' &&
7503 if (name[1] == 'u' &&
7516 if (name[2] == 'c' &&
7525 if (name[2] == 's' &&
7534 if (name[2] == 'a' &&
7570 if (name[1] == 'o' &&
7583 if (name[2] == 't' &&
7592 if (name[2] == 'o' &&
7601 if (name[2] == 't' &&
7610 if (name[2] == 'e' &&
7623 if (name[1] == 'o' &&
7636 if (name[2] == 'y' &&
7645 if (name[2] == 'l' &&
7661 if (name[2] == 's' &&
7670 if (name[2] == 'n' &&
7679 if (name[2] == 'c' &&
7692 if (name[1] == 'e' &&
7702 if (name[1] == 'p' &&
7715 if (name[2] == 'c' &&
7724 if (name[2] == 'p' &&
7733 if (name[2] == 's' &&
7749 if (name[2] == 'n' &&
7819 if (name[2] == 'r' &&
7828 if (name[2] == 'r' &&
7837 if (name[2] == 'a' &&
7853 if (name[2] == 'l' &&
7915 if (name[2] == 'e' &&
7918 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
7931 case 5: /* 39 tokens of length 5 */
7935 if (name[1] == 'E' &&
7946 if (name[1] == 'H' &&
7960 if (name[2] == 'a' &&
7970 if (name[2] == 'a' &&
7987 if (name[2] == 'e' &&
7997 if (name[2] == 'e' &&
8001 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8017 if (name[3] == 'i' &&
8026 if (name[3] == 'o' &&
8062 if (name[2] == 'o' &&
8072 if (name[2] == 'y' &&
8086 if (name[1] == 'l' &&
8100 if (name[2] == 'n' &&
8110 if (name[2] == 'o' &&
8124 if (name[1] == 'i' &&
8129 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8138 if (name[2] == 'd' &&
8148 if (name[2] == 'c' &&
8165 if (name[2] == 'c' &&
8175 if (name[2] == 't' &&
8189 if (name[1] == 'k' &&
8200 if (name[1] == 'r' &&
8214 if (name[2] == 's' &&
8224 if (name[2] == 'd' &&
8241 if (name[2] == 'm' &&
8251 if (name[2] == 'i' &&
8261 if (name[2] == 'e' &&
8271 if (name[2] == 'l' &&
8281 if (name[2] == 'a' &&
8294 if (name[3] == 't' &&
8297 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8303 if (name[3] == 'd' &&
8320 if (name[1] == 'i' &&
8334 if (name[2] == 'a' &&
8347 if (name[3] == 'e' &&
8382 if (name[2] == 'i' &&
8399 if (name[2] == 'i' &&
8409 if (name[2] == 'i' &&
8426 case 6: /* 33 tokens of length 6 */
8430 if (name[1] == 'c' &&
8445 if (name[2] == 'l' &&
8456 if (name[2] == 'r' &&
8471 if (name[1] == 'e' &&
8486 if (name[2] == 's' &&
8491 if(ckWARN_d(WARN_SYNTAX))
8492 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8498 if (name[2] == 'i' &&
8516 if (name[2] == 'l' &&
8527 if (name[2] == 'r' &&
8542 if (name[1] == 'm' &&
8557 if (name[2] == 'n' &&
8568 if (name[2] == 's' &&
8583 if (name[1] == 's' &&
8589 if (name[4] == 't' &&
8598 if (name[4] == 'e' &&
8607 if (name[4] == 'c' &&
8616 if (name[4] == 'n' &&
8632 if (name[1] == 'r' &&
8650 if (name[3] == 'a' &&
8660 if (name[3] == 'u' &&
8674 if (name[2] == 'n' &&
8692 if (name[2] == 'a' &&
8706 if (name[3] == 'e' &&
8719 if (name[4] == 't' &&
8728 if (name[4] == 'e' &&
8750 if (name[4] == 't' &&
8759 if (name[4] == 'e' &&
8775 if (name[2] == 'c' &&
8786 if (name[2] == 'l' &&
8797 if (name[2] == 'b' &&
8808 if (name[2] == 's' &&
8831 if (name[4] == 's' &&
8840 if (name[4] == 'n' &&
8853 if (name[3] == 'a' &&
8870 if (name[1] == 'a' &&
8885 case 7: /* 29 tokens of length 7 */
8889 if (name[1] == 'E' &&
8902 if (name[1] == '_' &&
8915 if (name[1] == 'i' &&
8922 return -KEY_binmode;
8928 if (name[1] == 'o' &&
8935 return -KEY_connect;
8944 if (name[2] == 'm' &&
8950 return -KEY_dbmopen;
8961 if (name[4] == 'u' &&
8965 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
8971 if (name[4] == 'n' &&
8992 if (name[1] == 'o' &&
9005 if (name[1] == 'e' &&
9012 if (name[5] == 'r' &&
9015 return -KEY_getpgrp;
9021 if (name[5] == 'i' &&
9024 return -KEY_getppid;
9037 if (name[1] == 'c' &&
9044 return -KEY_lcfirst;
9050 if (name[1] == 'p' &&
9057 return -KEY_opendir;
9063 if (name[1] == 'a' &&
9081 if (name[3] == 'd' &&
9086 return -KEY_readdir;
9092 if (name[3] == 'u' &&
9103 if (name[3] == 'e' &&
9108 return -KEY_reverse;
9127 if (name[3] == 'k' &&
9132 return -KEY_seekdir;
9138 if (name[3] == 'p' &&
9143 return -KEY_setpgrp;
9153 if (name[2] == 'm' &&
9159 return -KEY_shmread;
9165 if (name[2] == 'r' &&
9171 return -KEY_sprintf;
9180 if (name[3] == 'l' &&
9185 return -KEY_symlink;
9194 if (name[4] == 'a' &&
9198 return -KEY_syscall;
9204 if (name[4] == 'p' &&
9208 return -KEY_sysopen;
9214 if (name[4] == 'e' &&
9218 return -KEY_sysread;
9224 if (name[4] == 'e' &&
9228 return -KEY_sysseek;
9246 if (name[1] == 'e' &&
9253 return -KEY_telldir;
9262 if (name[2] == 'f' &&
9268 return -KEY_ucfirst;
9274 if (name[2] == 's' &&
9280 return -KEY_unshift;
9290 if (name[1] == 'a' &&
9297 return -KEY_waitpid;
9306 case 8: /* 26 tokens of length 8 */
9310 if (name[1] == 'U' &&
9318 return KEY_AUTOLOAD;
9329 if (name[3] == 'A' &&
9335 return KEY___DATA__;
9341 if (name[3] == 'I' &&
9347 return -KEY___FILE__;
9353 if (name[3] == 'I' &&
9359 return -KEY___LINE__;
9375 if (name[2] == 'o' &&
9382 return -KEY_closedir;
9388 if (name[2] == 'n' &&
9395 return -KEY_continue;
9405 if (name[1] == 'b' &&
9413 return -KEY_dbmclose;
9419 if (name[1] == 'n' &&
9425 if (name[4] == 'r' &&
9430 return -KEY_endgrent;
9436 if (name[4] == 'w' &&
9441 return -KEY_endpwent;
9454 if (name[1] == 'o' &&
9462 return -KEY_formline;
9468 if (name[1] == 'e' &&
9479 if (name[6] == 'n' &&
9482 return -KEY_getgrent;
9488 if (name[6] == 'i' &&
9491 return -KEY_getgrgid;
9497 if (name[6] == 'a' &&
9500 return -KEY_getgrnam;
9513 if (name[4] == 'o' &&
9518 return -KEY_getlogin;
9529 if (name[6] == 'n' &&
9532 return -KEY_getpwent;
9538 if (name[6] == 'a' &&
9541 return -KEY_getpwnam;
9547 if (name[6] == 'i' &&
9550 return -KEY_getpwuid;
9570 if (name[1] == 'e' &&
9577 if (name[5] == 'i' &&
9584 return -KEY_readline;
9589 return -KEY_readlink;
9600 if (name[5] == 'i' &&
9604 return -KEY_readpipe;
9625 if (name[4] == 'r' &&
9630 return -KEY_setgrent;
9636 if (name[4] == 'w' &&
9641 return -KEY_setpwent;
9657 if (name[3] == 'w' &&
9663 return -KEY_shmwrite;
9669 if (name[3] == 't' &&
9675 return -KEY_shutdown;
9685 if (name[2] == 's' &&
9692 return -KEY_syswrite;
9702 if (name[1] == 'r' &&
9710 return -KEY_truncate;
9719 case 9: /* 9 tokens of length 9 */
9723 if (name[1] == 'N' &&
9732 return KEY_UNITCHECK;
9738 if (name[1] == 'n' &&
9747 return -KEY_endnetent;
9753 if (name[1] == 'e' &&
9762 return -KEY_getnetent;
9768 if (name[1] == 'o' &&
9777 return -KEY_localtime;
9783 if (name[1] == 'r' &&
9792 return KEY_prototype;
9798 if (name[1] == 'u' &&
9807 return -KEY_quotemeta;
9813 if (name[1] == 'e' &&
9822 return -KEY_rewinddir;
9828 if (name[1] == 'e' &&
9837 return -KEY_setnetent;
9843 if (name[1] == 'a' &&
9852 return -KEY_wantarray;
9861 case 10: /* 9 tokens of length 10 */
9865 if (name[1] == 'n' &&
9871 if (name[4] == 'o' &&
9878 return -KEY_endhostent;
9884 if (name[4] == 'e' &&
9891 return -KEY_endservent;
9904 if (name[1] == 'e' &&
9910 if (name[4] == 'o' &&
9917 return -KEY_gethostent;
9926 if (name[5] == 'r' &&
9932 return -KEY_getservent;
9938 if (name[5] == 'c' &&
9944 return -KEY_getsockopt;
9969 if (name[4] == 'o' &&
9976 return -KEY_sethostent;
9985 if (name[5] == 'r' &&
9991 return -KEY_setservent;
9997 if (name[5] == 'c' &&
10003 return -KEY_setsockopt;
10020 if (name[2] == 'c' &&
10029 return -KEY_socketpair;
10042 case 11: /* 8 tokens of length 11 */
10046 if (name[1] == '_' &&
10056 { /* __PACKAGE__ */
10057 return -KEY___PACKAGE__;
10063 if (name[1] == 'n' &&
10073 { /* endprotoent */
10074 return -KEY_endprotoent;
10080 if (name[1] == 'e' &&
10089 if (name[5] == 'e' &&
10095 { /* getpeername */
10096 return -KEY_getpeername;
10105 if (name[6] == 'o' &&
10110 { /* getpriority */
10111 return -KEY_getpriority;
10117 if (name[6] == 't' &&
10122 { /* getprotoent */
10123 return -KEY_getprotoent;
10137 if (name[4] == 'o' &&
10144 { /* getsockname */
10145 return -KEY_getsockname;
10158 if (name[1] == 'e' &&
10166 if (name[6] == 'o' &&
10171 { /* setpriority */
10172 return -KEY_setpriority;
10178 if (name[6] == 't' &&
10183 { /* setprotoent */
10184 return -KEY_setprotoent;
10200 case 12: /* 2 tokens of length 12 */
10201 if (name[0] == 'g' &&
10213 if (name[9] == 'd' &&
10216 { /* getnetbyaddr */
10217 return -KEY_getnetbyaddr;
10223 if (name[9] == 'a' &&
10226 { /* getnetbyname */
10227 return -KEY_getnetbyname;
10239 case 13: /* 4 tokens of length 13 */
10240 if (name[0] == 'g' &&
10247 if (name[4] == 'o' &&
10256 if (name[10] == 'd' &&
10259 { /* gethostbyaddr */
10260 return -KEY_gethostbyaddr;
10266 if (name[10] == 'a' &&
10269 { /* gethostbyname */
10270 return -KEY_gethostbyname;
10283 if (name[4] == 'e' &&
10292 if (name[10] == 'a' &&
10295 { /* getservbyname */
10296 return -KEY_getservbyname;
10302 if (name[10] == 'o' &&
10305 { /* getservbyport */
10306 return -KEY_getservbyport;
10325 case 14: /* 1 tokens of length 14 */
10326 if (name[0] == 'g' &&
10340 { /* getprotobyname */
10341 return -KEY_getprotobyname;
10346 case 16: /* 1 tokens of length 16 */
10347 if (name[0] == 'g' &&
10363 { /* getprotobynumber */
10364 return -KEY_getprotobynumber;
10378 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10382 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10383 if (ckWARN(WARN_SYNTAX)) {
10386 for (w = s+2; *w && level; w++) {
10389 else if (*w == ')')
10392 while (isSPACE(*w))
10394 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
10395 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10396 "%s (...) interpreted as function",name);
10399 while (s < PL_bufend && isSPACE(*s))
10403 while (s < PL_bufend && isSPACE(*s))
10405 if (isIDFIRST_lazy_if(s,UTF)) {
10406 const char * const w = s++;
10407 while (isALNUM_lazy_if(s,UTF))
10409 while (s < PL_bufend && isSPACE(*s))
10413 if (keyword(w, s - w, 0))
10416 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10417 if (gv && GvCVu(gv))
10419 Perl_croak(aTHX_ "No comma allowed after %s", what);
10424 /* Either returns sv, or mortalizes sv and returns a new SV*.
10425 Best used as sv=new_constant(..., sv, ...).
10426 If s, pv are NULL, calls subroutine with one argument,
10427 and type is used with error messages only. */
10430 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10434 HV * const table = GvHV(PL_hintgv); /* ^H */
10438 const char *why1 = "", *why2 = "", *why3 = "";
10440 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10443 why2 = (const char *)
10444 (strEQ(key,"charnames")
10445 ? "(possibly a missing \"use charnames ...\")"
10447 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10448 (type ? type: "undef"), why2);
10450 /* This is convoluted and evil ("goto considered harmful")
10451 * but I do not understand the intricacies of all the different
10452 * failure modes of %^H in here. The goal here is to make
10453 * the most probable error message user-friendly. --jhi */
10458 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10459 (type ? type: "undef"), why1, why2, why3);
10461 yyerror(SvPVX_const(msg));
10465 cvp = hv_fetch(table, key, strlen(key), FALSE);
10466 if (!cvp || !SvOK(*cvp)) {
10469 why3 = "} is not defined";
10472 sv_2mortal(sv); /* Parent created it permanently */
10475 pv = sv_2mortal(newSVpvn(s, len));
10477 typesv = sv_2mortal(newSVpv(type, 0));
10479 typesv = &PL_sv_undef;
10481 PUSHSTACKi(PERLSI_OVERLOAD);
10493 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10497 /* Check the eval first */
10498 if (!PL_in_eval && SvTRUE(ERRSV)) {
10499 sv_catpvs(ERRSV, "Propagated");
10500 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10502 res = SvREFCNT_inc_simple(sv);
10506 SvREFCNT_inc_simple_void(res);
10515 why1 = "Call to &{$^H{";
10517 why3 = "}} did not return a defined value";
10525 /* Returns a NUL terminated string, with the length of the string written to
10529 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10532 register char *d = dest;
10533 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10536 Perl_croak(aTHX_ ident_too_long);
10537 if (isALNUM(*s)) /* UTF handled below */
10539 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10544 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10548 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10549 char *t = s + UTF8SKIP(s);
10551 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10555 Perl_croak(aTHX_ ident_too_long);
10556 Copy(s, d, len, char);
10569 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10572 char *bracket = NULL;
10574 register char *d = dest;
10575 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10580 while (isDIGIT(*s)) {
10582 Perl_croak(aTHX_ ident_too_long);
10589 Perl_croak(aTHX_ ident_too_long);
10590 if (isALNUM(*s)) /* UTF handled below */
10592 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10597 else if (*s == ':' && s[1] == ':') {
10601 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10602 char *t = s + UTF8SKIP(s);
10603 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10605 if (d + (t - s) > e)
10606 Perl_croak(aTHX_ ident_too_long);
10607 Copy(s, d, t - s, char);
10618 if (PL_lex_state != LEX_NORMAL)
10619 PL_lex_state = LEX_INTERPENDMAYBE;
10622 if (*s == '$' && s[1] &&
10623 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10636 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10641 if (isSPACE(s[-1])) {
10643 const char ch = *s++;
10644 if (!SPACE_OR_TAB(ch)) {
10650 if (isIDFIRST_lazy_if(d,UTF)) {
10654 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10655 end += UTF8SKIP(end);
10656 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10657 end += UTF8SKIP(end);
10659 Copy(s, d, end - s, char);
10664 while ((isALNUM(*s) || *s == ':') && d < e)
10667 Perl_croak(aTHX_ ident_too_long);
10670 while (s < send && SPACE_OR_TAB(*s))
10672 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10673 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10674 const char * const brack =
10676 ((*s == '[') ? "[...]" : "{...}");
10677 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10678 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10679 funny, dest, brack, funny, dest, brack);
10682 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10686 /* Handle extended ${^Foo} variables
10687 * 1999-02-27 mjd-perl-patch@plover.com */
10688 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10692 while (isALNUM(*s) && d < e) {
10696 Perl_croak(aTHX_ ident_too_long);
10701 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10702 PL_lex_state = LEX_INTERPEND;
10705 if (PL_lex_state == LEX_NORMAL) {
10706 if (ckWARN(WARN_AMBIGUOUS) &&
10707 (keyword(dest, d - dest, 0) || get_cv(dest, FALSE)))
10711 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10712 "Ambiguous use of %c{%s} resolved to %c%s",
10713 funny, dest, funny, dest);
10718 s = bracket; /* let the parser handle it */
10722 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10723 PL_lex_state = LEX_INTERPEND;
10728 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10730 PERL_UNUSED_CONTEXT;
10733 else if (ch == 'g')
10734 *pmfl |= PMf_GLOBAL;
10735 else if (ch == 'c')
10736 *pmfl |= PMf_CONTINUE;
10737 else if (ch == 'o')
10739 else if (ch == 'm')
10740 *pmfl |= PMf_MULTILINE;
10741 else if (ch == 's')
10742 *pmfl |= PMf_SINGLELINE;
10743 else if (ch == 'x')
10744 *pmfl |= PMf_EXTENDED;
10748 S_scan_pat(pTHX_ char *start, I32 type)
10752 char *s = scan_str(start,!!PL_madskills,FALSE);
10753 const char * const valid_flags =
10754 (const char *)((type == OP_QR) ? "iomsx" : "iogcmsx");
10761 const char * const delimiter = skipspace(start);
10765 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10766 : "Search pattern not terminated" ));
10769 pm = (PMOP*)newPMOP(type, 0);
10770 if (PL_multi_open == '?')
10771 pm->op_pmflags |= PMf_ONCE;
10775 while (*s && strchr(valid_flags, *s))
10776 pmflag(&pm->op_pmflags,*s++);
10778 if (PL_madskills && modstart != s) {
10779 SV* tmptoken = newSVpvn(modstart, s - modstart);
10780 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10783 /* issue a warning if /c is specified,but /g is not */
10784 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10785 && ckWARN(WARN_REGEXP))
10787 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
10790 pm->op_pmpermflags = pm->op_pmflags;
10792 PL_lex_op = (OP*)pm;
10793 yylval.ival = OP_MATCH;
10798 S_scan_subst(pTHX_ char *start)
10809 yylval.ival = OP_NULL;
10811 s = scan_str(start,!!PL_madskills,FALSE);
10814 Perl_croak(aTHX_ "Substitution pattern not terminated");
10816 if (s[-1] == PL_multi_open)
10819 if (PL_madskills) {
10820 CURMAD('q', PL_thisopen);
10821 CURMAD('_', PL_thiswhite);
10822 CURMAD('E', PL_thisstuff);
10823 CURMAD('Q', PL_thisclose);
10824 PL_realtokenstart = s - SvPVX(PL_linestr);
10828 first_start = PL_multi_start;
10829 s = scan_str(s,!!PL_madskills,FALSE);
10831 if (PL_lex_stuff) {
10832 SvREFCNT_dec(PL_lex_stuff);
10833 PL_lex_stuff = NULL;
10835 Perl_croak(aTHX_ "Substitution replacement not terminated");
10837 PL_multi_start = first_start; /* so whole substitution is taken together */
10839 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10842 if (PL_madskills) {
10843 CURMAD('z', PL_thisopen);
10844 CURMAD('R', PL_thisstuff);
10845 CURMAD('Z', PL_thisclose);
10855 else if (strchr("iogcmsx", *s))
10856 pmflag(&pm->op_pmflags,*s++);
10862 if (PL_madskills) {
10864 curmad('m', newSVpvn(modstart, s - modstart));
10865 append_madprops(PL_thismad, (OP*)pm, 0);
10869 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10870 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10874 SV * const repl = newSVpvs("");
10876 PL_sublex_info.super_bufptr = s;
10877 PL_sublex_info.super_bufend = PL_bufend;
10879 pm->op_pmflags |= PMf_EVAL;
10881 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
10882 sv_catpvs(repl, "{");
10883 sv_catsv(repl, PL_lex_repl);
10884 if (strchr(SvPVX(PL_lex_repl), '#'))
10885 sv_catpvs(repl, "\n");
10886 sv_catpvs(repl, "}");
10888 SvREFCNT_dec(PL_lex_repl);
10889 PL_lex_repl = repl;
10892 pm->op_pmpermflags = pm->op_pmflags;
10893 PL_lex_op = (OP*)pm;
10894 yylval.ival = OP_SUBST;
10899 S_scan_trans(pTHX_ char *start)
10912 yylval.ival = OP_NULL;
10914 s = scan_str(start,!!PL_madskills,FALSE);
10916 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10918 if (s[-1] == PL_multi_open)
10921 if (PL_madskills) {
10922 CURMAD('q', PL_thisopen);
10923 CURMAD('_', PL_thiswhite);
10924 CURMAD('E', PL_thisstuff);
10925 CURMAD('Q', PL_thisclose);
10926 PL_realtokenstart = s - SvPVX(PL_linestr);
10930 s = scan_str(s,!!PL_madskills,FALSE);
10932 if (PL_lex_stuff) {
10933 SvREFCNT_dec(PL_lex_stuff);
10934 PL_lex_stuff = NULL;
10936 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10938 if (PL_madskills) {
10939 CURMAD('z', PL_thisopen);
10940 CURMAD('R', PL_thisstuff);
10941 CURMAD('Z', PL_thisclose);
10944 complement = del = squash = 0;
10951 complement = OPpTRANS_COMPLEMENT;
10954 del = OPpTRANS_DELETE;
10957 squash = OPpTRANS_SQUASH;
10966 Newx(tbl, complement&&!del?258:256, short);
10967 o = newPVOP(OP_TRANS, 0, (char*)tbl);
10968 o->op_private &= ~OPpTRANS_ALL;
10969 o->op_private |= del|squash|complement|
10970 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
10971 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
10974 yylval.ival = OP_TRANS;
10977 if (PL_madskills) {
10979 curmad('m', newSVpvn(modstart, s - modstart));
10980 append_madprops(PL_thismad, o, 0);
10989 S_scan_heredoc(pTHX_ register char *s)
10993 I32 op_type = OP_SCALAR;
10997 const char *found_newline;
11001 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11003 I32 stuffstart = s - SvPVX(PL_linestr);
11006 PL_realtokenstart = -1;
11011 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11015 while (SPACE_OR_TAB(*peek))
11017 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11020 s = delimcpy(d, e, s, PL_bufend, term, &len);
11030 if (!isALNUM_lazy_if(s,UTF))
11031 deprecate_old("bare << to mean <<\"\"");
11032 for (; isALNUM_lazy_if(s,UTF); s++) {
11037 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11038 Perl_croak(aTHX_ "Delimiter for here document is too long");
11041 len = d - PL_tokenbuf;
11044 if (PL_madskills) {
11045 tstart = PL_tokenbuf + !outer;
11046 PL_thisclose = newSVpvn(tstart, len - !outer);
11047 tstart = SvPVX(PL_linestr) + stuffstart;
11048 PL_thisopen = newSVpvn(tstart, s - tstart);
11049 stuffstart = s - SvPVX(PL_linestr);
11052 #ifndef PERL_STRICT_CR
11053 d = strchr(s, '\r');
11055 char * const olds = s;
11057 while (s < PL_bufend) {
11063 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11072 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11079 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11080 herewas = newSVpvn(s,PL_bufend-s);
11084 herewas = newSVpvn(s-1,found_newline-s+1);
11087 herewas = newSVpvn(s,found_newline-s);
11091 if (PL_madskills) {
11092 tstart = SvPVX(PL_linestr) + stuffstart;
11094 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11096 PL_thisstuff = newSVpvn(tstart, s - tstart);
11099 s += SvCUR(herewas);
11102 stuffstart = s - SvPVX(PL_linestr);
11108 tmpstr = newSV(79);
11109 sv_upgrade(tmpstr, SVt_PVIV);
11110 if (term == '\'') {
11111 op_type = OP_CONST;
11112 SvIV_set(tmpstr, -1);
11114 else if (term == '`') {
11115 op_type = OP_BACKTICK;
11116 SvIV_set(tmpstr, '\\');
11120 PL_multi_start = CopLINE(PL_curcop);
11121 PL_multi_open = PL_multi_close = '<';
11122 term = *PL_tokenbuf;
11123 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11124 char * const bufptr = PL_sublex_info.super_bufptr;
11125 char * const bufend = PL_sublex_info.super_bufend;
11126 char * const olds = s - SvCUR(herewas);
11127 s = strchr(bufptr, '\n');
11131 while (s < bufend &&
11132 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11134 CopLINE_inc(PL_curcop);
11137 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11138 missingterm(PL_tokenbuf);
11140 sv_setpvn(herewas,bufptr,d-bufptr+1);
11141 sv_setpvn(tmpstr,d+1,s-d);
11143 sv_catpvn(herewas,s,bufend-s);
11144 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11151 while (s < PL_bufend &&
11152 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11154 CopLINE_inc(PL_curcop);
11156 if (s >= PL_bufend) {
11157 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11158 missingterm(PL_tokenbuf);
11160 sv_setpvn(tmpstr,d+1,s-d);
11162 if (PL_madskills) {
11164 sv_catpvn(PL_thisstuff, d + 1, s - d);
11166 PL_thisstuff = newSVpvn(d + 1, s - d);
11167 stuffstart = s - SvPVX(PL_linestr);
11171 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11173 sv_catpvn(herewas,s,PL_bufend-s);
11174 sv_setsv(PL_linestr,herewas);
11175 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11176 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11177 PL_last_lop = PL_last_uni = NULL;
11180 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11181 while (s >= PL_bufend) { /* multiple line string? */
11183 if (PL_madskills) {
11184 tstart = SvPVX(PL_linestr) + stuffstart;
11186 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11188 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11192 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11193 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11194 missingterm(PL_tokenbuf);
11197 stuffstart = s - SvPVX(PL_linestr);
11199 CopLINE_inc(PL_curcop);
11200 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11201 PL_last_lop = PL_last_uni = NULL;
11202 #ifndef PERL_STRICT_CR
11203 if (PL_bufend - PL_linestart >= 2) {
11204 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11205 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11207 PL_bufend[-2] = '\n';
11209 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11211 else if (PL_bufend[-1] == '\r')
11212 PL_bufend[-1] = '\n';
11214 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11215 PL_bufend[-1] = '\n';
11217 if (PERLDB_LINE && PL_curstash != PL_debstash) {
11218 SV * const sv = newSV(0);
11220 sv_upgrade(sv, SVt_PVMG);
11221 sv_setsv(sv,PL_linestr);
11222 (void)SvIOK_on(sv);
11224 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
11226 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11227 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11228 *(SvPVX(PL_linestr) + off ) = ' ';
11229 sv_catsv(PL_linestr,herewas);
11230 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11231 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11235 sv_catsv(tmpstr,PL_linestr);
11240 PL_multi_end = CopLINE(PL_curcop);
11241 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11242 SvPV_shrink_to_cur(tmpstr);
11244 SvREFCNT_dec(herewas);
11246 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11248 else if (PL_encoding)
11249 sv_recode_to_utf8(tmpstr, PL_encoding);
11251 PL_lex_stuff = tmpstr;
11252 yylval.ival = op_type;
11256 /* scan_inputsymbol
11257 takes: current position in input buffer
11258 returns: new position in input buffer
11259 side-effects: yylval and lex_op are set.
11264 <FH> read from filehandle
11265 <pkg::FH> read from package qualified filehandle
11266 <pkg'FH> read from package qualified filehandle
11267 <$fh> read from filehandle in $fh
11268 <*.h> filename glob
11273 S_scan_inputsymbol(pTHX_ char *start)
11276 register char *s = start; /* current position in buffer */
11280 char *d = PL_tokenbuf; /* start of temp holding space */
11281 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11283 end = strchr(s, '\n');
11286 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11288 /* die if we didn't have space for the contents of the <>,
11289 or if it didn't end, or if we see a newline
11292 if (len >= (I32)sizeof PL_tokenbuf)
11293 Perl_croak(aTHX_ "Excessively long <> operator");
11295 Perl_croak(aTHX_ "Unterminated <> operator");
11300 Remember, only scalar variables are interpreted as filehandles by
11301 this code. Anything more complex (e.g., <$fh{$num}>) will be
11302 treated as a glob() call.
11303 This code makes use of the fact that except for the $ at the front,
11304 a scalar variable and a filehandle look the same.
11306 if (*d == '$' && d[1]) d++;
11308 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11309 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11312 /* If we've tried to read what we allow filehandles to look like, and
11313 there's still text left, then it must be a glob() and not a getline.
11314 Use scan_str to pull out the stuff between the <> and treat it
11315 as nothing more than a string.
11318 if (d - PL_tokenbuf != len) {
11319 yylval.ival = OP_GLOB;
11321 s = scan_str(start,!!PL_madskills,FALSE);
11323 Perl_croak(aTHX_ "Glob not terminated");
11327 bool readline_overriden = FALSE;
11330 /* we're in a filehandle read situation */
11333 /* turn <> into <ARGV> */
11335 Copy("ARGV",d,5,char);
11337 /* Check whether readline() is overriden */
11338 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11340 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11342 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11343 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
11344 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11345 readline_overriden = TRUE;
11347 /* if <$fh>, create the ops to turn the variable into a
11351 /* try to find it in the pad for this block, otherwise find
11352 add symbol table ops
11354 const PADOFFSET tmp = pad_findmy(d);
11355 if (tmp != NOT_IN_PAD) {
11356 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11357 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11358 HEK * const stashname = HvNAME_HEK(stash);
11359 SV * const sym = sv_2mortal(newSVhek(stashname));
11360 sv_catpvs(sym, "::");
11361 sv_catpv(sym, d+1);
11366 OP * const o = newOP(OP_PADSV, 0);
11368 PL_lex_op = readline_overriden
11369 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11370 append_elem(OP_LIST, o,
11371 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11372 : (OP*)newUNOP(OP_READLINE, 0, o);
11381 ? (GV_ADDMULTI | GV_ADDINEVAL)
11384 PL_lex_op = readline_overriden
11385 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11386 append_elem(OP_LIST,
11387 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11388 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11389 : (OP*)newUNOP(OP_READLINE, 0,
11390 newUNOP(OP_RV2SV, 0,
11391 newGVOP(OP_GV, 0, gv)));
11393 if (!readline_overriden)
11394 PL_lex_op->op_flags |= OPf_SPECIAL;
11395 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11396 yylval.ival = OP_NULL;
11399 /* If it's none of the above, it must be a literal filehandle
11400 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11402 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11403 PL_lex_op = readline_overriden
11404 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11405 append_elem(OP_LIST,
11406 newGVOP(OP_GV, 0, gv),
11407 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11408 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11409 yylval.ival = OP_NULL;
11418 takes: start position in buffer
11419 keep_quoted preserve \ on the embedded delimiter(s)
11420 keep_delims preserve the delimiters around the string
11421 returns: position to continue reading from buffer
11422 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11423 updates the read buffer.
11425 This subroutine pulls a string out of the input. It is called for:
11426 q single quotes q(literal text)
11427 ' single quotes 'literal text'
11428 qq double quotes qq(interpolate $here please)
11429 " double quotes "interpolate $here please"
11430 qx backticks qx(/bin/ls -l)
11431 ` backticks `/bin/ls -l`
11432 qw quote words @EXPORT_OK = qw( func() $spam )
11433 m// regexp match m/this/
11434 s/// regexp substitute s/this/that/
11435 tr/// string transliterate tr/this/that/
11436 y/// string transliterate y/this/that/
11437 ($*@) sub prototypes sub foo ($)
11438 (stuff) sub attr parameters sub foo : attr(stuff)
11439 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11441 In most of these cases (all but <>, patterns and transliterate)
11442 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11443 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11444 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11447 It skips whitespace before the string starts, and treats the first
11448 character as the delimiter. If the delimiter is one of ([{< then
11449 the corresponding "close" character )]}> is used as the closing
11450 delimiter. It allows quoting of delimiters, and if the string has
11451 balanced delimiters ([{<>}]) it allows nesting.
11453 On success, the SV with the resulting string is put into lex_stuff or,
11454 if that is already non-NULL, into lex_repl. The second case occurs only
11455 when parsing the RHS of the special constructs s/// and tr/// (y///).
11456 For convenience, the terminating delimiter character is stuffed into
11461 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11464 SV *sv; /* scalar value: string */
11465 const char *tmps; /* temp string, used for delimiter matching */
11466 register char *s = start; /* current position in the buffer */
11467 register char term; /* terminating character */
11468 register char *to; /* current position in the sv's data */
11469 I32 brackets = 1; /* bracket nesting level */
11470 bool has_utf8 = FALSE; /* is there any utf8 content? */
11471 I32 termcode; /* terminating char. code */
11472 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11473 STRLEN termlen; /* length of terminating string */
11474 char *last = NULL; /* last position for nesting bracket */
11480 /* skip space before the delimiter */
11486 if (PL_realtokenstart >= 0) {
11487 stuffstart = PL_realtokenstart;
11488 PL_realtokenstart = -1;
11491 stuffstart = start - SvPVX(PL_linestr);
11493 /* mark where we are, in case we need to report errors */
11496 /* after skipping whitespace, the next character is the terminator */
11499 termcode = termstr[0] = term;
11503 termcode = utf8_to_uvchr((U8*)s, &termlen);
11504 Copy(s, termstr, termlen, U8);
11505 if (!UTF8_IS_INVARIANT(term))
11509 /* mark where we are */
11510 PL_multi_start = CopLINE(PL_curcop);
11511 PL_multi_open = term;
11513 /* find corresponding closing delimiter */
11514 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11515 termcode = termstr[0] = term = tmps[5];
11517 PL_multi_close = term;
11519 /* create a new SV to hold the contents. 79 is the SV's initial length.
11520 What a random number. */
11522 sv_upgrade(sv, SVt_PVIV);
11523 SvIV_set(sv, termcode);
11524 (void)SvPOK_only(sv); /* validate pointer */
11526 /* move past delimiter and try to read a complete string */
11528 sv_catpvn(sv, s, termlen);
11531 tstart = SvPVX(PL_linestr) + stuffstart;
11532 if (!PL_thisopen && !keep_delims) {
11533 PL_thisopen = newSVpvn(tstart, s - tstart);
11534 stuffstart = s - SvPVX(PL_linestr);
11538 if (PL_encoding && !UTF) {
11542 int offset = s - SvPVX_const(PL_linestr);
11543 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11544 &offset, (char*)termstr, termlen);
11545 const char * const ns = SvPVX_const(PL_linestr) + offset;
11546 char * const svlast = SvEND(sv) - 1;
11548 for (; s < ns; s++) {
11549 if (*s == '\n' && !PL_rsfp)
11550 CopLINE_inc(PL_curcop);
11553 goto read_more_line;
11555 /* handle quoted delimiters */
11556 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11558 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11560 if ((svlast-1 - t) % 2) {
11561 if (!keep_quoted) {
11562 *(svlast-1) = term;
11564 SvCUR_set(sv, SvCUR(sv) - 1);
11569 if (PL_multi_open == PL_multi_close) {
11577 for (t = w = last; t < svlast; w++, t++) {
11578 /* At here, all closes are "was quoted" one,
11579 so we don't check PL_multi_close. */
11581 if (!keep_quoted && *(t+1) == PL_multi_open)
11586 else if (*t == PL_multi_open)
11594 SvCUR_set(sv, w - SvPVX_const(sv));
11597 if (--brackets <= 0)
11602 if (!keep_delims) {
11603 SvCUR_set(sv, SvCUR(sv) - 1);
11609 /* extend sv if need be */
11610 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11611 /* set 'to' to the next character in the sv's string */
11612 to = SvPVX(sv)+SvCUR(sv);
11614 /* if open delimiter is the close delimiter read unbridle */
11615 if (PL_multi_open == PL_multi_close) {
11616 for (; s < PL_bufend; s++,to++) {
11617 /* embedded newlines increment the current line number */
11618 if (*s == '\n' && !PL_rsfp)
11619 CopLINE_inc(PL_curcop);
11620 /* handle quoted delimiters */
11621 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11622 if (!keep_quoted && s[1] == term)
11624 /* any other quotes are simply copied straight through */
11628 /* terminate when run out of buffer (the for() condition), or
11629 have found the terminator */
11630 else if (*s == term) {
11633 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11636 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11642 /* if the terminator isn't the same as the start character (e.g.,
11643 matched brackets), we have to allow more in the quoting, and
11644 be prepared for nested brackets.
11647 /* read until we run out of string, or we find the terminator */
11648 for (; s < PL_bufend; s++,to++) {
11649 /* embedded newlines increment the line count */
11650 if (*s == '\n' && !PL_rsfp)
11651 CopLINE_inc(PL_curcop);
11652 /* backslashes can escape the open or closing characters */
11653 if (*s == '\\' && s+1 < PL_bufend) {
11654 if (!keep_quoted &&
11655 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11660 /* allow nested opens and closes */
11661 else if (*s == PL_multi_close && --brackets <= 0)
11663 else if (*s == PL_multi_open)
11665 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11670 /* terminate the copied string and update the sv's end-of-string */
11672 SvCUR_set(sv, to - SvPVX_const(sv));
11675 * this next chunk reads more into the buffer if we're not done yet
11679 break; /* handle case where we are done yet :-) */
11681 #ifndef PERL_STRICT_CR
11682 if (to - SvPVX_const(sv) >= 2) {
11683 if ((to[-2] == '\r' && to[-1] == '\n') ||
11684 (to[-2] == '\n' && to[-1] == '\r'))
11688 SvCUR_set(sv, to - SvPVX_const(sv));
11690 else if (to[-1] == '\r')
11693 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11698 /* if we're out of file, or a read fails, bail and reset the current
11699 line marker so we can report where the unterminated string began
11702 if (PL_madskills) {
11703 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11705 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11707 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11711 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11713 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11719 /* we read a line, so increment our line counter */
11720 CopLINE_inc(PL_curcop);
11722 /* update debugger info */
11723 if (PERLDB_LINE && PL_curstash != PL_debstash) {
11724 SV * const line_sv = newSV(0);
11726 sv_upgrade(line_sv, SVt_PVMG);
11727 sv_setsv(line_sv,PL_linestr);
11728 (void)SvIOK_on(line_sv);
11729 SvIV_set(line_sv, 0);
11730 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), line_sv);
11733 /* having changed the buffer, we must update PL_bufend */
11734 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11735 PL_last_lop = PL_last_uni = NULL;
11738 /* at this point, we have successfully read the delimited string */
11740 if (!PL_encoding || UTF) {
11742 if (PL_madskills) {
11743 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11744 const int len = s - start;
11746 sv_catpvn(PL_thisstuff, tstart, len);
11748 PL_thisstuff = newSVpvn(tstart, len);
11749 if (!PL_thisclose && !keep_delims)
11750 PL_thisclose = newSVpvn(s,termlen);
11755 sv_catpvn(sv, s, termlen);
11760 if (PL_madskills) {
11761 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11762 const int len = s - tstart - termlen;
11764 sv_catpvn(PL_thisstuff, tstart, len);
11766 PL_thisstuff = newSVpvn(tstart, len);
11767 if (!PL_thisclose && !keep_delims)
11768 PL_thisclose = newSVpvn(s - termlen,termlen);
11772 if (has_utf8 || PL_encoding)
11775 PL_multi_end = CopLINE(PL_curcop);
11777 /* if we allocated too much space, give some back */
11778 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11779 SvLEN_set(sv, SvCUR(sv) + 1);
11780 SvPV_renew(sv, SvLEN(sv));
11783 /* decide whether this is the first or second quoted string we've read
11796 takes: pointer to position in buffer
11797 returns: pointer to new position in buffer
11798 side-effects: builds ops for the constant in yylval.op
11800 Read a number in any of the formats that Perl accepts:
11802 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11803 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11806 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11808 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11811 If it reads a number without a decimal point or an exponent, it will
11812 try converting the number to an integer and see if it can do so
11813 without loss of precision.
11817 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11820 register const char *s = start; /* current position in buffer */
11821 register char *d; /* destination in temp buffer */
11822 register char *e; /* end of temp buffer */
11823 NV nv; /* number read, as a double */
11824 SV *sv = NULL; /* place to put the converted number */
11825 bool floatit; /* boolean: int or float? */
11826 const char *lastub = NULL; /* position of last underbar */
11827 static char const number_too_long[] = "Number too long";
11829 /* We use the first character to decide what type of number this is */
11833 Perl_croak(aTHX_ "panic: scan_num");
11835 /* if it starts with a 0, it could be an octal number, a decimal in
11836 0.13 disguise, or a hexadecimal number, or a binary number. */
11840 u holds the "number so far"
11841 shift the power of 2 of the base
11842 (hex == 4, octal == 3, binary == 1)
11843 overflowed was the number more than we can hold?
11845 Shift is used when we add a digit. It also serves as an "are
11846 we in octal/hex/binary?" indicator to disallow hex characters
11847 when in octal mode.
11852 bool overflowed = FALSE;
11853 bool just_zero = TRUE; /* just plain 0 or binary number? */
11854 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11855 static const char* const bases[5] =
11856 { "", "binary", "", "octal", "hexadecimal" };
11857 static const char* const Bases[5] =
11858 { "", "Binary", "", "Octal", "Hexadecimal" };
11859 static const char* const maxima[5] =
11861 "0b11111111111111111111111111111111",
11865 const char *base, *Base, *max;
11867 /* check for hex */
11872 } else if (s[1] == 'b') {
11877 /* check for a decimal in disguise */
11878 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11880 /* so it must be octal */
11887 if (ckWARN(WARN_SYNTAX))
11888 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11889 "Misplaced _ in number");
11893 base = bases[shift];
11894 Base = Bases[shift];
11895 max = maxima[shift];
11897 /* read the rest of the number */
11899 /* x is used in the overflow test,
11900 b is the digit we're adding on. */
11905 /* if we don't mention it, we're done */
11909 /* _ are ignored -- but warned about if consecutive */
11911 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11912 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11913 "Misplaced _ in number");
11917 /* 8 and 9 are not octal */
11918 case '8': case '9':
11920 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11924 case '2': case '3': case '4':
11925 case '5': case '6': case '7':
11927 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11930 case '0': case '1':
11931 b = *s++ & 15; /* ASCII digit -> value of digit */
11935 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11936 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11937 /* make sure they said 0x */
11940 b = (*s++ & 7) + 9;
11942 /* Prepare to put the digit we have onto the end
11943 of the number so far. We check for overflows.
11949 x = u << shift; /* make room for the digit */
11951 if ((x >> shift) != u
11952 && !(PL_hints & HINT_NEW_BINARY)) {
11955 if (ckWARN_d(WARN_OVERFLOW))
11956 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11957 "Integer overflow in %s number",
11960 u = x | b; /* add the digit to the end */
11963 n *= nvshift[shift];
11964 /* If an NV has not enough bits in its
11965 * mantissa to represent an UV this summing of
11966 * small low-order numbers is a waste of time
11967 * (because the NV cannot preserve the
11968 * low-order bits anyway): we could just
11969 * remember when did we overflow and in the
11970 * end just multiply n by the right
11978 /* if we get here, we had success: make a scalar value from
11983 /* final misplaced underbar check */
11984 if (s[-1] == '_') {
11985 if (ckWARN(WARN_SYNTAX))
11986 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11991 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
11992 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
11993 "%s number > %s non-portable",
11999 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12000 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12001 "%s number > %s non-portable",
12006 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12007 sv = new_constant(start, s - start, "integer",
12009 else if (PL_hints & HINT_NEW_BINARY)
12010 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
12015 handle decimal numbers.
12016 we're also sent here when we read a 0 as the first digit
12018 case '1': case '2': case '3': case '4': case '5':
12019 case '6': case '7': case '8': case '9': case '.':
12022 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12025 /* read next group of digits and _ and copy into d */
12026 while (isDIGIT(*s) || *s == '_') {
12027 /* skip underscores, checking for misplaced ones
12031 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12032 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12033 "Misplaced _ in number");
12037 /* check for end of fixed-length buffer */
12039 Perl_croak(aTHX_ number_too_long);
12040 /* if we're ok, copy the character */
12045 /* final misplaced underbar check */
12046 if (lastub && s == lastub + 1) {
12047 if (ckWARN(WARN_SYNTAX))
12048 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12051 /* read a decimal portion if there is one. avoid
12052 3..5 being interpreted as the number 3. followed
12055 if (*s == '.' && s[1] != '.') {
12060 if (ckWARN(WARN_SYNTAX))
12061 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12062 "Misplaced _ in number");
12066 /* copy, ignoring underbars, until we run out of digits.
12068 for (; isDIGIT(*s) || *s == '_'; s++) {
12069 /* fixed length buffer check */
12071 Perl_croak(aTHX_ number_too_long);
12073 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12074 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12075 "Misplaced _ in number");
12081 /* fractional part ending in underbar? */
12082 if (s[-1] == '_') {
12083 if (ckWARN(WARN_SYNTAX))
12084 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12085 "Misplaced _ in number");
12087 if (*s == '.' && isDIGIT(s[1])) {
12088 /* oops, it's really a v-string, but without the "v" */
12094 /* read exponent part, if present */
12095 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12099 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12100 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12102 /* stray preinitial _ */
12104 if (ckWARN(WARN_SYNTAX))
12105 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12106 "Misplaced _ in number");
12110 /* allow positive or negative exponent */
12111 if (*s == '+' || *s == '-')
12114 /* stray initial _ */
12116 if (ckWARN(WARN_SYNTAX))
12117 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12118 "Misplaced _ in number");
12122 /* read digits of exponent */
12123 while (isDIGIT(*s) || *s == '_') {
12126 Perl_croak(aTHX_ number_too_long);
12130 if (((lastub && s == lastub + 1) ||
12131 (!isDIGIT(s[1]) && s[1] != '_'))
12132 && ckWARN(WARN_SYNTAX))
12133 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12134 "Misplaced _ in number");
12141 /* make an sv from the string */
12145 We try to do an integer conversion first if no characters
12146 indicating "float" have been found.
12151 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12153 if (flags == IS_NUMBER_IN_UV) {
12155 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12158 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12159 if (uv <= (UV) IV_MIN)
12160 sv_setiv(sv, -(IV)uv);
12167 /* terminate the string */
12169 nv = Atof(PL_tokenbuf);
12173 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12174 (PL_hints & HINT_NEW_INTEGER) )
12175 sv = new_constant(PL_tokenbuf,
12178 (floatit ? "float" : "integer"),
12182 /* if it starts with a v, it could be a v-string */
12185 sv = newSV(5); /* preallocate storage space */
12186 s = scan_vstring(s,sv);
12190 /* make the op for the constant and return */
12193 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12195 lvalp->opval = NULL;
12201 S_scan_formline(pTHX_ register char *s)
12204 register char *eol;
12206 SV * const stuff = newSVpvs("");
12207 bool needargs = FALSE;
12208 bool eofmt = FALSE;
12210 char *tokenstart = s;
12213 if (PL_madskills) {
12214 savewhite = PL_thiswhite;
12219 while (!needargs) {
12222 #ifdef PERL_STRICT_CR
12223 while (SPACE_OR_TAB(*t))
12226 while (SPACE_OR_TAB(*t) || *t == '\r')
12229 if (*t == '\n' || t == PL_bufend) {
12234 if (PL_in_eval && !PL_rsfp) {
12235 eol = (char *) memchr(s,'\n',PL_bufend-s);
12240 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12242 for (t = s; t < eol; t++) {
12243 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12245 goto enough; /* ~~ must be first line in formline */
12247 if (*t == '@' || *t == '^')
12251 sv_catpvn(stuff, s, eol-s);
12252 #ifndef PERL_STRICT_CR
12253 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12254 char *end = SvPVX(stuff) + SvCUR(stuff);
12257 SvCUR_set(stuff, SvCUR(stuff) - 1);
12267 if (PL_madskills) {
12269 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12271 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12274 s = filter_gets(PL_linestr, PL_rsfp, 0);
12276 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12278 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12280 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12281 PL_last_lop = PL_last_uni = NULL;
12290 if (SvCUR(stuff)) {
12293 PL_lex_state = LEX_NORMAL;
12294 start_force(PL_curforce);
12295 NEXTVAL_NEXTTOKE.ival = 0;
12299 PL_lex_state = LEX_FORMLINE;
12301 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12303 else if (PL_encoding)
12304 sv_recode_to_utf8(stuff, PL_encoding);
12306 start_force(PL_curforce);
12307 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12309 start_force(PL_curforce);
12310 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12314 SvREFCNT_dec(stuff);
12316 PL_lex_formbrack = 0;
12320 if (PL_madskills) {
12322 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12324 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12325 PL_thiswhite = savewhite;
12337 PL_cshlen = strlen(PL_cshname);
12339 #if defined(USE_ITHREADS)
12340 PERL_UNUSED_CONTEXT;
12346 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12349 const I32 oldsavestack_ix = PL_savestack_ix;
12350 CV* const outsidecv = PL_compcv;
12353 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12355 SAVEI32(PL_subline);
12356 save_item(PL_subname);
12357 SAVESPTR(PL_compcv);
12359 PL_compcv = (CV*)newSV(0);
12360 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
12361 CvFLAGS(PL_compcv) |= flags;
12363 PL_subline = CopLINE(PL_curcop);
12364 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12365 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12366 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12368 return oldsavestack_ix;
12372 #pragma segment Perl_yylex
12375 Perl_yywarn(pTHX_ const char *s)
12378 PL_in_eval |= EVAL_WARNONLY;
12380 PL_in_eval &= ~EVAL_WARNONLY;
12385 Perl_yyerror(pTHX_ const char *s)
12388 const char *where = NULL;
12389 const char *context = NULL;
12393 if (!yychar || (yychar == ';' && !PL_rsfp))
12395 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12396 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12397 PL_oldbufptr != PL_bufptr) {
12400 The code below is removed for NetWare because it abends/crashes on NetWare
12401 when the script has error such as not having the closing quotes like:
12402 if ($var eq "value)
12403 Checking of white spaces is anyway done in NetWare code.
12406 while (isSPACE(*PL_oldoldbufptr))
12409 context = PL_oldoldbufptr;
12410 contlen = PL_bufptr - PL_oldoldbufptr;
12412 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12413 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12416 The code below is removed for NetWare because it abends/crashes on NetWare
12417 when the script has error such as not having the closing quotes like:
12418 if ($var eq "value)
12419 Checking of white spaces is anyway done in NetWare code.
12422 while (isSPACE(*PL_oldbufptr))
12425 context = PL_oldbufptr;
12426 contlen = PL_bufptr - PL_oldbufptr;
12428 else if (yychar > 255)
12429 where = "next token ???";
12430 else if (yychar == -2) { /* YYEMPTY */
12431 if (PL_lex_state == LEX_NORMAL ||
12432 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12433 where = "at end of line";
12434 else if (PL_lex_inpat)
12435 where = "within pattern";
12437 where = "within string";
12440 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12442 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12443 else if (isPRINT_LC(yychar))
12444 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12446 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12447 where = SvPVX_const(where_sv);
12449 msg = sv_2mortal(newSVpv(s, 0));
12450 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12451 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12453 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12455 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12456 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12457 Perl_sv_catpvf(aTHX_ msg,
12458 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12459 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12462 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12463 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, (void*)msg);
12466 if (PL_error_count >= 10) {
12467 if (PL_in_eval && SvCUR(ERRSV))
12468 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12469 (void*)ERRSV, OutCopFILE(PL_curcop));
12471 Perl_croak(aTHX_ "%s has too many errors.\n",
12472 OutCopFILE(PL_curcop));
12475 PL_in_my_stash = NULL;
12479 #pragma segment Main
12483 S_swallow_bom(pTHX_ U8 *s)
12486 const STRLEN slen = SvCUR(PL_linestr);
12489 if (s[1] == 0xFE) {
12490 /* UTF-16 little-endian? (or UTF32-LE?) */
12491 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12492 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12493 #ifndef PERL_NO_UTF16_FILTER
12494 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12497 if (PL_bufend > (char*)s) {
12501 filter_add(utf16rev_textfilter, NULL);
12502 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12503 utf16_to_utf8_reversed(s, news,
12504 PL_bufend - (char*)s - 1,
12506 sv_setpvn(PL_linestr, (const char*)news, newlen);
12508 s = (U8*)SvPVX(PL_linestr);
12509 Copy(news, s, newlen, U8);
12513 SvUTF8_on(PL_linestr);
12514 s = (U8*)SvPVX(PL_linestr);
12516 /* FIXME - is this a general bug fix? */
12519 PL_bufend = SvPVX(PL_linestr) + newlen;
12522 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12527 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12528 #ifndef PERL_NO_UTF16_FILTER
12529 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12532 if (PL_bufend > (char *)s) {
12536 filter_add(utf16_textfilter, NULL);
12537 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12538 utf16_to_utf8(s, news,
12539 PL_bufend - (char*)s,
12541 sv_setpvn(PL_linestr, (const char*)news, newlen);
12543 SvUTF8_on(PL_linestr);
12544 s = (U8*)SvPVX(PL_linestr);
12545 PL_bufend = SvPVX(PL_linestr) + newlen;
12548 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12553 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12554 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12555 s += 3; /* UTF-8 */
12561 if (s[2] == 0xFE && s[3] == 0xFF) {
12562 /* UTF-32 big-endian */
12563 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12566 else if (s[2] == 0 && s[3] != 0) {
12569 * are a good indicator of UTF-16BE. */
12570 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12576 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12577 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12578 s += 4; /* UTF-8 */
12584 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12587 * are a good indicator of UTF-16LE. */
12588 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12597 * Restore a source filter.
12601 restore_rsfp(pTHX_ void *f)
12604 PerlIO * const fp = (PerlIO*)f;
12606 if (PL_rsfp == PerlIO_stdin())
12607 PerlIO_clearerr(PL_rsfp);
12608 else if (PL_rsfp && (PL_rsfp != fp))
12609 PerlIO_close(PL_rsfp);
12613 #ifndef PERL_NO_UTF16_FILTER
12615 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12618 const STRLEN old = SvCUR(sv);
12619 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12620 DEBUG_P(PerlIO_printf(Perl_debug_log,
12621 "utf16_textfilter(%p): %d %d (%d)\n",
12622 FPTR2DPTR(void *, utf16_textfilter),
12623 idx, maxlen, (int) count));
12627 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12628 Copy(SvPVX_const(sv), tmps, old, char);
12629 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12630 SvCUR(sv) - old, &newlen);
12631 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12633 DEBUG_P({sv_dump(sv);});
12638 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12641 const STRLEN old = SvCUR(sv);
12642 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12643 DEBUG_P(PerlIO_printf(Perl_debug_log,
12644 "utf16rev_textfilter(%p): %d %d (%d)\n",
12645 FPTR2DPTR(void *, utf16rev_textfilter),
12646 idx, maxlen, (int) count));
12650 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12651 Copy(SvPVX_const(sv), tmps, old, char);
12652 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12653 SvCUR(sv) - old, &newlen);
12654 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12656 DEBUG_P({ sv_dump(sv); });
12662 Returns a pointer to the next character after the parsed
12663 vstring, as well as updating the passed in sv.
12665 Function must be called like
12668 s = scan_vstring(s,sv);
12670 The sv should already be large enough to store the vstring
12671 passed in, for performance reasons.
12676 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
12679 const char *pos = s;
12680 const char *start = s;
12681 if (*pos == 'v') pos++; /* get past 'v' */
12682 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12684 if ( *pos != '.') {
12685 /* this may not be a v-string if followed by => */
12686 const char *next = pos;
12687 while (next < PL_bufend && isSPACE(*next))
12689 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
12690 /* return string not v-string */
12691 sv_setpvn(sv,(char *)s,pos-s);
12692 return (char *)pos;
12696 if (!isALPHA(*pos)) {
12697 U8 tmpbuf[UTF8_MAXBYTES+1];
12700 s++; /* get past 'v' */
12702 sv_setpvn(sv, "", 0);
12705 /* this is atoi() that tolerates underscores */
12708 const char *end = pos;
12710 while (--end >= s) {
12712 const UV orev = rev;
12713 rev += (*end - '0') * mult;
12715 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12716 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12717 "Integer overflow in decimal number");
12721 if (rev > 0x7FFFFFFF)
12722 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12724 /* Append native character for the rev point */
12725 tmpend = uvchr_to_utf8(tmpbuf, rev);
12726 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12727 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12729 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
12735 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12739 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12747 * c-indentation-style: bsd
12748 * c-basic-offset: 4
12749 * indent-tabs-mode: t
12752 * ex: set ts=8 sts=4 sw=4 noet: