3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 #define PERL_IN_TOKE_C
28 #define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
31 #define pl_yylval (PL_parser->yylval)
33 /* YYINITDEPTH -- initial size of the parser's stacks. */
34 #define YYINITDEPTH 200
36 /* XXX temporary backwards compatibility */
37 #define PL_lex_brackets (PL_parser->lex_brackets)
38 #define PL_lex_brackstack (PL_parser->lex_brackstack)
39 #define PL_lex_casemods (PL_parser->lex_casemods)
40 #define PL_lex_casestack (PL_parser->lex_casestack)
41 #define PL_lex_defer (PL_parser->lex_defer)
42 #define PL_lex_dojoin (PL_parser->lex_dojoin)
43 #define PL_lex_expect (PL_parser->lex_expect)
44 #define PL_lex_formbrack (PL_parser->lex_formbrack)
45 #define PL_lex_inpat (PL_parser->lex_inpat)
46 #define PL_lex_inwhat (PL_parser->lex_inwhat)
47 #define PL_lex_op (PL_parser->lex_op)
48 #define PL_lex_repl (PL_parser->lex_repl)
49 #define PL_lex_starts (PL_parser->lex_starts)
50 #define PL_lex_stuff (PL_parser->lex_stuff)
51 #define PL_multi_start (PL_parser->multi_start)
52 #define PL_multi_open (PL_parser->multi_open)
53 #define PL_multi_close (PL_parser->multi_close)
54 #define PL_pending_ident (PL_parser->pending_ident)
55 #define PL_preambled (PL_parser->preambled)
56 #define PL_sublex_info (PL_parser->sublex_info)
57 #define PL_linestr (PL_parser->linestr)
58 #define PL_expect (PL_parser->expect)
59 #define PL_copline (PL_parser->copline)
60 #define PL_bufptr (PL_parser->bufptr)
61 #define PL_oldbufptr (PL_parser->oldbufptr)
62 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63 #define PL_linestart (PL_parser->linestart)
64 #define PL_bufend (PL_parser->bufend)
65 #define PL_last_uni (PL_parser->last_uni)
66 #define PL_last_lop (PL_parser->last_lop)
67 #define PL_last_lop_op (PL_parser->last_lop_op)
68 #define PL_lex_state (PL_parser->lex_state)
69 #define PL_rsfp (PL_parser->rsfp)
70 #define PL_rsfp_filters (PL_parser->rsfp_filters)
71 #define PL_in_my (PL_parser->in_my)
72 #define PL_in_my_stash (PL_parser->in_my_stash)
73 #define PL_tokenbuf (PL_parser->tokenbuf)
74 #define PL_multi_end (PL_parser->multi_end)
75 #define PL_error_count (PL_parser->error_count)
78 # define PL_endwhite (PL_parser->endwhite)
79 # define PL_faketokens (PL_parser->faketokens)
80 # define PL_lasttoke (PL_parser->lasttoke)
81 # define PL_nextwhite (PL_parser->nextwhite)
82 # define PL_realtokenstart (PL_parser->realtokenstart)
83 # define PL_skipwhite (PL_parser->skipwhite)
84 # define PL_thisclose (PL_parser->thisclose)
85 # define PL_thismad (PL_parser->thismad)
86 # define PL_thisopen (PL_parser->thisopen)
87 # define PL_thisstuff (PL_parser->thisstuff)
88 # define PL_thistoken (PL_parser->thistoken)
89 # define PL_thiswhite (PL_parser->thiswhite)
90 # define PL_thiswhite (PL_parser->thiswhite)
91 # define PL_nexttoke (PL_parser->nexttoke)
92 # define PL_curforce (PL_parser->curforce)
94 # define PL_nexttoke (PL_parser->nexttoke)
95 # define PL_nexttype (PL_parser->nexttype)
96 # define PL_nextval (PL_parser->nextval)
100 S_pending_ident(pTHX);
102 static const char ident_too_long[] = "Identifier too long";
105 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
106 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
108 # define CURMAD(slot,sv)
109 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
112 #define XFAKEBRACK 128
113 #define XENUMMASK 127
115 #ifdef USE_UTF8_SCRIPTS
116 # define UTF (!IN_BYTES)
118 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
121 /* The maximum number of characters preceding the unrecognized one to display */
122 #define UNRECOGNIZED_PRECEDE_COUNT 10
124 /* In variables named $^X, these are the legal values for X.
125 * 1999-02-27 mjd-perl-patch@plover.com */
126 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
128 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
130 /* LEX_* are values for PL_lex_state, the state of the lexer.
131 * They are arranged oddly so that the guard on the switch statement
132 * can get by with a single comparison (if the compiler is smart enough).
135 /* #define LEX_NOTPARSING 11 is done in perl.h. */
137 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
138 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
139 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
140 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
141 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
143 /* at end of code, eg "$x" followed by: */
144 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
145 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
147 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
148 string or after \E, $foo, etc */
149 #define LEX_INTERPCONST 2 /* NOT USED */
150 #define LEX_FORMLINE 1 /* expecting a format line */
151 #define LEX_KNOWNEXT 0 /* next token known; just return it */
155 static const char* const lex_state_names[] = {
174 #include "keywords.h"
176 /* CLINE is a macro that ensures PL_copline has a sane value */
181 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
184 # define SKIPSPACE0(s) skipspace0(s)
185 # define SKIPSPACE1(s) skipspace1(s)
186 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
187 # define PEEKSPACE(s) skipspace2(s,0)
189 # define SKIPSPACE0(s) skipspace(s)
190 # define SKIPSPACE1(s) skipspace(s)
191 # define SKIPSPACE2(s,tsv) skipspace(s)
192 # define PEEKSPACE(s) skipspace(s)
196 * Convenience functions to return different tokens and prime the
197 * lexer for the next token. They all take an argument.
199 * TOKEN : generic token (used for '(', DOLSHARP, etc)
200 * OPERATOR : generic operator
201 * AOPERATOR : assignment operator
202 * PREBLOCK : beginning the block after an if, while, foreach, ...
203 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
204 * PREREF : *EXPR where EXPR is not a simple identifier
205 * TERM : expression term
206 * LOOPX : loop exiting command (goto, last, dump, etc)
207 * FTST : file test operator
208 * FUN0 : zero-argument function
209 * FUN1 : not used, except for not, which isn't a UNIOP
210 * BOop : bitwise or or xor
212 * SHop : shift operator
213 * PWop : power operator
214 * PMop : pattern-matching operator
215 * Aop : addition-level operator
216 * Mop : multiplication-level operator
217 * Eop : equality-testing operator
218 * Rop : relational operator <= != gt
220 * Also see LOP and lop() below.
223 #ifdef DEBUGGING /* Serve -DT. */
224 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
226 # define REPORT(retval) (retval)
229 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
230 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
231 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
232 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
233 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
234 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
235 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
236 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
237 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
238 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
239 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
240 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
241 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
242 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
243 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
244 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
246 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
247 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
248 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
250 /* This bit of chicanery makes a unary function followed by
251 * a parenthesis into a function with one argument, highest precedence.
252 * The UNIDOR macro is for unary functions that can be followed by the //
253 * operator (such as C<shift // 0>).
255 #define UNI2(f,x) { \
256 pl_yylval.ival = f; \
259 PL_last_uni = PL_oldbufptr; \
260 PL_last_lop_op = f; \
262 return REPORT( (int)FUNC1 ); \
264 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
266 #define UNI(f) UNI2(f,XTERM)
267 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
269 #define UNIBRACK(f) { \
270 pl_yylval.ival = f; \
272 PL_last_uni = PL_oldbufptr; \
274 return REPORT( (int)FUNC1 ); \
276 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
279 /* grandfather return to old style */
280 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
284 /* how to interpret the pl_yylval associated with the token */
288 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
294 static struct debug_tokens {
296 enum token_type type;
298 } const debug_tokens[] =
300 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
301 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
302 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
303 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
304 { ARROW, TOKENTYPE_NONE, "ARROW" },
305 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
306 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
307 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
308 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
309 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
310 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
311 { DO, TOKENTYPE_NONE, "DO" },
312 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
313 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
314 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
315 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
316 { ELSE, TOKENTYPE_NONE, "ELSE" },
317 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
318 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
319 { FOR, TOKENTYPE_IVAL, "FOR" },
320 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
321 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
322 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
323 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
324 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
325 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
326 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
327 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
328 { IF, TOKENTYPE_IVAL, "IF" },
329 { LABEL, TOKENTYPE_PVAL, "LABEL" },
330 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
331 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
332 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
333 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
334 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
335 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
336 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
337 { MY, TOKENTYPE_IVAL, "MY" },
338 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
339 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
340 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
341 { OROP, TOKENTYPE_IVAL, "OROP" },
342 { OROR, TOKENTYPE_NONE, "OROR" },
343 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
344 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
345 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
346 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
347 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
348 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
349 { PREINC, TOKENTYPE_NONE, "PREINC" },
350 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
351 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
352 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
353 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
354 { SUB, TOKENTYPE_NONE, "SUB" },
355 { THING, TOKENTYPE_OPVAL, "THING" },
356 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
357 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
358 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
359 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
360 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
361 { USE, TOKENTYPE_IVAL, "USE" },
362 { WHEN, TOKENTYPE_IVAL, "WHEN" },
363 { WHILE, TOKENTYPE_IVAL, "WHILE" },
364 { WORD, TOKENTYPE_OPVAL, "WORD" },
365 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
366 { 0, TOKENTYPE_NONE, NULL }
369 /* dump the returned token in rv, plus any optional arg in pl_yylval */
372 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
376 PERL_ARGS_ASSERT_TOKEREPORT;
379 const char *name = NULL;
380 enum token_type type = TOKENTYPE_NONE;
381 const struct debug_tokens *p;
382 SV* const report = newSVpvs("<== ");
384 for (p = debug_tokens; p->token; p++) {
385 if (p->token == (int)rv) {
392 Perl_sv_catpv(aTHX_ report, name);
393 else if ((char)rv > ' ' && (char)rv < '~')
394 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
396 sv_catpvs(report, "EOF");
398 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
401 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
404 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
406 case TOKENTYPE_OPNUM:
407 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
408 PL_op_name[lvalp->ival]);
411 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
413 case TOKENTYPE_OPVAL:
415 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
416 PL_op_name[lvalp->opval->op_type]);
417 if (lvalp->opval->op_type == OP_CONST) {
418 Perl_sv_catpvf(aTHX_ report, " %s",
419 SvPEEK(cSVOPx_sv(lvalp->opval)));
424 sv_catpvs(report, "(opval=null)");
427 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
433 /* print the buffer with suitable escapes */
436 S_printbuf(pTHX_ const char *const fmt, const char *const s)
438 SV* const tmp = newSVpvs("");
440 PERL_ARGS_ASSERT_PRINTBUF;
442 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
449 S_deprecate_commaless_var_list(pTHX) {
451 deprecate("comma-less variable list");
452 return REPORT(','); /* grandfather non-comma-format format */
458 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
459 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
463 S_ao(pTHX_ int toketype)
466 if (*PL_bufptr == '=') {
468 if (toketype == ANDAND)
469 pl_yylval.ival = OP_ANDASSIGN;
470 else if (toketype == OROR)
471 pl_yylval.ival = OP_ORASSIGN;
472 else if (toketype == DORDOR)
473 pl_yylval.ival = OP_DORASSIGN;
481 * When Perl expects an operator and finds something else, no_op
482 * prints the warning. It always prints "<something> found where
483 * operator expected. It prints "Missing semicolon on previous line?"
484 * if the surprise occurs at the start of the line. "do you need to
485 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
486 * where the compiler doesn't know if foo is a method call or a function.
487 * It prints "Missing operator before end of line" if there's nothing
488 * after the missing operator, or "... before <...>" if there is something
489 * after the missing operator.
493 S_no_op(pTHX_ const char *const what, char *s)
496 char * const oldbp = PL_bufptr;
497 const bool is_first = (PL_oldbufptr == PL_linestart);
499 PERL_ARGS_ASSERT_NO_OP;
505 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
506 if (ckWARN_d(WARN_SYNTAX)) {
508 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
509 "\t(Missing semicolon on previous line?)\n");
510 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
512 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
514 if (t < PL_bufptr && isSPACE(*t))
515 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
516 "\t(Do you need to predeclare %.*s?)\n",
517 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
521 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
522 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
530 * Complain about missing quote/regexp/heredoc terminator.
531 * If it's called with NULL then it cauterizes the line buffer.
532 * If we're in a delimited string and the delimiter is a control
533 * character, it's reformatted into a two-char sequence like ^C.
538 S_missingterm(pTHX_ char *s)
544 char * const nl = strrchr(s,'\n');
548 else if (isCNTRL(PL_multi_close)) {
550 tmpbuf[1] = (char)toCTRL(PL_multi_close);
555 *tmpbuf = (char)PL_multi_close;
559 q = strchr(s,'"') ? '\'' : '"';
560 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
563 #define FEATURE_IS_ENABLED(name) \
564 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
565 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
566 /* The longest string we pass in. */
567 #define MAX_FEATURE_LEN (sizeof("switch")-1)
570 * S_feature_is_enabled
571 * Check whether the named feature is enabled.
574 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
577 HV * const hinthv = GvHV(PL_hintgv);
578 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
580 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
582 assert(namelen <= MAX_FEATURE_LEN);
583 memcpy(&he_name[8], name, namelen);
585 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
589 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
590 * utf16-to-utf8-reversed.
593 #ifdef PERL_CR_FILTER
597 register const char *s = SvPVX_const(sv);
598 register const char * const e = s + SvCUR(sv);
600 PERL_ARGS_ASSERT_STRIP_RETURN;
602 /* outer loop optimized to do nothing if there are no CR-LFs */
604 if (*s++ == '\r' && *s == '\n') {
605 /* hit a CR-LF, need to copy the rest */
606 register char *d = s - 1;
609 if (*s == '\r' && s[1] == '\n')
620 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
622 const I32 count = FILTER_READ(idx+1, sv, maxlen);
623 if (count > 0 && !maxlen)
634 * Create a parser object and initialise its parser and lexer fields
636 * rsfp is the opened file handle to read from (if any),
638 * line holds any initial content already read from the file (or in
639 * the case of no file, such as an eval, the whole contents);
641 * new_filter indicates that this is a new file and it shouldn't inherit
642 * the filters from the current parser (ie require).
646 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
649 const char *s = NULL;
651 yy_parser *parser, *oparser;
653 /* create and initialise a parser */
655 Newxz(parser, 1, yy_parser);
656 parser->old_parser = oparser = PL_parser;
659 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
660 parser->ps = parser->stack;
661 parser->stack_size = YYINITDEPTH;
663 parser->stack->state = 0;
664 parser->yyerrstatus = 0;
665 parser->yychar = YYEMPTY; /* Cause a token to be read. */
667 /* on scope exit, free this parser and restore any outer one */
669 parser->saved_curcop = PL_curcop;
671 /* initialise lexer state */
674 parser->curforce = -1;
676 parser->nexttoke = 0;
678 parser->error_count = oparser ? oparser->error_count : 0;
679 parser->copline = NOLINE;
680 parser->lex_state = LEX_NORMAL;
681 parser->expect = XSTATE;
683 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
684 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
686 Newx(parser->lex_brackstack, 120, char);
687 Newx(parser->lex_casestack, 12, char);
688 *parser->lex_casestack = '\0';
691 s = SvPV_const(line, len);
697 parser->linestr = newSVpvs("\n;");
698 } else if (SvREADONLY(line) || s[len-1] != ';') {
699 parser->linestr = newSVsv(line);
701 sv_catpvs(parser->linestr, "\n;");
704 SvREFCNT_inc_simple_void_NN(line);
705 parser->linestr = line;
707 parser->oldoldbufptr =
710 parser->linestart = SvPVX(parser->linestr);
711 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
712 parser->last_lop = parser->last_uni = NULL;
716 /* delete a parser object */
719 Perl_parser_free(pTHX_ const yy_parser *parser)
721 PERL_ARGS_ASSERT_PARSER_FREE;
723 PL_curcop = parser->saved_curcop;
724 SvREFCNT_dec(parser->linestr);
726 if (parser->rsfp == PerlIO_stdin())
727 PerlIO_clearerr(parser->rsfp);
728 else if (parser->rsfp && (!parser->old_parser ||
729 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
730 PerlIO_close(parser->rsfp);
731 SvREFCNT_dec(parser->rsfp_filters);
733 Safefree(parser->stack);
734 Safefree(parser->lex_brackstack);
735 Safefree(parser->lex_casestack);
736 PL_parser = parser->old_parser;
743 * Finalizer for lexing operations. Must be called when the parser is
744 * done with the lexer.
751 PL_doextract = FALSE;
756 * This subroutine has nothing to do with tilting, whether at windmills
757 * or pinball tables. Its name is short for "increment line". It
758 * increments the current line number in CopLINE(PL_curcop) and checks
759 * to see whether the line starts with a comment of the form
760 * # line 500 "foo.pm"
761 * If so, it sets the current line number and file to the values in the comment.
765 S_incline(pTHX_ const char *s)
772 PERL_ARGS_ASSERT_INCLINE;
774 CopLINE_inc(PL_curcop);
777 while (SPACE_OR_TAB(*s))
779 if (strnEQ(s, "line", 4))
783 if (SPACE_OR_TAB(*s))
787 while (SPACE_OR_TAB(*s))
795 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
797 while (SPACE_OR_TAB(*s))
799 if (*s == '"' && (t = strchr(s+1, '"'))) {
809 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
811 if (*e != '\n' && *e != '\0')
812 return; /* false alarm */
815 const STRLEN len = t - s;
817 SV *const temp_sv = CopFILESV(PL_curcop);
823 tmplen = SvCUR(temp_sv);
829 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
830 /* must copy *{"::_<(eval N)[oldfilename:L]"}
831 * to *{"::_<newfilename"} */
832 /* However, the long form of evals is only turned on by the
833 debugger - usually they're "(eval %lu)" */
837 STRLEN tmplen2 = len;
838 if (tmplen + 2 <= sizeof smallbuf)
841 Newx(tmpbuf, tmplen + 2, char);
844 memcpy(tmpbuf + 2, cf, tmplen);
846 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
851 if (tmplen2 + 2 <= sizeof smallbuf)
854 Newx(tmpbuf2, tmplen2 + 2, char);
856 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
857 /* Either they malloc'd it, or we malloc'd it,
858 so no prefix is present in ours. */
863 memcpy(tmpbuf2 + 2, s, tmplen2);
866 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
868 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
869 /* adjust ${"::_<newfilename"} to store the new file name */
870 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
871 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
872 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
875 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
877 if (tmpbuf != smallbuf) Safefree(tmpbuf);
880 CopFILE_free(PL_curcop);
881 CopFILE_setn(PL_curcop, s, len);
883 CopLINE_set(PL_curcop, atoi(n)-1);
887 /* skip space before PL_thistoken */
890 S_skipspace0(pTHX_ register char *s)
892 PERL_ARGS_ASSERT_SKIPSPACE0;
899 PL_thiswhite = newSVpvs("");
900 sv_catsv(PL_thiswhite, PL_skipwhite);
901 sv_free(PL_skipwhite);
904 PL_realtokenstart = s - SvPVX(PL_linestr);
908 /* skip space after PL_thistoken */
911 S_skipspace1(pTHX_ register char *s)
913 const char *start = s;
914 I32 startoff = start - SvPVX(PL_linestr);
916 PERL_ARGS_ASSERT_SKIPSPACE1;
921 start = SvPVX(PL_linestr) + startoff;
922 if (!PL_thistoken && PL_realtokenstart >= 0) {
923 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
924 PL_thistoken = newSVpvn(tstart, start - tstart);
926 PL_realtokenstart = -1;
929 PL_nextwhite = newSVpvs("");
930 sv_catsv(PL_nextwhite, PL_skipwhite);
931 sv_free(PL_skipwhite);
938 S_skipspace2(pTHX_ register char *s, SV **svp)
941 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
942 const I32 startoff = s - SvPVX(PL_linestr);
944 PERL_ARGS_ASSERT_SKIPSPACE2;
947 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
948 if (!PL_madskills || !svp)
950 start = SvPVX(PL_linestr) + startoff;
951 if (!PL_thistoken && PL_realtokenstart >= 0) {
952 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
953 PL_thistoken = newSVpvn(tstart, start - tstart);
954 PL_realtokenstart = -1;
959 sv_setsv(*svp, PL_skipwhite);
960 sv_free(PL_skipwhite);
969 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
971 AV *av = CopFILEAVx(PL_curcop);
973 SV * const sv = newSV_type(SVt_PVMG);
975 sv_setsv(sv, orig_sv);
977 sv_setpvn(sv, buf, len);
980 av_store(av, (I32)CopLINE(PL_curcop), sv);
986 * Called to gobble the appropriate amount and type of whitespace.
987 * Skips comments as well.
991 S_skipspace(pTHX_ register char *s)
996 int startoff = s - SvPVX(PL_linestr);
998 PERL_ARGS_ASSERT_SKIPSPACE;
1001 sv_free(PL_skipwhite);
1005 PERL_ARGS_ASSERT_SKIPSPACE;
1007 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1008 while (s < PL_bufend && SPACE_OR_TAB(*s))
1018 SSize_t oldprevlen, oldoldprevlen;
1019 SSize_t oldloplen = 0, oldunilen = 0;
1020 while (s < PL_bufend && isSPACE(*s)) {
1021 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1026 if (s < PL_bufend && *s == '#') {
1027 while (s < PL_bufend && *s != '\n')
1029 if (s < PL_bufend) {
1031 if (PL_in_eval && !PL_rsfp) {
1038 /* only continue to recharge the buffer if we're at the end
1039 * of the buffer, we're not reading from a source filter, and
1040 * we're in normal lexing mode
1042 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1043 PL_lex_state == LEX_FORMLINE)
1050 /* try to recharge the buffer */
1052 curoff = s - SvPVX(PL_linestr);
1055 if ((s = filter_gets(PL_linestr, PL_rsfp,
1056 (prevlen = SvCUR(PL_linestr)))) == NULL)
1059 if (PL_madskills && curoff != startoff) {
1061 PL_skipwhite = newSVpvs("");
1062 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1066 /* mustn't throw out old stuff yet if madpropping */
1067 SvCUR(PL_linestr) = curoff;
1068 s = SvPVX(PL_linestr) + curoff;
1070 if (curoff && s[-1] == '\n')
1074 /* end of file. Add on the -p or -n magic */
1075 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1078 sv_catpvs(PL_linestr,
1079 ";}continue{print or die qq(-p destination: $!\\n);}");
1081 sv_setpvs(PL_linestr,
1082 ";}continue{print or die qq(-p destination: $!\\n);}");
1084 PL_minus_n = PL_minus_p = 0;
1086 else if (PL_minus_n) {
1088 sv_catpvs(PL_linestr, ";}");
1090 sv_setpvs(PL_linestr, ";}");
1096 sv_catpvs(PL_linestr,";");
1098 sv_setpvs(PL_linestr,";");
1101 /* reset variables for next time we lex */
1102 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1108 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1109 PL_last_lop = PL_last_uni = NULL;
1111 /* Close the filehandle. Could be from
1112 * STDIN, or a regular file. If we were reading code from
1113 * STDIN (because the commandline held no -e or filename)
1114 * then we don't close it, we reset it so the code can
1115 * read from STDIN too.
1118 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1119 PerlIO_clearerr(PL_rsfp);
1121 (void)PerlIO_close(PL_rsfp);
1126 /* not at end of file, so we only read another line */
1127 /* make corresponding updates to old pointers, for yyerror() */
1128 oldprevlen = PL_oldbufptr - PL_bufend;
1129 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1131 oldunilen = PL_last_uni - PL_bufend;
1133 oldloplen = PL_last_lop - PL_bufend;
1134 PL_linestart = PL_bufptr = s + prevlen;
1135 PL_bufend = s + SvCUR(PL_linestr);
1137 PL_oldbufptr = s + oldprevlen;
1138 PL_oldoldbufptr = s + oldoldprevlen;
1140 PL_last_uni = s + oldunilen;
1142 PL_last_lop = s + oldloplen;
1145 /* debugger active and we're not compiling the debugger code,
1146 * so store the line into the debugger's array of lines
1148 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
1149 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1156 PL_skipwhite = newSVpvs("");
1157 curoff = s - SvPVX(PL_linestr);
1158 if (curoff - startoff)
1159 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1168 * Check the unary operators to ensure there's no ambiguity in how they're
1169 * used. An ambiguous piece of code would be:
1171 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1172 * the +5 is its argument.
1182 if (PL_oldoldbufptr != PL_last_uni)
1184 while (isSPACE(*PL_last_uni))
1187 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1189 if ((t = strchr(s, '(')) && t < PL_bufptr)
1192 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1193 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1194 (int)(s - PL_last_uni), PL_last_uni);
1198 * LOP : macro to build a list operator. Its behaviour has been replaced
1199 * with a subroutine, S_lop() for which LOP is just another name.
1202 #define LOP(f,x) return lop(f,x,s)
1206 * Build a list operator (or something that might be one). The rules:
1207 * - if we have a next token, then it's a list operator [why?]
1208 * - if the next thing is an opening paren, then it's a function
1209 * - else it's a list operator
1213 S_lop(pTHX_ I32 f, int x, char *s)
1217 PERL_ARGS_ASSERT_LOP;
1223 PL_last_lop = PL_oldbufptr;
1224 PL_last_lop_op = (OPCODE)f;
1227 return REPORT(LSTOP);
1230 return REPORT(LSTOP);
1233 return REPORT(FUNC);
1236 return REPORT(FUNC);
1238 return REPORT(LSTOP);
1244 * Sets up for an eventual force_next(). start_force(0) basically does
1245 * an unshift, while start_force(-1) does a push. yylex removes items
1250 S_start_force(pTHX_ int where)
1254 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1255 where = PL_lasttoke;
1256 assert(PL_curforce < 0 || PL_curforce == where);
1257 if (PL_curforce != where) {
1258 for (i = PL_lasttoke; i > where; --i) {
1259 PL_nexttoke[i] = PL_nexttoke[i-1];
1263 if (PL_curforce < 0) /* in case of duplicate start_force() */
1264 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1265 PL_curforce = where;
1268 curmad('^', newSVpvs(""));
1269 CURMAD('_', PL_nextwhite);
1274 S_curmad(pTHX_ char slot, SV *sv)
1280 if (PL_curforce < 0)
1281 where = &PL_thismad;
1283 where = &PL_nexttoke[PL_curforce].next_mad;
1289 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1291 else if (PL_encoding) {
1292 sv_recode_to_utf8(sv, PL_encoding);
1297 /* keep a slot open for the head of the list? */
1298 if (slot != '_' && *where && (*where)->mad_key == '^') {
1299 (*where)->mad_key = slot;
1300 sv_free(MUTABLE_SV(((*where)->mad_val)));
1301 (*where)->mad_val = (void*)sv;
1304 addmad(newMADsv(slot, sv), where, 0);
1307 # define start_force(where) NOOP
1308 # define curmad(slot, sv) NOOP
1313 * When the lexer realizes it knows the next token (for instance,
1314 * it is reordering tokens for the parser) then it can call S_force_next
1315 * to know what token to return the next time the lexer is called. Caller
1316 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1317 * and possibly PL_expect to ensure the lexer handles the token correctly.
1321 S_force_next(pTHX_ I32 type)
1326 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1327 tokereport(type, &NEXTVAL_NEXTTOKE);
1331 if (PL_curforce < 0)
1332 start_force(PL_lasttoke);
1333 PL_nexttoke[PL_curforce].next_type = type;
1334 if (PL_lex_state != LEX_KNOWNEXT)
1335 PL_lex_defer = PL_lex_state;
1336 PL_lex_state = LEX_KNOWNEXT;
1337 PL_lex_expect = PL_expect;
1340 PL_nexttype[PL_nexttoke] = type;
1342 if (PL_lex_state != LEX_KNOWNEXT) {
1343 PL_lex_defer = PL_lex_state;
1344 PL_lex_expect = PL_expect;
1345 PL_lex_state = LEX_KNOWNEXT;
1351 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1354 SV * const sv = newSVpvn_utf8(start, len,
1357 && !is_ascii_string((const U8*)start, len)
1358 && is_utf8_string((const U8*)start, len));
1364 * When the lexer knows the next thing is a word (for instance, it has
1365 * just seen -> and it knows that the next char is a word char, then
1366 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1370 * char *start : buffer position (must be within PL_linestr)
1371 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1372 * int check_keyword : if true, Perl checks to make sure the word isn't
1373 * a keyword (do this if the word is a label, e.g. goto FOO)
1374 * int allow_pack : if true, : characters will also be allowed (require,
1375 * use, etc. do this)
1376 * int allow_initial_tick : used by the "sub" lexer only.
1380 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1386 PERL_ARGS_ASSERT_FORCE_WORD;
1388 start = SKIPSPACE1(start);
1390 if (isIDFIRST_lazy_if(s,UTF) ||
1391 (allow_pack && *s == ':') ||
1392 (allow_initial_tick && *s == '\'') )
1394 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1395 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1397 start_force(PL_curforce);
1399 curmad('X', newSVpvn(start,s-start));
1400 if (token == METHOD) {
1405 PL_expect = XOPERATOR;
1409 curmad('g', newSVpvs( "forced" ));
1410 NEXTVAL_NEXTTOKE.opval
1411 = (OP*)newSVOP(OP_CONST,0,
1412 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1413 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1421 * Called when the lexer wants $foo *foo &foo etc, but the program
1422 * text only contains the "foo" portion. The first argument is a pointer
1423 * to the "foo", and the second argument is the type symbol to prefix.
1424 * Forces the next token to be a "WORD".
1425 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1429 S_force_ident(pTHX_ register const char *s, int kind)
1433 PERL_ARGS_ASSERT_FORCE_IDENT;
1436 const STRLEN len = strlen(s);
1437 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1438 start_force(PL_curforce);
1439 NEXTVAL_NEXTTOKE.opval = o;
1442 o->op_private = OPpCONST_ENTERED;
1443 /* XXX see note in pp_entereval() for why we forgo typo
1444 warnings if the symbol must be introduced in an eval.
1446 gv_fetchpvn_flags(s, len,
1447 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1449 kind == '$' ? SVt_PV :
1450 kind == '@' ? SVt_PVAV :
1451 kind == '%' ? SVt_PVHV :
1459 Perl_str_to_version(pTHX_ SV *sv)
1464 const char *start = SvPV_const(sv,len);
1465 const char * const end = start + len;
1466 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1468 PERL_ARGS_ASSERT_STR_TO_VERSION;
1470 while (start < end) {
1474 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1479 retval += ((NV)n)/nshift;
1488 * Forces the next token to be a version number.
1489 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1490 * and if "guessing" is TRUE, then no new token is created (and the caller
1491 * must use an alternative parsing method).
1495 S_force_version(pTHX_ char *s, int guessing)
1501 I32 startoff = s - SvPVX(PL_linestr);
1504 PERL_ARGS_ASSERT_FORCE_VERSION;
1512 while (isDIGIT(*d) || *d == '_' || *d == '.')
1516 start_force(PL_curforce);
1517 curmad('X', newSVpvn(s,d-s));
1520 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1522 s = scan_num(s, &pl_yylval);
1523 version = pl_yylval.opval;
1524 ver = cSVOPx(version)->op_sv;
1525 if (SvPOK(ver) && !SvNIOK(ver)) {
1526 SvUPGRADE(ver, SVt_PVNV);
1527 SvNV_set(ver, str_to_version(ver));
1528 SvNOK_on(ver); /* hint that it is a version */
1531 else if (guessing) {
1534 sv_free(PL_nextwhite); /* let next token collect whitespace */
1536 s = SvPVX(PL_linestr) + startoff;
1544 if (PL_madskills && !version) {
1545 sv_free(PL_nextwhite); /* let next token collect whitespace */
1547 s = SvPVX(PL_linestr) + startoff;
1550 /* NOTE: The parser sees the package name and the VERSION swapped */
1551 start_force(PL_curforce);
1552 NEXTVAL_NEXTTOKE.opval = version;
1560 * Tokenize a quoted string passed in as an SV. It finds the next
1561 * chunk, up to end of string or a backslash. It may make a new
1562 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1567 S_tokeq(pTHX_ SV *sv)
1571 register char *send;
1576 PERL_ARGS_ASSERT_TOKEQ;
1581 s = SvPV_force(sv, len);
1582 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1585 while (s < send && *s != '\\')
1590 if ( PL_hints & HINT_NEW_STRING ) {
1591 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1595 if (s + 1 < send && (s[1] == '\\'))
1596 s++; /* all that, just for this */
1601 SvCUR_set(sv, d - SvPVX_const(sv));
1603 if ( PL_hints & HINT_NEW_STRING )
1604 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1609 * Now come three functions related to double-quote context,
1610 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1611 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1612 * interact with PL_lex_state, and create fake ( ... ) argument lists
1613 * to handle functions and concatenation.
1614 * They assume that whoever calls them will be setting up a fake
1615 * join call, because each subthing puts a ',' after it. This lets
1618 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1620 * (I'm not sure whether the spurious commas at the end of lcfirst's
1621 * arguments and join's arguments are created or not).
1626 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1628 * Pattern matching will set PL_lex_op to the pattern-matching op to
1629 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1631 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1633 * Everything else becomes a FUNC.
1635 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1636 * had an OP_CONST or OP_READLINE). This just sets us up for a
1637 * call to S_sublex_push().
1641 S_sublex_start(pTHX)
1644 register const I32 op_type = pl_yylval.ival;
1646 if (op_type == OP_NULL) {
1647 pl_yylval.opval = PL_lex_op;
1651 if (op_type == OP_CONST || op_type == OP_READLINE) {
1652 SV *sv = tokeq(PL_lex_stuff);
1654 if (SvTYPE(sv) == SVt_PVIV) {
1655 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1657 const char * const p = SvPV_const(sv, len);
1658 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1662 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1663 PL_lex_stuff = NULL;
1664 /* Allow <FH> // "foo" */
1665 if (op_type == OP_READLINE)
1666 PL_expect = XTERMORDORDOR;
1669 else if (op_type == OP_BACKTICK && PL_lex_op) {
1670 /* readpipe() vas overriden */
1671 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1672 pl_yylval.opval = PL_lex_op;
1674 PL_lex_stuff = NULL;
1678 PL_sublex_info.super_state = PL_lex_state;
1679 PL_sublex_info.sub_inwhat = (U16)op_type;
1680 PL_sublex_info.sub_op = PL_lex_op;
1681 PL_lex_state = LEX_INTERPPUSH;
1685 pl_yylval.opval = PL_lex_op;
1695 * Create a new scope to save the lexing state. The scope will be
1696 * ended in S_sublex_done. Returns a '(', starting the function arguments
1697 * to the uc, lc, etc. found before.
1698 * Sets PL_lex_state to LEX_INTERPCONCAT.
1707 PL_lex_state = PL_sublex_info.super_state;
1708 SAVEBOOL(PL_lex_dojoin);
1709 SAVEI32(PL_lex_brackets);
1710 SAVEI32(PL_lex_casemods);
1711 SAVEI32(PL_lex_starts);
1712 SAVEI8(PL_lex_state);
1713 SAVEVPTR(PL_lex_inpat);
1714 SAVEI16(PL_lex_inwhat);
1715 SAVECOPLINE(PL_curcop);
1716 SAVEPPTR(PL_bufptr);
1717 SAVEPPTR(PL_bufend);
1718 SAVEPPTR(PL_oldbufptr);
1719 SAVEPPTR(PL_oldoldbufptr);
1720 SAVEPPTR(PL_last_lop);
1721 SAVEPPTR(PL_last_uni);
1722 SAVEPPTR(PL_linestart);
1723 SAVESPTR(PL_linestr);
1724 SAVEGENERICPV(PL_lex_brackstack);
1725 SAVEGENERICPV(PL_lex_casestack);
1727 PL_linestr = PL_lex_stuff;
1728 PL_lex_stuff = NULL;
1730 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1731 = SvPVX(PL_linestr);
1732 PL_bufend += SvCUR(PL_linestr);
1733 PL_last_lop = PL_last_uni = NULL;
1734 SAVEFREESV(PL_linestr);
1736 PL_lex_dojoin = FALSE;
1737 PL_lex_brackets = 0;
1738 Newx(PL_lex_brackstack, 120, char);
1739 Newx(PL_lex_casestack, 12, char);
1740 PL_lex_casemods = 0;
1741 *PL_lex_casestack = '\0';
1743 PL_lex_state = LEX_INTERPCONCAT;
1744 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1746 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1747 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1748 PL_lex_inpat = PL_sublex_info.sub_op;
1750 PL_lex_inpat = NULL;
1757 * Restores lexer state after a S_sublex_push.
1764 if (!PL_lex_starts++) {
1765 SV * const sv = newSVpvs("");
1766 if (SvUTF8(PL_linestr))
1768 PL_expect = XOPERATOR;
1769 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1773 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1774 PL_lex_state = LEX_INTERPCASEMOD;
1778 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1779 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1780 PL_linestr = PL_lex_repl;
1782 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1783 PL_bufend += SvCUR(PL_linestr);
1784 PL_last_lop = PL_last_uni = NULL;
1785 SAVEFREESV(PL_linestr);
1786 PL_lex_dojoin = FALSE;
1787 PL_lex_brackets = 0;
1788 PL_lex_casemods = 0;
1789 *PL_lex_casestack = '\0';
1791 if (SvEVALED(PL_lex_repl)) {
1792 PL_lex_state = LEX_INTERPNORMAL;
1794 /* we don't clear PL_lex_repl here, so that we can check later
1795 whether this is an evalled subst; that means we rely on the
1796 logic to ensure sublex_done() is called again only via the
1797 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1800 PL_lex_state = LEX_INTERPCONCAT;
1810 PL_endwhite = newSVpvs("");
1811 sv_catsv(PL_endwhite, PL_thiswhite);
1815 sv_setpvs(PL_thistoken,"");
1817 PL_realtokenstart = -1;
1821 PL_bufend = SvPVX(PL_linestr);
1822 PL_bufend += SvCUR(PL_linestr);
1823 PL_expect = XOPERATOR;
1824 PL_sublex_info.sub_inwhat = 0;
1832 Extracts a pattern, double-quoted string, or transliteration. This
1835 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1836 processing a pattern (PL_lex_inpat is true), a transliteration
1837 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1839 Returns a pointer to the character scanned up to. If this is
1840 advanced from the start pointer supplied (i.e. if anything was
1841 successfully parsed), will leave an OP for the substring scanned
1842 in pl_yylval. Caller must intuit reason for not parsing further
1843 by looking at the next characters herself.
1847 double-quoted style: \r and \n
1848 regexp special ones: \D \s
1851 case and quoting: \U \Q \E
1852 stops on @ and $, but not for $ as tail anchor
1854 In transliterations:
1855 characters are VERY literal, except for - not at the start or end
1856 of the string, which indicates a range. If the range is in bytes,
1857 scan_const expands the range to the full set of intermediate
1858 characters. If the range is in utf8, the hyphen is replaced with
1859 a certain range mark which will be handled by pmtrans() in op.c.
1861 In double-quoted strings:
1863 double-quoted style: \r and \n
1865 deprecated backrefs: \1 (in substitution replacements)
1866 case and quoting: \U \Q \E
1869 scan_const does *not* construct ops to handle interpolated strings.
1870 It stops processing as soon as it finds an embedded $ or @ variable
1871 and leaves it to the caller to work out what's going on.
1873 embedded arrays (whether in pattern or not) could be:
1874 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1876 $ in double-quoted strings must be the symbol of an embedded scalar.
1878 $ in pattern could be $foo or could be tail anchor. Assumption:
1879 it's a tail anchor if $ is the last thing in the string, or if it's
1880 followed by one of "()| \r\n\t"
1882 \1 (backreferences) are turned into $1
1884 The structure of the code is
1885 while (there's a character to process) {
1886 handle transliteration ranges
1887 skip regexp comments /(?#comment)/ and codes /(?{code})/
1888 skip #-initiated comments in //x patterns
1889 check for embedded arrays
1890 check for embedded scalars
1892 leave intact backslashes from leaveit (below)
1893 deprecate \1 in substitution replacements
1894 handle string-changing backslashes \l \U \Q \E, etc.
1895 switch (what was escaped) {
1896 handle \- in a transliteration (becomes a literal -)
1897 handle \132 (octal characters)
1898 handle \x15 and \x{1234} (hex characters)
1899 handle \N{name} (named characters)
1900 handle \cV (control characters)
1901 handle printf-style backslashes (\f, \r, \n, etc)
1904 } (end if backslash)
1905 handle regular character
1906 } (end while character to read)
1911 S_scan_const(pTHX_ char *start)
1914 register char *send = PL_bufend; /* end of the constant */
1915 SV *sv = newSV(send - start); /* sv for the constant. See
1916 note below on sizing. */
1917 register char *s = start; /* start of the constant */
1918 register char *d = SvPVX(sv); /* destination for copies */
1919 bool dorange = FALSE; /* are we in a translit range? */
1920 bool didrange = FALSE; /* did we just finish a range? */
1921 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1922 I32 this_utf8 = UTF; /* Is the source string assumed
1923 to be UTF8? But, this can
1924 show as true when the source
1925 isn't utf8, as for example
1926 when it is entirely composed
1929 /* Note on sizing: The scanned constant is placed into sv, which is
1930 * initialized by newSV() assuming one byte of output for every byte of
1931 * input. This routine expects newSV() to allocate an extra byte for a
1932 * trailing NUL, which this routine will append if it gets to the end of
1933 * the input. There may be more bytes of input than output (eg., \N{LATIN
1934 * CAPITAL LETTER A}), or more output than input if the constant ends up
1935 * recoded to utf8, but each time a construct is found that might increase
1936 * the needed size, SvGROW() is called. Its size parameter each time is
1937 * based on the best guess estimate at the time, namely the length used so
1938 * far, plus the length the current construct will occupy, plus room for
1939 * the trailing NUL, plus one byte for every input byte still unscanned */
1943 UV literal_endpoint = 0;
1944 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1947 PERL_ARGS_ASSERT_SCAN_CONST;
1949 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1950 /* If we are doing a trans and we know we want UTF8 set expectation */
1951 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1952 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1956 while (s < send || dorange) {
1957 /* get transliterations out of the way (they're most literal) */
1958 if (PL_lex_inwhat == OP_TRANS) {
1959 /* expand a range A-Z to the full set of characters. AIE! */
1961 I32 i; /* current expanded character */
1962 I32 min; /* first character in range */
1963 I32 max; /* last character in range */
1974 char * const c = (char*)utf8_hop((U8*)d, -1);
1978 *c = (char)UTF_TO_NATIVE(0xff);
1979 /* mark the range as done, and continue */
1985 i = d - SvPVX_const(sv); /* remember current offset */
1988 SvLEN(sv) + (has_utf8 ?
1989 (512 - UTF_CONTINUATION_MARK +
1992 /* How many two-byte within 0..255: 128 in UTF-8,
1993 * 96 in UTF-8-mod. */
1995 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1997 d = SvPVX(sv) + i; /* refresh d after realloc */
2001 for (j = 0; j <= 1; j++) {
2002 char * const c = (char*)utf8_hop((U8*)d, -1);
2003 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2009 max = (U8)0xff; /* only to \xff */
2010 uvmax = uv; /* \x{100} to uvmax */
2012 d = c; /* eat endpoint chars */
2017 d -= 2; /* eat the first char and the - */
2018 min = (U8)*d; /* first char in range */
2019 max = (U8)d[1]; /* last char in range */
2026 "Invalid range \"%c-%c\" in transliteration operator",
2027 (char)min, (char)max);
2031 if (literal_endpoint == 2 &&
2032 ((isLOWER(min) && isLOWER(max)) ||
2033 (isUPPER(min) && isUPPER(max)))) {
2035 for (i = min; i <= max; i++)
2037 *d++ = NATIVE_TO_NEED(has_utf8,i);
2039 for (i = min; i <= max; i++)
2041 *d++ = NATIVE_TO_NEED(has_utf8,i);
2046 for (i = min; i <= max; i++)
2049 const U8 ch = (U8)NATIVE_TO_UTF(i);
2050 if (UNI_IS_INVARIANT(ch))
2053 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2054 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2063 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2065 *d++ = (char)UTF_TO_NATIVE(0xff);
2067 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2071 /* mark the range as done, and continue */
2075 literal_endpoint = 0;
2080 /* range begins (ignore - as first or last char) */
2081 else if (*s == '-' && s+1 < send && s != start) {
2083 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2090 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2100 literal_endpoint = 0;
2101 native_range = TRUE;
2106 /* if we get here, we're not doing a transliteration */
2108 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2109 except for the last char, which will be done separately. */
2110 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2112 while (s+1 < send && *s != ')')
2113 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2115 else if (s[2] == '{' /* This should match regcomp.c */
2116 || (s[2] == '?' && s[3] == '{'))
2119 char *regparse = s + (s[2] == '{' ? 3 : 4);
2122 while (count && (c = *regparse)) {
2123 if (c == '\\' && regparse[1])
2131 if (*regparse != ')')
2132 regparse--; /* Leave one char for continuation. */
2133 while (s < regparse)
2134 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2138 /* likewise skip #-initiated comments in //x patterns */
2139 else if (*s == '#' && PL_lex_inpat &&
2140 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2141 while (s+1 < send && *s != '\n')
2142 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2145 /* check for embedded arrays
2146 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2148 else if (*s == '@' && s[1]) {
2149 if (isALNUM_lazy_if(s+1,UTF))
2151 if (strchr(":'{$", s[1]))
2153 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2154 break; /* in regexp, neither @+ nor @- are interpolated */
2157 /* check for embedded scalars. only stop if we're sure it's a
2160 else if (*s == '$') {
2161 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2163 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2165 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2166 "Possible unintended interpolation of $\\ in regex");
2168 break; /* in regexp, $ might be tail anchor */
2172 /* End of else if chain - OP_TRANS rejoin rest */
2175 if (*s == '\\' && s+1 < send) {
2178 /* deprecate \1 in strings and substitution replacements */
2179 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2180 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2182 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2187 /* string-change backslash escapes */
2188 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2192 /* skip any other backslash escapes in a pattern */
2193 else if (PL_lex_inpat) {
2194 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2195 goto default_action;
2198 /* if we get here, it's either a quoted -, or a digit */
2201 /* quoted - in transliterations */
2203 if (PL_lex_inwhat == OP_TRANS) {
2210 if ((isALPHA(*s) || isDIGIT(*s)))
2211 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2212 "Unrecognized escape \\%c passed through",
2214 /* default action is to copy the quoted character */
2215 goto default_action;
2218 /* eg. \132 indicates the octal constant 0x132 */
2219 case '0': case '1': case '2': case '3':
2220 case '4': case '5': case '6': case '7':
2224 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2227 goto NUM_ESCAPE_INSERT;
2229 /* eg. \x24 indicates the hex constant 0x24 */
2233 char* const e = strchr(s, '}');
2234 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2235 PERL_SCAN_DISALLOW_PREFIX;
2240 yyerror("Missing right brace on \\x{}");
2244 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2250 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2251 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2257 /* Insert oct, hex, or \N{U+...} escaped character. There will
2258 * always be enough room in sv since such escapes will be
2259 * longer than any UTF-8 sequence they can end up as, except if
2260 * they force us to recode the rest of the string into utf8 */
2262 /* Here uv is the ordinal of the next character being added in
2263 * unicode (converted from native). (It has to be done before
2264 * here because \N is interpreted as unicode, and oct and hex
2266 if (!UNI_IS_INVARIANT(uv)) {
2267 if (!has_utf8 && uv > 255) {
2268 /* Might need to recode whatever we have accumulated so
2269 * far if it contains any chars variant in utf8 or
2272 SvCUR_set(sv, d - SvPVX_const(sv));
2275 /* See Note on sizing above. */
2276 sv_utf8_upgrade_flags_grow(sv,
2277 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2278 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2279 d = SvPVX(sv) + SvCUR(sv);
2284 d = (char*)uvuni_to_utf8((U8*)d, uv);
2285 if (PL_lex_inwhat == OP_TRANS &&
2286 PL_sublex_info.sub_op) {
2287 PL_sublex_info.sub_op->op_private |=
2288 (PL_lex_repl ? OPpTRANS_FROM_UTF
2292 if (uv > 255 && !dorange)
2293 native_range = FALSE;
2305 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2310 char* e = strchr(s, '}');
2316 yyerror("Missing right brace on \\N{}");
2320 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2321 /* \N{U+...} The ... is a unicode value even on EBCDIC
2323 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2324 PERL_SCAN_DISALLOW_PREFIX;
2327 uv = grok_hex(s, &len, &flags, NULL);
2328 if ( e > s && len != (STRLEN)(e - s) ) {
2332 goto NUM_ESCAPE_INSERT;
2334 res = newSVpvn(s + 1, e - s - 1);
2335 res = new_constant( NULL, 0, "charnames",
2336 res, NULL, s - 2, e - s + 3 );
2338 sv_utf8_upgrade(res);
2339 str = SvPV_const(res,len);
2340 #ifdef EBCDIC_NEVER_MIND
2341 /* charnames uses pack U and that has been
2342 * recently changed to do the below uni->native
2343 * mapping, so this would be redundant (and wrong,
2344 * the code point would be doubly converted).
2345 * But leave this in just in case the pack U change
2346 * gets revoked, but the semantics is still
2347 * desireable for charnames. --jhi */
2349 UV uv = utf8_to_uvchr((const U8*)str, 0);
2352 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2354 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2355 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2356 str = SvPV_const(res, len);
2360 /* If destination is not in utf8 but this new character is,
2361 * recode the dest to utf8 */
2362 if (!has_utf8 && SvUTF8(res)) {
2363 SvCUR_set(sv, d - SvPVX_const(sv));
2366 /* See Note on sizing above. */
2367 sv_utf8_upgrade_flags_grow(sv,
2368 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2369 len + (STRLEN)(send - s) + 1);
2370 d = SvPVX(sv) + SvCUR(sv);
2372 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2374 /* See Note on sizing above. (NOTE: SvCUR() is not set
2375 * correctly here). */
2376 const STRLEN off = d - SvPVX_const(sv);
2377 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2381 native_range = FALSE; /* \N{} is guessed to be Unicode */
2383 Copy(str, d, len, char);
2390 yyerror("Missing braces on \\N{}");
2393 /* \c is a control character */
2402 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2405 yyerror("Missing control char name in \\c");
2409 /* printf-style backslashes, formfeeds, newlines, etc */
2411 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2414 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2417 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2420 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2423 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2426 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2429 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2435 } /* end if (backslash) */
2442 /* If we started with encoded form, or already know we want it,
2443 then encode the next character */
2444 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2448 /* One might think that it is wasted effort in the case of the
2449 * source being utf8 (this_utf8 == TRUE) to take the next character
2450 * in the source, convert it to an unsigned value, and then convert
2451 * it back again. But the source has not been validated here. The
2452 * routine that does the conversion checks for errors like
2455 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2456 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2458 SvCUR_set(sv, d - SvPVX_const(sv));
2461 /* See Note on sizing above. */
2462 sv_utf8_upgrade_flags_grow(sv,
2463 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2464 need + (STRLEN)(send - s) + 1);
2465 d = SvPVX(sv) + SvCUR(sv);
2467 } else if (need > len) {
2468 /* encoded value larger than old, may need extra space (NOTE:
2469 * SvCUR() is not set correctly here). See Note on sizing
2471 const STRLEN off = d - SvPVX_const(sv);
2472 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2476 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2478 if (uv > 255 && !dorange)
2479 native_range = FALSE;
2483 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2485 } /* while loop to process each character */
2487 /* terminate the string and set up the sv */
2489 SvCUR_set(sv, d - SvPVX_const(sv));
2490 if (SvCUR(sv) >= SvLEN(sv))
2491 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2494 if (PL_encoding && !has_utf8) {
2495 sv_recode_to_utf8(sv, PL_encoding);
2501 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2502 PL_sublex_info.sub_op->op_private |=
2503 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2507 /* shrink the sv if we allocated more than we used */
2508 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2509 SvPV_shrink_to_cur(sv);
2512 /* return the substring (via pl_yylval) only if we parsed anything */
2513 if (s > PL_bufptr) {
2514 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2515 const char *const key = PL_lex_inpat ? "qr" : "q";
2516 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2520 if (PL_lex_inwhat == OP_TRANS) {
2523 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2531 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2534 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2541 * Returns TRUE if there's more to the expression (e.g., a subscript),
2544 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2546 * ->[ and ->{ return TRUE
2547 * { and [ outside a pattern are always subscripts, so return TRUE
2548 * if we're outside a pattern and it's not { or [, then return FALSE
2549 * if we're in a pattern and the first char is a {
2550 * {4,5} (any digits around the comma) returns FALSE
2551 * if we're in a pattern and the first char is a [
2553 * [SOMETHING] has a funky algorithm to decide whether it's a
2554 * character class or not. It has to deal with things like
2555 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2556 * anything else returns TRUE
2559 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2562 S_intuit_more(pTHX_ register char *s)
2566 PERL_ARGS_ASSERT_INTUIT_MORE;
2568 if (PL_lex_brackets)
2570 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2572 if (*s != '{' && *s != '[')
2577 /* In a pattern, so maybe we have {n,m}. */
2594 /* On the other hand, maybe we have a character class */
2597 if (*s == ']' || *s == '^')
2600 /* this is terrifying, and it works */
2601 int weight = 2; /* let's weigh the evidence */
2603 unsigned char un_char = 255, last_un_char;
2604 const char * const send = strchr(s,']');
2605 char tmpbuf[sizeof PL_tokenbuf * 4];
2607 if (!send) /* has to be an expression */
2610 Zero(seen,256,char);
2613 else if (isDIGIT(*s)) {
2615 if (isDIGIT(s[1]) && s[2] == ']')
2621 for (; s < send; s++) {
2622 last_un_char = un_char;
2623 un_char = (unsigned char)*s;
2628 weight -= seen[un_char] * 10;
2629 if (isALNUM_lazy_if(s+1,UTF)) {
2631 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2632 len = (int)strlen(tmpbuf);
2633 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2638 else if (*s == '$' && s[1] &&
2639 strchr("[#!%*<>()-=",s[1])) {
2640 if (/*{*/ strchr("])} =",s[2]))
2649 if (strchr("wds]",s[1]))
2651 else if (seen[(U8)'\''] || seen[(U8)'"'])
2653 else if (strchr("rnftbxcav",s[1]))
2655 else if (isDIGIT(s[1])) {
2657 while (s[1] && isDIGIT(s[1]))
2667 if (strchr("aA01! ",last_un_char))
2669 if (strchr("zZ79~",s[1]))
2671 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2672 weight -= 5; /* cope with negative subscript */
2675 if (!isALNUM(last_un_char)
2676 && !(last_un_char == '$' || last_un_char == '@'
2677 || last_un_char == '&')
2678 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2683 if (keyword(tmpbuf, d - tmpbuf, 0))
2686 if (un_char == last_un_char + 1)
2688 weight -= seen[un_char];
2693 if (weight >= 0) /* probably a character class */
2703 * Does all the checking to disambiguate
2705 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2706 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2708 * First argument is the stuff after the first token, e.g. "bar".
2710 * Not a method if bar is a filehandle.
2711 * Not a method if foo is a subroutine prototyped to take a filehandle.
2712 * Not a method if it's really "Foo $bar"
2713 * Method if it's "foo $bar"
2714 * Not a method if it's really "print foo $bar"
2715 * Method if it's really "foo package::" (interpreted as package->foo)
2716 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2717 * Not a method if bar is a filehandle or package, but is quoted with
2722 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2725 char *s = start + (*start == '$');
2726 char tmpbuf[sizeof PL_tokenbuf];
2733 PERL_ARGS_ASSERT_INTUIT_METHOD;
2736 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2740 const char *proto = SvPVX_const(cv);
2751 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2752 /* start is the beginning of the possible filehandle/object,
2753 * and s is the end of it
2754 * tmpbuf is a copy of it
2757 if (*start == '$') {
2758 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2759 isUPPER(*PL_tokenbuf))
2762 len = start - SvPVX(PL_linestr);
2766 start = SvPVX(PL_linestr) + len;
2770 return *s == '(' ? FUNCMETH : METHOD;
2772 if (!keyword(tmpbuf, len, 0)) {
2773 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2777 soff = s - SvPVX(PL_linestr);
2781 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2782 if (indirgv && GvCVu(indirgv))
2784 /* filehandle or package name makes it a method */
2785 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2787 soff = s - SvPVX(PL_linestr);
2790 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2791 return 0; /* no assumptions -- "=>" quotes bearword */
2793 start_force(PL_curforce);
2794 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2795 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
2796 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2798 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2803 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2805 return *s == '(' ? FUNCMETH : METHOD;
2811 /* Encoded script support. filter_add() effectively inserts a
2812 * 'pre-processing' function into the current source input stream.
2813 * Note that the filter function only applies to the current source file
2814 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2816 * The datasv parameter (which may be NULL) can be used to pass
2817 * private data to this instance of the filter. The filter function
2818 * can recover the SV using the FILTER_DATA macro and use it to
2819 * store private buffers and state information.
2821 * The supplied datasv parameter is upgraded to a PVIO type
2822 * and the IoDIRP/IoANY field is used to store the function pointer,
2823 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2824 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2825 * private use must be set using malloc'd pointers.
2829 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2838 if (!PL_rsfp_filters)
2839 PL_rsfp_filters = newAV();
2842 SvUPGRADE(datasv, SVt_PVIO);
2843 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2844 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2845 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2846 FPTR2DPTR(void *, IoANY(datasv)),
2847 SvPV_nolen(datasv)));
2848 av_unshift(PL_rsfp_filters, 1);
2849 av_store(PL_rsfp_filters, 0, datasv) ;
2854 /* Delete most recently added instance of this filter function. */
2856 Perl_filter_del(pTHX_ filter_t funcp)
2861 PERL_ARGS_ASSERT_FILTER_DEL;
2864 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2865 FPTR2DPTR(void*, funcp)));
2867 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2869 /* if filter is on top of stack (usual case) just pop it off */
2870 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2871 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2872 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2873 IoANY(datasv) = (void *)NULL;
2874 sv_free(av_pop(PL_rsfp_filters));
2878 /* we need to search for the correct entry and clear it */
2879 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2883 /* Invoke the idxth filter function for the current rsfp. */
2884 /* maxlen 0 = read one text line */
2886 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2891 /* This API is bad. It should have been using unsigned int for maxlen.
2892 Not sure if we want to change the API, but if not we should sanity
2893 check the value here. */
2894 const unsigned int correct_length
2903 PERL_ARGS_ASSERT_FILTER_READ;
2905 if (!PL_parser || !PL_rsfp_filters)
2907 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2908 /* Provide a default input filter to make life easy. */
2909 /* Note that we append to the line. This is handy. */
2910 DEBUG_P(PerlIO_printf(Perl_debug_log,
2911 "filter_read %d: from rsfp\n", idx));
2912 if (correct_length) {
2915 const int old_len = SvCUR(buf_sv);
2917 /* ensure buf_sv is large enough */
2918 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2919 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2920 correct_length)) <= 0) {
2921 if (PerlIO_error(PL_rsfp))
2922 return -1; /* error */
2924 return 0 ; /* end of file */
2926 SvCUR_set(buf_sv, old_len + len) ;
2929 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2930 if (PerlIO_error(PL_rsfp))
2931 return -1; /* error */
2933 return 0 ; /* end of file */
2936 return SvCUR(buf_sv);
2938 /* Skip this filter slot if filter has been deleted */
2939 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2940 DEBUG_P(PerlIO_printf(Perl_debug_log,
2941 "filter_read %d: skipped (filter deleted)\n",
2943 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2945 /* Get function pointer hidden within datasv */
2946 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2947 DEBUG_P(PerlIO_printf(Perl_debug_log,
2948 "filter_read %d: via function %p (%s)\n",
2949 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2950 /* Call function. The function is expected to */
2951 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2952 /* Return: <0:error, =0:eof, >0:not eof */
2953 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2957 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2961 PERL_ARGS_ASSERT_FILTER_GETS;
2963 #ifdef PERL_CR_FILTER
2964 if (!PL_rsfp_filters) {
2965 filter_add(S_cr_textfilter,NULL);
2968 if (PL_rsfp_filters) {
2970 SvCUR_set(sv, 0); /* start with empty line */
2971 if (FILTER_READ(0, sv, 0) > 0)
2972 return ( SvPVX(sv) ) ;
2977 return (sv_gets(sv, fp, append));
2981 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
2986 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2988 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2992 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2993 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2995 return GvHV(gv); /* Foo:: */
2998 /* use constant CLASS => 'MyClass' */
2999 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3000 if (gv && GvCV(gv)) {
3001 SV * const sv = cv_const_sv(GvCV(gv));
3003 pkgname = SvPV_const(sv, len);
3006 return gv_stashpvn(pkgname, len, 0);
3010 * S_readpipe_override
3011 * Check whether readpipe() is overriden, and generates the appropriate
3012 * optree, provided sublex_start() is called afterwards.
3015 S_readpipe_override(pTHX)
3018 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3019 pl_yylval.ival = OP_BACKTICK;
3021 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3023 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3024 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3025 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3027 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3028 append_elem(OP_LIST,
3029 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3030 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3037 * The intent of this yylex wrapper is to minimize the changes to the
3038 * tokener when we aren't interested in collecting madprops. It remains
3039 * to be seen how successful this strategy will be...
3046 char *s = PL_bufptr;
3048 /* make sure PL_thiswhite is initialized */
3052 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3053 if (PL_pending_ident)
3054 return S_pending_ident(aTHX);
3056 /* previous token ate up our whitespace? */
3057 if (!PL_lasttoke && PL_nextwhite) {
3058 PL_thiswhite = PL_nextwhite;
3062 /* isolate the token, and figure out where it is without whitespace */
3063 PL_realtokenstart = -1;
3067 assert(PL_curforce < 0);
3069 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3070 if (!PL_thistoken) {
3071 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3072 PL_thistoken = newSVpvs("");
3074 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3075 PL_thistoken = newSVpvn(tstart, s - tstart);
3078 if (PL_thismad) /* install head */
3079 CURMAD('X', PL_thistoken);
3082 /* last whitespace of a sublex? */
3083 if (optype == ')' && PL_endwhite) {
3084 CURMAD('X', PL_endwhite);
3089 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3090 if (!PL_thiswhite && !PL_endwhite && !optype) {
3091 sv_free(PL_thistoken);
3096 /* put off final whitespace till peg */
3097 if (optype == ';' && !PL_rsfp) {
3098 PL_nextwhite = PL_thiswhite;
3101 else if (PL_thisopen) {
3102 CURMAD('q', PL_thisopen);
3104 sv_free(PL_thistoken);
3108 /* Store actual token text as madprop X */
3109 CURMAD('X', PL_thistoken);
3113 /* add preceding whitespace as madprop _ */
3114 CURMAD('_', PL_thiswhite);
3118 /* add quoted material as madprop = */
3119 CURMAD('=', PL_thisstuff);
3123 /* add terminating quote as madprop Q */
3124 CURMAD('Q', PL_thisclose);
3128 /* special processing based on optype */
3132 /* opval doesn't need a TOKEN since it can already store mp */
3142 if (pl_yylval.opval)
3143 append_madprops(PL_thismad, pl_yylval.opval, 0);
3151 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3160 /* remember any fake bracket that lexer is about to discard */
3161 if (PL_lex_brackets == 1 &&
3162 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3165 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3168 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3169 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3172 break; /* don't bother looking for trailing comment */
3181 /* attach a trailing comment to its statement instead of next token */
3185 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3187 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3189 if (*s == '\n' || *s == '#') {
3190 while (s < PL_bufend && *s != '\n')
3194 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3195 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3212 /* Create new token struct. Note: opvals return early above. */
3213 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3220 S_tokenize_use(pTHX_ int is_use, char *s) {
3223 PERL_ARGS_ASSERT_TOKENIZE_USE;
3225 if (PL_expect != XSTATE)
3226 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3227 is_use ? "use" : "no"));
3229 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3230 s = force_version(s, TRUE);
3231 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3232 start_force(PL_curforce);
3233 NEXTVAL_NEXTTOKE.opval = NULL;
3236 else if (*s == 'v') {
3237 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3238 s = force_version(s, FALSE);
3242 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3243 s = force_version(s, FALSE);
3245 pl_yylval.ival = is_use;
3249 static const char* const exp_name[] =
3250 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3251 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3258 Works out what to call the token just pulled out of the input
3259 stream. The yacc parser takes care of taking the ops we return and
3260 stitching them into a tree.
3266 if read an identifier
3267 if we're in a my declaration
3268 croak if they tried to say my($foo::bar)
3269 build the ops for a my() declaration
3270 if it's an access to a my() variable
3271 are we in a sort block?
3272 croak if my($a); $a <=> $b
3273 build ops for access to a my() variable
3274 if in a dq string, and they've said @foo and we can't find @foo
3276 build ops for a bareword
3277 if we already built the token before, use it.
3282 #pragma segment Perl_yylex
3288 register char *s = PL_bufptr;
3293 /* orig_keyword, gvp, and gv are initialized here because
3294 * jump to the label just_a_word_zero can bypass their
3295 * initialization later. */
3296 I32 orig_keyword = 0;
3301 SV* tmp = newSVpvs("");
3302 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3303 (IV)CopLINE(PL_curcop),
3304 lex_state_names[PL_lex_state],
3305 exp_name[PL_expect],
3306 pv_display(tmp, s, strlen(s), 0, 60));
3309 /* check if there's an identifier for us to look at */
3310 if (PL_pending_ident)
3311 return REPORT(S_pending_ident(aTHX));
3313 /* no identifier pending identification */
3315 switch (PL_lex_state) {
3317 case LEX_NORMAL: /* Some compilers will produce faster */
3318 case LEX_INTERPNORMAL: /* code if we comment these out. */
3322 /* when we've already built the next token, just pull it out of the queue */
3326 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3328 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3329 PL_nexttoke[PL_lasttoke].next_mad = 0;
3330 if (PL_thismad && PL_thismad->mad_key == '_') {
3331 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3332 PL_thismad->mad_val = 0;
3333 mad_free(PL_thismad);
3338 PL_lex_state = PL_lex_defer;
3339 PL_expect = PL_lex_expect;
3340 PL_lex_defer = LEX_NORMAL;
3341 if (!PL_nexttoke[PL_lasttoke].next_type)
3346 pl_yylval = PL_nextval[PL_nexttoke];
3348 PL_lex_state = PL_lex_defer;
3349 PL_expect = PL_lex_expect;
3350 PL_lex_defer = LEX_NORMAL;
3354 /* FIXME - can these be merged? */
3355 return(PL_nexttoke[PL_lasttoke].next_type);
3357 return REPORT(PL_nexttype[PL_nexttoke]);
3360 /* interpolated case modifiers like \L \U, including \Q and \E.
3361 when we get here, PL_bufptr is at the \
3363 case LEX_INTERPCASEMOD:
3365 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3366 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3368 /* handle \E or end of string */
3369 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3371 if (PL_lex_casemods) {
3372 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3373 PL_lex_casestack[PL_lex_casemods] = '\0';
3375 if (PL_bufptr != PL_bufend
3376 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3378 PL_lex_state = LEX_INTERPCONCAT;
3381 PL_thistoken = newSVpvs("\\E");
3387 while (PL_bufptr != PL_bufend &&
3388 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3390 PL_thiswhite = newSVpvs("");
3391 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3395 if (PL_bufptr != PL_bufend)
3398 PL_lex_state = LEX_INTERPCONCAT;
3402 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3403 "### Saw case modifier\n"); });
3405 if (s[1] == '\\' && s[2] == 'E') {
3408 PL_thiswhite = newSVpvs("");
3409 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3412 PL_lex_state = LEX_INTERPCONCAT;
3417 if (!PL_madskills) /* when just compiling don't need correct */
3418 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3419 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3420 if ((*s == 'L' || *s == 'U') &&
3421 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3422 PL_lex_casestack[--PL_lex_casemods] = '\0';
3425 if (PL_lex_casemods > 10)
3426 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3427 PL_lex_casestack[PL_lex_casemods++] = *s;
3428 PL_lex_casestack[PL_lex_casemods] = '\0';
3429 PL_lex_state = LEX_INTERPCONCAT;
3430 start_force(PL_curforce);
3431 NEXTVAL_NEXTTOKE.ival = 0;
3433 start_force(PL_curforce);
3435 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3437 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3439 NEXTVAL_NEXTTOKE.ival = OP_LC;
3441 NEXTVAL_NEXTTOKE.ival = OP_UC;
3443 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3445 Perl_croak(aTHX_ "panic: yylex");
3447 SV* const tmpsv = newSVpvs("\\ ");
3448 /* replace the space with the character we want to escape
3450 SvPVX(tmpsv)[1] = *s;
3456 if (PL_lex_starts) {
3462 sv_free(PL_thistoken);
3463 PL_thistoken = newSVpvs("");
3466 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3467 if (PL_lex_casemods == 1 && PL_lex_inpat)
3476 case LEX_INTERPPUSH:
3477 return REPORT(sublex_push());
3479 case LEX_INTERPSTART:
3480 if (PL_bufptr == PL_bufend)
3481 return REPORT(sublex_done());
3482 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3483 "### Interpolated variable\n"); });
3485 PL_lex_dojoin = (*PL_bufptr == '@');
3486 PL_lex_state = LEX_INTERPNORMAL;
3487 if (PL_lex_dojoin) {
3488 start_force(PL_curforce);
3489 NEXTVAL_NEXTTOKE.ival = 0;
3491 start_force(PL_curforce);
3492 force_ident("\"", '$');
3493 start_force(PL_curforce);
3494 NEXTVAL_NEXTTOKE.ival = 0;
3496 start_force(PL_curforce);
3497 NEXTVAL_NEXTTOKE.ival = 0;
3499 start_force(PL_curforce);
3500 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3503 if (PL_lex_starts++) {
3508 sv_free(PL_thistoken);
3509 PL_thistoken = newSVpvs("");
3512 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3513 if (!PL_lex_casemods && PL_lex_inpat)
3520 case LEX_INTERPENDMAYBE:
3521 if (intuit_more(PL_bufptr)) {
3522 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3528 if (PL_lex_dojoin) {
3529 PL_lex_dojoin = FALSE;
3530 PL_lex_state = LEX_INTERPCONCAT;
3534 sv_free(PL_thistoken);
3535 PL_thistoken = newSVpvs("");
3540 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3541 && SvEVALED(PL_lex_repl))
3543 if (PL_bufptr != PL_bufend)
3544 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3548 case LEX_INTERPCONCAT:
3550 if (PL_lex_brackets)
3551 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3553 if (PL_bufptr == PL_bufend)
3554 return REPORT(sublex_done());
3556 if (SvIVX(PL_linestr) == '\'') {
3557 SV *sv = newSVsv(PL_linestr);
3560 else if ( PL_hints & HINT_NEW_RE )
3561 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3562 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3566 s = scan_const(PL_bufptr);
3568 PL_lex_state = LEX_INTERPCASEMOD;
3570 PL_lex_state = LEX_INTERPSTART;
3573 if (s != PL_bufptr) {
3574 start_force(PL_curforce);
3576 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3578 NEXTVAL_NEXTTOKE = pl_yylval;
3581 if (PL_lex_starts++) {
3585 sv_free(PL_thistoken);
3586 PL_thistoken = newSVpvs("");
3589 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3590 if (!PL_lex_casemods && PL_lex_inpat)
3603 PL_lex_state = LEX_NORMAL;
3604 s = scan_formline(PL_bufptr);
3605 if (!PL_lex_formbrack)
3611 PL_oldoldbufptr = PL_oldbufptr;
3617 sv_free(PL_thistoken);
3620 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3624 if (isIDFIRST_lazy_if(s,UTF))
3627 unsigned char c = *s;
3628 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3629 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
3630 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
3635 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
3639 goto fake_eof; /* emulate EOF on ^D or ^Z */
3648 if (PL_lex_brackets) {
3649 yyerror((const char *)
3651 ? "Format not terminated"
3652 : "Missing right curly or square bracket"));
3654 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3655 "### Tokener got EOF\n");
3659 if (s++ < PL_bufend)
3660 goto retry; /* ignore stray nulls */
3663 if (!PL_in_eval && !PL_preambled) {
3664 PL_preambled = TRUE;
3670 /* Generate a string of Perl code to load the debugger.
3671 * If PERL5DB is set, it will return the contents of that,
3672 * otherwise a compile-time require of perl5db.pl. */
3674 const char * const pdb = PerlEnv_getenv("PERL5DB");
3677 sv_setpv(PL_linestr, pdb);
3678 sv_catpvs(PL_linestr,";");
3680 SETERRNO(0,SS_NORMAL);
3681 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3684 sv_setpvs(PL_linestr,"");
3685 if (PL_preambleav) {
3686 SV **svp = AvARRAY(PL_preambleav);
3687 SV **const end = svp + AvFILLp(PL_preambleav);
3689 sv_catsv(PL_linestr, *svp);
3691 sv_catpvs(PL_linestr, ";");
3693 sv_free(MUTABLE_SV(PL_preambleav));
3694 PL_preambleav = NULL;
3697 sv_catpvs(PL_linestr,
3698 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3699 if (PL_minus_n || PL_minus_p) {
3700 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3702 sv_catpvs(PL_linestr,"chomp;");
3705 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3706 || *PL_splitstr == '"')
3707 && strchr(PL_splitstr + 1, *PL_splitstr))
3708 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3710 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3711 bytes can be used as quoting characters. :-) */
3712 const char *splits = PL_splitstr;
3713 sv_catpvs(PL_linestr, "our @F=split(q\0");
3716 if (*splits == '\\')
3717 sv_catpvn(PL_linestr, splits, 1);
3718 sv_catpvn(PL_linestr, splits, 1);
3719 } while (*splits++);
3720 /* This loop will embed the trailing NUL of
3721 PL_linestr as the last thing it does before
3723 sv_catpvs(PL_linestr, ");");
3727 sv_catpvs(PL_linestr,"our @F=split(' ');");
3730 sv_catpvs(PL_linestr, "\n");
3731 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3732 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3733 PL_last_lop = PL_last_uni = NULL;
3734 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3735 update_debugger_info(PL_linestr, NULL, 0);
3739 bof = PL_rsfp ? TRUE : FALSE;
3740 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3743 PL_realtokenstart = -1;
3746 if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3747 PerlIO_clearerr(PL_rsfp);
3749 (void)PerlIO_close(PL_rsfp);
3751 PL_doextract = FALSE;
3753 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3759 sv_setpvs(PL_linestr, ";}continue{print;}");
3761 sv_setpvs(PL_linestr, ";}");
3762 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3763 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3764 PL_last_lop = PL_last_uni = NULL;
3765 PL_minus_n = PL_minus_p = 0;
3768 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3769 PL_last_lop = PL_last_uni = NULL;
3770 sv_setpvs(PL_linestr,"");
3771 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3773 /* If it looks like the start of a BOM or raw UTF-16,
3774 * check if it in fact is. */
3780 #ifdef PERLIO_IS_STDIO
3781 # ifdef __GNU_LIBRARY__
3782 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3783 # define FTELL_FOR_PIPE_IS_BROKEN
3787 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3788 # define FTELL_FOR_PIPE_IS_BROKEN
3793 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3795 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3796 s = swallow_bom((U8*)s);
3800 /* Incest with pod. */
3803 sv_catsv(PL_thiswhite, PL_linestr);
3805 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3806 sv_setpvs(PL_linestr, "");
3807 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3808 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3809 PL_last_lop = PL_last_uni = NULL;
3810 PL_doextract = FALSE;
3814 } while (PL_doextract);
3815 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3816 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3817 update_debugger_info(PL_linestr, NULL, 0);
3818 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3819 PL_last_lop = PL_last_uni = NULL;
3820 if (CopLINE(PL_curcop) == 1) {
3821 while (s < PL_bufend && isSPACE(*s))
3823 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3827 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3831 if (*s == '#' && *(s+1) == '!')
3833 #ifdef ALTERNATE_SHEBANG
3835 static char const as[] = ALTERNATE_SHEBANG;
3836 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3837 d = s + (sizeof(as) - 1);
3839 #endif /* ALTERNATE_SHEBANG */
3848 while (*d && !isSPACE(*d))
3852 #ifdef ARG_ZERO_IS_SCRIPT
3853 if (ipathend > ipath) {
3855 * HP-UX (at least) sets argv[0] to the script name,
3856 * which makes $^X incorrect. And Digital UNIX and Linux,
3857 * at least, set argv[0] to the basename of the Perl
3858 * interpreter. So, having found "#!", we'll set it right.
3860 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3862 assert(SvPOK(x) || SvGMAGICAL(x));
3863 if (sv_eq(x, CopFILESV(PL_curcop))) {
3864 sv_setpvn(x, ipath, ipathend - ipath);
3870 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3871 const char * const lstart = SvPV_const(x,llen);
3873 bstart += blen - llen;
3874 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3875 sv_setpvn(x, ipath, ipathend - ipath);
3880 TAINT_NOT; /* $^X is always tainted, but that's OK */
3882 #endif /* ARG_ZERO_IS_SCRIPT */
3887 d = instr(s,"perl -");
3889 d = instr(s,"perl");
3891 /* avoid getting into infinite loops when shebang
3892 * line contains "Perl" rather than "perl" */
3894 for (d = ipathend-4; d >= ipath; --d) {
3895 if ((*d == 'p' || *d == 'P')
3896 && !ibcmp(d, "perl", 4))
3906 #ifdef ALTERNATE_SHEBANG
3908 * If the ALTERNATE_SHEBANG on this system starts with a
3909 * character that can be part of a Perl expression, then if
3910 * we see it but not "perl", we're probably looking at the
3911 * start of Perl code, not a request to hand off to some
3912 * other interpreter. Similarly, if "perl" is there, but
3913 * not in the first 'word' of the line, we assume the line
3914 * contains the start of the Perl program.
3916 if (d && *s != '#') {
3917 const char *c = ipath;
3918 while (*c && !strchr("; \t\r\n\f\v#", *c))
3921 d = NULL; /* "perl" not in first word; ignore */
3923 *s = '#'; /* Don't try to parse shebang line */
3925 #endif /* ALTERNATE_SHEBANG */
3930 !instr(s,"indir") &&
3931 instr(PL_origargv[0],"perl"))
3938 while (s < PL_bufend && isSPACE(*s))
3940 if (s < PL_bufend) {
3941 Newx(newargv,PL_origargc+3,char*);
3943 while (s < PL_bufend && !isSPACE(*s))
3946 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3949 newargv = PL_origargv;
3952 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3954 Perl_croak(aTHX_ "Can't exec %s", ipath);
3957 while (*d && !isSPACE(*d))
3959 while (SPACE_OR_TAB(*d))
3963 const bool switches_done = PL_doswitches;
3964 const U32 oldpdb = PL_perldb;
3965 const bool oldn = PL_minus_n;
3966 const bool oldp = PL_minus_p;
3970 bool baduni = FALSE;
3972 const char *d2 = d1 + 1;
3973 if (parse_unicode_opts((const char **)&d2)
3977 if (baduni || *d1 == 'M' || *d1 == 'm') {
3978 const char * const m = d1;
3979 while (*d1 && !isSPACE(*d1))
3981 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3984 d1 = moreswitches(d1);
3986 if (PL_doswitches && !switches_done) {
3987 int argc = PL_origargc;
3988 char **argv = PL_origargv;
3991 } while (argc && argv[0][0] == '-' && argv[0][1]);
3992 init_argv_symbols(argc,argv);
3994 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
3995 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3996 /* if we have already added "LINE: while (<>) {",
3997 we must not do it again */
3999 sv_setpvs(PL_linestr, "");
4000 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4001 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4002 PL_last_lop = PL_last_uni = NULL;
4003 PL_preambled = FALSE;
4004 if (PERLDB_LINE || PERLDB_SAVESRC)
4005 (void)gv_fetchfile(PL_origfilename);
4012 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4014 PL_lex_state = LEX_FORMLINE;
4019 #ifdef PERL_STRICT_CR
4020 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4022 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4024 case ' ': case '\t': case '\f': case 013:
4026 PL_realtokenstart = -1;
4028 PL_thiswhite = newSVpvs("");
4029 sv_catpvn(PL_thiswhite, s, 1);
4036 PL_realtokenstart = -1;
4040 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4041 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4042 /* handle eval qq[#line 1 "foo"\n ...] */
4043 CopLINE_dec(PL_curcop);
4046 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4048 if (!PL_in_eval || PL_rsfp)
4053 while (d < PL_bufend && *d != '\n')
4057 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4058 Perl_croak(aTHX_ "panic: input overflow");
4061 PL_thiswhite = newSVpvn(s, d - s);
4066 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4068 PL_lex_state = LEX_FORMLINE;
4074 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4075 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4078 TOKEN(PEG); /* make sure any #! line is accessible */
4083 /* if (PL_madskills && PL_lex_formbrack) { */
4085 while (d < PL_bufend && *d != '\n')
4089 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4090 Perl_croak(aTHX_ "panic: input overflow");
4091 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4093 PL_thiswhite = newSVpvs("");
4094 if (CopLINE(PL_curcop) == 1) {
4095 sv_setpvs(PL_thiswhite, "");
4098 sv_catpvn(PL_thiswhite, s, d - s);
4112 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4120 while (s < PL_bufend && SPACE_OR_TAB(*s))
4123 if (strnEQ(s,"=>",2)) {
4124 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4125 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4126 OPERATOR('-'); /* unary minus */
4128 PL_last_uni = PL_oldbufptr;
4130 case 'r': ftst = OP_FTEREAD; break;
4131 case 'w': ftst = OP_FTEWRITE; break;
4132 case 'x': ftst = OP_FTEEXEC; break;
4133 case 'o': ftst = OP_FTEOWNED; break;
4134 case 'R': ftst = OP_FTRREAD; break;
4135 case 'W': ftst = OP_FTRWRITE; break;
4136 case 'X': ftst = OP_FTREXEC; break;
4137 case 'O': ftst = OP_FTROWNED; break;
4138 case 'e': ftst = OP_FTIS; break;
4139 case 'z': ftst = OP_FTZERO; break;
4140 case 's': ftst = OP_FTSIZE; break;
4141 case 'f': ftst = OP_FTFILE; break;
4142 case 'd': ftst = OP_FTDIR; break;
4143 case 'l': ftst = OP_FTLINK; break;
4144 case 'p': ftst = OP_FTPIPE; break;
4145 case 'S': ftst = OP_FTSOCK; break;
4146 case 'u': ftst = OP_FTSUID; break;
4147 case 'g': ftst = OP_FTSGID; break;
4148 case 'k': ftst = OP_FTSVTX; break;
4149 case 'b': ftst = OP_FTBLK; break;
4150 case 'c': ftst = OP_FTCHR; break;
4151 case 't': ftst = OP_FTTTY; break;
4152 case 'T': ftst = OP_FTTEXT; break;
4153 case 'B': ftst = OP_FTBINARY; break;
4154 case 'M': case 'A': case 'C':
4155 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4157 case 'M': ftst = OP_FTMTIME; break;
4158 case 'A': ftst = OP_FTATIME; break;
4159 case 'C': ftst = OP_FTCTIME; break;
4167 PL_last_lop_op = (OPCODE)ftst;
4168 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4169 "### Saw file test %c\n", (int)tmp);
4174 /* Assume it was a minus followed by a one-letter named
4175 * subroutine call (or a -bareword), then. */
4176 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4177 "### '-%c' looked like a file test but was not\n",
4184 const char tmp = *s++;
4187 if (PL_expect == XOPERATOR)
4192 else if (*s == '>') {
4195 if (isIDFIRST_lazy_if(s,UTF)) {
4196 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4204 if (PL_expect == XOPERATOR)
4207 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4209 OPERATOR('-'); /* unary minus */
4215 const char tmp = *s++;
4218 if (PL_expect == XOPERATOR)
4223 if (PL_expect == XOPERATOR)
4226 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4233 if (PL_expect != XOPERATOR) {
4234 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4235 PL_expect = XOPERATOR;
4236 force_ident(PL_tokenbuf, '*');
4249 if (PL_expect == XOPERATOR) {
4253 PL_tokenbuf[0] = '%';
4254 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4255 sizeof PL_tokenbuf - 1, FALSE);
4256 if (!PL_tokenbuf[1]) {
4259 PL_pending_ident = '%';
4268 const char tmp = *s++;
4273 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4280 const char tmp = *s++;
4286 goto just_a_word_zero_gv;
4289 switch (PL_expect) {
4295 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4297 PL_bufptr = s; /* update in case we back off */
4303 PL_expect = XTERMBLOCK;
4306 stuffstart = s - SvPVX(PL_linestr) - 1;
4310 while (isIDFIRST_lazy_if(s,UTF)) {
4313 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4314 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4315 if (tmp < 0) tmp = -tmp;
4330 sv = newSVpvn(s, len);
4332 d = scan_str(d,TRUE,TRUE);
4334 /* MUST advance bufptr here to avoid bogus
4335 "at end of line" context messages from yyerror().
4337 PL_bufptr = s + len;
4338 yyerror("Unterminated attribute parameter in attribute list");
4342 return REPORT(0); /* EOF indicator */
4346 sv_catsv(sv, PL_lex_stuff);
4347 attrs = append_elem(OP_LIST, attrs,
4348 newSVOP(OP_CONST, 0, sv));
4349 SvREFCNT_dec(PL_lex_stuff);
4350 PL_lex_stuff = NULL;
4353 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4355 if (PL_in_my == KEY_our) {
4356 deprecate(":unique");
4359 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4362 /* NOTE: any CV attrs applied here need to be part of
4363 the CVf_BUILTIN_ATTRS define in cv.h! */
4364 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4366 CvLVALUE_on(PL_compcv);
4368 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4370 deprecate(":locked");
4372 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4374 CvMETHOD_on(PL_compcv);
4376 /* After we've set the flags, it could be argued that
4377 we don't need to do the attributes.pm-based setting
4378 process, and shouldn't bother appending recognized
4379 flags. To experiment with that, uncomment the
4380 following "else". (Note that's already been
4381 uncommented. That keeps the above-applied built-in
4382 attributes from being intercepted (and possibly
4383 rejected) by a package's attribute routines, but is
4384 justified by the performance win for the common case
4385 of applying only built-in attributes.) */
4387 attrs = append_elem(OP_LIST, attrs,
4388 newSVOP(OP_CONST, 0,
4392 if (*s == ':' && s[1] != ':')
4395 break; /* require real whitespace or :'s */
4396 /* XXX losing whitespace on sequential attributes here */
4400 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4401 if (*s != ';' && *s != '}' && *s != tmp
4402 && (tmp != '=' || *s != ')')) {
4403 const char q = ((*s == '\'') ? '"' : '\'');
4404 /* If here for an expression, and parsed no attrs, back
4406 if (tmp == '=' && !attrs) {
4410 /* MUST advance bufptr here to avoid bogus "at end of line"
4411 context messages from yyerror().
4414 yyerror( (const char *)
4416 ? Perl_form(aTHX_ "Invalid separator character "
4417 "%c%c%c in attribute list", q, *s, q)
4418 : "Unterminated attribute list" ) );
4426 start_force(PL_curforce);
4427 NEXTVAL_NEXTTOKE.opval = attrs;
4428 CURMAD('_', PL_nextwhite);
4433 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4434 (s - SvPVX(PL_linestr)) - stuffstart);
4442 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4443 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4451 const char tmp = *s++;
4456 const char tmp = *s++;
4464 if (PL_lex_brackets <= 0)
4465 yyerror("Unmatched right square bracket");
4468 if (PL_lex_state == LEX_INTERPNORMAL) {
4469 if (PL_lex_brackets == 0) {
4470 if (*s == '-' && s[1] == '>')
4471 PL_lex_state = LEX_INTERPENDMAYBE;
4472 else if (*s != '[' && *s != '{')
4473 PL_lex_state = LEX_INTERPEND;
4480 if (PL_lex_brackets > 100) {
4481 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4483 switch (PL_expect) {
4485 if (PL_lex_formbrack) {
4489 if (PL_oldoldbufptr == PL_last_lop)
4490 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4492 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4493 OPERATOR(HASHBRACK);
4495 while (s < PL_bufend && SPACE_OR_TAB(*s))
4498 PL_tokenbuf[0] = '\0';
4499 if (d < PL_bufend && *d == '-') {
4500 PL_tokenbuf[0] = '-';
4502 while (d < PL_bufend && SPACE_OR_TAB(*d))
4505 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4506 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4508 while (d < PL_bufend && SPACE_OR_TAB(*d))
4511 const char minus = (PL_tokenbuf[0] == '-');
4512 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4520 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4525 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4530 if (PL_oldoldbufptr == PL_last_lop)
4531 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4533 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4536 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4538 /* This hack is to get the ${} in the message. */
4540 yyerror("syntax error");
4543 OPERATOR(HASHBRACK);
4545 /* This hack serves to disambiguate a pair of curlies
4546 * as being a block or an anon hash. Normally, expectation
4547 * determines that, but in cases where we're not in a
4548 * position to expect anything in particular (like inside
4549 * eval"") we have to resolve the ambiguity. This code
4550 * covers the case where the first term in the curlies is a
4551 * quoted string. Most other cases need to be explicitly
4552 * disambiguated by prepending a "+" before the opening
4553 * curly in order to force resolution as an anon hash.
4555 * XXX should probably propagate the outer expectation
4556 * into eval"" to rely less on this hack, but that could
4557 * potentially break current behavior of eval"".
4561 if (*s == '\'' || *s == '"' || *s == '`') {
4562 /* common case: get past first string, handling escapes */
4563 for (t++; t < PL_bufend && *t != *s;)
4564 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4568 else if (*s == 'q') {
4571 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4574 /* skip q//-like construct */
4576 char open, close, term;
4579 while (t < PL_bufend && isSPACE(*t))
4581 /* check for q => */
4582 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4583 OPERATOR(HASHBRACK);
4587 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4591 for (t++; t < PL_bufend; t++) {
4592 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4594 else if (*t == open)
4598 for (t++; t < PL_bufend; t++) {
4599 if (*t == '\\' && t+1 < PL_bufend)
4601 else if (*t == close && --brackets <= 0)
4603 else if (*t == open)
4610 /* skip plain q word */
4611 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4614 else if (isALNUM_lazy_if(t,UTF)) {
4616 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4619 while (t < PL_bufend && isSPACE(*t))
4621 /* if comma follows first term, call it an anon hash */
4622 /* XXX it could be a comma expression with loop modifiers */
4623 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4624 || (*t == '=' && t[1] == '>')))
4625 OPERATOR(HASHBRACK);
4626 if (PL_expect == XREF)
4629 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4635 pl_yylval.ival = CopLINE(PL_curcop);
4636 if (isSPACE(*s) || *s == '#')
4637 PL_copline = NOLINE; /* invalidate current command line number */
4642 if (PL_lex_brackets <= 0)
4643 yyerror("Unmatched right curly bracket");
4645 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4646 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4647 PL_lex_formbrack = 0;
4648 if (PL_lex_state == LEX_INTERPNORMAL) {
4649 if (PL_lex_brackets == 0) {
4650 if (PL_expect & XFAKEBRACK) {
4651 PL_expect &= XENUMMASK;
4652 PL_lex_state = LEX_INTERPEND;
4657 PL_thiswhite = newSVpvs("");
4658 sv_catpvs(PL_thiswhite,"}");
4661 return yylex(); /* ignore fake brackets */
4663 if (*s == '-' && s[1] == '>')
4664 PL_lex_state = LEX_INTERPENDMAYBE;
4665 else if (*s != '[' && *s != '{')
4666 PL_lex_state = LEX_INTERPEND;
4669 if (PL_expect & XFAKEBRACK) {
4670 PL_expect &= XENUMMASK;
4672 return yylex(); /* ignore fake brackets */
4674 start_force(PL_curforce);
4676 curmad('X', newSVpvn(s-1,1));
4677 CURMAD('_', PL_thiswhite);
4682 PL_thistoken = newSVpvs("");
4690 if (PL_expect == XOPERATOR) {
4691 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4692 && isIDFIRST_lazy_if(s,UTF))
4694 CopLINE_dec(PL_curcop);
4695 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
4696 CopLINE_inc(PL_curcop);
4701 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4703 PL_expect = XOPERATOR;
4704 force_ident(PL_tokenbuf, '&');
4708 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
4720 const char tmp = *s++;
4727 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4728 && strchr("+-*/%.^&|<",tmp))
4729 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4730 "Reversed %c= operator",(int)tmp);
4732 if (PL_expect == XSTATE && isALPHA(tmp) &&
4733 (s == PL_linestart+1 || s[-2] == '\n') )
4735 if (PL_in_eval && !PL_rsfp) {
4740 if (strnEQ(s,"=cut",4)) {
4756 PL_thiswhite = newSVpvs("");
4757 sv_catpvn(PL_thiswhite, PL_linestart,
4758 PL_bufend - PL_linestart);
4762 PL_doextract = TRUE;
4766 if (PL_lex_brackets < PL_lex_formbrack) {
4768 #ifdef PERL_STRICT_CR
4769 while (SPACE_OR_TAB(*t))
4771 while (SPACE_OR_TAB(*t) || *t == '\r')
4774 if (*t == '\n' || *t == '#') {
4785 const char tmp = *s++;
4787 /* was this !=~ where !~ was meant?
4788 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4790 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4791 const char *t = s+1;
4793 while (t < PL_bufend && isSPACE(*t))
4796 if (*t == '/' || *t == '?' ||
4797 ((*t == 'm' || *t == 's' || *t == 'y')
4798 && !isALNUM(t[1])) ||
4799 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4800 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4801 "!=~ should be !~");
4811 if (PL_expect != XOPERATOR) {
4812 if (s[1] != '<' && !strchr(s,'>'))
4815 s = scan_heredoc(s);
4817 s = scan_inputsymbol(s);
4818 TERM(sublex_start());
4824 SHop(OP_LEFT_SHIFT);
4838 const char tmp = *s++;
4840 SHop(OP_RIGHT_SHIFT);
4841 else if (tmp == '=')
4850 if (PL_expect == XOPERATOR) {
4851 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4852 return deprecate_commaless_var_list();
4856 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4857 PL_tokenbuf[0] = '@';
4858 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4859 sizeof PL_tokenbuf - 1, FALSE);
4860 if (PL_expect == XOPERATOR)
4861 no_op("Array length", s);
4862 if (!PL_tokenbuf[1])
4864 PL_expect = XOPERATOR;
4865 PL_pending_ident = '#';
4869 PL_tokenbuf[0] = '$';
4870 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4871 sizeof PL_tokenbuf - 1, FALSE);
4872 if (PL_expect == XOPERATOR)
4874 if (!PL_tokenbuf[1]) {
4876 yyerror("Final $ should be \\$ or $name");
4880 /* This kludge not intended to be bulletproof. */
4881 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4882 pl_yylval.opval = newSVOP(OP_CONST, 0,
4883 newSViv(CopARYBASE_get(&PL_compiling)));
4884 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
4890 const char tmp = *s;
4891 if (PL_lex_state == LEX_NORMAL)
4894 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4895 && intuit_more(s)) {
4897 PL_tokenbuf[0] = '@';
4898 if (ckWARN(WARN_SYNTAX)) {
4901 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4904 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4905 while (t < PL_bufend && *t != ']')
4907 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4908 "Multidimensional syntax %.*s not supported",
4909 (int)((t - PL_bufptr) + 1), PL_bufptr);
4913 else if (*s == '{') {
4915 PL_tokenbuf[0] = '%';
4916 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4917 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4919 char tmpbuf[sizeof PL_tokenbuf];
4922 } while (isSPACE(*t));
4923 if (isIDFIRST_lazy_if(t,UTF)) {
4925 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4929 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4930 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4931 "You need to quote \"%s\"",
4938 PL_expect = XOPERATOR;
4939 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4940 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4941 if (!islop || PL_last_lop_op == OP_GREPSTART)
4942 PL_expect = XOPERATOR;
4943 else if (strchr("$@\"'`q", *s))
4944 PL_expect = XTERM; /* e.g. print $fh "foo" */
4945 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4946 PL_expect = XTERM; /* e.g. print $fh &sub */
4947 else if (isIDFIRST_lazy_if(s,UTF)) {
4948 char tmpbuf[sizeof PL_tokenbuf];
4950 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4951 if ((t2 = keyword(tmpbuf, len, 0))) {
4952 /* binary operators exclude handle interpretations */
4964 PL_expect = XTERM; /* e.g. print $fh length() */
4969 PL_expect = XTERM; /* e.g. print $fh subr() */
4972 else if (isDIGIT(*s))
4973 PL_expect = XTERM; /* e.g. print $fh 3 */
4974 else if (*s == '.' && isDIGIT(s[1]))
4975 PL_expect = XTERM; /* e.g. print $fh .3 */
4976 else if ((*s == '?' || *s == '-' || *s == '+')
4977 && !isSPACE(s[1]) && s[1] != '=')
4978 PL_expect = XTERM; /* e.g. print $fh -1 */
4979 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4981 PL_expect = XTERM; /* e.g. print $fh /.../
4982 XXX except DORDOR operator
4984 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4986 PL_expect = XTERM; /* print $fh <<"EOF" */
4989 PL_pending_ident = '$';
4993 if (PL_expect == XOPERATOR)
4995 PL_tokenbuf[0] = '@';
4996 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4997 if (!PL_tokenbuf[1]) {
5000 if (PL_lex_state == LEX_NORMAL)
5002 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5004 PL_tokenbuf[0] = '%';
5006 /* Warn about @ where they meant $. */
5007 if (*s == '[' || *s == '{') {
5008 if (ckWARN(WARN_SYNTAX)) {
5009 const char *t = s + 1;
5010 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5012 if (*t == '}' || *t == ']') {
5014 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5015 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5016 "Scalar value %.*s better written as $%.*s",
5017 (int)(t-PL_bufptr), PL_bufptr,
5018 (int)(t-PL_bufptr-1), PL_bufptr+1);
5023 PL_pending_ident = '@';
5026 case '/': /* may be division, defined-or, or pattern */
5027 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5031 case '?': /* may either be conditional or pattern */
5032 if (PL_expect == XOPERATOR) {
5040 /* A // operator. */
5050 /* Disable warning on "study /blah/" */
5051 if (PL_oldoldbufptr == PL_last_uni
5052 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5053 || memNE(PL_last_uni, "study", 5)
5054 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5057 s = scan_pat(s,OP_MATCH);
5058 TERM(sublex_start());
5062 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5063 #ifdef PERL_STRICT_CR
5066 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5068 && (s == PL_linestart || s[-1] == '\n') )
5070 PL_lex_formbrack = 0;
5074 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5078 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5084 pl_yylval.ival = OPf_SPECIAL;
5090 if (PL_expect != XOPERATOR)
5095 case '0': case '1': case '2': case '3': case '4':
5096 case '5': case '6': case '7': case '8': case '9':
5097 s = scan_num(s, &pl_yylval);
5098 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5099 if (PL_expect == XOPERATOR)
5104 s = scan_str(s,!!PL_madskills,FALSE);
5105 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5106 if (PL_expect == XOPERATOR) {
5107 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5108 return deprecate_commaless_var_list();
5115 pl_yylval.ival = OP_CONST;
5116 TERM(sublex_start());
5119 s = scan_str(s,!!PL_madskills,FALSE);
5120 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5121 if (PL_expect == XOPERATOR) {
5122 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5123 return deprecate_commaless_var_list();
5130 pl_yylval.ival = OP_CONST;
5131 /* FIXME. I think that this can be const if char *d is replaced by
5132 more localised variables. */
5133 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5134 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5135 pl_yylval.ival = OP_STRINGIFY;
5139 TERM(sublex_start());
5142 s = scan_str(s,!!PL_madskills,FALSE);
5143 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5144 if (PL_expect == XOPERATOR)
5145 no_op("Backticks",s);
5148 readpipe_override();
5149 TERM(sublex_start());
5153 if (PL_lex_inwhat && isDIGIT(*s))
5154 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5156 if (PL_expect == XOPERATOR)
5157 no_op("Backslash",s);
5161 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5162 char *start = s + 2;
5163 while (isDIGIT(*start) || *start == '_')
5165 if (*start == '.' && isDIGIT(start[1])) {
5166 s = scan_num(s, &pl_yylval);
5169 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5170 else if (!isALPHA(*start) && (PL_expect == XTERM
5171 || PL_expect == XREF || PL_expect == XSTATE
5172 || PL_expect == XTERMORDORDOR)) {
5173 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5175 s = scan_num(s, &pl_yylval);
5182 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5224 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5226 /* Some keywords can be followed by any delimiter, including ':' */
5227 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5228 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5229 (PL_tokenbuf[0] == 'q' &&
5230 strchr("qwxr", PL_tokenbuf[1])))));
5232 /* x::* is just a word, unless x is "CORE" */
5233 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5237 while (d < PL_bufend && isSPACE(*d))
5238 d++; /* no comments skipped here, or s### is misparsed */
5240 /* Is this a label? */
5241 if (!tmp && PL_expect == XSTATE
5242 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5243 tmp = keyword(PL_tokenbuf, len, 0);
5245 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5247 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5252 /* Check for keywords */
5253 tmp = keyword(PL_tokenbuf, len, 0);
5255 /* Is this a word before a => operator? */
5256 if (*d == '=' && d[1] == '>') {
5259 = (OP*)newSVOP(OP_CONST, 0,
5260 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5261 pl_yylval.opval->op_private = OPpCONST_BARE;
5265 if (tmp < 0) { /* second-class keyword? */
5266 GV *ogv = NULL; /* override (winner) */
5267 GV *hgv = NULL; /* hidden (loser) */
5268 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5270 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5273 if (GvIMPORTED_CV(gv))
5275 else if (! CvMETHOD(cv))
5279 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5280 (gv = *gvp) && isGV_with_GP(gv) &&
5281 GvCVu(gv) && GvIMPORTED_CV(gv))
5288 tmp = 0; /* overridden by import or by GLOBAL */
5291 && -tmp==KEY_lock /* XXX generalizable kludge */
5294 tmp = 0; /* any sub overrides "weak" keyword */
5296 else { /* no override */
5298 if (tmp == KEY_dump) {
5299 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5300 "dump() better written as CORE::dump()");
5304 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
5305 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5306 "Ambiguous call resolved as CORE::%s(), %s",
5307 GvENAME(hgv), "qualify as such or use &");
5314 default: /* not a keyword */
5315 /* Trade off - by using this evil construction we can pull the
5316 variable gv into the block labelled keylookup. If not, then
5317 we have to give it function scope so that the goto from the
5318 earlier ':' case doesn't bypass the initialisation. */
5320 just_a_word_zero_gv:
5328 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5331 SV *nextPL_nextwhite = 0;
5335 /* Get the rest if it looks like a package qualifier */
5337 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5339 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5342 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5343 *s == '\'' ? "'" : "::");
5348 if (PL_expect == XOPERATOR) {
5349 if (PL_bufptr == PL_linestart) {
5350 CopLINE_dec(PL_curcop);
5351 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5352 CopLINE_inc(PL_curcop);
5355 no_op("Bareword",s);
5358 /* Look for a subroutine with this name in current package,
5359 unless name is "Foo::", in which case Foo is a bearword
5360 (and a package name). */
5362 if (len > 2 && !PL_madskills &&
5363 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5365 if (ckWARN(WARN_BAREWORD)
5366 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5367 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5368 "Bareword \"%s\" refers to nonexistent package",
5371 PL_tokenbuf[len] = '\0';
5377 /* Mustn't actually add anything to a symbol table.
5378 But also don't want to "initialise" any placeholder
5379 constants that might already be there into full
5380 blown PVGVs with attached PVCV. */
5381 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5382 GV_NOADD_NOINIT, SVt_PVCV);
5387 /* if we saw a global override before, get the right name */
5390 sv = newSVpvs("CORE::GLOBAL::");
5391 sv_catpv(sv,PL_tokenbuf);
5394 /* If len is 0, newSVpv does strlen(), which is correct.
5395 If len is non-zero, then it will be the true length,
5396 and so the scalar will be created correctly. */
5397 sv = newSVpv(PL_tokenbuf,len);
5400 if (PL_madskills && !PL_thistoken) {
5401 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5402 PL_thistoken = newSVpvn(start,s - start);
5403 PL_realtokenstart = s - SvPVX(PL_linestr);
5407 /* Presume this is going to be a bareword of some sort. */
5410 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5411 pl_yylval.opval->op_private = OPpCONST_BARE;
5412 /* UTF-8 package name? */
5413 if (UTF && !IN_BYTES &&
5414 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5417 /* And if "Foo::", then that's what it certainly is. */
5422 /* Do the explicit type check so that we don't need to force
5423 the initialisation of the symbol table to have a real GV.
5424 Beware - gv may not really be a PVGV, cv may not really be
5425 a PVCV, (because of the space optimisations that gv_init
5426 understands) But they're true if for this symbol there is
5427 respectively a typeglob and a subroutine.
5429 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5430 /* Real typeglob, so get the real subroutine: */
5432 /* A proxy for a subroutine in this package? */
5433 : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
5436 /* See if it's the indirect object for a list operator. */
5438 if (PL_oldoldbufptr &&
5439 PL_oldoldbufptr < PL_bufptr &&
5440 (PL_oldoldbufptr == PL_last_lop
5441 || PL_oldoldbufptr == PL_last_uni) &&
5442 /* NO SKIPSPACE BEFORE HERE! */
5443 (PL_expect == XREF ||
5444 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5446 bool immediate_paren = *s == '(';
5448 /* (Now we can afford to cross potential line boundary.) */
5449 s = SKIPSPACE2(s,nextPL_nextwhite);
5451 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5454 /* Two barewords in a row may indicate method call. */
5456 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5457 (tmp = intuit_method(s, gv, cv)))
5460 /* If not a declared subroutine, it's an indirect object. */
5461 /* (But it's an indir obj regardless for sort.) */
5462 /* Also, if "_" follows a filetest operator, it's a bareword */
5465 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5467 (PL_last_lop_op != OP_MAPSTART &&
5468 PL_last_lop_op != OP_GREPSTART))))
5469 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5470 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5473 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5478 PL_expect = XOPERATOR;
5481 s = SKIPSPACE2(s,nextPL_nextwhite);
5482 PL_nextwhite = nextPL_nextwhite;
5487 /* Is this a word before a => operator? */
5488 if (*s == '=' && s[1] == '>' && !pkgname) {
5490 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
5491 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5492 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
5496 /* If followed by a paren, it's certainly a subroutine. */
5501 while (SPACE_OR_TAB(*d))
5503 if (*d == ')' && (sv = gv_const_sv(gv))) {
5510 PL_nextwhite = PL_thiswhite;
5513 start_force(PL_curforce);
5515 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5516 PL_expect = XOPERATOR;
5519 PL_nextwhite = nextPL_nextwhite;
5520 curmad('X', PL_thistoken);
5521 PL_thistoken = newSVpvs("");
5529 /* If followed by var or block, call it a method (unless sub) */
5531 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5532 PL_last_lop = PL_oldbufptr;
5533 PL_last_lop_op = OP_METHOD;
5537 /* If followed by a bareword, see if it looks like indir obj. */
5540 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5541 && (tmp = intuit_method(s, gv, cv)))
5544 /* Not a method, so call it a subroutine (if defined) */
5547 if (lastchar == '-')
5548 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5549 "Ambiguous use of -%s resolved as -&%s()",
5550 PL_tokenbuf, PL_tokenbuf);
5551 /* Check for a constant sub */
5552 if ((sv = gv_const_sv(gv))) {
5554 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
5555 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5556 pl_yylval.opval->op_private = 0;
5560 /* Resolve to GV now. */
5561 if (SvTYPE(gv) != SVt_PVGV) {
5562 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5563 assert (SvTYPE(gv) == SVt_PVGV);
5564 /* cv must have been some sort of placeholder, so
5565 now needs replacing with a real code reference. */
5569 op_free(pl_yylval.opval);
5570 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5571 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5572 PL_last_lop = PL_oldbufptr;
5573 PL_last_lop_op = OP_ENTERSUB;
5574 /* Is there a prototype? */
5582 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5585 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5587 while (*proto == ';')
5589 if (*proto == '&' && *s == '{') {
5591 sv_setpvs(PL_subname, "__ANON__");
5593 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5600 PL_nextwhite = PL_thiswhite;
5603 start_force(PL_curforce);
5604 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5607 PL_nextwhite = nextPL_nextwhite;
5608 curmad('X', PL_thistoken);
5609 PL_thistoken = newSVpvs("");
5616 /* Guess harder when madskills require "best effort". */
5617 if (PL_madskills && (!gv || !GvCVu(gv))) {
5618 int probable_sub = 0;
5619 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5621 else if (isALPHA(*s)) {
5625 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5626 if (!keyword(tmpbuf, tmplen, 0))
5629 while (d < PL_bufend && isSPACE(*d))
5631 if (*d == '=' && d[1] == '>')
5636 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5637 op_free(pl_yylval.opval);
5638 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5639 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5640 PL_last_lop = PL_oldbufptr;
5641 PL_last_lop_op = OP_ENTERSUB;
5642 PL_nextwhite = PL_thiswhite;
5644 start_force(PL_curforce);
5645 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5647 PL_nextwhite = nextPL_nextwhite;
5648 curmad('X', PL_thistoken);
5649 PL_thistoken = newSVpvs("");
5654 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5661 /* Call it a bare word */
5663 if (PL_hints & HINT_STRICT_SUBS)
5664 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5667 /* after "print" and similar functions (corresponding to
5668 * "F? L" in opcode.pl), whatever wasn't already parsed as
5669 * a filehandle should be subject to "strict subs".
5670 * Likewise for the optional indirect-object argument to system
5671 * or exec, which can't be a bareword */
5672 if ((PL_last_lop_op == OP_PRINT
5673 || PL_last_lop_op == OP_PRTF
5674 || PL_last_lop_op == OP_SAY
5675 || PL_last_lop_op == OP_SYSTEM
5676 || PL_last_lop_op == OP_EXEC)
5677 && (PL_hints & HINT_STRICT_SUBS))
5678 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5679 if (lastchar != '-') {
5680 if (ckWARN(WARN_RESERVED)) {
5684 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5685 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5692 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
5693 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5694 "Operator or semicolon missing before %c%s",
5695 lastchar, PL_tokenbuf);
5696 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5697 "Ambiguous use of %c resolved as operator %c",
5698 lastchar, lastchar);
5704 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5705 newSVpv(CopFILE(PL_curcop),0));
5709 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5710 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5713 case KEY___PACKAGE__:
5714 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5716 ? newSVhek(HvNAME_HEK(PL_curstash))
5723 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5724 const char *pname = "main";
5725 if (PL_tokenbuf[2] == 'D')
5726 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5727 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5731 GvIOp(gv) = newIO();
5732 IoIFP(GvIOp(gv)) = PL_rsfp;
5733 #if defined(HAS_FCNTL) && defined(F_SETFD)
5735 const int fd = PerlIO_fileno(PL_rsfp);
5736 fcntl(fd,F_SETFD,fd >= 3);
5739 /* Mark this internal pseudo-handle as clean */
5740 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5741 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5742 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5744 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5745 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5746 /* if the script was opened in binmode, we need to revert
5747 * it to text mode for compatibility; but only iff it has CRs
5748 * XXX this is a questionable hack at best. */
5749 if (PL_bufend-PL_bufptr > 2
5750 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5753 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5754 loc = PerlIO_tell(PL_rsfp);
5755 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5758 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5760 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5761 #endif /* NETWARE */
5762 #ifdef PERLIO_IS_STDIO /* really? */
5763 # if defined(__BORLANDC__)
5764 /* XXX see note in do_binmode() */
5765 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5769 PerlIO_seek(PL_rsfp, loc, 0);
5773 #ifdef PERLIO_LAYERS
5776 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5777 else if (PL_encoding) {
5784 XPUSHs(PL_encoding);
5786 call_method("name", G_SCALAR);
5790 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5791 Perl_form(aTHX_ ":encoding(%"SVf")",
5800 if (PL_realtokenstart >= 0) {
5801 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5803 PL_endwhite = newSVpvs("");
5804 sv_catsv(PL_endwhite, PL_thiswhite);
5806 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5807 PL_realtokenstart = -1;
5809 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5810 SvCUR(PL_endwhite))) != NULL) ;
5825 if (PL_expect == XSTATE) {
5832 if (*s == ':' && s[1] == ':') {
5835 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5836 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5837 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5840 else if (tmp == KEY_require || tmp == KEY_do)
5841 /* that's a way to remember we saw "CORE::" */
5854 LOP(OP_ACCEPT,XTERM);
5860 LOP(OP_ATAN2,XTERM);
5866 LOP(OP_BINMODE,XTERM);
5869 LOP(OP_BLESS,XTERM);
5878 /* When 'use switch' is in effect, continue has a dual
5879 life as a control operator. */
5881 if (!FEATURE_IS_ENABLED("switch"))
5884 /* We have to disambiguate the two senses of
5885 "continue". If the next token is a '{' then
5886 treat it as the start of a continue block;
5887 otherwise treat it as a control operator.
5899 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5916 if (!PL_cryptseen) {
5917 PL_cryptseen = TRUE;
5921 LOP(OP_CRYPT,XTERM);
5924 LOP(OP_CHMOD,XTERM);
5927 LOP(OP_CHOWN,XTERM);
5930 LOP(OP_CONNECT,XTERM);
5949 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5950 if (orig_keyword == KEY_do) {
5959 PL_hints |= HINT_BLOCK_SCOPE;
5969 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5970 LOP(OP_DBMOPEN,XTERM);
5976 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5983 pl_yylval.ival = CopLINE(PL_curcop);
5999 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6000 UNIBRACK(OP_ENTEREVAL);
6014 case KEY_endhostent:
6020 case KEY_endservent:
6023 case KEY_endprotoent:
6034 pl_yylval.ival = CopLINE(PL_curcop);
6036 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6039 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6042 if ((PL_bufend - p) >= 3 &&
6043 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6045 else if ((PL_bufend - p) >= 4 &&
6046 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6049 if (isIDFIRST_lazy_if(p,UTF)) {
6050 p = scan_ident(p, PL_bufend,
6051 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6055 Perl_croak(aTHX_ "Missing $ on loop variable");
6057 s = SvPVX(PL_linestr) + soff;
6063 LOP(OP_FORMLINE,XTERM);
6069 LOP(OP_FCNTL,XTERM);
6075 LOP(OP_FLOCK,XTERM);
6084 LOP(OP_GREPSTART, XREF);
6087 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6102 case KEY_getpriority:
6103 LOP(OP_GETPRIORITY,XTERM);
6105 case KEY_getprotobyname:
6108 case KEY_getprotobynumber:
6109 LOP(OP_GPBYNUMBER,XTERM);
6111 case KEY_getprotoent:
6123 case KEY_getpeername:
6124 UNI(OP_GETPEERNAME);
6126 case KEY_gethostbyname:
6129 case KEY_gethostbyaddr:
6130 LOP(OP_GHBYADDR,XTERM);
6132 case KEY_gethostent:
6135 case KEY_getnetbyname:
6138 case KEY_getnetbyaddr:
6139 LOP(OP_GNBYADDR,XTERM);
6144 case KEY_getservbyname:
6145 LOP(OP_GSBYNAME,XTERM);
6147 case KEY_getservbyport:
6148 LOP(OP_GSBYPORT,XTERM);
6150 case KEY_getservent:
6153 case KEY_getsockname:
6154 UNI(OP_GETSOCKNAME);
6156 case KEY_getsockopt:
6157 LOP(OP_GSOCKOPT,XTERM);
6172 pl_yylval.ival = CopLINE(PL_curcop);
6182 pl_yylval.ival = CopLINE(PL_curcop);
6186 LOP(OP_INDEX,XTERM);
6192 LOP(OP_IOCTL,XTERM);
6204 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6236 LOP(OP_LISTEN,XTERM);
6245 s = scan_pat(s,OP_MATCH);
6246 TERM(sublex_start());
6249 LOP(OP_MAPSTART, XREF);
6252 LOP(OP_MKDIR,XTERM);
6255 LOP(OP_MSGCTL,XTERM);
6258 LOP(OP_MSGGET,XTERM);
6261 LOP(OP_MSGRCV,XTERM);
6264 LOP(OP_MSGSND,XTERM);
6269 PL_in_my = (U16)tmp;
6271 if (isIDFIRST_lazy_if(s,UTF)) {
6275 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6276 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6278 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6279 if (!PL_in_my_stash) {
6282 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6286 if (PL_madskills) { /* just add type to declarator token */
6287 sv_catsv(PL_thistoken, PL_nextwhite);
6289 sv_catpvn(PL_thistoken, start, s - start);
6297 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6304 s = tokenize_use(0, s);
6308 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6315 if (isIDFIRST_lazy_if(s,UTF)) {
6317 for (d = s; isALNUM_lazy_if(d,UTF);)
6319 for (t=d; isSPACE(*t);)
6321 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6323 && !(t[0] == '=' && t[1] == '>')
6325 int parms_len = (int)(d-s);
6326 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6327 "Precedence problem: open %.*s should be open(%.*s)",
6328 parms_len, s, parms_len, s);
6334 pl_yylval.ival = OP_OR;
6344 LOP(OP_OPEN_DIR,XTERM);
6347 checkcomma(s,PL_tokenbuf,"filehandle");
6351 checkcomma(s,PL_tokenbuf,"filehandle");
6370 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6371 s = force_version(s, FALSE);
6375 LOP(OP_PIPE_OP,XTERM);
6378 s = scan_str(s,!!PL_madskills,FALSE);
6381 pl_yylval.ival = OP_CONST;
6382 TERM(sublex_start());
6388 s = scan_str(s,!!PL_madskills,FALSE);
6391 PL_expect = XOPERATOR;
6393 if (SvCUR(PL_lex_stuff)) {
6396 d = SvPV_force(PL_lex_stuff, len);
6398 for (; isSPACE(*d) && len; --len, ++d)
6403 if (!warned && ckWARN(WARN_QW)) {
6404 for (; !isSPACE(*d) && len; --len, ++d) {
6406 Perl_warner(aTHX_ packWARN(WARN_QW),
6407 "Possible attempt to separate words with commas");
6410 else if (*d == '#') {
6411 Perl_warner(aTHX_ packWARN(WARN_QW),
6412 "Possible attempt to put comments in qw() list");
6418 for (; !isSPACE(*d) && len; --len, ++d)
6421 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
6422 words = append_elem(OP_LIST, words,
6423 newSVOP(OP_CONST, 0, tokeq(sv)));
6427 start_force(PL_curforce);
6428 NEXTVAL_NEXTTOKE.opval = words;
6433 SvREFCNT_dec(PL_lex_stuff);
6434 PL_lex_stuff = NULL;
6440 s = scan_str(s,!!PL_madskills,FALSE);
6443 pl_yylval.ival = OP_STRINGIFY;
6444 if (SvIVX(PL_lex_stuff) == '\'')
6445 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6446 TERM(sublex_start());
6449 s = scan_pat(s,OP_QR);
6450 TERM(sublex_start());
6453 s = scan_str(s,!!PL_madskills,FALSE);
6456 readpipe_override();
6457 TERM(sublex_start());
6465 s = force_version(s, FALSE);
6467 else if (*s != 'v' || !isDIGIT(s[1])
6468 || (s = force_version(s, TRUE), *s == 'v'))
6470 *PL_tokenbuf = '\0';
6471 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6472 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6473 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6475 yyerror("<> should be quotes");
6477 if (orig_keyword == KEY_require) {
6485 PL_last_uni = PL_oldbufptr;
6486 PL_last_lop_op = OP_REQUIRE;
6488 return REPORT( (int)REQUIRE );
6494 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6498 LOP(OP_RENAME,XTERM);
6507 LOP(OP_RINDEX,XTERM);
6516 UNIDOR(OP_READLINE);
6519 UNIDOR(OP_BACKTICK);
6528 LOP(OP_REVERSE,XTERM);
6531 UNIDOR(OP_READLINK);
6538 if (pl_yylval.opval)
6539 TERM(sublex_start());
6541 TOKEN(1); /* force error */
6544 checkcomma(s,PL_tokenbuf,"filehandle");
6554 LOP(OP_SELECT,XTERM);
6560 LOP(OP_SEMCTL,XTERM);
6563 LOP(OP_SEMGET,XTERM);
6566 LOP(OP_SEMOP,XTERM);
6572 LOP(OP_SETPGRP,XTERM);
6574 case KEY_setpriority:
6575 LOP(OP_SETPRIORITY,XTERM);
6577 case KEY_sethostent:
6583 case KEY_setservent:
6586 case KEY_setprotoent:
6596 LOP(OP_SEEKDIR,XTERM);
6598 case KEY_setsockopt:
6599 LOP(OP_SSOCKOPT,XTERM);
6605 LOP(OP_SHMCTL,XTERM);
6608 LOP(OP_SHMGET,XTERM);
6611 LOP(OP_SHMREAD,XTERM);
6614 LOP(OP_SHMWRITE,XTERM);
6617 LOP(OP_SHUTDOWN,XTERM);
6626 LOP(OP_SOCKET,XTERM);
6628 case KEY_socketpair:
6629 LOP(OP_SOCKPAIR,XTERM);
6632 checkcomma(s,PL_tokenbuf,"subroutine name");
6634 if (*s == ';' || *s == ')') /* probably a close */
6635 Perl_croak(aTHX_ "sort is now a reserved word");
6637 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6641 LOP(OP_SPLIT,XTERM);
6644 LOP(OP_SPRINTF,XTERM);
6647 LOP(OP_SPLICE,XTERM);
6662 LOP(OP_SUBSTR,XTERM);
6668 char tmpbuf[sizeof PL_tokenbuf];
6669 SSize_t tboffset = 0;
6670 expectation attrful;
6671 bool have_name, have_proto;
6672 const int key = tmp;
6677 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6678 SV *subtoken = newSVpvn(tstart, s - tstart);
6682 s = SKIPSPACE2(s,tmpwhite);
6687 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6688 (*s == ':' && s[1] == ':'))
6691 SV *nametoke = NULL;
6695 attrful = XATTRBLOCK;
6696 /* remember buffer pos'n for later force_word */
6697 tboffset = s - PL_oldbufptr;
6698 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6701 nametoke = newSVpvn(s, d - s);
6703 if (memchr(tmpbuf, ':', len))
6704 sv_setpvn(PL_subname, tmpbuf, len);
6706 sv_setsv(PL_subname,PL_curstname);
6707 sv_catpvs(PL_subname,"::");
6708 sv_catpvn(PL_subname,tmpbuf,len);
6715 CURMAD('X', nametoke);
6716 CURMAD('_', tmpwhite);
6717 (void) force_word(PL_oldbufptr + tboffset, WORD,
6720 s = SKIPSPACE2(d,tmpwhite);
6727 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6728 PL_expect = XTERMBLOCK;
6729 attrful = XATTRTERM;
6730 sv_setpvs(PL_subname,"?");
6734 if (key == KEY_format) {
6736 PL_lex_formbrack = PL_lex_brackets + 1;
6738 PL_thistoken = subtoken;
6742 (void) force_word(PL_oldbufptr + tboffset, WORD,
6748 /* Look for a prototype */
6751 bool bad_proto = FALSE;
6752 bool in_brackets = FALSE;
6753 char greedy_proto = ' ';
6754 bool proto_after_greedy_proto = FALSE;
6755 bool must_be_last = FALSE;
6756 bool underscore = FALSE;
6757 bool seen_underscore = FALSE;
6758 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6760 s = scan_str(s,!!PL_madskills,FALSE);
6762 Perl_croak(aTHX_ "Prototype not terminated");
6763 /* strip spaces and check for bad characters */
6764 d = SvPVX(PL_lex_stuff);
6766 for (p = d; *p; ++p) {
6772 proto_after_greedy_proto = TRUE;
6773 if (!strchr("$@%*;[]&\\_", *p)) {
6785 else if ( *p == ']' ) {
6786 in_brackets = FALSE;
6788 else if ( (*p == '@' || *p == '%') &&
6789 ( tmp < 2 || d[tmp-2] != '\\' ) &&
6791 must_be_last = TRUE;
6794 else if ( *p == '_' ) {
6795 underscore = seen_underscore = TRUE;
6802 if (proto_after_greedy_proto)
6803 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6804 "Prototype after '%c' for %"SVf" : %s",
6805 greedy_proto, SVfARG(PL_subname), d);
6807 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6808 "Illegal character %sin prototype for %"SVf" : %s",
6809 seen_underscore ? "after '_' " : "",
6810 SVfARG(PL_subname), d);
6811 SvCUR_set(PL_lex_stuff, tmp);
6816 CURMAD('q', PL_thisopen);
6817 CURMAD('_', tmpwhite);
6818 CURMAD('=', PL_thisstuff);
6819 CURMAD('Q', PL_thisclose);
6820 NEXTVAL_NEXTTOKE.opval =
6821 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6822 PL_lex_stuff = NULL;
6825 s = SKIPSPACE2(s,tmpwhite);
6833 if (*s == ':' && s[1] != ':')
6834 PL_expect = attrful;
6835 else if (*s != '{' && key == KEY_sub) {
6837 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6839 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6846 curmad('^', newSVpvs(""));
6847 CURMAD('_', tmpwhite);
6851 PL_thistoken = subtoken;
6854 NEXTVAL_NEXTTOKE.opval =
6855 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6856 PL_lex_stuff = NULL;
6862 sv_setpvs(PL_subname, "__ANON__");
6864 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6868 (void) force_word(PL_oldbufptr + tboffset, WORD,
6877 LOP(OP_SYSTEM,XREF);
6880 LOP(OP_SYMLINK,XTERM);
6883 LOP(OP_SYSCALL,XTERM);
6886 LOP(OP_SYSOPEN,XTERM);
6889 LOP(OP_SYSSEEK,XTERM);
6892 LOP(OP_SYSREAD,XTERM);
6895 LOP(OP_SYSWRITE,XTERM);
6899 TERM(sublex_start());
6920 LOP(OP_TRUNCATE,XTERM);
6932 pl_yylval.ival = CopLINE(PL_curcop);
6936 pl_yylval.ival = CopLINE(PL_curcop);
6940 LOP(OP_UNLINK,XTERM);
6946 LOP(OP_UNPACK,XTERM);
6949 LOP(OP_UTIME,XTERM);
6955 LOP(OP_UNSHIFT,XTERM);
6958 s = tokenize_use(1, s);
6968 pl_yylval.ival = CopLINE(PL_curcop);
6972 pl_yylval.ival = CopLINE(PL_curcop);
6976 PL_hints |= HINT_BLOCK_SCOPE;
6983 LOP(OP_WAITPID,XTERM);
6992 ctl_l[0] = toCTRL('L');
6994 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6997 /* Make sure $^L is defined */
6998 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7003 if (PL_expect == XOPERATOR)
7009 pl_yylval.ival = OP_XOR;
7014 TERM(sublex_start());
7019 #pragma segment Main
7023 S_pending_ident(pTHX)
7028 /* pit holds the identifier we read and pending_ident is reset */
7029 char pit = PL_pending_ident;
7030 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7031 /* All routes through this function want to know if there is a colon. */
7032 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7033 PL_pending_ident = 0;
7035 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7036 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7037 "### Pending identifier '%s'\n", PL_tokenbuf); });
7039 /* if we're in a my(), we can't allow dynamics here.
7040 $foo'bar has already been turned into $foo::bar, so
7041 just check for colons.
7043 if it's a legal name, the OP is a PADANY.
7046 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
7048 yyerror(Perl_form(aTHX_ "No package name allowed for "
7049 "variable %s in \"our\"",
7051 tmp = allocmy(PL_tokenbuf);
7055 yyerror(Perl_form(aTHX_ PL_no_myglob,
7056 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7058 pl_yylval.opval = newOP(OP_PADANY, 0);
7059 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf);
7065 build the ops for accesses to a my() variable.
7067 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7068 then used in a comparison. This catches most, but not
7069 all cases. For instance, it catches
7070 sort { my($a); $a <=> $b }
7072 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7073 (although why you'd do that is anyone's guess).
7078 tmp = pad_findmy(PL_tokenbuf);
7079 if (tmp != NOT_IN_PAD) {
7080 /* might be an "our" variable" */
7081 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7082 /* build ops for a bareword */
7083 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7084 HEK * const stashname = HvNAME_HEK(stash);
7085 SV * const sym = newSVhek(stashname);
7086 sv_catpvs(sym, "::");
7087 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7088 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7089 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7092 ? (GV_ADDMULTI | GV_ADDINEVAL)
7095 ((PL_tokenbuf[0] == '$') ? SVt_PV
7096 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7101 /* if it's a sort block and they're naming $a or $b */
7102 if (PL_last_lop_op == OP_SORT &&
7103 PL_tokenbuf[0] == '$' &&
7104 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7107 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7108 d < PL_bufend && *d != '\n';
7111 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7112 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7118 pl_yylval.opval = newOP(OP_PADANY, 0);
7119 pl_yylval.opval->op_targ = tmp;
7125 Whine if they've said @foo in a doublequoted string,
7126 and @foo isn't a variable we can find in the symbol
7129 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7130 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7132 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7133 /* DO NOT warn for @- and @+ */
7134 && !( PL_tokenbuf[2] == '\0' &&
7135 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7138 /* Downgraded from fatal to warning 20000522 mjd */
7139 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7140 "Possible unintended interpolation of %s in string",
7145 /* build ops for a bareword */
7146 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7148 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7150 PL_tokenbuf + 1, tokenbuf_len - 1,
7151 /* If the identifier refers to a stash, don't autovivify it.
7152 * Change 24660 had the side effect of causing symbol table
7153 * hashes to always be defined, even if they were freshly
7154 * created and the only reference in the entire program was
7155 * the single statement with the defined %foo::bar:: test.
7156 * It appears that all code in the wild doing this actually
7157 * wants to know whether sub-packages have been loaded, so
7158 * by avoiding auto-vivifying symbol tables, we ensure that
7159 * defined %foo::bar:: continues to be false, and the existing
7160 * tests still give the expected answers, even though what
7161 * they're actually testing has now changed subtly.
7163 (*PL_tokenbuf == '%'
7164 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7167 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7168 ((PL_tokenbuf[0] == '$') ? SVt_PV
7169 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7175 * The following code was generated by perl_keyword.pl.
7179 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7183 PERL_ARGS_ASSERT_KEYWORD;
7187 case 1: /* 5 tokens of length 1 */
7219 case 2: /* 18 tokens of length 2 */
7365 case 3: /* 29 tokens of length 3 */
7369 if (name[1] == 'N' &&
7432 if (name[1] == 'i' &&
7464 if (name[1] == 'o' &&
7473 if (name[1] == 'e' &&
7482 if (name[1] == 'n' &&
7491 if (name[1] == 'o' &&
7500 if (name[1] == 'a' &&
7509 if (name[1] == 'o' &&
7571 if (name[1] == 'e' &&
7585 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7611 if (name[1] == 'i' &&
7620 if (name[1] == 's' &&
7629 if (name[1] == 'e' &&
7638 if (name[1] == 'o' &&
7650 case 4: /* 41 tokens of length 4 */
7654 if (name[1] == 'O' &&
7664 if (name[1] == 'N' &&
7674 if (name[1] == 'i' &&
7684 if (name[1] == 'h' &&
7694 if (name[1] == 'u' &&
7707 if (name[2] == 'c' &&
7716 if (name[2] == 's' &&
7725 if (name[2] == 'a' &&
7761 if (name[1] == 'o' &&
7774 if (name[2] == 't' &&
7783 if (name[2] == 'o' &&
7792 if (name[2] == 't' &&
7801 if (name[2] == 'e' &&
7814 if (name[1] == 'o' &&
7827 if (name[2] == 'y' &&
7836 if (name[2] == 'l' &&
7852 if (name[2] == 's' &&
7861 if (name[2] == 'n' &&
7870 if (name[2] == 'c' &&
7883 if (name[1] == 'e' &&
7893 if (name[1] == 'p' &&
7906 if (name[2] == 'c' &&
7915 if (name[2] == 'p' &&
7924 if (name[2] == 's' &&
7940 if (name[2] == 'n' &&
8010 if (name[2] == 'r' &&
8019 if (name[2] == 'r' &&
8028 if (name[2] == 'a' &&
8044 if (name[2] == 'l' &&
8106 if (name[2] == 'e' &&
8109 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8122 case 5: /* 39 tokens of length 5 */
8126 if (name[1] == 'E' &&
8137 if (name[1] == 'H' &&
8151 if (name[2] == 'a' &&
8161 if (name[2] == 'a' &&
8178 if (name[2] == 'e' &&
8188 if (name[2] == 'e' &&
8192 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8208 if (name[3] == 'i' &&
8217 if (name[3] == 'o' &&
8253 if (name[2] == 'o' &&
8263 if (name[2] == 'y' &&
8277 if (name[1] == 'l' &&
8291 if (name[2] == 'n' &&
8301 if (name[2] == 'o' &&
8315 if (name[1] == 'i' &&
8320 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8329 if (name[2] == 'd' &&
8339 if (name[2] == 'c' &&
8356 if (name[2] == 'c' &&
8366 if (name[2] == 't' &&
8380 if (name[1] == 'k' &&
8391 if (name[1] == 'r' &&
8405 if (name[2] == 's' &&
8415 if (name[2] == 'd' &&
8432 if (name[2] == 'm' &&
8442 if (name[2] == 'i' &&
8452 if (name[2] == 'e' &&
8462 if (name[2] == 'l' &&
8472 if (name[2] == 'a' &&
8485 if (name[3] == 't' &&
8488 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8494 if (name[3] == 'd' &&
8511 if (name[1] == 'i' &&
8525 if (name[2] == 'a' &&
8538 if (name[3] == 'e' &&
8573 if (name[2] == 'i' &&
8590 if (name[2] == 'i' &&
8600 if (name[2] == 'i' &&
8617 case 6: /* 33 tokens of length 6 */
8621 if (name[1] == 'c' &&
8636 if (name[2] == 'l' &&
8647 if (name[2] == 'r' &&
8662 if (name[1] == 'e' &&
8677 if (name[2] == 's' &&
8682 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8688 if (name[2] == 'i' &&
8706 if (name[2] == 'l' &&
8717 if (name[2] == 'r' &&
8732 if (name[1] == 'm' &&
8747 if (name[2] == 'n' &&
8758 if (name[2] == 's' &&
8773 if (name[1] == 's' &&
8779 if (name[4] == 't' &&
8788 if (name[4] == 'e' &&
8797 if (name[4] == 'c' &&
8806 if (name[4] == 'n' &&
8822 if (name[1] == 'r' &&
8840 if (name[3] == 'a' &&
8850 if (name[3] == 'u' &&
8864 if (name[2] == 'n' &&
8882 if (name[2] == 'a' &&
8896 if (name[3] == 'e' &&
8909 if (name[4] == 't' &&
8918 if (name[4] == 'e' &&
8940 if (name[4] == 't' &&
8949 if (name[4] == 'e' &&
8965 if (name[2] == 'c' &&
8976 if (name[2] == 'l' &&
8987 if (name[2] == 'b' &&
8998 if (name[2] == 's' &&
9021 if (name[4] == 's' &&
9030 if (name[4] == 'n' &&
9043 if (name[3] == 'a' &&
9060 if (name[1] == 'a' &&
9075 case 7: /* 29 tokens of length 7 */
9079 if (name[1] == 'E' &&
9092 if (name[1] == '_' &&
9105 if (name[1] == 'i' &&
9112 return -KEY_binmode;
9118 if (name[1] == 'o' &&
9125 return -KEY_connect;
9134 if (name[2] == 'm' &&
9140 return -KEY_dbmopen;
9151 if (name[4] == 'u' &&
9155 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9161 if (name[4] == 'n' &&
9182 if (name[1] == 'o' &&
9195 if (name[1] == 'e' &&
9202 if (name[5] == 'r' &&
9205 return -KEY_getpgrp;
9211 if (name[5] == 'i' &&
9214 return -KEY_getppid;
9227 if (name[1] == 'c' &&
9234 return -KEY_lcfirst;
9240 if (name[1] == 'p' &&
9247 return -KEY_opendir;
9253 if (name[1] == 'a' &&
9271 if (name[3] == 'd' &&
9276 return -KEY_readdir;
9282 if (name[3] == 'u' &&
9293 if (name[3] == 'e' &&
9298 return -KEY_reverse;
9317 if (name[3] == 'k' &&
9322 return -KEY_seekdir;
9328 if (name[3] == 'p' &&
9333 return -KEY_setpgrp;
9343 if (name[2] == 'm' &&
9349 return -KEY_shmread;
9355 if (name[2] == 'r' &&
9361 return -KEY_sprintf;
9370 if (name[3] == 'l' &&
9375 return -KEY_symlink;
9384 if (name[4] == 'a' &&
9388 return -KEY_syscall;
9394 if (name[4] == 'p' &&
9398 return -KEY_sysopen;
9404 if (name[4] == 'e' &&
9408 return -KEY_sysread;
9414 if (name[4] == 'e' &&
9418 return -KEY_sysseek;
9436 if (name[1] == 'e' &&
9443 return -KEY_telldir;
9452 if (name[2] == 'f' &&
9458 return -KEY_ucfirst;
9464 if (name[2] == 's' &&
9470 return -KEY_unshift;
9480 if (name[1] == 'a' &&
9487 return -KEY_waitpid;
9496 case 8: /* 26 tokens of length 8 */
9500 if (name[1] == 'U' &&
9508 return KEY_AUTOLOAD;
9519 if (name[3] == 'A' &&
9525 return KEY___DATA__;
9531 if (name[3] == 'I' &&
9537 return -KEY___FILE__;
9543 if (name[3] == 'I' &&
9549 return -KEY___LINE__;
9565 if (name[2] == 'o' &&
9572 return -KEY_closedir;
9578 if (name[2] == 'n' &&
9585 return -KEY_continue;
9595 if (name[1] == 'b' &&
9603 return -KEY_dbmclose;
9609 if (name[1] == 'n' &&
9615 if (name[4] == 'r' &&
9620 return -KEY_endgrent;
9626 if (name[4] == 'w' &&
9631 return -KEY_endpwent;
9644 if (name[1] == 'o' &&
9652 return -KEY_formline;
9658 if (name[1] == 'e' &&
9669 if (name[6] == 'n' &&
9672 return -KEY_getgrent;
9678 if (name[6] == 'i' &&
9681 return -KEY_getgrgid;
9687 if (name[6] == 'a' &&
9690 return -KEY_getgrnam;
9703 if (name[4] == 'o' &&
9708 return -KEY_getlogin;
9719 if (name[6] == 'n' &&
9722 return -KEY_getpwent;
9728 if (name[6] == 'a' &&
9731 return -KEY_getpwnam;
9737 if (name[6] == 'i' &&
9740 return -KEY_getpwuid;
9760 if (name[1] == 'e' &&
9767 if (name[5] == 'i' &&
9774 return -KEY_readline;
9779 return -KEY_readlink;
9790 if (name[5] == 'i' &&
9794 return -KEY_readpipe;
9815 if (name[4] == 'r' &&
9820 return -KEY_setgrent;
9826 if (name[4] == 'w' &&
9831 return -KEY_setpwent;
9847 if (name[3] == 'w' &&
9853 return -KEY_shmwrite;
9859 if (name[3] == 't' &&
9865 return -KEY_shutdown;
9875 if (name[2] == 's' &&
9882 return -KEY_syswrite;
9892 if (name[1] == 'r' &&
9900 return -KEY_truncate;
9909 case 9: /* 9 tokens of length 9 */
9913 if (name[1] == 'N' &&
9922 return KEY_UNITCHECK;
9928 if (name[1] == 'n' &&
9937 return -KEY_endnetent;
9943 if (name[1] == 'e' &&
9952 return -KEY_getnetent;
9958 if (name[1] == 'o' &&
9967 return -KEY_localtime;
9973 if (name[1] == 'r' &&
9982 return KEY_prototype;
9988 if (name[1] == 'u' &&
9997 return -KEY_quotemeta;
10003 if (name[1] == 'e' &&
10012 return -KEY_rewinddir;
10018 if (name[1] == 'e' &&
10027 return -KEY_setnetent;
10033 if (name[1] == 'a' &&
10042 return -KEY_wantarray;
10051 case 10: /* 9 tokens of length 10 */
10055 if (name[1] == 'n' &&
10061 if (name[4] == 'o' &&
10068 return -KEY_endhostent;
10074 if (name[4] == 'e' &&
10081 return -KEY_endservent;
10094 if (name[1] == 'e' &&
10100 if (name[4] == 'o' &&
10107 return -KEY_gethostent;
10116 if (name[5] == 'r' &&
10122 return -KEY_getservent;
10128 if (name[5] == 'c' &&
10134 return -KEY_getsockopt;
10154 if (name[2] == 't')
10159 if (name[4] == 'o' &&
10166 return -KEY_sethostent;
10175 if (name[5] == 'r' &&
10181 return -KEY_setservent;
10187 if (name[5] == 'c' &&
10193 return -KEY_setsockopt;
10210 if (name[2] == 'c' &&
10219 return -KEY_socketpair;
10232 case 11: /* 8 tokens of length 11 */
10236 if (name[1] == '_' &&
10246 { /* __PACKAGE__ */
10247 return -KEY___PACKAGE__;
10253 if (name[1] == 'n' &&
10263 { /* endprotoent */
10264 return -KEY_endprotoent;
10270 if (name[1] == 'e' &&
10279 if (name[5] == 'e' &&
10285 { /* getpeername */
10286 return -KEY_getpeername;
10295 if (name[6] == 'o' &&
10300 { /* getpriority */
10301 return -KEY_getpriority;
10307 if (name[6] == 't' &&
10312 { /* getprotoent */
10313 return -KEY_getprotoent;
10327 if (name[4] == 'o' &&
10334 { /* getsockname */
10335 return -KEY_getsockname;
10348 if (name[1] == 'e' &&
10356 if (name[6] == 'o' &&
10361 { /* setpriority */
10362 return -KEY_setpriority;
10368 if (name[6] == 't' &&
10373 { /* setprotoent */
10374 return -KEY_setprotoent;
10390 case 12: /* 2 tokens of length 12 */
10391 if (name[0] == 'g' &&
10403 if (name[9] == 'd' &&
10406 { /* getnetbyaddr */
10407 return -KEY_getnetbyaddr;
10413 if (name[9] == 'a' &&
10416 { /* getnetbyname */
10417 return -KEY_getnetbyname;
10429 case 13: /* 4 tokens of length 13 */
10430 if (name[0] == 'g' &&
10437 if (name[4] == 'o' &&
10446 if (name[10] == 'd' &&
10449 { /* gethostbyaddr */
10450 return -KEY_gethostbyaddr;
10456 if (name[10] == 'a' &&
10459 { /* gethostbyname */
10460 return -KEY_gethostbyname;
10473 if (name[4] == 'e' &&
10482 if (name[10] == 'a' &&
10485 { /* getservbyname */
10486 return -KEY_getservbyname;
10492 if (name[10] == 'o' &&
10495 { /* getservbyport */
10496 return -KEY_getservbyport;
10515 case 14: /* 1 tokens of length 14 */
10516 if (name[0] == 'g' &&
10530 { /* getprotobyname */
10531 return -KEY_getprotobyname;
10536 case 16: /* 1 tokens of length 16 */
10537 if (name[0] == 'g' &&
10553 { /* getprotobynumber */
10554 return -KEY_getprotobynumber;
10568 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10572 PERL_ARGS_ASSERT_CHECKCOMMA;
10574 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10575 if (ckWARN(WARN_SYNTAX)) {
10578 for (w = s+2; *w && level; w++) {
10581 else if (*w == ')')
10584 while (isSPACE(*w))
10586 /* the list of chars below is for end of statements or
10587 * block / parens, boolean operators (&&, ||, //) and branch
10588 * constructs (or, and, if, until, unless, while, err, for).
10589 * Not a very solid hack... */
10590 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10591 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10592 "%s (...) interpreted as function",name);
10595 while (s < PL_bufend && isSPACE(*s))
10599 while (s < PL_bufend && isSPACE(*s))
10601 if (isIDFIRST_lazy_if(s,UTF)) {
10602 const char * const w = s++;
10603 while (isALNUM_lazy_if(s,UTF))
10605 while (s < PL_bufend && isSPACE(*s))
10609 if (keyword(w, s - w, 0))
10612 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10613 if (gv && GvCVu(gv))
10615 Perl_croak(aTHX_ "No comma allowed after %s", what);
10620 /* Either returns sv, or mortalizes sv and returns a new SV*.
10621 Best used as sv=new_constant(..., sv, ...).
10622 If s, pv are NULL, calls subroutine with one argument,
10623 and type is used with error messages only. */
10626 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10627 SV *sv, SV *pv, const char *type, STRLEN typelen)
10630 HV * const table = GvHV(PL_hintgv); /* ^H */
10634 const char *why1 = "", *why2 = "", *why3 = "";
10636 PERL_ARGS_ASSERT_NEW_CONSTANT;
10638 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10641 why2 = (const char *)
10642 (strEQ(key,"charnames")
10643 ? "(possibly a missing \"use charnames ...\")"
10645 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10646 (type ? type: "undef"), why2);
10648 /* This is convoluted and evil ("goto considered harmful")
10649 * but I do not understand the intricacies of all the different
10650 * failure modes of %^H in here. The goal here is to make
10651 * the most probable error message user-friendly. --jhi */
10656 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10657 (type ? type: "undef"), why1, why2, why3);
10659 yyerror(SvPVX_const(msg));
10663 cvp = hv_fetch(table, key, keylen, FALSE);
10664 if (!cvp || !SvOK(*cvp)) {
10667 why3 = "} is not defined";
10670 sv_2mortal(sv); /* Parent created it permanently */
10673 pv = newSVpvn_flags(s, len, SVs_TEMP);
10675 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10677 typesv = &PL_sv_undef;
10679 PUSHSTACKi(PERLSI_OVERLOAD);
10691 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10695 /* Check the eval first */
10696 if (!PL_in_eval && SvTRUE(ERRSV)) {
10697 sv_catpvs(ERRSV, "Propagated");
10698 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10700 res = SvREFCNT_inc_simple(sv);
10704 SvREFCNT_inc_simple_void(res);
10713 why1 = "Call to &{$^H{";
10715 why3 = "}} did not return a defined value";
10723 /* Returns a NUL terminated string, with the length of the string written to
10727 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10730 register char *d = dest;
10731 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10733 PERL_ARGS_ASSERT_SCAN_WORD;
10737 Perl_croak(aTHX_ ident_too_long);
10738 if (isALNUM(*s)) /* UTF handled below */
10740 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10745 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10749 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10750 char *t = s + UTF8SKIP(s);
10752 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10756 Perl_croak(aTHX_ ident_too_long);
10757 Copy(s, d, len, char);
10770 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10773 char *bracket = NULL;
10775 register char *d = dest;
10776 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10778 PERL_ARGS_ASSERT_SCAN_IDENT;
10783 while (isDIGIT(*s)) {
10785 Perl_croak(aTHX_ ident_too_long);
10792 Perl_croak(aTHX_ ident_too_long);
10793 if (isALNUM(*s)) /* UTF handled below */
10795 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10800 else if (*s == ':' && s[1] == ':') {
10804 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10805 char *t = s + UTF8SKIP(s);
10806 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10808 if (d + (t - s) > e)
10809 Perl_croak(aTHX_ ident_too_long);
10810 Copy(s, d, t - s, char);
10821 if (PL_lex_state != LEX_NORMAL)
10822 PL_lex_state = LEX_INTERPENDMAYBE;
10825 if (*s == '$' && s[1] &&
10826 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10839 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10844 if (isSPACE(s[-1])) {
10846 const char ch = *s++;
10847 if (!SPACE_OR_TAB(ch)) {
10853 if (isIDFIRST_lazy_if(d,UTF)) {
10857 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10858 end += UTF8SKIP(end);
10859 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10860 end += UTF8SKIP(end);
10862 Copy(s, d, end - s, char);
10867 while ((isALNUM(*s) || *s == ':') && d < e)
10870 Perl_croak(aTHX_ ident_too_long);
10873 while (s < send && SPACE_OR_TAB(*s))
10875 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10876 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10877 const char * const brack =
10879 ((*s == '[') ? "[...]" : "{...}");
10880 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10881 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10882 funny, dest, brack, funny, dest, brack);
10885 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10889 /* Handle extended ${^Foo} variables
10890 * 1999-02-27 mjd-perl-patch@plover.com */
10891 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10895 while (isALNUM(*s) && d < e) {
10899 Perl_croak(aTHX_ ident_too_long);
10904 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10905 PL_lex_state = LEX_INTERPEND;
10908 if (PL_lex_state == LEX_NORMAL) {
10909 if (ckWARN(WARN_AMBIGUOUS) &&
10910 (keyword(dest, d - dest, 0)
10911 || get_cvn_flags(dest, d - dest, 0)))
10915 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10916 "Ambiguous use of %c{%s} resolved to %c%s",
10917 funny, dest, funny, dest);
10922 s = bracket; /* let the parser handle it */
10926 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10927 PL_lex_state = LEX_INTERPEND;
10932 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10934 PERL_ARGS_ASSERT_PMFLAG;
10936 PERL_UNUSED_CONTEXT;
10938 const char c = (char)ch;
10940 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10941 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10942 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10943 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10944 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10950 S_scan_pat(pTHX_ char *start, I32 type)
10954 char *s = scan_str(start,!!PL_madskills,FALSE);
10955 const char * const valid_flags =
10956 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10961 PERL_ARGS_ASSERT_SCAN_PAT;
10964 const char * const delimiter = skipspace(start);
10968 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10969 : "Search pattern not terminated" ));
10972 pm = (PMOP*)newPMOP(type, 0);
10973 if (PL_multi_open == '?') {
10974 /* This is the only point in the code that sets PMf_ONCE: */
10975 pm->op_pmflags |= PMf_ONCE;
10977 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10978 allows us to restrict the list needed by reset to just the ??
10980 assert(type != OP_TRANS);
10982 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10985 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10988 elements = mg->mg_len / sizeof(PMOP**);
10989 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10990 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10991 mg->mg_len = elements * sizeof(PMOP**);
10992 PmopSTASH_set(pm,PL_curstash);
10998 while (*s && strchr(valid_flags, *s))
10999 pmflag(&pm->op_pmflags,*s++);
11001 if (PL_madskills && modstart != s) {
11002 SV* tmptoken = newSVpvn(modstart, s - modstart);
11003 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11006 /* issue a warning if /c is specified,but /g is not */
11007 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11009 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11010 "Use of /c modifier is meaningless without /g" );
11013 PL_lex_op = (OP*)pm;
11014 pl_yylval.ival = OP_MATCH;
11019 S_scan_subst(pTHX_ char *start)
11030 PERL_ARGS_ASSERT_SCAN_SUBST;
11032 pl_yylval.ival = OP_NULL;
11034 s = scan_str(start,!!PL_madskills,FALSE);
11037 Perl_croak(aTHX_ "Substitution pattern not terminated");
11039 if (s[-1] == PL_multi_open)
11042 if (PL_madskills) {
11043 CURMAD('q', PL_thisopen);
11044 CURMAD('_', PL_thiswhite);
11045 CURMAD('E', PL_thisstuff);
11046 CURMAD('Q', PL_thisclose);
11047 PL_realtokenstart = s - SvPVX(PL_linestr);
11051 first_start = PL_multi_start;
11052 s = scan_str(s,!!PL_madskills,FALSE);
11054 if (PL_lex_stuff) {
11055 SvREFCNT_dec(PL_lex_stuff);
11056 PL_lex_stuff = NULL;
11058 Perl_croak(aTHX_ "Substitution replacement not terminated");
11060 PL_multi_start = first_start; /* so whole substitution is taken together */
11062 pm = (PMOP*)newPMOP(OP_SUBST, 0);
11065 if (PL_madskills) {
11066 CURMAD('z', PL_thisopen);
11067 CURMAD('R', PL_thisstuff);
11068 CURMAD('Z', PL_thisclose);
11074 if (*s == EXEC_PAT_MOD) {
11078 else if (strchr(S_PAT_MODS, *s))
11079 pmflag(&pm->op_pmflags,*s++);
11085 if (PL_madskills) {
11087 curmad('m', newSVpvn(modstart, s - modstart));
11088 append_madprops(PL_thismad, (OP*)pm, 0);
11092 if ((pm->op_pmflags & PMf_CONTINUE)) {
11093 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11097 SV * const repl = newSVpvs("");
11099 PL_sublex_info.super_bufptr = s;
11100 PL_sublex_info.super_bufend = PL_bufend;
11102 pm->op_pmflags |= PMf_EVAL;
11105 sv_catpvs(repl, "eval ");
11107 sv_catpvs(repl, "do ");
11109 sv_catpvs(repl, "{");
11110 sv_catsv(repl, PL_lex_repl);
11111 if (strchr(SvPVX(PL_lex_repl), '#'))
11112 sv_catpvs(repl, "\n");
11113 sv_catpvs(repl, "}");
11115 SvREFCNT_dec(PL_lex_repl);
11116 PL_lex_repl = repl;
11119 PL_lex_op = (OP*)pm;
11120 pl_yylval.ival = OP_SUBST;
11125 S_scan_trans(pTHX_ char *start)
11138 PERL_ARGS_ASSERT_SCAN_TRANS;
11140 pl_yylval.ival = OP_NULL;
11142 s = scan_str(start,!!PL_madskills,FALSE);
11144 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11146 if (s[-1] == PL_multi_open)
11149 if (PL_madskills) {
11150 CURMAD('q', PL_thisopen);
11151 CURMAD('_', PL_thiswhite);
11152 CURMAD('E', PL_thisstuff);
11153 CURMAD('Q', PL_thisclose);
11154 PL_realtokenstart = s - SvPVX(PL_linestr);
11158 s = scan_str(s,!!PL_madskills,FALSE);
11160 if (PL_lex_stuff) {
11161 SvREFCNT_dec(PL_lex_stuff);
11162 PL_lex_stuff = NULL;
11164 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11166 if (PL_madskills) {
11167 CURMAD('z', PL_thisopen);
11168 CURMAD('R', PL_thisstuff);
11169 CURMAD('Z', PL_thisclose);
11172 complement = del = squash = 0;
11179 complement = OPpTRANS_COMPLEMENT;
11182 del = OPpTRANS_DELETE;
11185 squash = OPpTRANS_SQUASH;
11194 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11195 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11196 o->op_private &= ~OPpTRANS_ALL;
11197 o->op_private |= del|squash|complement|
11198 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11199 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11202 pl_yylval.ival = OP_TRANS;
11205 if (PL_madskills) {
11207 curmad('m', newSVpvn(modstart, s - modstart));
11208 append_madprops(PL_thismad, o, 0);
11217 S_scan_heredoc(pTHX_ register char *s)
11221 I32 op_type = OP_SCALAR;
11225 const char *found_newline;
11229 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11231 I32 stuffstart = s - SvPVX(PL_linestr);
11234 PL_realtokenstart = -1;
11237 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11241 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11245 while (SPACE_OR_TAB(*peek))
11247 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11250 s = delimcpy(d, e, s, PL_bufend, term, &len);
11260 if (!isALNUM_lazy_if(s,UTF))
11261 deprecate("bare << to mean <<\"\"");
11262 for (; isALNUM_lazy_if(s,UTF); s++) {
11267 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11268 Perl_croak(aTHX_ "Delimiter for here document is too long");
11271 len = d - PL_tokenbuf;
11274 if (PL_madskills) {
11275 tstart = PL_tokenbuf + !outer;
11276 PL_thisclose = newSVpvn(tstart, len - !outer);
11277 tstart = SvPVX(PL_linestr) + stuffstart;
11278 PL_thisopen = newSVpvn(tstart, s - tstart);
11279 stuffstart = s - SvPVX(PL_linestr);
11282 #ifndef PERL_STRICT_CR
11283 d = strchr(s, '\r');
11285 char * const olds = s;
11287 while (s < PL_bufend) {
11293 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11302 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11309 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11310 herewas = newSVpvn(s,PL_bufend-s);
11314 herewas = newSVpvn(s-1,found_newline-s+1);
11317 herewas = newSVpvn(s,found_newline-s);
11321 if (PL_madskills) {
11322 tstart = SvPVX(PL_linestr) + stuffstart;
11324 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11326 PL_thisstuff = newSVpvn(tstart, s - tstart);
11329 s += SvCUR(herewas);
11332 stuffstart = s - SvPVX(PL_linestr);
11338 tmpstr = newSV_type(SVt_PVIV);
11339 SvGROW(tmpstr, 80);
11340 if (term == '\'') {
11341 op_type = OP_CONST;
11342 SvIV_set(tmpstr, -1);
11344 else if (term == '`') {
11345 op_type = OP_BACKTICK;
11346 SvIV_set(tmpstr, '\\');
11350 PL_multi_start = CopLINE(PL_curcop);
11351 PL_multi_open = PL_multi_close = '<';
11352 term = *PL_tokenbuf;
11353 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11354 char * const bufptr = PL_sublex_info.super_bufptr;
11355 char * const bufend = PL_sublex_info.super_bufend;
11356 char * const olds = s - SvCUR(herewas);
11357 s = strchr(bufptr, '\n');
11361 while (s < bufend &&
11362 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11364 CopLINE_inc(PL_curcop);
11367 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11368 missingterm(PL_tokenbuf);
11370 sv_setpvn(herewas,bufptr,d-bufptr+1);
11371 sv_setpvn(tmpstr,d+1,s-d);
11373 sv_catpvn(herewas,s,bufend-s);
11374 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11381 while (s < PL_bufend &&
11382 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11384 CopLINE_inc(PL_curcop);
11386 if (s >= PL_bufend) {
11387 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11388 missingterm(PL_tokenbuf);
11390 sv_setpvn(tmpstr,d+1,s-d);
11392 if (PL_madskills) {
11394 sv_catpvn(PL_thisstuff, d + 1, s - d);
11396 PL_thisstuff = newSVpvn(d + 1, s - d);
11397 stuffstart = s - SvPVX(PL_linestr);
11401 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11403 sv_catpvn(herewas,s,PL_bufend-s);
11404 sv_setsv(PL_linestr,herewas);
11405 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11406 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11407 PL_last_lop = PL_last_uni = NULL;
11410 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
11411 while (s >= PL_bufend) { /* multiple line string? */
11413 if (PL_madskills) {
11414 tstart = SvPVX(PL_linestr) + stuffstart;
11416 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11418 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11422 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11423 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11424 missingterm(PL_tokenbuf);
11427 stuffstart = s - SvPVX(PL_linestr);
11429 CopLINE_inc(PL_curcop);
11430 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11431 PL_last_lop = PL_last_uni = NULL;
11432 #ifndef PERL_STRICT_CR
11433 if (PL_bufend - PL_linestart >= 2) {
11434 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11435 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11437 PL_bufend[-2] = '\n';
11439 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11441 else if (PL_bufend[-1] == '\r')
11442 PL_bufend[-1] = '\n';
11444 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11445 PL_bufend[-1] = '\n';
11447 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11448 update_debugger_info(PL_linestr, NULL, 0);
11449 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11450 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11451 *(SvPVX(PL_linestr) + off ) = ' ';
11452 sv_catsv(PL_linestr,herewas);
11453 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11454 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11458 sv_catsv(tmpstr,PL_linestr);
11463 PL_multi_end = CopLINE(PL_curcop);
11464 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11465 SvPV_shrink_to_cur(tmpstr);
11467 SvREFCNT_dec(herewas);
11469 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11471 else if (PL_encoding)
11472 sv_recode_to_utf8(tmpstr, PL_encoding);
11474 PL_lex_stuff = tmpstr;
11475 pl_yylval.ival = op_type;
11479 /* scan_inputsymbol
11480 takes: current position in input buffer
11481 returns: new position in input buffer
11482 side-effects: pl_yylval and lex_op are set.
11487 <FH> read from filehandle
11488 <pkg::FH> read from package qualified filehandle
11489 <pkg'FH> read from package qualified filehandle
11490 <$fh> read from filehandle in $fh
11491 <*.h> filename glob
11496 S_scan_inputsymbol(pTHX_ char *start)
11499 register char *s = start; /* current position in buffer */
11502 char *d = PL_tokenbuf; /* start of temp holding space */
11503 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11505 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11507 end = strchr(s, '\n');
11510 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11512 /* die if we didn't have space for the contents of the <>,
11513 or if it didn't end, or if we see a newline
11516 if (len >= (I32)sizeof PL_tokenbuf)
11517 Perl_croak(aTHX_ "Excessively long <> operator");
11519 Perl_croak(aTHX_ "Unterminated <> operator");
11524 Remember, only scalar variables are interpreted as filehandles by
11525 this code. Anything more complex (e.g., <$fh{$num}>) will be
11526 treated as a glob() call.
11527 This code makes use of the fact that except for the $ at the front,
11528 a scalar variable and a filehandle look the same.
11530 if (*d == '$' && d[1]) d++;
11532 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11533 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11536 /* If we've tried to read what we allow filehandles to look like, and
11537 there's still text left, then it must be a glob() and not a getline.
11538 Use scan_str to pull out the stuff between the <> and treat it
11539 as nothing more than a string.
11542 if (d - PL_tokenbuf != len) {
11543 pl_yylval.ival = OP_GLOB;
11544 s = scan_str(start,!!PL_madskills,FALSE);
11546 Perl_croak(aTHX_ "Glob not terminated");
11550 bool readline_overriden = FALSE;
11553 /* we're in a filehandle read situation */
11556 /* turn <> into <ARGV> */
11558 Copy("ARGV",d,5,char);
11560 /* Check whether readline() is overriden */
11561 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11563 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11565 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11566 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11567 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11568 readline_overriden = TRUE;
11570 /* if <$fh>, create the ops to turn the variable into a
11574 /* try to find it in the pad for this block, otherwise find
11575 add symbol table ops
11577 const PADOFFSET tmp = pad_findmy(d);
11578 if (tmp != NOT_IN_PAD) {
11579 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11580 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11581 HEK * const stashname = HvNAME_HEK(stash);
11582 SV * const sym = sv_2mortal(newSVhek(stashname));
11583 sv_catpvs(sym, "::");
11584 sv_catpv(sym, d+1);
11589 OP * const o = newOP(OP_PADSV, 0);
11591 PL_lex_op = readline_overriden
11592 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11593 append_elem(OP_LIST, o,
11594 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11595 : (OP*)newUNOP(OP_READLINE, 0, o);
11604 ? (GV_ADDMULTI | GV_ADDINEVAL)
11607 PL_lex_op = readline_overriden
11608 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11609 append_elem(OP_LIST,
11610 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11611 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11612 : (OP*)newUNOP(OP_READLINE, 0,
11613 newUNOP(OP_RV2SV, 0,
11614 newGVOP(OP_GV, 0, gv)));
11616 if (!readline_overriden)
11617 PL_lex_op->op_flags |= OPf_SPECIAL;
11618 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11619 pl_yylval.ival = OP_NULL;
11622 /* If it's none of the above, it must be a literal filehandle
11623 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11625 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11626 PL_lex_op = readline_overriden
11627 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11628 append_elem(OP_LIST,
11629 newGVOP(OP_GV, 0, gv),
11630 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11631 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11632 pl_yylval.ival = OP_NULL;
11641 takes: start position in buffer
11642 keep_quoted preserve \ on the embedded delimiter(s)
11643 keep_delims preserve the delimiters around the string
11644 returns: position to continue reading from buffer
11645 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11646 updates the read buffer.
11648 This subroutine pulls a string out of the input. It is called for:
11649 q single quotes q(literal text)
11650 ' single quotes 'literal text'
11651 qq double quotes qq(interpolate $here please)
11652 " double quotes "interpolate $here please"
11653 qx backticks qx(/bin/ls -l)
11654 ` backticks `/bin/ls -l`
11655 qw quote words @EXPORT_OK = qw( func() $spam )
11656 m// regexp match m/this/
11657 s/// regexp substitute s/this/that/
11658 tr/// string transliterate tr/this/that/
11659 y/// string transliterate y/this/that/
11660 ($*@) sub prototypes sub foo ($)
11661 (stuff) sub attr parameters sub foo : attr(stuff)
11662 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11664 In most of these cases (all but <>, patterns and transliterate)
11665 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11666 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11667 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11670 It skips whitespace before the string starts, and treats the first
11671 character as the delimiter. If the delimiter is one of ([{< then
11672 the corresponding "close" character )]}> is used as the closing
11673 delimiter. It allows quoting of delimiters, and if the string has
11674 balanced delimiters ([{<>}]) it allows nesting.
11676 On success, the SV with the resulting string is put into lex_stuff or,
11677 if that is already non-NULL, into lex_repl. The second case occurs only
11678 when parsing the RHS of the special constructs s/// and tr/// (y///).
11679 For convenience, the terminating delimiter character is stuffed into
11684 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11687 SV *sv; /* scalar value: string */
11688 const char *tmps; /* temp string, used for delimiter matching */
11689 register char *s = start; /* current position in the buffer */
11690 register char term; /* terminating character */
11691 register char *to; /* current position in the sv's data */
11692 I32 brackets = 1; /* bracket nesting level */
11693 bool has_utf8 = FALSE; /* is there any utf8 content? */
11694 I32 termcode; /* terminating char. code */
11695 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11696 STRLEN termlen; /* length of terminating string */
11697 int last_off = 0; /* last position for nesting bracket */
11703 PERL_ARGS_ASSERT_SCAN_STR;
11705 /* skip space before the delimiter */
11711 if (PL_realtokenstart >= 0) {
11712 stuffstart = PL_realtokenstart;
11713 PL_realtokenstart = -1;
11716 stuffstart = start - SvPVX(PL_linestr);
11718 /* mark where we are, in case we need to report errors */
11721 /* after skipping whitespace, the next character is the terminator */
11724 termcode = termstr[0] = term;
11728 termcode = utf8_to_uvchr((U8*)s, &termlen);
11729 Copy(s, termstr, termlen, U8);
11730 if (!UTF8_IS_INVARIANT(term))
11734 /* mark where we are */
11735 PL_multi_start = CopLINE(PL_curcop);
11736 PL_multi_open = term;
11738 /* find corresponding closing delimiter */
11739 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11740 termcode = termstr[0] = term = tmps[5];
11742 PL_multi_close = term;
11744 /* create a new SV to hold the contents. 79 is the SV's initial length.
11745 What a random number. */
11746 sv = newSV_type(SVt_PVIV);
11748 SvIV_set(sv, termcode);
11749 (void)SvPOK_only(sv); /* validate pointer */
11751 /* move past delimiter and try to read a complete string */
11753 sv_catpvn(sv, s, termlen);
11756 tstart = SvPVX(PL_linestr) + stuffstart;
11757 if (!PL_thisopen && !keep_delims) {
11758 PL_thisopen = newSVpvn(tstart, s - tstart);
11759 stuffstart = s - SvPVX(PL_linestr);
11763 if (PL_encoding && !UTF) {
11767 int offset = s - SvPVX_const(PL_linestr);
11768 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11769 &offset, (char*)termstr, termlen);
11770 const char * const ns = SvPVX_const(PL_linestr) + offset;
11771 char * const svlast = SvEND(sv) - 1;
11773 for (; s < ns; s++) {
11774 if (*s == '\n' && !PL_rsfp)
11775 CopLINE_inc(PL_curcop);
11778 goto read_more_line;
11780 /* handle quoted delimiters */
11781 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11783 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11785 if ((svlast-1 - t) % 2) {
11786 if (!keep_quoted) {
11787 *(svlast-1) = term;
11789 SvCUR_set(sv, SvCUR(sv) - 1);
11794 if (PL_multi_open == PL_multi_close) {
11800 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11801 /* At here, all closes are "was quoted" one,
11802 so we don't check PL_multi_close. */
11804 if (!keep_quoted && *(t+1) == PL_multi_open)
11809 else if (*t == PL_multi_open)
11817 SvCUR_set(sv, w - SvPVX_const(sv));
11819 last_off = w - SvPVX(sv);
11820 if (--brackets <= 0)
11825 if (!keep_delims) {
11826 SvCUR_set(sv, SvCUR(sv) - 1);
11832 /* extend sv if need be */
11833 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11834 /* set 'to' to the next character in the sv's string */
11835 to = SvPVX(sv)+SvCUR(sv);
11837 /* if open delimiter is the close delimiter read unbridle */
11838 if (PL_multi_open == PL_multi_close) {
11839 for (; s < PL_bufend; s++,to++) {
11840 /* embedded newlines increment the current line number */
11841 if (*s == '\n' && !PL_rsfp)
11842 CopLINE_inc(PL_curcop);
11843 /* handle quoted delimiters */
11844 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11845 if (!keep_quoted && s[1] == term)
11847 /* any other quotes are simply copied straight through */
11851 /* terminate when run out of buffer (the for() condition), or
11852 have found the terminator */
11853 else if (*s == term) {
11856 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11859 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11865 /* if the terminator isn't the same as the start character (e.g.,
11866 matched brackets), we have to allow more in the quoting, and
11867 be prepared for nested brackets.
11870 /* read until we run out of string, or we find the terminator */
11871 for (; s < PL_bufend; s++,to++) {
11872 /* embedded newlines increment the line count */
11873 if (*s == '\n' && !PL_rsfp)
11874 CopLINE_inc(PL_curcop);
11875 /* backslashes can escape the open or closing characters */
11876 if (*s == '\\' && s+1 < PL_bufend) {
11877 if (!keep_quoted &&
11878 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11883 /* allow nested opens and closes */
11884 else if (*s == PL_multi_close && --brackets <= 0)
11886 else if (*s == PL_multi_open)
11888 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11893 /* terminate the copied string and update the sv's end-of-string */
11895 SvCUR_set(sv, to - SvPVX_const(sv));
11898 * this next chunk reads more into the buffer if we're not done yet
11902 break; /* handle case where we are done yet :-) */
11904 #ifndef PERL_STRICT_CR
11905 if (to - SvPVX_const(sv) >= 2) {
11906 if ((to[-2] == '\r' && to[-1] == '\n') ||
11907 (to[-2] == '\n' && to[-1] == '\r'))
11911 SvCUR_set(sv, to - SvPVX_const(sv));
11913 else if (to[-1] == '\r')
11916 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11921 /* if we're out of file, or a read fails, bail and reset the current
11922 line marker so we can report where the unterminated string began
11925 if (PL_madskills) {
11926 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11928 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11930 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11934 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11936 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11942 /* we read a line, so increment our line counter */
11943 CopLINE_inc(PL_curcop);
11945 /* update debugger info */
11946 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11947 update_debugger_info(PL_linestr, NULL, 0);
11949 /* having changed the buffer, we must update PL_bufend */
11950 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11951 PL_last_lop = PL_last_uni = NULL;
11954 /* at this point, we have successfully read the delimited string */
11956 if (!PL_encoding || UTF) {
11958 if (PL_madskills) {
11959 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11960 const int len = s - tstart;
11962 sv_catpvn(PL_thisstuff, tstart, len);
11964 PL_thisstuff = newSVpvn(tstart, len);
11965 if (!PL_thisclose && !keep_delims)
11966 PL_thisclose = newSVpvn(s,termlen);
11971 sv_catpvn(sv, s, termlen);
11976 if (PL_madskills) {
11977 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11978 const int len = s - tstart - termlen;
11980 sv_catpvn(PL_thisstuff, tstart, len);
11982 PL_thisstuff = newSVpvn(tstart, len);
11983 if (!PL_thisclose && !keep_delims)
11984 PL_thisclose = newSVpvn(s - termlen,termlen);
11988 if (has_utf8 || PL_encoding)
11991 PL_multi_end = CopLINE(PL_curcop);
11993 /* if we allocated too much space, give some back */
11994 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11995 SvLEN_set(sv, SvCUR(sv) + 1);
11996 SvPV_renew(sv, SvLEN(sv));
11999 /* decide whether this is the first or second quoted string we've read
12012 takes: pointer to position in buffer
12013 returns: pointer to new position in buffer
12014 side-effects: builds ops for the constant in pl_yylval.op
12016 Read a number in any of the formats that Perl accepts:
12018 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12019 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12022 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12024 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12027 If it reads a number without a decimal point or an exponent, it will
12028 try converting the number to an integer and see if it can do so
12029 without loss of precision.
12033 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12036 register const char *s = start; /* current position in buffer */
12037 register char *d; /* destination in temp buffer */
12038 register char *e; /* end of temp buffer */
12039 NV nv; /* number read, as a double */
12040 SV *sv = NULL; /* place to put the converted number */
12041 bool floatit; /* boolean: int or float? */
12042 const char *lastub = NULL; /* position of last underbar */
12043 static char const number_too_long[] = "Number too long";
12045 PERL_ARGS_ASSERT_SCAN_NUM;
12047 /* We use the first character to decide what type of number this is */
12051 Perl_croak(aTHX_ "panic: scan_num");
12053 /* if it starts with a 0, it could be an octal number, a decimal in
12054 0.13 disguise, or a hexadecimal number, or a binary number. */
12058 u holds the "number so far"
12059 shift the power of 2 of the base
12060 (hex == 4, octal == 3, binary == 1)
12061 overflowed was the number more than we can hold?
12063 Shift is used when we add a digit. It also serves as an "are
12064 we in octal/hex/binary?" indicator to disallow hex characters
12065 when in octal mode.
12070 bool overflowed = FALSE;
12071 bool just_zero = TRUE; /* just plain 0 or binary number? */
12072 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12073 static const char* const bases[5] =
12074 { "", "binary", "", "octal", "hexadecimal" };
12075 static const char* const Bases[5] =
12076 { "", "Binary", "", "Octal", "Hexadecimal" };
12077 static const char* const maxima[5] =
12079 "0b11111111111111111111111111111111",
12083 const char *base, *Base, *max;
12085 /* check for hex */
12090 } else if (s[1] == 'b') {
12095 /* check for a decimal in disguise */
12096 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12098 /* so it must be octal */
12105 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12106 "Misplaced _ in number");
12110 base = bases[shift];
12111 Base = Bases[shift];
12112 max = maxima[shift];
12114 /* read the rest of the number */
12116 /* x is used in the overflow test,
12117 b is the digit we're adding on. */
12122 /* if we don't mention it, we're done */
12126 /* _ are ignored -- but warned about if consecutive */
12128 if (lastub && s == lastub + 1)
12129 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12130 "Misplaced _ in number");
12134 /* 8 and 9 are not octal */
12135 case '8': case '9':
12137 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12141 case '2': case '3': case '4':
12142 case '5': case '6': case '7':
12144 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12147 case '0': case '1':
12148 b = *s++ & 15; /* ASCII digit -> value of digit */
12152 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12153 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12154 /* make sure they said 0x */
12157 b = (*s++ & 7) + 9;
12159 /* Prepare to put the digit we have onto the end
12160 of the number so far. We check for overflows.
12166 x = u << shift; /* make room for the digit */
12168 if ((x >> shift) != u
12169 && !(PL_hints & HINT_NEW_BINARY)) {
12172 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12173 "Integer overflow in %s number",
12176 u = x | b; /* add the digit to the end */
12179 n *= nvshift[shift];
12180 /* If an NV has not enough bits in its
12181 * mantissa to represent an UV this summing of
12182 * small low-order numbers is a waste of time
12183 * (because the NV cannot preserve the
12184 * low-order bits anyway): we could just
12185 * remember when did we overflow and in the
12186 * end just multiply n by the right
12194 /* if we get here, we had success: make a scalar value from
12199 /* final misplaced underbar check */
12200 if (s[-1] == '_') {
12201 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12206 if (n > 4294967295.0)
12207 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12208 "%s number > %s non-portable",
12214 if (u > 0xffffffff)
12215 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12216 "%s number > %s non-portable",
12221 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12222 sv = new_constant(start, s - start, "integer",
12223 sv, NULL, NULL, 0);
12224 else if (PL_hints & HINT_NEW_BINARY)
12225 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12230 handle decimal numbers.
12231 we're also sent here when we read a 0 as the first digit
12233 case '1': case '2': case '3': case '4': case '5':
12234 case '6': case '7': case '8': case '9': case '.':
12237 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12240 /* read next group of digits and _ and copy into d */
12241 while (isDIGIT(*s) || *s == '_') {
12242 /* skip underscores, checking for misplaced ones
12246 if (lastub && s == lastub + 1)
12247 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12248 "Misplaced _ in number");
12252 /* check for end of fixed-length buffer */
12254 Perl_croak(aTHX_ number_too_long);
12255 /* if we're ok, copy the character */
12260 /* final misplaced underbar check */
12261 if (lastub && s == lastub + 1) {
12262 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12265 /* read a decimal portion if there is one. avoid
12266 3..5 being interpreted as the number 3. followed
12269 if (*s == '.' && s[1] != '.') {
12274 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12275 "Misplaced _ in number");
12279 /* copy, ignoring underbars, until we run out of digits.
12281 for (; isDIGIT(*s) || *s == '_'; s++) {
12282 /* fixed length buffer check */
12284 Perl_croak(aTHX_ number_too_long);
12286 if (lastub && s == lastub + 1)
12287 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12288 "Misplaced _ in number");
12294 /* fractional part ending in underbar? */
12295 if (s[-1] == '_') {
12296 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12297 "Misplaced _ in number");
12299 if (*s == '.' && isDIGIT(s[1])) {
12300 /* oops, it's really a v-string, but without the "v" */
12306 /* read exponent part, if present */
12307 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12311 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12312 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12314 /* stray preinitial _ */
12316 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12317 "Misplaced _ in number");
12321 /* allow positive or negative exponent */
12322 if (*s == '+' || *s == '-')
12325 /* stray initial _ */
12327 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12328 "Misplaced _ in number");
12332 /* read digits of exponent */
12333 while (isDIGIT(*s) || *s == '_') {
12336 Perl_croak(aTHX_ number_too_long);
12340 if (((lastub && s == lastub + 1) ||
12341 (!isDIGIT(s[1]) && s[1] != '_')))
12342 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12343 "Misplaced _ in number");
12350 /* make an sv from the string */
12354 We try to do an integer conversion first if no characters
12355 indicating "float" have been found.
12360 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12362 if (flags == IS_NUMBER_IN_UV) {
12364 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12367 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12368 if (uv <= (UV) IV_MIN)
12369 sv_setiv(sv, -(IV)uv);
12376 /* terminate the string */
12378 nv = Atof(PL_tokenbuf);
12383 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12384 const char *const key = floatit ? "float" : "integer";
12385 const STRLEN keylen = floatit ? 5 : 7;
12386 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12387 key, keylen, sv, NULL, NULL, 0);
12391 /* if it starts with a v, it could be a v-string */
12394 sv = newSV(5); /* preallocate storage space */
12395 s = scan_vstring(s, PL_bufend, sv);
12399 /* make the op for the constant and return */
12402 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12404 lvalp->opval = NULL;
12410 S_scan_formline(pTHX_ register char *s)
12413 register char *eol;
12415 SV * const stuff = newSVpvs("");
12416 bool needargs = FALSE;
12417 bool eofmt = FALSE;
12419 char *tokenstart = s;
12420 SV* savewhite = NULL;
12422 if (PL_madskills) {
12423 savewhite = PL_thiswhite;
12428 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12430 while (!needargs) {
12433 #ifdef PERL_STRICT_CR
12434 while (SPACE_OR_TAB(*t))
12437 while (SPACE_OR_TAB(*t) || *t == '\r')
12440 if (*t == '\n' || t == PL_bufend) {
12445 if (PL_in_eval && !PL_rsfp) {
12446 eol = (char *) memchr(s,'\n',PL_bufend-s);
12451 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12453 for (t = s; t < eol; t++) {
12454 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12456 goto enough; /* ~~ must be first line in formline */
12458 if (*t == '@' || *t == '^')
12462 sv_catpvn(stuff, s, eol-s);
12463 #ifndef PERL_STRICT_CR
12464 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12465 char *end = SvPVX(stuff) + SvCUR(stuff);
12468 SvCUR_set(stuff, SvCUR(stuff) - 1);
12478 if (PL_madskills) {
12480 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12482 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12485 s = filter_gets(PL_linestr, PL_rsfp, 0);
12487 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12489 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12491 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12492 PL_last_lop = PL_last_uni = NULL;
12501 if (SvCUR(stuff)) {
12504 PL_lex_state = LEX_NORMAL;
12505 start_force(PL_curforce);
12506 NEXTVAL_NEXTTOKE.ival = 0;
12510 PL_lex_state = LEX_FORMLINE;
12512 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12514 else if (PL_encoding)
12515 sv_recode_to_utf8(stuff, PL_encoding);
12517 start_force(PL_curforce);
12518 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12520 start_force(PL_curforce);
12521 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12525 SvREFCNT_dec(stuff);
12527 PL_lex_formbrack = 0;
12531 if (PL_madskills) {
12533 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12535 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12536 PL_thiswhite = savewhite;
12543 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12546 const I32 oldsavestack_ix = PL_savestack_ix;
12547 CV* const outsidecv = PL_compcv;
12550 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12552 SAVEI32(PL_subline);
12553 save_item(PL_subname);
12554 SAVESPTR(PL_compcv);
12556 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12557 CvFLAGS(PL_compcv) |= flags;
12559 PL_subline = CopLINE(PL_curcop);
12560 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12561 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12562 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12564 return oldsavestack_ix;
12568 #pragma segment Perl_yylex
12571 S_yywarn(pTHX_ const char *const s)
12575 PERL_ARGS_ASSERT_YYWARN;
12577 PL_in_eval |= EVAL_WARNONLY;
12579 PL_in_eval &= ~EVAL_WARNONLY;
12584 Perl_yyerror(pTHX_ const char *const s)
12587 const char *where = NULL;
12588 const char *context = NULL;
12591 int yychar = PL_parser->yychar;
12593 PERL_ARGS_ASSERT_YYERROR;
12595 if (!yychar || (yychar == ';' && !PL_rsfp))
12597 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12598 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12599 PL_oldbufptr != PL_bufptr) {
12602 The code below is removed for NetWare because it abends/crashes on NetWare
12603 when the script has error such as not having the closing quotes like:
12604 if ($var eq "value)
12605 Checking of white spaces is anyway done in NetWare code.
12608 while (isSPACE(*PL_oldoldbufptr))
12611 context = PL_oldoldbufptr;
12612 contlen = PL_bufptr - PL_oldoldbufptr;
12614 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12615 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12618 The code below is removed for NetWare because it abends/crashes on NetWare
12619 when the script has error such as not having the closing quotes like:
12620 if ($var eq "value)
12621 Checking of white spaces is anyway done in NetWare code.
12624 while (isSPACE(*PL_oldbufptr))
12627 context = PL_oldbufptr;
12628 contlen = PL_bufptr - PL_oldbufptr;
12630 else if (yychar > 255)
12631 where = "next token ???";
12632 else if (yychar == -2) { /* YYEMPTY */
12633 if (PL_lex_state == LEX_NORMAL ||
12634 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12635 where = "at end of line";
12636 else if (PL_lex_inpat)
12637 where = "within pattern";
12639 where = "within string";
12642 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
12644 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12645 else if (isPRINT_LC(yychar)) {
12646 const char string = yychar;
12647 sv_catpvn(where_sv, &string, 1);
12650 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12651 where = SvPVX_const(where_sv);
12653 msg = sv_2mortal(newSVpv(s, 0));
12654 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12655 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12657 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12659 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12660 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12661 Perl_sv_catpvf(aTHX_ msg,
12662 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12663 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12666 if (PL_in_eval & EVAL_WARNONLY) {
12667 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12671 if (PL_error_count >= 10) {
12672 if (PL_in_eval && SvCUR(ERRSV))
12673 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12674 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12676 Perl_croak(aTHX_ "%s has too many errors.\n",
12677 OutCopFILE(PL_curcop));
12680 PL_in_my_stash = NULL;
12684 #pragma segment Main
12688 S_swallow_bom(pTHX_ U8 *s)
12691 const STRLEN slen = SvCUR(PL_linestr);
12693 PERL_ARGS_ASSERT_SWALLOW_BOM;
12697 if (s[1] == 0xFE) {
12698 /* UTF-16 little-endian? (or UTF32-LE?) */
12699 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12700 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12701 #ifndef PERL_NO_UTF16_FILTER
12702 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12705 if (PL_bufend > (char*)s) {
12709 filter_add(S_utf16rev_textfilter, NULL);
12710 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12711 utf16_to_utf8_reversed(s, news,
12712 PL_bufend - (char*)s - 1,
12714 sv_setpvn(PL_linestr, (const char*)news, newlen);
12716 s = (U8*)SvPVX(PL_linestr);
12717 Copy(news, s, newlen, U8);
12721 SvUTF8_on(PL_linestr);
12722 s = (U8*)SvPVX(PL_linestr);
12724 /* FIXME - is this a general bug fix? */
12727 PL_bufend = SvPVX(PL_linestr) + newlen;
12730 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12735 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12736 #ifndef PERL_NO_UTF16_FILTER
12737 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12740 if (PL_bufend > (char *)s) {
12744 filter_add(S_utf16_textfilter, NULL);
12745 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12746 utf16_to_utf8(s, news,
12747 PL_bufend - (char*)s,
12749 sv_setpvn(PL_linestr, (const char*)news, newlen);
12751 SvUTF8_on(PL_linestr);
12752 s = (U8*)SvPVX(PL_linestr);
12753 PL_bufend = SvPVX(PL_linestr) + newlen;
12756 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12761 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12762 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12763 s += 3; /* UTF-8 */
12769 if (s[2] == 0xFE && s[3] == 0xFF) {
12770 /* UTF-32 big-endian */
12771 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12774 else if (s[2] == 0 && s[3] != 0) {
12777 * are a good indicator of UTF-16BE. */
12778 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12784 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12785 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12786 s += 4; /* UTF-8 */
12792 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12795 * are a good indicator of UTF-16LE. */
12796 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12804 #ifndef PERL_NO_UTF16_FILTER
12806 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12809 const STRLEN old = SvCUR(sv);
12810 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12811 DEBUG_P(PerlIO_printf(Perl_debug_log,
12812 "utf16_textfilter(%p): %d %d (%d)\n",
12813 FPTR2DPTR(void *, S_utf16_textfilter),
12814 idx, maxlen, (int) count));
12818 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12819 Copy(SvPVX_const(sv), tmps, old, char);
12820 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12821 SvCUR(sv) - old, &newlen);
12822 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12824 DEBUG_P({sv_dump(sv);});
12829 S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12832 const STRLEN old = SvCUR(sv);
12833 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12834 DEBUG_P(PerlIO_printf(Perl_debug_log,
12835 "utf16rev_textfilter(%p): %d %d (%d)\n",
12836 FPTR2DPTR(void *, utf16rev_textfilter),
12837 idx, maxlen, (int) count));
12841 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12842 Copy(SvPVX_const(sv), tmps, old, char);
12843 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12844 SvCUR(sv) - old, &newlen);
12845 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12847 DEBUG_P({ sv_dump(sv); });
12853 Returns a pointer to the next character after the parsed
12854 vstring, as well as updating the passed in sv.
12856 Function must be called like
12859 s = scan_vstring(s,e,sv);
12861 where s and e are the start and end of the string.
12862 The sv should already be large enough to store the vstring
12863 passed in, for performance reasons.
12868 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12871 const char *pos = s;
12872 const char *start = s;
12874 PERL_ARGS_ASSERT_SCAN_VSTRING;
12876 if (*pos == 'v') pos++; /* get past 'v' */
12877 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12879 if ( *pos != '.') {
12880 /* this may not be a v-string if followed by => */
12881 const char *next = pos;
12882 while (next < e && isSPACE(*next))
12884 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12885 /* return string not v-string */
12886 sv_setpvn(sv,(char *)s,pos-s);
12887 return (char *)pos;
12891 if (!isALPHA(*pos)) {
12892 U8 tmpbuf[UTF8_MAXBYTES+1];
12895 s++; /* get past 'v' */
12900 /* this is atoi() that tolerates underscores */
12903 const char *end = pos;
12905 while (--end >= s) {
12907 const UV orev = rev;
12908 rev += (*end - '0') * mult;
12911 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12912 "Integer overflow in decimal number");
12916 if (rev > 0x7FFFFFFF)
12917 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12919 /* Append native character for the rev point */
12920 tmpend = uvchr_to_utf8(tmpbuf, rev);
12921 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12922 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12924 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12930 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12934 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12942 * c-indentation-style: bsd
12943 * c-basic-offset: 4
12944 * indent-tabs-mode: t
12947 * ex: set ts=8 sts=4 sw=4 noet: