3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 #define PERL_IN_TOKE_C
28 #define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
31 #define pl_yylval (PL_parser->yylval)
33 /* YYINITDEPTH -- initial size of the parser's stacks. */
34 #define YYINITDEPTH 200
36 /* XXX temporary backwards compatibility */
37 #define PL_lex_brackets (PL_parser->lex_brackets)
38 #define PL_lex_brackstack (PL_parser->lex_brackstack)
39 #define PL_lex_casemods (PL_parser->lex_casemods)
40 #define PL_lex_casestack (PL_parser->lex_casestack)
41 #define PL_lex_defer (PL_parser->lex_defer)
42 #define PL_lex_dojoin (PL_parser->lex_dojoin)
43 #define PL_lex_expect (PL_parser->lex_expect)
44 #define PL_lex_formbrack (PL_parser->lex_formbrack)
45 #define PL_lex_inpat (PL_parser->lex_inpat)
46 #define PL_lex_inwhat (PL_parser->lex_inwhat)
47 #define PL_lex_op (PL_parser->lex_op)
48 #define PL_lex_repl (PL_parser->lex_repl)
49 #define PL_lex_starts (PL_parser->lex_starts)
50 #define PL_lex_stuff (PL_parser->lex_stuff)
51 #define PL_multi_start (PL_parser->multi_start)
52 #define PL_multi_open (PL_parser->multi_open)
53 #define PL_multi_close (PL_parser->multi_close)
54 #define PL_pending_ident (PL_parser->pending_ident)
55 #define PL_preambled (PL_parser->preambled)
56 #define PL_sublex_info (PL_parser->sublex_info)
57 #define PL_linestr (PL_parser->linestr)
58 #define PL_expect (PL_parser->expect)
59 #define PL_copline (PL_parser->copline)
60 #define PL_bufptr (PL_parser->bufptr)
61 #define PL_oldbufptr (PL_parser->oldbufptr)
62 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63 #define PL_linestart (PL_parser->linestart)
64 #define PL_bufend (PL_parser->bufend)
65 #define PL_last_uni (PL_parser->last_uni)
66 #define PL_last_lop (PL_parser->last_lop)
67 #define PL_last_lop_op (PL_parser->last_lop_op)
68 #define PL_lex_state (PL_parser->lex_state)
69 #define PL_rsfp (PL_parser->rsfp)
70 #define PL_rsfp_filters (PL_parser->rsfp_filters)
71 #define PL_in_my (PL_parser->in_my)
72 #define PL_in_my_stash (PL_parser->in_my_stash)
73 #define PL_tokenbuf (PL_parser->tokenbuf)
74 #define PL_multi_end (PL_parser->multi_end)
75 #define PL_error_count (PL_parser->error_count)
78 # define PL_endwhite (PL_parser->endwhite)
79 # define PL_faketokens (PL_parser->faketokens)
80 # define PL_lasttoke (PL_parser->lasttoke)
81 # define PL_nextwhite (PL_parser->nextwhite)
82 # define PL_realtokenstart (PL_parser->realtokenstart)
83 # define PL_skipwhite (PL_parser->skipwhite)
84 # define PL_thisclose (PL_parser->thisclose)
85 # define PL_thismad (PL_parser->thismad)
86 # define PL_thisopen (PL_parser->thisopen)
87 # define PL_thisstuff (PL_parser->thisstuff)
88 # define PL_thistoken (PL_parser->thistoken)
89 # define PL_thiswhite (PL_parser->thiswhite)
90 # define PL_thiswhite (PL_parser->thiswhite)
91 # define PL_nexttoke (PL_parser->nexttoke)
92 # define PL_curforce (PL_parser->curforce)
94 # define PL_nexttoke (PL_parser->nexttoke)
95 # define PL_nexttype (PL_parser->nexttype)
96 # define PL_nextval (PL_parser->nextval)
99 /* This can't be done with embed.fnc, because struct yy_parser contains a
100 member named pending_ident, which clashes with the generated #define */
102 S_pending_ident(pTHX);
104 static const char ident_too_long[] = "Identifier too long";
107 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
108 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
110 # define CURMAD(slot,sv)
111 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
114 #define XFAKEBRACK 128
115 #define XENUMMASK 127
117 #ifdef USE_UTF8_SCRIPTS
118 # define UTF (!IN_BYTES)
120 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
123 /* The maximum number of characters preceding the unrecognized one to display */
124 #define UNRECOGNIZED_PRECEDE_COUNT 10
126 /* In variables named $^X, these are the legal values for X.
127 * 1999-02-27 mjd-perl-patch@plover.com */
128 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
130 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
132 /* LEX_* are values for PL_lex_state, the state of the lexer.
133 * They are arranged oddly so that the guard on the switch statement
134 * can get by with a single comparison (if the compiler is smart enough).
137 /* #define LEX_NOTPARSING 11 is done in perl.h. */
139 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
140 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
141 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
142 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
143 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
145 /* at end of code, eg "$x" followed by: */
146 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
147 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
149 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
150 string or after \E, $foo, etc */
151 #define LEX_INTERPCONST 2 /* NOT USED */
152 #define LEX_FORMLINE 1 /* expecting a format line */
153 #define LEX_KNOWNEXT 0 /* next token known; just return it */
157 static const char* const lex_state_names[] = {
176 #include "keywords.h"
178 /* CLINE is a macro that ensures PL_copline has a sane value */
183 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
186 # define SKIPSPACE0(s) skipspace0(s)
187 # define SKIPSPACE1(s) skipspace1(s)
188 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
189 # define PEEKSPACE(s) skipspace2(s,0)
191 # define SKIPSPACE0(s) skipspace(s)
192 # define SKIPSPACE1(s) skipspace(s)
193 # define SKIPSPACE2(s,tsv) skipspace(s)
194 # define PEEKSPACE(s) skipspace(s)
198 * Convenience functions to return different tokens and prime the
199 * lexer for the next token. They all take an argument.
201 * TOKEN : generic token (used for '(', DOLSHARP, etc)
202 * OPERATOR : generic operator
203 * AOPERATOR : assignment operator
204 * PREBLOCK : beginning the block after an if, while, foreach, ...
205 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
206 * PREREF : *EXPR where EXPR is not a simple identifier
207 * TERM : expression term
208 * LOOPX : loop exiting command (goto, last, dump, etc)
209 * FTST : file test operator
210 * FUN0 : zero-argument function
211 * FUN1 : not used, except for not, which isn't a UNIOP
212 * BOop : bitwise or or xor
214 * SHop : shift operator
215 * PWop : power operator
216 * PMop : pattern-matching operator
217 * Aop : addition-level operator
218 * Mop : multiplication-level operator
219 * Eop : equality-testing operator
220 * Rop : relational operator <= != gt
222 * Also see LOP and lop() below.
225 #ifdef DEBUGGING /* Serve -DT. */
226 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
228 # define REPORT(retval) (retval)
231 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
232 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
233 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
234 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
235 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
236 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
237 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
238 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
239 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
240 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
241 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
242 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
243 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
244 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
245 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
246 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
247 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
248 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
249 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
250 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
252 /* This bit of chicanery makes a unary function followed by
253 * a parenthesis into a function with one argument, highest precedence.
254 * The UNIDOR macro is for unary functions that can be followed by the //
255 * operator (such as C<shift // 0>).
257 #define UNI2(f,x) { \
258 pl_yylval.ival = f; \
261 PL_last_uni = PL_oldbufptr; \
262 PL_last_lop_op = f; \
264 return REPORT( (int)FUNC1 ); \
266 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
268 #define UNI(f) UNI2(f,XTERM)
269 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
271 #define UNIBRACK(f) { \
272 pl_yylval.ival = f; \
274 PL_last_uni = PL_oldbufptr; \
276 return REPORT( (int)FUNC1 ); \
278 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
281 /* grandfather return to old style */
282 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
286 /* how to interpret the pl_yylval associated with the token */
290 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
296 static struct debug_tokens {
298 enum token_type type;
300 } const debug_tokens[] =
302 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
303 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
304 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
305 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
306 { ARROW, TOKENTYPE_NONE, "ARROW" },
307 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
308 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
309 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
310 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
311 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
312 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
313 { DO, TOKENTYPE_NONE, "DO" },
314 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
315 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
316 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
317 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
318 { ELSE, TOKENTYPE_NONE, "ELSE" },
319 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
320 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
321 { FOR, TOKENTYPE_IVAL, "FOR" },
322 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
323 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
324 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
325 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
326 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
327 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
328 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
329 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
330 { IF, TOKENTYPE_IVAL, "IF" },
331 { LABEL, TOKENTYPE_PVAL, "LABEL" },
332 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
333 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
334 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
335 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
336 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
337 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
338 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
339 { MY, TOKENTYPE_IVAL, "MY" },
340 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
341 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
342 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
343 { OROP, TOKENTYPE_IVAL, "OROP" },
344 { OROR, TOKENTYPE_NONE, "OROR" },
345 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
346 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
347 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
348 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
349 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
350 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
351 { PREINC, TOKENTYPE_NONE, "PREINC" },
352 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
353 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
354 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
355 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
356 { SUB, TOKENTYPE_NONE, "SUB" },
357 { THING, TOKENTYPE_OPVAL, "THING" },
358 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
359 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
360 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
361 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
362 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
363 { USE, TOKENTYPE_IVAL, "USE" },
364 { WHEN, TOKENTYPE_IVAL, "WHEN" },
365 { WHILE, TOKENTYPE_IVAL, "WHILE" },
366 { WORD, TOKENTYPE_OPVAL, "WORD" },
367 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
368 { 0, TOKENTYPE_NONE, NULL }
371 /* dump the returned token in rv, plus any optional arg in pl_yylval */
374 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
378 PERL_ARGS_ASSERT_TOKEREPORT;
381 const char *name = NULL;
382 enum token_type type = TOKENTYPE_NONE;
383 const struct debug_tokens *p;
384 SV* const report = newSVpvs("<== ");
386 for (p = debug_tokens; p->token; p++) {
387 if (p->token == (int)rv) {
394 Perl_sv_catpv(aTHX_ report, name);
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
398 sv_catpvs(report, "EOF");
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
410 PL_op_name[lvalp->ival]);
413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
415 case TOKENTYPE_OPVAL:
417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
418 PL_op_name[lvalp->opval->op_type]);
419 if (lvalp->opval->op_type == OP_CONST) {
420 Perl_sv_catpvf(aTHX_ report, " %s",
421 SvPEEK(cSVOPx_sv(lvalp->opval)));
426 sv_catpvs(report, "(opval=null)");
429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
435 /* print the buffer with suitable escapes */
438 S_printbuf(pTHX_ const char *const fmt, const char *const s)
440 SV* const tmp = newSVpvs("");
442 PERL_ARGS_ASSERT_PRINTBUF;
444 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
451 S_deprecate_commaless_var_list(pTHX) {
453 deprecate("comma-less variable list");
454 return REPORT(','); /* grandfather non-comma-format format */
460 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
461 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
465 S_ao(pTHX_ int toketype)
468 if (*PL_bufptr == '=') {
470 if (toketype == ANDAND)
471 pl_yylval.ival = OP_ANDASSIGN;
472 else if (toketype == OROR)
473 pl_yylval.ival = OP_ORASSIGN;
474 else if (toketype == DORDOR)
475 pl_yylval.ival = OP_DORASSIGN;
483 * When Perl expects an operator and finds something else, no_op
484 * prints the warning. It always prints "<something> found where
485 * operator expected. It prints "Missing semicolon on previous line?"
486 * if the surprise occurs at the start of the line. "do you need to
487 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
488 * where the compiler doesn't know if foo is a method call or a function.
489 * It prints "Missing operator before end of line" if there's nothing
490 * after the missing operator, or "... before <...>" if there is something
491 * after the missing operator.
495 S_no_op(pTHX_ const char *const what, char *s)
498 char * const oldbp = PL_bufptr;
499 const bool is_first = (PL_oldbufptr == PL_linestart);
501 PERL_ARGS_ASSERT_NO_OP;
507 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
508 if (ckWARN_d(WARN_SYNTAX)) {
510 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
511 "\t(Missing semicolon on previous line?)\n");
512 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
514 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
516 if (t < PL_bufptr && isSPACE(*t))
517 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
518 "\t(Do you need to predeclare %.*s?)\n",
519 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
523 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
524 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
532 * Complain about missing quote/regexp/heredoc terminator.
533 * If it's called with NULL then it cauterizes the line buffer.
534 * If we're in a delimited string and the delimiter is a control
535 * character, it's reformatted into a two-char sequence like ^C.
540 S_missingterm(pTHX_ char *s)
546 char * const nl = strrchr(s,'\n');
550 else if (isCNTRL(PL_multi_close)) {
552 tmpbuf[1] = (char)toCTRL(PL_multi_close);
557 *tmpbuf = (char)PL_multi_close;
561 q = strchr(s,'"') ? '\'' : '"';
562 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
565 #define FEATURE_IS_ENABLED(name) \
566 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
567 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
568 /* The longest string we pass in. */
569 #define MAX_FEATURE_LEN (sizeof("switch")-1)
572 * S_feature_is_enabled
573 * Check whether the named feature is enabled.
576 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
579 HV * const hinthv = GvHV(PL_hintgv);
580 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
582 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
584 assert(namelen <= MAX_FEATURE_LEN);
585 memcpy(&he_name[8], name, namelen);
587 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
591 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
592 * utf16-to-utf8-reversed.
595 #ifdef PERL_CR_FILTER
599 register const char *s = SvPVX_const(sv);
600 register const char * const e = s + SvCUR(sv);
602 PERL_ARGS_ASSERT_STRIP_RETURN;
604 /* outer loop optimized to do nothing if there are no CR-LFs */
606 if (*s++ == '\r' && *s == '\n') {
607 /* hit a CR-LF, need to copy the rest */
608 register char *d = s - 1;
611 if (*s == '\r' && s[1] == '\n')
622 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
624 const I32 count = FILTER_READ(idx+1, sv, maxlen);
625 if (count > 0 && !maxlen)
636 * Create a parser object and initialise its parser and lexer fields
638 * rsfp is the opened file handle to read from (if any),
640 * line holds any initial content already read from the file (or in
641 * the case of no file, such as an eval, the whole contents);
643 * new_filter indicates that this is a new file and it shouldn't inherit
644 * the filters from the current parser (ie require).
648 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
651 const char *s = NULL;
653 yy_parser *parser, *oparser;
655 /* create and initialise a parser */
657 Newxz(parser, 1, yy_parser);
658 parser->old_parser = oparser = PL_parser;
661 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
662 parser->ps = parser->stack;
663 parser->stack_size = YYINITDEPTH;
665 parser->stack->state = 0;
666 parser->yyerrstatus = 0;
667 parser->yychar = YYEMPTY; /* Cause a token to be read. */
669 /* on scope exit, free this parser and restore any outer one */
671 parser->saved_curcop = PL_curcop;
673 /* initialise lexer state */
676 parser->curforce = -1;
678 parser->nexttoke = 0;
680 parser->error_count = oparser ? oparser->error_count : 0;
681 parser->copline = NOLINE;
682 parser->lex_state = LEX_NORMAL;
683 parser->expect = XSTATE;
685 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
686 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
688 Newx(parser->lex_brackstack, 120, char);
689 Newx(parser->lex_casestack, 12, char);
690 *parser->lex_casestack = '\0';
693 s = SvPV_const(line, len);
699 parser->linestr = newSVpvs("\n;");
700 } else if (SvREADONLY(line) || s[len-1] != ';') {
701 parser->linestr = newSVsv(line);
703 sv_catpvs(parser->linestr, "\n;");
706 SvREFCNT_inc_simple_void_NN(line);
707 parser->linestr = line;
709 parser->oldoldbufptr =
712 parser->linestart = SvPVX(parser->linestr);
713 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
714 parser->last_lop = parser->last_uni = NULL;
718 /* delete a parser object */
721 Perl_parser_free(pTHX_ const yy_parser *parser)
723 PERL_ARGS_ASSERT_PARSER_FREE;
725 PL_curcop = parser->saved_curcop;
726 SvREFCNT_dec(parser->linestr);
728 if (parser->rsfp == PerlIO_stdin())
729 PerlIO_clearerr(parser->rsfp);
730 else if (parser->rsfp && (!parser->old_parser ||
731 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
732 PerlIO_close(parser->rsfp);
733 SvREFCNT_dec(parser->rsfp_filters);
735 Safefree(parser->stack);
736 Safefree(parser->lex_brackstack);
737 Safefree(parser->lex_casestack);
738 PL_parser = parser->old_parser;
745 * Finalizer for lexing operations. Must be called when the parser is
746 * done with the lexer.
753 PL_doextract = FALSE;
758 * This subroutine has nothing to do with tilting, whether at windmills
759 * or pinball tables. Its name is short for "increment line". It
760 * increments the current line number in CopLINE(PL_curcop) and checks
761 * to see whether the line starts with a comment of the form
762 * # line 500 "foo.pm"
763 * If so, it sets the current line number and file to the values in the comment.
767 S_incline(pTHX_ const char *s)
774 PERL_ARGS_ASSERT_INCLINE;
776 CopLINE_inc(PL_curcop);
779 while (SPACE_OR_TAB(*s))
781 if (strnEQ(s, "line", 4))
785 if (SPACE_OR_TAB(*s))
789 while (SPACE_OR_TAB(*s))
797 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
799 while (SPACE_OR_TAB(*s))
801 if (*s == '"' && (t = strchr(s+1, '"'))) {
811 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
813 if (*e != '\n' && *e != '\0')
814 return; /* false alarm */
817 const STRLEN len = t - s;
819 SV *const temp_sv = CopFILESV(PL_curcop);
825 tmplen = SvCUR(temp_sv);
831 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
832 /* must copy *{"::_<(eval N)[oldfilename:L]"}
833 * to *{"::_<newfilename"} */
834 /* However, the long form of evals is only turned on by the
835 debugger - usually they're "(eval %lu)" */
839 STRLEN tmplen2 = len;
840 if (tmplen + 2 <= sizeof smallbuf)
843 Newx(tmpbuf, tmplen + 2, char);
846 memcpy(tmpbuf + 2, cf, tmplen);
848 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
853 if (tmplen2 + 2 <= sizeof smallbuf)
856 Newx(tmpbuf2, tmplen2 + 2, char);
858 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
859 /* Either they malloc'd it, or we malloc'd it,
860 so no prefix is present in ours. */
865 memcpy(tmpbuf2 + 2, s, tmplen2);
868 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
870 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
871 /* adjust ${"::_<newfilename"} to store the new file name */
872 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
873 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
874 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
877 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
879 if (tmpbuf != smallbuf) Safefree(tmpbuf);
882 CopFILE_free(PL_curcop);
883 CopFILE_setn(PL_curcop, s, len);
885 CopLINE_set(PL_curcop, atoi(n)-1);
889 /* skip space before PL_thistoken */
892 S_skipspace0(pTHX_ register char *s)
894 PERL_ARGS_ASSERT_SKIPSPACE0;
901 PL_thiswhite = newSVpvs("");
902 sv_catsv(PL_thiswhite, PL_skipwhite);
903 sv_free(PL_skipwhite);
906 PL_realtokenstart = s - SvPVX(PL_linestr);
910 /* skip space after PL_thistoken */
913 S_skipspace1(pTHX_ register char *s)
915 const char *start = s;
916 I32 startoff = start - SvPVX(PL_linestr);
918 PERL_ARGS_ASSERT_SKIPSPACE1;
923 start = SvPVX(PL_linestr) + startoff;
924 if (!PL_thistoken && PL_realtokenstart >= 0) {
925 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
926 PL_thistoken = newSVpvn(tstart, start - tstart);
928 PL_realtokenstart = -1;
931 PL_nextwhite = newSVpvs("");
932 sv_catsv(PL_nextwhite, PL_skipwhite);
933 sv_free(PL_skipwhite);
940 S_skipspace2(pTHX_ register char *s, SV **svp)
943 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
944 const I32 startoff = s - SvPVX(PL_linestr);
946 PERL_ARGS_ASSERT_SKIPSPACE2;
949 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
950 if (!PL_madskills || !svp)
952 start = SvPVX(PL_linestr) + startoff;
953 if (!PL_thistoken && PL_realtokenstart >= 0) {
954 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
955 PL_thistoken = newSVpvn(tstart, start - tstart);
956 PL_realtokenstart = -1;
961 sv_setsv(*svp, PL_skipwhite);
962 sv_free(PL_skipwhite);
971 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
973 AV *av = CopFILEAVx(PL_curcop);
975 SV * const sv = newSV_type(SVt_PVMG);
977 sv_setsv(sv, orig_sv);
979 sv_setpvn(sv, buf, len);
982 av_store(av, (I32)CopLINE(PL_curcop), sv);
988 * Called to gobble the appropriate amount and type of whitespace.
989 * Skips comments as well.
993 S_skipspace(pTHX_ register char *s)
998 int startoff = s - SvPVX(PL_linestr);
1000 PERL_ARGS_ASSERT_SKIPSPACE;
1003 sv_free(PL_skipwhite);
1007 PERL_ARGS_ASSERT_SKIPSPACE;
1009 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1010 while (s < PL_bufend && SPACE_OR_TAB(*s))
1020 SSize_t oldprevlen, oldoldprevlen;
1021 SSize_t oldloplen = 0, oldunilen = 0;
1022 while (s < PL_bufend && isSPACE(*s)) {
1023 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1028 if (s < PL_bufend && *s == '#') {
1029 while (s < PL_bufend && *s != '\n')
1031 if (s < PL_bufend) {
1033 if (PL_in_eval && !PL_rsfp) {
1040 /* only continue to recharge the buffer if we're at the end
1041 * of the buffer, we're not reading from a source filter, and
1042 * we're in normal lexing mode
1044 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1045 PL_lex_state == LEX_FORMLINE)
1052 /* try to recharge the buffer */
1054 curoff = s - SvPVX(PL_linestr);
1057 if ((s = filter_gets(PL_linestr, PL_rsfp,
1058 (prevlen = SvCUR(PL_linestr)))) == NULL)
1061 if (PL_madskills && curoff != startoff) {
1063 PL_skipwhite = newSVpvs("");
1064 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1068 /* mustn't throw out old stuff yet if madpropping */
1069 SvCUR(PL_linestr) = curoff;
1070 s = SvPVX(PL_linestr) + curoff;
1072 if (curoff && s[-1] == '\n')
1076 /* end of file. Add on the -p or -n magic */
1077 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1080 sv_catpvs(PL_linestr,
1081 ";}continue{print or die qq(-p destination: $!\\n);}");
1083 sv_setpvs(PL_linestr,
1084 ";}continue{print or die qq(-p destination: $!\\n);}");
1086 PL_minus_n = PL_minus_p = 0;
1088 else if (PL_minus_n) {
1090 sv_catpvs(PL_linestr, ";}");
1092 sv_setpvs(PL_linestr, ";}");
1098 sv_catpvs(PL_linestr,";");
1100 sv_setpvs(PL_linestr,";");
1103 /* reset variables for next time we lex */
1104 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1110 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1111 PL_last_lop = PL_last_uni = NULL;
1113 /* Close the filehandle. Could be from
1114 * STDIN, or a regular file. If we were reading code from
1115 * STDIN (because the commandline held no -e or filename)
1116 * then we don't close it, we reset it so the code can
1117 * read from STDIN too.
1120 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1121 PerlIO_clearerr(PL_rsfp);
1123 (void)PerlIO_close(PL_rsfp);
1128 /* not at end of file, so we only read another line */
1129 /* make corresponding updates to old pointers, for yyerror() */
1130 oldprevlen = PL_oldbufptr - PL_bufend;
1131 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1133 oldunilen = PL_last_uni - PL_bufend;
1135 oldloplen = PL_last_lop - PL_bufend;
1136 PL_linestart = PL_bufptr = s + prevlen;
1137 PL_bufend = s + SvCUR(PL_linestr);
1139 PL_oldbufptr = s + oldprevlen;
1140 PL_oldoldbufptr = s + oldoldprevlen;
1142 PL_last_uni = s + oldunilen;
1144 PL_last_lop = s + oldloplen;
1147 /* debugger active and we're not compiling the debugger code,
1148 * so store the line into the debugger's array of lines
1150 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
1151 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1158 PL_skipwhite = newSVpvs("");
1159 curoff = s - SvPVX(PL_linestr);
1160 if (curoff - startoff)
1161 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1170 * Check the unary operators to ensure there's no ambiguity in how they're
1171 * used. An ambiguous piece of code would be:
1173 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1174 * the +5 is its argument.
1184 if (PL_oldoldbufptr != PL_last_uni)
1186 while (isSPACE(*PL_last_uni))
1189 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1191 if ((t = strchr(s, '(')) && t < PL_bufptr)
1194 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1195 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1196 (int)(s - PL_last_uni), PL_last_uni);
1200 * LOP : macro to build a list operator. Its behaviour has been replaced
1201 * with a subroutine, S_lop() for which LOP is just another name.
1204 #define LOP(f,x) return lop(f,x,s)
1208 * Build a list operator (or something that might be one). The rules:
1209 * - if we have a next token, then it's a list operator [why?]
1210 * - if the next thing is an opening paren, then it's a function
1211 * - else it's a list operator
1215 S_lop(pTHX_ I32 f, int x, char *s)
1219 PERL_ARGS_ASSERT_LOP;
1225 PL_last_lop = PL_oldbufptr;
1226 PL_last_lop_op = (OPCODE)f;
1229 return REPORT(LSTOP);
1232 return REPORT(LSTOP);
1235 return REPORT(FUNC);
1238 return REPORT(FUNC);
1240 return REPORT(LSTOP);
1246 * Sets up for an eventual force_next(). start_force(0) basically does
1247 * an unshift, while start_force(-1) does a push. yylex removes items
1252 S_start_force(pTHX_ int where)
1256 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1257 where = PL_lasttoke;
1258 assert(PL_curforce < 0 || PL_curforce == where);
1259 if (PL_curforce != where) {
1260 for (i = PL_lasttoke; i > where; --i) {
1261 PL_nexttoke[i] = PL_nexttoke[i-1];
1265 if (PL_curforce < 0) /* in case of duplicate start_force() */
1266 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1267 PL_curforce = where;
1270 curmad('^', newSVpvs(""));
1271 CURMAD('_', PL_nextwhite);
1276 S_curmad(pTHX_ char slot, SV *sv)
1282 if (PL_curforce < 0)
1283 where = &PL_thismad;
1285 where = &PL_nexttoke[PL_curforce].next_mad;
1291 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1293 else if (PL_encoding) {
1294 sv_recode_to_utf8(sv, PL_encoding);
1299 /* keep a slot open for the head of the list? */
1300 if (slot != '_' && *where && (*where)->mad_key == '^') {
1301 (*where)->mad_key = slot;
1302 sv_free(MUTABLE_SV(((*where)->mad_val)));
1303 (*where)->mad_val = (void*)sv;
1306 addmad(newMADsv(slot, sv), where, 0);
1309 # define start_force(where) NOOP
1310 # define curmad(slot, sv) NOOP
1315 * When the lexer realizes it knows the next token (for instance,
1316 * it is reordering tokens for the parser) then it can call S_force_next
1317 * to know what token to return the next time the lexer is called. Caller
1318 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1319 * and possibly PL_expect to ensure the lexer handles the token correctly.
1323 S_force_next(pTHX_ I32 type)
1328 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1329 tokereport(type, &NEXTVAL_NEXTTOKE);
1333 if (PL_curforce < 0)
1334 start_force(PL_lasttoke);
1335 PL_nexttoke[PL_curforce].next_type = type;
1336 if (PL_lex_state != LEX_KNOWNEXT)
1337 PL_lex_defer = PL_lex_state;
1338 PL_lex_state = LEX_KNOWNEXT;
1339 PL_lex_expect = PL_expect;
1342 PL_nexttype[PL_nexttoke] = type;
1344 if (PL_lex_state != LEX_KNOWNEXT) {
1345 PL_lex_defer = PL_lex_state;
1346 PL_lex_expect = PL_expect;
1347 PL_lex_state = LEX_KNOWNEXT;
1353 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1356 SV * const sv = newSVpvn_utf8(start, len,
1359 && !is_ascii_string((const U8*)start, len)
1360 && is_utf8_string((const U8*)start, len));
1366 * When the lexer knows the next thing is a word (for instance, it has
1367 * just seen -> and it knows that the next char is a word char, then
1368 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1372 * char *start : buffer position (must be within PL_linestr)
1373 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1374 * int check_keyword : if true, Perl checks to make sure the word isn't
1375 * a keyword (do this if the word is a label, e.g. goto FOO)
1376 * int allow_pack : if true, : characters will also be allowed (require,
1377 * use, etc. do this)
1378 * int allow_initial_tick : used by the "sub" lexer only.
1382 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1388 PERL_ARGS_ASSERT_FORCE_WORD;
1390 start = SKIPSPACE1(start);
1392 if (isIDFIRST_lazy_if(s,UTF) ||
1393 (allow_pack && *s == ':') ||
1394 (allow_initial_tick && *s == '\'') )
1396 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1397 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1399 start_force(PL_curforce);
1401 curmad('X', newSVpvn(start,s-start));
1402 if (token == METHOD) {
1407 PL_expect = XOPERATOR;
1411 curmad('g', newSVpvs( "forced" ));
1412 NEXTVAL_NEXTTOKE.opval
1413 = (OP*)newSVOP(OP_CONST,0,
1414 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1415 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1423 * Called when the lexer wants $foo *foo &foo etc, but the program
1424 * text only contains the "foo" portion. The first argument is a pointer
1425 * to the "foo", and the second argument is the type symbol to prefix.
1426 * Forces the next token to be a "WORD".
1427 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1431 S_force_ident(pTHX_ register const char *s, int kind)
1435 PERL_ARGS_ASSERT_FORCE_IDENT;
1438 const STRLEN len = strlen(s);
1439 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1440 start_force(PL_curforce);
1441 NEXTVAL_NEXTTOKE.opval = o;
1444 o->op_private = OPpCONST_ENTERED;
1445 /* XXX see note in pp_entereval() for why we forgo typo
1446 warnings if the symbol must be introduced in an eval.
1448 gv_fetchpvn_flags(s, len,
1449 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1451 kind == '$' ? SVt_PV :
1452 kind == '@' ? SVt_PVAV :
1453 kind == '%' ? SVt_PVHV :
1461 Perl_str_to_version(pTHX_ SV *sv)
1466 const char *start = SvPV_const(sv,len);
1467 const char * const end = start + len;
1468 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1470 PERL_ARGS_ASSERT_STR_TO_VERSION;
1472 while (start < end) {
1476 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1481 retval += ((NV)n)/nshift;
1490 * Forces the next token to be a version number.
1491 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1492 * and if "guessing" is TRUE, then no new token is created (and the caller
1493 * must use an alternative parsing method).
1497 S_force_version(pTHX_ char *s, int guessing)
1503 I32 startoff = s - SvPVX(PL_linestr);
1506 PERL_ARGS_ASSERT_FORCE_VERSION;
1514 while (isDIGIT(*d) || *d == '_' || *d == '.')
1518 start_force(PL_curforce);
1519 curmad('X', newSVpvn(s,d-s));
1522 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1524 s = scan_num(s, &pl_yylval);
1525 version = pl_yylval.opval;
1526 ver = cSVOPx(version)->op_sv;
1527 if (SvPOK(ver) && !SvNIOK(ver)) {
1528 SvUPGRADE(ver, SVt_PVNV);
1529 SvNV_set(ver, str_to_version(ver));
1530 SvNOK_on(ver); /* hint that it is a version */
1533 else if (guessing) {
1536 sv_free(PL_nextwhite); /* let next token collect whitespace */
1538 s = SvPVX(PL_linestr) + startoff;
1546 if (PL_madskills && !version) {
1547 sv_free(PL_nextwhite); /* let next token collect whitespace */
1549 s = SvPVX(PL_linestr) + startoff;
1552 /* NOTE: The parser sees the package name and the VERSION swapped */
1553 start_force(PL_curforce);
1554 NEXTVAL_NEXTTOKE.opval = version;
1562 * Tokenize a quoted string passed in as an SV. It finds the next
1563 * chunk, up to end of string or a backslash. It may make a new
1564 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1569 S_tokeq(pTHX_ SV *sv)
1573 register char *send;
1578 PERL_ARGS_ASSERT_TOKEQ;
1583 s = SvPV_force(sv, len);
1584 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1587 while (s < send && *s != '\\')
1592 if ( PL_hints & HINT_NEW_STRING ) {
1593 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1597 if (s + 1 < send && (s[1] == '\\'))
1598 s++; /* all that, just for this */
1603 SvCUR_set(sv, d - SvPVX_const(sv));
1605 if ( PL_hints & HINT_NEW_STRING )
1606 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1611 * Now come three functions related to double-quote context,
1612 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1613 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1614 * interact with PL_lex_state, and create fake ( ... ) argument lists
1615 * to handle functions and concatenation.
1616 * They assume that whoever calls them will be setting up a fake
1617 * join call, because each subthing puts a ',' after it. This lets
1620 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1622 * (I'm not sure whether the spurious commas at the end of lcfirst's
1623 * arguments and join's arguments are created or not).
1628 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1630 * Pattern matching will set PL_lex_op to the pattern-matching op to
1631 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1633 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1635 * Everything else becomes a FUNC.
1637 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1638 * had an OP_CONST or OP_READLINE). This just sets us up for a
1639 * call to S_sublex_push().
1643 S_sublex_start(pTHX)
1646 register const I32 op_type = pl_yylval.ival;
1648 if (op_type == OP_NULL) {
1649 pl_yylval.opval = PL_lex_op;
1653 if (op_type == OP_CONST || op_type == OP_READLINE) {
1654 SV *sv = tokeq(PL_lex_stuff);
1656 if (SvTYPE(sv) == SVt_PVIV) {
1657 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1659 const char * const p = SvPV_const(sv, len);
1660 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1664 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1665 PL_lex_stuff = NULL;
1666 /* Allow <FH> // "foo" */
1667 if (op_type == OP_READLINE)
1668 PL_expect = XTERMORDORDOR;
1671 else if (op_type == OP_BACKTICK && PL_lex_op) {
1672 /* readpipe() vas overriden */
1673 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1674 pl_yylval.opval = PL_lex_op;
1676 PL_lex_stuff = NULL;
1680 PL_sublex_info.super_state = PL_lex_state;
1681 PL_sublex_info.sub_inwhat = (U16)op_type;
1682 PL_sublex_info.sub_op = PL_lex_op;
1683 PL_lex_state = LEX_INTERPPUSH;
1687 pl_yylval.opval = PL_lex_op;
1697 * Create a new scope to save the lexing state. The scope will be
1698 * ended in S_sublex_done. Returns a '(', starting the function arguments
1699 * to the uc, lc, etc. found before.
1700 * Sets PL_lex_state to LEX_INTERPCONCAT.
1709 PL_lex_state = PL_sublex_info.super_state;
1710 SAVEBOOL(PL_lex_dojoin);
1711 SAVEI32(PL_lex_brackets);
1712 SAVEI32(PL_lex_casemods);
1713 SAVEI32(PL_lex_starts);
1714 SAVEI8(PL_lex_state);
1715 SAVEVPTR(PL_lex_inpat);
1716 SAVEI16(PL_lex_inwhat);
1717 SAVECOPLINE(PL_curcop);
1718 SAVEPPTR(PL_bufptr);
1719 SAVEPPTR(PL_bufend);
1720 SAVEPPTR(PL_oldbufptr);
1721 SAVEPPTR(PL_oldoldbufptr);
1722 SAVEPPTR(PL_last_lop);
1723 SAVEPPTR(PL_last_uni);
1724 SAVEPPTR(PL_linestart);
1725 SAVESPTR(PL_linestr);
1726 SAVEGENERICPV(PL_lex_brackstack);
1727 SAVEGENERICPV(PL_lex_casestack);
1729 PL_linestr = PL_lex_stuff;
1730 PL_lex_stuff = NULL;
1732 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1733 = SvPVX(PL_linestr);
1734 PL_bufend += SvCUR(PL_linestr);
1735 PL_last_lop = PL_last_uni = NULL;
1736 SAVEFREESV(PL_linestr);
1738 PL_lex_dojoin = FALSE;
1739 PL_lex_brackets = 0;
1740 Newx(PL_lex_brackstack, 120, char);
1741 Newx(PL_lex_casestack, 12, char);
1742 PL_lex_casemods = 0;
1743 *PL_lex_casestack = '\0';
1745 PL_lex_state = LEX_INTERPCONCAT;
1746 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1748 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1749 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1750 PL_lex_inpat = PL_sublex_info.sub_op;
1752 PL_lex_inpat = NULL;
1759 * Restores lexer state after a S_sublex_push.
1766 if (!PL_lex_starts++) {
1767 SV * const sv = newSVpvs("");
1768 if (SvUTF8(PL_linestr))
1770 PL_expect = XOPERATOR;
1771 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1775 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1776 PL_lex_state = LEX_INTERPCASEMOD;
1780 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1781 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1782 PL_linestr = PL_lex_repl;
1784 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1785 PL_bufend += SvCUR(PL_linestr);
1786 PL_last_lop = PL_last_uni = NULL;
1787 SAVEFREESV(PL_linestr);
1788 PL_lex_dojoin = FALSE;
1789 PL_lex_brackets = 0;
1790 PL_lex_casemods = 0;
1791 *PL_lex_casestack = '\0';
1793 if (SvEVALED(PL_lex_repl)) {
1794 PL_lex_state = LEX_INTERPNORMAL;
1796 /* we don't clear PL_lex_repl here, so that we can check later
1797 whether this is an evalled subst; that means we rely on the
1798 logic to ensure sublex_done() is called again only via the
1799 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1802 PL_lex_state = LEX_INTERPCONCAT;
1812 PL_endwhite = newSVpvs("");
1813 sv_catsv(PL_endwhite, PL_thiswhite);
1817 sv_setpvs(PL_thistoken,"");
1819 PL_realtokenstart = -1;
1823 PL_bufend = SvPVX(PL_linestr);
1824 PL_bufend += SvCUR(PL_linestr);
1825 PL_expect = XOPERATOR;
1826 PL_sublex_info.sub_inwhat = 0;
1834 Extracts a pattern, double-quoted string, or transliteration. This
1837 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1838 processing a pattern (PL_lex_inpat is true), a transliteration
1839 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1841 Returns a pointer to the character scanned up to. If this is
1842 advanced from the start pointer supplied (i.e. if anything was
1843 successfully parsed), will leave an OP for the substring scanned
1844 in pl_yylval. Caller must intuit reason for not parsing further
1845 by looking at the next characters herself.
1849 double-quoted style: \r and \n
1850 regexp special ones: \D \s
1853 case and quoting: \U \Q \E
1854 stops on @ and $, but not for $ as tail anchor
1856 In transliterations:
1857 characters are VERY literal, except for - not at the start or end
1858 of the string, which indicates a range. If the range is in bytes,
1859 scan_const expands the range to the full set of intermediate
1860 characters. If the range is in utf8, the hyphen is replaced with
1861 a certain range mark which will be handled by pmtrans() in op.c.
1863 In double-quoted strings:
1865 double-quoted style: \r and \n
1867 deprecated backrefs: \1 (in substitution replacements)
1868 case and quoting: \U \Q \E
1871 scan_const does *not* construct ops to handle interpolated strings.
1872 It stops processing as soon as it finds an embedded $ or @ variable
1873 and leaves it to the caller to work out what's going on.
1875 embedded arrays (whether in pattern or not) could be:
1876 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1878 $ in double-quoted strings must be the symbol of an embedded scalar.
1880 $ in pattern could be $foo or could be tail anchor. Assumption:
1881 it's a tail anchor if $ is the last thing in the string, or if it's
1882 followed by one of "()| \r\n\t"
1884 \1 (backreferences) are turned into $1
1886 The structure of the code is
1887 while (there's a character to process) {
1888 handle transliteration ranges
1889 skip regexp comments /(?#comment)/ and codes /(?{code})/
1890 skip #-initiated comments in //x patterns
1891 check for embedded arrays
1892 check for embedded scalars
1894 leave intact backslashes from leaveit (below)
1895 deprecate \1 in substitution replacements
1896 handle string-changing backslashes \l \U \Q \E, etc.
1897 switch (what was escaped) {
1898 handle \- in a transliteration (becomes a literal -)
1899 handle \132 (octal characters)
1900 handle \x15 and \x{1234} (hex characters)
1901 handle \N{name} (named characters)
1902 handle \cV (control characters)
1903 handle printf-style backslashes (\f, \r, \n, etc)
1906 } (end if backslash)
1907 handle regular character
1908 } (end while character to read)
1913 S_scan_const(pTHX_ char *start)
1916 register char *send = PL_bufend; /* end of the constant */
1917 SV *sv = newSV(send - start); /* sv for the constant. See
1918 note below on sizing. */
1919 register char *s = start; /* start of the constant */
1920 register char *d = SvPVX(sv); /* destination for copies */
1921 bool dorange = FALSE; /* are we in a translit range? */
1922 bool didrange = FALSE; /* did we just finish a range? */
1923 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1924 I32 this_utf8 = UTF; /* Is the source string assumed
1925 to be UTF8? But, this can
1926 show as true when the source
1927 isn't utf8, as for example
1928 when it is entirely composed
1931 /* Note on sizing: The scanned constant is placed into sv, which is
1932 * initialized by newSV() assuming one byte of output for every byte of
1933 * input. This routine expects newSV() to allocate an extra byte for a
1934 * trailing NUL, which this routine will append if it gets to the end of
1935 * the input. There may be more bytes of input than output (eg., \N{LATIN
1936 * CAPITAL LETTER A}), or more output than input if the constant ends up
1937 * recoded to utf8, but each time a construct is found that might increase
1938 * the needed size, SvGROW() is called. Its size parameter each time is
1939 * based on the best guess estimate at the time, namely the length used so
1940 * far, plus the length the current construct will occupy, plus room for
1941 * the trailing NUL, plus one byte for every input byte still unscanned */
1945 UV literal_endpoint = 0;
1946 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1949 PERL_ARGS_ASSERT_SCAN_CONST;
1951 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1952 /* If we are doing a trans and we know we want UTF8 set expectation */
1953 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1954 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1958 while (s < send || dorange) {
1959 /* get transliterations out of the way (they're most literal) */
1960 if (PL_lex_inwhat == OP_TRANS) {
1961 /* expand a range A-Z to the full set of characters. AIE! */
1963 I32 i; /* current expanded character */
1964 I32 min; /* first character in range */
1965 I32 max; /* last character in range */
1976 char * const c = (char*)utf8_hop((U8*)d, -1);
1980 *c = (char)UTF_TO_NATIVE(0xff);
1981 /* mark the range as done, and continue */
1987 i = d - SvPVX_const(sv); /* remember current offset */
1990 SvLEN(sv) + (has_utf8 ?
1991 (512 - UTF_CONTINUATION_MARK +
1994 /* How many two-byte within 0..255: 128 in UTF-8,
1995 * 96 in UTF-8-mod. */
1997 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1999 d = SvPVX(sv) + i; /* refresh d after realloc */
2003 for (j = 0; j <= 1; j++) {
2004 char * const c = (char*)utf8_hop((U8*)d, -1);
2005 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2011 max = (U8)0xff; /* only to \xff */
2012 uvmax = uv; /* \x{100} to uvmax */
2014 d = c; /* eat endpoint chars */
2019 d -= 2; /* eat the first char and the - */
2020 min = (U8)*d; /* first char in range */
2021 max = (U8)d[1]; /* last char in range */
2028 "Invalid range \"%c-%c\" in transliteration operator",
2029 (char)min, (char)max);
2033 if (literal_endpoint == 2 &&
2034 ((isLOWER(min) && isLOWER(max)) ||
2035 (isUPPER(min) && isUPPER(max)))) {
2037 for (i = min; i <= max; i++)
2039 *d++ = NATIVE_TO_NEED(has_utf8,i);
2041 for (i = min; i <= max; i++)
2043 *d++ = NATIVE_TO_NEED(has_utf8,i);
2048 for (i = min; i <= max; i++)
2051 const U8 ch = (U8)NATIVE_TO_UTF(i);
2052 if (UNI_IS_INVARIANT(ch))
2055 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2056 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2065 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2067 *d++ = (char)UTF_TO_NATIVE(0xff);
2069 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2073 /* mark the range as done, and continue */
2077 literal_endpoint = 0;
2082 /* range begins (ignore - as first or last char) */
2083 else if (*s == '-' && s+1 < send && s != start) {
2085 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2092 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2102 literal_endpoint = 0;
2103 native_range = TRUE;
2108 /* if we get here, we're not doing a transliteration */
2110 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2111 except for the last char, which will be done separately. */
2112 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2114 while (s+1 < send && *s != ')')
2115 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2117 else if (s[2] == '{' /* This should match regcomp.c */
2118 || (s[2] == '?' && s[3] == '{'))
2121 char *regparse = s + (s[2] == '{' ? 3 : 4);
2124 while (count && (c = *regparse)) {
2125 if (c == '\\' && regparse[1])
2133 if (*regparse != ')')
2134 regparse--; /* Leave one char for continuation. */
2135 while (s < regparse)
2136 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2140 /* likewise skip #-initiated comments in //x patterns */
2141 else if (*s == '#' && PL_lex_inpat &&
2142 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2143 while (s+1 < send && *s != '\n')
2144 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2147 /* check for embedded arrays
2148 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2150 else if (*s == '@' && s[1]) {
2151 if (isALNUM_lazy_if(s+1,UTF))
2153 if (strchr(":'{$", s[1]))
2155 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2156 break; /* in regexp, neither @+ nor @- are interpolated */
2159 /* check for embedded scalars. only stop if we're sure it's a
2162 else if (*s == '$') {
2163 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2165 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2167 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2168 "Possible unintended interpolation of $\\ in regex");
2170 break; /* in regexp, $ might be tail anchor */
2174 /* End of else if chain - OP_TRANS rejoin rest */
2177 if (*s == '\\' && s+1 < send) {
2180 /* deprecate \1 in strings and substitution replacements */
2181 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2182 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2184 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2189 /* string-change backslash escapes */
2190 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2194 /* skip any other backslash escapes in a pattern */
2195 else if (PL_lex_inpat) {
2196 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2197 goto default_action;
2200 /* if we get here, it's either a quoted -, or a digit */
2203 /* quoted - in transliterations */
2205 if (PL_lex_inwhat == OP_TRANS) {
2212 if ((isALPHA(*s) || isDIGIT(*s)))
2213 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2214 "Unrecognized escape \\%c passed through",
2216 /* default action is to copy the quoted character */
2217 goto default_action;
2220 /* eg. \132 indicates the octal constant 0x132 */
2221 case '0': case '1': case '2': case '3':
2222 case '4': case '5': case '6': case '7':
2226 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2229 goto NUM_ESCAPE_INSERT;
2231 /* eg. \x24 indicates the hex constant 0x24 */
2235 char* const e = strchr(s, '}');
2236 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2237 PERL_SCAN_DISALLOW_PREFIX;
2242 yyerror("Missing right brace on \\x{}");
2246 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2252 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2253 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2259 /* Insert oct, hex, or \N{U+...} escaped character. There will
2260 * always be enough room in sv since such escapes will be
2261 * longer than any UTF-8 sequence they can end up as, except if
2262 * they force us to recode the rest of the string into utf8 */
2264 /* Here uv is the ordinal of the next character being added in
2265 * unicode (converted from native). (It has to be done before
2266 * here because \N is interpreted as unicode, and oct and hex
2268 if (!UNI_IS_INVARIANT(uv)) {
2269 if (!has_utf8 && uv > 255) {
2270 /* Might need to recode whatever we have accumulated so
2271 * far if it contains any chars variant in utf8 or
2274 SvCUR_set(sv, d - SvPVX_const(sv));
2277 /* See Note on sizing above. */
2278 sv_utf8_upgrade_flags_grow(sv,
2279 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2280 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2281 d = SvPVX(sv) + SvCUR(sv);
2286 d = (char*)uvuni_to_utf8((U8*)d, uv);
2287 if (PL_lex_inwhat == OP_TRANS &&
2288 PL_sublex_info.sub_op) {
2289 PL_sublex_info.sub_op->op_private |=
2290 (PL_lex_repl ? OPpTRANS_FROM_UTF
2294 if (uv > 255 && !dorange)
2295 native_range = FALSE;
2307 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2312 char* e = strchr(s, '}');
2318 yyerror("Missing right brace on \\N{}");
2322 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2323 /* \N{U+...} The ... is a unicode value even on EBCDIC
2325 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2326 PERL_SCAN_DISALLOW_PREFIX;
2329 uv = grok_hex(s, &len, &flags, NULL);
2330 if ( e > s && len != (STRLEN)(e - s) ) {
2334 goto NUM_ESCAPE_INSERT;
2336 res = newSVpvn(s + 1, e - s - 1);
2337 res = new_constant( NULL, 0, "charnames",
2338 res, NULL, s - 2, e - s + 3 );
2340 sv_utf8_upgrade(res);
2341 str = SvPV_const(res,len);
2342 #ifdef EBCDIC_NEVER_MIND
2343 /* charnames uses pack U and that has been
2344 * recently changed to do the below uni->native
2345 * mapping, so this would be redundant (and wrong,
2346 * the code point would be doubly converted).
2347 * But leave this in just in case the pack U change
2348 * gets revoked, but the semantics is still
2349 * desireable for charnames. --jhi */
2351 UV uv = utf8_to_uvchr((const U8*)str, 0);
2354 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2356 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2357 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2358 str = SvPV_const(res, len);
2362 /* If destination is not in utf8 but this new character is,
2363 * recode the dest to utf8 */
2364 if (!has_utf8 && SvUTF8(res)) {
2365 SvCUR_set(sv, d - SvPVX_const(sv));
2368 /* See Note on sizing above. */
2369 sv_utf8_upgrade_flags_grow(sv,
2370 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2371 len + (STRLEN)(send - s) + 1);
2372 d = SvPVX(sv) + SvCUR(sv);
2374 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2376 /* See Note on sizing above. (NOTE: SvCUR() is not set
2377 * correctly here). */
2378 const STRLEN off = d - SvPVX_const(sv);
2379 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2383 native_range = FALSE; /* \N{} is guessed to be Unicode */
2385 Copy(str, d, len, char);
2392 yyerror("Missing braces on \\N{}");
2395 /* \c is a control character */
2404 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2407 yyerror("Missing control char name in \\c");
2411 /* printf-style backslashes, formfeeds, newlines, etc */
2413 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2416 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2419 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2422 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2425 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2428 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2431 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2437 } /* end if (backslash) */
2444 /* If we started with encoded form, or already know we want it,
2445 then encode the next character */
2446 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2450 /* One might think that it is wasted effort in the case of the
2451 * source being utf8 (this_utf8 == TRUE) to take the next character
2452 * in the source, convert it to an unsigned value, and then convert
2453 * it back again. But the source has not been validated here. The
2454 * routine that does the conversion checks for errors like
2457 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2458 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2460 SvCUR_set(sv, d - SvPVX_const(sv));
2463 /* See Note on sizing above. */
2464 sv_utf8_upgrade_flags_grow(sv,
2465 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2466 need + (STRLEN)(send - s) + 1);
2467 d = SvPVX(sv) + SvCUR(sv);
2469 } else if (need > len) {
2470 /* encoded value larger than old, may need extra space (NOTE:
2471 * SvCUR() is not set correctly here). See Note on sizing
2473 const STRLEN off = d - SvPVX_const(sv);
2474 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2478 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2480 if (uv > 255 && !dorange)
2481 native_range = FALSE;
2485 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2487 } /* while loop to process each character */
2489 /* terminate the string and set up the sv */
2491 SvCUR_set(sv, d - SvPVX_const(sv));
2492 if (SvCUR(sv) >= SvLEN(sv))
2493 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2496 if (PL_encoding && !has_utf8) {
2497 sv_recode_to_utf8(sv, PL_encoding);
2503 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2504 PL_sublex_info.sub_op->op_private |=
2505 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2509 /* shrink the sv if we allocated more than we used */
2510 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2511 SvPV_shrink_to_cur(sv);
2514 /* return the substring (via pl_yylval) only if we parsed anything */
2515 if (s > PL_bufptr) {
2516 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2517 const char *const key = PL_lex_inpat ? "qr" : "q";
2518 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2522 if (PL_lex_inwhat == OP_TRANS) {
2525 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2533 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2536 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2543 * Returns TRUE if there's more to the expression (e.g., a subscript),
2546 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2548 * ->[ and ->{ return TRUE
2549 * { and [ outside a pattern are always subscripts, so return TRUE
2550 * if we're outside a pattern and it's not { or [, then return FALSE
2551 * if we're in a pattern and the first char is a {
2552 * {4,5} (any digits around the comma) returns FALSE
2553 * if we're in a pattern and the first char is a [
2555 * [SOMETHING] has a funky algorithm to decide whether it's a
2556 * character class or not. It has to deal with things like
2557 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2558 * anything else returns TRUE
2561 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2564 S_intuit_more(pTHX_ register char *s)
2568 PERL_ARGS_ASSERT_INTUIT_MORE;
2570 if (PL_lex_brackets)
2572 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2574 if (*s != '{' && *s != '[')
2579 /* In a pattern, so maybe we have {n,m}. */
2596 /* On the other hand, maybe we have a character class */
2599 if (*s == ']' || *s == '^')
2602 /* this is terrifying, and it works */
2603 int weight = 2; /* let's weigh the evidence */
2605 unsigned char un_char = 255, last_un_char;
2606 const char * const send = strchr(s,']');
2607 char tmpbuf[sizeof PL_tokenbuf * 4];
2609 if (!send) /* has to be an expression */
2612 Zero(seen,256,char);
2615 else if (isDIGIT(*s)) {
2617 if (isDIGIT(s[1]) && s[2] == ']')
2623 for (; s < send; s++) {
2624 last_un_char = un_char;
2625 un_char = (unsigned char)*s;
2630 weight -= seen[un_char] * 10;
2631 if (isALNUM_lazy_if(s+1,UTF)) {
2633 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2634 len = (int)strlen(tmpbuf);
2635 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2640 else if (*s == '$' && s[1] &&
2641 strchr("[#!%*<>()-=",s[1])) {
2642 if (/*{*/ strchr("])} =",s[2]))
2651 if (strchr("wds]",s[1]))
2653 else if (seen[(U8)'\''] || seen[(U8)'"'])
2655 else if (strchr("rnftbxcav",s[1]))
2657 else if (isDIGIT(s[1])) {
2659 while (s[1] && isDIGIT(s[1]))
2669 if (strchr("aA01! ",last_un_char))
2671 if (strchr("zZ79~",s[1]))
2673 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2674 weight -= 5; /* cope with negative subscript */
2677 if (!isALNUM(last_un_char)
2678 && !(last_un_char == '$' || last_un_char == '@'
2679 || last_un_char == '&')
2680 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2685 if (keyword(tmpbuf, d - tmpbuf, 0))
2688 if (un_char == last_un_char + 1)
2690 weight -= seen[un_char];
2695 if (weight >= 0) /* probably a character class */
2705 * Does all the checking to disambiguate
2707 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2708 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2710 * First argument is the stuff after the first token, e.g. "bar".
2712 * Not a method if bar is a filehandle.
2713 * Not a method if foo is a subroutine prototyped to take a filehandle.
2714 * Not a method if it's really "Foo $bar"
2715 * Method if it's "foo $bar"
2716 * Not a method if it's really "print foo $bar"
2717 * Method if it's really "foo package::" (interpreted as package->foo)
2718 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2719 * Not a method if bar is a filehandle or package, but is quoted with
2724 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2727 char *s = start + (*start == '$');
2728 char tmpbuf[sizeof PL_tokenbuf];
2735 PERL_ARGS_ASSERT_INTUIT_METHOD;
2738 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2742 const char *proto = SvPVX_const(cv);
2753 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2754 /* start is the beginning of the possible filehandle/object,
2755 * and s is the end of it
2756 * tmpbuf is a copy of it
2759 if (*start == '$') {
2760 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2761 isUPPER(*PL_tokenbuf))
2764 len = start - SvPVX(PL_linestr);
2768 start = SvPVX(PL_linestr) + len;
2772 return *s == '(' ? FUNCMETH : METHOD;
2774 if (!keyword(tmpbuf, len, 0)) {
2775 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2779 soff = s - SvPVX(PL_linestr);
2783 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2784 if (indirgv && GvCVu(indirgv))
2786 /* filehandle or package name makes it a method */
2787 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2789 soff = s - SvPVX(PL_linestr);
2792 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2793 return 0; /* no assumptions -- "=>" quotes bearword */
2795 start_force(PL_curforce);
2796 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2797 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
2798 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2800 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2805 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2807 return *s == '(' ? FUNCMETH : METHOD;
2813 /* Encoded script support. filter_add() effectively inserts a
2814 * 'pre-processing' function into the current source input stream.
2815 * Note that the filter function only applies to the current source file
2816 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2818 * The datasv parameter (which may be NULL) can be used to pass
2819 * private data to this instance of the filter. The filter function
2820 * can recover the SV using the FILTER_DATA macro and use it to
2821 * store private buffers and state information.
2823 * The supplied datasv parameter is upgraded to a PVIO type
2824 * and the IoDIRP/IoANY field is used to store the function pointer,
2825 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2826 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2827 * private use must be set using malloc'd pointers.
2831 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2840 if (!PL_rsfp_filters)
2841 PL_rsfp_filters = newAV();
2844 SvUPGRADE(datasv, SVt_PVIO);
2845 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2846 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2847 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2848 FPTR2DPTR(void *, IoANY(datasv)),
2849 SvPV_nolen(datasv)));
2850 av_unshift(PL_rsfp_filters, 1);
2851 av_store(PL_rsfp_filters, 0, datasv) ;
2856 /* Delete most recently added instance of this filter function. */
2858 Perl_filter_del(pTHX_ filter_t funcp)
2863 PERL_ARGS_ASSERT_FILTER_DEL;
2866 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2867 FPTR2DPTR(void*, funcp)));
2869 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2871 /* if filter is on top of stack (usual case) just pop it off */
2872 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2873 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2874 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2875 IoANY(datasv) = (void *)NULL;
2876 sv_free(av_pop(PL_rsfp_filters));
2880 /* we need to search for the correct entry and clear it */
2881 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2885 /* Invoke the idxth filter function for the current rsfp. */
2886 /* maxlen 0 = read one text line */
2888 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2893 /* This API is bad. It should have been using unsigned int for maxlen.
2894 Not sure if we want to change the API, but if not we should sanity
2895 check the value here. */
2896 const unsigned int correct_length
2905 PERL_ARGS_ASSERT_FILTER_READ;
2907 if (!PL_parser || !PL_rsfp_filters)
2909 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2910 /* Provide a default input filter to make life easy. */
2911 /* Note that we append to the line. This is handy. */
2912 DEBUG_P(PerlIO_printf(Perl_debug_log,
2913 "filter_read %d: from rsfp\n", idx));
2914 if (correct_length) {
2917 const int old_len = SvCUR(buf_sv);
2919 /* ensure buf_sv is large enough */
2920 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2921 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2922 correct_length)) <= 0) {
2923 if (PerlIO_error(PL_rsfp))
2924 return -1; /* error */
2926 return 0 ; /* end of file */
2928 SvCUR_set(buf_sv, old_len + len) ;
2931 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2932 if (PerlIO_error(PL_rsfp))
2933 return -1; /* error */
2935 return 0 ; /* end of file */
2938 return SvCUR(buf_sv);
2940 /* Skip this filter slot if filter has been deleted */
2941 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2942 DEBUG_P(PerlIO_printf(Perl_debug_log,
2943 "filter_read %d: skipped (filter deleted)\n",
2945 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2947 /* Get function pointer hidden within datasv */
2948 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2949 DEBUG_P(PerlIO_printf(Perl_debug_log,
2950 "filter_read %d: via function %p (%s)\n",
2951 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2952 /* Call function. The function is expected to */
2953 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2954 /* Return: <0:error, =0:eof, >0:not eof */
2955 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2959 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2963 PERL_ARGS_ASSERT_FILTER_GETS;
2965 #ifdef PERL_CR_FILTER
2966 if (!PL_rsfp_filters) {
2967 filter_add(S_cr_textfilter,NULL);
2970 if (PL_rsfp_filters) {
2972 SvCUR_set(sv, 0); /* start with empty line */
2973 if (FILTER_READ(0, sv, 0) > 0)
2974 return ( SvPVX(sv) ) ;
2979 return (sv_gets(sv, fp, append));
2983 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
2988 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2990 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2994 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2995 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2997 return GvHV(gv); /* Foo:: */
3000 /* use constant CLASS => 'MyClass' */
3001 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3002 if (gv && GvCV(gv)) {
3003 SV * const sv = cv_const_sv(GvCV(gv));
3005 pkgname = SvPV_const(sv, len);
3008 return gv_stashpvn(pkgname, len, 0);
3012 * S_readpipe_override
3013 * Check whether readpipe() is overriden, and generates the appropriate
3014 * optree, provided sublex_start() is called afterwards.
3017 S_readpipe_override(pTHX)
3020 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3021 pl_yylval.ival = OP_BACKTICK;
3023 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3025 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3026 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3027 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3029 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3030 append_elem(OP_LIST,
3031 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3032 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3039 * The intent of this yylex wrapper is to minimize the changes to the
3040 * tokener when we aren't interested in collecting madprops. It remains
3041 * to be seen how successful this strategy will be...
3048 char *s = PL_bufptr;
3050 /* make sure PL_thiswhite is initialized */
3054 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3055 if (PL_pending_ident)
3056 return S_pending_ident(aTHX);
3058 /* previous token ate up our whitespace? */
3059 if (!PL_lasttoke && PL_nextwhite) {
3060 PL_thiswhite = PL_nextwhite;
3064 /* isolate the token, and figure out where it is without whitespace */
3065 PL_realtokenstart = -1;
3069 assert(PL_curforce < 0);
3071 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3072 if (!PL_thistoken) {
3073 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3074 PL_thistoken = newSVpvs("");
3076 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3077 PL_thistoken = newSVpvn(tstart, s - tstart);
3080 if (PL_thismad) /* install head */
3081 CURMAD('X', PL_thistoken);
3084 /* last whitespace of a sublex? */
3085 if (optype == ')' && PL_endwhite) {
3086 CURMAD('X', PL_endwhite);
3091 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3092 if (!PL_thiswhite && !PL_endwhite && !optype) {
3093 sv_free(PL_thistoken);
3098 /* put off final whitespace till peg */
3099 if (optype == ';' && !PL_rsfp) {
3100 PL_nextwhite = PL_thiswhite;
3103 else if (PL_thisopen) {
3104 CURMAD('q', PL_thisopen);
3106 sv_free(PL_thistoken);
3110 /* Store actual token text as madprop X */
3111 CURMAD('X', PL_thistoken);
3115 /* add preceding whitespace as madprop _ */
3116 CURMAD('_', PL_thiswhite);
3120 /* add quoted material as madprop = */
3121 CURMAD('=', PL_thisstuff);
3125 /* add terminating quote as madprop Q */
3126 CURMAD('Q', PL_thisclose);
3130 /* special processing based on optype */
3134 /* opval doesn't need a TOKEN since it can already store mp */
3144 if (pl_yylval.opval)
3145 append_madprops(PL_thismad, pl_yylval.opval, 0);
3153 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3162 /* remember any fake bracket that lexer is about to discard */
3163 if (PL_lex_brackets == 1 &&
3164 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3167 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3170 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3171 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3174 break; /* don't bother looking for trailing comment */
3183 /* attach a trailing comment to its statement instead of next token */
3187 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3189 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3191 if (*s == '\n' || *s == '#') {
3192 while (s < PL_bufend && *s != '\n')
3196 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3197 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3214 /* Create new token struct. Note: opvals return early above. */
3215 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3222 S_tokenize_use(pTHX_ int is_use, char *s) {
3225 PERL_ARGS_ASSERT_TOKENIZE_USE;
3227 if (PL_expect != XSTATE)
3228 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3229 is_use ? "use" : "no"));
3231 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3232 s = force_version(s, TRUE);
3233 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3234 start_force(PL_curforce);
3235 NEXTVAL_NEXTTOKE.opval = NULL;
3238 else if (*s == 'v') {
3239 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3240 s = force_version(s, FALSE);
3244 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3245 s = force_version(s, FALSE);
3247 pl_yylval.ival = is_use;
3251 static const char* const exp_name[] =
3252 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3253 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3260 Works out what to call the token just pulled out of the input
3261 stream. The yacc parser takes care of taking the ops we return and
3262 stitching them into a tree.
3268 if read an identifier
3269 if we're in a my declaration
3270 croak if they tried to say my($foo::bar)
3271 build the ops for a my() declaration
3272 if it's an access to a my() variable
3273 are we in a sort block?
3274 croak if my($a); $a <=> $b
3275 build ops for access to a my() variable
3276 if in a dq string, and they've said @foo and we can't find @foo
3278 build ops for a bareword
3279 if we already built the token before, use it.
3284 #pragma segment Perl_yylex
3290 register char *s = PL_bufptr;
3295 /* orig_keyword, gvp, and gv are initialized here because
3296 * jump to the label just_a_word_zero can bypass their
3297 * initialization later. */
3298 I32 orig_keyword = 0;
3303 SV* tmp = newSVpvs("");
3304 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3305 (IV)CopLINE(PL_curcop),
3306 lex_state_names[PL_lex_state],
3307 exp_name[PL_expect],
3308 pv_display(tmp, s, strlen(s), 0, 60));
3311 /* check if there's an identifier for us to look at */
3312 if (PL_pending_ident)
3313 return REPORT(S_pending_ident(aTHX));
3315 /* no identifier pending identification */
3317 switch (PL_lex_state) {
3319 case LEX_NORMAL: /* Some compilers will produce faster */
3320 case LEX_INTERPNORMAL: /* code if we comment these out. */
3324 /* when we've already built the next token, just pull it out of the queue */
3328 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3330 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3331 PL_nexttoke[PL_lasttoke].next_mad = 0;
3332 if (PL_thismad && PL_thismad->mad_key == '_') {
3333 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3334 PL_thismad->mad_val = 0;
3335 mad_free(PL_thismad);
3340 PL_lex_state = PL_lex_defer;
3341 PL_expect = PL_lex_expect;
3342 PL_lex_defer = LEX_NORMAL;
3343 if (!PL_nexttoke[PL_lasttoke].next_type)
3348 pl_yylval = PL_nextval[PL_nexttoke];
3350 PL_lex_state = PL_lex_defer;
3351 PL_expect = PL_lex_expect;
3352 PL_lex_defer = LEX_NORMAL;
3356 /* FIXME - can these be merged? */
3357 return(PL_nexttoke[PL_lasttoke].next_type);
3359 return REPORT(PL_nexttype[PL_nexttoke]);
3362 /* interpolated case modifiers like \L \U, including \Q and \E.
3363 when we get here, PL_bufptr is at the \
3365 case LEX_INTERPCASEMOD:
3367 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3368 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3370 /* handle \E or end of string */
3371 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3373 if (PL_lex_casemods) {
3374 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3375 PL_lex_casestack[PL_lex_casemods] = '\0';
3377 if (PL_bufptr != PL_bufend
3378 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3380 PL_lex_state = LEX_INTERPCONCAT;
3383 PL_thistoken = newSVpvs("\\E");
3389 while (PL_bufptr != PL_bufend &&
3390 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3392 PL_thiswhite = newSVpvs("");
3393 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3397 if (PL_bufptr != PL_bufend)
3400 PL_lex_state = LEX_INTERPCONCAT;
3404 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3405 "### Saw case modifier\n"); });
3407 if (s[1] == '\\' && s[2] == 'E') {
3410 PL_thiswhite = newSVpvs("");
3411 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3414 PL_lex_state = LEX_INTERPCONCAT;
3419 if (!PL_madskills) /* when just compiling don't need correct */
3420 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3421 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3422 if ((*s == 'L' || *s == 'U') &&
3423 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3424 PL_lex_casestack[--PL_lex_casemods] = '\0';
3427 if (PL_lex_casemods > 10)
3428 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3429 PL_lex_casestack[PL_lex_casemods++] = *s;
3430 PL_lex_casestack[PL_lex_casemods] = '\0';
3431 PL_lex_state = LEX_INTERPCONCAT;
3432 start_force(PL_curforce);
3433 NEXTVAL_NEXTTOKE.ival = 0;
3435 start_force(PL_curforce);
3437 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3439 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3441 NEXTVAL_NEXTTOKE.ival = OP_LC;
3443 NEXTVAL_NEXTTOKE.ival = OP_UC;
3445 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3447 Perl_croak(aTHX_ "panic: yylex");
3449 SV* const tmpsv = newSVpvs("\\ ");
3450 /* replace the space with the character we want to escape
3452 SvPVX(tmpsv)[1] = *s;
3458 if (PL_lex_starts) {
3464 sv_free(PL_thistoken);
3465 PL_thistoken = newSVpvs("");
3468 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3469 if (PL_lex_casemods == 1 && PL_lex_inpat)
3478 case LEX_INTERPPUSH:
3479 return REPORT(sublex_push());
3481 case LEX_INTERPSTART:
3482 if (PL_bufptr == PL_bufend)
3483 return REPORT(sublex_done());
3484 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3485 "### Interpolated variable\n"); });
3487 PL_lex_dojoin = (*PL_bufptr == '@');
3488 PL_lex_state = LEX_INTERPNORMAL;
3489 if (PL_lex_dojoin) {
3490 start_force(PL_curforce);
3491 NEXTVAL_NEXTTOKE.ival = 0;
3493 start_force(PL_curforce);
3494 force_ident("\"", '$');
3495 start_force(PL_curforce);
3496 NEXTVAL_NEXTTOKE.ival = 0;
3498 start_force(PL_curforce);
3499 NEXTVAL_NEXTTOKE.ival = 0;
3501 start_force(PL_curforce);
3502 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3505 if (PL_lex_starts++) {
3510 sv_free(PL_thistoken);
3511 PL_thistoken = newSVpvs("");
3514 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3515 if (!PL_lex_casemods && PL_lex_inpat)
3522 case LEX_INTERPENDMAYBE:
3523 if (intuit_more(PL_bufptr)) {
3524 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3530 if (PL_lex_dojoin) {
3531 PL_lex_dojoin = FALSE;
3532 PL_lex_state = LEX_INTERPCONCAT;
3536 sv_free(PL_thistoken);
3537 PL_thistoken = newSVpvs("");
3542 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3543 && SvEVALED(PL_lex_repl))
3545 if (PL_bufptr != PL_bufend)
3546 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3550 case LEX_INTERPCONCAT:
3552 if (PL_lex_brackets)
3553 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3555 if (PL_bufptr == PL_bufend)
3556 return REPORT(sublex_done());
3558 if (SvIVX(PL_linestr) == '\'') {
3559 SV *sv = newSVsv(PL_linestr);
3562 else if ( PL_hints & HINT_NEW_RE )
3563 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3564 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3568 s = scan_const(PL_bufptr);
3570 PL_lex_state = LEX_INTERPCASEMOD;
3572 PL_lex_state = LEX_INTERPSTART;
3575 if (s != PL_bufptr) {
3576 start_force(PL_curforce);
3578 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3580 NEXTVAL_NEXTTOKE = pl_yylval;
3583 if (PL_lex_starts++) {
3587 sv_free(PL_thistoken);
3588 PL_thistoken = newSVpvs("");
3591 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3592 if (!PL_lex_casemods && PL_lex_inpat)
3605 PL_lex_state = LEX_NORMAL;
3606 s = scan_formline(PL_bufptr);
3607 if (!PL_lex_formbrack)
3613 PL_oldoldbufptr = PL_oldbufptr;
3619 sv_free(PL_thistoken);
3622 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3626 if (isIDFIRST_lazy_if(s,UTF))
3629 unsigned char c = *s;
3630 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3631 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
3632 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
3637 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
3641 goto fake_eof; /* emulate EOF on ^D or ^Z */
3650 if (PL_lex_brackets) {
3651 yyerror((const char *)
3653 ? "Format not terminated"
3654 : "Missing right curly or square bracket"));
3656 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3657 "### Tokener got EOF\n");
3661 if (s++ < PL_bufend)
3662 goto retry; /* ignore stray nulls */
3665 if (!PL_in_eval && !PL_preambled) {
3666 PL_preambled = TRUE;
3672 /* Generate a string of Perl code to load the debugger.
3673 * If PERL5DB is set, it will return the contents of that,
3674 * otherwise a compile-time require of perl5db.pl. */
3676 const char * const pdb = PerlEnv_getenv("PERL5DB");
3679 sv_setpv(PL_linestr, pdb);
3680 sv_catpvs(PL_linestr,";");
3682 SETERRNO(0,SS_NORMAL);
3683 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3686 sv_setpvs(PL_linestr,"");
3687 if (PL_preambleav) {
3688 SV **svp = AvARRAY(PL_preambleav);
3689 SV **const end = svp + AvFILLp(PL_preambleav);
3691 sv_catsv(PL_linestr, *svp);
3693 sv_catpvs(PL_linestr, ";");
3695 sv_free(MUTABLE_SV(PL_preambleav));
3696 PL_preambleav = NULL;
3699 sv_catpvs(PL_linestr,
3700 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3701 if (PL_minus_n || PL_minus_p) {
3702 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3704 sv_catpvs(PL_linestr,"chomp;");
3707 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3708 || *PL_splitstr == '"')
3709 && strchr(PL_splitstr + 1, *PL_splitstr))
3710 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3712 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3713 bytes can be used as quoting characters. :-) */
3714 const char *splits = PL_splitstr;
3715 sv_catpvs(PL_linestr, "our @F=split(q\0");
3718 if (*splits == '\\')
3719 sv_catpvn(PL_linestr, splits, 1);
3720 sv_catpvn(PL_linestr, splits, 1);
3721 } while (*splits++);
3722 /* This loop will embed the trailing NUL of
3723 PL_linestr as the last thing it does before
3725 sv_catpvs(PL_linestr, ");");
3729 sv_catpvs(PL_linestr,"our @F=split(' ');");
3732 sv_catpvs(PL_linestr, "\n");
3733 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3734 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3735 PL_last_lop = PL_last_uni = NULL;
3736 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3737 update_debugger_info(PL_linestr, NULL, 0);
3741 bof = PL_rsfp ? TRUE : FALSE;
3742 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3745 PL_realtokenstart = -1;
3748 if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3749 PerlIO_clearerr(PL_rsfp);
3751 (void)PerlIO_close(PL_rsfp);
3753 PL_doextract = FALSE;
3755 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3761 sv_setpvs(PL_linestr, ";}continue{print;}");
3763 sv_setpvs(PL_linestr, ";}");
3764 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3765 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3766 PL_last_lop = PL_last_uni = NULL;
3767 PL_minus_n = PL_minus_p = 0;
3770 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3771 PL_last_lop = PL_last_uni = NULL;
3772 sv_setpvs(PL_linestr,"");
3773 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3775 /* If it looks like the start of a BOM or raw UTF-16,
3776 * check if it in fact is. */
3782 #ifdef PERLIO_IS_STDIO
3783 # ifdef __GNU_LIBRARY__
3784 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3785 # define FTELL_FOR_PIPE_IS_BROKEN
3789 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3790 # define FTELL_FOR_PIPE_IS_BROKEN
3795 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3797 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3798 s = swallow_bom((U8*)s);
3802 /* Incest with pod. */
3805 sv_catsv(PL_thiswhite, PL_linestr);
3807 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3808 sv_setpvs(PL_linestr, "");
3809 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3810 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3811 PL_last_lop = PL_last_uni = NULL;
3812 PL_doextract = FALSE;
3816 } while (PL_doextract);
3817 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3818 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3819 update_debugger_info(PL_linestr, NULL, 0);
3820 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3821 PL_last_lop = PL_last_uni = NULL;
3822 if (CopLINE(PL_curcop) == 1) {
3823 while (s < PL_bufend && isSPACE(*s))
3825 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3829 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3833 if (*s == '#' && *(s+1) == '!')
3835 #ifdef ALTERNATE_SHEBANG
3837 static char const as[] = ALTERNATE_SHEBANG;
3838 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3839 d = s + (sizeof(as) - 1);
3841 #endif /* ALTERNATE_SHEBANG */
3850 while (*d && !isSPACE(*d))
3854 #ifdef ARG_ZERO_IS_SCRIPT
3855 if (ipathend > ipath) {
3857 * HP-UX (at least) sets argv[0] to the script name,
3858 * which makes $^X incorrect. And Digital UNIX and Linux,
3859 * at least, set argv[0] to the basename of the Perl
3860 * interpreter. So, having found "#!", we'll set it right.
3862 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3864 assert(SvPOK(x) || SvGMAGICAL(x));
3865 if (sv_eq(x, CopFILESV(PL_curcop))) {
3866 sv_setpvn(x, ipath, ipathend - ipath);
3872 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3873 const char * const lstart = SvPV_const(x,llen);
3875 bstart += blen - llen;
3876 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3877 sv_setpvn(x, ipath, ipathend - ipath);
3882 TAINT_NOT; /* $^X is always tainted, but that's OK */
3884 #endif /* ARG_ZERO_IS_SCRIPT */
3889 d = instr(s,"perl -");
3891 d = instr(s,"perl");
3893 /* avoid getting into infinite loops when shebang
3894 * line contains "Perl" rather than "perl" */
3896 for (d = ipathend-4; d >= ipath; --d) {
3897 if ((*d == 'p' || *d == 'P')
3898 && !ibcmp(d, "perl", 4))
3908 #ifdef ALTERNATE_SHEBANG
3910 * If the ALTERNATE_SHEBANG on this system starts with a
3911 * character that can be part of a Perl expression, then if
3912 * we see it but not "perl", we're probably looking at the
3913 * start of Perl code, not a request to hand off to some
3914 * other interpreter. Similarly, if "perl" is there, but
3915 * not in the first 'word' of the line, we assume the line
3916 * contains the start of the Perl program.
3918 if (d && *s != '#') {
3919 const char *c = ipath;
3920 while (*c && !strchr("; \t\r\n\f\v#", *c))
3923 d = NULL; /* "perl" not in first word; ignore */
3925 *s = '#'; /* Don't try to parse shebang line */
3927 #endif /* ALTERNATE_SHEBANG */
3932 !instr(s,"indir") &&
3933 instr(PL_origargv[0],"perl"))
3940 while (s < PL_bufend && isSPACE(*s))
3942 if (s < PL_bufend) {
3943 Newx(newargv,PL_origargc+3,char*);
3945 while (s < PL_bufend && !isSPACE(*s))
3948 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3951 newargv = PL_origargv;
3954 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3956 Perl_croak(aTHX_ "Can't exec %s", ipath);
3959 while (*d && !isSPACE(*d))
3961 while (SPACE_OR_TAB(*d))
3965 const bool switches_done = PL_doswitches;
3966 const U32 oldpdb = PL_perldb;
3967 const bool oldn = PL_minus_n;
3968 const bool oldp = PL_minus_p;
3972 bool baduni = FALSE;
3974 const char *d2 = d1 + 1;
3975 if (parse_unicode_opts((const char **)&d2)
3979 if (baduni || *d1 == 'M' || *d1 == 'm') {
3980 const char * const m = d1;
3981 while (*d1 && !isSPACE(*d1))
3983 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3986 d1 = moreswitches(d1);
3988 if (PL_doswitches && !switches_done) {
3989 int argc = PL_origargc;
3990 char **argv = PL_origargv;
3993 } while (argc && argv[0][0] == '-' && argv[0][1]);
3994 init_argv_symbols(argc,argv);
3996 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
3997 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3998 /* if we have already added "LINE: while (<>) {",
3999 we must not do it again */
4001 sv_setpvs(PL_linestr, "");
4002 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4003 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4004 PL_last_lop = PL_last_uni = NULL;
4005 PL_preambled = FALSE;
4006 if (PERLDB_LINE || PERLDB_SAVESRC)
4007 (void)gv_fetchfile(PL_origfilename);
4014 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4016 PL_lex_state = LEX_FORMLINE;
4021 #ifdef PERL_STRICT_CR
4022 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4024 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4026 case ' ': case '\t': case '\f': case 013:
4028 PL_realtokenstart = -1;
4030 PL_thiswhite = newSVpvs("");
4031 sv_catpvn(PL_thiswhite, s, 1);
4038 PL_realtokenstart = -1;
4042 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4043 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4044 /* handle eval qq[#line 1 "foo"\n ...] */
4045 CopLINE_dec(PL_curcop);
4048 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4050 if (!PL_in_eval || PL_rsfp)
4055 while (d < PL_bufend && *d != '\n')
4059 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4060 Perl_croak(aTHX_ "panic: input overflow");
4063 PL_thiswhite = newSVpvn(s, d - s);
4068 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4070 PL_lex_state = LEX_FORMLINE;
4076 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4077 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4080 TOKEN(PEG); /* make sure any #! line is accessible */
4085 /* if (PL_madskills && PL_lex_formbrack) { */
4087 while (d < PL_bufend && *d != '\n')
4091 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4092 Perl_croak(aTHX_ "panic: input overflow");
4093 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4095 PL_thiswhite = newSVpvs("");
4096 if (CopLINE(PL_curcop) == 1) {
4097 sv_setpvs(PL_thiswhite, "");
4100 sv_catpvn(PL_thiswhite, s, d - s);
4114 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4122 while (s < PL_bufend && SPACE_OR_TAB(*s))
4125 if (strnEQ(s,"=>",2)) {
4126 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4127 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4128 OPERATOR('-'); /* unary minus */
4130 PL_last_uni = PL_oldbufptr;
4132 case 'r': ftst = OP_FTEREAD; break;
4133 case 'w': ftst = OP_FTEWRITE; break;
4134 case 'x': ftst = OP_FTEEXEC; break;
4135 case 'o': ftst = OP_FTEOWNED; break;
4136 case 'R': ftst = OP_FTRREAD; break;
4137 case 'W': ftst = OP_FTRWRITE; break;
4138 case 'X': ftst = OP_FTREXEC; break;
4139 case 'O': ftst = OP_FTROWNED; break;
4140 case 'e': ftst = OP_FTIS; break;
4141 case 'z': ftst = OP_FTZERO; break;
4142 case 's': ftst = OP_FTSIZE; break;
4143 case 'f': ftst = OP_FTFILE; break;
4144 case 'd': ftst = OP_FTDIR; break;
4145 case 'l': ftst = OP_FTLINK; break;
4146 case 'p': ftst = OP_FTPIPE; break;
4147 case 'S': ftst = OP_FTSOCK; break;
4148 case 'u': ftst = OP_FTSUID; break;
4149 case 'g': ftst = OP_FTSGID; break;
4150 case 'k': ftst = OP_FTSVTX; break;
4151 case 'b': ftst = OP_FTBLK; break;
4152 case 'c': ftst = OP_FTCHR; break;
4153 case 't': ftst = OP_FTTTY; break;
4154 case 'T': ftst = OP_FTTEXT; break;
4155 case 'B': ftst = OP_FTBINARY; break;
4156 case 'M': case 'A': case 'C':
4157 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4159 case 'M': ftst = OP_FTMTIME; break;
4160 case 'A': ftst = OP_FTATIME; break;
4161 case 'C': ftst = OP_FTCTIME; break;
4169 PL_last_lop_op = (OPCODE)ftst;
4170 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4171 "### Saw file test %c\n", (int)tmp);
4176 /* Assume it was a minus followed by a one-letter named
4177 * subroutine call (or a -bareword), then. */
4178 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4179 "### '-%c' looked like a file test but was not\n",
4186 const char tmp = *s++;
4189 if (PL_expect == XOPERATOR)
4194 else if (*s == '>') {
4197 if (isIDFIRST_lazy_if(s,UTF)) {
4198 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4206 if (PL_expect == XOPERATOR)
4209 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4211 OPERATOR('-'); /* unary minus */
4217 const char tmp = *s++;
4220 if (PL_expect == XOPERATOR)
4225 if (PL_expect == XOPERATOR)
4228 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4235 if (PL_expect != XOPERATOR) {
4236 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4237 PL_expect = XOPERATOR;
4238 force_ident(PL_tokenbuf, '*');
4251 if (PL_expect == XOPERATOR) {
4255 PL_tokenbuf[0] = '%';
4256 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4257 sizeof PL_tokenbuf - 1, FALSE);
4258 if (!PL_tokenbuf[1]) {
4261 PL_pending_ident = '%';
4270 const char tmp = *s++;
4275 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4282 const char tmp = *s++;
4288 goto just_a_word_zero_gv;
4291 switch (PL_expect) {
4297 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4299 PL_bufptr = s; /* update in case we back off */
4305 PL_expect = XTERMBLOCK;
4308 stuffstart = s - SvPVX(PL_linestr) - 1;
4312 while (isIDFIRST_lazy_if(s,UTF)) {
4315 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4316 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4317 if (tmp < 0) tmp = -tmp;
4332 sv = newSVpvn(s, len);
4334 d = scan_str(d,TRUE,TRUE);
4336 /* MUST advance bufptr here to avoid bogus
4337 "at end of line" context messages from yyerror().
4339 PL_bufptr = s + len;
4340 yyerror("Unterminated attribute parameter in attribute list");
4344 return REPORT(0); /* EOF indicator */
4348 sv_catsv(sv, PL_lex_stuff);
4349 attrs = append_elem(OP_LIST, attrs,
4350 newSVOP(OP_CONST, 0, sv));
4351 SvREFCNT_dec(PL_lex_stuff);
4352 PL_lex_stuff = NULL;
4355 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4357 if (PL_in_my == KEY_our) {
4358 deprecate(":unique");
4361 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4364 /* NOTE: any CV attrs applied here need to be part of
4365 the CVf_BUILTIN_ATTRS define in cv.h! */
4366 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4368 CvLVALUE_on(PL_compcv);
4370 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4372 deprecate(":locked");
4374 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4376 CvMETHOD_on(PL_compcv);
4378 /* After we've set the flags, it could be argued that
4379 we don't need to do the attributes.pm-based setting
4380 process, and shouldn't bother appending recognized
4381 flags. To experiment with that, uncomment the
4382 following "else". (Note that's already been
4383 uncommented. That keeps the above-applied built-in
4384 attributes from being intercepted (and possibly
4385 rejected) by a package's attribute routines, but is
4386 justified by the performance win for the common case
4387 of applying only built-in attributes.) */
4389 attrs = append_elem(OP_LIST, attrs,
4390 newSVOP(OP_CONST, 0,
4394 if (*s == ':' && s[1] != ':')
4397 break; /* require real whitespace or :'s */
4398 /* XXX losing whitespace on sequential attributes here */
4402 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4403 if (*s != ';' && *s != '}' && *s != tmp
4404 && (tmp != '=' || *s != ')')) {
4405 const char q = ((*s == '\'') ? '"' : '\'');
4406 /* If here for an expression, and parsed no attrs, back
4408 if (tmp == '=' && !attrs) {
4412 /* MUST advance bufptr here to avoid bogus "at end of line"
4413 context messages from yyerror().
4416 yyerror( (const char *)
4418 ? Perl_form(aTHX_ "Invalid separator character "
4419 "%c%c%c in attribute list", q, *s, q)
4420 : "Unterminated attribute list" ) );
4428 start_force(PL_curforce);
4429 NEXTVAL_NEXTTOKE.opval = attrs;
4430 CURMAD('_', PL_nextwhite);
4435 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4436 (s - SvPVX(PL_linestr)) - stuffstart);
4444 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4445 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4453 const char tmp = *s++;
4458 const char tmp = *s++;
4466 if (PL_lex_brackets <= 0)
4467 yyerror("Unmatched right square bracket");
4470 if (PL_lex_state == LEX_INTERPNORMAL) {
4471 if (PL_lex_brackets == 0) {
4472 if (*s == '-' && s[1] == '>')
4473 PL_lex_state = LEX_INTERPENDMAYBE;
4474 else if (*s != '[' && *s != '{')
4475 PL_lex_state = LEX_INTERPEND;
4482 if (PL_lex_brackets > 100) {
4483 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4485 switch (PL_expect) {
4487 if (PL_lex_formbrack) {
4491 if (PL_oldoldbufptr == PL_last_lop)
4492 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4494 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4495 OPERATOR(HASHBRACK);
4497 while (s < PL_bufend && SPACE_OR_TAB(*s))
4500 PL_tokenbuf[0] = '\0';
4501 if (d < PL_bufend && *d == '-') {
4502 PL_tokenbuf[0] = '-';
4504 while (d < PL_bufend && SPACE_OR_TAB(*d))
4507 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4508 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4510 while (d < PL_bufend && SPACE_OR_TAB(*d))
4513 const char minus = (PL_tokenbuf[0] == '-');
4514 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4522 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4527 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4532 if (PL_oldoldbufptr == PL_last_lop)
4533 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4535 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4538 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4540 /* This hack is to get the ${} in the message. */
4542 yyerror("syntax error");
4545 OPERATOR(HASHBRACK);
4547 /* This hack serves to disambiguate a pair of curlies
4548 * as being a block or an anon hash. Normally, expectation
4549 * determines that, but in cases where we're not in a
4550 * position to expect anything in particular (like inside
4551 * eval"") we have to resolve the ambiguity. This code
4552 * covers the case where the first term in the curlies is a
4553 * quoted string. Most other cases need to be explicitly
4554 * disambiguated by prepending a "+" before the opening
4555 * curly in order to force resolution as an anon hash.
4557 * XXX should probably propagate the outer expectation
4558 * into eval"" to rely less on this hack, but that could
4559 * potentially break current behavior of eval"".
4563 if (*s == '\'' || *s == '"' || *s == '`') {
4564 /* common case: get past first string, handling escapes */
4565 for (t++; t < PL_bufend && *t != *s;)
4566 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4570 else if (*s == 'q') {
4573 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4576 /* skip q//-like construct */
4578 char open, close, term;
4581 while (t < PL_bufend && isSPACE(*t))
4583 /* check for q => */
4584 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4585 OPERATOR(HASHBRACK);
4589 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4593 for (t++; t < PL_bufend; t++) {
4594 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4596 else if (*t == open)
4600 for (t++; t < PL_bufend; t++) {
4601 if (*t == '\\' && t+1 < PL_bufend)
4603 else if (*t == close && --brackets <= 0)
4605 else if (*t == open)
4612 /* skip plain q word */
4613 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4616 else if (isALNUM_lazy_if(t,UTF)) {
4618 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4621 while (t < PL_bufend && isSPACE(*t))
4623 /* if comma follows first term, call it an anon hash */
4624 /* XXX it could be a comma expression with loop modifiers */
4625 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4626 || (*t == '=' && t[1] == '>')))
4627 OPERATOR(HASHBRACK);
4628 if (PL_expect == XREF)
4631 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4637 pl_yylval.ival = CopLINE(PL_curcop);
4638 if (isSPACE(*s) || *s == '#')
4639 PL_copline = NOLINE; /* invalidate current command line number */
4644 if (PL_lex_brackets <= 0)
4645 yyerror("Unmatched right curly bracket");
4647 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4648 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4649 PL_lex_formbrack = 0;
4650 if (PL_lex_state == LEX_INTERPNORMAL) {
4651 if (PL_lex_brackets == 0) {
4652 if (PL_expect & XFAKEBRACK) {
4653 PL_expect &= XENUMMASK;
4654 PL_lex_state = LEX_INTERPEND;
4659 PL_thiswhite = newSVpvs("");
4660 sv_catpvs(PL_thiswhite,"}");
4663 return yylex(); /* ignore fake brackets */
4665 if (*s == '-' && s[1] == '>')
4666 PL_lex_state = LEX_INTERPENDMAYBE;
4667 else if (*s != '[' && *s != '{')
4668 PL_lex_state = LEX_INTERPEND;
4671 if (PL_expect & XFAKEBRACK) {
4672 PL_expect &= XENUMMASK;
4674 return yylex(); /* ignore fake brackets */
4676 start_force(PL_curforce);
4678 curmad('X', newSVpvn(s-1,1));
4679 CURMAD('_', PL_thiswhite);
4684 PL_thistoken = newSVpvs("");
4692 if (PL_expect == XOPERATOR) {
4693 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4694 && isIDFIRST_lazy_if(s,UTF))
4696 CopLINE_dec(PL_curcop);
4697 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
4698 CopLINE_inc(PL_curcop);
4703 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4705 PL_expect = XOPERATOR;
4706 force_ident(PL_tokenbuf, '&');
4710 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
4722 const char tmp = *s++;
4729 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4730 && strchr("+-*/%.^&|<",tmp))
4731 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4732 "Reversed %c= operator",(int)tmp);
4734 if (PL_expect == XSTATE && isALPHA(tmp) &&
4735 (s == PL_linestart+1 || s[-2] == '\n') )
4737 if (PL_in_eval && !PL_rsfp) {
4742 if (strnEQ(s,"=cut",4)) {
4758 PL_thiswhite = newSVpvs("");
4759 sv_catpvn(PL_thiswhite, PL_linestart,
4760 PL_bufend - PL_linestart);
4764 PL_doextract = TRUE;
4768 if (PL_lex_brackets < PL_lex_formbrack) {
4770 #ifdef PERL_STRICT_CR
4771 while (SPACE_OR_TAB(*t))
4773 while (SPACE_OR_TAB(*t) || *t == '\r')
4776 if (*t == '\n' || *t == '#') {
4787 const char tmp = *s++;
4789 /* was this !=~ where !~ was meant?
4790 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4792 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4793 const char *t = s+1;
4795 while (t < PL_bufend && isSPACE(*t))
4798 if (*t == '/' || *t == '?' ||
4799 ((*t == 'm' || *t == 's' || *t == 'y')
4800 && !isALNUM(t[1])) ||
4801 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4802 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4803 "!=~ should be !~");
4813 if (PL_expect != XOPERATOR) {
4814 if (s[1] != '<' && !strchr(s,'>'))
4817 s = scan_heredoc(s);
4819 s = scan_inputsymbol(s);
4820 TERM(sublex_start());
4826 SHop(OP_LEFT_SHIFT);
4840 const char tmp = *s++;
4842 SHop(OP_RIGHT_SHIFT);
4843 else if (tmp == '=')
4852 if (PL_expect == XOPERATOR) {
4853 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4854 return deprecate_commaless_var_list();
4858 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4859 PL_tokenbuf[0] = '@';
4860 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4861 sizeof PL_tokenbuf - 1, FALSE);
4862 if (PL_expect == XOPERATOR)
4863 no_op("Array length", s);
4864 if (!PL_tokenbuf[1])
4866 PL_expect = XOPERATOR;
4867 PL_pending_ident = '#';
4871 PL_tokenbuf[0] = '$';
4872 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4873 sizeof PL_tokenbuf - 1, FALSE);
4874 if (PL_expect == XOPERATOR)
4876 if (!PL_tokenbuf[1]) {
4878 yyerror("Final $ should be \\$ or $name");
4882 /* This kludge not intended to be bulletproof. */
4883 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4884 pl_yylval.opval = newSVOP(OP_CONST, 0,
4885 newSViv(CopARYBASE_get(&PL_compiling)));
4886 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
4892 const char tmp = *s;
4893 if (PL_lex_state == LEX_NORMAL)
4896 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4897 && intuit_more(s)) {
4899 PL_tokenbuf[0] = '@';
4900 if (ckWARN(WARN_SYNTAX)) {
4903 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4906 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4907 while (t < PL_bufend && *t != ']')
4909 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4910 "Multidimensional syntax %.*s not supported",
4911 (int)((t - PL_bufptr) + 1), PL_bufptr);
4915 else if (*s == '{') {
4917 PL_tokenbuf[0] = '%';
4918 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4919 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4921 char tmpbuf[sizeof PL_tokenbuf];
4924 } while (isSPACE(*t));
4925 if (isIDFIRST_lazy_if(t,UTF)) {
4927 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4931 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4932 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4933 "You need to quote \"%s\"",
4940 PL_expect = XOPERATOR;
4941 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4942 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4943 if (!islop || PL_last_lop_op == OP_GREPSTART)
4944 PL_expect = XOPERATOR;
4945 else if (strchr("$@\"'`q", *s))
4946 PL_expect = XTERM; /* e.g. print $fh "foo" */
4947 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4948 PL_expect = XTERM; /* e.g. print $fh &sub */
4949 else if (isIDFIRST_lazy_if(s,UTF)) {
4950 char tmpbuf[sizeof PL_tokenbuf];
4952 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4953 if ((t2 = keyword(tmpbuf, len, 0))) {
4954 /* binary operators exclude handle interpretations */
4966 PL_expect = XTERM; /* e.g. print $fh length() */
4971 PL_expect = XTERM; /* e.g. print $fh subr() */
4974 else if (isDIGIT(*s))
4975 PL_expect = XTERM; /* e.g. print $fh 3 */
4976 else if (*s == '.' && isDIGIT(s[1]))
4977 PL_expect = XTERM; /* e.g. print $fh .3 */
4978 else if ((*s == '?' || *s == '-' || *s == '+')
4979 && !isSPACE(s[1]) && s[1] != '=')
4980 PL_expect = XTERM; /* e.g. print $fh -1 */
4981 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4983 PL_expect = XTERM; /* e.g. print $fh /.../
4984 XXX except DORDOR operator
4986 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4988 PL_expect = XTERM; /* print $fh <<"EOF" */
4991 PL_pending_ident = '$';
4995 if (PL_expect == XOPERATOR)
4997 PL_tokenbuf[0] = '@';
4998 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4999 if (!PL_tokenbuf[1]) {
5002 if (PL_lex_state == LEX_NORMAL)
5004 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5006 PL_tokenbuf[0] = '%';
5008 /* Warn about @ where they meant $. */
5009 if (*s == '[' || *s == '{') {
5010 if (ckWARN(WARN_SYNTAX)) {
5011 const char *t = s + 1;
5012 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5014 if (*t == '}' || *t == ']') {
5016 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5017 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5018 "Scalar value %.*s better written as $%.*s",
5019 (int)(t-PL_bufptr), PL_bufptr,
5020 (int)(t-PL_bufptr-1), PL_bufptr+1);
5025 PL_pending_ident = '@';
5028 case '/': /* may be division, defined-or, or pattern */
5029 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5033 case '?': /* may either be conditional or pattern */
5034 if (PL_expect == XOPERATOR) {
5042 /* A // operator. */
5052 /* Disable warning on "study /blah/" */
5053 if (PL_oldoldbufptr == PL_last_uni
5054 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5055 || memNE(PL_last_uni, "study", 5)
5056 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5059 s = scan_pat(s,OP_MATCH);
5060 TERM(sublex_start());
5064 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5065 #ifdef PERL_STRICT_CR
5068 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5070 && (s == PL_linestart || s[-1] == '\n') )
5072 PL_lex_formbrack = 0;
5076 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5080 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5086 pl_yylval.ival = OPf_SPECIAL;
5092 if (PL_expect != XOPERATOR)
5097 case '0': case '1': case '2': case '3': case '4':
5098 case '5': case '6': case '7': case '8': case '9':
5099 s = scan_num(s, &pl_yylval);
5100 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5101 if (PL_expect == XOPERATOR)
5106 s = scan_str(s,!!PL_madskills,FALSE);
5107 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5108 if (PL_expect == XOPERATOR) {
5109 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5110 return deprecate_commaless_var_list();
5117 pl_yylval.ival = OP_CONST;
5118 TERM(sublex_start());
5121 s = scan_str(s,!!PL_madskills,FALSE);
5122 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5123 if (PL_expect == XOPERATOR) {
5124 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5125 return deprecate_commaless_var_list();
5132 pl_yylval.ival = OP_CONST;
5133 /* FIXME. I think that this can be const if char *d is replaced by
5134 more localised variables. */
5135 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5136 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5137 pl_yylval.ival = OP_STRINGIFY;
5141 TERM(sublex_start());
5144 s = scan_str(s,!!PL_madskills,FALSE);
5145 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5146 if (PL_expect == XOPERATOR)
5147 no_op("Backticks",s);
5150 readpipe_override();
5151 TERM(sublex_start());
5155 if (PL_lex_inwhat && isDIGIT(*s))
5156 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5158 if (PL_expect == XOPERATOR)
5159 no_op("Backslash",s);
5163 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5164 char *start = s + 2;
5165 while (isDIGIT(*start) || *start == '_')
5167 if (*start == '.' && isDIGIT(start[1])) {
5168 s = scan_num(s, &pl_yylval);
5171 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5172 else if (!isALPHA(*start) && (PL_expect == XTERM
5173 || PL_expect == XREF || PL_expect == XSTATE
5174 || PL_expect == XTERMORDORDOR)) {
5175 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5177 s = scan_num(s, &pl_yylval);
5184 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5226 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5228 /* Some keywords can be followed by any delimiter, including ':' */
5229 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5230 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5231 (PL_tokenbuf[0] == 'q' &&
5232 strchr("qwxr", PL_tokenbuf[1])))));
5234 /* x::* is just a word, unless x is "CORE" */
5235 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5239 while (d < PL_bufend && isSPACE(*d))
5240 d++; /* no comments skipped here, or s### is misparsed */
5242 /* Is this a label? */
5243 if (!tmp && PL_expect == XSTATE
5244 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5245 tmp = keyword(PL_tokenbuf, len, 0);
5247 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5249 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5254 /* Check for keywords */
5255 tmp = keyword(PL_tokenbuf, len, 0);
5257 /* Is this a word before a => operator? */
5258 if (*d == '=' && d[1] == '>') {
5261 = (OP*)newSVOP(OP_CONST, 0,
5262 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5263 pl_yylval.opval->op_private = OPpCONST_BARE;
5267 if (tmp < 0) { /* second-class keyword? */
5268 GV *ogv = NULL; /* override (winner) */
5269 GV *hgv = NULL; /* hidden (loser) */
5270 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5272 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5275 if (GvIMPORTED_CV(gv))
5277 else if (! CvMETHOD(cv))
5281 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5282 (gv = *gvp) && isGV_with_GP(gv) &&
5283 GvCVu(gv) && GvIMPORTED_CV(gv))
5290 tmp = 0; /* overridden by import or by GLOBAL */
5293 && -tmp==KEY_lock /* XXX generalizable kludge */
5296 tmp = 0; /* any sub overrides "weak" keyword */
5298 else { /* no override */
5300 if (tmp == KEY_dump) {
5301 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5302 "dump() better written as CORE::dump()");
5306 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
5307 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5308 "Ambiguous call resolved as CORE::%s(), %s",
5309 GvENAME(hgv), "qualify as such or use &");
5316 default: /* not a keyword */
5317 /* Trade off - by using this evil construction we can pull the
5318 variable gv into the block labelled keylookup. If not, then
5319 we have to give it function scope so that the goto from the
5320 earlier ':' case doesn't bypass the initialisation. */
5322 just_a_word_zero_gv:
5330 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5333 SV *nextPL_nextwhite = 0;
5337 /* Get the rest if it looks like a package qualifier */
5339 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5341 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5344 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5345 *s == '\'' ? "'" : "::");
5350 if (PL_expect == XOPERATOR) {
5351 if (PL_bufptr == PL_linestart) {
5352 CopLINE_dec(PL_curcop);
5353 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5354 CopLINE_inc(PL_curcop);
5357 no_op("Bareword",s);
5360 /* Look for a subroutine with this name in current package,
5361 unless name is "Foo::", in which case Foo is a bearword
5362 (and a package name). */
5364 if (len > 2 && !PL_madskills &&
5365 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5367 if (ckWARN(WARN_BAREWORD)
5368 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5369 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5370 "Bareword \"%s\" refers to nonexistent package",
5373 PL_tokenbuf[len] = '\0';
5379 /* Mustn't actually add anything to a symbol table.
5380 But also don't want to "initialise" any placeholder
5381 constants that might already be there into full
5382 blown PVGVs with attached PVCV. */
5383 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5384 GV_NOADD_NOINIT, SVt_PVCV);
5389 /* if we saw a global override before, get the right name */
5392 sv = newSVpvs("CORE::GLOBAL::");
5393 sv_catpv(sv,PL_tokenbuf);
5396 /* If len is 0, newSVpv does strlen(), which is correct.
5397 If len is non-zero, then it will be the true length,
5398 and so the scalar will be created correctly. */
5399 sv = newSVpv(PL_tokenbuf,len);
5402 if (PL_madskills && !PL_thistoken) {
5403 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5404 PL_thistoken = newSVpvn(start,s - start);
5405 PL_realtokenstart = s - SvPVX(PL_linestr);
5409 /* Presume this is going to be a bareword of some sort. */
5412 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5413 pl_yylval.opval->op_private = OPpCONST_BARE;
5414 /* UTF-8 package name? */
5415 if (UTF && !IN_BYTES &&
5416 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5419 /* And if "Foo::", then that's what it certainly is. */
5424 /* Do the explicit type check so that we don't need to force
5425 the initialisation of the symbol table to have a real GV.
5426 Beware - gv may not really be a PVGV, cv may not really be
5427 a PVCV, (because of the space optimisations that gv_init
5428 understands) But they're true if for this symbol there is
5429 respectively a typeglob and a subroutine.
5431 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5432 /* Real typeglob, so get the real subroutine: */
5434 /* A proxy for a subroutine in this package? */
5435 : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
5438 /* See if it's the indirect object for a list operator. */
5440 if (PL_oldoldbufptr &&
5441 PL_oldoldbufptr < PL_bufptr &&
5442 (PL_oldoldbufptr == PL_last_lop
5443 || PL_oldoldbufptr == PL_last_uni) &&
5444 /* NO SKIPSPACE BEFORE HERE! */
5445 (PL_expect == XREF ||
5446 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5448 bool immediate_paren = *s == '(';
5450 /* (Now we can afford to cross potential line boundary.) */
5451 s = SKIPSPACE2(s,nextPL_nextwhite);
5453 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5456 /* Two barewords in a row may indicate method call. */
5458 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5459 (tmp = intuit_method(s, gv, cv)))
5462 /* If not a declared subroutine, it's an indirect object. */
5463 /* (But it's an indir obj regardless for sort.) */
5464 /* Also, if "_" follows a filetest operator, it's a bareword */
5467 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5469 (PL_last_lop_op != OP_MAPSTART &&
5470 PL_last_lop_op != OP_GREPSTART))))
5471 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5472 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5475 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5480 PL_expect = XOPERATOR;
5483 s = SKIPSPACE2(s,nextPL_nextwhite);
5484 PL_nextwhite = nextPL_nextwhite;
5489 /* Is this a word before a => operator? */
5490 if (*s == '=' && s[1] == '>' && !pkgname) {
5492 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
5493 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5494 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
5498 /* If followed by a paren, it's certainly a subroutine. */
5503 while (SPACE_OR_TAB(*d))
5505 if (*d == ')' && (sv = gv_const_sv(gv))) {
5512 PL_nextwhite = PL_thiswhite;
5515 start_force(PL_curforce);
5517 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5518 PL_expect = XOPERATOR;
5521 PL_nextwhite = nextPL_nextwhite;
5522 curmad('X', PL_thistoken);
5523 PL_thistoken = newSVpvs("");
5531 /* If followed by var or block, call it a method (unless sub) */
5533 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5534 PL_last_lop = PL_oldbufptr;
5535 PL_last_lop_op = OP_METHOD;
5539 /* If followed by a bareword, see if it looks like indir obj. */
5542 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5543 && (tmp = intuit_method(s, gv, cv)))
5546 /* Not a method, so call it a subroutine (if defined) */
5549 if (lastchar == '-')
5550 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5551 "Ambiguous use of -%s resolved as -&%s()",
5552 PL_tokenbuf, PL_tokenbuf);
5553 /* Check for a constant sub */
5554 if ((sv = gv_const_sv(gv))) {
5556 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
5557 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5558 pl_yylval.opval->op_private = 0;
5562 /* Resolve to GV now. */
5563 if (SvTYPE(gv) != SVt_PVGV) {
5564 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5565 assert (SvTYPE(gv) == SVt_PVGV);
5566 /* cv must have been some sort of placeholder, so
5567 now needs replacing with a real code reference. */
5571 op_free(pl_yylval.opval);
5572 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5573 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5574 PL_last_lop = PL_oldbufptr;
5575 PL_last_lop_op = OP_ENTERSUB;
5576 /* Is there a prototype? */
5584 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5587 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5589 while (*proto == ';')
5591 if (*proto == '&' && *s == '{') {
5593 sv_setpvs(PL_subname, "__ANON__");
5595 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5602 PL_nextwhite = PL_thiswhite;
5605 start_force(PL_curforce);
5606 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5609 PL_nextwhite = nextPL_nextwhite;
5610 curmad('X', PL_thistoken);
5611 PL_thistoken = newSVpvs("");
5618 /* Guess harder when madskills require "best effort". */
5619 if (PL_madskills && (!gv || !GvCVu(gv))) {
5620 int probable_sub = 0;
5621 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5623 else if (isALPHA(*s)) {
5627 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5628 if (!keyword(tmpbuf, tmplen, 0))
5631 while (d < PL_bufend && isSPACE(*d))
5633 if (*d == '=' && d[1] == '>')
5638 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5639 op_free(pl_yylval.opval);
5640 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5641 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5642 PL_last_lop = PL_oldbufptr;
5643 PL_last_lop_op = OP_ENTERSUB;
5644 PL_nextwhite = PL_thiswhite;
5646 start_force(PL_curforce);
5647 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5649 PL_nextwhite = nextPL_nextwhite;
5650 curmad('X', PL_thistoken);
5651 PL_thistoken = newSVpvs("");
5656 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5663 /* Call it a bare word */
5665 if (PL_hints & HINT_STRICT_SUBS)
5666 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5669 /* after "print" and similar functions (corresponding to
5670 * "F? L" in opcode.pl), whatever wasn't already parsed as
5671 * a filehandle should be subject to "strict subs".
5672 * Likewise for the optional indirect-object argument to system
5673 * or exec, which can't be a bareword */
5674 if ((PL_last_lop_op == OP_PRINT
5675 || PL_last_lop_op == OP_PRTF
5676 || PL_last_lop_op == OP_SAY
5677 || PL_last_lop_op == OP_SYSTEM
5678 || PL_last_lop_op == OP_EXEC)
5679 && (PL_hints & HINT_STRICT_SUBS))
5680 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5681 if (lastchar != '-') {
5682 if (ckWARN(WARN_RESERVED)) {
5686 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5687 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5694 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
5695 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5696 "Operator or semicolon missing before %c%s",
5697 lastchar, PL_tokenbuf);
5698 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5699 "Ambiguous use of %c resolved as operator %c",
5700 lastchar, lastchar);
5706 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5707 newSVpv(CopFILE(PL_curcop),0));
5711 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5712 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5715 case KEY___PACKAGE__:
5716 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5718 ? newSVhek(HvNAME_HEK(PL_curstash))
5725 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5726 const char *pname = "main";
5727 if (PL_tokenbuf[2] == 'D')
5728 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5729 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5733 GvIOp(gv) = newIO();
5734 IoIFP(GvIOp(gv)) = PL_rsfp;
5735 #if defined(HAS_FCNTL) && defined(F_SETFD)
5737 const int fd = PerlIO_fileno(PL_rsfp);
5738 fcntl(fd,F_SETFD,fd >= 3);
5741 /* Mark this internal pseudo-handle as clean */
5742 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5743 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5744 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5746 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5747 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5748 /* if the script was opened in binmode, we need to revert
5749 * it to text mode for compatibility; but only iff it has CRs
5750 * XXX this is a questionable hack at best. */
5751 if (PL_bufend-PL_bufptr > 2
5752 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5755 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5756 loc = PerlIO_tell(PL_rsfp);
5757 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5760 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5762 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5763 #endif /* NETWARE */
5764 #ifdef PERLIO_IS_STDIO /* really? */
5765 # if defined(__BORLANDC__)
5766 /* XXX see note in do_binmode() */
5767 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5771 PerlIO_seek(PL_rsfp, loc, 0);
5775 #ifdef PERLIO_LAYERS
5778 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5779 else if (PL_encoding) {
5786 XPUSHs(PL_encoding);
5788 call_method("name", G_SCALAR);
5792 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5793 Perl_form(aTHX_ ":encoding(%"SVf")",
5802 if (PL_realtokenstart >= 0) {
5803 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5805 PL_endwhite = newSVpvs("");
5806 sv_catsv(PL_endwhite, PL_thiswhite);
5808 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5809 PL_realtokenstart = -1;
5811 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5812 SvCUR(PL_endwhite))) != NULL) ;
5827 if (PL_expect == XSTATE) {
5834 if (*s == ':' && s[1] == ':') {
5837 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5838 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5839 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5842 else if (tmp == KEY_require || tmp == KEY_do)
5843 /* that's a way to remember we saw "CORE::" */
5856 LOP(OP_ACCEPT,XTERM);
5862 LOP(OP_ATAN2,XTERM);
5868 LOP(OP_BINMODE,XTERM);
5871 LOP(OP_BLESS,XTERM);
5880 /* When 'use switch' is in effect, continue has a dual
5881 life as a control operator. */
5883 if (!FEATURE_IS_ENABLED("switch"))
5886 /* We have to disambiguate the two senses of
5887 "continue". If the next token is a '{' then
5888 treat it as the start of a continue block;
5889 otherwise treat it as a control operator.
5901 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5918 if (!PL_cryptseen) {
5919 PL_cryptseen = TRUE;
5923 LOP(OP_CRYPT,XTERM);
5926 LOP(OP_CHMOD,XTERM);
5929 LOP(OP_CHOWN,XTERM);
5932 LOP(OP_CONNECT,XTERM);
5951 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5952 if (orig_keyword == KEY_do) {
5961 PL_hints |= HINT_BLOCK_SCOPE;
5971 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5972 LOP(OP_DBMOPEN,XTERM);
5978 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5985 pl_yylval.ival = CopLINE(PL_curcop);
6001 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6002 UNIBRACK(OP_ENTEREVAL);
6016 case KEY_endhostent:
6022 case KEY_endservent:
6025 case KEY_endprotoent:
6036 pl_yylval.ival = CopLINE(PL_curcop);
6038 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6041 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6044 if ((PL_bufend - p) >= 3 &&
6045 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6047 else if ((PL_bufend - p) >= 4 &&
6048 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6051 if (isIDFIRST_lazy_if(p,UTF)) {
6052 p = scan_ident(p, PL_bufend,
6053 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6057 Perl_croak(aTHX_ "Missing $ on loop variable");
6059 s = SvPVX(PL_linestr) + soff;
6065 LOP(OP_FORMLINE,XTERM);
6071 LOP(OP_FCNTL,XTERM);
6077 LOP(OP_FLOCK,XTERM);
6086 LOP(OP_GREPSTART, XREF);
6089 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6104 case KEY_getpriority:
6105 LOP(OP_GETPRIORITY,XTERM);
6107 case KEY_getprotobyname:
6110 case KEY_getprotobynumber:
6111 LOP(OP_GPBYNUMBER,XTERM);
6113 case KEY_getprotoent:
6125 case KEY_getpeername:
6126 UNI(OP_GETPEERNAME);
6128 case KEY_gethostbyname:
6131 case KEY_gethostbyaddr:
6132 LOP(OP_GHBYADDR,XTERM);
6134 case KEY_gethostent:
6137 case KEY_getnetbyname:
6140 case KEY_getnetbyaddr:
6141 LOP(OP_GNBYADDR,XTERM);
6146 case KEY_getservbyname:
6147 LOP(OP_GSBYNAME,XTERM);
6149 case KEY_getservbyport:
6150 LOP(OP_GSBYPORT,XTERM);
6152 case KEY_getservent:
6155 case KEY_getsockname:
6156 UNI(OP_GETSOCKNAME);
6158 case KEY_getsockopt:
6159 LOP(OP_GSOCKOPT,XTERM);
6174 pl_yylval.ival = CopLINE(PL_curcop);
6184 pl_yylval.ival = CopLINE(PL_curcop);
6188 LOP(OP_INDEX,XTERM);
6194 LOP(OP_IOCTL,XTERM);
6206 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6238 LOP(OP_LISTEN,XTERM);
6247 s = scan_pat(s,OP_MATCH);
6248 TERM(sublex_start());
6251 LOP(OP_MAPSTART, XREF);
6254 LOP(OP_MKDIR,XTERM);
6257 LOP(OP_MSGCTL,XTERM);
6260 LOP(OP_MSGGET,XTERM);
6263 LOP(OP_MSGRCV,XTERM);
6266 LOP(OP_MSGSND,XTERM);
6271 PL_in_my = (U16)tmp;
6273 if (isIDFIRST_lazy_if(s,UTF)) {
6277 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6278 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6280 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6281 if (!PL_in_my_stash) {
6284 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6288 if (PL_madskills) { /* just add type to declarator token */
6289 sv_catsv(PL_thistoken, PL_nextwhite);
6291 sv_catpvn(PL_thistoken, start, s - start);
6299 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6306 s = tokenize_use(0, s);
6310 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6317 if (isIDFIRST_lazy_if(s,UTF)) {
6319 for (d = s; isALNUM_lazy_if(d,UTF);)
6321 for (t=d; isSPACE(*t);)
6323 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6325 && !(t[0] == '=' && t[1] == '>')
6327 int parms_len = (int)(d-s);
6328 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6329 "Precedence problem: open %.*s should be open(%.*s)",
6330 parms_len, s, parms_len, s);
6336 pl_yylval.ival = OP_OR;
6346 LOP(OP_OPEN_DIR,XTERM);
6349 checkcomma(s,PL_tokenbuf,"filehandle");
6353 checkcomma(s,PL_tokenbuf,"filehandle");
6372 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6373 s = force_version(s, FALSE);
6377 LOP(OP_PIPE_OP,XTERM);
6380 s = scan_str(s,!!PL_madskills,FALSE);
6383 pl_yylval.ival = OP_CONST;
6384 TERM(sublex_start());
6390 s = scan_str(s,!!PL_madskills,FALSE);
6393 PL_expect = XOPERATOR;
6395 if (SvCUR(PL_lex_stuff)) {
6398 d = SvPV_force(PL_lex_stuff, len);
6400 for (; isSPACE(*d) && len; --len, ++d)
6405 if (!warned && ckWARN(WARN_QW)) {
6406 for (; !isSPACE(*d) && len; --len, ++d) {
6408 Perl_warner(aTHX_ packWARN(WARN_QW),
6409 "Possible attempt to separate words with commas");
6412 else if (*d == '#') {
6413 Perl_warner(aTHX_ packWARN(WARN_QW),
6414 "Possible attempt to put comments in qw() list");
6420 for (; !isSPACE(*d) && len; --len, ++d)
6423 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
6424 words = append_elem(OP_LIST, words,
6425 newSVOP(OP_CONST, 0, tokeq(sv)));
6429 start_force(PL_curforce);
6430 NEXTVAL_NEXTTOKE.opval = words;
6435 SvREFCNT_dec(PL_lex_stuff);
6436 PL_lex_stuff = NULL;
6442 s = scan_str(s,!!PL_madskills,FALSE);
6445 pl_yylval.ival = OP_STRINGIFY;
6446 if (SvIVX(PL_lex_stuff) == '\'')
6447 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6448 TERM(sublex_start());
6451 s = scan_pat(s,OP_QR);
6452 TERM(sublex_start());
6455 s = scan_str(s,!!PL_madskills,FALSE);
6458 readpipe_override();
6459 TERM(sublex_start());
6467 s = force_version(s, FALSE);
6469 else if (*s != 'v' || !isDIGIT(s[1])
6470 || (s = force_version(s, TRUE), *s == 'v'))
6472 *PL_tokenbuf = '\0';
6473 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6474 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6475 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6477 yyerror("<> should be quotes");
6479 if (orig_keyword == KEY_require) {
6487 PL_last_uni = PL_oldbufptr;
6488 PL_last_lop_op = OP_REQUIRE;
6490 return REPORT( (int)REQUIRE );
6496 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6500 LOP(OP_RENAME,XTERM);
6509 LOP(OP_RINDEX,XTERM);
6518 UNIDOR(OP_READLINE);
6521 UNIDOR(OP_BACKTICK);
6530 LOP(OP_REVERSE,XTERM);
6533 UNIDOR(OP_READLINK);
6540 if (pl_yylval.opval)
6541 TERM(sublex_start());
6543 TOKEN(1); /* force error */
6546 checkcomma(s,PL_tokenbuf,"filehandle");
6556 LOP(OP_SELECT,XTERM);
6562 LOP(OP_SEMCTL,XTERM);
6565 LOP(OP_SEMGET,XTERM);
6568 LOP(OP_SEMOP,XTERM);
6574 LOP(OP_SETPGRP,XTERM);
6576 case KEY_setpriority:
6577 LOP(OP_SETPRIORITY,XTERM);
6579 case KEY_sethostent:
6585 case KEY_setservent:
6588 case KEY_setprotoent:
6598 LOP(OP_SEEKDIR,XTERM);
6600 case KEY_setsockopt:
6601 LOP(OP_SSOCKOPT,XTERM);
6607 LOP(OP_SHMCTL,XTERM);
6610 LOP(OP_SHMGET,XTERM);
6613 LOP(OP_SHMREAD,XTERM);
6616 LOP(OP_SHMWRITE,XTERM);
6619 LOP(OP_SHUTDOWN,XTERM);
6628 LOP(OP_SOCKET,XTERM);
6630 case KEY_socketpair:
6631 LOP(OP_SOCKPAIR,XTERM);
6634 checkcomma(s,PL_tokenbuf,"subroutine name");
6636 if (*s == ';' || *s == ')') /* probably a close */
6637 Perl_croak(aTHX_ "sort is now a reserved word");
6639 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6643 LOP(OP_SPLIT,XTERM);
6646 LOP(OP_SPRINTF,XTERM);
6649 LOP(OP_SPLICE,XTERM);
6664 LOP(OP_SUBSTR,XTERM);
6670 char tmpbuf[sizeof PL_tokenbuf];
6671 SSize_t tboffset = 0;
6672 expectation attrful;
6673 bool have_name, have_proto;
6674 const int key = tmp;
6679 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6680 SV *subtoken = newSVpvn(tstart, s - tstart);
6684 s = SKIPSPACE2(s,tmpwhite);
6689 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6690 (*s == ':' && s[1] == ':'))
6693 SV *nametoke = NULL;
6697 attrful = XATTRBLOCK;
6698 /* remember buffer pos'n for later force_word */
6699 tboffset = s - PL_oldbufptr;
6700 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6703 nametoke = newSVpvn(s, d - s);
6705 if (memchr(tmpbuf, ':', len))
6706 sv_setpvn(PL_subname, tmpbuf, len);
6708 sv_setsv(PL_subname,PL_curstname);
6709 sv_catpvs(PL_subname,"::");
6710 sv_catpvn(PL_subname,tmpbuf,len);
6717 CURMAD('X', nametoke);
6718 CURMAD('_', tmpwhite);
6719 (void) force_word(PL_oldbufptr + tboffset, WORD,
6722 s = SKIPSPACE2(d,tmpwhite);
6729 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6730 PL_expect = XTERMBLOCK;
6731 attrful = XATTRTERM;
6732 sv_setpvs(PL_subname,"?");
6736 if (key == KEY_format) {
6738 PL_lex_formbrack = PL_lex_brackets + 1;
6740 PL_thistoken = subtoken;
6744 (void) force_word(PL_oldbufptr + tboffset, WORD,
6750 /* Look for a prototype */
6753 bool bad_proto = FALSE;
6754 bool in_brackets = FALSE;
6755 char greedy_proto = ' ';
6756 bool proto_after_greedy_proto = FALSE;
6757 bool must_be_last = FALSE;
6758 bool underscore = FALSE;
6759 bool seen_underscore = FALSE;
6760 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6762 s = scan_str(s,!!PL_madskills,FALSE);
6764 Perl_croak(aTHX_ "Prototype not terminated");
6765 /* strip spaces and check for bad characters */
6766 d = SvPVX(PL_lex_stuff);
6768 for (p = d; *p; ++p) {
6774 proto_after_greedy_proto = TRUE;
6775 if (!strchr("$@%*;[]&\\_", *p)) {
6787 else if ( *p == ']' ) {
6788 in_brackets = FALSE;
6790 else if ( (*p == '@' || *p == '%') &&
6791 ( tmp < 2 || d[tmp-2] != '\\' ) &&
6793 must_be_last = TRUE;
6796 else if ( *p == '_' ) {
6797 underscore = seen_underscore = TRUE;
6804 if (proto_after_greedy_proto)
6805 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6806 "Prototype after '%c' for %"SVf" : %s",
6807 greedy_proto, SVfARG(PL_subname), d);
6809 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6810 "Illegal character %sin prototype for %"SVf" : %s",
6811 seen_underscore ? "after '_' " : "",
6812 SVfARG(PL_subname), d);
6813 SvCUR_set(PL_lex_stuff, tmp);
6818 CURMAD('q', PL_thisopen);
6819 CURMAD('_', tmpwhite);
6820 CURMAD('=', PL_thisstuff);
6821 CURMAD('Q', PL_thisclose);
6822 NEXTVAL_NEXTTOKE.opval =
6823 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6824 PL_lex_stuff = NULL;
6827 s = SKIPSPACE2(s,tmpwhite);
6835 if (*s == ':' && s[1] != ':')
6836 PL_expect = attrful;
6837 else if (*s != '{' && key == KEY_sub) {
6839 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6841 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6848 curmad('^', newSVpvs(""));
6849 CURMAD('_', tmpwhite);
6853 PL_thistoken = subtoken;
6856 NEXTVAL_NEXTTOKE.opval =
6857 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6858 PL_lex_stuff = NULL;
6864 sv_setpvs(PL_subname, "__ANON__");
6866 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6870 (void) force_word(PL_oldbufptr + tboffset, WORD,
6879 LOP(OP_SYSTEM,XREF);
6882 LOP(OP_SYMLINK,XTERM);
6885 LOP(OP_SYSCALL,XTERM);
6888 LOP(OP_SYSOPEN,XTERM);
6891 LOP(OP_SYSSEEK,XTERM);
6894 LOP(OP_SYSREAD,XTERM);
6897 LOP(OP_SYSWRITE,XTERM);
6901 TERM(sublex_start());
6922 LOP(OP_TRUNCATE,XTERM);
6934 pl_yylval.ival = CopLINE(PL_curcop);
6938 pl_yylval.ival = CopLINE(PL_curcop);
6942 LOP(OP_UNLINK,XTERM);
6948 LOP(OP_UNPACK,XTERM);
6951 LOP(OP_UTIME,XTERM);
6957 LOP(OP_UNSHIFT,XTERM);
6960 s = tokenize_use(1, s);
6970 pl_yylval.ival = CopLINE(PL_curcop);
6974 pl_yylval.ival = CopLINE(PL_curcop);
6978 PL_hints |= HINT_BLOCK_SCOPE;
6985 LOP(OP_WAITPID,XTERM);
6994 ctl_l[0] = toCTRL('L');
6996 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6999 /* Make sure $^L is defined */
7000 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7005 if (PL_expect == XOPERATOR)
7011 pl_yylval.ival = OP_XOR;
7016 TERM(sublex_start());
7021 #pragma segment Main
7025 S_pending_ident(pTHX)
7030 /* pit holds the identifier we read and pending_ident is reset */
7031 char pit = PL_pending_ident;
7032 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7033 /* All routes through this function want to know if there is a colon. */
7034 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7035 PL_pending_ident = 0;
7037 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7038 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7039 "### Pending identifier '%s'\n", PL_tokenbuf); });
7041 /* if we're in a my(), we can't allow dynamics here.
7042 $foo'bar has already been turned into $foo::bar, so
7043 just check for colons.
7045 if it's a legal name, the OP is a PADANY.
7048 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
7050 yyerror(Perl_form(aTHX_ "No package name allowed for "
7051 "variable %s in \"our\"",
7053 tmp = allocmy(PL_tokenbuf);
7057 yyerror(Perl_form(aTHX_ PL_no_myglob,
7058 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7060 pl_yylval.opval = newOP(OP_PADANY, 0);
7061 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf);
7067 build the ops for accesses to a my() variable.
7069 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7070 then used in a comparison. This catches most, but not
7071 all cases. For instance, it catches
7072 sort { my($a); $a <=> $b }
7074 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7075 (although why you'd do that is anyone's guess).
7080 tmp = pad_findmy(PL_tokenbuf);
7081 if (tmp != NOT_IN_PAD) {
7082 /* might be an "our" variable" */
7083 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7084 /* build ops for a bareword */
7085 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7086 HEK * const stashname = HvNAME_HEK(stash);
7087 SV * const sym = newSVhek(stashname);
7088 sv_catpvs(sym, "::");
7089 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7090 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7091 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7094 ? (GV_ADDMULTI | GV_ADDINEVAL)
7097 ((PL_tokenbuf[0] == '$') ? SVt_PV
7098 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7103 /* if it's a sort block and they're naming $a or $b */
7104 if (PL_last_lop_op == OP_SORT &&
7105 PL_tokenbuf[0] == '$' &&
7106 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7109 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7110 d < PL_bufend && *d != '\n';
7113 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7114 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7120 pl_yylval.opval = newOP(OP_PADANY, 0);
7121 pl_yylval.opval->op_targ = tmp;
7127 Whine if they've said @foo in a doublequoted string,
7128 and @foo isn't a variable we can find in the symbol
7131 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7132 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7134 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7135 /* DO NOT warn for @- and @+ */
7136 && !( PL_tokenbuf[2] == '\0' &&
7137 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7140 /* Downgraded from fatal to warning 20000522 mjd */
7141 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7142 "Possible unintended interpolation of %s in string",
7147 /* build ops for a bareword */
7148 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7150 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7152 PL_tokenbuf + 1, tokenbuf_len - 1,
7153 /* If the identifier refers to a stash, don't autovivify it.
7154 * Change 24660 had the side effect of causing symbol table
7155 * hashes to always be defined, even if they were freshly
7156 * created and the only reference in the entire program was
7157 * the single statement with the defined %foo::bar:: test.
7158 * It appears that all code in the wild doing this actually
7159 * wants to know whether sub-packages have been loaded, so
7160 * by avoiding auto-vivifying symbol tables, we ensure that
7161 * defined %foo::bar:: continues to be false, and the existing
7162 * tests still give the expected answers, even though what
7163 * they're actually testing has now changed subtly.
7165 (*PL_tokenbuf == '%'
7166 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7169 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7170 ((PL_tokenbuf[0] == '$') ? SVt_PV
7171 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7177 * The following code was generated by perl_keyword.pl.
7181 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7185 PERL_ARGS_ASSERT_KEYWORD;
7189 case 1: /* 5 tokens of length 1 */
7221 case 2: /* 18 tokens of length 2 */
7367 case 3: /* 29 tokens of length 3 */
7371 if (name[1] == 'N' &&
7434 if (name[1] == 'i' &&
7466 if (name[1] == 'o' &&
7475 if (name[1] == 'e' &&
7484 if (name[1] == 'n' &&
7493 if (name[1] == 'o' &&
7502 if (name[1] == 'a' &&
7511 if (name[1] == 'o' &&
7573 if (name[1] == 'e' &&
7587 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7613 if (name[1] == 'i' &&
7622 if (name[1] == 's' &&
7631 if (name[1] == 'e' &&
7640 if (name[1] == 'o' &&
7652 case 4: /* 41 tokens of length 4 */
7656 if (name[1] == 'O' &&
7666 if (name[1] == 'N' &&
7676 if (name[1] == 'i' &&
7686 if (name[1] == 'h' &&
7696 if (name[1] == 'u' &&
7709 if (name[2] == 'c' &&
7718 if (name[2] == 's' &&
7727 if (name[2] == 'a' &&
7763 if (name[1] == 'o' &&
7776 if (name[2] == 't' &&
7785 if (name[2] == 'o' &&
7794 if (name[2] == 't' &&
7803 if (name[2] == 'e' &&
7816 if (name[1] == 'o' &&
7829 if (name[2] == 'y' &&
7838 if (name[2] == 'l' &&
7854 if (name[2] == 's' &&
7863 if (name[2] == 'n' &&
7872 if (name[2] == 'c' &&
7885 if (name[1] == 'e' &&
7895 if (name[1] == 'p' &&
7908 if (name[2] == 'c' &&
7917 if (name[2] == 'p' &&
7926 if (name[2] == 's' &&
7942 if (name[2] == 'n' &&
8012 if (name[2] == 'r' &&
8021 if (name[2] == 'r' &&
8030 if (name[2] == 'a' &&
8046 if (name[2] == 'l' &&
8108 if (name[2] == 'e' &&
8111 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8124 case 5: /* 39 tokens of length 5 */
8128 if (name[1] == 'E' &&
8139 if (name[1] == 'H' &&
8153 if (name[2] == 'a' &&
8163 if (name[2] == 'a' &&
8180 if (name[2] == 'e' &&
8190 if (name[2] == 'e' &&
8194 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8210 if (name[3] == 'i' &&
8219 if (name[3] == 'o' &&
8255 if (name[2] == 'o' &&
8265 if (name[2] == 'y' &&
8279 if (name[1] == 'l' &&
8293 if (name[2] == 'n' &&
8303 if (name[2] == 'o' &&
8317 if (name[1] == 'i' &&
8322 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8331 if (name[2] == 'd' &&
8341 if (name[2] == 'c' &&
8358 if (name[2] == 'c' &&
8368 if (name[2] == 't' &&
8382 if (name[1] == 'k' &&
8393 if (name[1] == 'r' &&
8407 if (name[2] == 's' &&
8417 if (name[2] == 'd' &&
8434 if (name[2] == 'm' &&
8444 if (name[2] == 'i' &&
8454 if (name[2] == 'e' &&
8464 if (name[2] == 'l' &&
8474 if (name[2] == 'a' &&
8487 if (name[3] == 't' &&
8490 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8496 if (name[3] == 'd' &&
8513 if (name[1] == 'i' &&
8527 if (name[2] == 'a' &&
8540 if (name[3] == 'e' &&
8575 if (name[2] == 'i' &&
8592 if (name[2] == 'i' &&
8602 if (name[2] == 'i' &&
8619 case 6: /* 33 tokens of length 6 */
8623 if (name[1] == 'c' &&
8638 if (name[2] == 'l' &&
8649 if (name[2] == 'r' &&
8664 if (name[1] == 'e' &&
8679 if (name[2] == 's' &&
8684 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8690 if (name[2] == 'i' &&
8708 if (name[2] == 'l' &&
8719 if (name[2] == 'r' &&
8734 if (name[1] == 'm' &&
8749 if (name[2] == 'n' &&
8760 if (name[2] == 's' &&
8775 if (name[1] == 's' &&
8781 if (name[4] == 't' &&
8790 if (name[4] == 'e' &&
8799 if (name[4] == 'c' &&
8808 if (name[4] == 'n' &&
8824 if (name[1] == 'r' &&
8842 if (name[3] == 'a' &&
8852 if (name[3] == 'u' &&
8866 if (name[2] == 'n' &&
8884 if (name[2] == 'a' &&
8898 if (name[3] == 'e' &&
8911 if (name[4] == 't' &&
8920 if (name[4] == 'e' &&
8942 if (name[4] == 't' &&
8951 if (name[4] == 'e' &&
8967 if (name[2] == 'c' &&
8978 if (name[2] == 'l' &&
8989 if (name[2] == 'b' &&
9000 if (name[2] == 's' &&
9023 if (name[4] == 's' &&
9032 if (name[4] == 'n' &&
9045 if (name[3] == 'a' &&
9062 if (name[1] == 'a' &&
9077 case 7: /* 29 tokens of length 7 */
9081 if (name[1] == 'E' &&
9094 if (name[1] == '_' &&
9107 if (name[1] == 'i' &&
9114 return -KEY_binmode;
9120 if (name[1] == 'o' &&
9127 return -KEY_connect;
9136 if (name[2] == 'm' &&
9142 return -KEY_dbmopen;
9153 if (name[4] == 'u' &&
9157 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9163 if (name[4] == 'n' &&
9184 if (name[1] == 'o' &&
9197 if (name[1] == 'e' &&
9204 if (name[5] == 'r' &&
9207 return -KEY_getpgrp;
9213 if (name[5] == 'i' &&
9216 return -KEY_getppid;
9229 if (name[1] == 'c' &&
9236 return -KEY_lcfirst;
9242 if (name[1] == 'p' &&
9249 return -KEY_opendir;
9255 if (name[1] == 'a' &&
9273 if (name[3] == 'd' &&
9278 return -KEY_readdir;
9284 if (name[3] == 'u' &&
9295 if (name[3] == 'e' &&
9300 return -KEY_reverse;
9319 if (name[3] == 'k' &&
9324 return -KEY_seekdir;
9330 if (name[3] == 'p' &&
9335 return -KEY_setpgrp;
9345 if (name[2] == 'm' &&
9351 return -KEY_shmread;
9357 if (name[2] == 'r' &&
9363 return -KEY_sprintf;
9372 if (name[3] == 'l' &&
9377 return -KEY_symlink;
9386 if (name[4] == 'a' &&
9390 return -KEY_syscall;
9396 if (name[4] == 'p' &&
9400 return -KEY_sysopen;
9406 if (name[4] == 'e' &&
9410 return -KEY_sysread;
9416 if (name[4] == 'e' &&
9420 return -KEY_sysseek;
9438 if (name[1] == 'e' &&
9445 return -KEY_telldir;
9454 if (name[2] == 'f' &&
9460 return -KEY_ucfirst;
9466 if (name[2] == 's' &&
9472 return -KEY_unshift;
9482 if (name[1] == 'a' &&
9489 return -KEY_waitpid;
9498 case 8: /* 26 tokens of length 8 */
9502 if (name[1] == 'U' &&
9510 return KEY_AUTOLOAD;
9521 if (name[3] == 'A' &&
9527 return KEY___DATA__;
9533 if (name[3] == 'I' &&
9539 return -KEY___FILE__;
9545 if (name[3] == 'I' &&
9551 return -KEY___LINE__;
9567 if (name[2] == 'o' &&
9574 return -KEY_closedir;
9580 if (name[2] == 'n' &&
9587 return -KEY_continue;
9597 if (name[1] == 'b' &&
9605 return -KEY_dbmclose;
9611 if (name[1] == 'n' &&
9617 if (name[4] == 'r' &&
9622 return -KEY_endgrent;
9628 if (name[4] == 'w' &&
9633 return -KEY_endpwent;
9646 if (name[1] == 'o' &&
9654 return -KEY_formline;
9660 if (name[1] == 'e' &&
9671 if (name[6] == 'n' &&
9674 return -KEY_getgrent;
9680 if (name[6] == 'i' &&
9683 return -KEY_getgrgid;
9689 if (name[6] == 'a' &&
9692 return -KEY_getgrnam;
9705 if (name[4] == 'o' &&
9710 return -KEY_getlogin;
9721 if (name[6] == 'n' &&
9724 return -KEY_getpwent;
9730 if (name[6] == 'a' &&
9733 return -KEY_getpwnam;
9739 if (name[6] == 'i' &&
9742 return -KEY_getpwuid;
9762 if (name[1] == 'e' &&
9769 if (name[5] == 'i' &&
9776 return -KEY_readline;
9781 return -KEY_readlink;
9792 if (name[5] == 'i' &&
9796 return -KEY_readpipe;
9817 if (name[4] == 'r' &&
9822 return -KEY_setgrent;
9828 if (name[4] == 'w' &&
9833 return -KEY_setpwent;
9849 if (name[3] == 'w' &&
9855 return -KEY_shmwrite;
9861 if (name[3] == 't' &&
9867 return -KEY_shutdown;
9877 if (name[2] == 's' &&
9884 return -KEY_syswrite;
9894 if (name[1] == 'r' &&
9902 return -KEY_truncate;
9911 case 9: /* 9 tokens of length 9 */
9915 if (name[1] == 'N' &&
9924 return KEY_UNITCHECK;
9930 if (name[1] == 'n' &&
9939 return -KEY_endnetent;
9945 if (name[1] == 'e' &&
9954 return -KEY_getnetent;
9960 if (name[1] == 'o' &&
9969 return -KEY_localtime;
9975 if (name[1] == 'r' &&
9984 return KEY_prototype;
9990 if (name[1] == 'u' &&
9999 return -KEY_quotemeta;
10005 if (name[1] == 'e' &&
10014 return -KEY_rewinddir;
10020 if (name[1] == 'e' &&
10029 return -KEY_setnetent;
10035 if (name[1] == 'a' &&
10044 return -KEY_wantarray;
10053 case 10: /* 9 tokens of length 10 */
10057 if (name[1] == 'n' &&
10063 if (name[4] == 'o' &&
10070 return -KEY_endhostent;
10076 if (name[4] == 'e' &&
10083 return -KEY_endservent;
10096 if (name[1] == 'e' &&
10102 if (name[4] == 'o' &&
10109 return -KEY_gethostent;
10118 if (name[5] == 'r' &&
10124 return -KEY_getservent;
10130 if (name[5] == 'c' &&
10136 return -KEY_getsockopt;
10156 if (name[2] == 't')
10161 if (name[4] == 'o' &&
10168 return -KEY_sethostent;
10177 if (name[5] == 'r' &&
10183 return -KEY_setservent;
10189 if (name[5] == 'c' &&
10195 return -KEY_setsockopt;
10212 if (name[2] == 'c' &&
10221 return -KEY_socketpair;
10234 case 11: /* 8 tokens of length 11 */
10238 if (name[1] == '_' &&
10248 { /* __PACKAGE__ */
10249 return -KEY___PACKAGE__;
10255 if (name[1] == 'n' &&
10265 { /* endprotoent */
10266 return -KEY_endprotoent;
10272 if (name[1] == 'e' &&
10281 if (name[5] == 'e' &&
10287 { /* getpeername */
10288 return -KEY_getpeername;
10297 if (name[6] == 'o' &&
10302 { /* getpriority */
10303 return -KEY_getpriority;
10309 if (name[6] == 't' &&
10314 { /* getprotoent */
10315 return -KEY_getprotoent;
10329 if (name[4] == 'o' &&
10336 { /* getsockname */
10337 return -KEY_getsockname;
10350 if (name[1] == 'e' &&
10358 if (name[6] == 'o' &&
10363 { /* setpriority */
10364 return -KEY_setpriority;
10370 if (name[6] == 't' &&
10375 { /* setprotoent */
10376 return -KEY_setprotoent;
10392 case 12: /* 2 tokens of length 12 */
10393 if (name[0] == 'g' &&
10405 if (name[9] == 'd' &&
10408 { /* getnetbyaddr */
10409 return -KEY_getnetbyaddr;
10415 if (name[9] == 'a' &&
10418 { /* getnetbyname */
10419 return -KEY_getnetbyname;
10431 case 13: /* 4 tokens of length 13 */
10432 if (name[0] == 'g' &&
10439 if (name[4] == 'o' &&
10448 if (name[10] == 'd' &&
10451 { /* gethostbyaddr */
10452 return -KEY_gethostbyaddr;
10458 if (name[10] == 'a' &&
10461 { /* gethostbyname */
10462 return -KEY_gethostbyname;
10475 if (name[4] == 'e' &&
10484 if (name[10] == 'a' &&
10487 { /* getservbyname */
10488 return -KEY_getservbyname;
10494 if (name[10] == 'o' &&
10497 { /* getservbyport */
10498 return -KEY_getservbyport;
10517 case 14: /* 1 tokens of length 14 */
10518 if (name[0] == 'g' &&
10532 { /* getprotobyname */
10533 return -KEY_getprotobyname;
10538 case 16: /* 1 tokens of length 16 */
10539 if (name[0] == 'g' &&
10555 { /* getprotobynumber */
10556 return -KEY_getprotobynumber;
10570 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10574 PERL_ARGS_ASSERT_CHECKCOMMA;
10576 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10577 if (ckWARN(WARN_SYNTAX)) {
10580 for (w = s+2; *w && level; w++) {
10583 else if (*w == ')')
10586 while (isSPACE(*w))
10588 /* the list of chars below is for end of statements or
10589 * block / parens, boolean operators (&&, ||, //) and branch
10590 * constructs (or, and, if, until, unless, while, err, for).
10591 * Not a very solid hack... */
10592 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10593 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10594 "%s (...) interpreted as function",name);
10597 while (s < PL_bufend && isSPACE(*s))
10601 while (s < PL_bufend && isSPACE(*s))
10603 if (isIDFIRST_lazy_if(s,UTF)) {
10604 const char * const w = s++;
10605 while (isALNUM_lazy_if(s,UTF))
10607 while (s < PL_bufend && isSPACE(*s))
10611 if (keyword(w, s - w, 0))
10614 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10615 if (gv && GvCVu(gv))
10617 Perl_croak(aTHX_ "No comma allowed after %s", what);
10622 /* Either returns sv, or mortalizes sv and returns a new SV*.
10623 Best used as sv=new_constant(..., sv, ...).
10624 If s, pv are NULL, calls subroutine with one argument,
10625 and type is used with error messages only. */
10628 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10629 SV *sv, SV *pv, const char *type, STRLEN typelen)
10632 HV * const table = GvHV(PL_hintgv); /* ^H */
10636 const char *why1 = "", *why2 = "", *why3 = "";
10638 PERL_ARGS_ASSERT_NEW_CONSTANT;
10640 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10643 why2 = (const char *)
10644 (strEQ(key,"charnames")
10645 ? "(possibly a missing \"use charnames ...\")"
10647 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10648 (type ? type: "undef"), why2);
10650 /* This is convoluted and evil ("goto considered harmful")
10651 * but I do not understand the intricacies of all the different
10652 * failure modes of %^H in here. The goal here is to make
10653 * the most probable error message user-friendly. --jhi */
10658 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10659 (type ? type: "undef"), why1, why2, why3);
10661 yyerror(SvPVX_const(msg));
10665 cvp = hv_fetch(table, key, keylen, FALSE);
10666 if (!cvp || !SvOK(*cvp)) {
10669 why3 = "} is not defined";
10672 sv_2mortal(sv); /* Parent created it permanently */
10675 pv = newSVpvn_flags(s, len, SVs_TEMP);
10677 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10679 typesv = &PL_sv_undef;
10681 PUSHSTACKi(PERLSI_OVERLOAD);
10693 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10697 /* Check the eval first */
10698 if (!PL_in_eval && SvTRUE(ERRSV)) {
10699 sv_catpvs(ERRSV, "Propagated");
10700 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10702 res = SvREFCNT_inc_simple(sv);
10706 SvREFCNT_inc_simple_void(res);
10715 why1 = "Call to &{$^H{";
10717 why3 = "}} did not return a defined value";
10725 /* Returns a NUL terminated string, with the length of the string written to
10729 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10732 register char *d = dest;
10733 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10735 PERL_ARGS_ASSERT_SCAN_WORD;
10739 Perl_croak(aTHX_ ident_too_long);
10740 if (isALNUM(*s)) /* UTF handled below */
10742 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10747 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10751 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10752 char *t = s + UTF8SKIP(s);
10754 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10758 Perl_croak(aTHX_ ident_too_long);
10759 Copy(s, d, len, char);
10772 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10775 char *bracket = NULL;
10777 register char *d = dest;
10778 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10780 PERL_ARGS_ASSERT_SCAN_IDENT;
10785 while (isDIGIT(*s)) {
10787 Perl_croak(aTHX_ ident_too_long);
10794 Perl_croak(aTHX_ ident_too_long);
10795 if (isALNUM(*s)) /* UTF handled below */
10797 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10802 else if (*s == ':' && s[1] == ':') {
10806 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10807 char *t = s + UTF8SKIP(s);
10808 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10810 if (d + (t - s) > e)
10811 Perl_croak(aTHX_ ident_too_long);
10812 Copy(s, d, t - s, char);
10823 if (PL_lex_state != LEX_NORMAL)
10824 PL_lex_state = LEX_INTERPENDMAYBE;
10827 if (*s == '$' && s[1] &&
10828 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10841 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10846 if (isSPACE(s[-1])) {
10848 const char ch = *s++;
10849 if (!SPACE_OR_TAB(ch)) {
10855 if (isIDFIRST_lazy_if(d,UTF)) {
10859 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10860 end += UTF8SKIP(end);
10861 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10862 end += UTF8SKIP(end);
10864 Copy(s, d, end - s, char);
10869 while ((isALNUM(*s) || *s == ':') && d < e)
10872 Perl_croak(aTHX_ ident_too_long);
10875 while (s < send && SPACE_OR_TAB(*s))
10877 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10878 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10879 const char * const brack =
10881 ((*s == '[') ? "[...]" : "{...}");
10882 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10883 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10884 funny, dest, brack, funny, dest, brack);
10887 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10891 /* Handle extended ${^Foo} variables
10892 * 1999-02-27 mjd-perl-patch@plover.com */
10893 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10897 while (isALNUM(*s) && d < e) {
10901 Perl_croak(aTHX_ ident_too_long);
10906 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10907 PL_lex_state = LEX_INTERPEND;
10910 if (PL_lex_state == LEX_NORMAL) {
10911 if (ckWARN(WARN_AMBIGUOUS) &&
10912 (keyword(dest, d - dest, 0)
10913 || get_cvn_flags(dest, d - dest, 0)))
10917 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10918 "Ambiguous use of %c{%s} resolved to %c%s",
10919 funny, dest, funny, dest);
10924 s = bracket; /* let the parser handle it */
10928 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10929 PL_lex_state = LEX_INTERPEND;
10934 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10936 PERL_ARGS_ASSERT_PMFLAG;
10938 PERL_UNUSED_CONTEXT;
10940 const char c = (char)ch;
10942 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10943 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10944 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10945 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10946 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10952 S_scan_pat(pTHX_ char *start, I32 type)
10956 char *s = scan_str(start,!!PL_madskills,FALSE);
10957 const char * const valid_flags =
10958 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10963 PERL_ARGS_ASSERT_SCAN_PAT;
10966 const char * const delimiter = skipspace(start);
10970 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10971 : "Search pattern not terminated" ));
10974 pm = (PMOP*)newPMOP(type, 0);
10975 if (PL_multi_open == '?') {
10976 /* This is the only point in the code that sets PMf_ONCE: */
10977 pm->op_pmflags |= PMf_ONCE;
10979 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10980 allows us to restrict the list needed by reset to just the ??
10982 assert(type != OP_TRANS);
10984 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10987 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10990 elements = mg->mg_len / sizeof(PMOP**);
10991 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10992 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10993 mg->mg_len = elements * sizeof(PMOP**);
10994 PmopSTASH_set(pm,PL_curstash);
11000 while (*s && strchr(valid_flags, *s))
11001 pmflag(&pm->op_pmflags,*s++);
11003 if (PL_madskills && modstart != s) {
11004 SV* tmptoken = newSVpvn(modstart, s - modstart);
11005 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11008 /* issue a warning if /c is specified,but /g is not */
11009 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11011 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11012 "Use of /c modifier is meaningless without /g" );
11015 PL_lex_op = (OP*)pm;
11016 pl_yylval.ival = OP_MATCH;
11021 S_scan_subst(pTHX_ char *start)
11032 PERL_ARGS_ASSERT_SCAN_SUBST;
11034 pl_yylval.ival = OP_NULL;
11036 s = scan_str(start,!!PL_madskills,FALSE);
11039 Perl_croak(aTHX_ "Substitution pattern not terminated");
11041 if (s[-1] == PL_multi_open)
11044 if (PL_madskills) {
11045 CURMAD('q', PL_thisopen);
11046 CURMAD('_', PL_thiswhite);
11047 CURMAD('E', PL_thisstuff);
11048 CURMAD('Q', PL_thisclose);
11049 PL_realtokenstart = s - SvPVX(PL_linestr);
11053 first_start = PL_multi_start;
11054 s = scan_str(s,!!PL_madskills,FALSE);
11056 if (PL_lex_stuff) {
11057 SvREFCNT_dec(PL_lex_stuff);
11058 PL_lex_stuff = NULL;
11060 Perl_croak(aTHX_ "Substitution replacement not terminated");
11062 PL_multi_start = first_start; /* so whole substitution is taken together */
11064 pm = (PMOP*)newPMOP(OP_SUBST, 0);
11067 if (PL_madskills) {
11068 CURMAD('z', PL_thisopen);
11069 CURMAD('R', PL_thisstuff);
11070 CURMAD('Z', PL_thisclose);
11076 if (*s == EXEC_PAT_MOD) {
11080 else if (strchr(S_PAT_MODS, *s))
11081 pmflag(&pm->op_pmflags,*s++);
11087 if (PL_madskills) {
11089 curmad('m', newSVpvn(modstart, s - modstart));
11090 append_madprops(PL_thismad, (OP*)pm, 0);
11094 if ((pm->op_pmflags & PMf_CONTINUE)) {
11095 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11099 SV * const repl = newSVpvs("");
11101 PL_sublex_info.super_bufptr = s;
11102 PL_sublex_info.super_bufend = PL_bufend;
11104 pm->op_pmflags |= PMf_EVAL;
11107 sv_catpvs(repl, "eval ");
11109 sv_catpvs(repl, "do ");
11111 sv_catpvs(repl, "{");
11112 sv_catsv(repl, PL_lex_repl);
11113 if (strchr(SvPVX(PL_lex_repl), '#'))
11114 sv_catpvs(repl, "\n");
11115 sv_catpvs(repl, "}");
11117 SvREFCNT_dec(PL_lex_repl);
11118 PL_lex_repl = repl;
11121 PL_lex_op = (OP*)pm;
11122 pl_yylval.ival = OP_SUBST;
11127 S_scan_trans(pTHX_ char *start)
11140 PERL_ARGS_ASSERT_SCAN_TRANS;
11142 pl_yylval.ival = OP_NULL;
11144 s = scan_str(start,!!PL_madskills,FALSE);
11146 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11148 if (s[-1] == PL_multi_open)
11151 if (PL_madskills) {
11152 CURMAD('q', PL_thisopen);
11153 CURMAD('_', PL_thiswhite);
11154 CURMAD('E', PL_thisstuff);
11155 CURMAD('Q', PL_thisclose);
11156 PL_realtokenstart = s - SvPVX(PL_linestr);
11160 s = scan_str(s,!!PL_madskills,FALSE);
11162 if (PL_lex_stuff) {
11163 SvREFCNT_dec(PL_lex_stuff);
11164 PL_lex_stuff = NULL;
11166 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11168 if (PL_madskills) {
11169 CURMAD('z', PL_thisopen);
11170 CURMAD('R', PL_thisstuff);
11171 CURMAD('Z', PL_thisclose);
11174 complement = del = squash = 0;
11181 complement = OPpTRANS_COMPLEMENT;
11184 del = OPpTRANS_DELETE;
11187 squash = OPpTRANS_SQUASH;
11196 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11197 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11198 o->op_private &= ~OPpTRANS_ALL;
11199 o->op_private |= del|squash|complement|
11200 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11201 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11204 pl_yylval.ival = OP_TRANS;
11207 if (PL_madskills) {
11209 curmad('m', newSVpvn(modstart, s - modstart));
11210 append_madprops(PL_thismad, o, 0);
11219 S_scan_heredoc(pTHX_ register char *s)
11223 I32 op_type = OP_SCALAR;
11227 const char *found_newline;
11231 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11233 I32 stuffstart = s - SvPVX(PL_linestr);
11236 PL_realtokenstart = -1;
11239 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11243 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11247 while (SPACE_OR_TAB(*peek))
11249 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11252 s = delimcpy(d, e, s, PL_bufend, term, &len);
11262 if (!isALNUM_lazy_if(s,UTF))
11263 deprecate("bare << to mean <<\"\"");
11264 for (; isALNUM_lazy_if(s,UTF); s++) {
11269 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11270 Perl_croak(aTHX_ "Delimiter for here document is too long");
11273 len = d - PL_tokenbuf;
11276 if (PL_madskills) {
11277 tstart = PL_tokenbuf + !outer;
11278 PL_thisclose = newSVpvn(tstart, len - !outer);
11279 tstart = SvPVX(PL_linestr) + stuffstart;
11280 PL_thisopen = newSVpvn(tstart, s - tstart);
11281 stuffstart = s - SvPVX(PL_linestr);
11284 #ifndef PERL_STRICT_CR
11285 d = strchr(s, '\r');
11287 char * const olds = s;
11289 while (s < PL_bufend) {
11295 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11304 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11311 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11312 herewas = newSVpvn(s,PL_bufend-s);
11316 herewas = newSVpvn(s-1,found_newline-s+1);
11319 herewas = newSVpvn(s,found_newline-s);
11323 if (PL_madskills) {
11324 tstart = SvPVX(PL_linestr) + stuffstart;
11326 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11328 PL_thisstuff = newSVpvn(tstart, s - tstart);
11331 s += SvCUR(herewas);
11334 stuffstart = s - SvPVX(PL_linestr);
11340 tmpstr = newSV_type(SVt_PVIV);
11341 SvGROW(tmpstr, 80);
11342 if (term == '\'') {
11343 op_type = OP_CONST;
11344 SvIV_set(tmpstr, -1);
11346 else if (term == '`') {
11347 op_type = OP_BACKTICK;
11348 SvIV_set(tmpstr, '\\');
11352 PL_multi_start = CopLINE(PL_curcop);
11353 PL_multi_open = PL_multi_close = '<';
11354 term = *PL_tokenbuf;
11355 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11356 char * const bufptr = PL_sublex_info.super_bufptr;
11357 char * const bufend = PL_sublex_info.super_bufend;
11358 char * const olds = s - SvCUR(herewas);
11359 s = strchr(bufptr, '\n');
11363 while (s < bufend &&
11364 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11366 CopLINE_inc(PL_curcop);
11369 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11370 missingterm(PL_tokenbuf);
11372 sv_setpvn(herewas,bufptr,d-bufptr+1);
11373 sv_setpvn(tmpstr,d+1,s-d);
11375 sv_catpvn(herewas,s,bufend-s);
11376 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11383 while (s < PL_bufend &&
11384 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11386 CopLINE_inc(PL_curcop);
11388 if (s >= PL_bufend) {
11389 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11390 missingterm(PL_tokenbuf);
11392 sv_setpvn(tmpstr,d+1,s-d);
11394 if (PL_madskills) {
11396 sv_catpvn(PL_thisstuff, d + 1, s - d);
11398 PL_thisstuff = newSVpvn(d + 1, s - d);
11399 stuffstart = s - SvPVX(PL_linestr);
11403 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11405 sv_catpvn(herewas,s,PL_bufend-s);
11406 sv_setsv(PL_linestr,herewas);
11407 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11408 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11409 PL_last_lop = PL_last_uni = NULL;
11412 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
11413 while (s >= PL_bufend) { /* multiple line string? */
11415 if (PL_madskills) {
11416 tstart = SvPVX(PL_linestr) + stuffstart;
11418 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11420 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11424 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11425 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11426 missingterm(PL_tokenbuf);
11429 stuffstart = s - SvPVX(PL_linestr);
11431 CopLINE_inc(PL_curcop);
11432 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11433 PL_last_lop = PL_last_uni = NULL;
11434 #ifndef PERL_STRICT_CR
11435 if (PL_bufend - PL_linestart >= 2) {
11436 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11437 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11439 PL_bufend[-2] = '\n';
11441 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11443 else if (PL_bufend[-1] == '\r')
11444 PL_bufend[-1] = '\n';
11446 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11447 PL_bufend[-1] = '\n';
11449 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11450 update_debugger_info(PL_linestr, NULL, 0);
11451 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11452 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11453 *(SvPVX(PL_linestr) + off ) = ' ';
11454 sv_catsv(PL_linestr,herewas);
11455 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11456 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11460 sv_catsv(tmpstr,PL_linestr);
11465 PL_multi_end = CopLINE(PL_curcop);
11466 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11467 SvPV_shrink_to_cur(tmpstr);
11469 SvREFCNT_dec(herewas);
11471 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11473 else if (PL_encoding)
11474 sv_recode_to_utf8(tmpstr, PL_encoding);
11476 PL_lex_stuff = tmpstr;
11477 pl_yylval.ival = op_type;
11481 /* scan_inputsymbol
11482 takes: current position in input buffer
11483 returns: new position in input buffer
11484 side-effects: pl_yylval and lex_op are set.
11489 <FH> read from filehandle
11490 <pkg::FH> read from package qualified filehandle
11491 <pkg'FH> read from package qualified filehandle
11492 <$fh> read from filehandle in $fh
11493 <*.h> filename glob
11498 S_scan_inputsymbol(pTHX_ char *start)
11501 register char *s = start; /* current position in buffer */
11504 char *d = PL_tokenbuf; /* start of temp holding space */
11505 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11507 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11509 end = strchr(s, '\n');
11512 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11514 /* die if we didn't have space for the contents of the <>,
11515 or if it didn't end, or if we see a newline
11518 if (len >= (I32)sizeof PL_tokenbuf)
11519 Perl_croak(aTHX_ "Excessively long <> operator");
11521 Perl_croak(aTHX_ "Unterminated <> operator");
11526 Remember, only scalar variables are interpreted as filehandles by
11527 this code. Anything more complex (e.g., <$fh{$num}>) will be
11528 treated as a glob() call.
11529 This code makes use of the fact that except for the $ at the front,
11530 a scalar variable and a filehandle look the same.
11532 if (*d == '$' && d[1]) d++;
11534 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11535 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11538 /* If we've tried to read what we allow filehandles to look like, and
11539 there's still text left, then it must be a glob() and not a getline.
11540 Use scan_str to pull out the stuff between the <> and treat it
11541 as nothing more than a string.
11544 if (d - PL_tokenbuf != len) {
11545 pl_yylval.ival = OP_GLOB;
11546 s = scan_str(start,!!PL_madskills,FALSE);
11548 Perl_croak(aTHX_ "Glob not terminated");
11552 bool readline_overriden = FALSE;
11555 /* we're in a filehandle read situation */
11558 /* turn <> into <ARGV> */
11560 Copy("ARGV",d,5,char);
11562 /* Check whether readline() is overriden */
11563 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11565 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11567 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11568 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11569 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11570 readline_overriden = TRUE;
11572 /* if <$fh>, create the ops to turn the variable into a
11576 /* try to find it in the pad for this block, otherwise find
11577 add symbol table ops
11579 const PADOFFSET tmp = pad_findmy(d);
11580 if (tmp != NOT_IN_PAD) {
11581 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11582 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11583 HEK * const stashname = HvNAME_HEK(stash);
11584 SV * const sym = sv_2mortal(newSVhek(stashname));
11585 sv_catpvs(sym, "::");
11586 sv_catpv(sym, d+1);
11591 OP * const o = newOP(OP_PADSV, 0);
11593 PL_lex_op = readline_overriden
11594 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11595 append_elem(OP_LIST, o,
11596 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11597 : (OP*)newUNOP(OP_READLINE, 0, o);
11606 ? (GV_ADDMULTI | GV_ADDINEVAL)
11609 PL_lex_op = readline_overriden
11610 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11611 append_elem(OP_LIST,
11612 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11613 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11614 : (OP*)newUNOP(OP_READLINE, 0,
11615 newUNOP(OP_RV2SV, 0,
11616 newGVOP(OP_GV, 0, gv)));
11618 if (!readline_overriden)
11619 PL_lex_op->op_flags |= OPf_SPECIAL;
11620 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11621 pl_yylval.ival = OP_NULL;
11624 /* If it's none of the above, it must be a literal filehandle
11625 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11627 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11628 PL_lex_op = readline_overriden
11629 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11630 append_elem(OP_LIST,
11631 newGVOP(OP_GV, 0, gv),
11632 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11633 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11634 pl_yylval.ival = OP_NULL;
11643 takes: start position in buffer
11644 keep_quoted preserve \ on the embedded delimiter(s)
11645 keep_delims preserve the delimiters around the string
11646 returns: position to continue reading from buffer
11647 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11648 updates the read buffer.
11650 This subroutine pulls a string out of the input. It is called for:
11651 q single quotes q(literal text)
11652 ' single quotes 'literal text'
11653 qq double quotes qq(interpolate $here please)
11654 " double quotes "interpolate $here please"
11655 qx backticks qx(/bin/ls -l)
11656 ` backticks `/bin/ls -l`
11657 qw quote words @EXPORT_OK = qw( func() $spam )
11658 m// regexp match m/this/
11659 s/// regexp substitute s/this/that/
11660 tr/// string transliterate tr/this/that/
11661 y/// string transliterate y/this/that/
11662 ($*@) sub prototypes sub foo ($)
11663 (stuff) sub attr parameters sub foo : attr(stuff)
11664 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11666 In most of these cases (all but <>, patterns and transliterate)
11667 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11668 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11669 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11672 It skips whitespace before the string starts, and treats the first
11673 character as the delimiter. If the delimiter is one of ([{< then
11674 the corresponding "close" character )]}> is used as the closing
11675 delimiter. It allows quoting of delimiters, and if the string has
11676 balanced delimiters ([{<>}]) it allows nesting.
11678 On success, the SV with the resulting string is put into lex_stuff or,
11679 if that is already non-NULL, into lex_repl. The second case occurs only
11680 when parsing the RHS of the special constructs s/// and tr/// (y///).
11681 For convenience, the terminating delimiter character is stuffed into
11686 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11689 SV *sv; /* scalar value: string */
11690 const char *tmps; /* temp string, used for delimiter matching */
11691 register char *s = start; /* current position in the buffer */
11692 register char term; /* terminating character */
11693 register char *to; /* current position in the sv's data */
11694 I32 brackets = 1; /* bracket nesting level */
11695 bool has_utf8 = FALSE; /* is there any utf8 content? */
11696 I32 termcode; /* terminating char. code */
11697 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11698 STRLEN termlen; /* length of terminating string */
11699 int last_off = 0; /* last position for nesting bracket */
11705 PERL_ARGS_ASSERT_SCAN_STR;
11707 /* skip space before the delimiter */
11713 if (PL_realtokenstart >= 0) {
11714 stuffstart = PL_realtokenstart;
11715 PL_realtokenstart = -1;
11718 stuffstart = start - SvPVX(PL_linestr);
11720 /* mark where we are, in case we need to report errors */
11723 /* after skipping whitespace, the next character is the terminator */
11726 termcode = termstr[0] = term;
11730 termcode = utf8_to_uvchr((U8*)s, &termlen);
11731 Copy(s, termstr, termlen, U8);
11732 if (!UTF8_IS_INVARIANT(term))
11736 /* mark where we are */
11737 PL_multi_start = CopLINE(PL_curcop);
11738 PL_multi_open = term;
11740 /* find corresponding closing delimiter */
11741 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11742 termcode = termstr[0] = term = tmps[5];
11744 PL_multi_close = term;
11746 /* create a new SV to hold the contents. 79 is the SV's initial length.
11747 What a random number. */
11748 sv = newSV_type(SVt_PVIV);
11750 SvIV_set(sv, termcode);
11751 (void)SvPOK_only(sv); /* validate pointer */
11753 /* move past delimiter and try to read a complete string */
11755 sv_catpvn(sv, s, termlen);
11758 tstart = SvPVX(PL_linestr) + stuffstart;
11759 if (!PL_thisopen && !keep_delims) {
11760 PL_thisopen = newSVpvn(tstart, s - tstart);
11761 stuffstart = s - SvPVX(PL_linestr);
11765 if (PL_encoding && !UTF) {
11769 int offset = s - SvPVX_const(PL_linestr);
11770 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11771 &offset, (char*)termstr, termlen);
11772 const char * const ns = SvPVX_const(PL_linestr) + offset;
11773 char * const svlast = SvEND(sv) - 1;
11775 for (; s < ns; s++) {
11776 if (*s == '\n' && !PL_rsfp)
11777 CopLINE_inc(PL_curcop);
11780 goto read_more_line;
11782 /* handle quoted delimiters */
11783 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11785 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11787 if ((svlast-1 - t) % 2) {
11788 if (!keep_quoted) {
11789 *(svlast-1) = term;
11791 SvCUR_set(sv, SvCUR(sv) - 1);
11796 if (PL_multi_open == PL_multi_close) {
11802 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11803 /* At here, all closes are "was quoted" one,
11804 so we don't check PL_multi_close. */
11806 if (!keep_quoted && *(t+1) == PL_multi_open)
11811 else if (*t == PL_multi_open)
11819 SvCUR_set(sv, w - SvPVX_const(sv));
11821 last_off = w - SvPVX(sv);
11822 if (--brackets <= 0)
11827 if (!keep_delims) {
11828 SvCUR_set(sv, SvCUR(sv) - 1);
11834 /* extend sv if need be */
11835 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11836 /* set 'to' to the next character in the sv's string */
11837 to = SvPVX(sv)+SvCUR(sv);
11839 /* if open delimiter is the close delimiter read unbridle */
11840 if (PL_multi_open == PL_multi_close) {
11841 for (; s < PL_bufend; s++,to++) {
11842 /* embedded newlines increment the current line number */
11843 if (*s == '\n' && !PL_rsfp)
11844 CopLINE_inc(PL_curcop);
11845 /* handle quoted delimiters */
11846 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11847 if (!keep_quoted && s[1] == term)
11849 /* any other quotes are simply copied straight through */
11853 /* terminate when run out of buffer (the for() condition), or
11854 have found the terminator */
11855 else if (*s == term) {
11858 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11861 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11867 /* if the terminator isn't the same as the start character (e.g.,
11868 matched brackets), we have to allow more in the quoting, and
11869 be prepared for nested brackets.
11872 /* read until we run out of string, or we find the terminator */
11873 for (; s < PL_bufend; s++,to++) {
11874 /* embedded newlines increment the line count */
11875 if (*s == '\n' && !PL_rsfp)
11876 CopLINE_inc(PL_curcop);
11877 /* backslashes can escape the open or closing characters */
11878 if (*s == '\\' && s+1 < PL_bufend) {
11879 if (!keep_quoted &&
11880 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11885 /* allow nested opens and closes */
11886 else if (*s == PL_multi_close && --brackets <= 0)
11888 else if (*s == PL_multi_open)
11890 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11895 /* terminate the copied string and update the sv's end-of-string */
11897 SvCUR_set(sv, to - SvPVX_const(sv));
11900 * this next chunk reads more into the buffer if we're not done yet
11904 break; /* handle case where we are done yet :-) */
11906 #ifndef PERL_STRICT_CR
11907 if (to - SvPVX_const(sv) >= 2) {
11908 if ((to[-2] == '\r' && to[-1] == '\n') ||
11909 (to[-2] == '\n' && to[-1] == '\r'))
11913 SvCUR_set(sv, to - SvPVX_const(sv));
11915 else if (to[-1] == '\r')
11918 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11923 /* if we're out of file, or a read fails, bail and reset the current
11924 line marker so we can report where the unterminated string began
11927 if (PL_madskills) {
11928 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11930 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11932 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11936 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11938 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11944 /* we read a line, so increment our line counter */
11945 CopLINE_inc(PL_curcop);
11947 /* update debugger info */
11948 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11949 update_debugger_info(PL_linestr, NULL, 0);
11951 /* having changed the buffer, we must update PL_bufend */
11952 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11953 PL_last_lop = PL_last_uni = NULL;
11956 /* at this point, we have successfully read the delimited string */
11958 if (!PL_encoding || UTF) {
11960 if (PL_madskills) {
11961 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11962 const int len = s - tstart;
11964 sv_catpvn(PL_thisstuff, tstart, len);
11966 PL_thisstuff = newSVpvn(tstart, len);
11967 if (!PL_thisclose && !keep_delims)
11968 PL_thisclose = newSVpvn(s,termlen);
11973 sv_catpvn(sv, s, termlen);
11978 if (PL_madskills) {
11979 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11980 const int len = s - tstart - termlen;
11982 sv_catpvn(PL_thisstuff, tstart, len);
11984 PL_thisstuff = newSVpvn(tstart, len);
11985 if (!PL_thisclose && !keep_delims)
11986 PL_thisclose = newSVpvn(s - termlen,termlen);
11990 if (has_utf8 || PL_encoding)
11993 PL_multi_end = CopLINE(PL_curcop);
11995 /* if we allocated too much space, give some back */
11996 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11997 SvLEN_set(sv, SvCUR(sv) + 1);
11998 SvPV_renew(sv, SvLEN(sv));
12001 /* decide whether this is the first or second quoted string we've read
12014 takes: pointer to position in buffer
12015 returns: pointer to new position in buffer
12016 side-effects: builds ops for the constant in pl_yylval.op
12018 Read a number in any of the formats that Perl accepts:
12020 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12021 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12024 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12026 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12029 If it reads a number without a decimal point or an exponent, it will
12030 try converting the number to an integer and see if it can do so
12031 without loss of precision.
12035 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12038 register const char *s = start; /* current position in buffer */
12039 register char *d; /* destination in temp buffer */
12040 register char *e; /* end of temp buffer */
12041 NV nv; /* number read, as a double */
12042 SV *sv = NULL; /* place to put the converted number */
12043 bool floatit; /* boolean: int or float? */
12044 const char *lastub = NULL; /* position of last underbar */
12045 static char const number_too_long[] = "Number too long";
12047 PERL_ARGS_ASSERT_SCAN_NUM;
12049 /* We use the first character to decide what type of number this is */
12053 Perl_croak(aTHX_ "panic: scan_num");
12055 /* if it starts with a 0, it could be an octal number, a decimal in
12056 0.13 disguise, or a hexadecimal number, or a binary number. */
12060 u holds the "number so far"
12061 shift the power of 2 of the base
12062 (hex == 4, octal == 3, binary == 1)
12063 overflowed was the number more than we can hold?
12065 Shift is used when we add a digit. It also serves as an "are
12066 we in octal/hex/binary?" indicator to disallow hex characters
12067 when in octal mode.
12072 bool overflowed = FALSE;
12073 bool just_zero = TRUE; /* just plain 0 or binary number? */
12074 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12075 static const char* const bases[5] =
12076 { "", "binary", "", "octal", "hexadecimal" };
12077 static const char* const Bases[5] =
12078 { "", "Binary", "", "Octal", "Hexadecimal" };
12079 static const char* const maxima[5] =
12081 "0b11111111111111111111111111111111",
12085 const char *base, *Base, *max;
12087 /* check for hex */
12092 } else if (s[1] == 'b') {
12097 /* check for a decimal in disguise */
12098 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12100 /* so it must be octal */
12107 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12108 "Misplaced _ in number");
12112 base = bases[shift];
12113 Base = Bases[shift];
12114 max = maxima[shift];
12116 /* read the rest of the number */
12118 /* x is used in the overflow test,
12119 b is the digit we're adding on. */
12124 /* if we don't mention it, we're done */
12128 /* _ are ignored -- but warned about if consecutive */
12130 if (lastub && s == lastub + 1)
12131 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12132 "Misplaced _ in number");
12136 /* 8 and 9 are not octal */
12137 case '8': case '9':
12139 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12143 case '2': case '3': case '4':
12144 case '5': case '6': case '7':
12146 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12149 case '0': case '1':
12150 b = *s++ & 15; /* ASCII digit -> value of digit */
12154 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12155 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12156 /* make sure they said 0x */
12159 b = (*s++ & 7) + 9;
12161 /* Prepare to put the digit we have onto the end
12162 of the number so far. We check for overflows.
12168 x = u << shift; /* make room for the digit */
12170 if ((x >> shift) != u
12171 && !(PL_hints & HINT_NEW_BINARY)) {
12174 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12175 "Integer overflow in %s number",
12178 u = x | b; /* add the digit to the end */
12181 n *= nvshift[shift];
12182 /* If an NV has not enough bits in its
12183 * mantissa to represent an UV this summing of
12184 * small low-order numbers is a waste of time
12185 * (because the NV cannot preserve the
12186 * low-order bits anyway): we could just
12187 * remember when did we overflow and in the
12188 * end just multiply n by the right
12196 /* if we get here, we had success: make a scalar value from
12201 /* final misplaced underbar check */
12202 if (s[-1] == '_') {
12203 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12208 if (n > 4294967295.0)
12209 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12210 "%s number > %s non-portable",
12216 if (u > 0xffffffff)
12217 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12218 "%s number > %s non-portable",
12223 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12224 sv = new_constant(start, s - start, "integer",
12225 sv, NULL, NULL, 0);
12226 else if (PL_hints & HINT_NEW_BINARY)
12227 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12232 handle decimal numbers.
12233 we're also sent here when we read a 0 as the first digit
12235 case '1': case '2': case '3': case '4': case '5':
12236 case '6': case '7': case '8': case '9': case '.':
12239 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12242 /* read next group of digits and _ and copy into d */
12243 while (isDIGIT(*s) || *s == '_') {
12244 /* skip underscores, checking for misplaced ones
12248 if (lastub && s == lastub + 1)
12249 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12250 "Misplaced _ in number");
12254 /* check for end of fixed-length buffer */
12256 Perl_croak(aTHX_ number_too_long);
12257 /* if we're ok, copy the character */
12262 /* final misplaced underbar check */
12263 if (lastub && s == lastub + 1) {
12264 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12267 /* read a decimal portion if there is one. avoid
12268 3..5 being interpreted as the number 3. followed
12271 if (*s == '.' && s[1] != '.') {
12276 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12277 "Misplaced _ in number");
12281 /* copy, ignoring underbars, until we run out of digits.
12283 for (; isDIGIT(*s) || *s == '_'; s++) {
12284 /* fixed length buffer check */
12286 Perl_croak(aTHX_ number_too_long);
12288 if (lastub && s == lastub + 1)
12289 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12290 "Misplaced _ in number");
12296 /* fractional part ending in underbar? */
12297 if (s[-1] == '_') {
12298 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12299 "Misplaced _ in number");
12301 if (*s == '.' && isDIGIT(s[1])) {
12302 /* oops, it's really a v-string, but without the "v" */
12308 /* read exponent part, if present */
12309 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12313 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12314 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12316 /* stray preinitial _ */
12318 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12319 "Misplaced _ in number");
12323 /* allow positive or negative exponent */
12324 if (*s == '+' || *s == '-')
12327 /* stray initial _ */
12329 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12330 "Misplaced _ in number");
12334 /* read digits of exponent */
12335 while (isDIGIT(*s) || *s == '_') {
12338 Perl_croak(aTHX_ number_too_long);
12342 if (((lastub && s == lastub + 1) ||
12343 (!isDIGIT(s[1]) && s[1] != '_')))
12344 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12345 "Misplaced _ in number");
12352 /* make an sv from the string */
12356 We try to do an integer conversion first if no characters
12357 indicating "float" have been found.
12362 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12364 if (flags == IS_NUMBER_IN_UV) {
12366 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12369 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12370 if (uv <= (UV) IV_MIN)
12371 sv_setiv(sv, -(IV)uv);
12378 /* terminate the string */
12380 nv = Atof(PL_tokenbuf);
12385 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12386 const char *const key = floatit ? "float" : "integer";
12387 const STRLEN keylen = floatit ? 5 : 7;
12388 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12389 key, keylen, sv, NULL, NULL, 0);
12393 /* if it starts with a v, it could be a v-string */
12396 sv = newSV(5); /* preallocate storage space */
12397 s = scan_vstring(s, PL_bufend, sv);
12401 /* make the op for the constant and return */
12404 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12406 lvalp->opval = NULL;
12412 S_scan_formline(pTHX_ register char *s)
12415 register char *eol;
12417 SV * const stuff = newSVpvs("");
12418 bool needargs = FALSE;
12419 bool eofmt = FALSE;
12421 char *tokenstart = s;
12422 SV* savewhite = NULL;
12424 if (PL_madskills) {
12425 savewhite = PL_thiswhite;
12430 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12432 while (!needargs) {
12435 #ifdef PERL_STRICT_CR
12436 while (SPACE_OR_TAB(*t))
12439 while (SPACE_OR_TAB(*t) || *t == '\r')
12442 if (*t == '\n' || t == PL_bufend) {
12447 if (PL_in_eval && !PL_rsfp) {
12448 eol = (char *) memchr(s,'\n',PL_bufend-s);
12453 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12455 for (t = s; t < eol; t++) {
12456 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12458 goto enough; /* ~~ must be first line in formline */
12460 if (*t == '@' || *t == '^')
12464 sv_catpvn(stuff, s, eol-s);
12465 #ifndef PERL_STRICT_CR
12466 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12467 char *end = SvPVX(stuff) + SvCUR(stuff);
12470 SvCUR_set(stuff, SvCUR(stuff) - 1);
12480 if (PL_madskills) {
12482 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12484 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12487 s = filter_gets(PL_linestr, PL_rsfp, 0);
12489 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12491 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12493 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12494 PL_last_lop = PL_last_uni = NULL;
12503 if (SvCUR(stuff)) {
12506 PL_lex_state = LEX_NORMAL;
12507 start_force(PL_curforce);
12508 NEXTVAL_NEXTTOKE.ival = 0;
12512 PL_lex_state = LEX_FORMLINE;
12514 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12516 else if (PL_encoding)
12517 sv_recode_to_utf8(stuff, PL_encoding);
12519 start_force(PL_curforce);
12520 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12522 start_force(PL_curforce);
12523 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12527 SvREFCNT_dec(stuff);
12529 PL_lex_formbrack = 0;
12533 if (PL_madskills) {
12535 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12537 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12538 PL_thiswhite = savewhite;
12545 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12548 const I32 oldsavestack_ix = PL_savestack_ix;
12549 CV* const outsidecv = PL_compcv;
12552 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12554 SAVEI32(PL_subline);
12555 save_item(PL_subname);
12556 SAVESPTR(PL_compcv);
12558 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12559 CvFLAGS(PL_compcv) |= flags;
12561 PL_subline = CopLINE(PL_curcop);
12562 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12563 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12564 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12566 return oldsavestack_ix;
12570 #pragma segment Perl_yylex
12573 S_yywarn(pTHX_ const char *const s)
12577 PERL_ARGS_ASSERT_YYWARN;
12579 PL_in_eval |= EVAL_WARNONLY;
12581 PL_in_eval &= ~EVAL_WARNONLY;
12586 Perl_yyerror(pTHX_ const char *const s)
12589 const char *where = NULL;
12590 const char *context = NULL;
12593 int yychar = PL_parser->yychar;
12595 PERL_ARGS_ASSERT_YYERROR;
12597 if (!yychar || (yychar == ';' && !PL_rsfp))
12599 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12600 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12601 PL_oldbufptr != PL_bufptr) {
12604 The code below is removed for NetWare because it abends/crashes on NetWare
12605 when the script has error such as not having the closing quotes like:
12606 if ($var eq "value)
12607 Checking of white spaces is anyway done in NetWare code.
12610 while (isSPACE(*PL_oldoldbufptr))
12613 context = PL_oldoldbufptr;
12614 contlen = PL_bufptr - PL_oldoldbufptr;
12616 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12617 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12620 The code below is removed for NetWare because it abends/crashes on NetWare
12621 when the script has error such as not having the closing quotes like:
12622 if ($var eq "value)
12623 Checking of white spaces is anyway done in NetWare code.
12626 while (isSPACE(*PL_oldbufptr))
12629 context = PL_oldbufptr;
12630 contlen = PL_bufptr - PL_oldbufptr;
12632 else if (yychar > 255)
12633 where = "next token ???";
12634 else if (yychar == -2) { /* YYEMPTY */
12635 if (PL_lex_state == LEX_NORMAL ||
12636 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12637 where = "at end of line";
12638 else if (PL_lex_inpat)
12639 where = "within pattern";
12641 where = "within string";
12644 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
12646 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12647 else if (isPRINT_LC(yychar)) {
12648 const char string = yychar;
12649 sv_catpvn(where_sv, &string, 1);
12652 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12653 where = SvPVX_const(where_sv);
12655 msg = sv_2mortal(newSVpv(s, 0));
12656 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12657 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12659 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12661 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12662 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12663 Perl_sv_catpvf(aTHX_ msg,
12664 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12665 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12668 if (PL_in_eval & EVAL_WARNONLY) {
12669 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12673 if (PL_error_count >= 10) {
12674 if (PL_in_eval && SvCUR(ERRSV))
12675 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12676 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12678 Perl_croak(aTHX_ "%s has too many errors.\n",
12679 OutCopFILE(PL_curcop));
12682 PL_in_my_stash = NULL;
12686 #pragma segment Main
12690 S_swallow_bom(pTHX_ U8 *s)
12693 const STRLEN slen = SvCUR(PL_linestr);
12695 PERL_ARGS_ASSERT_SWALLOW_BOM;
12699 if (s[1] == 0xFE) {
12700 /* UTF-16 little-endian? (or UTF32-LE?) */
12701 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12702 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12703 #ifndef PERL_NO_UTF16_FILTER
12704 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12707 if (PL_bufend > (char*)s) {
12711 IoLINES(filter_add(S_utf16_textfilter, NULL)) = 1;
12712 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12713 utf16_to_utf8_reversed(s, news,
12714 PL_bufend - (char*)s - 1,
12716 sv_setpvn(PL_linestr, (const char*)news, newlen);
12718 s = (U8*)SvPVX(PL_linestr);
12719 Copy(news, s, newlen, U8);
12723 SvUTF8_on(PL_linestr);
12724 s = (U8*)SvPVX(PL_linestr);
12726 /* FIXME - is this a general bug fix? */
12729 PL_bufend = SvPVX(PL_linestr) + newlen;
12732 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12737 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12738 #ifndef PERL_NO_UTF16_FILTER
12739 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12742 if (PL_bufend > (char *)s) {
12746 filter_add(S_utf16_textfilter, NULL);
12747 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12748 utf16_to_utf8(s, news,
12749 PL_bufend - (char*)s,
12751 sv_setpvn(PL_linestr, (const char*)news, newlen);
12753 SvUTF8_on(PL_linestr);
12754 s = (U8*)SvPVX(PL_linestr);
12755 PL_bufend = SvPVX(PL_linestr) + newlen;
12758 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12763 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12764 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12765 s += 3; /* UTF-8 */
12771 if (s[2] == 0xFE && s[3] == 0xFF) {
12772 /* UTF-32 big-endian */
12773 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12776 else if (s[2] == 0 && s[3] != 0) {
12779 * are a good indicator of UTF-16BE. */
12780 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12786 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12787 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12788 s += 4; /* UTF-8 */
12794 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12797 * are a good indicator of UTF-16LE. */
12798 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12806 #ifndef PERL_NO_UTF16_FILTER
12808 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12811 const STRLEN old = SvCUR(sv);
12812 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12813 const int reverse = IoLINES(sv);
12814 DEBUG_P(PerlIO_printf(Perl_debug_log,
12815 "utf16%s_textfilter(%p): %d %d (%d)\n",
12816 reverse ? "rev" : "",
12817 FPTR2DPTR(void *, S_utf16_textfilter),
12818 idx, maxlen, (int) count));
12822 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12823 Copy(SvPVX_const(sv), tmps, old, char);
12825 /* You would expect this to be utf16_to_utf8_reversed()
12826 It was, prior to 1de9afcdf18cf98bbdecaa782da93e907be6fe4e
12827 Effectively, right now, UTF-16LE is being read in off-by-one
12829 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12830 SvCUR(sv) - old, &newlen);
12832 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12833 SvCUR(sv) - old, &newlen);
12835 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12837 DEBUG_P({sv_dump(sv);});
12843 Returns a pointer to the next character after the parsed
12844 vstring, as well as updating the passed in sv.
12846 Function must be called like
12849 s = scan_vstring(s,e,sv);
12851 where s and e are the start and end of the string.
12852 The sv should already be large enough to store the vstring
12853 passed in, for performance reasons.
12858 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12861 const char *pos = s;
12862 const char *start = s;
12864 PERL_ARGS_ASSERT_SCAN_VSTRING;
12866 if (*pos == 'v') pos++; /* get past 'v' */
12867 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12869 if ( *pos != '.') {
12870 /* this may not be a v-string if followed by => */
12871 const char *next = pos;
12872 while (next < e && isSPACE(*next))
12874 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12875 /* return string not v-string */
12876 sv_setpvn(sv,(char *)s,pos-s);
12877 return (char *)pos;
12881 if (!isALPHA(*pos)) {
12882 U8 tmpbuf[UTF8_MAXBYTES+1];
12885 s++; /* get past 'v' */
12890 /* this is atoi() that tolerates underscores */
12893 const char *end = pos;
12895 while (--end >= s) {
12897 const UV orev = rev;
12898 rev += (*end - '0') * mult;
12901 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12902 "Integer overflow in decimal number");
12906 if (rev > 0x7FFFFFFF)
12907 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12909 /* Append native character for the rev point */
12910 tmpend = uvchr_to_utf8(tmpbuf, rev);
12911 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12912 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12914 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12920 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12924 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12932 * c-indentation-style: bsd
12933 * c-basic-offset: 4
12934 * indent-tabs-mode: t
12937 * ex: set ts=8 sts=4 sw=4 noet: