3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
43 #define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46 #define pl_yylval (PL_parser->yylval)
48 /* YYINITDEPTH -- initial size of the parser's stacks. */
49 #define YYINITDEPTH 200
51 /* XXX temporary backwards compatibility */
52 #define PL_lex_brackets (PL_parser->lex_brackets)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_pending_ident (PL_parser->pending_ident)
70 #define PL_preambled (PL_parser->preambled)
71 #define PL_sublex_info (PL_parser->sublex_info)
72 #define PL_linestr (PL_parser->linestr)
73 #define PL_expect (PL_parser->expect)
74 #define PL_copline (PL_parser->copline)
75 #define PL_bufptr (PL_parser->bufptr)
76 #define PL_oldbufptr (PL_parser->oldbufptr)
77 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78 #define PL_linestart (PL_parser->linestart)
79 #define PL_bufend (PL_parser->bufend)
80 #define PL_last_uni (PL_parser->last_uni)
81 #define PL_last_lop (PL_parser->last_lop)
82 #define PL_last_lop_op (PL_parser->last_lop_op)
83 #define PL_lex_state (PL_parser->lex_state)
84 #define PL_rsfp (PL_parser->rsfp)
85 #define PL_rsfp_filters (PL_parser->rsfp_filters)
86 #define PL_in_my (PL_parser->in_my)
87 #define PL_in_my_stash (PL_parser->in_my_stash)
88 #define PL_tokenbuf (PL_parser->tokenbuf)
89 #define PL_multi_end (PL_parser->multi_end)
90 #define PL_error_count (PL_parser->error_count)
93 # define PL_endwhite (PL_parser->endwhite)
94 # define PL_faketokens (PL_parser->faketokens)
95 # define PL_lasttoke (PL_parser->lasttoke)
96 # define PL_nextwhite (PL_parser->nextwhite)
97 # define PL_realtokenstart (PL_parser->realtokenstart)
98 # define PL_skipwhite (PL_parser->skipwhite)
99 # define PL_thisclose (PL_parser->thisclose)
100 # define PL_thismad (PL_parser->thismad)
101 # define PL_thisopen (PL_parser->thisopen)
102 # define PL_thisstuff (PL_parser->thisstuff)
103 # define PL_thistoken (PL_parser->thistoken)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_thiswhite (PL_parser->thiswhite)
106 # define PL_nexttoke (PL_parser->nexttoke)
107 # define PL_curforce (PL_parser->curforce)
109 # define PL_nexttoke (PL_parser->nexttoke)
110 # define PL_nexttype (PL_parser->nexttype)
111 # define PL_nextval (PL_parser->nextval)
114 /* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
117 S_pending_ident(pTHX);
119 static const char ident_too_long[] = "Identifier too long";
122 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
123 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
125 # define CURMAD(slot,sv)
126 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
129 #define XFAKEBRACK 128
130 #define XENUMMASK 127
132 #ifdef USE_UTF8_SCRIPTS
133 # define UTF (!IN_BYTES)
135 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
138 /* The maximum number of characters preceding the unrecognized one to display */
139 #define UNRECOGNIZED_PRECEDE_COUNT 10
141 /* In variables named $^X, these are the legal values for X.
142 * 1999-02-27 mjd-perl-patch@plover.com */
143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
147 /* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
149 * can get by with a single comparison (if the compiler is smart enough).
152 /* #define LEX_NOTPARSING 11 is done in perl.h. */
154 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
155 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
160 /* at end of code, eg "$x" followed by: */
161 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166 #define LEX_INTERPCONST 2 /* NOT USED */
167 #define LEX_FORMLINE 1 /* expecting a format line */
168 #define LEX_KNOWNEXT 0 /* next token known; just return it */
172 static const char* const lex_state_names[] = {
191 #include "keywords.h"
193 /* CLINE is a macro that ensures PL_copline has a sane value */
198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
201 # define SKIPSPACE0(s) skipspace0(s)
202 # define SKIPSPACE1(s) skipspace1(s)
203 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204 # define PEEKSPACE(s) skipspace2(s,0)
206 # define SKIPSPACE0(s) skipspace(s)
207 # define SKIPSPACE1(s) skipspace(s)
208 # define SKIPSPACE2(s,tsv) skipspace(s)
209 # define PEEKSPACE(s) skipspace(s)
213 * Convenience functions to return different tokens and prime the
214 * lexer for the next token. They all take an argument.
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
226 * FUN1 : not used, except for not, which isn't a UNIOP
227 * BOop : bitwise or or xor
229 * SHop : shift operator
230 * PWop : power operator
231 * PMop : pattern-matching operator
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
235 * Rop : relational operator <= != gt
237 * Also see LOP and lop() below.
240 #ifdef DEBUGGING /* Serve -DT. */
241 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
243 # define REPORT(retval) (retval)
246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
267 /* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
272 #define UNI2(f,x) { \
273 pl_yylval.ival = f; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
279 return REPORT( (int)FUNC1 ); \
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 #define UNI(f) UNI2(f,XTERM)
284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
286 #define UNIBRACK(f) { \
287 pl_yylval.ival = f; \
289 PL_last_uni = PL_oldbufptr; \
291 return REPORT( (int)FUNC1 ); \
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
296 /* grandfather return to old style */
297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
301 /* how to interpret the pl_yylval associated with the token */
305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
311 static struct debug_tokens {
313 enum token_type type;
315 } const debug_tokens[] =
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
385 { 0, TOKENTYPE_NONE, NULL }
388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
395 PERL_ARGS_ASSERT_TOKEREPORT;
398 const char *name = NULL;
399 enum token_type type = TOKENTYPE_NONE;
400 const struct debug_tokens *p;
401 SV* const report = newSVpvs("<== ");
403 for (p = debug_tokens; p->token; p++) {
404 if (p->token == (int)rv) {
411 Perl_sv_catpv(aTHX_ report, name);
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
415 sv_catpvs(report, "EOF");
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
427 PL_op_name[lvalp->ival]);
430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
432 case TOKENTYPE_OPVAL:
434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
437 Perl_sv_catpvf(aTHX_ report, " %s",
438 SvPEEK(cSVOPx_sv(lvalp->opval)));
443 sv_catpvs(report, "(opval=null)");
446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
452 /* print the buffer with suitable escapes */
455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
457 SV* const tmp = newSVpvs("");
459 PERL_ARGS_ASSERT_PRINTBUF;
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
468 S_deprecate_commaless_var_list(pTHX) {
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
482 S_ao(pTHX_ int toketype)
485 if (*PL_bufptr == '=') {
487 if (toketype == ANDAND)
488 pl_yylval.ival = OP_ANDASSIGN;
489 else if (toketype == OROR)
490 pl_yylval.ival = OP_ORASSIGN;
491 else if (toketype == DORDOR)
492 pl_yylval.ival = OP_DORASSIGN;
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
512 S_no_op(pTHX_ const char *const what, char *s)
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
518 PERL_ARGS_ASSERT_NO_OP;
524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
525 if (ckWARN_d(WARN_SYNTAX)) {
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
549 * Complain about missing quote/regexp/heredoc terminator.
550 * If it's called with NULL then it cauterizes the line buffer.
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
557 S_missingterm(pTHX_ char *s)
563 char * const nl = strrchr(s,'\n');
567 else if (isCNTRL(PL_multi_close)) {
569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
574 *tmpbuf = (char)PL_multi_close;
578 q = strchr(s,'"') ? '\'' : '"';
579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
582 #define FEATURE_IS_ENABLED(name) \
583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
585 /* The longest string we pass in. */
586 #define MAX_FEATURE_LEN (sizeof("switch")-1)
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
596 HV * const hinthv = GvHV(PL_hintgv);
597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
612 #ifdef PERL_CR_FILTER
616 register const char *s = SvPVX_const(sv);
617 register const char * const e = s + SvCUR(sv);
619 PERL_ARGS_ASSERT_STRIP_RETURN;
621 /* outer loop optimized to do nothing if there are no CR-LFs */
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
628 if (*s == '\r' && s[1] == '\n')
639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
642 if (count > 0 && !maxlen)
653 * Create a parser object and initialise its parser and lexer fields
655 * rsfp is the opened file handle to read from (if any),
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
668 const char *s = NULL;
670 yy_parser *parser, *oparser;
672 /* create and initialise a parser */
674 Newxz(parser, 1, yy_parser);
675 parser->old_parser = oparser = PL_parser;
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
686 /* on scope exit, free this parser and restore any outer one */
688 parser->saved_curcop = PL_curcop;
690 /* initialise lexer state */
693 parser->curforce = -1;
695 parser->nexttoke = 0;
697 parser->error_count = oparser ? oparser->error_count : 0;
698 parser->copline = NOLINE;
699 parser->lex_state = LEX_NORMAL;
700 parser->expect = XSTATE;
702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
710 s = SvPV_const(line, len);
716 parser->linestr = newSVpvs("\n;");
717 } else if (SvREADONLY(line) || s[len-1] != ';') {
718 parser->linestr = newSVsv(line);
720 sv_catpvs(parser->linestr, "\n;");
723 SvREFCNT_inc_simple_void_NN(line);
724 parser->linestr = line;
726 parser->oldoldbufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
735 /* delete a parser object */
738 Perl_parser_free(pTHX_ const yy_parser *parser)
740 PERL_ARGS_ASSERT_PARSER_FREE;
742 PL_curcop = parser->saved_curcop;
743 SvREFCNT_dec(parser->linestr);
745 if (parser->rsfp == PerlIO_stdin())
746 PerlIO_clearerr(parser->rsfp);
747 else if (parser->rsfp && (!parser->old_parser ||
748 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
749 PerlIO_close(parser->rsfp);
750 SvREFCNT_dec(parser->rsfp_filters);
752 Safefree(parser->stack);
753 Safefree(parser->lex_brackstack);
754 Safefree(parser->lex_casestack);
755 PL_parser = parser->old_parser;
762 * Finalizer for lexing operations. Must be called when the parser is
763 * done with the lexer.
770 PL_doextract = FALSE;
774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
776 Buffer scalar containing the chunk currently under consideration of the
777 text currently being lexed. This is always a plain string scalar (for
778 which C<SvPOK> is true). It is not intended to be used as a scalar by
779 normal scalar means; instead refer to the buffer directly by the pointer
780 variables described below.
782 The lexer maintains various C<char*> pointers to things in the
783 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
784 reallocated, all of these pointers must be updated. Don't attempt to
785 do this manually, but rather use L</lex_grow_linestr> if you need to
786 reallocate the buffer.
788 The content of the text chunk in the buffer is commonly exactly one
789 complete line of input, up to and including a newline terminator,
790 but there are situations where it is otherwise. The octets of the
791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
792 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
793 flag on this scalar, which may disagree with it.
795 For direct examination of the buffer, the variable
796 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
798 of these pointers is usually preferable to examination of the scalar
799 through normal scalar means.
801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
803 Direct pointer to the end of the chunk of text currently being lexed, the
804 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
805 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
806 always located at the end of the buffer, and does not count as part of
807 the buffer's contents.
809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
811 Points to the current position of lexing inside the lexer buffer.
812 Characters around this point may be freely examined, within
813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
814 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
817 Lexing code (whether in the Perl core or not) moves this pointer past
818 the characters that it consumes. It is also expected to perform some
819 bookkeeping whenever a newline character is consumed. This movement
820 can be more conveniently performed by the function L</lex_read_to>,
821 which handles newlines appropriately.
823 Interpretation of the buffer's octets can be abstracted out by
824 using the slightly higher-level functions L</lex_peek_unichar> and
825 L</lex_read_unichar>.
827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
829 Points to the start of the current line inside the lexer buffer.
830 This is useful for indicating at which column an error occurred, and
831 not much else. This must be updated by any lexing code that consumes
832 a newline; the function L</lex_read_to> handles this detail.
838 =for apidoc Amx|bool|lex_bufutf8
840 Indicates whether the octets in the lexer buffer
841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
842 of Unicode characters. If not, they should be interpreted as Latin-1
843 characters. This is analogous to the C<SvUTF8> flag for scalars.
845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
846 contains valid UTF-8. Lexing code must be robust in the face of invalid
849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
850 is significant, but not the whole story regarding the input character
851 encoding. Normally, when a file is being read, the scalar contains octets
852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
853 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
856 is in effect. This logic may change in the future; use this function
857 instead of implementing the logic yourself.
863 Perl_lex_bufutf8(pTHX)
869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
872 at least I<len> octets (including terminating NUL). Returns a
873 pointer to the reallocated buffer. This is necessary before making
874 any direct modification of the buffer that would increase its length.
875 L</lex_stuff_pvn> provides a more convenient way to insert text into
878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
879 this function updates all of the lexer's variables that point directly
886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
890 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
891 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
892 linestr = PL_parser->linestr;
893 buf = SvPVX(linestr);
894 if (len <= SvLEN(linestr))
896 bufend_pos = PL_parser->bufend - buf;
897 bufptr_pos = PL_parser->bufptr - buf;
898 oldbufptr_pos = PL_parser->oldbufptr - buf;
899 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
900 linestart_pos = PL_parser->linestart - buf;
901 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
902 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
903 buf = sv_grow(linestr, len);
904 PL_parser->bufend = buf + bufend_pos;
905 PL_parser->bufptr = buf + bufptr_pos;
906 PL_parser->oldbufptr = buf + oldbufptr_pos;
907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
908 PL_parser->linestart = buf + linestart_pos;
909 if (PL_parser->last_uni)
910 PL_parser->last_uni = buf + last_uni_pos;
911 if (PL_parser->last_lop)
912 PL_parser->last_lop = buf + last_lop_pos;
917 =for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
921 reallocating the buffer if necessary. This means that lexing code that
922 runs later will see the characters as if they had appeared in the input.
923 It is not recommended to do this as part of normal parsing, and most
924 uses of this facility run the risk of the inserted characters being
925 interpreted in an unintended manner.
927 The string to be inserted is represented by I<len> octets starting
928 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
930 The characters are recoded for the lexer buffer, according to how the
931 buffer is currently being interpreted (L</lex_bufutf8>). If a string
932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
933 function is more convenient.
939 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
942 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
943 if (flags & ~(LEX_STUFF_UTF8))
944 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
946 if (flags & LEX_STUFF_UTF8) {
950 char *p, *e = pv+len;
951 for (p = pv; p != e; p++)
952 highhalf += !!(((U8)*p) & 0x80);
955 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
956 bufptr = PL_parser->bufptr;
957 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
958 PL_parser->bufend += len+highhalf;
959 for (p = pv; p != e; p++) {
962 *bufptr++ = (char)(0xc0 | (c >> 6));
963 *bufptr++ = (char)(0x80 | (c & 0x3f));
970 if (flags & LEX_STUFF_UTF8) {
972 char *p, *e = pv+len;
973 for (p = pv; p != e; p++) {
976 Perl_croak(aTHX_ "Lexing code attempted to stuff "
977 "non-Latin-1 character into Latin-1 input");
978 } else if (c >= 0xc2 && p+1 != e &&
979 (((U8)p[1]) & 0xc0) == 0x80) {
982 } else if (c >= 0x80) {
983 /* malformed UTF-8 */
985 SAVESPTR(PL_warnhook);
986 PL_warnhook = PERL_WARNHOOK_FATAL;
987 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
993 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
994 bufptr = PL_parser->bufptr;
995 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
996 PL_parser->bufend += len-highhalf;
997 for (p = pv; p != e; p++) {
1000 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1003 *bufptr++ = (char)c;
1008 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009 bufptr = PL_parser->bufptr;
1010 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011 PL_parser->bufend += len;
1012 Copy(pv, bufptr, len, char);
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary. This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1028 The string to be inserted is the string value of I<sv>. The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1042 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1044 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1046 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1057 This is not the normal way to consume lexed text. For that, use
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1068 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069 buf = PL_parser->bufptr;
1071 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1074 bufend = PL_parser->bufend;
1076 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077 unstuff_len = ptr - buf;
1078 Move(ptr, buf, bufend+1-ptr, char);
1079 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080 PL_parser->bufend = bufend - unstuff_len;
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1099 Perl_lex_read_to(pTHX_ char *ptr)
1102 PERL_ARGS_ASSERT_LEX_READ_TO;
1103 s = PL_parser->bufptr;
1104 if (ptr < s || ptr > PL_parser->bufend)
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106 for (; s != ptr; s++)
1108 CopLINE_inc(PL_curcop);
1109 PL_parser->linestart = s+1;
1111 PL_parser->bufptr = ptr;
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>. The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately. I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it. However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer fof
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1139 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140 buf = SvPVX(PL_parser->linestr);
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1145 if (ptr > PL_parser->bufptr)
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147 discard_len = ptr - buf;
1148 if (PL_parser->oldbufptr < ptr)
1149 PL_parser->oldbufptr = ptr;
1150 if (PL_parser->oldoldbufptr < ptr)
1151 PL_parser->oldoldbufptr = ptr;
1152 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153 PL_parser->last_uni = NULL;
1154 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155 PL_parser->last_lop = NULL;
1156 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158 PL_parser->bufend -= discard_len;
1159 PL_parser->bufptr -= discard_len;
1160 PL_parser->oldbufptr -= discard_len;
1161 PL_parser->oldoldbufptr -= discard_len;
1162 if (PL_parser->last_uni)
1163 PL_parser->last_uni -= discard_len;
1164 if (PL_parser->last_lop)
1165 PL_parser->last_lop -= discard_len;
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more. It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded. If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1190 #define LEX_FAKE_EOF 0x80000000
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1197 STRLEN old_bufend_pos, new_bufend_pos;
1198 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200 bool got_some_for_debugger = 0;
1202 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1205 flags |= LEX_KEEP_PREVIOUS;
1206 #endif /* PERL_MAD */
1207 linestr = PL_parser->linestr;
1208 buf = SvPVX(linestr);
1209 if (!(flags & LEX_KEEP_PREVIOUS) &&
1210 PL_parser->bufptr == PL_parser->bufend) {
1211 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1213 if (PL_parser->last_uni != PL_parser->bufend)
1214 PL_parser->last_uni = NULL;
1215 if (PL_parser->last_lop != PL_parser->bufend)
1216 PL_parser->last_lop = NULL;
1217 last_uni_pos = last_lop_pos = 0;
1221 old_bufend_pos = PL_parser->bufend - buf;
1222 bufptr_pos = PL_parser->bufptr - buf;
1223 oldbufptr_pos = PL_parser->oldbufptr - buf;
1224 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1225 linestart_pos = PL_parser->linestart - buf;
1226 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1227 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1229 if (flags & LEX_FAKE_EOF) {
1231 } else if (!PL_parser->rsfp) {
1233 } else if (filter_gets(linestr, old_bufend_pos)) {
1235 got_some_for_debugger = 1;
1237 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1238 sv_setpvs(linestr, "");
1240 /* End of real input. Close filehandle (unless it was STDIN),
1241 * then add implicit termination.
1243 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1244 PerlIO_clearerr(PL_parser->rsfp);
1245 else if (PL_parser->rsfp)
1246 (void)PerlIO_close(PL_parser->rsfp);
1247 PL_parser->rsfp = NULL;
1248 PL_doextract = FALSE;
1250 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1253 if (!PL_in_eval && PL_minus_p) {
1255 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1256 PL_minus_n = PL_minus_p = 0;
1257 } else if (!PL_in_eval && PL_minus_n) {
1258 sv_catpvs(linestr, /*{*/";}");
1261 sv_catpvs(linestr, ";");
1264 buf = SvPVX(linestr);
1265 new_bufend_pos = SvCUR(linestr);
1266 PL_parser->bufend = buf + new_bufend_pos;
1267 PL_parser->bufptr = buf + bufptr_pos;
1268 PL_parser->oldbufptr = buf + oldbufptr_pos;
1269 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1270 PL_parser->linestart = buf + linestart_pos;
1271 if (PL_parser->last_uni)
1272 PL_parser->last_uni = buf + last_uni_pos;
1273 if (PL_parser->last_lop)
1274 PL_parser->last_lop = buf + last_lop_pos;
1275 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1276 PL_curstash != PL_debstash) {
1277 /* debugger active and we're not compiling the debugger code,
1278 * so store the line into the debugger's array of lines
1280 update_debugger_info(NULL, buf+old_bufend_pos,
1281 new_bufend_pos-old_bufend_pos);
1287 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1289 Looks ahead one (Unicode) character in the text currently being lexed.
1290 Returns the codepoint (unsigned integer value) of the next character,
1291 or -1 if lexing has reached the end of the input text. To consume the
1292 peeked character, use L</lex_read_unichar>.
1294 If the next character is in (or extends into) the next chunk of input
1295 text, the next chunk will be read in. Normally the current chunk will be
1296 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1297 then the current chunk will not be discarded.
1299 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1300 is encountered, an exception is generated.
1306 Perl_lex_peek_unichar(pTHX_ U32 flags)
1309 if (flags & ~(LEX_KEEP_PREVIOUS))
1310 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1311 s = PL_parser->bufptr;
1312 bufend = PL_parser->bufend;
1318 if (!lex_next_chunk(flags))
1320 s = PL_parser->bufptr;
1321 bufend = PL_parser->bufend;
1327 len = PL_utf8skip[head];
1328 while ((STRLEN)(bufend-s) < len) {
1329 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1331 s = PL_parser->bufptr;
1332 bufend = PL_parser->bufend;
1335 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1336 if (retlen == (STRLEN)-1) {
1337 /* malformed UTF-8 */
1339 SAVESPTR(PL_warnhook);
1340 PL_warnhook = PERL_WARNHOOK_FATAL;
1341 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1347 if (!lex_next_chunk(flags))
1349 s = PL_parser->bufptr;
1356 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1358 Reads the next (Unicode) character in the text currently being lexed.
1359 Returns the codepoint (unsigned integer value) of the character read,
1360 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1361 if lexing has reached the end of the input text. To non-destructively
1362 examine the next character, use L</lex_peek_unichar> instead.
1364 If the next character is in (or extends into) the next chunk of input
1365 text, the next chunk will be read in. Normally the current chunk will be
1366 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1367 then the current chunk will not be discarded.
1369 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1370 is encountered, an exception is generated.
1376 Perl_lex_read_unichar(pTHX_ U32 flags)
1379 if (flags & ~(LEX_KEEP_PREVIOUS))
1380 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1381 c = lex_peek_unichar(flags);
1384 CopLINE_inc(PL_curcop);
1385 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1391 =for apidoc Amx|void|lex_read_space|U32 flags
1393 Reads optional spaces, in Perl style, in the text currently being
1394 lexed. The spaces may include ordinary whitespace characters and
1395 Perl-style comments. C<#line> directives are processed if encountered.
1396 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1397 at a non-space character (or the end of the input text).
1399 If spaces extend into the next chunk of input text, the next chunk will
1400 be read in. Normally the current chunk will be discarded at the same
1401 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1402 chunk will not be discarded.
1408 Perl_lex_read_space(pTHX_ U32 flags)
1411 bool need_incline = 0;
1412 if (flags & ~(LEX_KEEP_PREVIOUS))
1413 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1416 sv_free(PL_skipwhite);
1417 PL_skipwhite = NULL;
1420 PL_skipwhite = newSVpvs("");
1421 #endif /* PERL_MAD */
1422 s = PL_parser->bufptr;
1423 bufend = PL_parser->bufend;
1429 } while (!(c == '\n' || (c == 0 && s == bufend)));
1430 } else if (c == '\n') {
1432 PL_parser->linestart = s;
1437 } else if (isSPACE(c)) {
1439 } else if (c == 0 && s == bufend) {
1443 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1444 #endif /* PERL_MAD */
1445 PL_parser->bufptr = s;
1446 CopLINE_inc(PL_curcop);
1447 got_more = lex_next_chunk(flags);
1448 CopLINE_dec(PL_curcop);
1449 s = PL_parser->bufptr;
1450 bufend = PL_parser->bufend;
1453 if (need_incline && PL_parser->rsfp) {
1463 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1464 #endif /* PERL_MAD */
1465 PL_parser->bufptr = s;
1470 * This subroutine has nothing to do with tilting, whether at windmills
1471 * or pinball tables. Its name is short for "increment line". It
1472 * increments the current line number in CopLINE(PL_curcop) and checks
1473 * to see whether the line starts with a comment of the form
1474 * # line 500 "foo.pm"
1475 * If so, it sets the current line number and file to the values in the comment.
1479 S_incline(pTHX_ const char *s)
1486 PERL_ARGS_ASSERT_INCLINE;
1488 CopLINE_inc(PL_curcop);
1491 while (SPACE_OR_TAB(*s))
1493 if (strnEQ(s, "line", 4))
1497 if (SPACE_OR_TAB(*s))
1501 while (SPACE_OR_TAB(*s))
1509 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1511 while (SPACE_OR_TAB(*s))
1513 if (*s == '"' && (t = strchr(s+1, '"'))) {
1519 while (!isSPACE(*t))
1523 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1525 if (*e != '\n' && *e != '\0')
1526 return; /* false alarm */
1529 const STRLEN len = t - s;
1530 #ifndef USE_ITHREADS
1531 SV *const temp_sv = CopFILESV(PL_curcop);
1536 cf = SvPVX(temp_sv);
1537 tmplen = SvCUR(temp_sv);
1543 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1544 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1545 * to *{"::_<newfilename"} */
1546 /* However, the long form of evals is only turned on by the
1547 debugger - usually they're "(eval %lu)" */
1551 STRLEN tmplen2 = len;
1552 if (tmplen + 2 <= sizeof smallbuf)
1555 Newx(tmpbuf, tmplen + 2, char);
1558 memcpy(tmpbuf + 2, cf, tmplen);
1560 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1565 if (tmplen2 + 2 <= sizeof smallbuf)
1568 Newx(tmpbuf2, tmplen2 + 2, char);
1570 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1571 /* Either they malloc'd it, or we malloc'd it,
1572 so no prefix is present in ours. */
1577 memcpy(tmpbuf2 + 2, s, tmplen2);
1580 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1582 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1583 /* adjust ${"::_<newfilename"} to store the new file name */
1584 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1585 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1586 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1589 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1591 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1594 CopFILE_free(PL_curcop);
1595 CopFILE_setn(PL_curcop, s, len);
1597 CopLINE_set(PL_curcop, atoi(n)-1);
1601 /* skip space before PL_thistoken */
1604 S_skipspace0(pTHX_ register char *s)
1606 PERL_ARGS_ASSERT_SKIPSPACE0;
1613 PL_thiswhite = newSVpvs("");
1614 sv_catsv(PL_thiswhite, PL_skipwhite);
1615 sv_free(PL_skipwhite);
1618 PL_realtokenstart = s - SvPVX(PL_linestr);
1622 /* skip space after PL_thistoken */
1625 S_skipspace1(pTHX_ register char *s)
1627 const char *start = s;
1628 I32 startoff = start - SvPVX(PL_linestr);
1630 PERL_ARGS_ASSERT_SKIPSPACE1;
1635 start = SvPVX(PL_linestr) + startoff;
1636 if (!PL_thistoken && PL_realtokenstart >= 0) {
1637 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1638 PL_thistoken = newSVpvn(tstart, start - tstart);
1640 PL_realtokenstart = -1;
1643 PL_nextwhite = newSVpvs("");
1644 sv_catsv(PL_nextwhite, PL_skipwhite);
1645 sv_free(PL_skipwhite);
1652 S_skipspace2(pTHX_ register char *s, SV **svp)
1655 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1656 const I32 startoff = s - SvPVX(PL_linestr);
1658 PERL_ARGS_ASSERT_SKIPSPACE2;
1661 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1662 if (!PL_madskills || !svp)
1664 start = SvPVX(PL_linestr) + startoff;
1665 if (!PL_thistoken && PL_realtokenstart >= 0) {
1666 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1667 PL_thistoken = newSVpvn(tstart, start - tstart);
1668 PL_realtokenstart = -1;
1672 *svp = newSVpvs("");
1673 sv_setsv(*svp, PL_skipwhite);
1674 sv_free(PL_skipwhite);
1683 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1685 AV *av = CopFILEAVx(PL_curcop);
1687 SV * const sv = newSV_type(SVt_PVMG);
1689 sv_setsv(sv, orig_sv);
1691 sv_setpvn(sv, buf, len);
1694 av_store(av, (I32)CopLINE(PL_curcop), sv);
1700 * Called to gobble the appropriate amount and type of whitespace.
1701 * Skips comments as well.
1705 S_skipspace(pTHX_ register char *s)
1709 #endif /* PERL_MAD */
1710 PERL_ARGS_ASSERT_SKIPSPACE;
1713 sv_free(PL_skipwhite);
1714 PL_skipwhite = NULL;
1716 #endif /* PERL_MAD */
1717 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1718 while (s < PL_bufend && SPACE_OR_TAB(*s))
1720 } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1721 while (isSPACE(*s) && *s != '\n')
1726 } while (s != PL_bufend && *s != '\n');
1731 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1733 lex_read_space(LEX_KEEP_PREVIOUS);
1735 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1736 if (PL_linestart > PL_bufptr)
1737 PL_bufptr = PL_linestart;
1742 PL_skipwhite = newSVpvn(start, s-start);
1743 #endif /* PERL_MAD */
1749 * Check the unary operators to ensure there's no ambiguity in how they're
1750 * used. An ambiguous piece of code would be:
1752 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1753 * the +5 is its argument.
1763 if (PL_oldoldbufptr != PL_last_uni)
1765 while (isSPACE(*PL_last_uni))
1768 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1770 if ((t = strchr(s, '(')) && t < PL_bufptr)
1773 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1774 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1775 (int)(s - PL_last_uni), PL_last_uni);
1779 * LOP : macro to build a list operator. Its behaviour has been replaced
1780 * with a subroutine, S_lop() for which LOP is just another name.
1783 #define LOP(f,x) return lop(f,x,s)
1787 * Build a list operator (or something that might be one). The rules:
1788 * - if we have a next token, then it's a list operator [why?]
1789 * - if the next thing is an opening paren, then it's a function
1790 * - else it's a list operator
1794 S_lop(pTHX_ I32 f, int x, char *s)
1798 PERL_ARGS_ASSERT_LOP;
1804 PL_last_lop = PL_oldbufptr;
1805 PL_last_lop_op = (OPCODE)f;
1808 return REPORT(LSTOP);
1811 return REPORT(LSTOP);
1814 return REPORT(FUNC);
1817 return REPORT(FUNC);
1819 return REPORT(LSTOP);
1825 * Sets up for an eventual force_next(). start_force(0) basically does
1826 * an unshift, while start_force(-1) does a push. yylex removes items
1831 S_start_force(pTHX_ int where)
1835 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1836 where = PL_lasttoke;
1837 assert(PL_curforce < 0 || PL_curforce == where);
1838 if (PL_curforce != where) {
1839 for (i = PL_lasttoke; i > where; --i) {
1840 PL_nexttoke[i] = PL_nexttoke[i-1];
1844 if (PL_curforce < 0) /* in case of duplicate start_force() */
1845 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1846 PL_curforce = where;
1849 curmad('^', newSVpvs(""));
1850 CURMAD('_', PL_nextwhite);
1855 S_curmad(pTHX_ char slot, SV *sv)
1861 if (PL_curforce < 0)
1862 where = &PL_thismad;
1864 where = &PL_nexttoke[PL_curforce].next_mad;
1870 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1872 else if (PL_encoding) {
1873 sv_recode_to_utf8(sv, PL_encoding);
1878 /* keep a slot open for the head of the list? */
1879 if (slot != '_' && *where && (*where)->mad_key == '^') {
1880 (*where)->mad_key = slot;
1881 sv_free(MUTABLE_SV(((*where)->mad_val)));
1882 (*where)->mad_val = (void*)sv;
1885 addmad(newMADsv(slot, sv), where, 0);
1888 # define start_force(where) NOOP
1889 # define curmad(slot, sv) NOOP
1894 * When the lexer realizes it knows the next token (for instance,
1895 * it is reordering tokens for the parser) then it can call S_force_next
1896 * to know what token to return the next time the lexer is called. Caller
1897 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1898 * and possibly PL_expect to ensure the lexer handles the token correctly.
1902 S_force_next(pTHX_ I32 type)
1907 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1908 tokereport(type, &NEXTVAL_NEXTTOKE);
1912 if (PL_curforce < 0)
1913 start_force(PL_lasttoke);
1914 PL_nexttoke[PL_curforce].next_type = type;
1915 if (PL_lex_state != LEX_KNOWNEXT)
1916 PL_lex_defer = PL_lex_state;
1917 PL_lex_state = LEX_KNOWNEXT;
1918 PL_lex_expect = PL_expect;
1921 PL_nexttype[PL_nexttoke] = type;
1923 if (PL_lex_state != LEX_KNOWNEXT) {
1924 PL_lex_defer = PL_lex_state;
1925 PL_lex_expect = PL_expect;
1926 PL_lex_state = LEX_KNOWNEXT;
1932 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1935 SV * const sv = newSVpvn_utf8(start, len,
1938 && !is_ascii_string((const U8*)start, len)
1939 && is_utf8_string((const U8*)start, len));
1945 * When the lexer knows the next thing is a word (for instance, it has
1946 * just seen -> and it knows that the next char is a word char, then
1947 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1951 * char *start : buffer position (must be within PL_linestr)
1952 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1953 * int check_keyword : if true, Perl checks to make sure the word isn't
1954 * a keyword (do this if the word is a label, e.g. goto FOO)
1955 * int allow_pack : if true, : characters will also be allowed (require,
1956 * use, etc. do this)
1957 * int allow_initial_tick : used by the "sub" lexer only.
1961 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1967 PERL_ARGS_ASSERT_FORCE_WORD;
1969 start = SKIPSPACE1(start);
1971 if (isIDFIRST_lazy_if(s,UTF) ||
1972 (allow_pack && *s == ':') ||
1973 (allow_initial_tick && *s == '\'') )
1975 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1976 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1978 start_force(PL_curforce);
1980 curmad('X', newSVpvn(start,s-start));
1981 if (token == METHOD) {
1986 PL_expect = XOPERATOR;
1990 curmad('g', newSVpvs( "forced" ));
1991 NEXTVAL_NEXTTOKE.opval
1992 = (OP*)newSVOP(OP_CONST,0,
1993 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1994 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2002 * Called when the lexer wants $foo *foo &foo etc, but the program
2003 * text only contains the "foo" portion. The first argument is a pointer
2004 * to the "foo", and the second argument is the type symbol to prefix.
2005 * Forces the next token to be a "WORD".
2006 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2010 S_force_ident(pTHX_ register const char *s, int kind)
2014 PERL_ARGS_ASSERT_FORCE_IDENT;
2017 const STRLEN len = strlen(s);
2018 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2019 start_force(PL_curforce);
2020 NEXTVAL_NEXTTOKE.opval = o;
2023 o->op_private = OPpCONST_ENTERED;
2024 /* XXX see note in pp_entereval() for why we forgo typo
2025 warnings if the symbol must be introduced in an eval.
2027 gv_fetchpvn_flags(s, len,
2028 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2030 kind == '$' ? SVt_PV :
2031 kind == '@' ? SVt_PVAV :
2032 kind == '%' ? SVt_PVHV :
2040 Perl_str_to_version(pTHX_ SV *sv)
2045 const char *start = SvPV_const(sv,len);
2046 const char * const end = start + len;
2047 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2049 PERL_ARGS_ASSERT_STR_TO_VERSION;
2051 while (start < end) {
2055 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2060 retval += ((NV)n)/nshift;
2069 * Forces the next token to be a version number.
2070 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2071 * and if "guessing" is TRUE, then no new token is created (and the caller
2072 * must use an alternative parsing method).
2076 S_force_version(pTHX_ char *s, int guessing)
2082 I32 startoff = s - SvPVX(PL_linestr);
2085 PERL_ARGS_ASSERT_FORCE_VERSION;
2093 while (isDIGIT(*d) || *d == '_' || *d == '.')
2097 start_force(PL_curforce);
2098 curmad('X', newSVpvn(s,d-s));
2101 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
2103 s = scan_num(s, &pl_yylval);
2104 version = pl_yylval.opval;
2105 ver = cSVOPx(version)->op_sv;
2106 if (SvPOK(ver) && !SvNIOK(ver)) {
2107 SvUPGRADE(ver, SVt_PVNV);
2108 SvNV_set(ver, str_to_version(ver));
2109 SvNOK_on(ver); /* hint that it is a version */
2112 else if (guessing) {
2115 sv_free(PL_nextwhite); /* let next token collect whitespace */
2117 s = SvPVX(PL_linestr) + startoff;
2125 if (PL_madskills && !version) {
2126 sv_free(PL_nextwhite); /* let next token collect whitespace */
2128 s = SvPVX(PL_linestr) + startoff;
2131 /* NOTE: The parser sees the package name and the VERSION swapped */
2132 start_force(PL_curforce);
2133 NEXTVAL_NEXTTOKE.opval = version;
2141 * Tokenize a quoted string passed in as an SV. It finds the next
2142 * chunk, up to end of string or a backslash. It may make a new
2143 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2148 S_tokeq(pTHX_ SV *sv)
2152 register char *send;
2157 PERL_ARGS_ASSERT_TOKEQ;
2162 s = SvPV_force(sv, len);
2163 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2166 while (s < send && *s != '\\')
2171 if ( PL_hints & HINT_NEW_STRING ) {
2172 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2176 if (s + 1 < send && (s[1] == '\\'))
2177 s++; /* all that, just for this */
2182 SvCUR_set(sv, d - SvPVX_const(sv));
2184 if ( PL_hints & HINT_NEW_STRING )
2185 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2190 * Now come three functions related to double-quote context,
2191 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2192 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2193 * interact with PL_lex_state, and create fake ( ... ) argument lists
2194 * to handle functions and concatenation.
2195 * They assume that whoever calls them will be setting up a fake
2196 * join call, because each subthing puts a ',' after it. This lets
2199 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2201 * (I'm not sure whether the spurious commas at the end of lcfirst's
2202 * arguments and join's arguments are created or not).
2207 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2209 * Pattern matching will set PL_lex_op to the pattern-matching op to
2210 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2212 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2214 * Everything else becomes a FUNC.
2216 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2217 * had an OP_CONST or OP_READLINE). This just sets us up for a
2218 * call to S_sublex_push().
2222 S_sublex_start(pTHX)
2225 register const I32 op_type = pl_yylval.ival;
2227 if (op_type == OP_NULL) {
2228 pl_yylval.opval = PL_lex_op;
2232 if (op_type == OP_CONST || op_type == OP_READLINE) {
2233 SV *sv = tokeq(PL_lex_stuff);
2235 if (SvTYPE(sv) == SVt_PVIV) {
2236 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2238 const char * const p = SvPV_const(sv, len);
2239 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2243 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2244 PL_lex_stuff = NULL;
2245 /* Allow <FH> // "foo" */
2246 if (op_type == OP_READLINE)
2247 PL_expect = XTERMORDORDOR;
2250 else if (op_type == OP_BACKTICK && PL_lex_op) {
2251 /* readpipe() vas overriden */
2252 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2253 pl_yylval.opval = PL_lex_op;
2255 PL_lex_stuff = NULL;
2259 PL_sublex_info.super_state = PL_lex_state;
2260 PL_sublex_info.sub_inwhat = (U16)op_type;
2261 PL_sublex_info.sub_op = PL_lex_op;
2262 PL_lex_state = LEX_INTERPPUSH;
2266 pl_yylval.opval = PL_lex_op;
2276 * Create a new scope to save the lexing state. The scope will be
2277 * ended in S_sublex_done. Returns a '(', starting the function arguments
2278 * to the uc, lc, etc. found before.
2279 * Sets PL_lex_state to LEX_INTERPCONCAT.
2288 PL_lex_state = PL_sublex_info.super_state;
2289 SAVEBOOL(PL_lex_dojoin);
2290 SAVEI32(PL_lex_brackets);
2291 SAVEI32(PL_lex_casemods);
2292 SAVEI32(PL_lex_starts);
2293 SAVEI8(PL_lex_state);
2294 SAVEVPTR(PL_lex_inpat);
2295 SAVEI16(PL_lex_inwhat);
2296 SAVECOPLINE(PL_curcop);
2297 SAVEPPTR(PL_bufptr);
2298 SAVEPPTR(PL_bufend);
2299 SAVEPPTR(PL_oldbufptr);
2300 SAVEPPTR(PL_oldoldbufptr);
2301 SAVEPPTR(PL_last_lop);
2302 SAVEPPTR(PL_last_uni);
2303 SAVEPPTR(PL_linestart);
2304 SAVESPTR(PL_linestr);
2305 SAVEGENERICPV(PL_lex_brackstack);
2306 SAVEGENERICPV(PL_lex_casestack);
2308 PL_linestr = PL_lex_stuff;
2309 PL_lex_stuff = NULL;
2311 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2312 = SvPVX(PL_linestr);
2313 PL_bufend += SvCUR(PL_linestr);
2314 PL_last_lop = PL_last_uni = NULL;
2315 SAVEFREESV(PL_linestr);
2317 PL_lex_dojoin = FALSE;
2318 PL_lex_brackets = 0;
2319 Newx(PL_lex_brackstack, 120, char);
2320 Newx(PL_lex_casestack, 12, char);
2321 PL_lex_casemods = 0;
2322 *PL_lex_casestack = '\0';
2324 PL_lex_state = LEX_INTERPCONCAT;
2325 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2327 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2328 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2329 PL_lex_inpat = PL_sublex_info.sub_op;
2331 PL_lex_inpat = NULL;
2338 * Restores lexer state after a S_sublex_push.
2345 if (!PL_lex_starts++) {
2346 SV * const sv = newSVpvs("");
2347 if (SvUTF8(PL_linestr))
2349 PL_expect = XOPERATOR;
2350 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2354 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2355 PL_lex_state = LEX_INTERPCASEMOD;
2359 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2360 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2361 PL_linestr = PL_lex_repl;
2363 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2364 PL_bufend += SvCUR(PL_linestr);
2365 PL_last_lop = PL_last_uni = NULL;
2366 SAVEFREESV(PL_linestr);
2367 PL_lex_dojoin = FALSE;
2368 PL_lex_brackets = 0;
2369 PL_lex_casemods = 0;
2370 *PL_lex_casestack = '\0';
2372 if (SvEVALED(PL_lex_repl)) {
2373 PL_lex_state = LEX_INTERPNORMAL;
2375 /* we don't clear PL_lex_repl here, so that we can check later
2376 whether this is an evalled subst; that means we rely on the
2377 logic to ensure sublex_done() is called again only via the
2378 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2381 PL_lex_state = LEX_INTERPCONCAT;
2391 PL_endwhite = newSVpvs("");
2392 sv_catsv(PL_endwhite, PL_thiswhite);
2396 sv_setpvs(PL_thistoken,"");
2398 PL_realtokenstart = -1;
2402 PL_bufend = SvPVX(PL_linestr);
2403 PL_bufend += SvCUR(PL_linestr);
2404 PL_expect = XOPERATOR;
2405 PL_sublex_info.sub_inwhat = 0;
2413 Extracts a pattern, double-quoted string, or transliteration. This
2416 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2417 processing a pattern (PL_lex_inpat is true), a transliteration
2418 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2420 Returns a pointer to the character scanned up to. If this is
2421 advanced from the start pointer supplied (i.e. if anything was
2422 successfully parsed), will leave an OP for the substring scanned
2423 in pl_yylval. Caller must intuit reason for not parsing further
2424 by looking at the next characters herself.
2428 double-quoted style: \r and \n
2429 regexp special ones: \D \s
2432 case and quoting: \U \Q \E
2433 stops on @ and $, but not for $ as tail anchor
2435 In transliterations:
2436 characters are VERY literal, except for - not at the start or end
2437 of the string, which indicates a range. If the range is in bytes,
2438 scan_const expands the range to the full set of intermediate
2439 characters. If the range is in utf8, the hyphen is replaced with
2440 a certain range mark which will be handled by pmtrans() in op.c.
2442 In double-quoted strings:
2444 double-quoted style: \r and \n
2446 deprecated backrefs: \1 (in substitution replacements)
2447 case and quoting: \U \Q \E
2450 scan_const does *not* construct ops to handle interpolated strings.
2451 It stops processing as soon as it finds an embedded $ or @ variable
2452 and leaves it to the caller to work out what's going on.
2454 embedded arrays (whether in pattern or not) could be:
2455 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2457 $ in double-quoted strings must be the symbol of an embedded scalar.
2459 $ in pattern could be $foo or could be tail anchor. Assumption:
2460 it's a tail anchor if $ is the last thing in the string, or if it's
2461 followed by one of "()| \r\n\t"
2463 \1 (backreferences) are turned into $1
2465 The structure of the code is
2466 while (there's a character to process) {
2467 handle transliteration ranges
2468 skip regexp comments /(?#comment)/ and codes /(?{code})/
2469 skip #-initiated comments in //x patterns
2470 check for embedded arrays
2471 check for embedded scalars
2473 leave intact backslashes from leaveit (below)
2474 deprecate \1 in substitution replacements
2475 handle string-changing backslashes \l \U \Q \E, etc.
2476 switch (what was escaped) {
2477 handle \- in a transliteration (becomes a literal -)
2478 handle \132 (octal characters)
2479 handle \x15 and \x{1234} (hex characters)
2480 handle \N{name} (named characters)
2481 handle \cV (control characters)
2482 handle printf-style backslashes (\f, \r, \n, etc)
2485 } (end if backslash)
2486 handle regular character
2487 } (end while character to read)
2492 S_scan_const(pTHX_ char *start)
2495 register char *send = PL_bufend; /* end of the constant */
2496 SV *sv = newSV(send - start); /* sv for the constant. See
2497 note below on sizing. */
2498 register char *s = start; /* start of the constant */
2499 register char *d = SvPVX(sv); /* destination for copies */
2500 bool dorange = FALSE; /* are we in a translit range? */
2501 bool didrange = FALSE; /* did we just finish a range? */
2502 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
2503 I32 this_utf8 = UTF; /* Is the source string assumed
2504 to be UTF8? But, this can
2505 show as true when the source
2506 isn't utf8, as for example
2507 when it is entirely composed
2510 /* Note on sizing: The scanned constant is placed into sv, which is
2511 * initialized by newSV() assuming one byte of output for every byte of
2512 * input. This routine expects newSV() to allocate an extra byte for a
2513 * trailing NUL, which this routine will append if it gets to the end of
2514 * the input. There may be more bytes of input than output (eg., \N{LATIN
2515 * CAPITAL LETTER A}), or more output than input if the constant ends up
2516 * recoded to utf8, but each time a construct is found that might increase
2517 * the needed size, SvGROW() is called. Its size parameter each time is
2518 * based on the best guess estimate at the time, namely the length used so
2519 * far, plus the length the current construct will occupy, plus room for
2520 * the trailing NUL, plus one byte for every input byte still unscanned */
2524 UV literal_endpoint = 0;
2525 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2528 PERL_ARGS_ASSERT_SCAN_CONST;
2530 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2531 /* If we are doing a trans and we know we want UTF8 set expectation */
2532 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2533 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2537 while (s < send || dorange) {
2538 /* get transliterations out of the way (they're most literal) */
2539 if (PL_lex_inwhat == OP_TRANS) {
2540 /* expand a range A-Z to the full set of characters. AIE! */
2542 I32 i; /* current expanded character */
2543 I32 min; /* first character in range */
2544 I32 max; /* last character in range */
2555 char * const c = (char*)utf8_hop((U8*)d, -1);
2559 *c = (char)UTF_TO_NATIVE(0xff);
2560 /* mark the range as done, and continue */
2566 i = d - SvPVX_const(sv); /* remember current offset */
2569 SvLEN(sv) + (has_utf8 ?
2570 (512 - UTF_CONTINUATION_MARK +
2573 /* How many two-byte within 0..255: 128 in UTF-8,
2574 * 96 in UTF-8-mod. */
2576 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2578 d = SvPVX(sv) + i; /* refresh d after realloc */
2582 for (j = 0; j <= 1; j++) {
2583 char * const c = (char*)utf8_hop((U8*)d, -1);
2584 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2590 max = (U8)0xff; /* only to \xff */
2591 uvmax = uv; /* \x{100} to uvmax */
2593 d = c; /* eat endpoint chars */
2598 d -= 2; /* eat the first char and the - */
2599 min = (U8)*d; /* first char in range */
2600 max = (U8)d[1]; /* last char in range */
2607 "Invalid range \"%c-%c\" in transliteration operator",
2608 (char)min, (char)max);
2612 if (literal_endpoint == 2 &&
2613 ((isLOWER(min) && isLOWER(max)) ||
2614 (isUPPER(min) && isUPPER(max)))) {
2616 for (i = min; i <= max; i++)
2618 *d++ = NATIVE_TO_NEED(has_utf8,i);
2620 for (i = min; i <= max; i++)
2622 *d++ = NATIVE_TO_NEED(has_utf8,i);
2627 for (i = min; i <= max; i++)
2630 const U8 ch = (U8)NATIVE_TO_UTF(i);
2631 if (UNI_IS_INVARIANT(ch))
2634 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2635 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2644 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2646 *d++ = (char)UTF_TO_NATIVE(0xff);
2648 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2652 /* mark the range as done, and continue */
2656 literal_endpoint = 0;
2661 /* range begins (ignore - as first or last char) */
2662 else if (*s == '-' && s+1 < send && s != start) {
2664 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2671 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2681 literal_endpoint = 0;
2682 native_range = TRUE;
2687 /* if we get here, we're not doing a transliteration */
2689 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2690 except for the last char, which will be done separately. */
2691 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2693 while (s+1 < send && *s != ')')
2694 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2696 else if (s[2] == '{' /* This should match regcomp.c */
2697 || (s[2] == '?' && s[3] == '{'))
2700 char *regparse = s + (s[2] == '{' ? 3 : 4);
2703 while (count && (c = *regparse)) {
2704 if (c == '\\' && regparse[1])
2712 if (*regparse != ')')
2713 regparse--; /* Leave one char for continuation. */
2714 while (s < regparse)
2715 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2719 /* likewise skip #-initiated comments in //x patterns */
2720 else if (*s == '#' && PL_lex_inpat &&
2721 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2722 while (s+1 < send && *s != '\n')
2723 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2726 /* check for embedded arrays
2727 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2729 else if (*s == '@' && s[1]) {
2730 if (isALNUM_lazy_if(s+1,UTF))
2732 if (strchr(":'{$", s[1]))
2734 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2735 break; /* in regexp, neither @+ nor @- are interpolated */
2738 /* check for embedded scalars. only stop if we're sure it's a
2741 else if (*s == '$') {
2742 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2744 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2746 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2747 "Possible unintended interpolation of $\\ in regex");
2749 break; /* in regexp, $ might be tail anchor */
2753 /* End of else if chain - OP_TRANS rejoin rest */
2756 if (*s == '\\' && s+1 < send) {
2759 /* deprecate \1 in strings and substitution replacements */
2760 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2761 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2763 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2768 /* string-change backslash escapes */
2769 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2773 /* skip any other backslash escapes in a pattern */
2774 else if (PL_lex_inpat) {
2775 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2776 goto default_action;
2779 /* if we get here, it's either a quoted -, or a digit */
2782 /* quoted - in transliterations */
2784 if (PL_lex_inwhat == OP_TRANS) {
2791 if ((isALPHA(*s) || isDIGIT(*s)))
2792 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2793 "Unrecognized escape \\%c passed through",
2795 /* default action is to copy the quoted character */
2796 goto default_action;
2799 /* eg. \132 indicates the octal constant 0x132 */
2800 case '0': case '1': case '2': case '3':
2801 case '4': case '5': case '6': case '7':
2805 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2808 goto NUM_ESCAPE_INSERT;
2810 /* eg. \x24 indicates the hex constant 0x24 */
2814 char* const e = strchr(s, '}');
2815 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2816 PERL_SCAN_DISALLOW_PREFIX;
2821 yyerror("Missing right brace on \\x{}");
2825 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2831 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2832 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2838 /* Insert oct, hex, or \N{U+...} escaped character. There will
2839 * always be enough room in sv since such escapes will be
2840 * longer than any UTF-8 sequence they can end up as, except if
2841 * they force us to recode the rest of the string into utf8 */
2843 /* Here uv is the ordinal of the next character being added in
2844 * unicode (converted from native). (It has to be done before
2845 * here because \N is interpreted as unicode, and oct and hex
2847 if (!UNI_IS_INVARIANT(uv)) {
2848 if (!has_utf8 && uv > 255) {
2849 /* Might need to recode whatever we have accumulated so
2850 * far if it contains any chars variant in utf8 or
2853 SvCUR_set(sv, d - SvPVX_const(sv));
2856 /* See Note on sizing above. */
2857 sv_utf8_upgrade_flags_grow(sv,
2858 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2859 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2860 d = SvPVX(sv) + SvCUR(sv);
2865 d = (char*)uvuni_to_utf8((U8*)d, uv);
2866 if (PL_lex_inwhat == OP_TRANS &&
2867 PL_sublex_info.sub_op) {
2868 PL_sublex_info.sub_op->op_private |=
2869 (PL_lex_repl ? OPpTRANS_FROM_UTF
2873 if (uv > 255 && !dorange)
2874 native_range = FALSE;
2886 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2891 char* e = strchr(s, '}');
2897 yyerror("Missing right brace on \\N{}");
2901 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2902 /* \N{U+...} The ... is a unicode value even on EBCDIC
2904 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2905 PERL_SCAN_DISALLOW_PREFIX;
2908 uv = grok_hex(s, &len, &flags, NULL);
2909 if ( e > s && len != (STRLEN)(e - s) ) {
2913 goto NUM_ESCAPE_INSERT;
2915 res = newSVpvn(s + 1, e - s - 1);
2916 res = new_constant( NULL, 0, "charnames",
2917 res, NULL, s - 2, e - s + 3 );
2919 sv_utf8_upgrade(res);
2920 str = SvPV_const(res,len);
2921 #ifdef EBCDIC_NEVER_MIND
2922 /* charnames uses pack U and that has been
2923 * recently changed to do the below uni->native
2924 * mapping, so this would be redundant (and wrong,
2925 * the code point would be doubly converted).
2926 * But leave this in just in case the pack U change
2927 * gets revoked, but the semantics is still
2928 * desireable for charnames. --jhi */
2930 UV uv = utf8_to_uvchr((const U8*)str, 0);
2933 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2935 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2936 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2937 str = SvPV_const(res, len);
2941 /* If destination is not in utf8 but this new character is,
2942 * recode the dest to utf8 */
2943 if (!has_utf8 && SvUTF8(res)) {
2944 SvCUR_set(sv, d - SvPVX_const(sv));
2947 /* See Note on sizing above. */
2948 sv_utf8_upgrade_flags_grow(sv,
2949 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2950 len + (STRLEN)(send - s) + 1);
2951 d = SvPVX(sv) + SvCUR(sv);
2953 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2955 /* See Note on sizing above. (NOTE: SvCUR() is not set
2956 * correctly here). */
2957 const STRLEN off = d - SvPVX_const(sv);
2958 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2962 native_range = FALSE; /* \N{} is guessed to be Unicode */
2964 Copy(str, d, len, char);
2971 yyerror("Missing braces on \\N{}");
2974 /* \c is a control character */
2983 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2986 yyerror("Missing control char name in \\c");
2990 /* printf-style backslashes, formfeeds, newlines, etc */
2992 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2995 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2998 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3001 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3004 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3007 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3010 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3016 } /* end if (backslash) */
3023 /* If we started with encoded form, or already know we want it,
3024 then encode the next character */
3025 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3029 /* One might think that it is wasted effort in the case of the
3030 * source being utf8 (this_utf8 == TRUE) to take the next character
3031 * in the source, convert it to an unsigned value, and then convert
3032 * it back again. But the source has not been validated here. The
3033 * routine that does the conversion checks for errors like
3036 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3037 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3039 SvCUR_set(sv, d - SvPVX_const(sv));
3042 /* See Note on sizing above. */
3043 sv_utf8_upgrade_flags_grow(sv,
3044 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3045 need + (STRLEN)(send - s) + 1);
3046 d = SvPVX(sv) + SvCUR(sv);
3048 } else if (need > len) {
3049 /* encoded value larger than old, may need extra space (NOTE:
3050 * SvCUR() is not set correctly here). See Note on sizing
3052 const STRLEN off = d - SvPVX_const(sv);
3053 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3057 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3059 if (uv > 255 && !dorange)
3060 native_range = FALSE;
3064 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3066 } /* while loop to process each character */
3068 /* terminate the string and set up the sv */
3070 SvCUR_set(sv, d - SvPVX_const(sv));
3071 if (SvCUR(sv) >= SvLEN(sv))
3072 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3075 if (PL_encoding && !has_utf8) {
3076 sv_recode_to_utf8(sv, PL_encoding);
3082 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3083 PL_sublex_info.sub_op->op_private |=
3084 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3088 /* shrink the sv if we allocated more than we used */
3089 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3090 SvPV_shrink_to_cur(sv);
3093 /* return the substring (via pl_yylval) only if we parsed anything */
3094 if (s > PL_bufptr) {
3095 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3096 const char *const key = PL_lex_inpat ? "qr" : "q";
3097 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3101 if (PL_lex_inwhat == OP_TRANS) {
3104 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3112 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3115 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3122 * Returns TRUE if there's more to the expression (e.g., a subscript),
3125 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3127 * ->[ and ->{ return TRUE
3128 * { and [ outside a pattern are always subscripts, so return TRUE
3129 * if we're outside a pattern and it's not { or [, then return FALSE
3130 * if we're in a pattern and the first char is a {
3131 * {4,5} (any digits around the comma) returns FALSE
3132 * if we're in a pattern and the first char is a [
3134 * [SOMETHING] has a funky algorithm to decide whether it's a
3135 * character class or not. It has to deal with things like
3136 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3137 * anything else returns TRUE
3140 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3143 S_intuit_more(pTHX_ register char *s)
3147 PERL_ARGS_ASSERT_INTUIT_MORE;
3149 if (PL_lex_brackets)
3151 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3153 if (*s != '{' && *s != '[')
3158 /* In a pattern, so maybe we have {n,m}. */
3175 /* On the other hand, maybe we have a character class */
3178 if (*s == ']' || *s == '^')
3181 /* this is terrifying, and it works */
3182 int weight = 2; /* let's weigh the evidence */
3184 unsigned char un_char = 255, last_un_char;
3185 const char * const send = strchr(s,']');
3186 char tmpbuf[sizeof PL_tokenbuf * 4];
3188 if (!send) /* has to be an expression */
3191 Zero(seen,256,char);
3194 else if (isDIGIT(*s)) {
3196 if (isDIGIT(s[1]) && s[2] == ']')
3202 for (; s < send; s++) {
3203 last_un_char = un_char;
3204 un_char = (unsigned char)*s;
3209 weight -= seen[un_char] * 10;
3210 if (isALNUM_lazy_if(s+1,UTF)) {
3212 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3213 len = (int)strlen(tmpbuf);
3214 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3219 else if (*s == '$' && s[1] &&
3220 strchr("[#!%*<>()-=",s[1])) {
3221 if (/*{*/ strchr("])} =",s[2]))
3230 if (strchr("wds]",s[1]))
3232 else if (seen[(U8)'\''] || seen[(U8)'"'])
3234 else if (strchr("rnftbxcav",s[1]))
3236 else if (isDIGIT(s[1])) {
3238 while (s[1] && isDIGIT(s[1]))
3248 if (strchr("aA01! ",last_un_char))
3250 if (strchr("zZ79~",s[1]))
3252 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3253 weight -= 5; /* cope with negative subscript */
3256 if (!isALNUM(last_un_char)
3257 && !(last_un_char == '$' || last_un_char == '@'
3258 || last_un_char == '&')
3259 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3264 if (keyword(tmpbuf, d - tmpbuf, 0))
3267 if (un_char == last_un_char + 1)
3269 weight -= seen[un_char];
3274 if (weight >= 0) /* probably a character class */
3284 * Does all the checking to disambiguate
3286 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3287 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3289 * First argument is the stuff after the first token, e.g. "bar".
3291 * Not a method if bar is a filehandle.
3292 * Not a method if foo is a subroutine prototyped to take a filehandle.
3293 * Not a method if it's really "Foo $bar"
3294 * Method if it's "foo $bar"
3295 * Not a method if it's really "print foo $bar"
3296 * Method if it's really "foo package::" (interpreted as package->foo)
3297 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3298 * Not a method if bar is a filehandle or package, but is quoted with
3303 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3306 char *s = start + (*start == '$');
3307 char tmpbuf[sizeof PL_tokenbuf];
3314 PERL_ARGS_ASSERT_INTUIT_METHOD;
3317 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3321 const char *proto = SvPVX_const(cv);
3332 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3333 /* start is the beginning of the possible filehandle/object,
3334 * and s is the end of it
3335 * tmpbuf is a copy of it
3338 if (*start == '$') {
3339 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3340 isUPPER(*PL_tokenbuf))
3343 len = start - SvPVX(PL_linestr);
3347 start = SvPVX(PL_linestr) + len;
3351 return *s == '(' ? FUNCMETH : METHOD;
3353 if (!keyword(tmpbuf, len, 0)) {
3354 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3358 soff = s - SvPVX(PL_linestr);
3362 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3363 if (indirgv && GvCVu(indirgv))
3365 /* filehandle or package name makes it a method */
3366 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3368 soff = s - SvPVX(PL_linestr);
3371 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3372 return 0; /* no assumptions -- "=>" quotes bearword */
3374 start_force(PL_curforce);
3375 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3376 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3377 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3379 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3384 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3386 return *s == '(' ? FUNCMETH : METHOD;
3392 /* Encoded script support. filter_add() effectively inserts a
3393 * 'pre-processing' function into the current source input stream.
3394 * Note that the filter function only applies to the current source file
3395 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3397 * The datasv parameter (which may be NULL) can be used to pass
3398 * private data to this instance of the filter. The filter function
3399 * can recover the SV using the FILTER_DATA macro and use it to
3400 * store private buffers and state information.
3402 * The supplied datasv parameter is upgraded to a PVIO type
3403 * and the IoDIRP/IoANY field is used to store the function pointer,
3404 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3405 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3406 * private use must be set using malloc'd pointers.
3410 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3419 if (!PL_rsfp_filters)
3420 PL_rsfp_filters = newAV();
3423 SvUPGRADE(datasv, SVt_PVIO);
3424 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3425 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3426 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3427 FPTR2DPTR(void *, IoANY(datasv)),
3428 SvPV_nolen(datasv)));
3429 av_unshift(PL_rsfp_filters, 1);
3430 av_store(PL_rsfp_filters, 0, datasv) ;
3435 /* Delete most recently added instance of this filter function. */
3437 Perl_filter_del(pTHX_ filter_t funcp)
3442 PERL_ARGS_ASSERT_FILTER_DEL;
3445 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3446 FPTR2DPTR(void*, funcp)));
3448 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3450 /* if filter is on top of stack (usual case) just pop it off */
3451 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3452 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3453 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3454 IoANY(datasv) = (void *)NULL;
3455 sv_free(av_pop(PL_rsfp_filters));
3459 /* we need to search for the correct entry and clear it */
3460 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3464 /* Invoke the idxth filter function for the current rsfp. */
3465 /* maxlen 0 = read one text line */
3467 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3472 /* This API is bad. It should have been using unsigned int for maxlen.
3473 Not sure if we want to change the API, but if not we should sanity
3474 check the value here. */
3475 const unsigned int correct_length
3484 PERL_ARGS_ASSERT_FILTER_READ;
3486 if (!PL_parser || !PL_rsfp_filters)
3488 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
3489 /* Provide a default input filter to make life easy. */
3490 /* Note that we append to the line. This is handy. */
3491 DEBUG_P(PerlIO_printf(Perl_debug_log,
3492 "filter_read %d: from rsfp\n", idx));
3493 if (correct_length) {
3496 const int old_len = SvCUR(buf_sv);
3498 /* ensure buf_sv is large enough */
3499 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3500 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3501 correct_length)) <= 0) {
3502 if (PerlIO_error(PL_rsfp))
3503 return -1; /* error */
3505 return 0 ; /* end of file */
3507 SvCUR_set(buf_sv, old_len + len) ;
3508 SvPVX(buf_sv)[old_len + len] = '\0';
3511 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3512 if (PerlIO_error(PL_rsfp))
3513 return -1; /* error */
3515 return 0 ; /* end of file */
3518 return SvCUR(buf_sv);
3520 /* Skip this filter slot if filter has been deleted */
3521 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3522 DEBUG_P(PerlIO_printf(Perl_debug_log,
3523 "filter_read %d: skipped (filter deleted)\n",
3525 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3527 /* Get function pointer hidden within datasv */
3528 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3529 DEBUG_P(PerlIO_printf(Perl_debug_log,
3530 "filter_read %d: via function %p (%s)\n",
3531 idx, (void*)datasv, SvPV_nolen_const(datasv)));
3532 /* Call function. The function is expected to */
3533 /* call "FILTER_READ(idx+1, buf_sv)" first. */
3534 /* Return: <0:error, =0:eof, >0:not eof */
3535 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3539 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3543 PERL_ARGS_ASSERT_FILTER_GETS;
3545 #ifdef PERL_CR_FILTER
3546 if (!PL_rsfp_filters) {
3547 filter_add(S_cr_textfilter,NULL);
3550 if (PL_rsfp_filters) {
3552 SvCUR_set(sv, 0); /* start with empty line */
3553 if (FILTER_READ(0, sv, 0) > 0)
3554 return ( SvPVX(sv) ) ;
3559 return (sv_gets(sv, PL_rsfp, append));
3563 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3568 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3570 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3574 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3575 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3577 return GvHV(gv); /* Foo:: */
3580 /* use constant CLASS => 'MyClass' */
3581 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3582 if (gv && GvCV(gv)) {
3583 SV * const sv = cv_const_sv(GvCV(gv));
3585 pkgname = SvPV_const(sv, len);
3588 return gv_stashpvn(pkgname, len, 0);
3592 * S_readpipe_override
3593 * Check whether readpipe() is overriden, and generates the appropriate
3594 * optree, provided sublex_start() is called afterwards.
3597 S_readpipe_override(pTHX)
3600 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3601 pl_yylval.ival = OP_BACKTICK;
3603 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3605 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3606 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3607 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3609 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3610 append_elem(OP_LIST,
3611 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3612 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3619 * The intent of this yylex wrapper is to minimize the changes to the
3620 * tokener when we aren't interested in collecting madprops. It remains
3621 * to be seen how successful this strategy will be...
3628 char *s = PL_bufptr;
3630 /* make sure PL_thiswhite is initialized */
3634 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3635 if (PL_pending_ident)
3636 return S_pending_ident(aTHX);
3638 /* previous token ate up our whitespace? */
3639 if (!PL_lasttoke && PL_nextwhite) {
3640 PL_thiswhite = PL_nextwhite;
3644 /* isolate the token, and figure out where it is without whitespace */
3645 PL_realtokenstart = -1;
3649 assert(PL_curforce < 0);
3651 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3652 if (!PL_thistoken) {
3653 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3654 PL_thistoken = newSVpvs("");
3656 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3657 PL_thistoken = newSVpvn(tstart, s - tstart);
3660 if (PL_thismad) /* install head */
3661 CURMAD('X', PL_thistoken);
3664 /* last whitespace of a sublex? */
3665 if (optype == ')' && PL_endwhite) {
3666 CURMAD('X', PL_endwhite);
3671 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3672 if (!PL_thiswhite && !PL_endwhite && !optype) {
3673 sv_free(PL_thistoken);
3678 /* put off final whitespace till peg */
3679 if (optype == ';' && !PL_rsfp) {
3680 PL_nextwhite = PL_thiswhite;
3683 else if (PL_thisopen) {
3684 CURMAD('q', PL_thisopen);
3686 sv_free(PL_thistoken);
3690 /* Store actual token text as madprop X */
3691 CURMAD('X', PL_thistoken);
3695 /* add preceding whitespace as madprop _ */
3696 CURMAD('_', PL_thiswhite);
3700 /* add quoted material as madprop = */
3701 CURMAD('=', PL_thisstuff);
3705 /* add terminating quote as madprop Q */
3706 CURMAD('Q', PL_thisclose);
3710 /* special processing based on optype */
3714 /* opval doesn't need a TOKEN since it can already store mp */
3724 if (pl_yylval.opval)
3725 append_madprops(PL_thismad, pl_yylval.opval, 0);
3733 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3742 /* remember any fake bracket that lexer is about to discard */
3743 if (PL_lex_brackets == 1 &&
3744 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3747 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3750 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3751 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3754 break; /* don't bother looking for trailing comment */
3763 /* attach a trailing comment to its statement instead of next token */
3767 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3769 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3771 if (*s == '\n' || *s == '#') {
3772 while (s < PL_bufend && *s != '\n')
3776 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3777 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3794 /* Create new token struct. Note: opvals return early above. */
3795 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3802 S_tokenize_use(pTHX_ int is_use, char *s) {
3805 PERL_ARGS_ASSERT_TOKENIZE_USE;
3807 if (PL_expect != XSTATE)
3808 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3809 is_use ? "use" : "no"));
3811 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3812 s = force_version(s, TRUE);
3813 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3814 start_force(PL_curforce);
3815 NEXTVAL_NEXTTOKE.opval = NULL;
3818 else if (*s == 'v') {
3819 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3820 s = force_version(s, FALSE);
3824 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3825 s = force_version(s, FALSE);
3827 pl_yylval.ival = is_use;
3831 static const char* const exp_name[] =
3832 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3833 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3840 Works out what to call the token just pulled out of the input
3841 stream. The yacc parser takes care of taking the ops we return and
3842 stitching them into a tree.
3848 if read an identifier
3849 if we're in a my declaration
3850 croak if they tried to say my($foo::bar)
3851 build the ops for a my() declaration
3852 if it's an access to a my() variable
3853 are we in a sort block?
3854 croak if my($a); $a <=> $b
3855 build ops for access to a my() variable
3856 if in a dq string, and they've said @foo and we can't find @foo
3858 build ops for a bareword
3859 if we already built the token before, use it.
3864 #pragma segment Perl_yylex
3870 register char *s = PL_bufptr;
3876 /* orig_keyword, gvp, and gv are initialized here because
3877 * jump to the label just_a_word_zero can bypass their
3878 * initialization later. */
3879 I32 orig_keyword = 0;
3884 SV* tmp = newSVpvs("");
3885 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3886 (IV)CopLINE(PL_curcop),
3887 lex_state_names[PL_lex_state],
3888 exp_name[PL_expect],
3889 pv_display(tmp, s, strlen(s), 0, 60));
3892 /* check if there's an identifier for us to look at */
3893 if (PL_pending_ident)
3894 return REPORT(S_pending_ident(aTHX));
3896 /* no identifier pending identification */
3898 switch (PL_lex_state) {
3900 case LEX_NORMAL: /* Some compilers will produce faster */
3901 case LEX_INTERPNORMAL: /* code if we comment these out. */
3905 /* when we've already built the next token, just pull it out of the queue */
3909 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3911 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3912 PL_nexttoke[PL_lasttoke].next_mad = 0;
3913 if (PL_thismad && PL_thismad->mad_key == '_') {
3914 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3915 PL_thismad->mad_val = 0;
3916 mad_free(PL_thismad);
3921 PL_lex_state = PL_lex_defer;
3922 PL_expect = PL_lex_expect;
3923 PL_lex_defer = LEX_NORMAL;
3924 if (!PL_nexttoke[PL_lasttoke].next_type)
3929 pl_yylval = PL_nextval[PL_nexttoke];
3931 PL_lex_state = PL_lex_defer;
3932 PL_expect = PL_lex_expect;
3933 PL_lex_defer = LEX_NORMAL;
3937 /* FIXME - can these be merged? */
3938 return(PL_nexttoke[PL_lasttoke].next_type);
3940 return REPORT(PL_nexttype[PL_nexttoke]);
3943 /* interpolated case modifiers like \L \U, including \Q and \E.
3944 when we get here, PL_bufptr is at the \
3946 case LEX_INTERPCASEMOD:
3948 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3949 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3951 /* handle \E or end of string */
3952 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3954 if (PL_lex_casemods) {
3955 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3956 PL_lex_casestack[PL_lex_casemods] = '\0';
3958 if (PL_bufptr != PL_bufend
3959 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3961 PL_lex_state = LEX_INTERPCONCAT;
3964 PL_thistoken = newSVpvs("\\E");
3970 while (PL_bufptr != PL_bufend &&
3971 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3973 PL_thiswhite = newSVpvs("");
3974 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3978 if (PL_bufptr != PL_bufend)
3981 PL_lex_state = LEX_INTERPCONCAT;
3985 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3986 "### Saw case modifier\n"); });
3988 if (s[1] == '\\' && s[2] == 'E') {
3991 PL_thiswhite = newSVpvs("");
3992 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3995 PL_lex_state = LEX_INTERPCONCAT;
4000 if (!PL_madskills) /* when just compiling don't need correct */
4001 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4002 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4003 if ((*s == 'L' || *s == 'U') &&
4004 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4005 PL_lex_casestack[--PL_lex_casemods] = '\0';
4008 if (PL_lex_casemods > 10)
4009 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4010 PL_lex_casestack[PL_lex_casemods++] = *s;
4011 PL_lex_casestack[PL_lex_casemods] = '\0';
4012 PL_lex_state = LEX_INTERPCONCAT;
4013 start_force(PL_curforce);
4014 NEXTVAL_NEXTTOKE.ival = 0;
4016 start_force(PL_curforce);
4018 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4020 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4022 NEXTVAL_NEXTTOKE.ival = OP_LC;
4024 NEXTVAL_NEXTTOKE.ival = OP_UC;
4026 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4028 Perl_croak(aTHX_ "panic: yylex");
4030 SV* const tmpsv = newSVpvs("\\ ");
4031 /* replace the space with the character we want to escape
4033 SvPVX(tmpsv)[1] = *s;
4039 if (PL_lex_starts) {
4045 sv_free(PL_thistoken);
4046 PL_thistoken = newSVpvs("");
4049 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4050 if (PL_lex_casemods == 1 && PL_lex_inpat)
4059 case LEX_INTERPPUSH:
4060 return REPORT(sublex_push());
4062 case LEX_INTERPSTART:
4063 if (PL_bufptr == PL_bufend)
4064 return REPORT(sublex_done());
4065 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4066 "### Interpolated variable\n"); });
4068 PL_lex_dojoin = (*PL_bufptr == '@');
4069 PL_lex_state = LEX_INTERPNORMAL;
4070 if (PL_lex_dojoin) {
4071 start_force(PL_curforce);
4072 NEXTVAL_NEXTTOKE.ival = 0;
4074 start_force(PL_curforce);
4075 force_ident("\"", '$');
4076 start_force(PL_curforce);
4077 NEXTVAL_NEXTTOKE.ival = 0;
4079 start_force(PL_curforce);
4080 NEXTVAL_NEXTTOKE.ival = 0;
4082 start_force(PL_curforce);
4083 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4086 if (PL_lex_starts++) {
4091 sv_free(PL_thistoken);
4092 PL_thistoken = newSVpvs("");
4095 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4096 if (!PL_lex_casemods && PL_lex_inpat)
4103 case LEX_INTERPENDMAYBE:
4104 if (intuit_more(PL_bufptr)) {
4105 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4111 if (PL_lex_dojoin) {
4112 PL_lex_dojoin = FALSE;
4113 PL_lex_state = LEX_INTERPCONCAT;
4117 sv_free(PL_thistoken);
4118 PL_thistoken = newSVpvs("");
4123 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4124 && SvEVALED(PL_lex_repl))
4126 if (PL_bufptr != PL_bufend)
4127 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4131 case LEX_INTERPCONCAT:
4133 if (PL_lex_brackets)
4134 Perl_croak(aTHX_ "panic: INTERPCONCAT");
4136 if (PL_bufptr == PL_bufend)
4137 return REPORT(sublex_done());
4139 if (SvIVX(PL_linestr) == '\'') {
4140 SV *sv = newSVsv(PL_linestr);
4143 else if ( PL_hints & HINT_NEW_RE )
4144 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4145 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4149 s = scan_const(PL_bufptr);
4151 PL_lex_state = LEX_INTERPCASEMOD;
4153 PL_lex_state = LEX_INTERPSTART;
4156 if (s != PL_bufptr) {
4157 start_force(PL_curforce);
4159 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4161 NEXTVAL_NEXTTOKE = pl_yylval;
4164 if (PL_lex_starts++) {
4168 sv_free(PL_thistoken);
4169 PL_thistoken = newSVpvs("");
4172 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4173 if (!PL_lex_casemods && PL_lex_inpat)
4186 PL_lex_state = LEX_NORMAL;
4187 s = scan_formline(PL_bufptr);
4188 if (!PL_lex_formbrack)
4194 PL_oldoldbufptr = PL_oldbufptr;
4200 sv_free(PL_thistoken);
4203 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4207 if (isIDFIRST_lazy_if(s,UTF))
4210 unsigned char c = *s;
4211 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4212 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4213 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4218 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4222 goto fake_eof; /* emulate EOF on ^D or ^Z */
4231 if (PL_lex_brackets) {
4232 yyerror((const char *)
4234 ? "Format not terminated"
4235 : "Missing right curly or square bracket"));
4237 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4238 "### Tokener got EOF\n");
4242 if (s++ < PL_bufend)
4243 goto retry; /* ignore stray nulls */
4246 if (!PL_in_eval && !PL_preambled) {
4247 PL_preambled = TRUE;
4253 /* Generate a string of Perl code to load the debugger.
4254 * If PERL5DB is set, it will return the contents of that,
4255 * otherwise a compile-time require of perl5db.pl. */
4257 const char * const pdb = PerlEnv_getenv("PERL5DB");
4260 sv_setpv(PL_linestr, pdb);
4261 sv_catpvs(PL_linestr,";");
4263 SETERRNO(0,SS_NORMAL);
4264 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4267 sv_setpvs(PL_linestr,"");
4268 if (PL_preambleav) {
4269 SV **svp = AvARRAY(PL_preambleav);
4270 SV **const end = svp + AvFILLp(PL_preambleav);
4272 sv_catsv(PL_linestr, *svp);
4274 sv_catpvs(PL_linestr, ";");
4276 sv_free(MUTABLE_SV(PL_preambleav));
4277 PL_preambleav = NULL;
4280 sv_catpvs(PL_linestr,
4281 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4282 if (PL_minus_n || PL_minus_p) {
4283 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4285 sv_catpvs(PL_linestr,"chomp;");
4288 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4289 || *PL_splitstr == '"')
4290 && strchr(PL_splitstr + 1, *PL_splitstr))
4291 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4293 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4294 bytes can be used as quoting characters. :-) */
4295 const char *splits = PL_splitstr;
4296 sv_catpvs(PL_linestr, "our @F=split(q\0");
4299 if (*splits == '\\')
4300 sv_catpvn(PL_linestr, splits, 1);
4301 sv_catpvn(PL_linestr, splits, 1);
4302 } while (*splits++);
4303 /* This loop will embed the trailing NUL of
4304 PL_linestr as the last thing it does before
4306 sv_catpvs(PL_linestr, ");");
4310 sv_catpvs(PL_linestr,"our @F=split(' ');");
4313 sv_catpvs(PL_linestr, "\n");
4314 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4315 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4316 PL_last_lop = PL_last_uni = NULL;
4317 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4318 update_debugger_info(PL_linestr, NULL, 0);
4323 bof = PL_rsfp ? TRUE : FALSE;
4326 fake_eof = LEX_FAKE_EOF;
4328 PL_bufptr = PL_bufend;
4329 CopLINE_inc(PL_curcop);
4330 if (!lex_next_chunk(fake_eof)) {
4331 CopLINE_dec(PL_curcop);
4333 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4335 CopLINE_dec(PL_curcop);
4338 PL_realtokenstart = -1;
4341 /* If it looks like the start of a BOM or raw UTF-16,
4342 * check if it in fact is. */
4343 if (bof && PL_rsfp &&
4348 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4350 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4351 s = swallow_bom((U8*)s);
4355 /* Incest with pod. */
4358 sv_catsv(PL_thiswhite, PL_linestr);
4360 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4361 sv_setpvs(PL_linestr, "");
4362 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4363 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4364 PL_last_lop = PL_last_uni = NULL;
4365 PL_doextract = FALSE;
4369 } while (PL_doextract);
4370 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4371 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4372 PL_last_lop = PL_last_uni = NULL;
4373 if (CopLINE(PL_curcop) == 1) {
4374 while (s < PL_bufend && isSPACE(*s))
4376 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4380 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4384 if (*s == '#' && *(s+1) == '!')
4386 #ifdef ALTERNATE_SHEBANG
4388 static char const as[] = ALTERNATE_SHEBANG;
4389 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4390 d = s + (sizeof(as) - 1);
4392 #endif /* ALTERNATE_SHEBANG */
4401 while (*d && !isSPACE(*d))
4405 #ifdef ARG_ZERO_IS_SCRIPT
4406 if (ipathend > ipath) {
4408 * HP-UX (at least) sets argv[0] to the script name,
4409 * which makes $^X incorrect. And Digital UNIX and Linux,
4410 * at least, set argv[0] to the basename of the Perl
4411 * interpreter. So, having found "#!", we'll set it right.
4413 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4415 assert(SvPOK(x) || SvGMAGICAL(x));
4416 if (sv_eq(x, CopFILESV(PL_curcop))) {
4417 sv_setpvn(x, ipath, ipathend - ipath);
4423 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4424 const char * const lstart = SvPV_const(x,llen);
4426 bstart += blen - llen;
4427 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4428 sv_setpvn(x, ipath, ipathend - ipath);
4433 TAINT_NOT; /* $^X is always tainted, but that's OK */
4435 #endif /* ARG_ZERO_IS_SCRIPT */
4440 d = instr(s,"perl -");
4442 d = instr(s,"perl");
4444 /* avoid getting into infinite loops when shebang
4445 * line contains "Perl" rather than "perl" */
4447 for (d = ipathend-4; d >= ipath; --d) {
4448 if ((*d == 'p' || *d == 'P')
4449 && !ibcmp(d, "perl", 4))
4459 #ifdef ALTERNATE_SHEBANG
4461 * If the ALTERNATE_SHEBANG on this system starts with a
4462 * character that can be part of a Perl expression, then if
4463 * we see it but not "perl", we're probably looking at the
4464 * start of Perl code, not a request to hand off to some
4465 * other interpreter. Similarly, if "perl" is there, but
4466 * not in the first 'word' of the line, we assume the line
4467 * contains the start of the Perl program.
4469 if (d && *s != '#') {
4470 const char *c = ipath;
4471 while (*c && !strchr("; \t\r\n\f\v#", *c))
4474 d = NULL; /* "perl" not in first word; ignore */
4476 *s = '#'; /* Don't try to parse shebang line */
4478 #endif /* ALTERNATE_SHEBANG */
4483 !instr(s,"indir") &&
4484 instr(PL_origargv[0],"perl"))
4491 while (s < PL_bufend && isSPACE(*s))
4493 if (s < PL_bufend) {
4494 Newx(newargv,PL_origargc+3,char*);
4496 while (s < PL_bufend && !isSPACE(*s))
4499 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4502 newargv = PL_origargv;
4505 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4507 Perl_croak(aTHX_ "Can't exec %s", ipath);
4510 while (*d && !isSPACE(*d))
4512 while (SPACE_OR_TAB(*d))
4516 const bool switches_done = PL_doswitches;
4517 const U32 oldpdb = PL_perldb;
4518 const bool oldn = PL_minus_n;
4519 const bool oldp = PL_minus_p;
4523 bool baduni = FALSE;
4525 const char *d2 = d1 + 1;
4526 if (parse_unicode_opts((const char **)&d2)
4530 if (baduni || *d1 == 'M' || *d1 == 'm') {
4531 const char * const m = d1;
4532 while (*d1 && !isSPACE(*d1))
4534 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4537 d1 = moreswitches(d1);
4539 if (PL_doswitches && !switches_done) {
4540 int argc = PL_origargc;
4541 char **argv = PL_origargv;
4544 } while (argc && argv[0][0] == '-' && argv[0][1]);
4545 init_argv_symbols(argc,argv);
4547 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4548 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4549 /* if we have already added "LINE: while (<>) {",
4550 we must not do it again */
4552 sv_setpvs(PL_linestr, "");
4553 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4554 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4555 PL_last_lop = PL_last_uni = NULL;
4556 PL_preambled = FALSE;
4557 if (PERLDB_LINE || PERLDB_SAVESRC)
4558 (void)gv_fetchfile(PL_origfilename);
4565 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4567 PL_lex_state = LEX_FORMLINE;
4572 #ifdef PERL_STRICT_CR
4573 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4575 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4577 case ' ': case '\t': case '\f': case 013:
4579 PL_realtokenstart = -1;
4581 PL_thiswhite = newSVpvs("");
4582 sv_catpvn(PL_thiswhite, s, 1);
4589 PL_realtokenstart = -1;
4593 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4594 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4595 /* handle eval qq[#line 1 "foo"\n ...] */
4596 CopLINE_dec(PL_curcop);
4599 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4601 if (!PL_in_eval || PL_rsfp)
4606 while (d < PL_bufend && *d != '\n')
4610 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4611 Perl_croak(aTHX_ "panic: input overflow");
4614 PL_thiswhite = newSVpvn(s, d - s);
4619 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4621 PL_lex_state = LEX_FORMLINE;
4627 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4628 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4631 TOKEN(PEG); /* make sure any #! line is accessible */
4636 /* if (PL_madskills && PL_lex_formbrack) { */
4638 while (d < PL_bufend && *d != '\n')
4642 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4643 Perl_croak(aTHX_ "panic: input overflow");
4644 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4646 PL_thiswhite = newSVpvs("");
4647 if (CopLINE(PL_curcop) == 1) {
4648 sv_setpvs(PL_thiswhite, "");
4651 sv_catpvn(PL_thiswhite, s, d - s);
4665 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4673 while (s < PL_bufend && SPACE_OR_TAB(*s))
4676 if (strnEQ(s,"=>",2)) {
4677 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4678 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4679 OPERATOR('-'); /* unary minus */
4681 PL_last_uni = PL_oldbufptr;
4683 case 'r': ftst = OP_FTEREAD; break;
4684 case 'w': ftst = OP_FTEWRITE; break;
4685 case 'x': ftst = OP_FTEEXEC; break;
4686 case 'o': ftst = OP_FTEOWNED; break;
4687 case 'R': ftst = OP_FTRREAD; break;
4688 case 'W': ftst = OP_FTRWRITE; break;
4689 case 'X': ftst = OP_FTREXEC; break;
4690 case 'O': ftst = OP_FTROWNED; break;
4691 case 'e': ftst = OP_FTIS; break;
4692 case 'z': ftst = OP_FTZERO; break;
4693 case 's': ftst = OP_FTSIZE; break;
4694 case 'f': ftst = OP_FTFILE; break;
4695 case 'd': ftst = OP_FTDIR; break;
4696 case 'l': ftst = OP_FTLINK; break;
4697 case 'p': ftst = OP_FTPIPE; break;
4698 case 'S': ftst = OP_FTSOCK; break;
4699 case 'u': ftst = OP_FTSUID; break;
4700 case 'g': ftst = OP_FTSGID; break;
4701 case 'k': ftst = OP_FTSVTX; break;
4702 case 'b': ftst = OP_FTBLK; break;
4703 case 'c': ftst = OP_FTCHR; break;
4704 case 't': ftst = OP_FTTTY; break;
4705 case 'T': ftst = OP_FTTEXT; break;
4706 case 'B': ftst = OP_FTBINARY; break;
4707 case 'M': case 'A': case 'C':
4708 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4710 case 'M': ftst = OP_FTMTIME; break;
4711 case 'A': ftst = OP_FTATIME; break;
4712 case 'C': ftst = OP_FTCTIME; break;
4720 PL_last_lop_op = (OPCODE)ftst;
4721 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4722 "### Saw file test %c\n", (int)tmp);
4727 /* Assume it was a minus followed by a one-letter named
4728 * subroutine call (or a -bareword), then. */
4729 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4730 "### '-%c' looked like a file test but was not\n",
4737 const char tmp = *s++;
4740 if (PL_expect == XOPERATOR)
4745 else if (*s == '>') {
4748 if (isIDFIRST_lazy_if(s,UTF)) {
4749 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4757 if (PL_expect == XOPERATOR)
4760 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4762 OPERATOR('-'); /* unary minus */
4768 const char tmp = *s++;
4771 if (PL_expect == XOPERATOR)
4776 if (PL_expect == XOPERATOR)
4779 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4786 if (PL_expect != XOPERATOR) {
4787 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4788 PL_expect = XOPERATOR;
4789 force_ident(PL_tokenbuf, '*');
4802 if (PL_expect == XOPERATOR) {
4806 PL_tokenbuf[0] = '%';
4807 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4808 sizeof PL_tokenbuf - 1, FALSE);
4809 if (!PL_tokenbuf[1]) {
4812 PL_pending_ident = '%';
4821 const char tmp = *s++;
4826 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4833 const char tmp = *s++;
4839 goto just_a_word_zero_gv;
4842 switch (PL_expect) {
4848 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4850 PL_bufptr = s; /* update in case we back off */
4852 deprecate(":= for an empty attribute list");
4859 PL_expect = XTERMBLOCK;
4862 stuffstart = s - SvPVX(PL_linestr) - 1;
4866 while (isIDFIRST_lazy_if(s,UTF)) {
4869 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4870 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4871 if (tmp < 0) tmp = -tmp;
4886 sv = newSVpvn(s, len);
4888 d = scan_str(d,TRUE,TRUE);
4890 /* MUST advance bufptr here to avoid bogus
4891 "at end of line" context messages from yyerror().
4893 PL_bufptr = s + len;
4894 yyerror("Unterminated attribute parameter in attribute list");
4898 return REPORT(0); /* EOF indicator */
4902 sv_catsv(sv, PL_lex_stuff);
4903 attrs = append_elem(OP_LIST, attrs,
4904 newSVOP(OP_CONST, 0, sv));
4905 SvREFCNT_dec(PL_lex_stuff);
4906 PL_lex_stuff = NULL;
4909 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4911 if (PL_in_my == KEY_our) {
4912 deprecate(":unique");
4915 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4918 /* NOTE: any CV attrs applied here need to be part of
4919 the CVf_BUILTIN_ATTRS define in cv.h! */
4920 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4922 CvLVALUE_on(PL_compcv);
4924 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4926 deprecate(":locked");
4928 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4930 CvMETHOD_on(PL_compcv);
4932 /* After we've set the flags, it could be argued that
4933 we don't need to do the attributes.pm-based setting
4934 process, and shouldn't bother appending recognized
4935 flags. To experiment with that, uncomment the
4936 following "else". (Note that's already been
4937 uncommented. That keeps the above-applied built-in
4938 attributes from being intercepted (and possibly
4939 rejected) by a package's attribute routines, but is
4940 justified by the performance win for the common case
4941 of applying only built-in attributes.) */
4943 attrs = append_elem(OP_LIST, attrs,
4944 newSVOP(OP_CONST, 0,
4948 if (*s == ':' && s[1] != ':')
4951 break; /* require real whitespace or :'s */
4952 /* XXX losing whitespace on sequential attributes here */
4956 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4957 if (*s != ';' && *s != '}' && *s != tmp
4958 && (tmp != '=' || *s != ')')) {
4959 const char q = ((*s == '\'') ? '"' : '\'');
4960 /* If here for an expression, and parsed no attrs, back
4962 if (tmp == '=' && !attrs) {
4966 /* MUST advance bufptr here to avoid bogus "at end of line"
4967 context messages from yyerror().
4970 yyerror( (const char *)
4972 ? Perl_form(aTHX_ "Invalid separator character "
4973 "%c%c%c in attribute list", q, *s, q)
4974 : "Unterminated attribute list" ) );
4982 start_force(PL_curforce);
4983 NEXTVAL_NEXTTOKE.opval = attrs;
4984 CURMAD('_', PL_nextwhite);
4989 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4990 (s - SvPVX(PL_linestr)) - stuffstart);
4998 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4999 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5007 const char tmp = *s++;
5012 const char tmp = *s++;
5020 if (PL_lex_brackets <= 0)
5021 yyerror("Unmatched right square bracket");
5024 if (PL_lex_state == LEX_INTERPNORMAL) {
5025 if (PL_lex_brackets == 0) {
5026 if (*s == '-' && s[1] == '>')
5027 PL_lex_state = LEX_INTERPENDMAYBE;
5028 else if (*s != '[' && *s != '{')
5029 PL_lex_state = LEX_INTERPEND;
5036 if (PL_lex_brackets > 100) {
5037 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5039 switch (PL_expect) {
5041 if (PL_lex_formbrack) {
5045 if (PL_oldoldbufptr == PL_last_lop)
5046 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5048 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5049 OPERATOR(HASHBRACK);
5051 while (s < PL_bufend && SPACE_OR_TAB(*s))
5054 PL_tokenbuf[0] = '\0';
5055 if (d < PL_bufend && *d == '-') {
5056 PL_tokenbuf[0] = '-';
5058 while (d < PL_bufend && SPACE_OR_TAB(*d))
5061 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5062 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5064 while (d < PL_bufend && SPACE_OR_TAB(*d))
5067 const char minus = (PL_tokenbuf[0] == '-');
5068 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5076 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5081 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5086 if (PL_oldoldbufptr == PL_last_lop)
5087 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5089 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5092 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5094 /* This hack is to get the ${} in the message. */
5096 yyerror("syntax error");
5099 OPERATOR(HASHBRACK);
5101 /* This hack serves to disambiguate a pair of curlies
5102 * as being a block or an anon hash. Normally, expectation
5103 * determines that, but in cases where we're not in a
5104 * position to expect anything in particular (like inside
5105 * eval"") we have to resolve the ambiguity. This code
5106 * covers the case where the first term in the curlies is a
5107 * quoted string. Most other cases need to be explicitly
5108 * disambiguated by prepending a "+" before the opening
5109 * curly in order to force resolution as an anon hash.
5111 * XXX should probably propagate the outer expectation
5112 * into eval"" to rely less on this hack, but that could
5113 * potentially break current behavior of eval"".
5117 if (*s == '\'' || *s == '"' || *s == '`') {
5118 /* common case: get past first string, handling escapes */
5119 for (t++; t < PL_bufend && *t != *s;)
5120 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5124 else if (*s == 'q') {
5127 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5130 /* skip q//-like construct */
5132 char open, close, term;
5135 while (t < PL_bufend && isSPACE(*t))
5137 /* check for q => */
5138 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5139 OPERATOR(HASHBRACK);
5143 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5147 for (t++; t < PL_bufend; t++) {
5148 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5150 else if (*t == open)
5154 for (t++; t < PL_bufend; t++) {
5155 if (*t == '\\' && t+1 < PL_bufend)
5157 else if (*t == close && --brackets <= 0)
5159 else if (*t == open)
5166 /* skip plain q word */
5167 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5170 else if (isALNUM_lazy_if(t,UTF)) {
5172 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5175 while (t < PL_bufend && isSPACE(*t))
5177 /* if comma follows first term, call it an anon hash */
5178 /* XXX it could be a comma expression with loop modifiers */
5179 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5180 || (*t == '=' && t[1] == '>')))
5181 OPERATOR(HASHBRACK);
5182 if (PL_expect == XREF)
5185 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5191 pl_yylval.ival = CopLINE(PL_curcop);
5192 if (isSPACE(*s) || *s == '#')
5193 PL_copline = NOLINE; /* invalidate current command line number */
5198 if (PL_lex_brackets <= 0)
5199 yyerror("Unmatched right curly bracket");
5201 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5202 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5203 PL_lex_formbrack = 0;
5204 if (PL_lex_state == LEX_INTERPNORMAL) {
5205 if (PL_lex_brackets == 0) {
5206 if (PL_expect & XFAKEBRACK) {
5207 PL_expect &= XENUMMASK;
5208 PL_lex_state = LEX_INTERPEND;
5213 PL_thiswhite = newSVpvs("");
5214 sv_catpvs(PL_thiswhite,"}");
5217 return yylex(); /* ignore fake brackets */
5219 if (*s == '-' && s[1] == '>')
5220 PL_lex_state = LEX_INTERPENDMAYBE;
5221 else if (*s != '[' && *s != '{')
5222 PL_lex_state = LEX_INTERPEND;
5225 if (PL_expect & XFAKEBRACK) {
5226 PL_expect &= XENUMMASK;
5228 return yylex(); /* ignore fake brackets */
5230 start_force(PL_curforce);
5232 curmad('X', newSVpvn(s-1,1));
5233 CURMAD('_', PL_thiswhite);
5238 PL_thistoken = newSVpvs("");
5246 if (PL_expect == XOPERATOR) {
5247 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5248 && isIDFIRST_lazy_if(s,UTF))
5250 CopLINE_dec(PL_curcop);
5251 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5252 CopLINE_inc(PL_curcop);
5257 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5259 PL_expect = XOPERATOR;
5260 force_ident(PL_tokenbuf, '&');
5264 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5276 const char tmp = *s++;
5283 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5284 && strchr("+-*/%.^&|<",tmp))
5285 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5286 "Reversed %c= operator",(int)tmp);
5288 if (PL_expect == XSTATE && isALPHA(tmp) &&
5289 (s == PL_linestart+1 || s[-2] == '\n') )
5291 if (PL_in_eval && !PL_rsfp) {
5296 if (strnEQ(s,"=cut",4)) {
5312 PL_thiswhite = newSVpvs("");
5313 sv_catpvn(PL_thiswhite, PL_linestart,
5314 PL_bufend - PL_linestart);
5318 PL_doextract = TRUE;
5322 if (PL_lex_brackets < PL_lex_formbrack) {
5324 #ifdef PERL_STRICT_CR
5325 while (SPACE_OR_TAB(*t))
5327 while (SPACE_OR_TAB(*t) || *t == '\r')
5330 if (*t == '\n' || *t == '#') {
5341 const char tmp = *s++;
5343 /* was this !=~ where !~ was meant?
5344 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5346 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5347 const char *t = s+1;
5349 while (t < PL_bufend && isSPACE(*t))
5352 if (*t == '/' || *t == '?' ||
5353 ((*t == 'm' || *t == 's' || *t == 'y')
5354 && !isALNUM(t[1])) ||
5355 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5356 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5357 "!=~ should be !~");
5367 if (PL_expect != XOPERATOR) {
5368 if (s[1] != '<' && !strchr(s,'>'))
5371 s = scan_heredoc(s);
5373 s = scan_inputsymbol(s);
5374 TERM(sublex_start());
5380 SHop(OP_LEFT_SHIFT);
5394 const char tmp = *s++;
5396 SHop(OP_RIGHT_SHIFT);
5397 else if (tmp == '=')
5406 if (PL_expect == XOPERATOR) {
5407 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5408 return deprecate_commaless_var_list();
5412 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
5413 PL_tokenbuf[0] = '@';
5414 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5415 sizeof PL_tokenbuf - 1, FALSE);
5416 if (PL_expect == XOPERATOR)
5417 no_op("Array length", s);
5418 if (!PL_tokenbuf[1])
5420 PL_expect = XOPERATOR;
5421 PL_pending_ident = '#';
5425 PL_tokenbuf[0] = '$';
5426 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5427 sizeof PL_tokenbuf - 1, FALSE);
5428 if (PL_expect == XOPERATOR)
5430 if (!PL_tokenbuf[1]) {
5432 yyerror("Final $ should be \\$ or $name");
5436 /* This kludge not intended to be bulletproof. */
5437 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5438 pl_yylval.opval = newSVOP(OP_CONST, 0,
5439 newSViv(CopARYBASE_get(&PL_compiling)));
5440 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5446 const char tmp = *s;
5447 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5450 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5451 && intuit_more(s)) {
5453 PL_tokenbuf[0] = '@';
5454 if (ckWARN(WARN_SYNTAX)) {
5457 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5460 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5461 while (t < PL_bufend && *t != ']')
5463 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5464 "Multidimensional syntax %.*s not supported",
5465 (int)((t - PL_bufptr) + 1), PL_bufptr);
5469 else if (*s == '{') {
5471 PL_tokenbuf[0] = '%';
5472 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5473 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5475 char tmpbuf[sizeof PL_tokenbuf];
5478 } while (isSPACE(*t));
5479 if (isIDFIRST_lazy_if(t,UTF)) {
5481 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5485 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5486 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5487 "You need to quote \"%s\"",
5494 PL_expect = XOPERATOR;
5495 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5496 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5497 if (!islop || PL_last_lop_op == OP_GREPSTART)
5498 PL_expect = XOPERATOR;
5499 else if (strchr("$@\"'`q", *s))
5500 PL_expect = XTERM; /* e.g. print $fh "foo" */
5501 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5502 PL_expect = XTERM; /* e.g. print $fh &sub */
5503 else if (isIDFIRST_lazy_if(s,UTF)) {
5504 char tmpbuf[sizeof PL_tokenbuf];
5506 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5507 if ((t2 = keyword(tmpbuf, len, 0))) {
5508 /* binary operators exclude handle interpretations */
5520 PL_expect = XTERM; /* e.g. print $fh length() */
5525 PL_expect = XTERM; /* e.g. print $fh subr() */
5528 else if (isDIGIT(*s))
5529 PL_expect = XTERM; /* e.g. print $fh 3 */
5530 else if (*s == '.' && isDIGIT(s[1]))
5531 PL_expect = XTERM; /* e.g. print $fh .3 */
5532 else if ((*s == '?' || *s == '-' || *s == '+')
5533 && !isSPACE(s[1]) && s[1] != '=')
5534 PL_expect = XTERM; /* e.g. print $fh -1 */
5535 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5537 PL_expect = XTERM; /* e.g. print $fh /.../
5538 XXX except DORDOR operator
5540 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5542 PL_expect = XTERM; /* print $fh <<"EOF" */
5545 PL_pending_ident = '$';
5549 if (PL_expect == XOPERATOR)
5551 PL_tokenbuf[0] = '@';
5552 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5553 if (!PL_tokenbuf[1]) {
5556 if (PL_lex_state == LEX_NORMAL)
5558 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5560 PL_tokenbuf[0] = '%';
5562 /* Warn about @ where they meant $. */
5563 if (*s == '[' || *s == '{') {
5564 if (ckWARN(WARN_SYNTAX)) {
5565 const char *t = s + 1;
5566 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5568 if (*t == '}' || *t == ']') {
5570 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5571 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5572 "Scalar value %.*s better written as $%.*s",
5573 (int)(t-PL_bufptr), PL_bufptr,
5574 (int)(t-PL_bufptr-1), PL_bufptr+1);
5579 PL_pending_ident = '@';
5582 case '/': /* may be division, defined-or, or pattern */
5583 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5587 case '?': /* may either be conditional or pattern */
5588 if (PL_expect == XOPERATOR) {
5596 /* A // operator. */
5606 /* Disable warning on "study /blah/" */
5607 if (PL_oldoldbufptr == PL_last_uni
5608 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5609 || memNE(PL_last_uni, "study", 5)
5610 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5613 s = scan_pat(s,OP_MATCH);
5614 TERM(sublex_start());
5618 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5619 #ifdef PERL_STRICT_CR
5622 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5624 && (s == PL_linestart || s[-1] == '\n') )
5626 PL_lex_formbrack = 0;
5630 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5634 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5640 pl_yylval.ival = OPf_SPECIAL;
5646 if (PL_expect != XOPERATOR)
5651 case '0': case '1': case '2': case '3': case '4':
5652 case '5': case '6': case '7': case '8': case '9':
5653 s = scan_num(s, &pl_yylval);
5654 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5655 if (PL_expect == XOPERATOR)
5660 s = scan_str(s,!!PL_madskills,FALSE);
5661 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5662 if (PL_expect == XOPERATOR) {
5663 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5664 return deprecate_commaless_var_list();
5671 pl_yylval.ival = OP_CONST;
5672 TERM(sublex_start());
5675 s = scan_str(s,!!PL_madskills,FALSE);
5676 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5677 if (PL_expect == XOPERATOR) {
5678 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5679 return deprecate_commaless_var_list();
5686 pl_yylval.ival = OP_CONST;
5687 /* FIXME. I think that this can be const if char *d is replaced by
5688 more localised variables. */
5689 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5690 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5691 pl_yylval.ival = OP_STRINGIFY;
5695 TERM(sublex_start());
5698 s = scan_str(s,!!PL_madskills,FALSE);
5699 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5700 if (PL_expect == XOPERATOR)
5701 no_op("Backticks",s);
5704 readpipe_override();
5705 TERM(sublex_start());
5709 if (PL_lex_inwhat && isDIGIT(*s))
5710 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5712 if (PL_expect == XOPERATOR)
5713 no_op("Backslash",s);
5717 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5718 char *start = s + 2;
5719 while (isDIGIT(*start) || *start == '_')
5721 if (*start == '.' && isDIGIT(start[1])) {
5722 s = scan_num(s, &pl_yylval);
5725 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5726 else if (!isALPHA(*start) && (PL_expect == XTERM
5727 || PL_expect == XREF || PL_expect == XSTATE
5728 || PL_expect == XTERMORDORDOR)) {
5729 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5731 s = scan_num(s, &pl_yylval);
5738 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5781 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5783 /* Some keywords can be followed by any delimiter, including ':' */
5784 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5785 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5786 (PL_tokenbuf[0] == 'q' &&
5787 strchr("qwxr", PL_tokenbuf[1])))));
5789 /* x::* is just a word, unless x is "CORE" */
5790 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5794 while (d < PL_bufend && isSPACE(*d))
5795 d++; /* no comments skipped here, or s### is misparsed */
5797 /* Is this a word before a => operator? */
5798 if (*d == '=' && d[1] == '>') {
5801 = (OP*)newSVOP(OP_CONST, 0,
5802 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5803 pl_yylval.opval->op_private = OPpCONST_BARE;
5807 /* Check for plugged-in keyword */
5811 char *saved_bufptr = PL_bufptr;
5813 result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
5815 if (result == KEYWORD_PLUGIN_DECLINE) {
5816 /* not a plugged-in keyword */
5817 PL_bufptr = saved_bufptr;
5818 } else if (result == KEYWORD_PLUGIN_STMT) {
5819 pl_yylval.opval = o;
5822 return REPORT(PLUGSTMT);
5823 } else if (result == KEYWORD_PLUGIN_EXPR) {
5824 pl_yylval.opval = o;
5826 PL_expect = XOPERATOR;
5827 return REPORT(PLUGEXPR);
5829 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
5834 /* Check for built-in keyword */
5835 tmp = keyword(PL_tokenbuf, len, 0);
5837 /* Is this a label? */
5838 if (!anydelim && PL_expect == XSTATE
5839 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5841 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5843 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5848 if (tmp < 0) { /* second-class keyword? */
5849 GV *ogv = NULL; /* override (winner) */
5850 GV *hgv = NULL; /* hidden (loser) */
5851 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5853 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5856 if (GvIMPORTED_CV(gv))
5858 else if (! CvMETHOD(cv))
5862 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5863 (gv = *gvp) && isGV_with_GP(gv) &&
5864 GvCVu(gv) && GvIMPORTED_CV(gv))
5871 tmp = 0; /* overridden by import or by GLOBAL */
5874 && -tmp==KEY_lock /* XXX generalizable kludge */
5877 tmp = 0; /* any sub overrides "weak" keyword */
5879 else { /* no override */
5881 if (tmp == KEY_dump) {
5882 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5883 "dump() better written as CORE::dump()");
5887 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
5888 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5889 "Ambiguous call resolved as CORE::%s(), %s",
5890 GvENAME(hgv), "qualify as such or use &");
5897 default: /* not a keyword */
5898 /* Trade off - by using this evil construction we can pull the
5899 variable gv into the block labelled keylookup. If not, then
5900 we have to give it function scope so that the goto from the
5901 earlier ':' case doesn't bypass the initialisation. */
5903 just_a_word_zero_gv:
5911 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5915 SV *nextPL_nextwhite = 0;
5919 /* Get the rest if it looks like a package qualifier */
5921 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5923 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5926 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5927 *s == '\'' ? "'" : "::");
5932 if (PL_expect == XOPERATOR) {
5933 if (PL_bufptr == PL_linestart) {
5934 CopLINE_dec(PL_curcop);
5935 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5936 CopLINE_inc(PL_curcop);
5939 no_op("Bareword",s);
5942 /* Look for a subroutine with this name in current package,
5943 unless name is "Foo::", in which case Foo is a bearword
5944 (and a package name). */
5946 if (len > 2 && !PL_madskills &&
5947 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5949 if (ckWARN(WARN_BAREWORD)
5950 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5951 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5952 "Bareword \"%s\" refers to nonexistent package",
5955 PL_tokenbuf[len] = '\0';
5961 /* Mustn't actually add anything to a symbol table.
5962 But also don't want to "initialise" any placeholder
5963 constants that might already be there into full
5964 blown PVGVs with attached PVCV. */
5965 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5966 GV_NOADD_NOINIT, SVt_PVCV);
5971 /* if we saw a global override before, get the right name */
5974 sv = newSVpvs("CORE::GLOBAL::");
5975 sv_catpv(sv,PL_tokenbuf);
5978 /* If len is 0, newSVpv does strlen(), which is correct.
5979 If len is non-zero, then it will be the true length,
5980 and so the scalar will be created correctly. */
5981 sv = newSVpv(PL_tokenbuf,len);
5984 if (PL_madskills && !PL_thistoken) {
5985 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5986 PL_thistoken = newSVpvn(start,s - start);
5987 PL_realtokenstart = s - SvPVX(PL_linestr);
5991 /* Presume this is going to be a bareword of some sort. */
5994 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5995 pl_yylval.opval->op_private = OPpCONST_BARE;
5996 /* UTF-8 package name? */
5997 if (UTF && !IN_BYTES &&
5998 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
6001 /* And if "Foo::", then that's what it certainly is. */
6008 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6009 const_op->op_private = OPpCONST_BARE;
6010 rv2cv_op = newCVREF(0, const_op);
6012 if (rv2cv_op->op_type == OP_RV2CV &&
6013 (rv2cv_op->op_flags & OPf_KIDS)) {
6014 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6015 switch (rv_op->op_type) {
6017 SV *sv = cSVOPx_sv(rv_op);
6018 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6022 GV *gv = cGVOPx_gv(rv_op);
6023 CV *maybe_cv = GvCVu(gv);
6024 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6030 /* See if it's the indirect object for a list operator. */
6032 if (PL_oldoldbufptr &&
6033 PL_oldoldbufptr < PL_bufptr &&
6034 (PL_oldoldbufptr == PL_last_lop
6035 || PL_oldoldbufptr == PL_last_uni) &&
6036 /* NO SKIPSPACE BEFORE HERE! */
6037 (PL_expect == XREF ||
6038 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6040 bool immediate_paren = *s == '(';
6042 /* (Now we can afford to cross potential line boundary.) */
6043 s = SKIPSPACE2(s,nextPL_nextwhite);
6045 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6048 /* Two barewords in a row may indicate method call. */
6050 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6051 (tmp = intuit_method(s, gv, cv))) {
6056 /* If not a declared subroutine, it's an indirect object. */
6057 /* (But it's an indir obj regardless for sort.) */
6058 /* Also, if "_" follows a filetest operator, it's a bareword */
6061 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6063 (PL_last_lop_op != OP_MAPSTART &&
6064 PL_last_lop_op != OP_GREPSTART))))
6065 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6066 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6069 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6074 PL_expect = XOPERATOR;
6077 s = SKIPSPACE2(s,nextPL_nextwhite);
6078 PL_nextwhite = nextPL_nextwhite;
6083 /* Is this a word before a => operator? */
6084 if (*s == '=' && s[1] == '>' && !pkgname) {
6087 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6088 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6089 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6093 /* If followed by a paren, it's certainly a subroutine. */
6098 while (SPACE_OR_TAB(*d))
6100 if (*d == ')' && (sv = cv_const_sv(cv))) {
6107 PL_nextwhite = PL_thiswhite;
6110 start_force(PL_curforce);
6112 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6113 PL_expect = XOPERATOR;
6116 PL_nextwhite = nextPL_nextwhite;
6117 curmad('X', PL_thistoken);
6118 PL_thistoken = newSVpvs("");
6127 /* If followed by var or block, call it a method (unless sub) */
6129 if ((*s == '$' || *s == '{') && !cv) {
6131 PL_last_lop = PL_oldbufptr;
6132 PL_last_lop_op = OP_METHOD;
6136 /* If followed by a bareword, see if it looks like indir obj. */
6139 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6140 && (tmp = intuit_method(s, gv, cv))) {
6145 /* Not a method, so call it a subroutine (if defined) */
6148 if (lastchar == '-')
6149 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6150 "Ambiguous use of -%s resolved as -&%s()",
6151 PL_tokenbuf, PL_tokenbuf);
6152 /* Check for a constant sub */
6153 if ((sv = cv_const_sv(cv))) {
6156 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6157 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6158 pl_yylval.opval->op_private = 0;
6162 op_free(pl_yylval.opval);
6163 pl_yylval.opval = rv2cv_op;
6164 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6165 PL_last_lop = PL_oldbufptr;
6166 PL_last_lop_op = OP_ENTERSUB;
6167 /* Is there a prototype? */
6175 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6178 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6180 while (*proto == ';')
6182 if (*proto == '&' && *s == '{') {
6184 sv_setpvs(PL_subname, "__ANON__");
6186 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6193 PL_nextwhite = PL_thiswhite;
6196 start_force(PL_curforce);
6197 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6200 PL_nextwhite = nextPL_nextwhite;
6201 curmad('X', PL_thistoken);
6202 PL_thistoken = newSVpvs("");
6209 /* Guess harder when madskills require "best effort". */
6210 if (PL_madskills && (!gv || !GvCVu(gv))) {
6211 int probable_sub = 0;
6212 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6214 else if (isALPHA(*s)) {
6218 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6219 if (!keyword(tmpbuf, tmplen, 0))
6222 while (d < PL_bufend && isSPACE(*d))
6224 if (*d == '=' && d[1] == '>')
6229 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6230 op_free(pl_yylval.opval);
6231 pl_yylval.opval = rv2cv_op;
6232 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6233 PL_last_lop = PL_oldbufptr;
6234 PL_last_lop_op = OP_ENTERSUB;
6235 PL_nextwhite = PL_thiswhite;
6237 start_force(PL_curforce);
6238 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6240 PL_nextwhite = nextPL_nextwhite;
6241 curmad('X', PL_thistoken);
6242 PL_thistoken = newSVpvs("");
6247 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6254 /* Call it a bare word */
6256 if (PL_hints & HINT_STRICT_SUBS)
6257 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6260 /* after "print" and similar functions (corresponding to
6261 * "F? L" in opcode.pl), whatever wasn't already parsed as
6262 * a filehandle should be subject to "strict subs".
6263 * Likewise for the optional indirect-object argument to system
6264 * or exec, which can't be a bareword */
6265 if ((PL_last_lop_op == OP_PRINT
6266 || PL_last_lop_op == OP_PRTF
6267 || PL_last_lop_op == OP_SAY
6268 || PL_last_lop_op == OP_SYSTEM
6269 || PL_last_lop_op == OP_EXEC)
6270 && (PL_hints & HINT_STRICT_SUBS))
6271 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6272 if (lastchar != '-') {
6273 if (ckWARN(WARN_RESERVED)) {
6277 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6278 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6286 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6287 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6288 "Operator or semicolon missing before %c%s",
6289 lastchar, PL_tokenbuf);
6290 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6291 "Ambiguous use of %c resolved as operator %c",
6292 lastchar, lastchar);
6298 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6299 newSVpv(CopFILE(PL_curcop),0));
6303 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6304 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6307 case KEY___PACKAGE__:
6308 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6310 ? newSVhek(HvNAME_HEK(PL_curstash))
6317 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6318 const char *pname = "main";
6319 if (PL_tokenbuf[2] == 'D')
6320 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6321 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6325 GvIOp(gv) = newIO();
6326 IoIFP(GvIOp(gv)) = PL_rsfp;
6327 #if defined(HAS_FCNTL) && defined(F_SETFD)
6329 const int fd = PerlIO_fileno(PL_rsfp);
6330 fcntl(fd,F_SETFD,fd >= 3);
6333 /* Mark this internal pseudo-handle as clean */
6334 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6335 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6336 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6338 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6339 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6340 /* if the script was opened in binmode, we need to revert
6341 * it to text mode for compatibility; but only iff it has CRs
6342 * XXX this is a questionable hack at best. */
6343 if (PL_bufend-PL_bufptr > 2
6344 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6347 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6348 loc = PerlIO_tell(PL_rsfp);
6349 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6352 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6354 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6355 #endif /* NETWARE */
6356 #ifdef PERLIO_IS_STDIO /* really? */
6357 # if defined(__BORLANDC__)
6358 /* XXX see note in do_binmode() */
6359 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6363 PerlIO_seek(PL_rsfp, loc, 0);
6367 #ifdef PERLIO_LAYERS
6370 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6371 else if (PL_encoding) {
6378 XPUSHs(PL_encoding);
6380 call_method("name", G_SCALAR);
6384 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6385 Perl_form(aTHX_ ":encoding(%"SVf")",
6394 if (PL_realtokenstart >= 0) {
6395 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6397 PL_endwhite = newSVpvs("");
6398 sv_catsv(PL_endwhite, PL_thiswhite);
6400 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6401 PL_realtokenstart = -1;
6403 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6419 if (PL_expect == XSTATE) {
6426 if (*s == ':' && s[1] == ':') {
6429 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6430 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6431 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6434 else if (tmp == KEY_require || tmp == KEY_do)
6435 /* that's a way to remember we saw "CORE::" */
6448 LOP(OP_ACCEPT,XTERM);
6454 LOP(OP_ATAN2,XTERM);
6460 LOP(OP_BINMODE,XTERM);
6463 LOP(OP_BLESS,XTERM);
6472 /* When 'use switch' is in effect, continue has a dual
6473 life as a control operator. */
6475 if (!FEATURE_IS_ENABLED("switch"))
6478 /* We have to disambiguate the two senses of
6479 "continue". If the next token is a '{' then
6480 treat it as the start of a continue block;
6481 otherwise treat it as a control operator.
6493 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6510 if (!PL_cryptseen) {
6511 PL_cryptseen = TRUE;
6515 LOP(OP_CRYPT,XTERM);
6518 LOP(OP_CHMOD,XTERM);
6521 LOP(OP_CHOWN,XTERM);
6524 LOP(OP_CONNECT,XTERM);
6543 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6544 if (orig_keyword == KEY_do) {
6553 PL_hints |= HINT_BLOCK_SCOPE;
6563 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6564 LOP(OP_DBMOPEN,XTERM);
6570 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6577 pl_yylval.ival = CopLINE(PL_curcop);
6593 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6594 UNIBRACK(OP_ENTEREVAL);
6608 case KEY_endhostent:
6614 case KEY_endservent:
6617 case KEY_endprotoent:
6628 pl_yylval.ival = CopLINE(PL_curcop);
6630 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6633 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6636 if ((PL_bufend - p) >= 3 &&
6637 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6639 else if ((PL_bufend - p) >= 4 &&
6640 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6643 if (isIDFIRST_lazy_if(p,UTF)) {
6644 p = scan_ident(p, PL_bufend,
6645 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6649 Perl_croak(aTHX_ "Missing $ on loop variable");
6651 s = SvPVX(PL_linestr) + soff;
6657 LOP(OP_FORMLINE,XTERM);
6663 LOP(OP_FCNTL,XTERM);
6669 LOP(OP_FLOCK,XTERM);
6678 LOP(OP_GREPSTART, XREF);
6681 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6696 case KEY_getpriority:
6697 LOP(OP_GETPRIORITY,XTERM);
6699 case KEY_getprotobyname:
6702 case KEY_getprotobynumber:
6703 LOP(OP_GPBYNUMBER,XTERM);
6705 case KEY_getprotoent:
6717 case KEY_getpeername:
6718 UNI(OP_GETPEERNAME);
6720 case KEY_gethostbyname:
6723 case KEY_gethostbyaddr:
6724 LOP(OP_GHBYADDR,XTERM);
6726 case KEY_gethostent:
6729 case KEY_getnetbyname:
6732 case KEY_getnetbyaddr:
6733 LOP(OP_GNBYADDR,XTERM);
6738 case KEY_getservbyname:
6739 LOP(OP_GSBYNAME,XTERM);
6741 case KEY_getservbyport:
6742 LOP(OP_GSBYPORT,XTERM);
6744 case KEY_getservent:
6747 case KEY_getsockname:
6748 UNI(OP_GETSOCKNAME);
6750 case KEY_getsockopt:
6751 LOP(OP_GSOCKOPT,XTERM);
6766 pl_yylval.ival = CopLINE(PL_curcop);
6776 pl_yylval.ival = CopLINE(PL_curcop);
6780 LOP(OP_INDEX,XTERM);
6786 LOP(OP_IOCTL,XTERM);
6798 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6830 LOP(OP_LISTEN,XTERM);
6839 s = scan_pat(s,OP_MATCH);
6840 TERM(sublex_start());
6843 LOP(OP_MAPSTART, XREF);
6846 LOP(OP_MKDIR,XTERM);
6849 LOP(OP_MSGCTL,XTERM);
6852 LOP(OP_MSGGET,XTERM);
6855 LOP(OP_MSGRCV,XTERM);
6858 LOP(OP_MSGSND,XTERM);
6863 PL_in_my = (U16)tmp;
6865 if (isIDFIRST_lazy_if(s,UTF)) {
6869 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6870 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6872 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6873 if (!PL_in_my_stash) {
6876 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6880 if (PL_madskills) { /* just add type to declarator token */
6881 sv_catsv(PL_thistoken, PL_nextwhite);
6883 sv_catpvn(PL_thistoken, start, s - start);
6891 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6898 s = tokenize_use(0, s);
6902 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6909 if (isIDFIRST_lazy_if(s,UTF)) {
6911 for (d = s; isALNUM_lazy_if(d,UTF);)
6913 for (t=d; isSPACE(*t);)
6915 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6917 && !(t[0] == '=' && t[1] == '>')
6919 int parms_len = (int)(d-s);
6920 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6921 "Precedence problem: open %.*s should be open(%.*s)",
6922 parms_len, s, parms_len, s);
6928 pl_yylval.ival = OP_OR;
6938 LOP(OP_OPEN_DIR,XTERM);
6941 checkcomma(s,PL_tokenbuf,"filehandle");
6945 checkcomma(s,PL_tokenbuf,"filehandle");
6964 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6965 s = force_version(s, FALSE);
6969 LOP(OP_PIPE_OP,XTERM);
6972 s = scan_str(s,!!PL_madskills,FALSE);
6975 pl_yylval.ival = OP_CONST;
6976 TERM(sublex_start());
6982 s = scan_str(s,!!PL_madskills,FALSE);
6985 PL_expect = XOPERATOR;
6987 if (SvCUR(PL_lex_stuff)) {
6990 d = SvPV_force(PL_lex_stuff, len);
6992 for (; isSPACE(*d) && len; --len, ++d)
6997 if (!warned && ckWARN(WARN_QW)) {
6998 for (; !isSPACE(*d) && len; --len, ++d) {
7000 Perl_warner(aTHX_ packWARN(WARN_QW),
7001 "Possible attempt to separate words with commas");
7004 else if (*d == '#') {
7005 Perl_warner(aTHX_ packWARN(WARN_QW),
7006 "Possible attempt to put comments in qw() list");
7012 for (; !isSPACE(*d) && len; --len, ++d)
7015 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7016 words = append_elem(OP_LIST, words,
7017 newSVOP(OP_CONST, 0, tokeq(sv)));
7021 start_force(PL_curforce);
7022 NEXTVAL_NEXTTOKE.opval = words;
7027 SvREFCNT_dec(PL_lex_stuff);
7028 PL_lex_stuff = NULL;
7034 s = scan_str(s,!!PL_madskills,FALSE);
7037 pl_yylval.ival = OP_STRINGIFY;
7038 if (SvIVX(PL_lex_stuff) == '\'')
7039 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
7040 TERM(sublex_start());
7043 s = scan_pat(s,OP_QR);
7044 TERM(sublex_start());
7047 s = scan_str(s,!!PL_madskills,FALSE);
7050 readpipe_override();
7051 TERM(sublex_start());
7059 s = force_version(s, FALSE);
7061 else if (*s != 'v' || !isDIGIT(s[1])
7062 || (s = force_version(s, TRUE), *s == 'v'))
7064 *PL_tokenbuf = '\0';
7065 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7066 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7067 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7069 yyerror("<> should be quotes");
7071 if (orig_keyword == KEY_require) {
7079 PL_last_uni = PL_oldbufptr;
7080 PL_last_lop_op = OP_REQUIRE;
7082 return REPORT( (int)REQUIRE );
7088 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7092 LOP(OP_RENAME,XTERM);
7101 LOP(OP_RINDEX,XTERM);
7110 UNIDOR(OP_READLINE);
7113 UNIDOR(OP_BACKTICK);
7122 LOP(OP_REVERSE,XTERM);
7125 UNIDOR(OP_READLINK);
7132 if (pl_yylval.opval)
7133 TERM(sublex_start());
7135 TOKEN(1); /* force error */
7138 checkcomma(s,PL_tokenbuf,"filehandle");
7148 LOP(OP_SELECT,XTERM);
7154 LOP(OP_SEMCTL,XTERM);
7157 LOP(OP_SEMGET,XTERM);
7160 LOP(OP_SEMOP,XTERM);
7166 LOP(OP_SETPGRP,XTERM);
7168 case KEY_setpriority:
7169 LOP(OP_SETPRIORITY,XTERM);
7171 case KEY_sethostent:
7177 case KEY_setservent:
7180 case KEY_setprotoent:
7190 LOP(OP_SEEKDIR,XTERM);
7192 case KEY_setsockopt:
7193 LOP(OP_SSOCKOPT,XTERM);
7199 LOP(OP_SHMCTL,XTERM);
7202 LOP(OP_SHMGET,XTERM);
7205 LOP(OP_SHMREAD,XTERM);
7208 LOP(OP_SHMWRITE,XTERM);
7211 LOP(OP_SHUTDOWN,XTERM);
7220 LOP(OP_SOCKET,XTERM);
7222 case KEY_socketpair:
7223 LOP(OP_SOCKPAIR,XTERM);
7226 checkcomma(s,PL_tokenbuf,"subroutine name");
7228 if (*s == ';' || *s == ')') /* probably a close */
7229 Perl_croak(aTHX_ "sort is now a reserved word");
7231 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7235 LOP(OP_SPLIT,XTERM);
7238 LOP(OP_SPRINTF,XTERM);
7241 LOP(OP_SPLICE,XTERM);
7256 LOP(OP_SUBSTR,XTERM);
7262 char tmpbuf[sizeof PL_tokenbuf];
7263 SSize_t tboffset = 0;
7264 expectation attrful;
7265 bool have_name, have_proto;
7266 const int key = tmp;
7271 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7272 SV *subtoken = newSVpvn(tstart, s - tstart);
7276 s = SKIPSPACE2(s,tmpwhite);
7281 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7282 (*s == ':' && s[1] == ':'))
7285 SV *nametoke = NULL;
7289 attrful = XATTRBLOCK;
7290 /* remember buffer pos'n for later force_word */
7291 tboffset = s - PL_oldbufptr;
7292 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7295 nametoke = newSVpvn(s, d - s);
7297 if (memchr(tmpbuf, ':', len))
7298 sv_setpvn(PL_subname, tmpbuf, len);
7300 sv_setsv(PL_subname,PL_curstname);
7301 sv_catpvs(PL_subname,"::");
7302 sv_catpvn(PL_subname,tmpbuf,len);
7309 CURMAD('X', nametoke);
7310 CURMAD('_', tmpwhite);
7311 (void) force_word(PL_oldbufptr + tboffset, WORD,
7314 s = SKIPSPACE2(d,tmpwhite);
7321 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7322 PL_expect = XTERMBLOCK;
7323 attrful = XATTRTERM;
7324 sv_setpvs(PL_subname,"?");
7328 if (key == KEY_format) {
7330 PL_lex_formbrack = PL_lex_brackets + 1;
7332 PL_thistoken = subtoken;
7336 (void) force_word(PL_oldbufptr + tboffset, WORD,
7342 /* Look for a prototype */
7345 bool bad_proto = FALSE;
7346 bool in_brackets = FALSE;
7347 char greedy_proto = ' ';
7348 bool proto_after_greedy_proto = FALSE;
7349 bool must_be_last = FALSE;
7350 bool underscore = FALSE;
7351 bool seen_underscore = FALSE;
7352 const bool warnsyntax = ckWARN(WARN_SYNTAX);
7354 s = scan_str(s,!!PL_madskills,FALSE);
7356 Perl_croak(aTHX_ "Prototype not terminated");
7357 /* strip spaces and check for bad characters */
7358 d = SvPVX(PL_lex_stuff);
7360 for (p = d; *p; ++p) {
7366 proto_after_greedy_proto = TRUE;
7367 if (!strchr("$@%*;[]&\\_", *p)) {
7379 else if ( *p == ']' ) {
7380 in_brackets = FALSE;
7382 else if ( (*p == '@' || *p == '%') &&
7383 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7385 must_be_last = TRUE;
7388 else if ( *p == '_' ) {
7389 underscore = seen_underscore = TRUE;
7396 if (proto_after_greedy_proto)
7397 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7398 "Prototype after '%c' for %"SVf" : %s",
7399 greedy_proto, SVfARG(PL_subname), d);
7401 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7402 "Illegal character %sin prototype for %"SVf" : %s",
7403 seen_underscore ? "after '_' " : "",
7404 SVfARG(PL_subname), d);
7405 SvCUR_set(PL_lex_stuff, tmp);
7410 CURMAD('q', PL_thisopen);
7411 CURMAD('_', tmpwhite);
7412 CURMAD('=', PL_thisstuff);
7413 CURMAD('Q', PL_thisclose);
7414 NEXTVAL_NEXTTOKE.opval =
7415 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7416 PL_lex_stuff = NULL;
7419 s = SKIPSPACE2(s,tmpwhite);
7427 if (*s == ':' && s[1] != ':')
7428 PL_expect = attrful;
7429 else if (*s != '{' && key == KEY_sub) {
7431 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7433 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7440 curmad('^', newSVpvs(""));
7441 CURMAD('_', tmpwhite);
7445 PL_thistoken = subtoken;
7448 NEXTVAL_NEXTTOKE.opval =
7449 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7450 PL_lex_stuff = NULL;
7456 sv_setpvs(PL_subname, "__ANON__");
7458 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7462 (void) force_word(PL_oldbufptr + tboffset, WORD,
7471 LOP(OP_SYSTEM,XREF);
7474 LOP(OP_SYMLINK,XTERM);
7477 LOP(OP_SYSCALL,XTERM);
7480 LOP(OP_SYSOPEN,XTERM);
7483 LOP(OP_SYSSEEK,XTERM);
7486 LOP(OP_SYSREAD,XTERM);
7489 LOP(OP_SYSWRITE,XTERM);
7493 TERM(sublex_start());
7514 LOP(OP_TRUNCATE,XTERM);
7526 pl_yylval.ival = CopLINE(PL_curcop);
7530 pl_yylval.ival = CopLINE(PL_curcop);
7534 LOP(OP_UNLINK,XTERM);
7540 LOP(OP_UNPACK,XTERM);
7543 LOP(OP_UTIME,XTERM);
7549 LOP(OP_UNSHIFT,XTERM);
7552 s = tokenize_use(1, s);
7562 pl_yylval.ival = CopLINE(PL_curcop);
7566 pl_yylval.ival = CopLINE(PL_curcop);
7570 PL_hints |= HINT_BLOCK_SCOPE;
7577 LOP(OP_WAITPID,XTERM);
7586 ctl_l[0] = toCTRL('L');
7588 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7591 /* Make sure $^L is defined */
7592 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7597 if (PL_expect == XOPERATOR)
7603 pl_yylval.ival = OP_XOR;
7608 TERM(sublex_start());
7613 #pragma segment Main
7617 S_pending_ident(pTHX)
7622 /* pit holds the identifier we read and pending_ident is reset */
7623 char pit = PL_pending_ident;
7624 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7625 /* All routes through this function want to know if there is a colon. */
7626 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7627 PL_pending_ident = 0;
7629 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7630 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7631 "### Pending identifier '%s'\n", PL_tokenbuf); });
7633 /* if we're in a my(), we can't allow dynamics here.
7634 $foo'bar has already been turned into $foo::bar, so
7635 just check for colons.
7637 if it's a legal name, the OP is a PADANY.
7640 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
7642 yyerror(Perl_form(aTHX_ "No package name allowed for "
7643 "variable %s in \"our\"",
7645 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7649 yyerror(Perl_form(aTHX_ PL_no_myglob,
7650 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7652 pl_yylval.opval = newOP(OP_PADANY, 0);
7653 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7659 build the ops for accesses to a my() variable.
7661 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7662 then used in a comparison. This catches most, but not
7663 all cases. For instance, it catches
7664 sort { my($a); $a <=> $b }
7666 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7667 (although why you'd do that is anyone's guess).
7672 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7673 if (tmp != NOT_IN_PAD) {
7674 /* might be an "our" variable" */
7675 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7676 /* build ops for a bareword */
7677 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7678 HEK * const stashname = HvNAME_HEK(stash);
7679 SV * const sym = newSVhek(stashname);
7680 sv_catpvs(sym, "::");
7681 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7682 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7683 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7686 ? (GV_ADDMULTI | GV_ADDINEVAL)
7689 ((PL_tokenbuf[0] == '$') ? SVt_PV
7690 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7695 /* if it's a sort block and they're naming $a or $b */
7696 if (PL_last_lop_op == OP_SORT &&
7697 PL_tokenbuf[0] == '$' &&
7698 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7701 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7702 d < PL_bufend && *d != '\n';
7705 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7706 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7712 pl_yylval.opval = newOP(OP_PADANY, 0);
7713 pl_yylval.opval->op_targ = tmp;
7719 Whine if they've said @foo in a doublequoted string,
7720 and @foo isn't a variable we can find in the symbol
7723 if (ckWARN(WARN_AMBIGUOUS) &&
7724 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7725 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7727 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7728 /* DO NOT warn for @- and @+ */
7729 && !( PL_tokenbuf[2] == '\0' &&
7730 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7733 /* Downgraded from fatal to warning 20000522 mjd */
7734 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7735 "Possible unintended interpolation of %s in string",
7740 /* build ops for a bareword */
7741 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7743 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7745 PL_tokenbuf + 1, tokenbuf_len - 1,
7746 /* If the identifier refers to a stash, don't autovivify it.
7747 * Change 24660 had the side effect of causing symbol table
7748 * hashes to always be defined, even if they were freshly
7749 * created and the only reference in the entire program was
7750 * the single statement with the defined %foo::bar:: test.
7751 * It appears that all code in the wild doing this actually
7752 * wants to know whether sub-packages have been loaded, so
7753 * by avoiding auto-vivifying symbol tables, we ensure that
7754 * defined %foo::bar:: continues to be false, and the existing
7755 * tests still give the expected answers, even though what
7756 * they're actually testing has now changed subtly.
7758 (*PL_tokenbuf == '%'
7759 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7762 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7763 ((PL_tokenbuf[0] == '$') ? SVt_PV
7764 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7770 * The following code was generated by perl_keyword.pl.
7774 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7778 PERL_ARGS_ASSERT_KEYWORD;
7782 case 1: /* 5 tokens of length 1 */
7814 case 2: /* 18 tokens of length 2 */
7960 case 3: /* 29 tokens of length 3 */
7964 if (name[1] == 'N' &&
8027 if (name[1] == 'i' &&
8059 if (name[1] == 'o' &&
8068 if (name[1] == 'e' &&
8077 if (name[1] == 'n' &&
8086 if (name[1] == 'o' &&
8095 if (name[1] == 'a' &&
8104 if (name[1] == 'o' &&
8166 if (name[1] == 'e' &&
8180 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8206 if (name[1] == 'i' &&
8215 if (name[1] == 's' &&
8224 if (name[1] == 'e' &&
8233 if (name[1] == 'o' &&
8245 case 4: /* 41 tokens of length 4 */
8249 if (name[1] == 'O' &&
8259 if (name[1] == 'N' &&
8269 if (name[1] == 'i' &&
8279 if (name[1] == 'h' &&
8289 if (name[1] == 'u' &&
8302 if (name[2] == 'c' &&
8311 if (name[2] == 's' &&
8320 if (name[2] == 'a' &&
8356 if (name[1] == 'o' &&
8369 if (name[2] == 't' &&
8378 if (name[2] == 'o' &&
8387 if (name[2] == 't' &&
8396 if (name[2] == 'e' &&
8409 if (name[1] == 'o' &&
8422 if (name[2] == 'y' &&
8431 if (name[2] == 'l' &&
8447 if (name[2] == 's' &&
8456 if (name[2] == 'n' &&
8465 if (name[2] == 'c' &&
8478 if (name[1] == 'e' &&
8488 if (name[1] == 'p' &&
8501 if (name[2] == 'c' &&
8510 if (name[2] == 'p' &&
8519 if (name[2] == 's' &&
8535 if (name[2] == 'n' &&
8605 if (name[2] == 'r' &&
8614 if (name[2] == 'r' &&
8623 if (name[2] == 'a' &&
8639 if (name[2] == 'l' &&
8701 if (name[2] == 'e' &&
8704 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8717 case 5: /* 39 tokens of length 5 */
8721 if (name[1] == 'E' &&
8732 if (name[1] == 'H' &&
8746 if (name[2] == 'a' &&
8756 if (name[2] == 'a' &&
8773 if (name[2] == 'e' &&
8783 if (name[2] == 'e' &&
8787 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8803 if (name[3] == 'i' &&
8812 if (name[3] == 'o' &&
8848 if (name[2] == 'o' &&
8858 if (name[2] == 'y' &&
8872 if (name[1] == 'l' &&
8886 if (name[2] == 'n' &&
8896 if (name[2] == 'o' &&
8910 if (name[1] == 'i' &&
8915 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8924 if (name[2] == 'd' &&
8934 if (name[2] == 'c' &&
8951 if (name[2] == 'c' &&
8961 if (name[2] == 't' &&
8975 if (name[1] == 'k' &&
8986 if (name[1] == 'r' &&
9000 if (name[2] == 's' &&
9010 if (name[2] == 'd' &&
9027 if (name[2] == 'm' &&
9037 if (name[2] == 'i' &&
9047 if (name[2] == 'e' &&
9057 if (name[2] == 'l' &&
9067 if (name[2] == 'a' &&
9080 if (name[3] == 't' &&
9083 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9089 if (name[3] == 'd' &&
9106 if (name[1] == 'i' &&
9120 if (name[2] == 'a' &&
9133 if (name[3] == 'e' &&
9168 if (name[2] == 'i' &&
9185 if (name[2] == 'i' &&
9195 if (name[2] == 'i' &&
9212 case 6: /* 33 tokens of length 6 */
9216 if (name[1] == 'c' &&
9231 if (name[2] == 'l' &&
9242 if (name[2] == 'r' &&
9257 if (name[1] == 'e' &&
9272 if (name[2] == 's' &&
9277 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9283 if (name[2] == 'i' &&
9301 if (name[2] == 'l' &&
9312 if (name[2] == 'r' &&
9327 if (name[1] == 'm' &&
9342 if (name[2] == 'n' &&
9353 if (name[2] == 's' &&
9368 if (name[1] == 's' &&
9374 if (name[4] == 't' &&
9383 if (name[4] == 'e' &&
9392 if (name[4] == 'c' &&
9401 if (name[4] == 'n' &&
9417 if (name[1] == 'r' &&
9435 if (name[3] == 'a' &&
9445 if (name[3] == 'u' &&
9459 if (name[2] == 'n' &&
9477 if (name[2] == 'a' &&
9491 if (name[3] == 'e' &&
9504 if (name[4] == 't' &&
9513 if (name[4] == 'e' &&
9535 if (name[4] == 't' &&
9544 if (name[4] == 'e' &&
9560 if (name[2] == 'c' &&
9571 if (name[2] == 'l' &&
9582 if (name[2] == 'b' &&
9593 if (name[2] == 's' &&
9616 if (name[4] == 's' &&
9625 if (name[4] == 'n' &&
9638 if (name[3] == 'a' &&
9655 if (name[1] == 'a' &&
9670 case 7: /* 29 tokens of length 7 */
9674 if (name[1] == 'E' &&
9687 if (name[1] == '_' &&
9700 if (name[1] == 'i' &&
9707 return -KEY_binmode;
9713 if (name[1] == 'o' &&
9720 return -KEY_connect;
9729 if (name[2] == 'm' &&
9735 return -KEY_dbmopen;
9746 if (name[4] == 'u' &&
9750 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9756 if (name[4] == 'n' &&
9777 if (name[1] == 'o' &&
9790 if (name[1] == 'e' &&
9797 if (name[5] == 'r' &&
9800 return -KEY_getpgrp;
9806 if (name[5] == 'i' &&
9809 return -KEY_getppid;
9822 if (name[1] == 'c' &&
9829 return -KEY_lcfirst;
9835 if (name[1] == 'p' &&
9842 return -KEY_opendir;
9848 if (name[1] == 'a' &&
9866 if (name[3] == 'd' &&
9871 return -KEY_readdir;
9877 if (name[3] == 'u' &&
9888 if (name[3] == 'e' &&
9893 return -KEY_reverse;
9912 if (name[3] == 'k' &&
9917 return -KEY_seekdir;
9923 if (name[3] == 'p' &&
9928 return -KEY_setpgrp;
9938 if (name[2] == 'm' &&
9944 return -KEY_shmread;
9950 if (name[2] == 'r' &&
9956 return -KEY_sprintf;
9965 if (name[3] == 'l' &&
9970 return -KEY_symlink;
9979 if (name[4] == 'a' &&
9983 return -KEY_syscall;
9989 if (name[4] == 'p' &&
9993 return -KEY_sysopen;
9999 if (name[4] == 'e' &&
10003 return -KEY_sysread;
10009 if (name[4] == 'e' &&
10013 return -KEY_sysseek;
10031 if (name[1] == 'e' &&
10038 return -KEY_telldir;
10047 if (name[2] == 'f' &&
10053 return -KEY_ucfirst;
10059 if (name[2] == 's' &&
10065 return -KEY_unshift;
10075 if (name[1] == 'a' &&
10082 return -KEY_waitpid;
10091 case 8: /* 26 tokens of length 8 */
10095 if (name[1] == 'U' &&
10103 return KEY_AUTOLOAD;
10109 if (name[1] == '_')
10114 if (name[3] == 'A' &&
10120 return KEY___DATA__;
10126 if (name[3] == 'I' &&
10132 return -KEY___FILE__;
10138 if (name[3] == 'I' &&
10144 return -KEY___LINE__;
10160 if (name[2] == 'o' &&
10167 return -KEY_closedir;
10173 if (name[2] == 'n' &&
10180 return -KEY_continue;
10190 if (name[1] == 'b' &&
10198 return -KEY_dbmclose;
10204 if (name[1] == 'n' &&
10210 if (name[4] == 'r' &&
10215 return -KEY_endgrent;
10221 if (name[4] == 'w' &&
10226 return -KEY_endpwent;
10239 if (name[1] == 'o' &&
10247 return -KEY_formline;
10253 if (name[1] == 'e' &&
10259 if (name[4] == 'r')
10264 if (name[6] == 'n' &&
10267 return -KEY_getgrent;
10273 if (name[6] == 'i' &&
10276 return -KEY_getgrgid;
10282 if (name[6] == 'a' &&
10285 return -KEY_getgrnam;
10298 if (name[4] == 'o' &&
10303 return -KEY_getlogin;
10309 if (name[4] == 'w')
10314 if (name[6] == 'n' &&
10317 return -KEY_getpwent;
10323 if (name[6] == 'a' &&
10326 return -KEY_getpwnam;
10332 if (name[6] == 'i' &&
10335 return -KEY_getpwuid;
10355 if (name[1] == 'e' &&
10362 if (name[5] == 'i' &&
10369 return -KEY_readline;
10374 return -KEY_readlink;
10385 if (name[5] == 'i' &&
10389 return -KEY_readpipe;
10405 if (name[2] == 't')
10410 if (name[4] == 'r' &&
10415 return -KEY_setgrent;
10421 if (name[4] == 'w' &&
10426 return -KEY_setpwent;
10442 if (name[3] == 'w' &&
10448 return -KEY_shmwrite;
10454 if (name[3] == 't' &&
10460 return -KEY_shutdown;
10470 if (name[2] == 's' &&
10477 return -KEY_syswrite;
10487 if (name[1] == 'r' &&
10495 return -KEY_truncate;
10504 case 9: /* 9 tokens of length 9 */
10508 if (name[1] == 'N' &&
10517 return KEY_UNITCHECK;
10523 if (name[1] == 'n' &&
10532 return -KEY_endnetent;
10538 if (name[1] == 'e' &&
10547 return -KEY_getnetent;
10553 if (name[1] == 'o' &&
10562 return -KEY_localtime;
10568 if (name[1] == 'r' &&
10577 return KEY_prototype;
10583 if (name[1] == 'u' &&
10592 return -KEY_quotemeta;
10598 if (name[1] == 'e' &&
10607 return -KEY_rewinddir;
10613 if (name[1] == 'e' &&
10622 return -KEY_setnetent;
10628 if (name[1] == 'a' &&
10637 return -KEY_wantarray;
10646 case 10: /* 9 tokens of length 10 */
10650 if (name[1] == 'n' &&
10656 if (name[4] == 'o' &&
10663 return -KEY_endhostent;
10669 if (name[4] == 'e' &&
10676 return -KEY_endservent;
10689 if (name[1] == 'e' &&
10695 if (name[4] == 'o' &&
10702 return -KEY_gethostent;
10711 if (name[5] == 'r' &&
10717 return -KEY_getservent;
10723 if (name[5] == 'c' &&
10729 return -KEY_getsockopt;
10749 if (name[2] == 't')
10754 if (name[4] == 'o' &&
10761 return -KEY_sethostent;
10770 if (name[5] == 'r' &&
10776 return -KEY_setservent;
10782 if (name[5] == 'c' &&
10788 return -KEY_setsockopt;
10805 if (name[2] == 'c' &&
10814 return -KEY_socketpair;
10827 case 11: /* 8 tokens of length 11 */
10831 if (name[1] == '_' &&
10841 { /* __PACKAGE__ */
10842 return -KEY___PACKAGE__;
10848 if (name[1] == 'n' &&
10858 { /* endprotoent */
10859 return -KEY_endprotoent;
10865 if (name[1] == 'e' &&
10874 if (name[5] == 'e' &&
10880 { /* getpeername */
10881 return -KEY_getpeername;
10890 if (name[6] == 'o' &&
10895 { /* getpriority */
10896 return -KEY_getpriority;
10902 if (name[6] == 't' &&
10907 { /* getprotoent */
10908 return -KEY_getprotoent;
10922 if (name[4] == 'o' &&
10929 { /* getsockname */
10930 return -KEY_getsockname;
10943 if (name[1] == 'e' &&
10951 if (name[6] == 'o' &&
10956 { /* setpriority */
10957 return -KEY_setpriority;
10963 if (name[6] == 't' &&
10968 { /* setprotoent */
10969 return -KEY_setprotoent;
10985 case 12: /* 2 tokens of length 12 */
10986 if (name[0] == 'g' &&
10998 if (name[9] == 'd' &&
11001 { /* getnetbyaddr */
11002 return -KEY_getnetbyaddr;
11008 if (name[9] == 'a' &&
11011 { /* getnetbyname */
11012 return -KEY_getnetbyname;
11024 case 13: /* 4 tokens of length 13 */
11025 if (name[0] == 'g' &&
11032 if (name[4] == 'o' &&
11041 if (name[10] == 'd' &&
11044 { /* gethostbyaddr */
11045 return -KEY_gethostbyaddr;
11051 if (name[10] == 'a' &&
11054 { /* gethostbyname */
11055 return -KEY_gethostbyname;
11068 if (name[4] == 'e' &&
11077 if (name[10] == 'a' &&
11080 { /* getservbyname */
11081 return -KEY_getservbyname;
11087 if (name[10] == 'o' &&
11090 { /* getservbyport */
11091 return -KEY_getservbyport;
11110 case 14: /* 1 tokens of length 14 */
11111 if (name[0] == 'g' &&
11125 { /* getprotobyname */
11126 return -KEY_getprotobyname;
11131 case 16: /* 1 tokens of length 16 */
11132 if (name[0] == 'g' &&
11148 { /* getprotobynumber */
11149 return -KEY_getprotobynumber;
11163 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11167 PERL_ARGS_ASSERT_CHECKCOMMA;
11169 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
11170 if (ckWARN(WARN_SYNTAX)) {
11173 for (w = s+2; *w && level; w++) {
11176 else if (*w == ')')
11179 while (isSPACE(*w))
11181 /* the list of chars below is for end of statements or
11182 * block / parens, boolean operators (&&, ||, //) and branch
11183 * constructs (or, and, if, until, unless, while, err, for).
11184 * Not a very solid hack... */
11185 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11186 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11187 "%s (...) interpreted as function",name);
11190 while (s < PL_bufend && isSPACE(*s))
11194 while (s < PL_bufend && isSPACE(*s))
11196 if (isIDFIRST_lazy_if(s,UTF)) {
11197 const char * const w = s++;
11198 while (isALNUM_lazy_if(s,UTF))
11200 while (s < PL_bufend && isSPACE(*s))
11204 if (keyword(w, s - w, 0))
11207 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11208 if (gv && GvCVu(gv))
11210 Perl_croak(aTHX_ "No comma allowed after %s", what);
11215 /* Either returns sv, or mortalizes sv and returns a new SV*.
11216 Best used as sv=new_constant(..., sv, ...).
11217 If s, pv are NULL, calls subroutine with one argument,
11218 and type is used with error messages only. */
11221 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11222 SV *sv, SV *pv, const char *type, STRLEN typelen)
11225 HV * const table = GvHV(PL_hintgv); /* ^H */
11229 const char *why1 = "", *why2 = "", *why3 = "";
11231 PERL_ARGS_ASSERT_NEW_CONSTANT;
11233 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11236 why2 = (const char *)
11237 (strEQ(key,"charnames")
11238 ? "(possibly a missing \"use charnames ...\")"
11240 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11241 (type ? type: "undef"), why2);
11243 /* This is convoluted and evil ("goto considered harmful")
11244 * but I do not understand the intricacies of all the different
11245 * failure modes of %^H in here. The goal here is to make
11246 * the most probable error message user-friendly. --jhi */
11251 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11252 (type ? type: "undef"), why1, why2, why3);
11254 yyerror(SvPVX_const(msg));
11258 cvp = hv_fetch(table, key, keylen, FALSE);
11259 if (!cvp || !SvOK(*cvp)) {
11262 why3 = "} is not defined";
11265 sv_2mortal(sv); /* Parent created it permanently */
11268 pv = newSVpvn_flags(s, len, SVs_TEMP);
11270 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11272 typesv = &PL_sv_undef;
11274 PUSHSTACKi(PERLSI_OVERLOAD);
11286 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11290 /* Check the eval first */
11291 if (!PL_in_eval && SvTRUE(ERRSV)) {
11292 sv_catpvs(ERRSV, "Propagated");
11293 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11295 res = SvREFCNT_inc_simple(sv);
11299 SvREFCNT_inc_simple_void(res);
11308 why1 = "Call to &{$^H{";
11310 why3 = "}} did not return a defined value";
11318 /* Returns a NUL terminated string, with the length of the string written to
11322 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11325 register char *d = dest;
11326 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
11328 PERL_ARGS_ASSERT_SCAN_WORD;
11332 Perl_croak(aTHX_ ident_too_long);
11333 if (isALNUM(*s)) /* UTF handled below */
11335 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11340 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11344 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11345 char *t = s + UTF8SKIP(s);
11347 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11351 Perl_croak(aTHX_ ident_too_long);
11352 Copy(s, d, len, char);
11365 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11368 char *bracket = NULL;
11370 register char *d = dest;
11371 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
11373 PERL_ARGS_ASSERT_SCAN_IDENT;
11378 while (isDIGIT(*s)) {
11380 Perl_croak(aTHX_ ident_too_long);
11387 Perl_croak(aTHX_ ident_too_long);
11388 if (isALNUM(*s)) /* UTF handled below */
11390 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11395 else if (*s == ':' && s[1] == ':') {
11399 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11400 char *t = s + UTF8SKIP(s);
11401 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11403 if (d + (t - s) > e)
11404 Perl_croak(aTHX_ ident_too_long);
11405 Copy(s, d, t - s, char);
11416 if (PL_lex_state != LEX_NORMAL)
11417 PL_lex_state = LEX_INTERPENDMAYBE;
11420 if (*s == '$' && s[1] &&
11421 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11434 if (*d == '^' && *s && isCONTROLVAR(*s)) {
11439 if (isSPACE(s[-1])) {
11441 const char ch = *s++;
11442 if (!SPACE_OR_TAB(ch)) {
11448 if (isIDFIRST_lazy_if(d,UTF)) {
11452 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11453 end += UTF8SKIP(end);
11454 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11455 end += UTF8SKIP(end);
11457 Copy(s, d, end - s, char);
11462 while ((isALNUM(*s) || *s == ':') && d < e)
11465 Perl_croak(aTHX_ ident_too_long);
11468 while (s < send && SPACE_OR_TAB(*s))
11470 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11471 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11472 const char * const brack =
11474 ((*s == '[') ? "[...]" : "{...}");
11475 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11476 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11477 funny, dest, brack, funny, dest, brack);
11480 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11484 /* Handle extended ${^Foo} variables
11485 * 1999-02-27 mjd-perl-patch@plover.com */
11486 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11490 while (isALNUM(*s) && d < e) {
11494 Perl_croak(aTHX_ ident_too_long);
11499 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11500 PL_lex_state = LEX_INTERPEND;
11503 if (PL_lex_state == LEX_NORMAL) {
11504 if (ckWARN(WARN_AMBIGUOUS) &&
11505 (keyword(dest, d - dest, 0)
11506 || get_cvn_flags(dest, d - dest, 0)))
11510 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11511 "Ambiguous use of %c{%s} resolved to %c%s",
11512 funny, dest, funny, dest);
11517 s = bracket; /* let the parser handle it */
11521 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11522 PL_lex_state = LEX_INTERPEND;
11527 S_pmflag(U32 pmfl, const char ch) {
11529 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11530 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11531 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11532 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11533 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11539 Perl_pmflag(pTHX_ U32* pmfl, int ch)
11541 PERL_ARGS_ASSERT_PMFLAG;
11543 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11544 "Perl_pmflag() is deprecated, and will be removed from the XS API");
11547 *pmfl = S_pmflag(*pmfl, (char)ch);
11552 S_scan_pat(pTHX_ char *start, I32 type)
11556 char *s = scan_str(start,!!PL_madskills,FALSE);
11557 const char * const valid_flags =
11558 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11563 PERL_ARGS_ASSERT_SCAN_PAT;
11566 const char * const delimiter = skipspace(start);
11570 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11571 : "Search pattern not terminated" ));
11574 pm = (PMOP*)newPMOP(type, 0);
11575 if (PL_multi_open == '?') {
11576 /* This is the only point in the code that sets PMf_ONCE: */
11577 pm->op_pmflags |= PMf_ONCE;
11579 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11580 allows us to restrict the list needed by reset to just the ??
11582 assert(type != OP_TRANS);
11584 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11587 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11590 elements = mg->mg_len / sizeof(PMOP**);
11591 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11592 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11593 mg->mg_len = elements * sizeof(PMOP**);
11594 PmopSTASH_set(pm,PL_curstash);
11600 while (*s && strchr(valid_flags, *s))
11601 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11603 if (PL_madskills && modstart != s) {
11604 SV* tmptoken = newSVpvn(modstart, s - modstart);
11605 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11608 /* issue a warning if /c is specified,but /g is not */
11609 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11611 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11612 "Use of /c modifier is meaningless without /g" );
11615 PL_lex_op = (OP*)pm;
11616 pl_yylval.ival = OP_MATCH;
11621 S_scan_subst(pTHX_ char *start)
11632 PERL_ARGS_ASSERT_SCAN_SUBST;
11634 pl_yylval.ival = OP_NULL;
11636 s = scan_str(start,!!PL_madskills,FALSE);
11639 Perl_croak(aTHX_ "Substitution pattern not terminated");
11641 if (s[-1] == PL_multi_open)
11644 if (PL_madskills) {
11645 CURMAD('q', PL_thisopen);
11646 CURMAD('_', PL_thiswhite);
11647 CURMAD('E', PL_thisstuff);
11648 CURMAD('Q', PL_thisclose);
11649 PL_realtokenstart = s - SvPVX(PL_linestr);
11653 first_start = PL_multi_start;
11654 s = scan_str(s,!!PL_madskills,FALSE);
11656 if (PL_lex_stuff) {
11657 SvREFCNT_dec(PL_lex_stuff);
11658 PL_lex_stuff = NULL;
11660 Perl_croak(aTHX_ "Substitution replacement not terminated");
11662 PL_multi_start = first_start; /* so whole substitution is taken together */
11664 pm = (PMOP*)newPMOP(OP_SUBST, 0);
11667 if (PL_madskills) {
11668 CURMAD('z', PL_thisopen);
11669 CURMAD('R', PL_thisstuff);
11670 CURMAD('Z', PL_thisclose);
11676 if (*s == EXEC_PAT_MOD) {
11680 else if (strchr(S_PAT_MODS, *s))
11681 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11687 if (PL_madskills) {
11689 curmad('m', newSVpvn(modstart, s - modstart));
11690 append_madprops(PL_thismad, (OP*)pm, 0);
11694 if ((pm->op_pmflags & PMf_CONTINUE)) {
11695 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11699 SV * const repl = newSVpvs("");
11701 PL_sublex_info.super_bufptr = s;
11702 PL_sublex_info.super_bufend = PL_bufend;
11704 pm->op_pmflags |= PMf_EVAL;
11707 sv_catpvs(repl, "eval ");
11709 sv_catpvs(repl, "do ");
11711 sv_catpvs(repl, "{");
11712 sv_catsv(repl, PL_lex_repl);
11713 if (strchr(SvPVX(PL_lex_repl), '#'))
11714 sv_catpvs(repl, "\n");
11715 sv_catpvs(repl, "}");
11717 SvREFCNT_dec(PL_lex_repl);
11718 PL_lex_repl = repl;
11721 PL_lex_op = (OP*)pm;
11722 pl_yylval.ival = OP_SUBST;
11727 S_scan_trans(pTHX_ char *start)
11740 PERL_ARGS_ASSERT_SCAN_TRANS;
11742 pl_yylval.ival = OP_NULL;
11744 s = scan_str(start,!!PL_madskills,FALSE);
11746 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11748 if (s[-1] == PL_multi_open)
11751 if (PL_madskills) {
11752 CURMAD('q', PL_thisopen);
11753 CURMAD('_', PL_thiswhite);
11754 CURMAD('E', PL_thisstuff);
11755 CURMAD('Q', PL_thisclose);
11756 PL_realtokenstart = s - SvPVX(PL_linestr);
11760 s = scan_str(s,!!PL_madskills,FALSE);
11762 if (PL_lex_stuff) {
11763 SvREFCNT_dec(PL_lex_stuff);
11764 PL_lex_stuff = NULL;
11766 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11768 if (PL_madskills) {
11769 CURMAD('z', PL_thisopen);
11770 CURMAD('R', PL_thisstuff);
11771 CURMAD('Z', PL_thisclose);
11774 complement = del = squash = 0;
11781 complement = OPpTRANS_COMPLEMENT;
11784 del = OPpTRANS_DELETE;
11787 squash = OPpTRANS_SQUASH;
11796 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11797 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11798 o->op_private &= ~OPpTRANS_ALL;
11799 o->op_private |= del|squash|complement|
11800 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11801 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11804 pl_yylval.ival = OP_TRANS;
11807 if (PL_madskills) {
11809 curmad('m', newSVpvn(modstart, s - modstart));
11810 append_madprops(PL_thismad, o, 0);
11819 S_scan_heredoc(pTHX_ register char *s)
11823 I32 op_type = OP_SCALAR;
11827 const char *found_newline;
11831 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11833 I32 stuffstart = s - SvPVX(PL_linestr);
11836 PL_realtokenstart = -1;
11839 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11843 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11847 while (SPACE_OR_TAB(*peek))
11849 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11852 s = delimcpy(d, e, s, PL_bufend, term, &len);
11862 if (!isALNUM_lazy_if(s,UTF))
11863 deprecate("bare << to mean <<\"\"");
11864 for (; isALNUM_lazy_if(s,UTF); s++) {
11869 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11870 Perl_croak(aTHX_ "Delimiter for here document is too long");
11873 len = d - PL_tokenbuf;
11876 if (PL_madskills) {
11877 tstart = PL_tokenbuf + !outer;
11878 PL_thisclose = newSVpvn(tstart, len - !outer);
11879 tstart = SvPVX(PL_linestr) + stuffstart;
11880 PL_thisopen = newSVpvn(tstart, s - tstart);
11881 stuffstart = s - SvPVX(PL_linestr);
11884 #ifndef PERL_STRICT_CR
11885 d = strchr(s, '\r');
11887 char * const olds = s;
11889 while (s < PL_bufend) {
11895 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11904 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11911 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11912 herewas = newSVpvn(s,PL_bufend-s);
11916 herewas = newSVpvn(s-1,found_newline-s+1);
11919 herewas = newSVpvn(s,found_newline-s);
11923 if (PL_madskills) {
11924 tstart = SvPVX(PL_linestr) + stuffstart;
11926 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11928 PL_thisstuff = newSVpvn(tstart, s - tstart);
11931 s += SvCUR(herewas);
11934 stuffstart = s - SvPVX(PL_linestr);
11940 tmpstr = newSV_type(SVt_PVIV);
11941 SvGROW(tmpstr, 80);
11942 if (term == '\'') {
11943 op_type = OP_CONST;
11944 SvIV_set(tmpstr, -1);
11946 else if (term == '`') {
11947 op_type = OP_BACKTICK;
11948 SvIV_set(tmpstr, '\\');
11952 PL_multi_start = CopLINE(PL_curcop);
11953 PL_multi_open = PL_multi_close = '<';
11954 term = *PL_tokenbuf;
11955 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11956 char * const bufptr = PL_sublex_info.super_bufptr;
11957 char * const bufend = PL_sublex_info.super_bufend;
11958 char * const olds = s - SvCUR(herewas);
11959 s = strchr(bufptr, '\n');
11963 while (s < bufend &&
11964 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11966 CopLINE_inc(PL_curcop);
11969 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11970 missingterm(PL_tokenbuf);
11972 sv_setpvn(herewas,bufptr,d-bufptr+1);
11973 sv_setpvn(tmpstr,d+1,s-d);
11975 sv_catpvn(herewas,s,bufend-s);
11976 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11983 while (s < PL_bufend &&
11984 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11986 CopLINE_inc(PL_curcop);
11988 if (s >= PL_bufend) {
11989 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11990 missingterm(PL_tokenbuf);
11992 sv_setpvn(tmpstr,d+1,s-d);
11994 if (PL_madskills) {
11996 sv_catpvn(PL_thisstuff, d + 1, s - d);
11998 PL_thisstuff = newSVpvn(d + 1, s - d);
11999 stuffstart = s - SvPVX(PL_linestr);
12003 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12005 sv_catpvn(herewas,s,PL_bufend-s);
12006 sv_setsv(PL_linestr,herewas);
12007 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12008 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12009 PL_last_lop = PL_last_uni = NULL;
12012 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
12013 while (s >= PL_bufend) { /* multiple line string? */
12015 if (PL_madskills) {
12016 tstart = SvPVX(PL_linestr) + stuffstart;
12018 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12020 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12024 CopLINE_inc(PL_curcop);
12025 if (!outer || !lex_next_chunk(0)) {
12026 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12027 missingterm(PL_tokenbuf);
12029 CopLINE_dec(PL_curcop);
12032 stuffstart = s - SvPVX(PL_linestr);
12034 CopLINE_inc(PL_curcop);
12035 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12036 PL_last_lop = PL_last_uni = NULL;
12037 #ifndef PERL_STRICT_CR
12038 if (PL_bufend - PL_linestart >= 2) {
12039 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12040 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12042 PL_bufend[-2] = '\n';
12044 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12046 else if (PL_bufend[-1] == '\r')
12047 PL_bufend[-1] = '\n';
12049 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12050 PL_bufend[-1] = '\n';
12052 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12053 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12054 *(SvPVX(PL_linestr) + off ) = ' ';
12055 sv_catsv(PL_linestr,herewas);
12056 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12057 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12061 sv_catsv(tmpstr,PL_linestr);
12066 PL_multi_end = CopLINE(PL_curcop);
12067 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12068 SvPV_shrink_to_cur(tmpstr);
12070 SvREFCNT_dec(herewas);
12072 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12074 else if (PL_encoding)
12075 sv_recode_to_utf8(tmpstr, PL_encoding);
12077 PL_lex_stuff = tmpstr;
12078 pl_yylval.ival = op_type;
12082 /* scan_inputsymbol
12083 takes: current position in input buffer
12084 returns: new position in input buffer
12085 side-effects: pl_yylval and lex_op are set.
12090 <FH> read from filehandle
12091 <pkg::FH> read from package qualified filehandle
12092 <pkg'FH> read from package qualified filehandle
12093 <$fh> read from filehandle in $fh
12094 <*.h> filename glob
12099 S_scan_inputsymbol(pTHX_ char *start)
12102 register char *s = start; /* current position in buffer */
12105 char *d = PL_tokenbuf; /* start of temp holding space */
12106 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12108 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12110 end = strchr(s, '\n');
12113 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
12115 /* die if we didn't have space for the contents of the <>,
12116 or if it didn't end, or if we see a newline
12119 if (len >= (I32)sizeof PL_tokenbuf)
12120 Perl_croak(aTHX_ "Excessively long <> operator");
12122 Perl_croak(aTHX_ "Unterminated <> operator");
12127 Remember, only scalar variables are interpreted as filehandles by
12128 this code. Anything more complex (e.g., <$fh{$num}>) will be
12129 treated as a glob() call.
12130 This code makes use of the fact that except for the $ at the front,
12131 a scalar variable and a filehandle look the same.
12133 if (*d == '$' && d[1]) d++;
12135 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12136 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12139 /* If we've tried to read what we allow filehandles to look like, and
12140 there's still text left, then it must be a glob() and not a getline.
12141 Use scan_str to pull out the stuff between the <> and treat it
12142 as nothing more than a string.
12145 if (d - PL_tokenbuf != len) {
12146 pl_yylval.ival = OP_GLOB;
12147 s = scan_str(start,!!PL_madskills,FALSE);
12149 Perl_croak(aTHX_ "Glob not terminated");
12153 bool readline_overriden = FALSE;
12156 /* we're in a filehandle read situation */
12159 /* turn <> into <ARGV> */
12161 Copy("ARGV",d,5,char);
12163 /* Check whether readline() is overriden */
12164 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12166 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12168 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12169 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12170 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12171 readline_overriden = TRUE;
12173 /* if <$fh>, create the ops to turn the variable into a
12177 /* try to find it in the pad for this block, otherwise find
12178 add symbol table ops
12180 const PADOFFSET tmp = pad_findmy(d, len, 0);
12181 if (tmp != NOT_IN_PAD) {
12182 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12183 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12184 HEK * const stashname = HvNAME_HEK(stash);
12185 SV * const sym = sv_2mortal(newSVhek(stashname));
12186 sv_catpvs(sym, "::");
12187 sv_catpv(sym, d+1);
12192 OP * const o = newOP(OP_PADSV, 0);
12194 PL_lex_op = readline_overriden
12195 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12196 append_elem(OP_LIST, o,
12197 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12198 : (OP*)newUNOP(OP_READLINE, 0, o);
12207 ? (GV_ADDMULTI | GV_ADDINEVAL)
12210 PL_lex_op = readline_overriden
12211 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12212 append_elem(OP_LIST,
12213 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12214 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12215 : (OP*)newUNOP(OP_READLINE, 0,
12216 newUNOP(OP_RV2SV, 0,
12217 newGVOP(OP_GV, 0, gv)));
12219 if (!readline_overriden)
12220 PL_lex_op->op_flags |= OPf_SPECIAL;
12221 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12222 pl_yylval.ival = OP_NULL;
12225 /* If it's none of the above, it must be a literal filehandle
12226 (<Foo::BAR> or <FOO>) so build a simple readline OP */
12228 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12229 PL_lex_op = readline_overriden
12230 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12231 append_elem(OP_LIST,
12232 newGVOP(OP_GV, 0, gv),
12233 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12234 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12235 pl_yylval.ival = OP_NULL;
12244 takes: start position in buffer
12245 keep_quoted preserve \ on the embedded delimiter(s)
12246 keep_delims preserve the delimiters around the string
12247 returns: position to continue reading from buffer
12248 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12249 updates the read buffer.
12251 This subroutine pulls a string out of the input. It is called for:
12252 q single quotes q(literal text)
12253 ' single quotes 'literal text'
12254 qq double quotes qq(interpolate $here please)
12255 " double quotes "interpolate $here please"
12256 qx backticks qx(/bin/ls -l)
12257 ` backticks `/bin/ls -l`
12258 qw quote words @EXPORT_OK = qw( func() $spam )
12259 m// regexp match m/this/
12260 s/// regexp substitute s/this/that/
12261 tr/// string transliterate tr/this/that/
12262 y/// string transliterate y/this/that/
12263 ($*@) sub prototypes sub foo ($)
12264 (stuff) sub attr parameters sub foo : attr(stuff)
12265 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12267 In most of these cases (all but <>, patterns and transliterate)
12268 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12269 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12270 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12273 It skips whitespace before the string starts, and treats the first
12274 character as the delimiter. If the delimiter is one of ([{< then
12275 the corresponding "close" character )]}> is used as the closing
12276 delimiter. It allows quoting of delimiters, and if the string has
12277 balanced delimiters ([{<>}]) it allows nesting.
12279 On success, the SV with the resulting string is put into lex_stuff or,
12280 if that is already non-NULL, into lex_repl. The second case occurs only
12281 when parsing the RHS of the special constructs s/// and tr/// (y///).
12282 For convenience, the terminating delimiter character is stuffed into
12287 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12290 SV *sv; /* scalar value: string */
12291 const char *tmps; /* temp string, used for delimiter matching */
12292 register char *s = start; /* current position in the buffer */
12293 register char term; /* terminating character */
12294 register char *to; /* current position in the sv's data */
12295 I32 brackets = 1; /* bracket nesting level */
12296 bool has_utf8 = FALSE; /* is there any utf8 content? */
12297 I32 termcode; /* terminating char. code */
12298 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
12299 STRLEN termlen; /* length of terminating string */
12300 int last_off = 0; /* last position for nesting bracket */
12306 PERL_ARGS_ASSERT_SCAN_STR;
12308 /* skip space before the delimiter */
12314 if (PL_realtokenstart >= 0) {
12315 stuffstart = PL_realtokenstart;
12316 PL_realtokenstart = -1;
12319 stuffstart = start - SvPVX(PL_linestr);
12321 /* mark where we are, in case we need to report errors */
12324 /* after skipping whitespace, the next character is the terminator */
12327 termcode = termstr[0] = term;
12331 termcode = utf8_to_uvchr((U8*)s, &termlen);
12332 Copy(s, termstr, termlen, U8);
12333 if (!UTF8_IS_INVARIANT(term))
12337 /* mark where we are */
12338 PL_multi_start = CopLINE(PL_curcop);
12339 PL_multi_open = term;
12341 /* find corresponding closing delimiter */
12342 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12343 termcode = termstr[0] = term = tmps[5];
12345 PL_multi_close = term;
12347 /* create a new SV to hold the contents. 79 is the SV's initial length.
12348 What a random number. */
12349 sv = newSV_type(SVt_PVIV);
12351 SvIV_set(sv, termcode);
12352 (void)SvPOK_only(sv); /* validate pointer */
12354 /* move past delimiter and try to read a complete string */
12356 sv_catpvn(sv, s, termlen);
12359 tstart = SvPVX(PL_linestr) + stuffstart;
12360 if (!PL_thisopen && !keep_delims) {
12361 PL_thisopen = newSVpvn(tstart, s - tstart);
12362 stuffstart = s - SvPVX(PL_linestr);
12366 if (PL_encoding && !UTF) {
12370 int offset = s - SvPVX_const(PL_linestr);
12371 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12372 &offset, (char*)termstr, termlen);
12373 const char * const ns = SvPVX_const(PL_linestr) + offset;
12374 char * const svlast = SvEND(sv) - 1;
12376 for (; s < ns; s++) {
12377 if (*s == '\n' && !PL_rsfp)
12378 CopLINE_inc(PL_curcop);
12381 goto read_more_line;
12383 /* handle quoted delimiters */
12384 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12386 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12388 if ((svlast-1 - t) % 2) {
12389 if (!keep_quoted) {
12390 *(svlast-1) = term;
12392 SvCUR_set(sv, SvCUR(sv) - 1);
12397 if (PL_multi_open == PL_multi_close) {
12403 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12404 /* At here, all closes are "was quoted" one,
12405 so we don't check PL_multi_close. */
12407 if (!keep_quoted && *(t+1) == PL_multi_open)
12412 else if (*t == PL_multi_open)
12420 SvCUR_set(sv, w - SvPVX_const(sv));
12422 last_off = w - SvPVX(sv);
12423 if (--brackets <= 0)
12428 if (!keep_delims) {
12429 SvCUR_set(sv, SvCUR(sv) - 1);
12435 /* extend sv if need be */
12436 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12437 /* set 'to' to the next character in the sv's string */
12438 to = SvPVX(sv)+SvCUR(sv);
12440 /* if open delimiter is the close delimiter read unbridle */
12441 if (PL_multi_open == PL_multi_close) {
12442 for (; s < PL_bufend; s++,to++) {
12443 /* embedded newlines increment the current line number */
12444 if (*s == '\n' && !PL_rsfp)
12445 CopLINE_inc(PL_curcop);
12446 /* handle quoted delimiters */
12447 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12448 if (!keep_quoted && s[1] == term)
12450 /* any other quotes are simply copied straight through */
12454 /* terminate when run out of buffer (the for() condition), or
12455 have found the terminator */
12456 else if (*s == term) {
12459 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12462 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12468 /* if the terminator isn't the same as the start character (e.g.,
12469 matched brackets), we have to allow more in the quoting, and
12470 be prepared for nested brackets.
12473 /* read until we run out of string, or we find the terminator */
12474 for (; s < PL_bufend; s++,to++) {
12475 /* embedded newlines increment the line count */
12476 if (*s == '\n' && !PL_rsfp)
12477 CopLINE_inc(PL_curcop);
12478 /* backslashes can escape the open or closing characters */
12479 if (*s == '\\' && s+1 < PL_bufend) {
12480 if (!keep_quoted &&
12481 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12486 /* allow nested opens and closes */
12487 else if (*s == PL_multi_close && --brackets <= 0)
12489 else if (*s == PL_multi_open)
12491 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12496 /* terminate the copied string and update the sv's end-of-string */
12498 SvCUR_set(sv, to - SvPVX_const(sv));
12501 * this next chunk reads more into the buffer if we're not done yet
12505 break; /* handle case where we are done yet :-) */
12507 #ifndef PERL_STRICT_CR
12508 if (to - SvPVX_const(sv) >= 2) {
12509 if ((to[-2] == '\r' && to[-1] == '\n') ||
12510 (to[-2] == '\n' && to[-1] == '\r'))
12514 SvCUR_set(sv, to - SvPVX_const(sv));
12516 else if (to[-1] == '\r')
12519 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12524 /* if we're out of file, or a read fails, bail and reset the current
12525 line marker so we can report where the unterminated string began
12528 if (PL_madskills) {
12529 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12531 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12533 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12536 CopLINE_inc(PL_curcop);
12537 PL_bufptr = PL_bufend;
12538 if (!lex_next_chunk(0)) {
12540 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12549 /* at this point, we have successfully read the delimited string */
12551 if (!PL_encoding || UTF) {
12553 if (PL_madskills) {
12554 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12555 const int len = s - tstart;
12557 sv_catpvn(PL_thisstuff, tstart, len);
12559 PL_thisstuff = newSVpvn(tstart, len);
12560 if (!PL_thisclose && !keep_delims)
12561 PL_thisclose = newSVpvn(s,termlen);
12566 sv_catpvn(sv, s, termlen);
12571 if (PL_madskills) {
12572 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12573 const int len = s - tstart - termlen;
12575 sv_catpvn(PL_thisstuff, tstart, len);
12577 PL_thisstuff = newSVpvn(tstart, len);
12578 if (!PL_thisclose && !keep_delims)
12579 PL_thisclose = newSVpvn(s - termlen,termlen);
12583 if (has_utf8 || PL_encoding)
12586 PL_multi_end = CopLINE(PL_curcop);
12588 /* if we allocated too much space, give some back */
12589 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12590 SvLEN_set(sv, SvCUR(sv) + 1);
12591 SvPV_renew(sv, SvLEN(sv));
12594 /* decide whether this is the first or second quoted string we've read
12607 takes: pointer to position in buffer
12608 returns: pointer to new position in buffer
12609 side-effects: builds ops for the constant in pl_yylval.op
12611 Read a number in any of the formats that Perl accepts:
12613 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12614 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12617 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12619 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12622 If it reads a number without a decimal point or an exponent, it will
12623 try converting the number to an integer and see if it can do so
12624 without loss of precision.
12628 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12631 register const char *s = start; /* current position in buffer */
12632 register char *d; /* destination in temp buffer */
12633 register char *e; /* end of temp buffer */
12634 NV nv; /* number read, as a double */
12635 SV *sv = NULL; /* place to put the converted number */
12636 bool floatit; /* boolean: int or float? */
12637 const char *lastub = NULL; /* position of last underbar */
12638 static char const number_too_long[] = "Number too long";
12640 PERL_ARGS_ASSERT_SCAN_NUM;
12642 /* We use the first character to decide what type of number this is */
12646 Perl_croak(aTHX_ "panic: scan_num");
12648 /* if it starts with a 0, it could be an octal number, a decimal in
12649 0.13 disguise, or a hexadecimal number, or a binary number. */
12653 u holds the "number so far"
12654 shift the power of 2 of the base
12655 (hex == 4, octal == 3, binary == 1)
12656 overflowed was the number more than we can hold?
12658 Shift is used when we add a digit. It also serves as an "are
12659 we in octal/hex/binary?" indicator to disallow hex characters
12660 when in octal mode.
12665 bool overflowed = FALSE;
12666 bool just_zero = TRUE; /* just plain 0 or binary number? */
12667 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12668 static const char* const bases[5] =
12669 { "", "binary", "", "octal", "hexadecimal" };
12670 static const char* const Bases[5] =
12671 { "", "Binary", "", "Octal", "Hexadecimal" };
12672 static const char* const maxima[5] =
12674 "0b11111111111111111111111111111111",
12678 const char *base, *Base, *max;
12680 /* check for hex */
12685 } else if (s[1] == 'b') {
12690 /* check for a decimal in disguise */
12691 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12693 /* so it must be octal */
12700 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12701 "Misplaced _ in number");
12705 base = bases[shift];
12706 Base = Bases[shift];
12707 max = maxima[shift];
12709 /* read the rest of the number */
12711 /* x is used in the overflow test,
12712 b is the digit we're adding on. */
12717 /* if we don't mention it, we're done */
12721 /* _ are ignored -- but warned about if consecutive */
12723 if (lastub && s == lastub + 1)
12724 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12725 "Misplaced _ in number");
12729 /* 8 and 9 are not octal */
12730 case '8': case '9':
12732 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12736 case '2': case '3': case '4':
12737 case '5': case '6': case '7':
12739 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12742 case '0': case '1':
12743 b = *s++ & 15; /* ASCII digit -> value of digit */
12747 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12748 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12749 /* make sure they said 0x */
12752 b = (*s++ & 7) + 9;
12754 /* Prepare to put the digit we have onto the end
12755 of the number so far. We check for overflows.
12761 x = u << shift; /* make room for the digit */
12763 if ((x >> shift) != u
12764 && !(PL_hints & HINT_NEW_BINARY)) {
12767 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12768 "Integer overflow in %s number",
12771 u = x | b; /* add the digit to the end */
12774 n *= nvshift[shift];
12775 /* If an NV has not enough bits in its
12776 * mantissa to represent an UV this summing of
12777 * small low-order numbers is a waste of time
12778 * (because the NV cannot preserve the
12779 * low-order bits anyway): we could just
12780 * remember when did we overflow and in the
12781 * end just multiply n by the right
12789 /* if we get here, we had success: make a scalar value from
12794 /* final misplaced underbar check */
12795 if (s[-1] == '_') {
12796 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12801 if (n > 4294967295.0)
12802 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12803 "%s number > %s non-portable",
12809 if (u > 0xffffffff)
12810 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12811 "%s number > %s non-portable",
12816 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12817 sv = new_constant(start, s - start, "integer",
12818 sv, NULL, NULL, 0);
12819 else if (PL_hints & HINT_NEW_BINARY)
12820 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12825 handle decimal numbers.
12826 we're also sent here when we read a 0 as the first digit
12828 case '1': case '2': case '3': case '4': case '5':
12829 case '6': case '7': case '8': case '9': case '.':
12832 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12835 /* read next group of digits and _ and copy into d */
12836 while (isDIGIT(*s) || *s == '_') {
12837 /* skip underscores, checking for misplaced ones
12841 if (lastub && s == lastub + 1)
12842 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12843 "Misplaced _ in number");
12847 /* check for end of fixed-length buffer */
12849 Perl_croak(aTHX_ number_too_long);
12850 /* if we're ok, copy the character */
12855 /* final misplaced underbar check */
12856 if (lastub && s == lastub + 1) {
12857 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12860 /* read a decimal portion if there is one. avoid
12861 3..5 being interpreted as the number 3. followed
12864 if (*s == '.' && s[1] != '.') {
12869 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12870 "Misplaced _ in number");
12874 /* copy, ignoring underbars, until we run out of digits.
12876 for (; isDIGIT(*s) || *s == '_'; s++) {
12877 /* fixed length buffer check */
12879 Perl_croak(aTHX_ number_too_long);
12881 if (lastub && s == lastub + 1)
12882 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12883 "Misplaced _ in number");
12889 /* fractional part ending in underbar? */
12890 if (s[-1] == '_') {
12891 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12892 "Misplaced _ in number");
12894 if (*s == '.' && isDIGIT(s[1])) {
12895 /* oops, it's really a v-string, but without the "v" */
12901 /* read exponent part, if present */
12902 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12906 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12907 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12909 /* stray preinitial _ */
12911 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12912 "Misplaced _ in number");
12916 /* allow positive or negative exponent */
12917 if (*s == '+' || *s == '-')
12920 /* stray initial _ */
12922 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12923 "Misplaced _ in number");
12927 /* read digits of exponent */
12928 while (isDIGIT(*s) || *s == '_') {
12931 Perl_croak(aTHX_ number_too_long);
12935 if (((lastub && s == lastub + 1) ||
12936 (!isDIGIT(s[1]) && s[1] != '_')))
12937 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12938 "Misplaced _ in number");
12945 /* make an sv from the string */
12949 We try to do an integer conversion first if no characters
12950 indicating "float" have been found.
12955 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12957 if (flags == IS_NUMBER_IN_UV) {
12959 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12962 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12963 if (uv <= (UV) IV_MIN)
12964 sv_setiv(sv, -(IV)uv);
12971 /* terminate the string */
12973 nv = Atof(PL_tokenbuf);
12978 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12979 const char *const key = floatit ? "float" : "integer";
12980 const STRLEN keylen = floatit ? 5 : 7;
12981 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12982 key, keylen, sv, NULL, NULL, 0);
12986 /* if it starts with a v, it could be a v-string */
12989 sv = newSV(5); /* preallocate storage space */
12990 s = scan_vstring(s, PL_bufend, sv);
12994 /* make the op for the constant and return */
12997 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12999 lvalp->opval = NULL;
13005 S_scan_formline(pTHX_ register char *s)
13008 register char *eol;
13010 SV * const stuff = newSVpvs("");
13011 bool needargs = FALSE;
13012 bool eofmt = FALSE;
13014 char *tokenstart = s;
13015 SV* savewhite = NULL;
13017 if (PL_madskills) {
13018 savewhite = PL_thiswhite;
13023 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13025 while (!needargs) {
13028 #ifdef PERL_STRICT_CR
13029 while (SPACE_OR_TAB(*t))
13032 while (SPACE_OR_TAB(*t) || *t == '\r')
13035 if (*t == '\n' || t == PL_bufend) {
13040 if (PL_in_eval && !PL_rsfp) {
13041 eol = (char *) memchr(s,'\n',PL_bufend-s);
13046 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13048 for (t = s; t < eol; t++) {
13049 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13051 goto enough; /* ~~ must be first line in formline */
13053 if (*t == '@' || *t == '^')
13057 sv_catpvn(stuff, s, eol-s);
13058 #ifndef PERL_STRICT_CR
13059 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13060 char *end = SvPVX(stuff) + SvCUR(stuff);
13063 SvCUR_set(stuff, SvCUR(stuff) - 1);
13074 if (PL_madskills) {
13076 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13078 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13081 PL_bufptr = PL_bufend;
13082 CopLINE_inc(PL_curcop);
13083 got_some = lex_next_chunk(0);
13084 CopLINE_dec(PL_curcop);
13087 tokenstart = PL_bufptr;
13095 if (SvCUR(stuff)) {
13098 PL_lex_state = LEX_NORMAL;
13099 start_force(PL_curforce);
13100 NEXTVAL_NEXTTOKE.ival = 0;
13104 PL_lex_state = LEX_FORMLINE;
13106 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13108 else if (PL_encoding)
13109 sv_recode_to_utf8(stuff, PL_encoding);
13111 start_force(PL_curforce);
13112 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13114 start_force(PL_curforce);
13115 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13119 SvREFCNT_dec(stuff);
13121 PL_lex_formbrack = 0;
13125 if (PL_madskills) {
13127 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13129 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13130 PL_thiswhite = savewhite;
13137 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13140 const I32 oldsavestack_ix = PL_savestack_ix;
13141 CV* const outsidecv = PL_compcv;
13144 assert(SvTYPE(PL_compcv) == SVt_PVCV);
13146 SAVEI32(PL_subline);
13147 save_item(PL_subname);
13148 SAVESPTR(PL_compcv);
13150 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13151 CvFLAGS(PL_compcv) |= flags;
13153 PL_subline = CopLINE(PL_curcop);
13154 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13155 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13156 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13158 return oldsavestack_ix;
13162 #pragma segment Perl_yylex
13165 S_yywarn(pTHX_ const char *const s)
13169 PERL_ARGS_ASSERT_YYWARN;
13171 PL_in_eval |= EVAL_WARNONLY;
13173 PL_in_eval &= ~EVAL_WARNONLY;
13178 Perl_yyerror(pTHX_ const char *const s)
13181 const char *where = NULL;
13182 const char *context = NULL;
13185 int yychar = PL_parser->yychar;
13187 PERL_ARGS_ASSERT_YYERROR;
13189 if (!yychar || (yychar == ';' && !PL_rsfp))
13191 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13192 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13193 PL_oldbufptr != PL_bufptr) {
13196 The code below is removed for NetWare because it abends/crashes on NetWare
13197 when the script has error such as not having the closing quotes like:
13198 if ($var eq "value)
13199 Checking of white spaces is anyway done in NetWare code.
13202 while (isSPACE(*PL_oldoldbufptr))
13205 context = PL_oldoldbufptr;
13206 contlen = PL_bufptr - PL_oldoldbufptr;
13208 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13209 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13212 The code below is removed for NetWare because it abends/crashes on NetWare
13213 when the script has error such as not having the closing quotes like:
13214 if ($var eq "value)
13215 Checking of white spaces is anyway done in NetWare code.
13218 while (isSPACE(*PL_oldbufptr))
13221 context = PL_oldbufptr;
13222 contlen = PL_bufptr - PL_oldbufptr;
13224 else if (yychar > 255)
13225 where = "next token ???";
13226 else if (yychar == -2) { /* YYEMPTY */
13227 if (PL_lex_state == LEX_NORMAL ||
13228 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13229 where = "at end of line";
13230 else if (PL_lex_inpat)
13231 where = "within pattern";
13233 where = "within string";
13236 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13238 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13239 else if (isPRINT_LC(yychar)) {
13240 const char string = yychar;
13241 sv_catpvn(where_sv, &string, 1);
13244 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13245 where = SvPVX_const(where_sv);
13247 msg = sv_2mortal(newSVpv(s, 0));
13248 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13249 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13251 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13253 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13254 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13255 Perl_sv_catpvf(aTHX_ msg,
13256 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13257 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13260 if (PL_in_eval & EVAL_WARNONLY) {
13261 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13265 if (PL_error_count >= 10) {
13266 if (PL_in_eval && SvCUR(ERRSV))
13267 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13268 SVfARG(ERRSV), OutCopFILE(PL_curcop));
13270 Perl_croak(aTHX_ "%s has too many errors.\n",
13271 OutCopFILE(PL_curcop));
13274 PL_in_my_stash = NULL;
13278 #pragma segment Main
13282 S_swallow_bom(pTHX_ U8 *s)
13285 const STRLEN slen = SvCUR(PL_linestr);
13287 PERL_ARGS_ASSERT_SWALLOW_BOM;
13291 if (s[1] == 0xFE) {
13292 /* UTF-16 little-endian? (or UTF32-LE?) */
13293 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
13294 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
13295 #ifndef PERL_NO_UTF16_FILTER
13296 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
13298 if (PL_bufend > (char*)s) {
13299 s = add_utf16_textfilter(s, TRUE);
13302 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
13307 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
13308 #ifndef PERL_NO_UTF16_FILTER
13309 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13311 if (PL_bufend > (char *)s) {
13312 s = add_utf16_textfilter(s, FALSE);
13315 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
13320 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13321 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13322 s += 3; /* UTF-8 */
13328 if (s[2] == 0xFE && s[3] == 0xFF) {
13329 /* UTF-32 big-endian */
13330 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
13333 else if (s[2] == 0 && s[3] != 0) {
13336 * are a good indicator of UTF-16BE. */
13337 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13338 s = add_utf16_textfilter(s, FALSE);
13343 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13344 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13345 s += 4; /* UTF-8 */
13351 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13354 * are a good indicator of UTF-16LE. */
13355 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13356 s = add_utf16_textfilter(s, TRUE);
13363 #ifndef PERL_NO_UTF16_FILTER
13365 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13368 SV *const filter = FILTER_DATA(idx);
13369 /* We re-use this each time round, throwing the contents away before we
13371 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13372 SV *const utf8_buffer = filter;
13373 IV status = IoPAGE(filter);
13374 const bool reverse = (bool) IoLINES(filter);
13377 /* As we're automatically added, at the lowest level, and hence only called
13378 from this file, we can be sure that we're not called in block mode. Hence
13379 don't bother writing code to deal with block mode. */
13381 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13384 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13386 DEBUG_P(PerlIO_printf(Perl_debug_log,
13387 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13388 FPTR2DPTR(void *, S_utf16_textfilter),
13389 reverse ? 'l' : 'b', idx, maxlen, status,
13390 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13397 /* First, look in our buffer of existing UTF-8 data: */
13398 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13402 } else if (status == 0) {
13404 IoPAGE(filter) = 0;
13405 nl = SvEND(utf8_buffer);
13408 STRLEN got = nl - SvPVX(utf8_buffer);
13409 /* Did we have anything to append? */
13411 sv_catpvn(sv, SvPVX(utf8_buffer), got);
13412 /* Everything else in this code works just fine if SVp_POK isn't
13413 set. This, however, needs it, and we need it to work, else
13414 we loop infinitely because the buffer is never consumed. */
13415 sv_chop(utf8_buffer, nl);
13419 /* OK, not a complete line there, so need to read some more UTF-16.
13420 Read an extra octect if the buffer currently has an odd number. */
13424 if (SvCUR(utf16_buffer) >= 2) {
13425 /* Location of the high octet of the last complete code point.
13426 Gosh, UTF-16 is a pain. All the benefits of variable length,
13427 *coupled* with all the benefits of partial reads and
13429 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13430 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13432 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13436 /* We have the first half of a surrogate. Read more. */
13437 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13440 status = FILTER_READ(idx + 1, utf16_buffer,
13441 160 + (SvCUR(utf16_buffer) & 1));
13442 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13443 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13446 IoPAGE(filter) = status;
13451 chars = SvCUR(utf16_buffer) >> 1;
13452 have = SvCUR(utf8_buffer);
13453 SvGROW(utf8_buffer, have + chars * 3 + 1);
13456 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13457 (U8*)SvPVX_const(utf8_buffer) + have,
13458 chars * 2, &newlen);
13460 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13461 (U8*)SvPVX_const(utf8_buffer) + have,
13462 chars * 2, &newlen);
13464 SvCUR_set(utf8_buffer, have + newlen);
13467 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13468 it's private to us, and utf16_to_utf8{,reversed} take a
13469 (pointer,length) pair, rather than a NUL-terminated string. */
13470 if(SvCUR(utf16_buffer) & 1) {
13471 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13472 SvCUR_set(utf16_buffer, 1);
13474 SvCUR_set(utf16_buffer, 0);
13477 DEBUG_P(PerlIO_printf(Perl_debug_log,
13478 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13480 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13481 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13486 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13488 SV *filter = filter_add(S_utf16_textfilter, NULL);
13490 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13491 sv_setpvs(filter, "");
13492 IoLINES(filter) = reversed;
13493 IoPAGE(filter) = 1; /* Not EOF */
13495 /* Sadly, we have to return a valid pointer, come what may, so we have to
13496 ignore any error return from this. */
13497 SvCUR_set(PL_linestr, 0);
13498 if (FILTER_READ(0, PL_linestr, 0)) {
13499 SvUTF8_on(PL_linestr);
13501 SvUTF8_on(PL_linestr);
13503 PL_bufend = SvEND(PL_linestr);
13504 return (U8*)SvPVX(PL_linestr);
13509 Returns a pointer to the next character after the parsed
13510 vstring, as well as updating the passed in sv.
13512 Function must be called like
13515 s = scan_vstring(s,e,sv);
13517 where s and e are the start and end of the string.
13518 The sv should already be large enough to store the vstring
13519 passed in, for performance reasons.
13524 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13527 const char *pos = s;
13528 const char *start = s;
13530 PERL_ARGS_ASSERT_SCAN_VSTRING;
13532 if (*pos == 'v') pos++; /* get past 'v' */
13533 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13535 if ( *pos != '.') {
13536 /* this may not be a v-string if followed by => */
13537 const char *next = pos;
13538 while (next < e && isSPACE(*next))
13540 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13541 /* return string not v-string */
13542 sv_setpvn(sv,(char *)s,pos-s);
13543 return (char *)pos;
13547 if (!isALPHA(*pos)) {
13548 U8 tmpbuf[UTF8_MAXBYTES+1];
13551 s++; /* get past 'v' */
13556 /* this is atoi() that tolerates underscores */
13559 const char *end = pos;
13561 while (--end >= s) {
13563 const UV orev = rev;
13564 rev += (*end - '0') * mult;
13567 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13568 "Integer overflow in decimal number");
13572 if (rev > 0x7FFFFFFF)
13573 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13575 /* Append native character for the rev point */
13576 tmpend = uvchr_to_utf8(tmpbuf, rev);
13577 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13578 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13580 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13586 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13590 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13597 Perl_keyword_plugin_standard(pTHX_
13598 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13600 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13601 PERL_UNUSED_CONTEXT;
13602 PERL_UNUSED_ARG(keyword_ptr);
13603 PERL_UNUSED_ARG(keyword_len);
13604 PERL_UNUSED_ARG(op_ptr);
13605 return KEYWORD_PLUGIN_DECLINE;
13610 * c-indentation-style: bsd
13611 * c-basic-offset: 4
13612 * indent-tabs-mode: t
13615 * ex: set ts=8 sts=4 sw=4 noet: