3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 #define PERL_IN_TOKE_C
28 #define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
31 #define pl_yylval (PL_parser->yylval)
33 /* YYINITDEPTH -- initial size of the parser's stacks. */
34 #define YYINITDEPTH 200
36 /* XXX temporary backwards compatibility */
37 #define PL_lex_brackets (PL_parser->lex_brackets)
38 #define PL_lex_brackstack (PL_parser->lex_brackstack)
39 #define PL_lex_casemods (PL_parser->lex_casemods)
40 #define PL_lex_casestack (PL_parser->lex_casestack)
41 #define PL_lex_defer (PL_parser->lex_defer)
42 #define PL_lex_dojoin (PL_parser->lex_dojoin)
43 #define PL_lex_expect (PL_parser->lex_expect)
44 #define PL_lex_formbrack (PL_parser->lex_formbrack)
45 #define PL_lex_inpat (PL_parser->lex_inpat)
46 #define PL_lex_inwhat (PL_parser->lex_inwhat)
47 #define PL_lex_op (PL_parser->lex_op)
48 #define PL_lex_repl (PL_parser->lex_repl)
49 #define PL_lex_starts (PL_parser->lex_starts)
50 #define PL_lex_stuff (PL_parser->lex_stuff)
51 #define PL_multi_start (PL_parser->multi_start)
52 #define PL_multi_open (PL_parser->multi_open)
53 #define PL_multi_close (PL_parser->multi_close)
54 #define PL_pending_ident (PL_parser->pending_ident)
55 #define PL_preambled (PL_parser->preambled)
56 #define PL_sublex_info (PL_parser->sublex_info)
57 #define PL_linestr (PL_parser->linestr)
58 #define PL_expect (PL_parser->expect)
59 #define PL_copline (PL_parser->copline)
60 #define PL_bufptr (PL_parser->bufptr)
61 #define PL_oldbufptr (PL_parser->oldbufptr)
62 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63 #define PL_linestart (PL_parser->linestart)
64 #define PL_bufend (PL_parser->bufend)
65 #define PL_last_uni (PL_parser->last_uni)
66 #define PL_last_lop (PL_parser->last_lop)
67 #define PL_last_lop_op (PL_parser->last_lop_op)
68 #define PL_lex_state (PL_parser->lex_state)
69 #define PL_rsfp (PL_parser->rsfp)
70 #define PL_rsfp_filters (PL_parser->rsfp_filters)
71 #define PL_in_my (PL_parser->in_my)
72 #define PL_in_my_stash (PL_parser->in_my_stash)
73 #define PL_tokenbuf (PL_parser->tokenbuf)
74 #define PL_multi_end (PL_parser->multi_end)
75 #define PL_error_count (PL_parser->error_count)
78 # define PL_endwhite (PL_parser->endwhite)
79 # define PL_faketokens (PL_parser->faketokens)
80 # define PL_lasttoke (PL_parser->lasttoke)
81 # define PL_nextwhite (PL_parser->nextwhite)
82 # define PL_realtokenstart (PL_parser->realtokenstart)
83 # define PL_skipwhite (PL_parser->skipwhite)
84 # define PL_thisclose (PL_parser->thisclose)
85 # define PL_thismad (PL_parser->thismad)
86 # define PL_thisopen (PL_parser->thisopen)
87 # define PL_thisstuff (PL_parser->thisstuff)
88 # define PL_thistoken (PL_parser->thistoken)
89 # define PL_thiswhite (PL_parser->thiswhite)
90 # define PL_thiswhite (PL_parser->thiswhite)
91 # define PL_nexttoke (PL_parser->nexttoke)
92 # define PL_curforce (PL_parser->curforce)
94 # define PL_nexttoke (PL_parser->nexttoke)
95 # define PL_nexttype (PL_parser->nexttype)
96 # define PL_nextval (PL_parser->nextval)
100 S_pending_ident(pTHX);
102 static const char ident_too_long[] = "Identifier too long";
104 #ifndef PERL_NO_UTF16_FILTER
105 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
106 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
110 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
111 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
113 # define CURMAD(slot,sv)
114 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
117 #define XFAKEBRACK 128
118 #define XENUMMASK 127
120 #ifdef USE_UTF8_SCRIPTS
121 # define UTF (!IN_BYTES)
123 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
126 /* The maximum number of characters preceding the unrecognized one to display */
127 #define UNRECOGNIZED_PRECEDE_COUNT 10
129 /* In variables named $^X, these are the legal values for X.
130 * 1999-02-27 mjd-perl-patch@plover.com */
131 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
133 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
135 /* LEX_* are values for PL_lex_state, the state of the lexer.
136 * They are arranged oddly so that the guard on the switch statement
137 * can get by with a single comparison (if the compiler is smart enough).
140 /* #define LEX_NOTPARSING 11 is done in perl.h. */
142 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
143 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
144 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
145 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
146 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
148 /* at end of code, eg "$x" followed by: */
149 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
150 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
152 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
153 string or after \E, $foo, etc */
154 #define LEX_INTERPCONST 2 /* NOT USED */
155 #define LEX_FORMLINE 1 /* expecting a format line */
156 #define LEX_KNOWNEXT 0 /* next token known; just return it */
160 static const char* const lex_state_names[] = {
179 #include "keywords.h"
181 /* CLINE is a macro that ensures PL_copline has a sane value */
186 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
189 # define SKIPSPACE0(s) skipspace0(s)
190 # define SKIPSPACE1(s) skipspace1(s)
191 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
192 # define PEEKSPACE(s) skipspace2(s,0)
194 # define SKIPSPACE0(s) skipspace(s)
195 # define SKIPSPACE1(s) skipspace(s)
196 # define SKIPSPACE2(s,tsv) skipspace(s)
197 # define PEEKSPACE(s) skipspace(s)
201 * Convenience functions to return different tokens and prime the
202 * lexer for the next token. They all take an argument.
204 * TOKEN : generic token (used for '(', DOLSHARP, etc)
205 * OPERATOR : generic operator
206 * AOPERATOR : assignment operator
207 * PREBLOCK : beginning the block after an if, while, foreach, ...
208 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
209 * PREREF : *EXPR where EXPR is not a simple identifier
210 * TERM : expression term
211 * LOOPX : loop exiting command (goto, last, dump, etc)
212 * FTST : file test operator
213 * FUN0 : zero-argument function
214 * FUN1 : not used, except for not, which isn't a UNIOP
215 * BOop : bitwise or or xor
217 * SHop : shift operator
218 * PWop : power operator
219 * PMop : pattern-matching operator
220 * Aop : addition-level operator
221 * Mop : multiplication-level operator
222 * Eop : equality-testing operator
223 * Rop : relational operator <= != gt
225 * Also see LOP and lop() below.
228 #ifdef DEBUGGING /* Serve -DT. */
229 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
231 # define REPORT(retval) (retval)
234 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
235 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
236 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
237 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
238 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
239 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
240 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
241 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
242 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
243 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
244 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
245 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
246 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
247 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
248 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
249 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
250 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
251 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
252 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
253 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
255 /* This bit of chicanery makes a unary function followed by
256 * a parenthesis into a function with one argument, highest precedence.
257 * The UNIDOR macro is for unary functions that can be followed by the //
258 * operator (such as C<shift // 0>).
260 #define UNI2(f,x) { \
261 pl_yylval.ival = f; \
264 PL_last_uni = PL_oldbufptr; \
265 PL_last_lop_op = f; \
267 return REPORT( (int)FUNC1 ); \
269 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
271 #define UNI(f) UNI2(f,XTERM)
272 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
274 #define UNIBRACK(f) { \
275 pl_yylval.ival = f; \
277 PL_last_uni = PL_oldbufptr; \
279 return REPORT( (int)FUNC1 ); \
281 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
284 /* grandfather return to old style */
285 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
289 /* how to interpret the pl_yylval associated with the token */
293 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
299 static struct debug_tokens {
301 enum token_type type;
303 } const debug_tokens[] =
305 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
306 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
307 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
308 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
309 { ARROW, TOKENTYPE_NONE, "ARROW" },
310 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
311 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
312 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
313 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
314 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
315 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
316 { DO, TOKENTYPE_NONE, "DO" },
317 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
318 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
319 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
320 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
321 { ELSE, TOKENTYPE_NONE, "ELSE" },
322 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
323 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
324 { FOR, TOKENTYPE_IVAL, "FOR" },
325 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
326 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
327 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
328 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
329 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
330 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
331 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
332 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
333 { IF, TOKENTYPE_IVAL, "IF" },
334 { LABEL, TOKENTYPE_PVAL, "LABEL" },
335 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
336 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
337 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
338 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
339 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
340 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
341 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
342 { MY, TOKENTYPE_IVAL, "MY" },
343 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
344 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
345 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
346 { OROP, TOKENTYPE_IVAL, "OROP" },
347 { OROR, TOKENTYPE_NONE, "OROR" },
348 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
349 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
350 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
351 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
352 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
353 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
354 { PREINC, TOKENTYPE_NONE, "PREINC" },
355 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
356 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
357 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
358 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
359 { SUB, TOKENTYPE_NONE, "SUB" },
360 { THING, TOKENTYPE_OPVAL, "THING" },
361 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
362 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
363 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
364 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
365 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
366 { USE, TOKENTYPE_IVAL, "USE" },
367 { WHEN, TOKENTYPE_IVAL, "WHEN" },
368 { WHILE, TOKENTYPE_IVAL, "WHILE" },
369 { WORD, TOKENTYPE_OPVAL, "WORD" },
370 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
371 { 0, TOKENTYPE_NONE, NULL }
374 /* dump the returned token in rv, plus any optional arg in pl_yylval */
377 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
381 PERL_ARGS_ASSERT_TOKEREPORT;
384 const char *name = NULL;
385 enum token_type type = TOKENTYPE_NONE;
386 const struct debug_tokens *p;
387 SV* const report = newSVpvs("<== ");
389 for (p = debug_tokens; p->token; p++) {
390 if (p->token == (int)rv) {
397 Perl_sv_catpv(aTHX_ report, name);
398 else if ((char)rv > ' ' && (char)rv < '~')
399 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
401 sv_catpvs(report, "EOF");
403 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
406 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
409 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
411 case TOKENTYPE_OPNUM:
412 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
413 PL_op_name[lvalp->ival]);
416 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
418 case TOKENTYPE_OPVAL:
420 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
421 PL_op_name[lvalp->opval->op_type]);
422 if (lvalp->opval->op_type == OP_CONST) {
423 Perl_sv_catpvf(aTHX_ report, " %s",
424 SvPEEK(cSVOPx_sv(lvalp->opval)));
429 sv_catpvs(report, "(opval=null)");
432 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
438 /* print the buffer with suitable escapes */
441 S_printbuf(pTHX_ const char *const fmt, const char *const s)
443 SV* const tmp = newSVpvs("");
445 PERL_ARGS_ASSERT_PRINTBUF;
447 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
454 S_deprecate_commaless_var_list(pTHX) {
456 deprecate("comma-less variable list");
457 return REPORT(','); /* grandfather non-comma-format format */
463 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
464 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
468 S_ao(pTHX_ int toketype)
471 if (*PL_bufptr == '=') {
473 if (toketype == ANDAND)
474 pl_yylval.ival = OP_ANDASSIGN;
475 else if (toketype == OROR)
476 pl_yylval.ival = OP_ORASSIGN;
477 else if (toketype == DORDOR)
478 pl_yylval.ival = OP_DORASSIGN;
486 * When Perl expects an operator and finds something else, no_op
487 * prints the warning. It always prints "<something> found where
488 * operator expected. It prints "Missing semicolon on previous line?"
489 * if the surprise occurs at the start of the line. "do you need to
490 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
491 * where the compiler doesn't know if foo is a method call or a function.
492 * It prints "Missing operator before end of line" if there's nothing
493 * after the missing operator, or "... before <...>" if there is something
494 * after the missing operator.
498 S_no_op(pTHX_ const char *const what, char *s)
501 char * const oldbp = PL_bufptr;
502 const bool is_first = (PL_oldbufptr == PL_linestart);
504 PERL_ARGS_ASSERT_NO_OP;
510 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
511 if (ckWARN_d(WARN_SYNTAX)) {
513 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
514 "\t(Missing semicolon on previous line?)\n");
515 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
517 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
519 if (t < PL_bufptr && isSPACE(*t))
520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
521 "\t(Do you need to predeclare %.*s?)\n",
522 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
526 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
527 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
535 * Complain about missing quote/regexp/heredoc terminator.
536 * If it's called with NULL then it cauterizes the line buffer.
537 * If we're in a delimited string and the delimiter is a control
538 * character, it's reformatted into a two-char sequence like ^C.
543 S_missingterm(pTHX_ char *s)
549 char * const nl = strrchr(s,'\n');
553 else if (isCNTRL(PL_multi_close)) {
555 tmpbuf[1] = (char)toCTRL(PL_multi_close);
560 *tmpbuf = (char)PL_multi_close;
564 q = strchr(s,'"') ? '\'' : '"';
565 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
568 #define FEATURE_IS_ENABLED(name) \
569 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
570 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
571 /* The longest string we pass in. */
572 #define MAX_FEATURE_LEN (sizeof("switch")-1)
575 * S_feature_is_enabled
576 * Check whether the named feature is enabled.
579 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
582 HV * const hinthv = GvHV(PL_hintgv);
583 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
585 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
587 assert(namelen <= MAX_FEATURE_LEN);
588 memcpy(&he_name[8], name, namelen);
590 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
598 Perl_deprecate(pTHX_ const char *const s)
600 PERL_ARGS_ASSERT_DEPRECATE;
602 Perl_ck_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
606 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
607 * utf16-to-utf8-reversed.
610 #ifdef PERL_CR_FILTER
614 register const char *s = SvPVX_const(sv);
615 register const char * const e = s + SvCUR(sv);
617 PERL_ARGS_ASSERT_STRIP_RETURN;
619 /* outer loop optimized to do nothing if there are no CR-LFs */
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
626 if (*s == '\r' && s[1] == '\n')
637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
640 if (count > 0 && !maxlen)
651 * Create a parser object and initialise its parser and lexer fields
653 * rsfp is the opened file handle to read from (if any),
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
666 const char *s = NULL;
668 yy_parser *parser, *oparser;
670 /* create and initialise a parser */
672 Newxz(parser, 1, yy_parser);
673 parser->old_parser = oparser = PL_parser;
676 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
677 parser->ps = parser->stack;
678 parser->stack_size = YYINITDEPTH;
680 parser->stack->state = 0;
681 parser->yyerrstatus = 0;
682 parser->yychar = YYEMPTY; /* Cause a token to be read. */
684 /* on scope exit, free this parser and restore any outer one */
686 parser->saved_curcop = PL_curcop;
688 /* initialise lexer state */
691 parser->curforce = -1;
693 parser->nexttoke = 0;
695 parser->error_count = oparser ? oparser->error_count : 0;
696 parser->copline = NOLINE;
697 parser->lex_state = LEX_NORMAL;
698 parser->expect = XSTATE;
700 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
701 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
703 Newx(parser->lex_brackstack, 120, char);
704 Newx(parser->lex_casestack, 12, char);
705 *parser->lex_casestack = '\0';
708 s = SvPV_const(line, len);
714 parser->linestr = newSVpvs("\n;");
715 } else if (SvREADONLY(line) || s[len-1] != ';') {
716 parser->linestr = newSVsv(line);
718 sv_catpvs(parser->linestr, "\n;");
721 SvREFCNT_inc_simple_void_NN(line);
722 parser->linestr = line;
724 parser->oldoldbufptr =
727 parser->linestart = SvPVX(parser->linestr);
728 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
729 parser->last_lop = parser->last_uni = NULL;
733 /* delete a parser object */
736 Perl_parser_free(pTHX_ const yy_parser *parser)
738 PERL_ARGS_ASSERT_PARSER_FREE;
740 PL_curcop = parser->saved_curcop;
741 SvREFCNT_dec(parser->linestr);
743 if (parser->rsfp == PerlIO_stdin())
744 PerlIO_clearerr(parser->rsfp);
745 else if (parser->rsfp && (!parser->old_parser ||
746 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
747 PerlIO_close(parser->rsfp);
748 SvREFCNT_dec(parser->rsfp_filters);
750 Safefree(parser->stack);
751 Safefree(parser->lex_brackstack);
752 Safefree(parser->lex_casestack);
753 PL_parser = parser->old_parser;
760 * Finalizer for lexing operations. Must be called when the parser is
761 * done with the lexer.
768 PL_doextract = FALSE;
773 * This subroutine has nothing to do with tilting, whether at windmills
774 * or pinball tables. Its name is short for "increment line". It
775 * increments the current line number in CopLINE(PL_curcop) and checks
776 * to see whether the line starts with a comment of the form
777 * # line 500 "foo.pm"
778 * If so, it sets the current line number and file to the values in the comment.
782 S_incline(pTHX_ const char *s)
789 PERL_ARGS_ASSERT_INCLINE;
791 CopLINE_inc(PL_curcop);
794 while (SPACE_OR_TAB(*s))
796 if (strnEQ(s, "line", 4))
800 if (SPACE_OR_TAB(*s))
804 while (SPACE_OR_TAB(*s))
812 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
814 while (SPACE_OR_TAB(*s))
816 if (*s == '"' && (t = strchr(s+1, '"'))) {
826 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
828 if (*e != '\n' && *e != '\0')
829 return; /* false alarm */
832 const STRLEN len = t - s;
834 SV *const temp_sv = CopFILESV(PL_curcop);
840 tmplen = SvCUR(temp_sv);
846 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
847 /* must copy *{"::_<(eval N)[oldfilename:L]"}
848 * to *{"::_<newfilename"} */
849 /* However, the long form of evals is only turned on by the
850 debugger - usually they're "(eval %lu)" */
854 STRLEN tmplen2 = len;
855 if (tmplen + 2 <= sizeof smallbuf)
858 Newx(tmpbuf, tmplen + 2, char);
861 memcpy(tmpbuf + 2, cf, tmplen);
863 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
868 if (tmplen2 + 2 <= sizeof smallbuf)
871 Newx(tmpbuf2, tmplen2 + 2, char);
873 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
874 /* Either they malloc'd it, or we malloc'd it,
875 so no prefix is present in ours. */
880 memcpy(tmpbuf2 + 2, s, tmplen2);
883 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
885 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
886 /* adjust ${"::_<newfilename"} to store the new file name */
887 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
888 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
889 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
892 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
894 if (tmpbuf != smallbuf) Safefree(tmpbuf);
897 CopFILE_free(PL_curcop);
898 CopFILE_setn(PL_curcop, s, len);
900 CopLINE_set(PL_curcop, atoi(n)-1);
904 /* skip space before PL_thistoken */
907 S_skipspace0(pTHX_ register char *s)
909 PERL_ARGS_ASSERT_SKIPSPACE0;
916 PL_thiswhite = newSVpvs("");
917 sv_catsv(PL_thiswhite, PL_skipwhite);
918 sv_free(PL_skipwhite);
921 PL_realtokenstart = s - SvPVX(PL_linestr);
925 /* skip space after PL_thistoken */
928 S_skipspace1(pTHX_ register char *s)
930 const char *start = s;
931 I32 startoff = start - SvPVX(PL_linestr);
933 PERL_ARGS_ASSERT_SKIPSPACE1;
938 start = SvPVX(PL_linestr) + startoff;
939 if (!PL_thistoken && PL_realtokenstart >= 0) {
940 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
941 PL_thistoken = newSVpvn(tstart, start - tstart);
943 PL_realtokenstart = -1;
946 PL_nextwhite = newSVpvs("");
947 sv_catsv(PL_nextwhite, PL_skipwhite);
948 sv_free(PL_skipwhite);
955 S_skipspace2(pTHX_ register char *s, SV **svp)
958 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
959 const I32 startoff = s - SvPVX(PL_linestr);
961 PERL_ARGS_ASSERT_SKIPSPACE2;
964 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
965 if (!PL_madskills || !svp)
967 start = SvPVX(PL_linestr) + startoff;
968 if (!PL_thistoken && PL_realtokenstart >= 0) {
969 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
970 PL_thistoken = newSVpvn(tstart, start - tstart);
971 PL_realtokenstart = -1;
976 sv_setsv(*svp, PL_skipwhite);
977 sv_free(PL_skipwhite);
986 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
988 AV *av = CopFILEAVx(PL_curcop);
990 SV * const sv = newSV_type(SVt_PVMG);
992 sv_setsv(sv, orig_sv);
994 sv_setpvn(sv, buf, len);
997 av_store(av, (I32)CopLINE(PL_curcop), sv);
1003 * Called to gobble the appropriate amount and type of whitespace.
1004 * Skips comments as well.
1008 S_skipspace(pTHX_ register char *s)
1013 int startoff = s - SvPVX(PL_linestr);
1015 PERL_ARGS_ASSERT_SKIPSPACE;
1018 sv_free(PL_skipwhite);
1022 PERL_ARGS_ASSERT_SKIPSPACE;
1024 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1025 while (s < PL_bufend && SPACE_OR_TAB(*s))
1035 SSize_t oldprevlen, oldoldprevlen;
1036 SSize_t oldloplen = 0, oldunilen = 0;
1037 while (s < PL_bufend && isSPACE(*s)) {
1038 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1043 if (s < PL_bufend && *s == '#') {
1044 while (s < PL_bufend && *s != '\n')
1046 if (s < PL_bufend) {
1048 if (PL_in_eval && !PL_rsfp) {
1055 /* only continue to recharge the buffer if we're at the end
1056 * of the buffer, we're not reading from a source filter, and
1057 * we're in normal lexing mode
1059 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1060 PL_lex_state == LEX_FORMLINE)
1067 /* try to recharge the buffer */
1069 curoff = s - SvPVX(PL_linestr);
1072 if ((s = filter_gets(PL_linestr, PL_rsfp,
1073 (prevlen = SvCUR(PL_linestr)))) == NULL)
1076 if (PL_madskills && curoff != startoff) {
1078 PL_skipwhite = newSVpvs("");
1079 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1083 /* mustn't throw out old stuff yet if madpropping */
1084 SvCUR(PL_linestr) = curoff;
1085 s = SvPVX(PL_linestr) + curoff;
1087 if (curoff && s[-1] == '\n')
1091 /* end of file. Add on the -p or -n magic */
1092 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1095 sv_catpvs(PL_linestr,
1096 ";}continue{print or die qq(-p destination: $!\\n);}");
1098 sv_setpvs(PL_linestr,
1099 ";}continue{print or die qq(-p destination: $!\\n);}");
1101 PL_minus_n = PL_minus_p = 0;
1103 else if (PL_minus_n) {
1105 sv_catpvs(PL_linestr, ";}");
1107 sv_setpvs(PL_linestr, ";}");
1113 sv_catpvs(PL_linestr,";");
1115 sv_setpvs(PL_linestr,";");
1118 /* reset variables for next time we lex */
1119 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1125 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1126 PL_last_lop = PL_last_uni = NULL;
1128 /* Close the filehandle. Could be from
1129 * STDIN, or a regular file. If we were reading code from
1130 * STDIN (because the commandline held no -e or filename)
1131 * then we don't close it, we reset it so the code can
1132 * read from STDIN too.
1135 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1136 PerlIO_clearerr(PL_rsfp);
1138 (void)PerlIO_close(PL_rsfp);
1143 /* not at end of file, so we only read another line */
1144 /* make corresponding updates to old pointers, for yyerror() */
1145 oldprevlen = PL_oldbufptr - PL_bufend;
1146 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1148 oldunilen = PL_last_uni - PL_bufend;
1150 oldloplen = PL_last_lop - PL_bufend;
1151 PL_linestart = PL_bufptr = s + prevlen;
1152 PL_bufend = s + SvCUR(PL_linestr);
1154 PL_oldbufptr = s + oldprevlen;
1155 PL_oldoldbufptr = s + oldoldprevlen;
1157 PL_last_uni = s + oldunilen;
1159 PL_last_lop = s + oldloplen;
1162 /* debugger active and we're not compiling the debugger code,
1163 * so store the line into the debugger's array of lines
1165 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
1166 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1173 PL_skipwhite = newSVpvs("");
1174 curoff = s - SvPVX(PL_linestr);
1175 if (curoff - startoff)
1176 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1185 * Check the unary operators to ensure there's no ambiguity in how they're
1186 * used. An ambiguous piece of code would be:
1188 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1189 * the +5 is its argument.
1199 if (PL_oldoldbufptr != PL_last_uni)
1201 while (isSPACE(*PL_last_uni))
1204 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1206 if ((t = strchr(s, '(')) && t < PL_bufptr)
1209 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1210 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1211 (int)(s - PL_last_uni), PL_last_uni);
1215 * LOP : macro to build a list operator. Its behaviour has been replaced
1216 * with a subroutine, S_lop() for which LOP is just another name.
1219 #define LOP(f,x) return lop(f,x,s)
1223 * Build a list operator (or something that might be one). The rules:
1224 * - if we have a next token, then it's a list operator [why?]
1225 * - if the next thing is an opening paren, then it's a function
1226 * - else it's a list operator
1230 S_lop(pTHX_ I32 f, int x, char *s)
1234 PERL_ARGS_ASSERT_LOP;
1240 PL_last_lop = PL_oldbufptr;
1241 PL_last_lop_op = (OPCODE)f;
1244 return REPORT(LSTOP);
1247 return REPORT(LSTOP);
1250 return REPORT(FUNC);
1253 return REPORT(FUNC);
1255 return REPORT(LSTOP);
1261 * Sets up for an eventual force_next(). start_force(0) basically does
1262 * an unshift, while start_force(-1) does a push. yylex removes items
1267 S_start_force(pTHX_ int where)
1271 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1272 where = PL_lasttoke;
1273 assert(PL_curforce < 0 || PL_curforce == where);
1274 if (PL_curforce != where) {
1275 for (i = PL_lasttoke; i > where; --i) {
1276 PL_nexttoke[i] = PL_nexttoke[i-1];
1280 if (PL_curforce < 0) /* in case of duplicate start_force() */
1281 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1282 PL_curforce = where;
1285 curmad('^', newSVpvs(""));
1286 CURMAD('_', PL_nextwhite);
1291 S_curmad(pTHX_ char slot, SV *sv)
1297 if (PL_curforce < 0)
1298 where = &PL_thismad;
1300 where = &PL_nexttoke[PL_curforce].next_mad;
1306 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1308 else if (PL_encoding) {
1309 sv_recode_to_utf8(sv, PL_encoding);
1314 /* keep a slot open for the head of the list? */
1315 if (slot != '_' && *where && (*where)->mad_key == '^') {
1316 (*where)->mad_key = slot;
1317 sv_free(MUTABLE_SV(((*where)->mad_val)));
1318 (*where)->mad_val = (void*)sv;
1321 addmad(newMADsv(slot, sv), where, 0);
1324 # define start_force(where) NOOP
1325 # define curmad(slot, sv) NOOP
1330 * When the lexer realizes it knows the next token (for instance,
1331 * it is reordering tokens for the parser) then it can call S_force_next
1332 * to know what token to return the next time the lexer is called. Caller
1333 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1334 * and possibly PL_expect to ensure the lexer handles the token correctly.
1338 S_force_next(pTHX_ I32 type)
1343 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1344 tokereport(type, &NEXTVAL_NEXTTOKE);
1348 if (PL_curforce < 0)
1349 start_force(PL_lasttoke);
1350 PL_nexttoke[PL_curforce].next_type = type;
1351 if (PL_lex_state != LEX_KNOWNEXT)
1352 PL_lex_defer = PL_lex_state;
1353 PL_lex_state = LEX_KNOWNEXT;
1354 PL_lex_expect = PL_expect;
1357 PL_nexttype[PL_nexttoke] = type;
1359 if (PL_lex_state != LEX_KNOWNEXT) {
1360 PL_lex_defer = PL_lex_state;
1361 PL_lex_expect = PL_expect;
1362 PL_lex_state = LEX_KNOWNEXT;
1368 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1371 SV * const sv = newSVpvn_utf8(start, len,
1374 && !is_ascii_string((const U8*)start, len)
1375 && is_utf8_string((const U8*)start, len));
1381 * When the lexer knows the next thing is a word (for instance, it has
1382 * just seen -> and it knows that the next char is a word char, then
1383 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1387 * char *start : buffer position (must be within PL_linestr)
1388 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1389 * int check_keyword : if true, Perl checks to make sure the word isn't
1390 * a keyword (do this if the word is a label, e.g. goto FOO)
1391 * int allow_pack : if true, : characters will also be allowed (require,
1392 * use, etc. do this)
1393 * int allow_initial_tick : used by the "sub" lexer only.
1397 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1403 PERL_ARGS_ASSERT_FORCE_WORD;
1405 start = SKIPSPACE1(start);
1407 if (isIDFIRST_lazy_if(s,UTF) ||
1408 (allow_pack && *s == ':') ||
1409 (allow_initial_tick && *s == '\'') )
1411 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1412 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1414 start_force(PL_curforce);
1416 curmad('X', newSVpvn(start,s-start));
1417 if (token == METHOD) {
1422 PL_expect = XOPERATOR;
1426 curmad('g', newSVpvs( "forced" ));
1427 NEXTVAL_NEXTTOKE.opval
1428 = (OP*)newSVOP(OP_CONST,0,
1429 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1430 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1438 * Called when the lexer wants $foo *foo &foo etc, but the program
1439 * text only contains the "foo" portion. The first argument is a pointer
1440 * to the "foo", and the second argument is the type symbol to prefix.
1441 * Forces the next token to be a "WORD".
1442 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1446 S_force_ident(pTHX_ register const char *s, int kind)
1450 PERL_ARGS_ASSERT_FORCE_IDENT;
1453 const STRLEN len = strlen(s);
1454 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1455 start_force(PL_curforce);
1456 NEXTVAL_NEXTTOKE.opval = o;
1459 o->op_private = OPpCONST_ENTERED;
1460 /* XXX see note in pp_entereval() for why we forgo typo
1461 warnings if the symbol must be introduced in an eval.
1463 gv_fetchpvn_flags(s, len,
1464 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1466 kind == '$' ? SVt_PV :
1467 kind == '@' ? SVt_PVAV :
1468 kind == '%' ? SVt_PVHV :
1476 Perl_str_to_version(pTHX_ SV *sv)
1481 const char *start = SvPV_const(sv,len);
1482 const char * const end = start + len;
1483 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1485 PERL_ARGS_ASSERT_STR_TO_VERSION;
1487 while (start < end) {
1491 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1496 retval += ((NV)n)/nshift;
1505 * Forces the next token to be a version number.
1506 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1507 * and if "guessing" is TRUE, then no new token is created (and the caller
1508 * must use an alternative parsing method).
1512 S_force_version(pTHX_ char *s, int guessing)
1518 I32 startoff = s - SvPVX(PL_linestr);
1521 PERL_ARGS_ASSERT_FORCE_VERSION;
1529 while (isDIGIT(*d) || *d == '_' || *d == '.')
1533 start_force(PL_curforce);
1534 curmad('X', newSVpvn(s,d-s));
1537 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1539 s = scan_num(s, &pl_yylval);
1540 version = pl_yylval.opval;
1541 ver = cSVOPx(version)->op_sv;
1542 if (SvPOK(ver) && !SvNIOK(ver)) {
1543 SvUPGRADE(ver, SVt_PVNV);
1544 SvNV_set(ver, str_to_version(ver));
1545 SvNOK_on(ver); /* hint that it is a version */
1548 else if (guessing) {
1551 sv_free(PL_nextwhite); /* let next token collect whitespace */
1553 s = SvPVX(PL_linestr) + startoff;
1561 if (PL_madskills && !version) {
1562 sv_free(PL_nextwhite); /* let next token collect whitespace */
1564 s = SvPVX(PL_linestr) + startoff;
1567 /* NOTE: The parser sees the package name and the VERSION swapped */
1568 start_force(PL_curforce);
1569 NEXTVAL_NEXTTOKE.opval = version;
1577 * Tokenize a quoted string passed in as an SV. It finds the next
1578 * chunk, up to end of string or a backslash. It may make a new
1579 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1584 S_tokeq(pTHX_ SV *sv)
1588 register char *send;
1593 PERL_ARGS_ASSERT_TOKEQ;
1598 s = SvPV_force(sv, len);
1599 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1602 while (s < send && *s != '\\')
1607 if ( PL_hints & HINT_NEW_STRING ) {
1608 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1612 if (s + 1 < send && (s[1] == '\\'))
1613 s++; /* all that, just for this */
1618 SvCUR_set(sv, d - SvPVX_const(sv));
1620 if ( PL_hints & HINT_NEW_STRING )
1621 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1626 * Now come three functions related to double-quote context,
1627 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1628 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1629 * interact with PL_lex_state, and create fake ( ... ) argument lists
1630 * to handle functions and concatenation.
1631 * They assume that whoever calls them will be setting up a fake
1632 * join call, because each subthing puts a ',' after it. This lets
1635 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1637 * (I'm not sure whether the spurious commas at the end of lcfirst's
1638 * arguments and join's arguments are created or not).
1643 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1645 * Pattern matching will set PL_lex_op to the pattern-matching op to
1646 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1648 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1650 * Everything else becomes a FUNC.
1652 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1653 * had an OP_CONST or OP_READLINE). This just sets us up for a
1654 * call to S_sublex_push().
1658 S_sublex_start(pTHX)
1661 register const I32 op_type = pl_yylval.ival;
1663 if (op_type == OP_NULL) {
1664 pl_yylval.opval = PL_lex_op;
1668 if (op_type == OP_CONST || op_type == OP_READLINE) {
1669 SV *sv = tokeq(PL_lex_stuff);
1671 if (SvTYPE(sv) == SVt_PVIV) {
1672 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1674 const char * const p = SvPV_const(sv, len);
1675 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1679 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1680 PL_lex_stuff = NULL;
1681 /* Allow <FH> // "foo" */
1682 if (op_type == OP_READLINE)
1683 PL_expect = XTERMORDORDOR;
1686 else if (op_type == OP_BACKTICK && PL_lex_op) {
1687 /* readpipe() vas overriden */
1688 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1689 pl_yylval.opval = PL_lex_op;
1691 PL_lex_stuff = NULL;
1695 PL_sublex_info.super_state = PL_lex_state;
1696 PL_sublex_info.sub_inwhat = (U16)op_type;
1697 PL_sublex_info.sub_op = PL_lex_op;
1698 PL_lex_state = LEX_INTERPPUSH;
1702 pl_yylval.opval = PL_lex_op;
1712 * Create a new scope to save the lexing state. The scope will be
1713 * ended in S_sublex_done. Returns a '(', starting the function arguments
1714 * to the uc, lc, etc. found before.
1715 * Sets PL_lex_state to LEX_INTERPCONCAT.
1724 PL_lex_state = PL_sublex_info.super_state;
1725 SAVEBOOL(PL_lex_dojoin);
1726 SAVEI32(PL_lex_brackets);
1727 SAVEI32(PL_lex_casemods);
1728 SAVEI32(PL_lex_starts);
1729 SAVEI8(PL_lex_state);
1730 SAVEVPTR(PL_lex_inpat);
1731 SAVEI16(PL_lex_inwhat);
1732 SAVECOPLINE(PL_curcop);
1733 SAVEPPTR(PL_bufptr);
1734 SAVEPPTR(PL_bufend);
1735 SAVEPPTR(PL_oldbufptr);
1736 SAVEPPTR(PL_oldoldbufptr);
1737 SAVEPPTR(PL_last_lop);
1738 SAVEPPTR(PL_last_uni);
1739 SAVEPPTR(PL_linestart);
1740 SAVESPTR(PL_linestr);
1741 SAVEGENERICPV(PL_lex_brackstack);
1742 SAVEGENERICPV(PL_lex_casestack);
1744 PL_linestr = PL_lex_stuff;
1745 PL_lex_stuff = NULL;
1747 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1748 = SvPVX(PL_linestr);
1749 PL_bufend += SvCUR(PL_linestr);
1750 PL_last_lop = PL_last_uni = NULL;
1751 SAVEFREESV(PL_linestr);
1753 PL_lex_dojoin = FALSE;
1754 PL_lex_brackets = 0;
1755 Newx(PL_lex_brackstack, 120, char);
1756 Newx(PL_lex_casestack, 12, char);
1757 PL_lex_casemods = 0;
1758 *PL_lex_casestack = '\0';
1760 PL_lex_state = LEX_INTERPCONCAT;
1761 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1763 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1764 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1765 PL_lex_inpat = PL_sublex_info.sub_op;
1767 PL_lex_inpat = NULL;
1774 * Restores lexer state after a S_sublex_push.
1781 if (!PL_lex_starts++) {
1782 SV * const sv = newSVpvs("");
1783 if (SvUTF8(PL_linestr))
1785 PL_expect = XOPERATOR;
1786 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1790 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1791 PL_lex_state = LEX_INTERPCASEMOD;
1795 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1796 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1797 PL_linestr = PL_lex_repl;
1799 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1800 PL_bufend += SvCUR(PL_linestr);
1801 PL_last_lop = PL_last_uni = NULL;
1802 SAVEFREESV(PL_linestr);
1803 PL_lex_dojoin = FALSE;
1804 PL_lex_brackets = 0;
1805 PL_lex_casemods = 0;
1806 *PL_lex_casestack = '\0';
1808 if (SvEVALED(PL_lex_repl)) {
1809 PL_lex_state = LEX_INTERPNORMAL;
1811 /* we don't clear PL_lex_repl here, so that we can check later
1812 whether this is an evalled subst; that means we rely on the
1813 logic to ensure sublex_done() is called again only via the
1814 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1817 PL_lex_state = LEX_INTERPCONCAT;
1827 PL_endwhite = newSVpvs("");
1828 sv_catsv(PL_endwhite, PL_thiswhite);
1832 sv_setpvs(PL_thistoken,"");
1834 PL_realtokenstart = -1;
1838 PL_bufend = SvPVX(PL_linestr);
1839 PL_bufend += SvCUR(PL_linestr);
1840 PL_expect = XOPERATOR;
1841 PL_sublex_info.sub_inwhat = 0;
1849 Extracts a pattern, double-quoted string, or transliteration. This
1852 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1853 processing a pattern (PL_lex_inpat is true), a transliteration
1854 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1856 Returns a pointer to the character scanned up to. If this is
1857 advanced from the start pointer supplied (i.e. if anything was
1858 successfully parsed), will leave an OP for the substring scanned
1859 in pl_yylval. Caller must intuit reason for not parsing further
1860 by looking at the next characters herself.
1864 double-quoted style: \r and \n
1865 regexp special ones: \D \s
1868 case and quoting: \U \Q \E
1869 stops on @ and $, but not for $ as tail anchor
1871 In transliterations:
1872 characters are VERY literal, except for - not at the start or end
1873 of the string, which indicates a range. If the range is in bytes,
1874 scan_const expands the range to the full set of intermediate
1875 characters. If the range is in utf8, the hyphen is replaced with
1876 a certain range mark which will be handled by pmtrans() in op.c.
1878 In double-quoted strings:
1880 double-quoted style: \r and \n
1882 deprecated backrefs: \1 (in substitution replacements)
1883 case and quoting: \U \Q \E
1886 scan_const does *not* construct ops to handle interpolated strings.
1887 It stops processing as soon as it finds an embedded $ or @ variable
1888 and leaves it to the caller to work out what's going on.
1890 embedded arrays (whether in pattern or not) could be:
1891 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1893 $ in double-quoted strings must be the symbol of an embedded scalar.
1895 $ in pattern could be $foo or could be tail anchor. Assumption:
1896 it's a tail anchor if $ is the last thing in the string, or if it's
1897 followed by one of "()| \r\n\t"
1899 \1 (backreferences) are turned into $1
1901 The structure of the code is
1902 while (there's a character to process) {
1903 handle transliteration ranges
1904 skip regexp comments /(?#comment)/ and codes /(?{code})/
1905 skip #-initiated comments in //x patterns
1906 check for embedded arrays
1907 check for embedded scalars
1909 leave intact backslashes from leaveit (below)
1910 deprecate \1 in substitution replacements
1911 handle string-changing backslashes \l \U \Q \E, etc.
1912 switch (what was escaped) {
1913 handle \- in a transliteration (becomes a literal -)
1914 handle \132 (octal characters)
1915 handle \x15 and \x{1234} (hex characters)
1916 handle \N{name} (named characters)
1917 handle \cV (control characters)
1918 handle printf-style backslashes (\f, \r, \n, etc)
1921 } (end if backslash)
1922 handle regular character
1923 } (end while character to read)
1928 S_scan_const(pTHX_ char *start)
1931 register char *send = PL_bufend; /* end of the constant */
1932 SV *sv = newSV(send - start); /* sv for the constant. See
1933 note below on sizing. */
1934 register char *s = start; /* start of the constant */
1935 register char *d = SvPVX(sv); /* destination for copies */
1936 bool dorange = FALSE; /* are we in a translit range? */
1937 bool didrange = FALSE; /* did we just finish a range? */
1938 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1939 I32 this_utf8 = UTF; /* Is the source string assumed
1940 to be UTF8? But, this can
1941 show as true when the source
1942 isn't utf8, as for example
1943 when it is entirely composed
1946 /* Note on sizing: The scanned constant is placed into sv, which is
1947 * initialized by newSV() assuming one byte of output for every byte of
1948 * input. This routine expects newSV() to allocate an extra byte for a
1949 * trailing NUL, which this routine will append if it gets to the end of
1950 * the input. There may be more bytes of input than output (eg., \N{LATIN
1951 * CAPITAL LETTER A}), or more output than input if the constant ends up
1952 * recoded to utf8, but each time a construct is found that might increase
1953 * the needed size, SvGROW() is called. Its size parameter each time is
1954 * based on the best guess estimate at the time, namely the length used so
1955 * far, plus the length the current construct will occupy, plus room for
1956 * the trailing NUL, plus one byte for every input byte still unscanned */
1960 UV literal_endpoint = 0;
1961 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1964 PERL_ARGS_ASSERT_SCAN_CONST;
1966 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1967 /* If we are doing a trans and we know we want UTF8 set expectation */
1968 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1969 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1973 while (s < send || dorange) {
1974 /* get transliterations out of the way (they're most literal) */
1975 if (PL_lex_inwhat == OP_TRANS) {
1976 /* expand a range A-Z to the full set of characters. AIE! */
1978 I32 i; /* current expanded character */
1979 I32 min; /* first character in range */
1980 I32 max; /* last character in range */
1991 char * const c = (char*)utf8_hop((U8*)d, -1);
1995 *c = (char)UTF_TO_NATIVE(0xff);
1996 /* mark the range as done, and continue */
2002 i = d - SvPVX_const(sv); /* remember current offset */
2005 SvLEN(sv) + (has_utf8 ?
2006 (512 - UTF_CONTINUATION_MARK +
2009 /* How many two-byte within 0..255: 128 in UTF-8,
2010 * 96 in UTF-8-mod. */
2012 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2014 d = SvPVX(sv) + i; /* refresh d after realloc */
2018 for (j = 0; j <= 1; j++) {
2019 char * const c = (char*)utf8_hop((U8*)d, -1);
2020 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2026 max = (U8)0xff; /* only to \xff */
2027 uvmax = uv; /* \x{100} to uvmax */
2029 d = c; /* eat endpoint chars */
2034 d -= 2; /* eat the first char and the - */
2035 min = (U8)*d; /* first char in range */
2036 max = (U8)d[1]; /* last char in range */
2043 "Invalid range \"%c-%c\" in transliteration operator",
2044 (char)min, (char)max);
2048 if (literal_endpoint == 2 &&
2049 ((isLOWER(min) && isLOWER(max)) ||
2050 (isUPPER(min) && isUPPER(max)))) {
2052 for (i = min; i <= max; i++)
2054 *d++ = NATIVE_TO_NEED(has_utf8,i);
2056 for (i = min; i <= max; i++)
2058 *d++ = NATIVE_TO_NEED(has_utf8,i);
2063 for (i = min; i <= max; i++)
2066 const U8 ch = (U8)NATIVE_TO_UTF(i);
2067 if (UNI_IS_INVARIANT(ch))
2070 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2071 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2080 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2082 *d++ = (char)UTF_TO_NATIVE(0xff);
2084 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2088 /* mark the range as done, and continue */
2092 literal_endpoint = 0;
2097 /* range begins (ignore - as first or last char) */
2098 else if (*s == '-' && s+1 < send && s != start) {
2100 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2107 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2117 literal_endpoint = 0;
2118 native_range = TRUE;
2123 /* if we get here, we're not doing a transliteration */
2125 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2126 except for the last char, which will be done separately. */
2127 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2129 while (s+1 < send && *s != ')')
2130 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2132 else if (s[2] == '{' /* This should match regcomp.c */
2133 || (s[2] == '?' && s[3] == '{'))
2136 char *regparse = s + (s[2] == '{' ? 3 : 4);
2139 while (count && (c = *regparse)) {
2140 if (c == '\\' && regparse[1])
2148 if (*regparse != ')')
2149 regparse--; /* Leave one char for continuation. */
2150 while (s < regparse)
2151 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2155 /* likewise skip #-initiated comments in //x patterns */
2156 else if (*s == '#' && PL_lex_inpat &&
2157 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2158 while (s+1 < send && *s != '\n')
2159 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2162 /* check for embedded arrays
2163 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2165 else if (*s == '@' && s[1]) {
2166 if (isALNUM_lazy_if(s+1,UTF))
2168 if (strchr(":'{$", s[1]))
2170 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2171 break; /* in regexp, neither @+ nor @- are interpolated */
2174 /* check for embedded scalars. only stop if we're sure it's a
2177 else if (*s == '$') {
2178 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2180 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2182 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2183 "Possible unintended interpolation of $\\ in regex");
2185 break; /* in regexp, $ might be tail anchor */
2189 /* End of else if chain - OP_TRANS rejoin rest */
2192 if (*s == '\\' && s+1 < send) {
2195 /* deprecate \1 in strings and substitution replacements */
2196 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2197 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2199 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2204 /* string-change backslash escapes */
2205 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2209 /* skip any other backslash escapes in a pattern */
2210 else if (PL_lex_inpat) {
2211 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2212 goto default_action;
2215 /* if we get here, it's either a quoted -, or a digit */
2218 /* quoted - in transliterations */
2220 if (PL_lex_inwhat == OP_TRANS) {
2227 if ((isALPHA(*s) || isDIGIT(*s)))
2228 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2229 "Unrecognized escape \\%c passed through",
2231 /* default action is to copy the quoted character */
2232 goto default_action;
2235 /* eg. \132 indicates the octal constant 0x132 */
2236 case '0': case '1': case '2': case '3':
2237 case '4': case '5': case '6': case '7':
2241 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2244 goto NUM_ESCAPE_INSERT;
2246 /* eg. \x24 indicates the hex constant 0x24 */
2250 char* const e = strchr(s, '}');
2251 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2252 PERL_SCAN_DISALLOW_PREFIX;
2257 yyerror("Missing right brace on \\x{}");
2261 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2267 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2268 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2274 /* Insert oct, hex, or \N{U+...} escaped character. There will
2275 * always be enough room in sv since such escapes will be
2276 * longer than any UTF-8 sequence they can end up as, except if
2277 * they force us to recode the rest of the string into utf8 */
2279 /* Here uv is the ordinal of the next character being added in
2280 * unicode (converted from native). (It has to be done before
2281 * here because \N is interpreted as unicode, and oct and hex
2283 if (!UNI_IS_INVARIANT(uv)) {
2284 if (!has_utf8 && uv > 255) {
2285 /* Might need to recode whatever we have accumulated so
2286 * far if it contains any chars variant in utf8 or
2289 SvCUR_set(sv, d - SvPVX_const(sv));
2292 /* See Note on sizing above. */
2293 sv_utf8_upgrade_flags_grow(sv,
2294 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2295 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2296 d = SvPVX(sv) + SvCUR(sv);
2301 d = (char*)uvuni_to_utf8((U8*)d, uv);
2302 if (PL_lex_inwhat == OP_TRANS &&
2303 PL_sublex_info.sub_op) {
2304 PL_sublex_info.sub_op->op_private |=
2305 (PL_lex_repl ? OPpTRANS_FROM_UTF
2309 if (uv > 255 && !dorange)
2310 native_range = FALSE;
2322 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2327 char* e = strchr(s, '}');
2333 yyerror("Missing right brace on \\N{}");
2337 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2338 /* \N{U+...} The ... is a unicode value even on EBCDIC
2340 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2341 PERL_SCAN_DISALLOW_PREFIX;
2344 uv = grok_hex(s, &len, &flags, NULL);
2345 if ( e > s && len != (STRLEN)(e - s) ) {
2349 goto NUM_ESCAPE_INSERT;
2351 res = newSVpvn(s + 1, e - s - 1);
2352 res = new_constant( NULL, 0, "charnames",
2353 res, NULL, s - 2, e - s + 3 );
2355 sv_utf8_upgrade(res);
2356 str = SvPV_const(res,len);
2357 #ifdef EBCDIC_NEVER_MIND
2358 /* charnames uses pack U and that has been
2359 * recently changed to do the below uni->native
2360 * mapping, so this would be redundant (and wrong,
2361 * the code point would be doubly converted).
2362 * But leave this in just in case the pack U change
2363 * gets revoked, but the semantics is still
2364 * desireable for charnames. --jhi */
2366 UV uv = utf8_to_uvchr((const U8*)str, 0);
2369 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2371 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2372 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2373 str = SvPV_const(res, len);
2377 /* If destination is not in utf8 but this new character is,
2378 * recode the dest to utf8 */
2379 if (!has_utf8 && SvUTF8(res)) {
2380 SvCUR_set(sv, d - SvPVX_const(sv));
2383 /* See Note on sizing above. */
2384 sv_utf8_upgrade_flags_grow(sv,
2385 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2386 len + (STRLEN)(send - s) + 1);
2387 d = SvPVX(sv) + SvCUR(sv);
2389 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2391 /* See Note on sizing above. (NOTE: SvCUR() is not set
2392 * correctly here). */
2393 const STRLEN off = d - SvPVX_const(sv);
2394 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2398 native_range = FALSE; /* \N{} is guessed to be Unicode */
2400 Copy(str, d, len, char);
2407 yyerror("Missing braces on \\N{}");
2410 /* \c is a control character */
2419 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2422 yyerror("Missing control char name in \\c");
2426 /* printf-style backslashes, formfeeds, newlines, etc */
2428 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2431 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2434 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2437 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2440 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2443 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2446 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2452 } /* end if (backslash) */
2459 /* If we started with encoded form, or already know we want it,
2460 then encode the next character */
2461 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2465 /* One might think that it is wasted effort in the case of the
2466 * source being utf8 (this_utf8 == TRUE) to take the next character
2467 * in the source, convert it to an unsigned value, and then convert
2468 * it back again. But the source has not been validated here. The
2469 * routine that does the conversion checks for errors like
2472 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2473 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2475 SvCUR_set(sv, d - SvPVX_const(sv));
2478 /* See Note on sizing above. */
2479 sv_utf8_upgrade_flags_grow(sv,
2480 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2481 need + (STRLEN)(send - s) + 1);
2482 d = SvPVX(sv) + SvCUR(sv);
2484 } else if (need > len) {
2485 /* encoded value larger than old, may need extra space (NOTE:
2486 * SvCUR() is not set correctly here). See Note on sizing
2488 const STRLEN off = d - SvPVX_const(sv);
2489 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2493 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2495 if (uv > 255 && !dorange)
2496 native_range = FALSE;
2500 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2502 } /* while loop to process each character */
2504 /* terminate the string and set up the sv */
2506 SvCUR_set(sv, d - SvPVX_const(sv));
2507 if (SvCUR(sv) >= SvLEN(sv))
2508 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2511 if (PL_encoding && !has_utf8) {
2512 sv_recode_to_utf8(sv, PL_encoding);
2518 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2519 PL_sublex_info.sub_op->op_private |=
2520 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2524 /* shrink the sv if we allocated more than we used */
2525 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2526 SvPV_shrink_to_cur(sv);
2529 /* return the substring (via pl_yylval) only if we parsed anything */
2530 if (s > PL_bufptr) {
2531 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2532 const char *const key = PL_lex_inpat ? "qr" : "q";
2533 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2537 if (PL_lex_inwhat == OP_TRANS) {
2540 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2548 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2551 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2558 * Returns TRUE if there's more to the expression (e.g., a subscript),
2561 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2563 * ->[ and ->{ return TRUE
2564 * { and [ outside a pattern are always subscripts, so return TRUE
2565 * if we're outside a pattern and it's not { or [, then return FALSE
2566 * if we're in a pattern and the first char is a {
2567 * {4,5} (any digits around the comma) returns FALSE
2568 * if we're in a pattern and the first char is a [
2570 * [SOMETHING] has a funky algorithm to decide whether it's a
2571 * character class or not. It has to deal with things like
2572 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2573 * anything else returns TRUE
2576 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2579 S_intuit_more(pTHX_ register char *s)
2583 PERL_ARGS_ASSERT_INTUIT_MORE;
2585 if (PL_lex_brackets)
2587 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2589 if (*s != '{' && *s != '[')
2594 /* In a pattern, so maybe we have {n,m}. */
2611 /* On the other hand, maybe we have a character class */
2614 if (*s == ']' || *s == '^')
2617 /* this is terrifying, and it works */
2618 int weight = 2; /* let's weigh the evidence */
2620 unsigned char un_char = 255, last_un_char;
2621 const char * const send = strchr(s,']');
2622 char tmpbuf[sizeof PL_tokenbuf * 4];
2624 if (!send) /* has to be an expression */
2627 Zero(seen,256,char);
2630 else if (isDIGIT(*s)) {
2632 if (isDIGIT(s[1]) && s[2] == ']')
2638 for (; s < send; s++) {
2639 last_un_char = un_char;
2640 un_char = (unsigned char)*s;
2645 weight -= seen[un_char] * 10;
2646 if (isALNUM_lazy_if(s+1,UTF)) {
2648 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2649 len = (int)strlen(tmpbuf);
2650 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2655 else if (*s == '$' && s[1] &&
2656 strchr("[#!%*<>()-=",s[1])) {
2657 if (/*{*/ strchr("])} =",s[2]))
2666 if (strchr("wds]",s[1]))
2668 else if (seen[(U8)'\''] || seen[(U8)'"'])
2670 else if (strchr("rnftbxcav",s[1]))
2672 else if (isDIGIT(s[1])) {
2674 while (s[1] && isDIGIT(s[1]))
2684 if (strchr("aA01! ",last_un_char))
2686 if (strchr("zZ79~",s[1]))
2688 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2689 weight -= 5; /* cope with negative subscript */
2692 if (!isALNUM(last_un_char)
2693 && !(last_un_char == '$' || last_un_char == '@'
2694 || last_un_char == '&')
2695 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2700 if (keyword(tmpbuf, d - tmpbuf, 0))
2703 if (un_char == last_un_char + 1)
2705 weight -= seen[un_char];
2710 if (weight >= 0) /* probably a character class */
2720 * Does all the checking to disambiguate
2722 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2723 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2725 * First argument is the stuff after the first token, e.g. "bar".
2727 * Not a method if bar is a filehandle.
2728 * Not a method if foo is a subroutine prototyped to take a filehandle.
2729 * Not a method if it's really "Foo $bar"
2730 * Method if it's "foo $bar"
2731 * Not a method if it's really "print foo $bar"
2732 * Method if it's really "foo package::" (interpreted as package->foo)
2733 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2734 * Not a method if bar is a filehandle or package, but is quoted with
2739 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2742 char *s = start + (*start == '$');
2743 char tmpbuf[sizeof PL_tokenbuf];
2750 PERL_ARGS_ASSERT_INTUIT_METHOD;
2753 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2757 const char *proto = SvPVX_const(cv);
2768 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2769 /* start is the beginning of the possible filehandle/object,
2770 * and s is the end of it
2771 * tmpbuf is a copy of it
2774 if (*start == '$') {
2775 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2776 isUPPER(*PL_tokenbuf))
2779 len = start - SvPVX(PL_linestr);
2783 start = SvPVX(PL_linestr) + len;
2787 return *s == '(' ? FUNCMETH : METHOD;
2789 if (!keyword(tmpbuf, len, 0)) {
2790 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2794 soff = s - SvPVX(PL_linestr);
2798 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2799 if (indirgv && GvCVu(indirgv))
2801 /* filehandle or package name makes it a method */
2802 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2804 soff = s - SvPVX(PL_linestr);
2807 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2808 return 0; /* no assumptions -- "=>" quotes bearword */
2810 start_force(PL_curforce);
2811 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2812 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
2813 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2815 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2820 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2822 return *s == '(' ? FUNCMETH : METHOD;
2828 /* Encoded script support. filter_add() effectively inserts a
2829 * 'pre-processing' function into the current source input stream.
2830 * Note that the filter function only applies to the current source file
2831 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2833 * The datasv parameter (which may be NULL) can be used to pass
2834 * private data to this instance of the filter. The filter function
2835 * can recover the SV using the FILTER_DATA macro and use it to
2836 * store private buffers and state information.
2838 * The supplied datasv parameter is upgraded to a PVIO type
2839 * and the IoDIRP/IoANY field is used to store the function pointer,
2840 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2841 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2842 * private use must be set using malloc'd pointers.
2846 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2855 if (!PL_rsfp_filters)
2856 PL_rsfp_filters = newAV();
2859 SvUPGRADE(datasv, SVt_PVIO);
2860 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2861 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2862 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2863 FPTR2DPTR(void *, IoANY(datasv)),
2864 SvPV_nolen(datasv)));
2865 av_unshift(PL_rsfp_filters, 1);
2866 av_store(PL_rsfp_filters, 0, datasv) ;
2871 /* Delete most recently added instance of this filter function. */
2873 Perl_filter_del(pTHX_ filter_t funcp)
2878 PERL_ARGS_ASSERT_FILTER_DEL;
2881 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2882 FPTR2DPTR(void*, funcp)));
2884 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2886 /* if filter is on top of stack (usual case) just pop it off */
2887 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2888 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2889 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2890 IoANY(datasv) = (void *)NULL;
2891 sv_free(av_pop(PL_rsfp_filters));
2895 /* we need to search for the correct entry and clear it */
2896 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2900 /* Invoke the idxth filter function for the current rsfp. */
2901 /* maxlen 0 = read one text line */
2903 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2908 /* This API is bad. It should have been using unsigned int for maxlen.
2909 Not sure if we want to change the API, but if not we should sanity
2910 check the value here. */
2911 const unsigned int correct_length
2920 PERL_ARGS_ASSERT_FILTER_READ;
2922 if (!PL_parser || !PL_rsfp_filters)
2924 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2925 /* Provide a default input filter to make life easy. */
2926 /* Note that we append to the line. This is handy. */
2927 DEBUG_P(PerlIO_printf(Perl_debug_log,
2928 "filter_read %d: from rsfp\n", idx));
2929 if (correct_length) {
2932 const int old_len = SvCUR(buf_sv);
2934 /* ensure buf_sv is large enough */
2935 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2936 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2937 correct_length)) <= 0) {
2938 if (PerlIO_error(PL_rsfp))
2939 return -1; /* error */
2941 return 0 ; /* end of file */
2943 SvCUR_set(buf_sv, old_len + len) ;
2946 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2947 if (PerlIO_error(PL_rsfp))
2948 return -1; /* error */
2950 return 0 ; /* end of file */
2953 return SvCUR(buf_sv);
2955 /* Skip this filter slot if filter has been deleted */
2956 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2957 DEBUG_P(PerlIO_printf(Perl_debug_log,
2958 "filter_read %d: skipped (filter deleted)\n",
2960 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2962 /* Get function pointer hidden within datasv */
2963 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2964 DEBUG_P(PerlIO_printf(Perl_debug_log,
2965 "filter_read %d: via function %p (%s)\n",
2966 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2967 /* Call function. The function is expected to */
2968 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2969 /* Return: <0:error, =0:eof, >0:not eof */
2970 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2974 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2978 PERL_ARGS_ASSERT_FILTER_GETS;
2980 #ifdef PERL_CR_FILTER
2981 if (!PL_rsfp_filters) {
2982 filter_add(S_cr_textfilter,NULL);
2985 if (PL_rsfp_filters) {
2987 SvCUR_set(sv, 0); /* start with empty line */
2988 if (FILTER_READ(0, sv, 0) > 0)
2989 return ( SvPVX(sv) ) ;
2994 return (sv_gets(sv, fp, append));
2998 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3003 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3005 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3009 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3010 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3012 return GvHV(gv); /* Foo:: */
3015 /* use constant CLASS => 'MyClass' */
3016 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3017 if (gv && GvCV(gv)) {
3018 SV * const sv = cv_const_sv(GvCV(gv));
3020 pkgname = SvPV_const(sv, len);
3023 return gv_stashpvn(pkgname, len, 0);
3027 * S_readpipe_override
3028 * Check whether readpipe() is overriden, and generates the appropriate
3029 * optree, provided sublex_start() is called afterwards.
3032 S_readpipe_override(pTHX)
3035 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3036 pl_yylval.ival = OP_BACKTICK;
3038 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3040 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3041 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3042 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3044 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3045 append_elem(OP_LIST,
3046 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3047 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3054 * The intent of this yylex wrapper is to minimize the changes to the
3055 * tokener when we aren't interested in collecting madprops. It remains
3056 * to be seen how successful this strategy will be...
3063 char *s = PL_bufptr;
3065 /* make sure PL_thiswhite is initialized */
3069 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3070 if (PL_pending_ident)
3071 return S_pending_ident(aTHX);
3073 /* previous token ate up our whitespace? */
3074 if (!PL_lasttoke && PL_nextwhite) {
3075 PL_thiswhite = PL_nextwhite;
3079 /* isolate the token, and figure out where it is without whitespace */
3080 PL_realtokenstart = -1;
3084 assert(PL_curforce < 0);
3086 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3087 if (!PL_thistoken) {
3088 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3089 PL_thistoken = newSVpvs("");
3091 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3092 PL_thistoken = newSVpvn(tstart, s - tstart);
3095 if (PL_thismad) /* install head */
3096 CURMAD('X', PL_thistoken);
3099 /* last whitespace of a sublex? */
3100 if (optype == ')' && PL_endwhite) {
3101 CURMAD('X', PL_endwhite);
3106 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3107 if (!PL_thiswhite && !PL_endwhite && !optype) {
3108 sv_free(PL_thistoken);
3113 /* put off final whitespace till peg */
3114 if (optype == ';' && !PL_rsfp) {
3115 PL_nextwhite = PL_thiswhite;
3118 else if (PL_thisopen) {
3119 CURMAD('q', PL_thisopen);
3121 sv_free(PL_thistoken);
3125 /* Store actual token text as madprop X */
3126 CURMAD('X', PL_thistoken);
3130 /* add preceding whitespace as madprop _ */
3131 CURMAD('_', PL_thiswhite);
3135 /* add quoted material as madprop = */
3136 CURMAD('=', PL_thisstuff);
3140 /* add terminating quote as madprop Q */
3141 CURMAD('Q', PL_thisclose);
3145 /* special processing based on optype */
3149 /* opval doesn't need a TOKEN since it can already store mp */
3159 if (pl_yylval.opval)
3160 append_madprops(PL_thismad, pl_yylval.opval, 0);
3168 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3177 /* remember any fake bracket that lexer is about to discard */
3178 if (PL_lex_brackets == 1 &&
3179 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3182 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3185 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3186 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3189 break; /* don't bother looking for trailing comment */
3198 /* attach a trailing comment to its statement instead of next token */
3202 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3204 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3206 if (*s == '\n' || *s == '#') {
3207 while (s < PL_bufend && *s != '\n')
3211 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3212 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3229 /* Create new token struct. Note: opvals return early above. */
3230 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3237 S_tokenize_use(pTHX_ int is_use, char *s) {
3240 PERL_ARGS_ASSERT_TOKENIZE_USE;
3242 if (PL_expect != XSTATE)
3243 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3244 is_use ? "use" : "no"));
3246 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3247 s = force_version(s, TRUE);
3248 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3249 start_force(PL_curforce);
3250 NEXTVAL_NEXTTOKE.opval = NULL;
3253 else if (*s == 'v') {
3254 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3255 s = force_version(s, FALSE);
3259 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3260 s = force_version(s, FALSE);
3262 pl_yylval.ival = is_use;
3266 static const char* const exp_name[] =
3267 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3268 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3275 Works out what to call the token just pulled out of the input
3276 stream. The yacc parser takes care of taking the ops we return and
3277 stitching them into a tree.
3283 if read an identifier
3284 if we're in a my declaration
3285 croak if they tried to say my($foo::bar)
3286 build the ops for a my() declaration
3287 if it's an access to a my() variable
3288 are we in a sort block?
3289 croak if my($a); $a <=> $b
3290 build ops for access to a my() variable
3291 if in a dq string, and they've said @foo and we can't find @foo
3293 build ops for a bareword
3294 if we already built the token before, use it.
3299 #pragma segment Perl_yylex
3305 register char *s = PL_bufptr;
3310 /* orig_keyword, gvp, and gv are initialized here because
3311 * jump to the label just_a_word_zero can bypass their
3312 * initialization later. */
3313 I32 orig_keyword = 0;
3318 SV* tmp = newSVpvs("");
3319 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3320 (IV)CopLINE(PL_curcop),
3321 lex_state_names[PL_lex_state],
3322 exp_name[PL_expect],
3323 pv_display(tmp, s, strlen(s), 0, 60));
3326 /* check if there's an identifier for us to look at */
3327 if (PL_pending_ident)
3328 return REPORT(S_pending_ident(aTHX));
3330 /* no identifier pending identification */
3332 switch (PL_lex_state) {
3334 case LEX_NORMAL: /* Some compilers will produce faster */
3335 case LEX_INTERPNORMAL: /* code if we comment these out. */
3339 /* when we've already built the next token, just pull it out of the queue */
3343 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3345 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3346 PL_nexttoke[PL_lasttoke].next_mad = 0;
3347 if (PL_thismad && PL_thismad->mad_key == '_') {
3348 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3349 PL_thismad->mad_val = 0;
3350 mad_free(PL_thismad);
3355 PL_lex_state = PL_lex_defer;
3356 PL_expect = PL_lex_expect;
3357 PL_lex_defer = LEX_NORMAL;
3358 if (!PL_nexttoke[PL_lasttoke].next_type)
3363 pl_yylval = PL_nextval[PL_nexttoke];
3365 PL_lex_state = PL_lex_defer;
3366 PL_expect = PL_lex_expect;
3367 PL_lex_defer = LEX_NORMAL;
3371 /* FIXME - can these be merged? */
3372 return(PL_nexttoke[PL_lasttoke].next_type);
3374 return REPORT(PL_nexttype[PL_nexttoke]);
3377 /* interpolated case modifiers like \L \U, including \Q and \E.
3378 when we get here, PL_bufptr is at the \
3380 case LEX_INTERPCASEMOD:
3382 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3383 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3385 /* handle \E or end of string */
3386 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3388 if (PL_lex_casemods) {
3389 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3390 PL_lex_casestack[PL_lex_casemods] = '\0';
3392 if (PL_bufptr != PL_bufend
3393 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3395 PL_lex_state = LEX_INTERPCONCAT;
3398 PL_thistoken = newSVpvs("\\E");
3404 while (PL_bufptr != PL_bufend &&
3405 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3407 PL_thiswhite = newSVpvs("");
3408 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3412 if (PL_bufptr != PL_bufend)
3415 PL_lex_state = LEX_INTERPCONCAT;
3419 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3420 "### Saw case modifier\n"); });
3422 if (s[1] == '\\' && s[2] == 'E') {
3425 PL_thiswhite = newSVpvs("");
3426 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3429 PL_lex_state = LEX_INTERPCONCAT;
3434 if (!PL_madskills) /* when just compiling don't need correct */
3435 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3436 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3437 if ((*s == 'L' || *s == 'U') &&
3438 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3439 PL_lex_casestack[--PL_lex_casemods] = '\0';
3442 if (PL_lex_casemods > 10)
3443 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3444 PL_lex_casestack[PL_lex_casemods++] = *s;
3445 PL_lex_casestack[PL_lex_casemods] = '\0';
3446 PL_lex_state = LEX_INTERPCONCAT;
3447 start_force(PL_curforce);
3448 NEXTVAL_NEXTTOKE.ival = 0;
3450 start_force(PL_curforce);
3452 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3454 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3456 NEXTVAL_NEXTTOKE.ival = OP_LC;
3458 NEXTVAL_NEXTTOKE.ival = OP_UC;
3460 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3462 Perl_croak(aTHX_ "panic: yylex");
3464 SV* const tmpsv = newSVpvs("\\ ");
3465 /* replace the space with the character we want to escape
3467 SvPVX(tmpsv)[1] = *s;
3473 if (PL_lex_starts) {
3479 sv_free(PL_thistoken);
3480 PL_thistoken = newSVpvs("");
3483 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3484 if (PL_lex_casemods == 1 && PL_lex_inpat)
3493 case LEX_INTERPPUSH:
3494 return REPORT(sublex_push());
3496 case LEX_INTERPSTART:
3497 if (PL_bufptr == PL_bufend)
3498 return REPORT(sublex_done());
3499 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3500 "### Interpolated variable\n"); });
3502 PL_lex_dojoin = (*PL_bufptr == '@');
3503 PL_lex_state = LEX_INTERPNORMAL;
3504 if (PL_lex_dojoin) {
3505 start_force(PL_curforce);
3506 NEXTVAL_NEXTTOKE.ival = 0;
3508 start_force(PL_curforce);
3509 force_ident("\"", '$');
3510 start_force(PL_curforce);
3511 NEXTVAL_NEXTTOKE.ival = 0;
3513 start_force(PL_curforce);
3514 NEXTVAL_NEXTTOKE.ival = 0;
3516 start_force(PL_curforce);
3517 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3520 if (PL_lex_starts++) {
3525 sv_free(PL_thistoken);
3526 PL_thistoken = newSVpvs("");
3529 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3530 if (!PL_lex_casemods && PL_lex_inpat)
3537 case LEX_INTERPENDMAYBE:
3538 if (intuit_more(PL_bufptr)) {
3539 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3545 if (PL_lex_dojoin) {
3546 PL_lex_dojoin = FALSE;
3547 PL_lex_state = LEX_INTERPCONCAT;
3551 sv_free(PL_thistoken);
3552 PL_thistoken = newSVpvs("");
3557 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3558 && SvEVALED(PL_lex_repl))
3560 if (PL_bufptr != PL_bufend)
3561 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3565 case LEX_INTERPCONCAT:
3567 if (PL_lex_brackets)
3568 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3570 if (PL_bufptr == PL_bufend)
3571 return REPORT(sublex_done());
3573 if (SvIVX(PL_linestr) == '\'') {
3574 SV *sv = newSVsv(PL_linestr);
3577 else if ( PL_hints & HINT_NEW_RE )
3578 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3579 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3583 s = scan_const(PL_bufptr);
3585 PL_lex_state = LEX_INTERPCASEMOD;
3587 PL_lex_state = LEX_INTERPSTART;
3590 if (s != PL_bufptr) {
3591 start_force(PL_curforce);
3593 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3595 NEXTVAL_NEXTTOKE = pl_yylval;
3598 if (PL_lex_starts++) {
3602 sv_free(PL_thistoken);
3603 PL_thistoken = newSVpvs("");
3606 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3607 if (!PL_lex_casemods && PL_lex_inpat)
3620 PL_lex_state = LEX_NORMAL;
3621 s = scan_formline(PL_bufptr);
3622 if (!PL_lex_formbrack)
3628 PL_oldoldbufptr = PL_oldbufptr;
3634 sv_free(PL_thistoken);
3637 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3641 if (isIDFIRST_lazy_if(s,UTF))
3644 unsigned char c = *s;
3645 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3646 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
3647 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
3652 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
3656 goto fake_eof; /* emulate EOF on ^D or ^Z */
3665 if (PL_lex_brackets) {
3666 yyerror((const char *)
3668 ? "Format not terminated"
3669 : "Missing right curly or square bracket"));
3671 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3672 "### Tokener got EOF\n");
3676 if (s++ < PL_bufend)
3677 goto retry; /* ignore stray nulls */
3680 if (!PL_in_eval && !PL_preambled) {
3681 PL_preambled = TRUE;
3687 /* Generate a string of Perl code to load the debugger.
3688 * If PERL5DB is set, it will return the contents of that,
3689 * otherwise a compile-time require of perl5db.pl. */
3691 const char * const pdb = PerlEnv_getenv("PERL5DB");
3694 sv_setpv(PL_linestr, pdb);
3695 sv_catpvs(PL_linestr,";");
3697 SETERRNO(0,SS_NORMAL);
3698 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3701 sv_setpvs(PL_linestr,"");
3702 if (PL_preambleav) {
3703 SV **svp = AvARRAY(PL_preambleav);
3704 SV **const end = svp + AvFILLp(PL_preambleav);
3706 sv_catsv(PL_linestr, *svp);
3708 sv_catpvs(PL_linestr, ";");
3710 sv_free(MUTABLE_SV(PL_preambleav));
3711 PL_preambleav = NULL;
3714 sv_catpvs(PL_linestr,
3715 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3716 if (PL_minus_n || PL_minus_p) {
3717 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3719 sv_catpvs(PL_linestr,"chomp;");
3722 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3723 || *PL_splitstr == '"')
3724 && strchr(PL_splitstr + 1, *PL_splitstr))
3725 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3727 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3728 bytes can be used as quoting characters. :-) */
3729 const char *splits = PL_splitstr;
3730 sv_catpvs(PL_linestr, "our @F=split(q\0");
3733 if (*splits == '\\')
3734 sv_catpvn(PL_linestr, splits, 1);
3735 sv_catpvn(PL_linestr, splits, 1);
3736 } while (*splits++);
3737 /* This loop will embed the trailing NUL of
3738 PL_linestr as the last thing it does before
3740 sv_catpvs(PL_linestr, ");");
3744 sv_catpvs(PL_linestr,"our @F=split(' ');");
3747 sv_catpvs(PL_linestr, "\n");
3748 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3749 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3750 PL_last_lop = PL_last_uni = NULL;
3751 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3752 update_debugger_info(PL_linestr, NULL, 0);
3756 bof = PL_rsfp ? TRUE : FALSE;
3757 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3760 PL_realtokenstart = -1;
3763 if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3764 PerlIO_clearerr(PL_rsfp);
3766 (void)PerlIO_close(PL_rsfp);
3768 PL_doextract = FALSE;
3770 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3776 sv_setpvs(PL_linestr, ";}continue{print;}");
3778 sv_setpvs(PL_linestr, ";}");
3779 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3780 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3781 PL_last_lop = PL_last_uni = NULL;
3782 PL_minus_n = PL_minus_p = 0;
3785 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3786 PL_last_lop = PL_last_uni = NULL;
3787 sv_setpvs(PL_linestr,"");
3788 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3790 /* If it looks like the start of a BOM or raw UTF-16,
3791 * check if it in fact is. */
3797 #ifdef PERLIO_IS_STDIO
3798 # ifdef __GNU_LIBRARY__
3799 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3800 # define FTELL_FOR_PIPE_IS_BROKEN
3804 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3805 # define FTELL_FOR_PIPE_IS_BROKEN
3810 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3812 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3813 s = swallow_bom((U8*)s);
3817 /* Incest with pod. */
3820 sv_catsv(PL_thiswhite, PL_linestr);
3822 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3823 sv_setpvs(PL_linestr, "");
3824 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3825 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3826 PL_last_lop = PL_last_uni = NULL;
3827 PL_doextract = FALSE;
3831 } while (PL_doextract);
3832 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3833 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3834 update_debugger_info(PL_linestr, NULL, 0);
3835 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3836 PL_last_lop = PL_last_uni = NULL;
3837 if (CopLINE(PL_curcop) == 1) {
3838 while (s < PL_bufend && isSPACE(*s))
3840 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3844 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3848 if (*s == '#' && *(s+1) == '!')
3850 #ifdef ALTERNATE_SHEBANG
3852 static char const as[] = ALTERNATE_SHEBANG;
3853 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3854 d = s + (sizeof(as) - 1);
3856 #endif /* ALTERNATE_SHEBANG */
3865 while (*d && !isSPACE(*d))
3869 #ifdef ARG_ZERO_IS_SCRIPT
3870 if (ipathend > ipath) {
3872 * HP-UX (at least) sets argv[0] to the script name,
3873 * which makes $^X incorrect. And Digital UNIX and Linux,
3874 * at least, set argv[0] to the basename of the Perl
3875 * interpreter. So, having found "#!", we'll set it right.
3877 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3879 assert(SvPOK(x) || SvGMAGICAL(x));
3880 if (sv_eq(x, CopFILESV(PL_curcop))) {
3881 sv_setpvn(x, ipath, ipathend - ipath);
3887 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3888 const char * const lstart = SvPV_const(x,llen);
3890 bstart += blen - llen;
3891 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3892 sv_setpvn(x, ipath, ipathend - ipath);
3897 TAINT_NOT; /* $^X is always tainted, but that's OK */
3899 #endif /* ARG_ZERO_IS_SCRIPT */
3904 d = instr(s,"perl -");
3906 d = instr(s,"perl");
3908 /* avoid getting into infinite loops when shebang
3909 * line contains "Perl" rather than "perl" */
3911 for (d = ipathend-4; d >= ipath; --d) {
3912 if ((*d == 'p' || *d == 'P')
3913 && !ibcmp(d, "perl", 4))
3923 #ifdef ALTERNATE_SHEBANG
3925 * If the ALTERNATE_SHEBANG on this system starts with a
3926 * character that can be part of a Perl expression, then if
3927 * we see it but not "perl", we're probably looking at the
3928 * start of Perl code, not a request to hand off to some
3929 * other interpreter. Similarly, if "perl" is there, but
3930 * not in the first 'word' of the line, we assume the line
3931 * contains the start of the Perl program.
3933 if (d && *s != '#') {
3934 const char *c = ipath;
3935 while (*c && !strchr("; \t\r\n\f\v#", *c))
3938 d = NULL; /* "perl" not in first word; ignore */
3940 *s = '#'; /* Don't try to parse shebang line */
3942 #endif /* ALTERNATE_SHEBANG */
3947 !instr(s,"indir") &&
3948 instr(PL_origargv[0],"perl"))
3955 while (s < PL_bufend && isSPACE(*s))
3957 if (s < PL_bufend) {
3958 Newx(newargv,PL_origargc+3,char*);
3960 while (s < PL_bufend && !isSPACE(*s))
3963 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3966 newargv = PL_origargv;
3969 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3971 Perl_croak(aTHX_ "Can't exec %s", ipath);
3974 while (*d && !isSPACE(*d))
3976 while (SPACE_OR_TAB(*d))
3980 const bool switches_done = PL_doswitches;
3981 const U32 oldpdb = PL_perldb;
3982 const bool oldn = PL_minus_n;
3983 const bool oldp = PL_minus_p;
3987 bool baduni = FALSE;
3989 const char *d2 = d1 + 1;
3990 if (parse_unicode_opts((const char **)&d2)
3994 if (baduni || *d1 == 'M' || *d1 == 'm') {
3995 const char * const m = d1;
3996 while (*d1 && !isSPACE(*d1))
3998 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4001 d1 = moreswitches(d1);
4003 if (PL_doswitches && !switches_done) {
4004 int argc = PL_origargc;
4005 char **argv = PL_origargv;
4008 } while (argc && argv[0][0] == '-' && argv[0][1]);
4009 init_argv_symbols(argc,argv);
4011 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4012 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4013 /* if we have already added "LINE: while (<>) {",
4014 we must not do it again */
4016 sv_setpvs(PL_linestr, "");
4017 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4018 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4019 PL_last_lop = PL_last_uni = NULL;
4020 PL_preambled = FALSE;
4021 if (PERLDB_LINE || PERLDB_SAVESRC)
4022 (void)gv_fetchfile(PL_origfilename);
4029 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4031 PL_lex_state = LEX_FORMLINE;
4036 #ifdef PERL_STRICT_CR
4037 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4039 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4041 case ' ': case '\t': case '\f': case 013:
4043 PL_realtokenstart = -1;
4045 PL_thiswhite = newSVpvs("");
4046 sv_catpvn(PL_thiswhite, s, 1);
4053 PL_realtokenstart = -1;
4057 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4058 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4059 /* handle eval qq[#line 1 "foo"\n ...] */
4060 CopLINE_dec(PL_curcop);
4063 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4065 if (!PL_in_eval || PL_rsfp)
4070 while (d < PL_bufend && *d != '\n')
4074 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4075 Perl_croak(aTHX_ "panic: input overflow");
4078 PL_thiswhite = newSVpvn(s, d - s);
4083 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4085 PL_lex_state = LEX_FORMLINE;
4091 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4092 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4095 TOKEN(PEG); /* make sure any #! line is accessible */
4100 /* if (PL_madskills && PL_lex_formbrack) { */
4102 while (d < PL_bufend && *d != '\n')
4106 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4107 Perl_croak(aTHX_ "panic: input overflow");
4108 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4110 PL_thiswhite = newSVpvs("");
4111 if (CopLINE(PL_curcop) == 1) {
4112 sv_setpvs(PL_thiswhite, "");
4115 sv_catpvn(PL_thiswhite, s, d - s);
4129 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4137 while (s < PL_bufend && SPACE_OR_TAB(*s))
4140 if (strnEQ(s,"=>",2)) {
4141 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4142 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4143 OPERATOR('-'); /* unary minus */
4145 PL_last_uni = PL_oldbufptr;
4147 case 'r': ftst = OP_FTEREAD; break;
4148 case 'w': ftst = OP_FTEWRITE; break;
4149 case 'x': ftst = OP_FTEEXEC; break;
4150 case 'o': ftst = OP_FTEOWNED; break;
4151 case 'R': ftst = OP_FTRREAD; break;
4152 case 'W': ftst = OP_FTRWRITE; break;
4153 case 'X': ftst = OP_FTREXEC; break;
4154 case 'O': ftst = OP_FTROWNED; break;
4155 case 'e': ftst = OP_FTIS; break;
4156 case 'z': ftst = OP_FTZERO; break;
4157 case 's': ftst = OP_FTSIZE; break;
4158 case 'f': ftst = OP_FTFILE; break;
4159 case 'd': ftst = OP_FTDIR; break;
4160 case 'l': ftst = OP_FTLINK; break;
4161 case 'p': ftst = OP_FTPIPE; break;
4162 case 'S': ftst = OP_FTSOCK; break;
4163 case 'u': ftst = OP_FTSUID; break;
4164 case 'g': ftst = OP_FTSGID; break;
4165 case 'k': ftst = OP_FTSVTX; break;
4166 case 'b': ftst = OP_FTBLK; break;
4167 case 'c': ftst = OP_FTCHR; break;
4168 case 't': ftst = OP_FTTTY; break;
4169 case 'T': ftst = OP_FTTEXT; break;
4170 case 'B': ftst = OP_FTBINARY; break;
4171 case 'M': case 'A': case 'C':
4172 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4174 case 'M': ftst = OP_FTMTIME; break;
4175 case 'A': ftst = OP_FTATIME; break;
4176 case 'C': ftst = OP_FTCTIME; break;
4184 PL_last_lop_op = (OPCODE)ftst;
4185 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4186 "### Saw file test %c\n", (int)tmp);
4191 /* Assume it was a minus followed by a one-letter named
4192 * subroutine call (or a -bareword), then. */
4193 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4194 "### '-%c' looked like a file test but was not\n",
4201 const char tmp = *s++;
4204 if (PL_expect == XOPERATOR)
4209 else if (*s == '>') {
4212 if (isIDFIRST_lazy_if(s,UTF)) {
4213 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4221 if (PL_expect == XOPERATOR)
4224 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4226 OPERATOR('-'); /* unary minus */
4232 const char tmp = *s++;
4235 if (PL_expect == XOPERATOR)
4240 if (PL_expect == XOPERATOR)
4243 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4250 if (PL_expect != XOPERATOR) {
4251 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4252 PL_expect = XOPERATOR;
4253 force_ident(PL_tokenbuf, '*');
4266 if (PL_expect == XOPERATOR) {
4270 PL_tokenbuf[0] = '%';
4271 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4272 sizeof PL_tokenbuf - 1, FALSE);
4273 if (!PL_tokenbuf[1]) {
4276 PL_pending_ident = '%';
4285 const char tmp = *s++;
4290 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4297 const char tmp = *s++;
4303 goto just_a_word_zero_gv;
4306 switch (PL_expect) {
4312 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4314 PL_bufptr = s; /* update in case we back off */
4320 PL_expect = XTERMBLOCK;
4323 stuffstart = s - SvPVX(PL_linestr) - 1;
4327 while (isIDFIRST_lazy_if(s,UTF)) {
4330 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4331 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4332 if (tmp < 0) tmp = -tmp;
4347 sv = newSVpvn(s, len);
4349 d = scan_str(d,TRUE,TRUE);
4351 /* MUST advance bufptr here to avoid bogus
4352 "at end of line" context messages from yyerror().
4354 PL_bufptr = s + len;
4355 yyerror("Unterminated attribute parameter in attribute list");
4359 return REPORT(0); /* EOF indicator */
4363 sv_catsv(sv, PL_lex_stuff);
4364 attrs = append_elem(OP_LIST, attrs,
4365 newSVOP(OP_CONST, 0, sv));
4366 SvREFCNT_dec(PL_lex_stuff);
4367 PL_lex_stuff = NULL;
4370 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4372 if (PL_in_my == KEY_our) {
4373 deprecate(":unique");
4376 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4379 /* NOTE: any CV attrs applied here need to be part of
4380 the CVf_BUILTIN_ATTRS define in cv.h! */
4381 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4383 CvLVALUE_on(PL_compcv);
4385 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4387 deprecate(":locked");
4389 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4391 CvMETHOD_on(PL_compcv);
4393 /* After we've set the flags, it could be argued that
4394 we don't need to do the attributes.pm-based setting
4395 process, and shouldn't bother appending recognized
4396 flags. To experiment with that, uncomment the
4397 following "else". (Note that's already been
4398 uncommented. That keeps the above-applied built-in
4399 attributes from being intercepted (and possibly
4400 rejected) by a package's attribute routines, but is
4401 justified by the performance win for the common case
4402 of applying only built-in attributes.) */
4404 attrs = append_elem(OP_LIST, attrs,
4405 newSVOP(OP_CONST, 0,
4409 if (*s == ':' && s[1] != ':')
4412 break; /* require real whitespace or :'s */
4413 /* XXX losing whitespace on sequential attributes here */
4417 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4418 if (*s != ';' && *s != '}' && *s != tmp
4419 && (tmp != '=' || *s != ')')) {
4420 const char q = ((*s == '\'') ? '"' : '\'');
4421 /* If here for an expression, and parsed no attrs, back
4423 if (tmp == '=' && !attrs) {
4427 /* MUST advance bufptr here to avoid bogus "at end of line"
4428 context messages from yyerror().
4431 yyerror( (const char *)
4433 ? Perl_form(aTHX_ "Invalid separator character "
4434 "%c%c%c in attribute list", q, *s, q)
4435 : "Unterminated attribute list" ) );
4443 start_force(PL_curforce);
4444 NEXTVAL_NEXTTOKE.opval = attrs;
4445 CURMAD('_', PL_nextwhite);
4450 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4451 (s - SvPVX(PL_linestr)) - stuffstart);
4459 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4460 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4468 const char tmp = *s++;
4473 const char tmp = *s++;
4481 if (PL_lex_brackets <= 0)
4482 yyerror("Unmatched right square bracket");
4485 if (PL_lex_state == LEX_INTERPNORMAL) {
4486 if (PL_lex_brackets == 0) {
4487 if (*s == '-' && s[1] == '>')
4488 PL_lex_state = LEX_INTERPENDMAYBE;
4489 else if (*s != '[' && *s != '{')
4490 PL_lex_state = LEX_INTERPEND;
4497 if (PL_lex_brackets > 100) {
4498 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4500 switch (PL_expect) {
4502 if (PL_lex_formbrack) {
4506 if (PL_oldoldbufptr == PL_last_lop)
4507 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4509 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4510 OPERATOR(HASHBRACK);
4512 while (s < PL_bufend && SPACE_OR_TAB(*s))
4515 PL_tokenbuf[0] = '\0';
4516 if (d < PL_bufend && *d == '-') {
4517 PL_tokenbuf[0] = '-';
4519 while (d < PL_bufend && SPACE_OR_TAB(*d))
4522 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4523 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4525 while (d < PL_bufend && SPACE_OR_TAB(*d))
4528 const char minus = (PL_tokenbuf[0] == '-');
4529 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4537 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4542 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4547 if (PL_oldoldbufptr == PL_last_lop)
4548 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4550 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4553 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4555 /* This hack is to get the ${} in the message. */
4557 yyerror("syntax error");
4560 OPERATOR(HASHBRACK);
4562 /* This hack serves to disambiguate a pair of curlies
4563 * as being a block or an anon hash. Normally, expectation
4564 * determines that, but in cases where we're not in a
4565 * position to expect anything in particular (like inside
4566 * eval"") we have to resolve the ambiguity. This code
4567 * covers the case where the first term in the curlies is a
4568 * quoted string. Most other cases need to be explicitly
4569 * disambiguated by prepending a "+" before the opening
4570 * curly in order to force resolution as an anon hash.
4572 * XXX should probably propagate the outer expectation
4573 * into eval"" to rely less on this hack, but that could
4574 * potentially break current behavior of eval"".
4578 if (*s == '\'' || *s == '"' || *s == '`') {
4579 /* common case: get past first string, handling escapes */
4580 for (t++; t < PL_bufend && *t != *s;)
4581 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4585 else if (*s == 'q') {
4588 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4591 /* skip q//-like construct */
4593 char open, close, term;
4596 while (t < PL_bufend && isSPACE(*t))
4598 /* check for q => */
4599 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4600 OPERATOR(HASHBRACK);
4604 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4608 for (t++; t < PL_bufend; t++) {
4609 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4611 else if (*t == open)
4615 for (t++; t < PL_bufend; t++) {
4616 if (*t == '\\' && t+1 < PL_bufend)
4618 else if (*t == close && --brackets <= 0)
4620 else if (*t == open)
4627 /* skip plain q word */
4628 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4631 else if (isALNUM_lazy_if(t,UTF)) {
4633 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4636 while (t < PL_bufend && isSPACE(*t))
4638 /* if comma follows first term, call it an anon hash */
4639 /* XXX it could be a comma expression with loop modifiers */
4640 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4641 || (*t == '=' && t[1] == '>')))
4642 OPERATOR(HASHBRACK);
4643 if (PL_expect == XREF)
4646 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4652 pl_yylval.ival = CopLINE(PL_curcop);
4653 if (isSPACE(*s) || *s == '#')
4654 PL_copline = NOLINE; /* invalidate current command line number */
4659 if (PL_lex_brackets <= 0)
4660 yyerror("Unmatched right curly bracket");
4662 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4663 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4664 PL_lex_formbrack = 0;
4665 if (PL_lex_state == LEX_INTERPNORMAL) {
4666 if (PL_lex_brackets == 0) {
4667 if (PL_expect & XFAKEBRACK) {
4668 PL_expect &= XENUMMASK;
4669 PL_lex_state = LEX_INTERPEND;
4674 PL_thiswhite = newSVpvs("");
4675 sv_catpvs(PL_thiswhite,"}");
4678 return yylex(); /* ignore fake brackets */
4680 if (*s == '-' && s[1] == '>')
4681 PL_lex_state = LEX_INTERPENDMAYBE;
4682 else if (*s != '[' && *s != '{')
4683 PL_lex_state = LEX_INTERPEND;
4686 if (PL_expect & XFAKEBRACK) {
4687 PL_expect &= XENUMMASK;
4689 return yylex(); /* ignore fake brackets */
4691 start_force(PL_curforce);
4693 curmad('X', newSVpvn(s-1,1));
4694 CURMAD('_', PL_thiswhite);
4699 PL_thistoken = newSVpvs("");
4707 if (PL_expect == XOPERATOR) {
4708 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4709 && isIDFIRST_lazy_if(s,UTF))
4711 CopLINE_dec(PL_curcop);
4712 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
4713 CopLINE_inc(PL_curcop);
4718 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4720 PL_expect = XOPERATOR;
4721 force_ident(PL_tokenbuf, '&');
4725 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
4737 const char tmp = *s++;
4744 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4745 && strchr("+-*/%.^&|<",tmp))
4746 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4747 "Reversed %c= operator",(int)tmp);
4749 if (PL_expect == XSTATE && isALPHA(tmp) &&
4750 (s == PL_linestart+1 || s[-2] == '\n') )
4752 if (PL_in_eval && !PL_rsfp) {
4757 if (strnEQ(s,"=cut",4)) {
4773 PL_thiswhite = newSVpvs("");
4774 sv_catpvn(PL_thiswhite, PL_linestart,
4775 PL_bufend - PL_linestart);
4779 PL_doextract = TRUE;
4783 if (PL_lex_brackets < PL_lex_formbrack) {
4785 #ifdef PERL_STRICT_CR
4786 while (SPACE_OR_TAB(*t))
4788 while (SPACE_OR_TAB(*t) || *t == '\r')
4791 if (*t == '\n' || *t == '#') {
4802 const char tmp = *s++;
4804 /* was this !=~ where !~ was meant?
4805 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4807 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4808 const char *t = s+1;
4810 while (t < PL_bufend && isSPACE(*t))
4813 if (*t == '/' || *t == '?' ||
4814 ((*t == 'm' || *t == 's' || *t == 'y')
4815 && !isALNUM(t[1])) ||
4816 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4817 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4818 "!=~ should be !~");
4828 if (PL_expect != XOPERATOR) {
4829 if (s[1] != '<' && !strchr(s,'>'))
4832 s = scan_heredoc(s);
4834 s = scan_inputsymbol(s);
4835 TERM(sublex_start());
4841 SHop(OP_LEFT_SHIFT);
4855 const char tmp = *s++;
4857 SHop(OP_RIGHT_SHIFT);
4858 else if (tmp == '=')
4867 if (PL_expect == XOPERATOR) {
4868 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4869 return deprecate_commaless_var_list();
4873 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4874 PL_tokenbuf[0] = '@';
4875 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4876 sizeof PL_tokenbuf - 1, FALSE);
4877 if (PL_expect == XOPERATOR)
4878 no_op("Array length", s);
4879 if (!PL_tokenbuf[1])
4881 PL_expect = XOPERATOR;
4882 PL_pending_ident = '#';
4886 PL_tokenbuf[0] = '$';
4887 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4888 sizeof PL_tokenbuf - 1, FALSE);
4889 if (PL_expect == XOPERATOR)
4891 if (!PL_tokenbuf[1]) {
4893 yyerror("Final $ should be \\$ or $name");
4897 /* This kludge not intended to be bulletproof. */
4898 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4899 pl_yylval.opval = newSVOP(OP_CONST, 0,
4900 newSViv(CopARYBASE_get(&PL_compiling)));
4901 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
4907 const char tmp = *s;
4908 if (PL_lex_state == LEX_NORMAL)
4911 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4912 && intuit_more(s)) {
4914 PL_tokenbuf[0] = '@';
4915 if (ckWARN(WARN_SYNTAX)) {
4918 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4921 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4922 while (t < PL_bufend && *t != ']')
4924 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4925 "Multidimensional syntax %.*s not supported",
4926 (int)((t - PL_bufptr) + 1), PL_bufptr);
4930 else if (*s == '{') {
4932 PL_tokenbuf[0] = '%';
4933 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4934 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4936 char tmpbuf[sizeof PL_tokenbuf];
4939 } while (isSPACE(*t));
4940 if (isIDFIRST_lazy_if(t,UTF)) {
4942 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4946 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4947 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4948 "You need to quote \"%s\"",
4955 PL_expect = XOPERATOR;
4956 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4957 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4958 if (!islop || PL_last_lop_op == OP_GREPSTART)
4959 PL_expect = XOPERATOR;
4960 else if (strchr("$@\"'`q", *s))
4961 PL_expect = XTERM; /* e.g. print $fh "foo" */
4962 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4963 PL_expect = XTERM; /* e.g. print $fh &sub */
4964 else if (isIDFIRST_lazy_if(s,UTF)) {
4965 char tmpbuf[sizeof PL_tokenbuf];
4967 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4968 if ((t2 = keyword(tmpbuf, len, 0))) {
4969 /* binary operators exclude handle interpretations */
4981 PL_expect = XTERM; /* e.g. print $fh length() */
4986 PL_expect = XTERM; /* e.g. print $fh subr() */
4989 else if (isDIGIT(*s))
4990 PL_expect = XTERM; /* e.g. print $fh 3 */
4991 else if (*s == '.' && isDIGIT(s[1]))
4992 PL_expect = XTERM; /* e.g. print $fh .3 */
4993 else if ((*s == '?' || *s == '-' || *s == '+')
4994 && !isSPACE(s[1]) && s[1] != '=')
4995 PL_expect = XTERM; /* e.g. print $fh -1 */
4996 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4998 PL_expect = XTERM; /* e.g. print $fh /.../
4999 XXX except DORDOR operator
5001 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5003 PL_expect = XTERM; /* print $fh <<"EOF" */
5006 PL_pending_ident = '$';
5010 if (PL_expect == XOPERATOR)
5012 PL_tokenbuf[0] = '@';
5013 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5014 if (!PL_tokenbuf[1]) {
5017 if (PL_lex_state == LEX_NORMAL)
5019 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5021 PL_tokenbuf[0] = '%';
5023 /* Warn about @ where they meant $. */
5024 if (*s == '[' || *s == '{') {
5025 if (ckWARN(WARN_SYNTAX)) {
5026 const char *t = s + 1;
5027 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5029 if (*t == '}' || *t == ']') {
5031 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5032 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5033 "Scalar value %.*s better written as $%.*s",
5034 (int)(t-PL_bufptr), PL_bufptr,
5035 (int)(t-PL_bufptr-1), PL_bufptr+1);
5040 PL_pending_ident = '@';
5043 case '/': /* may be division, defined-or, or pattern */
5044 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5048 case '?': /* may either be conditional or pattern */
5049 if (PL_expect == XOPERATOR) {
5057 /* A // operator. */
5067 /* Disable warning on "study /blah/" */
5068 if (PL_oldoldbufptr == PL_last_uni
5069 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5070 || memNE(PL_last_uni, "study", 5)
5071 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5074 s = scan_pat(s,OP_MATCH);
5075 TERM(sublex_start());
5079 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5080 #ifdef PERL_STRICT_CR
5083 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5085 && (s == PL_linestart || s[-1] == '\n') )
5087 PL_lex_formbrack = 0;
5091 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5095 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5101 pl_yylval.ival = OPf_SPECIAL;
5107 if (PL_expect != XOPERATOR)
5112 case '0': case '1': case '2': case '3': case '4':
5113 case '5': case '6': case '7': case '8': case '9':
5114 s = scan_num(s, &pl_yylval);
5115 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5116 if (PL_expect == XOPERATOR)
5121 s = scan_str(s,!!PL_madskills,FALSE);
5122 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5123 if (PL_expect == XOPERATOR) {
5124 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5125 return deprecate_commaless_var_list();
5132 pl_yylval.ival = OP_CONST;
5133 TERM(sublex_start());
5136 s = scan_str(s,!!PL_madskills,FALSE);
5137 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5138 if (PL_expect == XOPERATOR) {
5139 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5140 return deprecate_commaless_var_list();
5147 pl_yylval.ival = OP_CONST;
5148 /* FIXME. I think that this can be const if char *d is replaced by
5149 more localised variables. */
5150 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5151 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5152 pl_yylval.ival = OP_STRINGIFY;
5156 TERM(sublex_start());
5159 s = scan_str(s,!!PL_madskills,FALSE);
5160 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5161 if (PL_expect == XOPERATOR)
5162 no_op("Backticks",s);
5165 readpipe_override();
5166 TERM(sublex_start());
5170 if (PL_lex_inwhat && isDIGIT(*s))
5171 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5173 if (PL_expect == XOPERATOR)
5174 no_op("Backslash",s);
5178 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5179 char *start = s + 2;
5180 while (isDIGIT(*start) || *start == '_')
5182 if (*start == '.' && isDIGIT(start[1])) {
5183 s = scan_num(s, &pl_yylval);
5186 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5187 else if (!isALPHA(*start) && (PL_expect == XTERM
5188 || PL_expect == XREF || PL_expect == XSTATE
5189 || PL_expect == XTERMORDORDOR)) {
5190 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5192 s = scan_num(s, &pl_yylval);
5199 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5241 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5243 /* Some keywords can be followed by any delimiter, including ':' */
5244 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5245 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5246 (PL_tokenbuf[0] == 'q' &&
5247 strchr("qwxr", PL_tokenbuf[1])))));
5249 /* x::* is just a word, unless x is "CORE" */
5250 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5254 while (d < PL_bufend && isSPACE(*d))
5255 d++; /* no comments skipped here, or s### is misparsed */
5257 /* Is this a label? */
5258 if (!tmp && PL_expect == XSTATE
5259 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5260 tmp = keyword(PL_tokenbuf, len, 0);
5262 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5264 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5269 /* Check for keywords */
5270 tmp = keyword(PL_tokenbuf, len, 0);
5272 /* Is this a word before a => operator? */
5273 if (*d == '=' && d[1] == '>') {
5276 = (OP*)newSVOP(OP_CONST, 0,
5277 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5278 pl_yylval.opval->op_private = OPpCONST_BARE;
5282 if (tmp < 0) { /* second-class keyword? */
5283 GV *ogv = NULL; /* override (winner) */
5284 GV *hgv = NULL; /* hidden (loser) */
5285 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5287 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5290 if (GvIMPORTED_CV(gv))
5292 else if (! CvMETHOD(cv))
5296 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5297 (gv = *gvp) && isGV_with_GP(gv) &&
5298 GvCVu(gv) && GvIMPORTED_CV(gv))
5305 tmp = 0; /* overridden by import or by GLOBAL */
5308 && -tmp==KEY_lock /* XXX generalizable kludge */
5311 tmp = 0; /* any sub overrides "weak" keyword */
5313 else { /* no override */
5315 if (tmp == KEY_dump) {
5316 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5317 "dump() better written as CORE::dump()");
5321 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
5322 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5323 "Ambiguous call resolved as CORE::%s(), %s",
5324 GvENAME(hgv), "qualify as such or use &");
5331 default: /* not a keyword */
5332 /* Trade off - by using this evil construction we can pull the
5333 variable gv into the block labelled keylookup. If not, then
5334 we have to give it function scope so that the goto from the
5335 earlier ':' case doesn't bypass the initialisation. */
5337 just_a_word_zero_gv:
5345 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5348 SV *nextPL_nextwhite = 0;
5352 /* Get the rest if it looks like a package qualifier */
5354 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5356 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5359 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5360 *s == '\'' ? "'" : "::");
5365 if (PL_expect == XOPERATOR) {
5366 if (PL_bufptr == PL_linestart) {
5367 CopLINE_dec(PL_curcop);
5368 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5369 CopLINE_inc(PL_curcop);
5372 no_op("Bareword",s);
5375 /* Look for a subroutine with this name in current package,
5376 unless name is "Foo::", in which case Foo is a bearword
5377 (and a package name). */
5379 if (len > 2 && !PL_madskills &&
5380 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5382 if (ckWARN(WARN_BAREWORD)
5383 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5384 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5385 "Bareword \"%s\" refers to nonexistent package",
5388 PL_tokenbuf[len] = '\0';
5394 /* Mustn't actually add anything to a symbol table.
5395 But also don't want to "initialise" any placeholder
5396 constants that might already be there into full
5397 blown PVGVs with attached PVCV. */
5398 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5399 GV_NOADD_NOINIT, SVt_PVCV);
5404 /* if we saw a global override before, get the right name */
5407 sv = newSVpvs("CORE::GLOBAL::");
5408 sv_catpv(sv,PL_tokenbuf);
5411 /* If len is 0, newSVpv does strlen(), which is correct.
5412 If len is non-zero, then it will be the true length,
5413 and so the scalar will be created correctly. */
5414 sv = newSVpv(PL_tokenbuf,len);
5417 if (PL_madskills && !PL_thistoken) {
5418 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5419 PL_thistoken = newSVpvn(start,s - start);
5420 PL_realtokenstart = s - SvPVX(PL_linestr);
5424 /* Presume this is going to be a bareword of some sort. */
5427 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5428 pl_yylval.opval->op_private = OPpCONST_BARE;
5429 /* UTF-8 package name? */
5430 if (UTF && !IN_BYTES &&
5431 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5434 /* And if "Foo::", then that's what it certainly is. */
5439 /* Do the explicit type check so that we don't need to force
5440 the initialisation of the symbol table to have a real GV.
5441 Beware - gv may not really be a PVGV, cv may not really be
5442 a PVCV, (because of the space optimisations that gv_init
5443 understands) But they're true if for this symbol there is
5444 respectively a typeglob and a subroutine.
5446 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5447 /* Real typeglob, so get the real subroutine: */
5449 /* A proxy for a subroutine in this package? */
5450 : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
5453 /* See if it's the indirect object for a list operator. */
5455 if (PL_oldoldbufptr &&
5456 PL_oldoldbufptr < PL_bufptr &&
5457 (PL_oldoldbufptr == PL_last_lop
5458 || PL_oldoldbufptr == PL_last_uni) &&
5459 /* NO SKIPSPACE BEFORE HERE! */
5460 (PL_expect == XREF ||
5461 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5463 bool immediate_paren = *s == '(';
5465 /* (Now we can afford to cross potential line boundary.) */
5466 s = SKIPSPACE2(s,nextPL_nextwhite);
5468 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5471 /* Two barewords in a row may indicate method call. */
5473 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5474 (tmp = intuit_method(s, gv, cv)))
5477 /* If not a declared subroutine, it's an indirect object. */
5478 /* (But it's an indir obj regardless for sort.) */
5479 /* Also, if "_" follows a filetest operator, it's a bareword */
5482 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5484 (PL_last_lop_op != OP_MAPSTART &&
5485 PL_last_lop_op != OP_GREPSTART))))
5486 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5487 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5490 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5495 PL_expect = XOPERATOR;
5498 s = SKIPSPACE2(s,nextPL_nextwhite);
5499 PL_nextwhite = nextPL_nextwhite;
5504 /* Is this a word before a => operator? */
5505 if (*s == '=' && s[1] == '>' && !pkgname) {
5507 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
5508 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5509 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
5513 /* If followed by a paren, it's certainly a subroutine. */
5518 while (SPACE_OR_TAB(*d))
5520 if (*d == ')' && (sv = gv_const_sv(gv))) {
5527 PL_nextwhite = PL_thiswhite;
5530 start_force(PL_curforce);
5532 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5533 PL_expect = XOPERATOR;
5536 PL_nextwhite = nextPL_nextwhite;
5537 curmad('X', PL_thistoken);
5538 PL_thistoken = newSVpvs("");
5546 /* If followed by var or block, call it a method (unless sub) */
5548 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5549 PL_last_lop = PL_oldbufptr;
5550 PL_last_lop_op = OP_METHOD;
5554 /* If followed by a bareword, see if it looks like indir obj. */
5557 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5558 && (tmp = intuit_method(s, gv, cv)))
5561 /* Not a method, so call it a subroutine (if defined) */
5564 if (lastchar == '-')
5565 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5566 "Ambiguous use of -%s resolved as -&%s()",
5567 PL_tokenbuf, PL_tokenbuf);
5568 /* Check for a constant sub */
5569 if ((sv = gv_const_sv(gv))) {
5571 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
5572 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5573 pl_yylval.opval->op_private = 0;
5577 /* Resolve to GV now. */
5578 if (SvTYPE(gv) != SVt_PVGV) {
5579 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5580 assert (SvTYPE(gv) == SVt_PVGV);
5581 /* cv must have been some sort of placeholder, so
5582 now needs replacing with a real code reference. */
5586 op_free(pl_yylval.opval);
5587 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5588 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5589 PL_last_lop = PL_oldbufptr;
5590 PL_last_lop_op = OP_ENTERSUB;
5591 /* Is there a prototype? */
5599 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5602 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5604 while (*proto == ';')
5606 if (*proto == '&' && *s == '{') {
5608 sv_setpvs(PL_subname, "__ANON__");
5610 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5617 PL_nextwhite = PL_thiswhite;
5620 start_force(PL_curforce);
5621 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5624 PL_nextwhite = nextPL_nextwhite;
5625 curmad('X', PL_thistoken);
5626 PL_thistoken = newSVpvs("");
5633 /* Guess harder when madskills require "best effort". */
5634 if (PL_madskills && (!gv || !GvCVu(gv))) {
5635 int probable_sub = 0;
5636 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5638 else if (isALPHA(*s)) {
5642 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5643 if (!keyword(tmpbuf, tmplen, 0))
5646 while (d < PL_bufend && isSPACE(*d))
5648 if (*d == '=' && d[1] == '>')
5653 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5654 op_free(pl_yylval.opval);
5655 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5656 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5657 PL_last_lop = PL_oldbufptr;
5658 PL_last_lop_op = OP_ENTERSUB;
5659 PL_nextwhite = PL_thiswhite;
5661 start_force(PL_curforce);
5662 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5664 PL_nextwhite = nextPL_nextwhite;
5665 curmad('X', PL_thistoken);
5666 PL_thistoken = newSVpvs("");
5671 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5678 /* Call it a bare word */
5680 if (PL_hints & HINT_STRICT_SUBS)
5681 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5684 /* after "print" and similar functions (corresponding to
5685 * "F? L" in opcode.pl), whatever wasn't already parsed as
5686 * a filehandle should be subject to "strict subs".
5687 * Likewise for the optional indirect-object argument to system
5688 * or exec, which can't be a bareword */
5689 if ((PL_last_lop_op == OP_PRINT
5690 || PL_last_lop_op == OP_PRTF
5691 || PL_last_lop_op == OP_SAY
5692 || PL_last_lop_op == OP_SYSTEM
5693 || PL_last_lop_op == OP_EXEC)
5694 && (PL_hints & HINT_STRICT_SUBS))
5695 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5696 if (lastchar != '-') {
5697 if (ckWARN(WARN_RESERVED)) {
5701 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5702 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5709 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
5710 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5711 "Operator or semicolon missing before %c%s",
5712 lastchar, PL_tokenbuf);
5713 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5714 "Ambiguous use of %c resolved as operator %c",
5715 lastchar, lastchar);
5721 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5722 newSVpv(CopFILE(PL_curcop),0));
5726 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5727 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5730 case KEY___PACKAGE__:
5731 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5733 ? newSVhek(HvNAME_HEK(PL_curstash))
5740 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5741 const char *pname = "main";
5742 if (PL_tokenbuf[2] == 'D')
5743 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5744 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5748 GvIOp(gv) = newIO();
5749 IoIFP(GvIOp(gv)) = PL_rsfp;
5750 #if defined(HAS_FCNTL) && defined(F_SETFD)
5752 const int fd = PerlIO_fileno(PL_rsfp);
5753 fcntl(fd,F_SETFD,fd >= 3);
5756 /* Mark this internal pseudo-handle as clean */
5757 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5758 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5759 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5761 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5762 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5763 /* if the script was opened in binmode, we need to revert
5764 * it to text mode for compatibility; but only iff it has CRs
5765 * XXX this is a questionable hack at best. */
5766 if (PL_bufend-PL_bufptr > 2
5767 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5770 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5771 loc = PerlIO_tell(PL_rsfp);
5772 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5775 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5777 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5778 #endif /* NETWARE */
5779 #ifdef PERLIO_IS_STDIO /* really? */
5780 # if defined(__BORLANDC__)
5781 /* XXX see note in do_binmode() */
5782 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5786 PerlIO_seek(PL_rsfp, loc, 0);
5790 #ifdef PERLIO_LAYERS
5793 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5794 else if (PL_encoding) {
5801 XPUSHs(PL_encoding);
5803 call_method("name", G_SCALAR);
5807 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5808 Perl_form(aTHX_ ":encoding(%"SVf")",
5817 if (PL_realtokenstart >= 0) {
5818 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5820 PL_endwhite = newSVpvs("");
5821 sv_catsv(PL_endwhite, PL_thiswhite);
5823 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5824 PL_realtokenstart = -1;
5826 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5827 SvCUR(PL_endwhite))) != NULL) ;
5842 if (PL_expect == XSTATE) {
5849 if (*s == ':' && s[1] == ':') {
5852 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5853 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5854 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5857 else if (tmp == KEY_require || tmp == KEY_do)
5858 /* that's a way to remember we saw "CORE::" */
5871 LOP(OP_ACCEPT,XTERM);
5877 LOP(OP_ATAN2,XTERM);
5883 LOP(OP_BINMODE,XTERM);
5886 LOP(OP_BLESS,XTERM);
5895 /* When 'use switch' is in effect, continue has a dual
5896 life as a control operator. */
5898 if (!FEATURE_IS_ENABLED("switch"))
5901 /* We have to disambiguate the two senses of
5902 "continue". If the next token is a '{' then
5903 treat it as the start of a continue block;
5904 otherwise treat it as a control operator.
5916 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5933 if (!PL_cryptseen) {
5934 PL_cryptseen = TRUE;
5938 LOP(OP_CRYPT,XTERM);
5941 LOP(OP_CHMOD,XTERM);
5944 LOP(OP_CHOWN,XTERM);
5947 LOP(OP_CONNECT,XTERM);
5966 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5967 if (orig_keyword == KEY_do) {
5976 PL_hints |= HINT_BLOCK_SCOPE;
5986 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5987 LOP(OP_DBMOPEN,XTERM);
5993 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6000 pl_yylval.ival = CopLINE(PL_curcop);
6016 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6017 UNIBRACK(OP_ENTEREVAL);
6031 case KEY_endhostent:
6037 case KEY_endservent:
6040 case KEY_endprotoent:
6051 pl_yylval.ival = CopLINE(PL_curcop);
6053 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6056 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6059 if ((PL_bufend - p) >= 3 &&
6060 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6062 else if ((PL_bufend - p) >= 4 &&
6063 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6066 if (isIDFIRST_lazy_if(p,UTF)) {
6067 p = scan_ident(p, PL_bufend,
6068 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6072 Perl_croak(aTHX_ "Missing $ on loop variable");
6074 s = SvPVX(PL_linestr) + soff;
6080 LOP(OP_FORMLINE,XTERM);
6086 LOP(OP_FCNTL,XTERM);
6092 LOP(OP_FLOCK,XTERM);
6101 LOP(OP_GREPSTART, XREF);
6104 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6119 case KEY_getpriority:
6120 LOP(OP_GETPRIORITY,XTERM);
6122 case KEY_getprotobyname:
6125 case KEY_getprotobynumber:
6126 LOP(OP_GPBYNUMBER,XTERM);
6128 case KEY_getprotoent:
6140 case KEY_getpeername:
6141 UNI(OP_GETPEERNAME);
6143 case KEY_gethostbyname:
6146 case KEY_gethostbyaddr:
6147 LOP(OP_GHBYADDR,XTERM);
6149 case KEY_gethostent:
6152 case KEY_getnetbyname:
6155 case KEY_getnetbyaddr:
6156 LOP(OP_GNBYADDR,XTERM);
6161 case KEY_getservbyname:
6162 LOP(OP_GSBYNAME,XTERM);
6164 case KEY_getservbyport:
6165 LOP(OP_GSBYPORT,XTERM);
6167 case KEY_getservent:
6170 case KEY_getsockname:
6171 UNI(OP_GETSOCKNAME);
6173 case KEY_getsockopt:
6174 LOP(OP_GSOCKOPT,XTERM);
6189 pl_yylval.ival = CopLINE(PL_curcop);
6199 pl_yylval.ival = CopLINE(PL_curcop);
6203 LOP(OP_INDEX,XTERM);
6209 LOP(OP_IOCTL,XTERM);
6221 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6253 LOP(OP_LISTEN,XTERM);
6262 s = scan_pat(s,OP_MATCH);
6263 TERM(sublex_start());
6266 LOP(OP_MAPSTART, XREF);
6269 LOP(OP_MKDIR,XTERM);
6272 LOP(OP_MSGCTL,XTERM);
6275 LOP(OP_MSGGET,XTERM);
6278 LOP(OP_MSGRCV,XTERM);
6281 LOP(OP_MSGSND,XTERM);
6286 PL_in_my = (U16)tmp;
6288 if (isIDFIRST_lazy_if(s,UTF)) {
6292 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6293 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6295 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6296 if (!PL_in_my_stash) {
6299 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6303 if (PL_madskills) { /* just add type to declarator token */
6304 sv_catsv(PL_thistoken, PL_nextwhite);
6306 sv_catpvn(PL_thistoken, start, s - start);
6314 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6321 s = tokenize_use(0, s);
6325 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6332 if (isIDFIRST_lazy_if(s,UTF)) {
6334 for (d = s; isALNUM_lazy_if(d,UTF);)
6336 for (t=d; isSPACE(*t);)
6338 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6340 && !(t[0] == '=' && t[1] == '>')
6342 int parms_len = (int)(d-s);
6343 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6344 "Precedence problem: open %.*s should be open(%.*s)",
6345 parms_len, s, parms_len, s);
6351 pl_yylval.ival = OP_OR;
6361 LOP(OP_OPEN_DIR,XTERM);
6364 checkcomma(s,PL_tokenbuf,"filehandle");
6368 checkcomma(s,PL_tokenbuf,"filehandle");
6387 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6388 s = force_version(s, FALSE);
6392 LOP(OP_PIPE_OP,XTERM);
6395 s = scan_str(s,!!PL_madskills,FALSE);
6398 pl_yylval.ival = OP_CONST;
6399 TERM(sublex_start());
6405 s = scan_str(s,!!PL_madskills,FALSE);
6408 PL_expect = XOPERATOR;
6410 if (SvCUR(PL_lex_stuff)) {
6413 d = SvPV_force(PL_lex_stuff, len);
6415 for (; isSPACE(*d) && len; --len, ++d)
6420 if (!warned && ckWARN(WARN_QW)) {
6421 for (; !isSPACE(*d) && len; --len, ++d) {
6423 Perl_warner(aTHX_ packWARN(WARN_QW),
6424 "Possible attempt to separate words with commas");
6427 else if (*d == '#') {
6428 Perl_warner(aTHX_ packWARN(WARN_QW),
6429 "Possible attempt to put comments in qw() list");
6435 for (; !isSPACE(*d) && len; --len, ++d)
6438 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
6439 words = append_elem(OP_LIST, words,
6440 newSVOP(OP_CONST, 0, tokeq(sv)));
6444 start_force(PL_curforce);
6445 NEXTVAL_NEXTTOKE.opval = words;
6450 SvREFCNT_dec(PL_lex_stuff);
6451 PL_lex_stuff = NULL;
6457 s = scan_str(s,!!PL_madskills,FALSE);
6460 pl_yylval.ival = OP_STRINGIFY;
6461 if (SvIVX(PL_lex_stuff) == '\'')
6462 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6463 TERM(sublex_start());
6466 s = scan_pat(s,OP_QR);
6467 TERM(sublex_start());
6470 s = scan_str(s,!!PL_madskills,FALSE);
6473 readpipe_override();
6474 TERM(sublex_start());
6482 s = force_version(s, FALSE);
6484 else if (*s != 'v' || !isDIGIT(s[1])
6485 || (s = force_version(s, TRUE), *s == 'v'))
6487 *PL_tokenbuf = '\0';
6488 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6489 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6490 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6492 yyerror("<> should be quotes");
6494 if (orig_keyword == KEY_require) {
6502 PL_last_uni = PL_oldbufptr;
6503 PL_last_lop_op = OP_REQUIRE;
6505 return REPORT( (int)REQUIRE );
6511 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6515 LOP(OP_RENAME,XTERM);
6524 LOP(OP_RINDEX,XTERM);
6533 UNIDOR(OP_READLINE);
6536 UNIDOR(OP_BACKTICK);
6545 LOP(OP_REVERSE,XTERM);
6548 UNIDOR(OP_READLINK);
6555 if (pl_yylval.opval)
6556 TERM(sublex_start());
6558 TOKEN(1); /* force error */
6561 checkcomma(s,PL_tokenbuf,"filehandle");
6571 LOP(OP_SELECT,XTERM);
6577 LOP(OP_SEMCTL,XTERM);
6580 LOP(OP_SEMGET,XTERM);
6583 LOP(OP_SEMOP,XTERM);
6589 LOP(OP_SETPGRP,XTERM);
6591 case KEY_setpriority:
6592 LOP(OP_SETPRIORITY,XTERM);
6594 case KEY_sethostent:
6600 case KEY_setservent:
6603 case KEY_setprotoent:
6613 LOP(OP_SEEKDIR,XTERM);
6615 case KEY_setsockopt:
6616 LOP(OP_SSOCKOPT,XTERM);
6622 LOP(OP_SHMCTL,XTERM);
6625 LOP(OP_SHMGET,XTERM);
6628 LOP(OP_SHMREAD,XTERM);
6631 LOP(OP_SHMWRITE,XTERM);
6634 LOP(OP_SHUTDOWN,XTERM);
6643 LOP(OP_SOCKET,XTERM);
6645 case KEY_socketpair:
6646 LOP(OP_SOCKPAIR,XTERM);
6649 checkcomma(s,PL_tokenbuf,"subroutine name");
6651 if (*s == ';' || *s == ')') /* probably a close */
6652 Perl_croak(aTHX_ "sort is now a reserved word");
6654 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6658 LOP(OP_SPLIT,XTERM);
6661 LOP(OP_SPRINTF,XTERM);
6664 LOP(OP_SPLICE,XTERM);
6679 LOP(OP_SUBSTR,XTERM);
6685 char tmpbuf[sizeof PL_tokenbuf];
6686 SSize_t tboffset = 0;
6687 expectation attrful;
6688 bool have_name, have_proto;
6689 const int key = tmp;
6694 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6695 SV *subtoken = newSVpvn(tstart, s - tstart);
6699 s = SKIPSPACE2(s,tmpwhite);
6704 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6705 (*s == ':' && s[1] == ':'))
6708 SV *nametoke = NULL;
6712 attrful = XATTRBLOCK;
6713 /* remember buffer pos'n for later force_word */
6714 tboffset = s - PL_oldbufptr;
6715 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6718 nametoke = newSVpvn(s, d - s);
6720 if (memchr(tmpbuf, ':', len))
6721 sv_setpvn(PL_subname, tmpbuf, len);
6723 sv_setsv(PL_subname,PL_curstname);
6724 sv_catpvs(PL_subname,"::");
6725 sv_catpvn(PL_subname,tmpbuf,len);
6732 CURMAD('X', nametoke);
6733 CURMAD('_', tmpwhite);
6734 (void) force_word(PL_oldbufptr + tboffset, WORD,
6737 s = SKIPSPACE2(d,tmpwhite);
6744 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6745 PL_expect = XTERMBLOCK;
6746 attrful = XATTRTERM;
6747 sv_setpvs(PL_subname,"?");
6751 if (key == KEY_format) {
6753 PL_lex_formbrack = PL_lex_brackets + 1;
6755 PL_thistoken = subtoken;
6759 (void) force_word(PL_oldbufptr + tboffset, WORD,
6765 /* Look for a prototype */
6768 bool bad_proto = FALSE;
6769 bool in_brackets = FALSE;
6770 char greedy_proto = ' ';
6771 bool proto_after_greedy_proto = FALSE;
6772 bool must_be_last = FALSE;
6773 bool underscore = FALSE;
6774 bool seen_underscore = FALSE;
6775 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6777 s = scan_str(s,!!PL_madskills,FALSE);
6779 Perl_croak(aTHX_ "Prototype not terminated");
6780 /* strip spaces and check for bad characters */
6781 d = SvPVX(PL_lex_stuff);
6783 for (p = d; *p; ++p) {
6789 proto_after_greedy_proto = TRUE;
6790 if (!strchr("$@%*;[]&\\_", *p)) {
6802 else if ( *p == ']' ) {
6803 in_brackets = FALSE;
6805 else if ( (*p == '@' || *p == '%') &&
6806 ( tmp < 2 || d[tmp-2] != '\\' ) &&
6808 must_be_last = TRUE;
6811 else if ( *p == '_' ) {
6812 underscore = seen_underscore = TRUE;
6819 if (proto_after_greedy_proto)
6820 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6821 "Prototype after '%c' for %"SVf" : %s",
6822 greedy_proto, SVfARG(PL_subname), d);
6824 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6825 "Illegal character %sin prototype for %"SVf" : %s",
6826 seen_underscore ? "after '_' " : "",
6827 SVfARG(PL_subname), d);
6828 SvCUR_set(PL_lex_stuff, tmp);
6833 CURMAD('q', PL_thisopen);
6834 CURMAD('_', tmpwhite);
6835 CURMAD('=', PL_thisstuff);
6836 CURMAD('Q', PL_thisclose);
6837 NEXTVAL_NEXTTOKE.opval =
6838 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6839 PL_lex_stuff = NULL;
6842 s = SKIPSPACE2(s,tmpwhite);
6850 if (*s == ':' && s[1] != ':')
6851 PL_expect = attrful;
6852 else if (*s != '{' && key == KEY_sub) {
6854 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6856 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6863 curmad('^', newSVpvs(""));
6864 CURMAD('_', tmpwhite);
6868 PL_thistoken = subtoken;
6871 NEXTVAL_NEXTTOKE.opval =
6872 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6873 PL_lex_stuff = NULL;
6879 sv_setpvs(PL_subname, "__ANON__");
6881 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6885 (void) force_word(PL_oldbufptr + tboffset, WORD,
6894 LOP(OP_SYSTEM,XREF);
6897 LOP(OP_SYMLINK,XTERM);
6900 LOP(OP_SYSCALL,XTERM);
6903 LOP(OP_SYSOPEN,XTERM);
6906 LOP(OP_SYSSEEK,XTERM);
6909 LOP(OP_SYSREAD,XTERM);
6912 LOP(OP_SYSWRITE,XTERM);
6916 TERM(sublex_start());
6937 LOP(OP_TRUNCATE,XTERM);
6949 pl_yylval.ival = CopLINE(PL_curcop);
6953 pl_yylval.ival = CopLINE(PL_curcop);
6957 LOP(OP_UNLINK,XTERM);
6963 LOP(OP_UNPACK,XTERM);
6966 LOP(OP_UTIME,XTERM);
6972 LOP(OP_UNSHIFT,XTERM);
6975 s = tokenize_use(1, s);
6985 pl_yylval.ival = CopLINE(PL_curcop);
6989 pl_yylval.ival = CopLINE(PL_curcop);
6993 PL_hints |= HINT_BLOCK_SCOPE;
7000 LOP(OP_WAITPID,XTERM);
7009 ctl_l[0] = toCTRL('L');
7011 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7014 /* Make sure $^L is defined */
7015 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7020 if (PL_expect == XOPERATOR)
7026 pl_yylval.ival = OP_XOR;
7031 TERM(sublex_start());
7036 #pragma segment Main
7040 S_pending_ident(pTHX)
7045 /* pit holds the identifier we read and pending_ident is reset */
7046 char pit = PL_pending_ident;
7047 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7048 /* All routes through this function want to know if there is a colon. */
7049 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7050 PL_pending_ident = 0;
7052 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7053 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7054 "### Pending identifier '%s'\n", PL_tokenbuf); });
7056 /* if we're in a my(), we can't allow dynamics here.
7057 $foo'bar has already been turned into $foo::bar, so
7058 just check for colons.
7060 if it's a legal name, the OP is a PADANY.
7063 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
7065 yyerror(Perl_form(aTHX_ "No package name allowed for "
7066 "variable %s in \"our\"",
7068 tmp = allocmy(PL_tokenbuf);
7072 yyerror(Perl_form(aTHX_ PL_no_myglob,
7073 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7075 pl_yylval.opval = newOP(OP_PADANY, 0);
7076 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf);
7082 build the ops for accesses to a my() variable.
7084 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7085 then used in a comparison. This catches most, but not
7086 all cases. For instance, it catches
7087 sort { my($a); $a <=> $b }
7089 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7090 (although why you'd do that is anyone's guess).
7095 tmp = pad_findmy(PL_tokenbuf);
7096 if (tmp != NOT_IN_PAD) {
7097 /* might be an "our" variable" */
7098 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7099 /* build ops for a bareword */
7100 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7101 HEK * const stashname = HvNAME_HEK(stash);
7102 SV * const sym = newSVhek(stashname);
7103 sv_catpvs(sym, "::");
7104 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7105 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7106 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7109 ? (GV_ADDMULTI | GV_ADDINEVAL)
7112 ((PL_tokenbuf[0] == '$') ? SVt_PV
7113 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7118 /* if it's a sort block and they're naming $a or $b */
7119 if (PL_last_lop_op == OP_SORT &&
7120 PL_tokenbuf[0] == '$' &&
7121 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7124 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7125 d < PL_bufend && *d != '\n';
7128 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7129 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7135 pl_yylval.opval = newOP(OP_PADANY, 0);
7136 pl_yylval.opval->op_targ = tmp;
7142 Whine if they've said @foo in a doublequoted string,
7143 and @foo isn't a variable we can find in the symbol
7146 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7147 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7149 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7150 /* DO NOT warn for @- and @+ */
7151 && !( PL_tokenbuf[2] == '\0' &&
7152 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7155 /* Downgraded from fatal to warning 20000522 mjd */
7156 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7157 "Possible unintended interpolation of %s in string",
7162 /* build ops for a bareword */
7163 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7165 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7167 PL_tokenbuf + 1, tokenbuf_len - 1,
7168 /* If the identifier refers to a stash, don't autovivify it.
7169 * Change 24660 had the side effect of causing symbol table
7170 * hashes to always be defined, even if they were freshly
7171 * created and the only reference in the entire program was
7172 * the single statement with the defined %foo::bar:: test.
7173 * It appears that all code in the wild doing this actually
7174 * wants to know whether sub-packages have been loaded, so
7175 * by avoiding auto-vivifying symbol tables, we ensure that
7176 * defined %foo::bar:: continues to be false, and the existing
7177 * tests still give the expected answers, even though what
7178 * they're actually testing has now changed subtly.
7180 (*PL_tokenbuf == '%'
7181 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7184 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7185 ((PL_tokenbuf[0] == '$') ? SVt_PV
7186 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7192 * The following code was generated by perl_keyword.pl.
7196 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7200 PERL_ARGS_ASSERT_KEYWORD;
7204 case 1: /* 5 tokens of length 1 */
7236 case 2: /* 18 tokens of length 2 */
7382 case 3: /* 29 tokens of length 3 */
7386 if (name[1] == 'N' &&
7449 if (name[1] == 'i' &&
7481 if (name[1] == 'o' &&
7490 if (name[1] == 'e' &&
7499 if (name[1] == 'n' &&
7508 if (name[1] == 'o' &&
7517 if (name[1] == 'a' &&
7526 if (name[1] == 'o' &&
7588 if (name[1] == 'e' &&
7602 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7628 if (name[1] == 'i' &&
7637 if (name[1] == 's' &&
7646 if (name[1] == 'e' &&
7655 if (name[1] == 'o' &&
7667 case 4: /* 41 tokens of length 4 */
7671 if (name[1] == 'O' &&
7681 if (name[1] == 'N' &&
7691 if (name[1] == 'i' &&
7701 if (name[1] == 'h' &&
7711 if (name[1] == 'u' &&
7724 if (name[2] == 'c' &&
7733 if (name[2] == 's' &&
7742 if (name[2] == 'a' &&
7778 if (name[1] == 'o' &&
7791 if (name[2] == 't' &&
7800 if (name[2] == 'o' &&
7809 if (name[2] == 't' &&
7818 if (name[2] == 'e' &&
7831 if (name[1] == 'o' &&
7844 if (name[2] == 'y' &&
7853 if (name[2] == 'l' &&
7869 if (name[2] == 's' &&
7878 if (name[2] == 'n' &&
7887 if (name[2] == 'c' &&
7900 if (name[1] == 'e' &&
7910 if (name[1] == 'p' &&
7923 if (name[2] == 'c' &&
7932 if (name[2] == 'p' &&
7941 if (name[2] == 's' &&
7957 if (name[2] == 'n' &&
8027 if (name[2] == 'r' &&
8036 if (name[2] == 'r' &&
8045 if (name[2] == 'a' &&
8061 if (name[2] == 'l' &&
8123 if (name[2] == 'e' &&
8126 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8139 case 5: /* 39 tokens of length 5 */
8143 if (name[1] == 'E' &&
8154 if (name[1] == 'H' &&
8168 if (name[2] == 'a' &&
8178 if (name[2] == 'a' &&
8195 if (name[2] == 'e' &&
8205 if (name[2] == 'e' &&
8209 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8225 if (name[3] == 'i' &&
8234 if (name[3] == 'o' &&
8270 if (name[2] == 'o' &&
8280 if (name[2] == 'y' &&
8294 if (name[1] == 'l' &&
8308 if (name[2] == 'n' &&
8318 if (name[2] == 'o' &&
8332 if (name[1] == 'i' &&
8337 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8346 if (name[2] == 'd' &&
8356 if (name[2] == 'c' &&
8373 if (name[2] == 'c' &&
8383 if (name[2] == 't' &&
8397 if (name[1] == 'k' &&
8408 if (name[1] == 'r' &&
8422 if (name[2] == 's' &&
8432 if (name[2] == 'd' &&
8449 if (name[2] == 'm' &&
8459 if (name[2] == 'i' &&
8469 if (name[2] == 'e' &&
8479 if (name[2] == 'l' &&
8489 if (name[2] == 'a' &&
8502 if (name[3] == 't' &&
8505 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8511 if (name[3] == 'd' &&
8528 if (name[1] == 'i' &&
8542 if (name[2] == 'a' &&
8555 if (name[3] == 'e' &&
8590 if (name[2] == 'i' &&
8607 if (name[2] == 'i' &&
8617 if (name[2] == 'i' &&
8634 case 6: /* 33 tokens of length 6 */
8638 if (name[1] == 'c' &&
8653 if (name[2] == 'l' &&
8664 if (name[2] == 'r' &&
8679 if (name[1] == 'e' &&
8694 if (name[2] == 's' &&
8699 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8705 if (name[2] == 'i' &&
8723 if (name[2] == 'l' &&
8734 if (name[2] == 'r' &&
8749 if (name[1] == 'm' &&
8764 if (name[2] == 'n' &&
8775 if (name[2] == 's' &&
8790 if (name[1] == 's' &&
8796 if (name[4] == 't' &&
8805 if (name[4] == 'e' &&
8814 if (name[4] == 'c' &&
8823 if (name[4] == 'n' &&
8839 if (name[1] == 'r' &&
8857 if (name[3] == 'a' &&
8867 if (name[3] == 'u' &&
8881 if (name[2] == 'n' &&
8899 if (name[2] == 'a' &&
8913 if (name[3] == 'e' &&
8926 if (name[4] == 't' &&
8935 if (name[4] == 'e' &&
8957 if (name[4] == 't' &&
8966 if (name[4] == 'e' &&
8982 if (name[2] == 'c' &&
8993 if (name[2] == 'l' &&
9004 if (name[2] == 'b' &&
9015 if (name[2] == 's' &&
9038 if (name[4] == 's' &&
9047 if (name[4] == 'n' &&
9060 if (name[3] == 'a' &&
9077 if (name[1] == 'a' &&
9092 case 7: /* 29 tokens of length 7 */
9096 if (name[1] == 'E' &&
9109 if (name[1] == '_' &&
9122 if (name[1] == 'i' &&
9129 return -KEY_binmode;
9135 if (name[1] == 'o' &&
9142 return -KEY_connect;
9151 if (name[2] == 'm' &&
9157 return -KEY_dbmopen;
9168 if (name[4] == 'u' &&
9172 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9178 if (name[4] == 'n' &&
9199 if (name[1] == 'o' &&
9212 if (name[1] == 'e' &&
9219 if (name[5] == 'r' &&
9222 return -KEY_getpgrp;
9228 if (name[5] == 'i' &&
9231 return -KEY_getppid;
9244 if (name[1] == 'c' &&
9251 return -KEY_lcfirst;
9257 if (name[1] == 'p' &&
9264 return -KEY_opendir;
9270 if (name[1] == 'a' &&
9288 if (name[3] == 'd' &&
9293 return -KEY_readdir;
9299 if (name[3] == 'u' &&
9310 if (name[3] == 'e' &&
9315 return -KEY_reverse;
9334 if (name[3] == 'k' &&
9339 return -KEY_seekdir;
9345 if (name[3] == 'p' &&
9350 return -KEY_setpgrp;
9360 if (name[2] == 'm' &&
9366 return -KEY_shmread;
9372 if (name[2] == 'r' &&
9378 return -KEY_sprintf;
9387 if (name[3] == 'l' &&
9392 return -KEY_symlink;
9401 if (name[4] == 'a' &&
9405 return -KEY_syscall;
9411 if (name[4] == 'p' &&
9415 return -KEY_sysopen;
9421 if (name[4] == 'e' &&
9425 return -KEY_sysread;
9431 if (name[4] == 'e' &&
9435 return -KEY_sysseek;
9453 if (name[1] == 'e' &&
9460 return -KEY_telldir;
9469 if (name[2] == 'f' &&
9475 return -KEY_ucfirst;
9481 if (name[2] == 's' &&
9487 return -KEY_unshift;
9497 if (name[1] == 'a' &&
9504 return -KEY_waitpid;
9513 case 8: /* 26 tokens of length 8 */
9517 if (name[1] == 'U' &&
9525 return KEY_AUTOLOAD;
9536 if (name[3] == 'A' &&
9542 return KEY___DATA__;
9548 if (name[3] == 'I' &&
9554 return -KEY___FILE__;
9560 if (name[3] == 'I' &&
9566 return -KEY___LINE__;
9582 if (name[2] == 'o' &&
9589 return -KEY_closedir;
9595 if (name[2] == 'n' &&
9602 return -KEY_continue;
9612 if (name[1] == 'b' &&
9620 return -KEY_dbmclose;
9626 if (name[1] == 'n' &&
9632 if (name[4] == 'r' &&
9637 return -KEY_endgrent;
9643 if (name[4] == 'w' &&
9648 return -KEY_endpwent;
9661 if (name[1] == 'o' &&
9669 return -KEY_formline;
9675 if (name[1] == 'e' &&
9686 if (name[6] == 'n' &&
9689 return -KEY_getgrent;
9695 if (name[6] == 'i' &&
9698 return -KEY_getgrgid;
9704 if (name[6] == 'a' &&
9707 return -KEY_getgrnam;
9720 if (name[4] == 'o' &&
9725 return -KEY_getlogin;
9736 if (name[6] == 'n' &&
9739 return -KEY_getpwent;
9745 if (name[6] == 'a' &&
9748 return -KEY_getpwnam;
9754 if (name[6] == 'i' &&
9757 return -KEY_getpwuid;
9777 if (name[1] == 'e' &&
9784 if (name[5] == 'i' &&
9791 return -KEY_readline;
9796 return -KEY_readlink;
9807 if (name[5] == 'i' &&
9811 return -KEY_readpipe;
9832 if (name[4] == 'r' &&
9837 return -KEY_setgrent;
9843 if (name[4] == 'w' &&
9848 return -KEY_setpwent;
9864 if (name[3] == 'w' &&
9870 return -KEY_shmwrite;
9876 if (name[3] == 't' &&
9882 return -KEY_shutdown;
9892 if (name[2] == 's' &&
9899 return -KEY_syswrite;
9909 if (name[1] == 'r' &&
9917 return -KEY_truncate;
9926 case 9: /* 9 tokens of length 9 */
9930 if (name[1] == 'N' &&
9939 return KEY_UNITCHECK;
9945 if (name[1] == 'n' &&
9954 return -KEY_endnetent;
9960 if (name[1] == 'e' &&
9969 return -KEY_getnetent;
9975 if (name[1] == 'o' &&
9984 return -KEY_localtime;
9990 if (name[1] == 'r' &&
9999 return KEY_prototype;
10005 if (name[1] == 'u' &&
10014 return -KEY_quotemeta;
10020 if (name[1] == 'e' &&
10029 return -KEY_rewinddir;
10035 if (name[1] == 'e' &&
10044 return -KEY_setnetent;
10050 if (name[1] == 'a' &&
10059 return -KEY_wantarray;
10068 case 10: /* 9 tokens of length 10 */
10072 if (name[1] == 'n' &&
10078 if (name[4] == 'o' &&
10085 return -KEY_endhostent;
10091 if (name[4] == 'e' &&
10098 return -KEY_endservent;
10111 if (name[1] == 'e' &&
10117 if (name[4] == 'o' &&
10124 return -KEY_gethostent;
10133 if (name[5] == 'r' &&
10139 return -KEY_getservent;
10145 if (name[5] == 'c' &&
10151 return -KEY_getsockopt;
10171 if (name[2] == 't')
10176 if (name[4] == 'o' &&
10183 return -KEY_sethostent;
10192 if (name[5] == 'r' &&
10198 return -KEY_setservent;
10204 if (name[5] == 'c' &&
10210 return -KEY_setsockopt;
10227 if (name[2] == 'c' &&
10236 return -KEY_socketpair;
10249 case 11: /* 8 tokens of length 11 */
10253 if (name[1] == '_' &&
10263 { /* __PACKAGE__ */
10264 return -KEY___PACKAGE__;
10270 if (name[1] == 'n' &&
10280 { /* endprotoent */
10281 return -KEY_endprotoent;
10287 if (name[1] == 'e' &&
10296 if (name[5] == 'e' &&
10302 { /* getpeername */
10303 return -KEY_getpeername;
10312 if (name[6] == 'o' &&
10317 { /* getpriority */
10318 return -KEY_getpriority;
10324 if (name[6] == 't' &&
10329 { /* getprotoent */
10330 return -KEY_getprotoent;
10344 if (name[4] == 'o' &&
10351 { /* getsockname */
10352 return -KEY_getsockname;
10365 if (name[1] == 'e' &&
10373 if (name[6] == 'o' &&
10378 { /* setpriority */
10379 return -KEY_setpriority;
10385 if (name[6] == 't' &&
10390 { /* setprotoent */
10391 return -KEY_setprotoent;
10407 case 12: /* 2 tokens of length 12 */
10408 if (name[0] == 'g' &&
10420 if (name[9] == 'd' &&
10423 { /* getnetbyaddr */
10424 return -KEY_getnetbyaddr;
10430 if (name[9] == 'a' &&
10433 { /* getnetbyname */
10434 return -KEY_getnetbyname;
10446 case 13: /* 4 tokens of length 13 */
10447 if (name[0] == 'g' &&
10454 if (name[4] == 'o' &&
10463 if (name[10] == 'd' &&
10466 { /* gethostbyaddr */
10467 return -KEY_gethostbyaddr;
10473 if (name[10] == 'a' &&
10476 { /* gethostbyname */
10477 return -KEY_gethostbyname;
10490 if (name[4] == 'e' &&
10499 if (name[10] == 'a' &&
10502 { /* getservbyname */
10503 return -KEY_getservbyname;
10509 if (name[10] == 'o' &&
10512 { /* getservbyport */
10513 return -KEY_getservbyport;
10532 case 14: /* 1 tokens of length 14 */
10533 if (name[0] == 'g' &&
10547 { /* getprotobyname */
10548 return -KEY_getprotobyname;
10553 case 16: /* 1 tokens of length 16 */
10554 if (name[0] == 'g' &&
10570 { /* getprotobynumber */
10571 return -KEY_getprotobynumber;
10585 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10589 PERL_ARGS_ASSERT_CHECKCOMMA;
10591 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10592 if (ckWARN(WARN_SYNTAX)) {
10595 for (w = s+2; *w && level; w++) {
10598 else if (*w == ')')
10601 while (isSPACE(*w))
10603 /* the list of chars below is for end of statements or
10604 * block / parens, boolean operators (&&, ||, //) and branch
10605 * constructs (or, and, if, until, unless, while, err, for).
10606 * Not a very solid hack... */
10607 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10608 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10609 "%s (...) interpreted as function",name);
10612 while (s < PL_bufend && isSPACE(*s))
10616 while (s < PL_bufend && isSPACE(*s))
10618 if (isIDFIRST_lazy_if(s,UTF)) {
10619 const char * const w = s++;
10620 while (isALNUM_lazy_if(s,UTF))
10622 while (s < PL_bufend && isSPACE(*s))
10626 if (keyword(w, s - w, 0))
10629 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10630 if (gv && GvCVu(gv))
10632 Perl_croak(aTHX_ "No comma allowed after %s", what);
10637 /* Either returns sv, or mortalizes sv and returns a new SV*.
10638 Best used as sv=new_constant(..., sv, ...).
10639 If s, pv are NULL, calls subroutine with one argument,
10640 and type is used with error messages only. */
10643 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10644 SV *sv, SV *pv, const char *type, STRLEN typelen)
10647 HV * const table = GvHV(PL_hintgv); /* ^H */
10651 const char *why1 = "", *why2 = "", *why3 = "";
10653 PERL_ARGS_ASSERT_NEW_CONSTANT;
10655 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10658 why2 = (const char *)
10659 (strEQ(key,"charnames")
10660 ? "(possibly a missing \"use charnames ...\")"
10662 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10663 (type ? type: "undef"), why2);
10665 /* This is convoluted and evil ("goto considered harmful")
10666 * but I do not understand the intricacies of all the different
10667 * failure modes of %^H in here. The goal here is to make
10668 * the most probable error message user-friendly. --jhi */
10673 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10674 (type ? type: "undef"), why1, why2, why3);
10676 yyerror(SvPVX_const(msg));
10680 cvp = hv_fetch(table, key, keylen, FALSE);
10681 if (!cvp || !SvOK(*cvp)) {
10684 why3 = "} is not defined";
10687 sv_2mortal(sv); /* Parent created it permanently */
10690 pv = newSVpvn_flags(s, len, SVs_TEMP);
10692 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10694 typesv = &PL_sv_undef;
10696 PUSHSTACKi(PERLSI_OVERLOAD);
10708 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10712 /* Check the eval first */
10713 if (!PL_in_eval && SvTRUE(ERRSV)) {
10714 sv_catpvs(ERRSV, "Propagated");
10715 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10717 res = SvREFCNT_inc_simple(sv);
10721 SvREFCNT_inc_simple_void(res);
10730 why1 = "Call to &{$^H{";
10732 why3 = "}} did not return a defined value";
10740 /* Returns a NUL terminated string, with the length of the string written to
10744 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10747 register char *d = dest;
10748 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10750 PERL_ARGS_ASSERT_SCAN_WORD;
10754 Perl_croak(aTHX_ ident_too_long);
10755 if (isALNUM(*s)) /* UTF handled below */
10757 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10762 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10766 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10767 char *t = s + UTF8SKIP(s);
10769 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10773 Perl_croak(aTHX_ ident_too_long);
10774 Copy(s, d, len, char);
10787 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10790 char *bracket = NULL;
10792 register char *d = dest;
10793 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10795 PERL_ARGS_ASSERT_SCAN_IDENT;
10800 while (isDIGIT(*s)) {
10802 Perl_croak(aTHX_ ident_too_long);
10809 Perl_croak(aTHX_ ident_too_long);
10810 if (isALNUM(*s)) /* UTF handled below */
10812 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10817 else if (*s == ':' && s[1] == ':') {
10821 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10822 char *t = s + UTF8SKIP(s);
10823 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10825 if (d + (t - s) > e)
10826 Perl_croak(aTHX_ ident_too_long);
10827 Copy(s, d, t - s, char);
10838 if (PL_lex_state != LEX_NORMAL)
10839 PL_lex_state = LEX_INTERPENDMAYBE;
10842 if (*s == '$' && s[1] &&
10843 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10856 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10861 if (isSPACE(s[-1])) {
10863 const char ch = *s++;
10864 if (!SPACE_OR_TAB(ch)) {
10870 if (isIDFIRST_lazy_if(d,UTF)) {
10874 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10875 end += UTF8SKIP(end);
10876 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10877 end += UTF8SKIP(end);
10879 Copy(s, d, end - s, char);
10884 while ((isALNUM(*s) || *s == ':') && d < e)
10887 Perl_croak(aTHX_ ident_too_long);
10890 while (s < send && SPACE_OR_TAB(*s))
10892 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10893 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10894 const char * const brack =
10896 ((*s == '[') ? "[...]" : "{...}");
10897 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10898 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10899 funny, dest, brack, funny, dest, brack);
10902 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10906 /* Handle extended ${^Foo} variables
10907 * 1999-02-27 mjd-perl-patch@plover.com */
10908 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10912 while (isALNUM(*s) && d < e) {
10916 Perl_croak(aTHX_ ident_too_long);
10921 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10922 PL_lex_state = LEX_INTERPEND;
10925 if (PL_lex_state == LEX_NORMAL) {
10926 if (ckWARN(WARN_AMBIGUOUS) &&
10927 (keyword(dest, d - dest, 0)
10928 || get_cvn_flags(dest, d - dest, 0)))
10932 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10933 "Ambiguous use of %c{%s} resolved to %c%s",
10934 funny, dest, funny, dest);
10939 s = bracket; /* let the parser handle it */
10943 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10944 PL_lex_state = LEX_INTERPEND;
10949 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10951 PERL_ARGS_ASSERT_PMFLAG;
10953 PERL_UNUSED_CONTEXT;
10955 const char c = (char)ch;
10957 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10958 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10959 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10960 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10961 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10967 S_scan_pat(pTHX_ char *start, I32 type)
10971 char *s = scan_str(start,!!PL_madskills,FALSE);
10972 const char * const valid_flags =
10973 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10978 PERL_ARGS_ASSERT_SCAN_PAT;
10981 const char * const delimiter = skipspace(start);
10985 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10986 : "Search pattern not terminated" ));
10989 pm = (PMOP*)newPMOP(type, 0);
10990 if (PL_multi_open == '?') {
10991 /* This is the only point in the code that sets PMf_ONCE: */
10992 pm->op_pmflags |= PMf_ONCE;
10994 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10995 allows us to restrict the list needed by reset to just the ??
10997 assert(type != OP_TRANS);
10999 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11002 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11005 elements = mg->mg_len / sizeof(PMOP**);
11006 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11007 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11008 mg->mg_len = elements * sizeof(PMOP**);
11009 PmopSTASH_set(pm,PL_curstash);
11015 while (*s && strchr(valid_flags, *s))
11016 pmflag(&pm->op_pmflags,*s++);
11018 if (PL_madskills && modstart != s) {
11019 SV* tmptoken = newSVpvn(modstart, s - modstart);
11020 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11023 /* issue a warning if /c is specified,but /g is not */
11024 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11026 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11027 "Use of /c modifier is meaningless without /g" );
11030 PL_lex_op = (OP*)pm;
11031 pl_yylval.ival = OP_MATCH;
11036 S_scan_subst(pTHX_ char *start)
11047 PERL_ARGS_ASSERT_SCAN_SUBST;
11049 pl_yylval.ival = OP_NULL;
11051 s = scan_str(start,!!PL_madskills,FALSE);
11054 Perl_croak(aTHX_ "Substitution pattern not terminated");
11056 if (s[-1] == PL_multi_open)
11059 if (PL_madskills) {
11060 CURMAD('q', PL_thisopen);
11061 CURMAD('_', PL_thiswhite);
11062 CURMAD('E', PL_thisstuff);
11063 CURMAD('Q', PL_thisclose);
11064 PL_realtokenstart = s - SvPVX(PL_linestr);
11068 first_start = PL_multi_start;
11069 s = scan_str(s,!!PL_madskills,FALSE);
11071 if (PL_lex_stuff) {
11072 SvREFCNT_dec(PL_lex_stuff);
11073 PL_lex_stuff = NULL;
11075 Perl_croak(aTHX_ "Substitution replacement not terminated");
11077 PL_multi_start = first_start; /* so whole substitution is taken together */
11079 pm = (PMOP*)newPMOP(OP_SUBST, 0);
11082 if (PL_madskills) {
11083 CURMAD('z', PL_thisopen);
11084 CURMAD('R', PL_thisstuff);
11085 CURMAD('Z', PL_thisclose);
11091 if (*s == EXEC_PAT_MOD) {
11095 else if (strchr(S_PAT_MODS, *s))
11096 pmflag(&pm->op_pmflags,*s++);
11102 if (PL_madskills) {
11104 curmad('m', newSVpvn(modstart, s - modstart));
11105 append_madprops(PL_thismad, (OP*)pm, 0);
11109 if ((pm->op_pmflags & PMf_CONTINUE)) {
11110 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11114 SV * const repl = newSVpvs("");
11116 PL_sublex_info.super_bufptr = s;
11117 PL_sublex_info.super_bufend = PL_bufend;
11119 pm->op_pmflags |= PMf_EVAL;
11122 sv_catpvs(repl, "eval ");
11124 sv_catpvs(repl, "do ");
11126 sv_catpvs(repl, "{");
11127 sv_catsv(repl, PL_lex_repl);
11128 if (strchr(SvPVX(PL_lex_repl), '#'))
11129 sv_catpvs(repl, "\n");
11130 sv_catpvs(repl, "}");
11132 SvREFCNT_dec(PL_lex_repl);
11133 PL_lex_repl = repl;
11136 PL_lex_op = (OP*)pm;
11137 pl_yylval.ival = OP_SUBST;
11142 S_scan_trans(pTHX_ char *start)
11155 PERL_ARGS_ASSERT_SCAN_TRANS;
11157 pl_yylval.ival = OP_NULL;
11159 s = scan_str(start,!!PL_madskills,FALSE);
11161 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11163 if (s[-1] == PL_multi_open)
11166 if (PL_madskills) {
11167 CURMAD('q', PL_thisopen);
11168 CURMAD('_', PL_thiswhite);
11169 CURMAD('E', PL_thisstuff);
11170 CURMAD('Q', PL_thisclose);
11171 PL_realtokenstart = s - SvPVX(PL_linestr);
11175 s = scan_str(s,!!PL_madskills,FALSE);
11177 if (PL_lex_stuff) {
11178 SvREFCNT_dec(PL_lex_stuff);
11179 PL_lex_stuff = NULL;
11181 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11183 if (PL_madskills) {
11184 CURMAD('z', PL_thisopen);
11185 CURMAD('R', PL_thisstuff);
11186 CURMAD('Z', PL_thisclose);
11189 complement = del = squash = 0;
11196 complement = OPpTRANS_COMPLEMENT;
11199 del = OPpTRANS_DELETE;
11202 squash = OPpTRANS_SQUASH;
11211 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11212 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11213 o->op_private &= ~OPpTRANS_ALL;
11214 o->op_private |= del|squash|complement|
11215 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11216 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11219 pl_yylval.ival = OP_TRANS;
11222 if (PL_madskills) {
11224 curmad('m', newSVpvn(modstart, s - modstart));
11225 append_madprops(PL_thismad, o, 0);
11234 S_scan_heredoc(pTHX_ register char *s)
11238 I32 op_type = OP_SCALAR;
11242 const char *found_newline;
11246 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11248 I32 stuffstart = s - SvPVX(PL_linestr);
11251 PL_realtokenstart = -1;
11254 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11258 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11262 while (SPACE_OR_TAB(*peek))
11264 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11267 s = delimcpy(d, e, s, PL_bufend, term, &len);
11277 if (!isALNUM_lazy_if(s,UTF))
11278 deprecate("bare << to mean <<\"\"");
11279 for (; isALNUM_lazy_if(s,UTF); s++) {
11284 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11285 Perl_croak(aTHX_ "Delimiter for here document is too long");
11288 len = d - PL_tokenbuf;
11291 if (PL_madskills) {
11292 tstart = PL_tokenbuf + !outer;
11293 PL_thisclose = newSVpvn(tstart, len - !outer);
11294 tstart = SvPVX(PL_linestr) + stuffstart;
11295 PL_thisopen = newSVpvn(tstart, s - tstart);
11296 stuffstart = s - SvPVX(PL_linestr);
11299 #ifndef PERL_STRICT_CR
11300 d = strchr(s, '\r');
11302 char * const olds = s;
11304 while (s < PL_bufend) {
11310 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11319 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11326 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11327 herewas = newSVpvn(s,PL_bufend-s);
11331 herewas = newSVpvn(s-1,found_newline-s+1);
11334 herewas = newSVpvn(s,found_newline-s);
11338 if (PL_madskills) {
11339 tstart = SvPVX(PL_linestr) + stuffstart;
11341 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11343 PL_thisstuff = newSVpvn(tstart, s - tstart);
11346 s += SvCUR(herewas);
11349 stuffstart = s - SvPVX(PL_linestr);
11355 tmpstr = newSV_type(SVt_PVIV);
11356 SvGROW(tmpstr, 80);
11357 if (term == '\'') {
11358 op_type = OP_CONST;
11359 SvIV_set(tmpstr, -1);
11361 else if (term == '`') {
11362 op_type = OP_BACKTICK;
11363 SvIV_set(tmpstr, '\\');
11367 PL_multi_start = CopLINE(PL_curcop);
11368 PL_multi_open = PL_multi_close = '<';
11369 term = *PL_tokenbuf;
11370 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11371 char * const bufptr = PL_sublex_info.super_bufptr;
11372 char * const bufend = PL_sublex_info.super_bufend;
11373 char * const olds = s - SvCUR(herewas);
11374 s = strchr(bufptr, '\n');
11378 while (s < bufend &&
11379 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11381 CopLINE_inc(PL_curcop);
11384 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11385 missingterm(PL_tokenbuf);
11387 sv_setpvn(herewas,bufptr,d-bufptr+1);
11388 sv_setpvn(tmpstr,d+1,s-d);
11390 sv_catpvn(herewas,s,bufend-s);
11391 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11398 while (s < PL_bufend &&
11399 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11401 CopLINE_inc(PL_curcop);
11403 if (s >= PL_bufend) {
11404 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11405 missingterm(PL_tokenbuf);
11407 sv_setpvn(tmpstr,d+1,s-d);
11409 if (PL_madskills) {
11411 sv_catpvn(PL_thisstuff, d + 1, s - d);
11413 PL_thisstuff = newSVpvn(d + 1, s - d);
11414 stuffstart = s - SvPVX(PL_linestr);
11418 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11420 sv_catpvn(herewas,s,PL_bufend-s);
11421 sv_setsv(PL_linestr,herewas);
11422 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11423 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11424 PL_last_lop = PL_last_uni = NULL;
11427 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
11428 while (s >= PL_bufend) { /* multiple line string? */
11430 if (PL_madskills) {
11431 tstart = SvPVX(PL_linestr) + stuffstart;
11433 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11435 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11439 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11440 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11441 missingterm(PL_tokenbuf);
11444 stuffstart = s - SvPVX(PL_linestr);
11446 CopLINE_inc(PL_curcop);
11447 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11448 PL_last_lop = PL_last_uni = NULL;
11449 #ifndef PERL_STRICT_CR
11450 if (PL_bufend - PL_linestart >= 2) {
11451 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11452 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11454 PL_bufend[-2] = '\n';
11456 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11458 else if (PL_bufend[-1] == '\r')
11459 PL_bufend[-1] = '\n';
11461 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11462 PL_bufend[-1] = '\n';
11464 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11465 update_debugger_info(PL_linestr, NULL, 0);
11466 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11467 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11468 *(SvPVX(PL_linestr) + off ) = ' ';
11469 sv_catsv(PL_linestr,herewas);
11470 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11471 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11475 sv_catsv(tmpstr,PL_linestr);
11480 PL_multi_end = CopLINE(PL_curcop);
11481 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11482 SvPV_shrink_to_cur(tmpstr);
11484 SvREFCNT_dec(herewas);
11486 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11488 else if (PL_encoding)
11489 sv_recode_to_utf8(tmpstr, PL_encoding);
11491 PL_lex_stuff = tmpstr;
11492 pl_yylval.ival = op_type;
11496 /* scan_inputsymbol
11497 takes: current position in input buffer
11498 returns: new position in input buffer
11499 side-effects: pl_yylval and lex_op are set.
11504 <FH> read from filehandle
11505 <pkg::FH> read from package qualified filehandle
11506 <pkg'FH> read from package qualified filehandle
11507 <$fh> read from filehandle in $fh
11508 <*.h> filename glob
11513 S_scan_inputsymbol(pTHX_ char *start)
11516 register char *s = start; /* current position in buffer */
11519 char *d = PL_tokenbuf; /* start of temp holding space */
11520 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11522 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11524 end = strchr(s, '\n');
11527 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11529 /* die if we didn't have space for the contents of the <>,
11530 or if it didn't end, or if we see a newline
11533 if (len >= (I32)sizeof PL_tokenbuf)
11534 Perl_croak(aTHX_ "Excessively long <> operator");
11536 Perl_croak(aTHX_ "Unterminated <> operator");
11541 Remember, only scalar variables are interpreted as filehandles by
11542 this code. Anything more complex (e.g., <$fh{$num}>) will be
11543 treated as a glob() call.
11544 This code makes use of the fact that except for the $ at the front,
11545 a scalar variable and a filehandle look the same.
11547 if (*d == '$' && d[1]) d++;
11549 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11550 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11553 /* If we've tried to read what we allow filehandles to look like, and
11554 there's still text left, then it must be a glob() and not a getline.
11555 Use scan_str to pull out the stuff between the <> and treat it
11556 as nothing more than a string.
11559 if (d - PL_tokenbuf != len) {
11560 pl_yylval.ival = OP_GLOB;
11561 s = scan_str(start,!!PL_madskills,FALSE);
11563 Perl_croak(aTHX_ "Glob not terminated");
11567 bool readline_overriden = FALSE;
11570 /* we're in a filehandle read situation */
11573 /* turn <> into <ARGV> */
11575 Copy("ARGV",d,5,char);
11577 /* Check whether readline() is overriden */
11578 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11580 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11582 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11583 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11584 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11585 readline_overriden = TRUE;
11587 /* if <$fh>, create the ops to turn the variable into a
11591 /* try to find it in the pad for this block, otherwise find
11592 add symbol table ops
11594 const PADOFFSET tmp = pad_findmy(d);
11595 if (tmp != NOT_IN_PAD) {
11596 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11597 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11598 HEK * const stashname = HvNAME_HEK(stash);
11599 SV * const sym = sv_2mortal(newSVhek(stashname));
11600 sv_catpvs(sym, "::");
11601 sv_catpv(sym, d+1);
11606 OP * const o = newOP(OP_PADSV, 0);
11608 PL_lex_op = readline_overriden
11609 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11610 append_elem(OP_LIST, o,
11611 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11612 : (OP*)newUNOP(OP_READLINE, 0, o);
11621 ? (GV_ADDMULTI | GV_ADDINEVAL)
11624 PL_lex_op = readline_overriden
11625 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11626 append_elem(OP_LIST,
11627 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11628 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11629 : (OP*)newUNOP(OP_READLINE, 0,
11630 newUNOP(OP_RV2SV, 0,
11631 newGVOP(OP_GV, 0, gv)));
11633 if (!readline_overriden)
11634 PL_lex_op->op_flags |= OPf_SPECIAL;
11635 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11636 pl_yylval.ival = OP_NULL;
11639 /* If it's none of the above, it must be a literal filehandle
11640 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11642 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11643 PL_lex_op = readline_overriden
11644 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11645 append_elem(OP_LIST,
11646 newGVOP(OP_GV, 0, gv),
11647 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11648 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11649 pl_yylval.ival = OP_NULL;
11658 takes: start position in buffer
11659 keep_quoted preserve \ on the embedded delimiter(s)
11660 keep_delims preserve the delimiters around the string
11661 returns: position to continue reading from buffer
11662 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11663 updates the read buffer.
11665 This subroutine pulls a string out of the input. It is called for:
11666 q single quotes q(literal text)
11667 ' single quotes 'literal text'
11668 qq double quotes qq(interpolate $here please)
11669 " double quotes "interpolate $here please"
11670 qx backticks qx(/bin/ls -l)
11671 ` backticks `/bin/ls -l`
11672 qw quote words @EXPORT_OK = qw( func() $spam )
11673 m// regexp match m/this/
11674 s/// regexp substitute s/this/that/
11675 tr/// string transliterate tr/this/that/
11676 y/// string transliterate y/this/that/
11677 ($*@) sub prototypes sub foo ($)
11678 (stuff) sub attr parameters sub foo : attr(stuff)
11679 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11681 In most of these cases (all but <>, patterns and transliterate)
11682 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11683 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11684 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11687 It skips whitespace before the string starts, and treats the first
11688 character as the delimiter. If the delimiter is one of ([{< then
11689 the corresponding "close" character )]}> is used as the closing
11690 delimiter. It allows quoting of delimiters, and if the string has
11691 balanced delimiters ([{<>}]) it allows nesting.
11693 On success, the SV with the resulting string is put into lex_stuff or,
11694 if that is already non-NULL, into lex_repl. The second case occurs only
11695 when parsing the RHS of the special constructs s/// and tr/// (y///).
11696 For convenience, the terminating delimiter character is stuffed into
11701 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11704 SV *sv; /* scalar value: string */
11705 const char *tmps; /* temp string, used for delimiter matching */
11706 register char *s = start; /* current position in the buffer */
11707 register char term; /* terminating character */
11708 register char *to; /* current position in the sv's data */
11709 I32 brackets = 1; /* bracket nesting level */
11710 bool has_utf8 = FALSE; /* is there any utf8 content? */
11711 I32 termcode; /* terminating char. code */
11712 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11713 STRLEN termlen; /* length of terminating string */
11714 int last_off = 0; /* last position for nesting bracket */
11720 PERL_ARGS_ASSERT_SCAN_STR;
11722 /* skip space before the delimiter */
11728 if (PL_realtokenstart >= 0) {
11729 stuffstart = PL_realtokenstart;
11730 PL_realtokenstart = -1;
11733 stuffstart = start - SvPVX(PL_linestr);
11735 /* mark where we are, in case we need to report errors */
11738 /* after skipping whitespace, the next character is the terminator */
11741 termcode = termstr[0] = term;
11745 termcode = utf8_to_uvchr((U8*)s, &termlen);
11746 Copy(s, termstr, termlen, U8);
11747 if (!UTF8_IS_INVARIANT(term))
11751 /* mark where we are */
11752 PL_multi_start = CopLINE(PL_curcop);
11753 PL_multi_open = term;
11755 /* find corresponding closing delimiter */
11756 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11757 termcode = termstr[0] = term = tmps[5];
11759 PL_multi_close = term;
11761 /* create a new SV to hold the contents. 79 is the SV's initial length.
11762 What a random number. */
11763 sv = newSV_type(SVt_PVIV);
11765 SvIV_set(sv, termcode);
11766 (void)SvPOK_only(sv); /* validate pointer */
11768 /* move past delimiter and try to read a complete string */
11770 sv_catpvn(sv, s, termlen);
11773 tstart = SvPVX(PL_linestr) + stuffstart;
11774 if (!PL_thisopen && !keep_delims) {
11775 PL_thisopen = newSVpvn(tstart, s - tstart);
11776 stuffstart = s - SvPVX(PL_linestr);
11780 if (PL_encoding && !UTF) {
11784 int offset = s - SvPVX_const(PL_linestr);
11785 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11786 &offset, (char*)termstr, termlen);
11787 const char * const ns = SvPVX_const(PL_linestr) + offset;
11788 char * const svlast = SvEND(sv) - 1;
11790 for (; s < ns; s++) {
11791 if (*s == '\n' && !PL_rsfp)
11792 CopLINE_inc(PL_curcop);
11795 goto read_more_line;
11797 /* handle quoted delimiters */
11798 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11800 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11802 if ((svlast-1 - t) % 2) {
11803 if (!keep_quoted) {
11804 *(svlast-1) = term;
11806 SvCUR_set(sv, SvCUR(sv) - 1);
11811 if (PL_multi_open == PL_multi_close) {
11817 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11818 /* At here, all closes are "was quoted" one,
11819 so we don't check PL_multi_close. */
11821 if (!keep_quoted && *(t+1) == PL_multi_open)
11826 else if (*t == PL_multi_open)
11834 SvCUR_set(sv, w - SvPVX_const(sv));
11836 last_off = w - SvPVX(sv);
11837 if (--brackets <= 0)
11842 if (!keep_delims) {
11843 SvCUR_set(sv, SvCUR(sv) - 1);
11849 /* extend sv if need be */
11850 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11851 /* set 'to' to the next character in the sv's string */
11852 to = SvPVX(sv)+SvCUR(sv);
11854 /* if open delimiter is the close delimiter read unbridle */
11855 if (PL_multi_open == PL_multi_close) {
11856 for (; s < PL_bufend; s++,to++) {
11857 /* embedded newlines increment the current line number */
11858 if (*s == '\n' && !PL_rsfp)
11859 CopLINE_inc(PL_curcop);
11860 /* handle quoted delimiters */
11861 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11862 if (!keep_quoted && s[1] == term)
11864 /* any other quotes are simply copied straight through */
11868 /* terminate when run out of buffer (the for() condition), or
11869 have found the terminator */
11870 else if (*s == term) {
11873 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11876 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11882 /* if the terminator isn't the same as the start character (e.g.,
11883 matched brackets), we have to allow more in the quoting, and
11884 be prepared for nested brackets.
11887 /* read until we run out of string, or we find the terminator */
11888 for (; s < PL_bufend; s++,to++) {
11889 /* embedded newlines increment the line count */
11890 if (*s == '\n' && !PL_rsfp)
11891 CopLINE_inc(PL_curcop);
11892 /* backslashes can escape the open or closing characters */
11893 if (*s == '\\' && s+1 < PL_bufend) {
11894 if (!keep_quoted &&
11895 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11900 /* allow nested opens and closes */
11901 else if (*s == PL_multi_close && --brackets <= 0)
11903 else if (*s == PL_multi_open)
11905 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11910 /* terminate the copied string and update the sv's end-of-string */
11912 SvCUR_set(sv, to - SvPVX_const(sv));
11915 * this next chunk reads more into the buffer if we're not done yet
11919 break; /* handle case where we are done yet :-) */
11921 #ifndef PERL_STRICT_CR
11922 if (to - SvPVX_const(sv) >= 2) {
11923 if ((to[-2] == '\r' && to[-1] == '\n') ||
11924 (to[-2] == '\n' && to[-1] == '\r'))
11928 SvCUR_set(sv, to - SvPVX_const(sv));
11930 else if (to[-1] == '\r')
11933 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11938 /* if we're out of file, or a read fails, bail and reset the current
11939 line marker so we can report where the unterminated string began
11942 if (PL_madskills) {
11943 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11945 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11947 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11951 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11953 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11959 /* we read a line, so increment our line counter */
11960 CopLINE_inc(PL_curcop);
11962 /* update debugger info */
11963 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11964 update_debugger_info(PL_linestr, NULL, 0);
11966 /* having changed the buffer, we must update PL_bufend */
11967 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11968 PL_last_lop = PL_last_uni = NULL;
11971 /* at this point, we have successfully read the delimited string */
11973 if (!PL_encoding || UTF) {
11975 if (PL_madskills) {
11976 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11977 const int len = s - tstart;
11979 sv_catpvn(PL_thisstuff, tstart, len);
11981 PL_thisstuff = newSVpvn(tstart, len);
11982 if (!PL_thisclose && !keep_delims)
11983 PL_thisclose = newSVpvn(s,termlen);
11988 sv_catpvn(sv, s, termlen);
11993 if (PL_madskills) {
11994 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11995 const int len = s - tstart - termlen;
11997 sv_catpvn(PL_thisstuff, tstart, len);
11999 PL_thisstuff = newSVpvn(tstart, len);
12000 if (!PL_thisclose && !keep_delims)
12001 PL_thisclose = newSVpvn(s - termlen,termlen);
12005 if (has_utf8 || PL_encoding)
12008 PL_multi_end = CopLINE(PL_curcop);
12010 /* if we allocated too much space, give some back */
12011 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12012 SvLEN_set(sv, SvCUR(sv) + 1);
12013 SvPV_renew(sv, SvLEN(sv));
12016 /* decide whether this is the first or second quoted string we've read
12029 takes: pointer to position in buffer
12030 returns: pointer to new position in buffer
12031 side-effects: builds ops for the constant in pl_yylval.op
12033 Read a number in any of the formats that Perl accepts:
12035 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12036 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12039 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12041 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12044 If it reads a number without a decimal point or an exponent, it will
12045 try converting the number to an integer and see if it can do so
12046 without loss of precision.
12050 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12053 register const char *s = start; /* current position in buffer */
12054 register char *d; /* destination in temp buffer */
12055 register char *e; /* end of temp buffer */
12056 NV nv; /* number read, as a double */
12057 SV *sv = NULL; /* place to put the converted number */
12058 bool floatit; /* boolean: int or float? */
12059 const char *lastub = NULL; /* position of last underbar */
12060 static char const number_too_long[] = "Number too long";
12062 PERL_ARGS_ASSERT_SCAN_NUM;
12064 /* We use the first character to decide what type of number this is */
12068 Perl_croak(aTHX_ "panic: scan_num");
12070 /* if it starts with a 0, it could be an octal number, a decimal in
12071 0.13 disguise, or a hexadecimal number, or a binary number. */
12075 u holds the "number so far"
12076 shift the power of 2 of the base
12077 (hex == 4, octal == 3, binary == 1)
12078 overflowed was the number more than we can hold?
12080 Shift is used when we add a digit. It also serves as an "are
12081 we in octal/hex/binary?" indicator to disallow hex characters
12082 when in octal mode.
12087 bool overflowed = FALSE;
12088 bool just_zero = TRUE; /* just plain 0 or binary number? */
12089 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12090 static const char* const bases[5] =
12091 { "", "binary", "", "octal", "hexadecimal" };
12092 static const char* const Bases[5] =
12093 { "", "Binary", "", "Octal", "Hexadecimal" };
12094 static const char* const maxima[5] =
12096 "0b11111111111111111111111111111111",
12100 const char *base, *Base, *max;
12102 /* check for hex */
12107 } else if (s[1] == 'b') {
12112 /* check for a decimal in disguise */
12113 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12115 /* so it must be octal */
12122 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12123 "Misplaced _ in number");
12127 base = bases[shift];
12128 Base = Bases[shift];
12129 max = maxima[shift];
12131 /* read the rest of the number */
12133 /* x is used in the overflow test,
12134 b is the digit we're adding on. */
12139 /* if we don't mention it, we're done */
12143 /* _ are ignored -- but warned about if consecutive */
12145 if (lastub && s == lastub + 1)
12146 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12147 "Misplaced _ in number");
12151 /* 8 and 9 are not octal */
12152 case '8': case '9':
12154 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12158 case '2': case '3': case '4':
12159 case '5': case '6': case '7':
12161 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12164 case '0': case '1':
12165 b = *s++ & 15; /* ASCII digit -> value of digit */
12169 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12170 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12171 /* make sure they said 0x */
12174 b = (*s++ & 7) + 9;
12176 /* Prepare to put the digit we have onto the end
12177 of the number so far. We check for overflows.
12183 x = u << shift; /* make room for the digit */
12185 if ((x >> shift) != u
12186 && !(PL_hints & HINT_NEW_BINARY)) {
12189 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12190 "Integer overflow in %s number",
12193 u = x | b; /* add the digit to the end */
12196 n *= nvshift[shift];
12197 /* If an NV has not enough bits in its
12198 * mantissa to represent an UV this summing of
12199 * small low-order numbers is a waste of time
12200 * (because the NV cannot preserve the
12201 * low-order bits anyway): we could just
12202 * remember when did we overflow and in the
12203 * end just multiply n by the right
12211 /* if we get here, we had success: make a scalar value from
12216 /* final misplaced underbar check */
12217 if (s[-1] == '_') {
12218 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12223 if (n > 4294967295.0)
12224 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12225 "%s number > %s non-portable",
12231 if (u > 0xffffffff)
12232 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12233 "%s number > %s non-portable",
12238 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12239 sv = new_constant(start, s - start, "integer",
12240 sv, NULL, NULL, 0);
12241 else if (PL_hints & HINT_NEW_BINARY)
12242 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12247 handle decimal numbers.
12248 we're also sent here when we read a 0 as the first digit
12250 case '1': case '2': case '3': case '4': case '5':
12251 case '6': case '7': case '8': case '9': case '.':
12254 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12257 /* read next group of digits and _ and copy into d */
12258 while (isDIGIT(*s) || *s == '_') {
12259 /* skip underscores, checking for misplaced ones
12263 if (lastub && s == lastub + 1)
12264 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12265 "Misplaced _ in number");
12269 /* check for end of fixed-length buffer */
12271 Perl_croak(aTHX_ number_too_long);
12272 /* if we're ok, copy the character */
12277 /* final misplaced underbar check */
12278 if (lastub && s == lastub + 1) {
12279 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12282 /* read a decimal portion if there is one. avoid
12283 3..5 being interpreted as the number 3. followed
12286 if (*s == '.' && s[1] != '.') {
12291 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12292 "Misplaced _ in number");
12296 /* copy, ignoring underbars, until we run out of digits.
12298 for (; isDIGIT(*s) || *s == '_'; s++) {
12299 /* fixed length buffer check */
12301 Perl_croak(aTHX_ number_too_long);
12303 if (lastub && s == lastub + 1)
12304 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12305 "Misplaced _ in number");
12311 /* fractional part ending in underbar? */
12312 if (s[-1] == '_') {
12313 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12314 "Misplaced _ in number");
12316 if (*s == '.' && isDIGIT(s[1])) {
12317 /* oops, it's really a v-string, but without the "v" */
12323 /* read exponent part, if present */
12324 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12328 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12329 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12331 /* stray preinitial _ */
12333 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12334 "Misplaced _ in number");
12338 /* allow positive or negative exponent */
12339 if (*s == '+' || *s == '-')
12342 /* stray initial _ */
12344 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12345 "Misplaced _ in number");
12349 /* read digits of exponent */
12350 while (isDIGIT(*s) || *s == '_') {
12353 Perl_croak(aTHX_ number_too_long);
12357 if (((lastub && s == lastub + 1) ||
12358 (!isDIGIT(s[1]) && s[1] != '_')))
12359 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12360 "Misplaced _ in number");
12367 /* make an sv from the string */
12371 We try to do an integer conversion first if no characters
12372 indicating "float" have been found.
12377 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12379 if (flags == IS_NUMBER_IN_UV) {
12381 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12384 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12385 if (uv <= (UV) IV_MIN)
12386 sv_setiv(sv, -(IV)uv);
12393 /* terminate the string */
12395 nv = Atof(PL_tokenbuf);
12400 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12401 const char *const key = floatit ? "float" : "integer";
12402 const STRLEN keylen = floatit ? 5 : 7;
12403 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12404 key, keylen, sv, NULL, NULL, 0);
12408 /* if it starts with a v, it could be a v-string */
12411 sv = newSV(5); /* preallocate storage space */
12412 s = scan_vstring(s, PL_bufend, sv);
12416 /* make the op for the constant and return */
12419 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12421 lvalp->opval = NULL;
12427 S_scan_formline(pTHX_ register char *s)
12430 register char *eol;
12432 SV * const stuff = newSVpvs("");
12433 bool needargs = FALSE;
12434 bool eofmt = FALSE;
12436 char *tokenstart = s;
12437 SV* savewhite = NULL;
12439 if (PL_madskills) {
12440 savewhite = PL_thiswhite;
12445 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12447 while (!needargs) {
12450 #ifdef PERL_STRICT_CR
12451 while (SPACE_OR_TAB(*t))
12454 while (SPACE_OR_TAB(*t) || *t == '\r')
12457 if (*t == '\n' || t == PL_bufend) {
12462 if (PL_in_eval && !PL_rsfp) {
12463 eol = (char *) memchr(s,'\n',PL_bufend-s);
12468 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12470 for (t = s; t < eol; t++) {
12471 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12473 goto enough; /* ~~ must be first line in formline */
12475 if (*t == '@' || *t == '^')
12479 sv_catpvn(stuff, s, eol-s);
12480 #ifndef PERL_STRICT_CR
12481 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12482 char *end = SvPVX(stuff) + SvCUR(stuff);
12485 SvCUR_set(stuff, SvCUR(stuff) - 1);
12495 if (PL_madskills) {
12497 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12499 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12502 s = filter_gets(PL_linestr, PL_rsfp, 0);
12504 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12506 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12508 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12509 PL_last_lop = PL_last_uni = NULL;
12518 if (SvCUR(stuff)) {
12521 PL_lex_state = LEX_NORMAL;
12522 start_force(PL_curforce);
12523 NEXTVAL_NEXTTOKE.ival = 0;
12527 PL_lex_state = LEX_FORMLINE;
12529 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12531 else if (PL_encoding)
12532 sv_recode_to_utf8(stuff, PL_encoding);
12534 start_force(PL_curforce);
12535 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12537 start_force(PL_curforce);
12538 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12542 SvREFCNT_dec(stuff);
12544 PL_lex_formbrack = 0;
12548 if (PL_madskills) {
12550 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12552 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12553 PL_thiswhite = savewhite;
12560 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12563 const I32 oldsavestack_ix = PL_savestack_ix;
12564 CV* const outsidecv = PL_compcv;
12567 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12569 SAVEI32(PL_subline);
12570 save_item(PL_subname);
12571 SAVESPTR(PL_compcv);
12573 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12574 CvFLAGS(PL_compcv) |= flags;
12576 PL_subline = CopLINE(PL_curcop);
12577 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12578 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12579 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12581 return oldsavestack_ix;
12585 #pragma segment Perl_yylex
12588 S_yywarn(pTHX_ const char *const s)
12592 PERL_ARGS_ASSERT_YYWARN;
12594 PL_in_eval |= EVAL_WARNONLY;
12596 PL_in_eval &= ~EVAL_WARNONLY;
12601 Perl_yyerror(pTHX_ const char *const s)
12604 const char *where = NULL;
12605 const char *context = NULL;
12608 int yychar = PL_parser->yychar;
12610 PERL_ARGS_ASSERT_YYERROR;
12612 if (!yychar || (yychar == ';' && !PL_rsfp))
12614 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12615 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12616 PL_oldbufptr != PL_bufptr) {
12619 The code below is removed for NetWare because it abends/crashes on NetWare
12620 when the script has error such as not having the closing quotes like:
12621 if ($var eq "value)
12622 Checking of white spaces is anyway done in NetWare code.
12625 while (isSPACE(*PL_oldoldbufptr))
12628 context = PL_oldoldbufptr;
12629 contlen = PL_bufptr - PL_oldoldbufptr;
12631 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12632 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12635 The code below is removed for NetWare because it abends/crashes on NetWare
12636 when the script has error such as not having the closing quotes like:
12637 if ($var eq "value)
12638 Checking of white spaces is anyway done in NetWare code.
12641 while (isSPACE(*PL_oldbufptr))
12644 context = PL_oldbufptr;
12645 contlen = PL_bufptr - PL_oldbufptr;
12647 else if (yychar > 255)
12648 where = "next token ???";
12649 else if (yychar == -2) { /* YYEMPTY */
12650 if (PL_lex_state == LEX_NORMAL ||
12651 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12652 where = "at end of line";
12653 else if (PL_lex_inpat)
12654 where = "within pattern";
12656 where = "within string";
12659 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
12661 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12662 else if (isPRINT_LC(yychar)) {
12663 const char string = yychar;
12664 sv_catpvn(where_sv, &string, 1);
12667 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12668 where = SvPVX_const(where_sv);
12670 msg = sv_2mortal(newSVpv(s, 0));
12671 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12672 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12674 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12676 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12677 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12678 Perl_sv_catpvf(aTHX_ msg,
12679 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12680 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12683 if (PL_in_eval & EVAL_WARNONLY) {
12684 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12688 if (PL_error_count >= 10) {
12689 if (PL_in_eval && SvCUR(ERRSV))
12690 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12691 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12693 Perl_croak(aTHX_ "%s has too many errors.\n",
12694 OutCopFILE(PL_curcop));
12697 PL_in_my_stash = NULL;
12701 #pragma segment Main
12705 S_swallow_bom(pTHX_ U8 *s)
12708 const STRLEN slen = SvCUR(PL_linestr);
12710 PERL_ARGS_ASSERT_SWALLOW_BOM;
12714 if (s[1] == 0xFE) {
12715 /* UTF-16 little-endian? (or UTF32-LE?) */
12716 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12717 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12718 #ifndef PERL_NO_UTF16_FILTER
12719 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12722 if (PL_bufend > (char*)s) {
12726 filter_add(utf16rev_textfilter, NULL);
12727 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12728 utf16_to_utf8_reversed(s, news,
12729 PL_bufend - (char*)s - 1,
12731 sv_setpvn(PL_linestr, (const char*)news, newlen);
12733 s = (U8*)SvPVX(PL_linestr);
12734 Copy(news, s, newlen, U8);
12738 SvUTF8_on(PL_linestr);
12739 s = (U8*)SvPVX(PL_linestr);
12741 /* FIXME - is this a general bug fix? */
12744 PL_bufend = SvPVX(PL_linestr) + newlen;
12747 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12752 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12753 #ifndef PERL_NO_UTF16_FILTER
12754 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12757 if (PL_bufend > (char *)s) {
12761 filter_add(utf16_textfilter, NULL);
12762 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12763 utf16_to_utf8(s, news,
12764 PL_bufend - (char*)s,
12766 sv_setpvn(PL_linestr, (const char*)news, newlen);
12768 SvUTF8_on(PL_linestr);
12769 s = (U8*)SvPVX(PL_linestr);
12770 PL_bufend = SvPVX(PL_linestr) + newlen;
12773 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12778 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12779 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12780 s += 3; /* UTF-8 */
12786 if (s[2] == 0xFE && s[3] == 0xFF) {
12787 /* UTF-32 big-endian */
12788 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12791 else if (s[2] == 0 && s[3] != 0) {
12794 * are a good indicator of UTF-16BE. */
12795 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12801 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12802 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12803 s += 4; /* UTF-8 */
12809 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12812 * are a good indicator of UTF-16LE. */
12813 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12821 #ifndef PERL_NO_UTF16_FILTER
12823 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12826 const STRLEN old = SvCUR(sv);
12827 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12828 DEBUG_P(PerlIO_printf(Perl_debug_log,
12829 "utf16_textfilter(%p): %d %d (%d)\n",
12830 FPTR2DPTR(void *, utf16_textfilter),
12831 idx, maxlen, (int) count));
12835 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12836 Copy(SvPVX_const(sv), tmps, old, char);
12837 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12838 SvCUR(sv) - old, &newlen);
12839 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12841 DEBUG_P({sv_dump(sv);});
12846 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12849 const STRLEN old = SvCUR(sv);
12850 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12851 DEBUG_P(PerlIO_printf(Perl_debug_log,
12852 "utf16rev_textfilter(%p): %d %d (%d)\n",
12853 FPTR2DPTR(void *, utf16rev_textfilter),
12854 idx, maxlen, (int) count));
12858 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12859 Copy(SvPVX_const(sv), tmps, old, char);
12860 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12861 SvCUR(sv) - old, &newlen);
12862 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12864 DEBUG_P({ sv_dump(sv); });
12870 Returns a pointer to the next character after the parsed
12871 vstring, as well as updating the passed in sv.
12873 Function must be called like
12876 s = scan_vstring(s,e,sv);
12878 where s and e are the start and end of the string.
12879 The sv should already be large enough to store the vstring
12880 passed in, for performance reasons.
12885 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12888 const char *pos = s;
12889 const char *start = s;
12891 PERL_ARGS_ASSERT_SCAN_VSTRING;
12893 if (*pos == 'v') pos++; /* get past 'v' */
12894 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12896 if ( *pos != '.') {
12897 /* this may not be a v-string if followed by => */
12898 const char *next = pos;
12899 while (next < e && isSPACE(*next))
12901 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12902 /* return string not v-string */
12903 sv_setpvn(sv,(char *)s,pos-s);
12904 return (char *)pos;
12908 if (!isALPHA(*pos)) {
12909 U8 tmpbuf[UTF8_MAXBYTES+1];
12912 s++; /* get past 'v' */
12917 /* this is atoi() that tolerates underscores */
12920 const char *end = pos;
12922 while (--end >= s) {
12924 const UV orev = rev;
12925 rev += (*end - '0') * mult;
12928 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12929 "Integer overflow in decimal number");
12933 if (rev > 0x7FFFFFFF)
12934 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12936 /* Append native character for the rev point */
12937 tmpend = uvchr_to_utf8(tmpbuf, rev);
12938 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12939 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12941 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12947 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12951 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12959 * c-indentation-style: bsd
12960 * c-basic-offset: 4
12961 * indent-tabs-mode: t
12964 * ex: set ts=8 sts=4 sw=4 noet: