3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 #define PERL_IN_TOKE_C
28 #define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
31 #define pl_yylval (PL_parser->yylval)
33 /* YYINITDEPTH -- initial size of the parser's stacks. */
34 #define YYINITDEPTH 200
36 /* XXX temporary backwards compatibility */
37 #define PL_lex_brackets (PL_parser->lex_brackets)
38 #define PL_lex_brackstack (PL_parser->lex_brackstack)
39 #define PL_lex_casemods (PL_parser->lex_casemods)
40 #define PL_lex_casestack (PL_parser->lex_casestack)
41 #define PL_lex_defer (PL_parser->lex_defer)
42 #define PL_lex_dojoin (PL_parser->lex_dojoin)
43 #define PL_lex_expect (PL_parser->lex_expect)
44 #define PL_lex_formbrack (PL_parser->lex_formbrack)
45 #define PL_lex_inpat (PL_parser->lex_inpat)
46 #define PL_lex_inwhat (PL_parser->lex_inwhat)
47 #define PL_lex_op (PL_parser->lex_op)
48 #define PL_lex_repl (PL_parser->lex_repl)
49 #define PL_lex_starts (PL_parser->lex_starts)
50 #define PL_lex_stuff (PL_parser->lex_stuff)
51 #define PL_multi_start (PL_parser->multi_start)
52 #define PL_multi_open (PL_parser->multi_open)
53 #define PL_multi_close (PL_parser->multi_close)
54 #define PL_pending_ident (PL_parser->pending_ident)
55 #define PL_preambled (PL_parser->preambled)
56 #define PL_sublex_info (PL_parser->sublex_info)
57 #define PL_linestr (PL_parser->linestr)
58 #define PL_expect (PL_parser->expect)
59 #define PL_copline (PL_parser->copline)
60 #define PL_bufptr (PL_parser->bufptr)
61 #define PL_oldbufptr (PL_parser->oldbufptr)
62 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63 #define PL_linestart (PL_parser->linestart)
64 #define PL_bufend (PL_parser->bufend)
65 #define PL_last_uni (PL_parser->last_uni)
66 #define PL_last_lop (PL_parser->last_lop)
67 #define PL_last_lop_op (PL_parser->last_lop_op)
68 #define PL_lex_state (PL_parser->lex_state)
69 #define PL_rsfp (PL_parser->rsfp)
70 #define PL_rsfp_filters (PL_parser->rsfp_filters)
71 #define PL_in_my (PL_parser->in_my)
72 #define PL_in_my_stash (PL_parser->in_my_stash)
73 #define PL_tokenbuf (PL_parser->tokenbuf)
74 #define PL_multi_end (PL_parser->multi_end)
75 #define PL_error_count (PL_parser->error_count)
78 # define PL_endwhite (PL_parser->endwhite)
79 # define PL_faketokens (PL_parser->faketokens)
80 # define PL_lasttoke (PL_parser->lasttoke)
81 # define PL_nextwhite (PL_parser->nextwhite)
82 # define PL_realtokenstart (PL_parser->realtokenstart)
83 # define PL_skipwhite (PL_parser->skipwhite)
84 # define PL_thisclose (PL_parser->thisclose)
85 # define PL_thismad (PL_parser->thismad)
86 # define PL_thisopen (PL_parser->thisopen)
87 # define PL_thisstuff (PL_parser->thisstuff)
88 # define PL_thistoken (PL_parser->thistoken)
89 # define PL_thiswhite (PL_parser->thiswhite)
90 # define PL_thiswhite (PL_parser->thiswhite)
91 # define PL_nexttoke (PL_parser->nexttoke)
92 # define PL_curforce (PL_parser->curforce)
94 # define PL_nexttoke (PL_parser->nexttoke)
95 # define PL_nexttype (PL_parser->nexttype)
96 # define PL_nextval (PL_parser->nextval)
100 S_pending_ident(pTHX);
102 static const char ident_too_long[] = "Identifier too long";
103 static const char commaless_variable_list[] = "comma-less variable list";
105 #ifndef PERL_NO_UTF16_FILTER
106 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
107 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
111 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
112 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
114 # define CURMAD(slot,sv)
115 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
118 #define XFAKEBRACK 128
119 #define XENUMMASK 127
121 #ifdef USE_UTF8_SCRIPTS
122 # define UTF (!IN_BYTES)
124 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
127 /* The maximum number of characters preceding the unrecognized one to display */
128 #define UNRECOGNIZED_PRECEDE_COUNT 10
130 /* In variables named $^X, these are the legal values for X.
131 * 1999-02-27 mjd-perl-patch@plover.com */
132 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
134 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
136 /* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
138 * can get by with a single comparison (if the compiler is smart enough).
141 /* #define LEX_NOTPARSING 11 is done in perl.h. */
143 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
144 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
149 /* at end of code, eg "$x" followed by: */
150 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
153 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155 #define LEX_INTERPCONST 2 /* NOT USED */
156 #define LEX_FORMLINE 1 /* expecting a format line */
157 #define LEX_KNOWNEXT 0 /* next token known; just return it */
161 static const char* const lex_state_names[] = {
180 #include "keywords.h"
182 /* CLINE is a macro that ensures PL_copline has a sane value */
187 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 # define SKIPSPACE0(s) skipspace0(s)
191 # define SKIPSPACE1(s) skipspace1(s)
192 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193 # define PEEKSPACE(s) skipspace2(s,0)
195 # define SKIPSPACE0(s) skipspace(s)
196 # define SKIPSPACE1(s) skipspace(s)
197 # define SKIPSPACE2(s,tsv) skipspace(s)
198 # define PEEKSPACE(s) skipspace(s)
202 * Convenience functions to return different tokens and prime the
203 * lexer for the next token. They all take an argument.
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
215 * FUN1 : not used, except for not, which isn't a UNIOP
216 * BOop : bitwise or or xor
218 * SHop : shift operator
219 * PWop : power operator
220 * PMop : pattern-matching operator
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
224 * Rop : relational operator <= != gt
226 * Also see LOP and lop() below.
229 #ifdef DEBUGGING /* Serve -DT. */
230 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
232 # define REPORT(retval) (retval)
235 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
242 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
256 /* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
261 #define UNI2(f,x) { \
262 pl_yylval.ival = f; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
268 return REPORT( (int)FUNC1 ); \
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
272 #define UNI(f) UNI2(f,XTERM)
273 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
275 #define UNIBRACK(f) { \
276 pl_yylval.ival = f; \
278 PL_last_uni = PL_oldbufptr; \
280 return REPORT( (int)FUNC1 ); \
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
285 /* grandfather return to old style */
286 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
290 /* how to interpret the pl_yylval associated with the token */
294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
300 static struct debug_tokens {
302 enum token_type type;
304 } const debug_tokens[] =
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
371 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
372 { 0, TOKENTYPE_NONE, NULL }
375 /* dump the returned token in rv, plus any optional arg in pl_yylval */
378 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
382 PERL_ARGS_ASSERT_TOKEREPORT;
385 const char *name = NULL;
386 enum token_type type = TOKENTYPE_NONE;
387 const struct debug_tokens *p;
388 SV* const report = newSVpvs("<== ");
390 for (p = debug_tokens; p->token; p++) {
391 if (p->token == (int)rv) {
398 Perl_sv_catpv(aTHX_ report, name);
399 else if ((char)rv > ' ' && (char)rv < '~')
400 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
402 sv_catpvs(report, "EOF");
404 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
407 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
410 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
412 case TOKENTYPE_OPNUM:
413 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
414 PL_op_name[lvalp->ival]);
417 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
419 case TOKENTYPE_OPVAL:
421 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
422 PL_op_name[lvalp->opval->op_type]);
423 if (lvalp->opval->op_type == OP_CONST) {
424 Perl_sv_catpvf(aTHX_ report, " %s",
425 SvPEEK(cSVOPx_sv(lvalp->opval)));
430 sv_catpvs(report, "(opval=null)");
433 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
439 /* print the buffer with suitable escapes */
442 S_printbuf(pTHX_ const char *const fmt, const char *const s)
444 SV* const tmp = newSVpvs("");
446 PERL_ARGS_ASSERT_PRINTBUF;
448 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
457 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
458 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
462 S_ao(pTHX_ int toketype)
465 if (*PL_bufptr == '=') {
467 if (toketype == ANDAND)
468 pl_yylval.ival = OP_ANDASSIGN;
469 else if (toketype == OROR)
470 pl_yylval.ival = OP_ORASSIGN;
471 else if (toketype == DORDOR)
472 pl_yylval.ival = OP_DORASSIGN;
480 * When Perl expects an operator and finds something else, no_op
481 * prints the warning. It always prints "<something> found where
482 * operator expected. It prints "Missing semicolon on previous line?"
483 * if the surprise occurs at the start of the line. "do you need to
484 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
485 * where the compiler doesn't know if foo is a method call or a function.
486 * It prints "Missing operator before end of line" if there's nothing
487 * after the missing operator, or "... before <...>" if there is something
488 * after the missing operator.
492 S_no_op(pTHX_ const char *const what, char *s)
495 char * const oldbp = PL_bufptr;
496 const bool is_first = (PL_oldbufptr == PL_linestart);
498 PERL_ARGS_ASSERT_NO_OP;
504 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
505 if (ckWARN_d(WARN_SYNTAX)) {
507 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
508 "\t(Missing semicolon on previous line?)\n");
509 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
511 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
513 if (t < PL_bufptr && isSPACE(*t))
514 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
515 "\t(Do you need to predeclare %.*s?)\n",
516 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
521 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
529 * Complain about missing quote/regexp/heredoc terminator.
530 * If it's called with NULL then it cauterizes the line buffer.
531 * If we're in a delimited string and the delimiter is a control
532 * character, it's reformatted into a two-char sequence like ^C.
537 S_missingterm(pTHX_ char *s)
543 char * const nl = strrchr(s,'\n');
547 else if (isCNTRL(PL_multi_close)) {
549 tmpbuf[1] = (char)toCTRL(PL_multi_close);
554 *tmpbuf = (char)PL_multi_close;
558 q = strchr(s,'"') ? '\'' : '"';
559 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
562 #define FEATURE_IS_ENABLED(name) \
563 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
564 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
565 /* The longest string we pass in. */
566 #define MAX_FEATURE_LEN (sizeof("switch")-1)
569 * S_feature_is_enabled
570 * Check whether the named feature is enabled.
573 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
576 HV * const hinthv = GvHV(PL_hintgv);
577 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
579 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
581 assert(namelen <= MAX_FEATURE_LEN);
582 memcpy(&he_name[8], name, namelen);
584 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
592 Perl_deprecate(pTHX_ const char *const s)
594 PERL_ARGS_ASSERT_DEPRECATE;
596 Perl_ck_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
600 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
601 * utf16-to-utf8-reversed.
604 #ifdef PERL_CR_FILTER
608 register const char *s = SvPVX_const(sv);
609 register const char * const e = s + SvCUR(sv);
611 PERL_ARGS_ASSERT_STRIP_RETURN;
613 /* outer loop optimized to do nothing if there are no CR-LFs */
615 if (*s++ == '\r' && *s == '\n') {
616 /* hit a CR-LF, need to copy the rest */
617 register char *d = s - 1;
620 if (*s == '\r' && s[1] == '\n')
631 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
633 const I32 count = FILTER_READ(idx+1, sv, maxlen);
634 if (count > 0 && !maxlen)
645 * Create a parser object and initialise its parser and lexer fields
647 * rsfp is the opened file handle to read from (if any),
649 * line holds any initial content already read from the file (or in
650 * the case of no file, such as an eval, the whole contents);
652 * new_filter indicates that this is a new file and it shouldn't inherit
653 * the filters from the current parser (ie require).
657 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
660 const char *s = NULL;
662 yy_parser *parser, *oparser;
664 /* create and initialise a parser */
666 Newxz(parser, 1, yy_parser);
667 parser->old_parser = oparser = PL_parser;
670 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
671 parser->ps = parser->stack;
672 parser->stack_size = YYINITDEPTH;
674 parser->stack->state = 0;
675 parser->yyerrstatus = 0;
676 parser->yychar = YYEMPTY; /* Cause a token to be read. */
678 /* on scope exit, free this parser and restore any outer one */
680 parser->saved_curcop = PL_curcop;
682 /* initialise lexer state */
685 parser->curforce = -1;
687 parser->nexttoke = 0;
689 parser->error_count = oparser ? oparser->error_count : 0;
690 parser->copline = NOLINE;
691 parser->lex_state = LEX_NORMAL;
692 parser->expect = XSTATE;
694 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
695 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
697 Newx(parser->lex_brackstack, 120, char);
698 Newx(parser->lex_casestack, 12, char);
699 *parser->lex_casestack = '\0';
702 s = SvPV_const(line, len);
708 parser->linestr = newSVpvs("\n;");
709 } else if (SvREADONLY(line) || s[len-1] != ';') {
710 parser->linestr = newSVsv(line);
712 sv_catpvs(parser->linestr, "\n;");
715 SvREFCNT_inc_simple_void_NN(line);
716 parser->linestr = line;
718 parser->oldoldbufptr =
721 parser->linestart = SvPVX(parser->linestr);
722 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
723 parser->last_lop = parser->last_uni = NULL;
727 /* delete a parser object */
730 Perl_parser_free(pTHX_ const yy_parser *parser)
732 PERL_ARGS_ASSERT_PARSER_FREE;
734 PL_curcop = parser->saved_curcop;
735 SvREFCNT_dec(parser->linestr);
737 if (parser->rsfp == PerlIO_stdin())
738 PerlIO_clearerr(parser->rsfp);
739 else if (parser->rsfp && (!parser->old_parser ||
740 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
741 PerlIO_close(parser->rsfp);
742 SvREFCNT_dec(parser->rsfp_filters);
744 Safefree(parser->stack);
745 Safefree(parser->lex_brackstack);
746 Safefree(parser->lex_casestack);
747 PL_parser = parser->old_parser;
754 * Finalizer for lexing operations. Must be called when the parser is
755 * done with the lexer.
762 PL_doextract = FALSE;
767 * This subroutine has nothing to do with tilting, whether at windmills
768 * or pinball tables. Its name is short for "increment line". It
769 * increments the current line number in CopLINE(PL_curcop) and checks
770 * to see whether the line starts with a comment of the form
771 * # line 500 "foo.pm"
772 * If so, it sets the current line number and file to the values in the comment.
776 S_incline(pTHX_ const char *s)
783 PERL_ARGS_ASSERT_INCLINE;
785 CopLINE_inc(PL_curcop);
788 while (SPACE_OR_TAB(*s))
790 if (strnEQ(s, "line", 4))
794 if (SPACE_OR_TAB(*s))
798 while (SPACE_OR_TAB(*s))
806 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
808 while (SPACE_OR_TAB(*s))
810 if (*s == '"' && (t = strchr(s+1, '"'))) {
820 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
822 if (*e != '\n' && *e != '\0')
823 return; /* false alarm */
826 const STRLEN len = t - s;
828 SV *const temp_sv = CopFILESV(PL_curcop);
834 tmplen = SvCUR(temp_sv);
840 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
841 /* must copy *{"::_<(eval N)[oldfilename:L]"}
842 * to *{"::_<newfilename"} */
843 /* However, the long form of evals is only turned on by the
844 debugger - usually they're "(eval %lu)" */
848 STRLEN tmplen2 = len;
849 if (tmplen + 2 <= sizeof smallbuf)
852 Newx(tmpbuf, tmplen + 2, char);
855 memcpy(tmpbuf + 2, cf, tmplen);
857 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
862 if (tmplen2 + 2 <= sizeof smallbuf)
865 Newx(tmpbuf2, tmplen2 + 2, char);
867 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
868 /* Either they malloc'd it, or we malloc'd it,
869 so no prefix is present in ours. */
874 memcpy(tmpbuf2 + 2, s, tmplen2);
877 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
879 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
880 /* adjust ${"::_<newfilename"} to store the new file name */
881 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
882 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
883 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
886 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
888 if (tmpbuf != smallbuf) Safefree(tmpbuf);
891 CopFILE_free(PL_curcop);
892 CopFILE_setn(PL_curcop, s, len);
894 CopLINE_set(PL_curcop, atoi(n)-1);
898 /* skip space before PL_thistoken */
901 S_skipspace0(pTHX_ register char *s)
903 PERL_ARGS_ASSERT_SKIPSPACE0;
910 PL_thiswhite = newSVpvs("");
911 sv_catsv(PL_thiswhite, PL_skipwhite);
912 sv_free(PL_skipwhite);
915 PL_realtokenstart = s - SvPVX(PL_linestr);
919 /* skip space after PL_thistoken */
922 S_skipspace1(pTHX_ register char *s)
924 const char *start = s;
925 I32 startoff = start - SvPVX(PL_linestr);
927 PERL_ARGS_ASSERT_SKIPSPACE1;
932 start = SvPVX(PL_linestr) + startoff;
933 if (!PL_thistoken && PL_realtokenstart >= 0) {
934 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
935 PL_thistoken = newSVpvn(tstart, start - tstart);
937 PL_realtokenstart = -1;
940 PL_nextwhite = newSVpvs("");
941 sv_catsv(PL_nextwhite, PL_skipwhite);
942 sv_free(PL_skipwhite);
949 S_skipspace2(pTHX_ register char *s, SV **svp)
952 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
953 const I32 startoff = s - SvPVX(PL_linestr);
955 PERL_ARGS_ASSERT_SKIPSPACE2;
958 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
959 if (!PL_madskills || !svp)
961 start = SvPVX(PL_linestr) + startoff;
962 if (!PL_thistoken && PL_realtokenstart >= 0) {
963 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
964 PL_thistoken = newSVpvn(tstart, start - tstart);
965 PL_realtokenstart = -1;
970 sv_setsv(*svp, PL_skipwhite);
971 sv_free(PL_skipwhite);
980 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
982 AV *av = CopFILEAVx(PL_curcop);
984 SV * const sv = newSV_type(SVt_PVMG);
986 sv_setsv(sv, orig_sv);
988 sv_setpvn(sv, buf, len);
991 av_store(av, (I32)CopLINE(PL_curcop), sv);
997 * Called to gobble the appropriate amount and type of whitespace.
998 * Skips comments as well.
1002 S_skipspace(pTHX_ register char *s)
1007 int startoff = s - SvPVX(PL_linestr);
1009 PERL_ARGS_ASSERT_SKIPSPACE;
1012 sv_free(PL_skipwhite);
1016 PERL_ARGS_ASSERT_SKIPSPACE;
1018 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1019 while (s < PL_bufend && SPACE_OR_TAB(*s))
1029 SSize_t oldprevlen, oldoldprevlen;
1030 SSize_t oldloplen = 0, oldunilen = 0;
1031 while (s < PL_bufend && isSPACE(*s)) {
1032 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1037 if (s < PL_bufend && *s == '#') {
1038 while (s < PL_bufend && *s != '\n')
1040 if (s < PL_bufend) {
1042 if (PL_in_eval && !PL_rsfp) {
1049 /* only continue to recharge the buffer if we're at the end
1050 * of the buffer, we're not reading from a source filter, and
1051 * we're in normal lexing mode
1053 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1054 PL_lex_state == LEX_FORMLINE)
1061 /* try to recharge the buffer */
1063 curoff = s - SvPVX(PL_linestr);
1066 if ((s = filter_gets(PL_linestr, PL_rsfp,
1067 (prevlen = SvCUR(PL_linestr)))) == NULL)
1070 if (PL_madskills && curoff != startoff) {
1072 PL_skipwhite = newSVpvs("");
1073 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1077 /* mustn't throw out old stuff yet if madpropping */
1078 SvCUR(PL_linestr) = curoff;
1079 s = SvPVX(PL_linestr) + curoff;
1081 if (curoff && s[-1] == '\n')
1085 /* end of file. Add on the -p or -n magic */
1086 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1089 sv_catpvs(PL_linestr,
1090 ";}continue{print or die qq(-p destination: $!\\n);}");
1092 sv_setpvs(PL_linestr,
1093 ";}continue{print or die qq(-p destination: $!\\n);}");
1095 PL_minus_n = PL_minus_p = 0;
1097 else if (PL_minus_n) {
1099 sv_catpvs(PL_linestr, ";}");
1101 sv_setpvs(PL_linestr, ";}");
1107 sv_catpvs(PL_linestr,";");
1109 sv_setpvs(PL_linestr,";");
1112 /* reset variables for next time we lex */
1113 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1119 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1120 PL_last_lop = PL_last_uni = NULL;
1122 /* Close the filehandle. Could be from
1123 * STDIN, or a regular file. If we were reading code from
1124 * STDIN (because the commandline held no -e or filename)
1125 * then we don't close it, we reset it so the code can
1126 * read from STDIN too.
1129 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1130 PerlIO_clearerr(PL_rsfp);
1132 (void)PerlIO_close(PL_rsfp);
1137 /* not at end of file, so we only read another line */
1138 /* make corresponding updates to old pointers, for yyerror() */
1139 oldprevlen = PL_oldbufptr - PL_bufend;
1140 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1142 oldunilen = PL_last_uni - PL_bufend;
1144 oldloplen = PL_last_lop - PL_bufend;
1145 PL_linestart = PL_bufptr = s + prevlen;
1146 PL_bufend = s + SvCUR(PL_linestr);
1148 PL_oldbufptr = s + oldprevlen;
1149 PL_oldoldbufptr = s + oldoldprevlen;
1151 PL_last_uni = s + oldunilen;
1153 PL_last_lop = s + oldloplen;
1156 /* debugger active and we're not compiling the debugger code,
1157 * so store the line into the debugger's array of lines
1159 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
1160 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1167 PL_skipwhite = newSVpvs("");
1168 curoff = s - SvPVX(PL_linestr);
1169 if (curoff - startoff)
1170 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1179 * Check the unary operators to ensure there's no ambiguity in how they're
1180 * used. An ambiguous piece of code would be:
1182 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1183 * the +5 is its argument.
1193 if (PL_oldoldbufptr != PL_last_uni)
1195 while (isSPACE(*PL_last_uni))
1198 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1200 if ((t = strchr(s, '(')) && t < PL_bufptr)
1203 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1204 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1205 (int)(s - PL_last_uni), PL_last_uni);
1209 * LOP : macro to build a list operator. Its behaviour has been replaced
1210 * with a subroutine, S_lop() for which LOP is just another name.
1213 #define LOP(f,x) return lop(f,x,s)
1217 * Build a list operator (or something that might be one). The rules:
1218 * - if we have a next token, then it's a list operator [why?]
1219 * - if the next thing is an opening paren, then it's a function
1220 * - else it's a list operator
1224 S_lop(pTHX_ I32 f, int x, char *s)
1228 PERL_ARGS_ASSERT_LOP;
1234 PL_last_lop = PL_oldbufptr;
1235 PL_last_lop_op = (OPCODE)f;
1238 return REPORT(LSTOP);
1241 return REPORT(LSTOP);
1244 return REPORT(FUNC);
1247 return REPORT(FUNC);
1249 return REPORT(LSTOP);
1255 * Sets up for an eventual force_next(). start_force(0) basically does
1256 * an unshift, while start_force(-1) does a push. yylex removes items
1261 S_start_force(pTHX_ int where)
1265 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1266 where = PL_lasttoke;
1267 assert(PL_curforce < 0 || PL_curforce == where);
1268 if (PL_curforce != where) {
1269 for (i = PL_lasttoke; i > where; --i) {
1270 PL_nexttoke[i] = PL_nexttoke[i-1];
1274 if (PL_curforce < 0) /* in case of duplicate start_force() */
1275 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1276 PL_curforce = where;
1279 curmad('^', newSVpvs(""));
1280 CURMAD('_', PL_nextwhite);
1285 S_curmad(pTHX_ char slot, SV *sv)
1291 if (PL_curforce < 0)
1292 where = &PL_thismad;
1294 where = &PL_nexttoke[PL_curforce].next_mad;
1300 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1302 else if (PL_encoding) {
1303 sv_recode_to_utf8(sv, PL_encoding);
1308 /* keep a slot open for the head of the list? */
1309 if (slot != '_' && *where && (*where)->mad_key == '^') {
1310 (*where)->mad_key = slot;
1311 sv_free(MUTABLE_SV(((*where)->mad_val)));
1312 (*where)->mad_val = (void*)sv;
1315 addmad(newMADsv(slot, sv), where, 0);
1318 # define start_force(where) NOOP
1319 # define curmad(slot, sv) NOOP
1324 * When the lexer realizes it knows the next token (for instance,
1325 * it is reordering tokens for the parser) then it can call S_force_next
1326 * to know what token to return the next time the lexer is called. Caller
1327 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1328 * and possibly PL_expect to ensure the lexer handles the token correctly.
1332 S_force_next(pTHX_ I32 type)
1337 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1338 tokereport(type, &NEXTVAL_NEXTTOKE);
1342 if (PL_curforce < 0)
1343 start_force(PL_lasttoke);
1344 PL_nexttoke[PL_curforce].next_type = type;
1345 if (PL_lex_state != LEX_KNOWNEXT)
1346 PL_lex_defer = PL_lex_state;
1347 PL_lex_state = LEX_KNOWNEXT;
1348 PL_lex_expect = PL_expect;
1351 PL_nexttype[PL_nexttoke] = type;
1353 if (PL_lex_state != LEX_KNOWNEXT) {
1354 PL_lex_defer = PL_lex_state;
1355 PL_lex_expect = PL_expect;
1356 PL_lex_state = LEX_KNOWNEXT;
1362 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1365 SV * const sv = newSVpvn_utf8(start, len,
1368 && !is_ascii_string((const U8*)start, len)
1369 && is_utf8_string((const U8*)start, len));
1375 * When the lexer knows the next thing is a word (for instance, it has
1376 * just seen -> and it knows that the next char is a word char, then
1377 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1381 * char *start : buffer position (must be within PL_linestr)
1382 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1383 * int check_keyword : if true, Perl checks to make sure the word isn't
1384 * a keyword (do this if the word is a label, e.g. goto FOO)
1385 * int allow_pack : if true, : characters will also be allowed (require,
1386 * use, etc. do this)
1387 * int allow_initial_tick : used by the "sub" lexer only.
1391 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1397 PERL_ARGS_ASSERT_FORCE_WORD;
1399 start = SKIPSPACE1(start);
1401 if (isIDFIRST_lazy_if(s,UTF) ||
1402 (allow_pack && *s == ':') ||
1403 (allow_initial_tick && *s == '\'') )
1405 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1406 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1408 start_force(PL_curforce);
1410 curmad('X', newSVpvn(start,s-start));
1411 if (token == METHOD) {
1416 PL_expect = XOPERATOR;
1420 curmad('g', newSVpvs( "forced" ));
1421 NEXTVAL_NEXTTOKE.opval
1422 = (OP*)newSVOP(OP_CONST,0,
1423 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1424 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1432 * Called when the lexer wants $foo *foo &foo etc, but the program
1433 * text only contains the "foo" portion. The first argument is a pointer
1434 * to the "foo", and the second argument is the type symbol to prefix.
1435 * Forces the next token to be a "WORD".
1436 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1440 S_force_ident(pTHX_ register const char *s, int kind)
1444 PERL_ARGS_ASSERT_FORCE_IDENT;
1447 const STRLEN len = strlen(s);
1448 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1449 start_force(PL_curforce);
1450 NEXTVAL_NEXTTOKE.opval = o;
1453 o->op_private = OPpCONST_ENTERED;
1454 /* XXX see note in pp_entereval() for why we forgo typo
1455 warnings if the symbol must be introduced in an eval.
1457 gv_fetchpvn_flags(s, len,
1458 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1460 kind == '$' ? SVt_PV :
1461 kind == '@' ? SVt_PVAV :
1462 kind == '%' ? SVt_PVHV :
1470 Perl_str_to_version(pTHX_ SV *sv)
1475 const char *start = SvPV_const(sv,len);
1476 const char * const end = start + len;
1477 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1479 PERL_ARGS_ASSERT_STR_TO_VERSION;
1481 while (start < end) {
1485 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1490 retval += ((NV)n)/nshift;
1499 * Forces the next token to be a version number.
1500 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1501 * and if "guessing" is TRUE, then no new token is created (and the caller
1502 * must use an alternative parsing method).
1506 S_force_version(pTHX_ char *s, int guessing)
1512 I32 startoff = s - SvPVX(PL_linestr);
1515 PERL_ARGS_ASSERT_FORCE_VERSION;
1523 while (isDIGIT(*d) || *d == '_' || *d == '.')
1527 start_force(PL_curforce);
1528 curmad('X', newSVpvn(s,d-s));
1531 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1533 s = scan_num(s, &pl_yylval);
1534 version = pl_yylval.opval;
1535 ver = cSVOPx(version)->op_sv;
1536 if (SvPOK(ver) && !SvNIOK(ver)) {
1537 SvUPGRADE(ver, SVt_PVNV);
1538 SvNV_set(ver, str_to_version(ver));
1539 SvNOK_on(ver); /* hint that it is a version */
1542 else if (guessing) {
1545 sv_free(PL_nextwhite); /* let next token collect whitespace */
1547 s = SvPVX(PL_linestr) + startoff;
1555 if (PL_madskills && !version) {
1556 sv_free(PL_nextwhite); /* let next token collect whitespace */
1558 s = SvPVX(PL_linestr) + startoff;
1561 /* NOTE: The parser sees the package name and the VERSION swapped */
1562 start_force(PL_curforce);
1563 NEXTVAL_NEXTTOKE.opval = version;
1571 * Tokenize a quoted string passed in as an SV. It finds the next
1572 * chunk, up to end of string or a backslash. It may make a new
1573 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1578 S_tokeq(pTHX_ SV *sv)
1582 register char *send;
1587 PERL_ARGS_ASSERT_TOKEQ;
1592 s = SvPV_force(sv, len);
1593 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1596 while (s < send && *s != '\\')
1601 if ( PL_hints & HINT_NEW_STRING ) {
1602 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1606 if (s + 1 < send && (s[1] == '\\'))
1607 s++; /* all that, just for this */
1612 SvCUR_set(sv, d - SvPVX_const(sv));
1614 if ( PL_hints & HINT_NEW_STRING )
1615 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1620 * Now come three functions related to double-quote context,
1621 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1622 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1623 * interact with PL_lex_state, and create fake ( ... ) argument lists
1624 * to handle functions and concatenation.
1625 * They assume that whoever calls them will be setting up a fake
1626 * join call, because each subthing puts a ',' after it. This lets
1629 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1631 * (I'm not sure whether the spurious commas at the end of lcfirst's
1632 * arguments and join's arguments are created or not).
1637 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1639 * Pattern matching will set PL_lex_op to the pattern-matching op to
1640 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1642 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1644 * Everything else becomes a FUNC.
1646 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1647 * had an OP_CONST or OP_READLINE). This just sets us up for a
1648 * call to S_sublex_push().
1652 S_sublex_start(pTHX)
1655 register const I32 op_type = pl_yylval.ival;
1657 if (op_type == OP_NULL) {
1658 pl_yylval.opval = PL_lex_op;
1662 if (op_type == OP_CONST || op_type == OP_READLINE) {
1663 SV *sv = tokeq(PL_lex_stuff);
1665 if (SvTYPE(sv) == SVt_PVIV) {
1666 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1668 const char * const p = SvPV_const(sv, len);
1669 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1673 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1674 PL_lex_stuff = NULL;
1675 /* Allow <FH> // "foo" */
1676 if (op_type == OP_READLINE)
1677 PL_expect = XTERMORDORDOR;
1680 else if (op_type == OP_BACKTICK && PL_lex_op) {
1681 /* readpipe() vas overriden */
1682 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1683 pl_yylval.opval = PL_lex_op;
1685 PL_lex_stuff = NULL;
1689 PL_sublex_info.super_state = PL_lex_state;
1690 PL_sublex_info.sub_inwhat = (U16)op_type;
1691 PL_sublex_info.sub_op = PL_lex_op;
1692 PL_lex_state = LEX_INTERPPUSH;
1696 pl_yylval.opval = PL_lex_op;
1706 * Create a new scope to save the lexing state. The scope will be
1707 * ended in S_sublex_done. Returns a '(', starting the function arguments
1708 * to the uc, lc, etc. found before.
1709 * Sets PL_lex_state to LEX_INTERPCONCAT.
1718 PL_lex_state = PL_sublex_info.super_state;
1719 SAVEBOOL(PL_lex_dojoin);
1720 SAVEI32(PL_lex_brackets);
1721 SAVEI32(PL_lex_casemods);
1722 SAVEI32(PL_lex_starts);
1723 SAVEI8(PL_lex_state);
1724 SAVEVPTR(PL_lex_inpat);
1725 SAVEI16(PL_lex_inwhat);
1726 SAVECOPLINE(PL_curcop);
1727 SAVEPPTR(PL_bufptr);
1728 SAVEPPTR(PL_bufend);
1729 SAVEPPTR(PL_oldbufptr);
1730 SAVEPPTR(PL_oldoldbufptr);
1731 SAVEPPTR(PL_last_lop);
1732 SAVEPPTR(PL_last_uni);
1733 SAVEPPTR(PL_linestart);
1734 SAVESPTR(PL_linestr);
1735 SAVEGENERICPV(PL_lex_brackstack);
1736 SAVEGENERICPV(PL_lex_casestack);
1738 PL_linestr = PL_lex_stuff;
1739 PL_lex_stuff = NULL;
1741 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1742 = SvPVX(PL_linestr);
1743 PL_bufend += SvCUR(PL_linestr);
1744 PL_last_lop = PL_last_uni = NULL;
1745 SAVEFREESV(PL_linestr);
1747 PL_lex_dojoin = FALSE;
1748 PL_lex_brackets = 0;
1749 Newx(PL_lex_brackstack, 120, char);
1750 Newx(PL_lex_casestack, 12, char);
1751 PL_lex_casemods = 0;
1752 *PL_lex_casestack = '\0';
1754 PL_lex_state = LEX_INTERPCONCAT;
1755 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1757 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1758 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1759 PL_lex_inpat = PL_sublex_info.sub_op;
1761 PL_lex_inpat = NULL;
1768 * Restores lexer state after a S_sublex_push.
1775 if (!PL_lex_starts++) {
1776 SV * const sv = newSVpvs("");
1777 if (SvUTF8(PL_linestr))
1779 PL_expect = XOPERATOR;
1780 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1784 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1785 PL_lex_state = LEX_INTERPCASEMOD;
1789 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1790 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1791 PL_linestr = PL_lex_repl;
1793 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1794 PL_bufend += SvCUR(PL_linestr);
1795 PL_last_lop = PL_last_uni = NULL;
1796 SAVEFREESV(PL_linestr);
1797 PL_lex_dojoin = FALSE;
1798 PL_lex_brackets = 0;
1799 PL_lex_casemods = 0;
1800 *PL_lex_casestack = '\0';
1802 if (SvEVALED(PL_lex_repl)) {
1803 PL_lex_state = LEX_INTERPNORMAL;
1805 /* we don't clear PL_lex_repl here, so that we can check later
1806 whether this is an evalled subst; that means we rely on the
1807 logic to ensure sublex_done() is called again only via the
1808 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1811 PL_lex_state = LEX_INTERPCONCAT;
1821 PL_endwhite = newSVpvs("");
1822 sv_catsv(PL_endwhite, PL_thiswhite);
1826 sv_setpvs(PL_thistoken,"");
1828 PL_realtokenstart = -1;
1832 PL_bufend = SvPVX(PL_linestr);
1833 PL_bufend += SvCUR(PL_linestr);
1834 PL_expect = XOPERATOR;
1835 PL_sublex_info.sub_inwhat = 0;
1843 Extracts a pattern, double-quoted string, or transliteration. This
1846 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1847 processing a pattern (PL_lex_inpat is true), a transliteration
1848 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1850 Returns a pointer to the character scanned up to. If this is
1851 advanced from the start pointer supplied (i.e. if anything was
1852 successfully parsed), will leave an OP for the substring scanned
1853 in pl_yylval. Caller must intuit reason for not parsing further
1854 by looking at the next characters herself.
1858 double-quoted style: \r and \n
1859 regexp special ones: \D \s
1862 case and quoting: \U \Q \E
1863 stops on @ and $, but not for $ as tail anchor
1865 In transliterations:
1866 characters are VERY literal, except for - not at the start or end
1867 of the string, which indicates a range. If the range is in bytes,
1868 scan_const expands the range to the full set of intermediate
1869 characters. If the range is in utf8, the hyphen is replaced with
1870 a certain range mark which will be handled by pmtrans() in op.c.
1872 In double-quoted strings:
1874 double-quoted style: \r and \n
1876 deprecated backrefs: \1 (in substitution replacements)
1877 case and quoting: \U \Q \E
1880 scan_const does *not* construct ops to handle interpolated strings.
1881 It stops processing as soon as it finds an embedded $ or @ variable
1882 and leaves it to the caller to work out what's going on.
1884 embedded arrays (whether in pattern or not) could be:
1885 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1887 $ in double-quoted strings must be the symbol of an embedded scalar.
1889 $ in pattern could be $foo or could be tail anchor. Assumption:
1890 it's a tail anchor if $ is the last thing in the string, or if it's
1891 followed by one of "()| \r\n\t"
1893 \1 (backreferences) are turned into $1
1895 The structure of the code is
1896 while (there's a character to process) {
1897 handle transliteration ranges
1898 skip regexp comments /(?#comment)/ and codes /(?{code})/
1899 skip #-initiated comments in //x patterns
1900 check for embedded arrays
1901 check for embedded scalars
1903 leave intact backslashes from leaveit (below)
1904 deprecate \1 in substitution replacements
1905 handle string-changing backslashes \l \U \Q \E, etc.
1906 switch (what was escaped) {
1907 handle \- in a transliteration (becomes a literal -)
1908 handle \132 (octal characters)
1909 handle \x15 and \x{1234} (hex characters)
1910 handle \N{name} (named characters)
1911 handle \cV (control characters)
1912 handle printf-style backslashes (\f, \r, \n, etc)
1915 } (end if backslash)
1916 handle regular character
1917 } (end while character to read)
1922 S_scan_const(pTHX_ char *start)
1925 register char *send = PL_bufend; /* end of the constant */
1926 SV *sv = newSV(send - start); /* sv for the constant. See
1927 note below on sizing. */
1928 register char *s = start; /* start of the constant */
1929 register char *d = SvPVX(sv); /* destination for copies */
1930 bool dorange = FALSE; /* are we in a translit range? */
1931 bool didrange = FALSE; /* did we just finish a range? */
1932 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1933 I32 this_utf8 = UTF; /* Is the source string assumed
1934 to be UTF8? But, this can
1935 show as true when the source
1936 isn't utf8, as for example
1937 when it is entirely composed
1940 /* Note on sizing: The scanned constant is placed into sv, which is
1941 * initialized by newSV() assuming one byte of output for every byte of
1942 * input. This routine expects newSV() to allocate an extra byte for a
1943 * trailing NUL, which this routine will append if it gets to the end of
1944 * the input. There may be more bytes of input than output (eg., \N{LATIN
1945 * CAPITAL LETTER A}), or more output than input if the constant ends up
1946 * recoded to utf8, but each time a construct is found that might increase
1947 * the needed size, SvGROW() is called. Its size parameter each time is
1948 * based on the best guess estimate at the time, namely the length used so
1949 * far, plus the length the current construct will occupy, plus room for
1950 * the trailing NUL, plus one byte for every input byte still unscanned */
1954 UV literal_endpoint = 0;
1955 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1958 PERL_ARGS_ASSERT_SCAN_CONST;
1960 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1961 /* If we are doing a trans and we know we want UTF8 set expectation */
1962 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1963 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1967 while (s < send || dorange) {
1968 /* get transliterations out of the way (they're most literal) */
1969 if (PL_lex_inwhat == OP_TRANS) {
1970 /* expand a range A-Z to the full set of characters. AIE! */
1972 I32 i; /* current expanded character */
1973 I32 min; /* first character in range */
1974 I32 max; /* last character in range */
1985 char * const c = (char*)utf8_hop((U8*)d, -1);
1989 *c = (char)UTF_TO_NATIVE(0xff);
1990 /* mark the range as done, and continue */
1996 i = d - SvPVX_const(sv); /* remember current offset */
1999 SvLEN(sv) + (has_utf8 ?
2000 (512 - UTF_CONTINUATION_MARK +
2003 /* How many two-byte within 0..255: 128 in UTF-8,
2004 * 96 in UTF-8-mod. */
2006 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2008 d = SvPVX(sv) + i; /* refresh d after realloc */
2012 for (j = 0; j <= 1; j++) {
2013 char * const c = (char*)utf8_hop((U8*)d, -1);
2014 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2020 max = (U8)0xff; /* only to \xff */
2021 uvmax = uv; /* \x{100} to uvmax */
2023 d = c; /* eat endpoint chars */
2028 d -= 2; /* eat the first char and the - */
2029 min = (U8)*d; /* first char in range */
2030 max = (U8)d[1]; /* last char in range */
2037 "Invalid range \"%c-%c\" in transliteration operator",
2038 (char)min, (char)max);
2042 if (literal_endpoint == 2 &&
2043 ((isLOWER(min) && isLOWER(max)) ||
2044 (isUPPER(min) && isUPPER(max)))) {
2046 for (i = min; i <= max; i++)
2048 *d++ = NATIVE_TO_NEED(has_utf8,i);
2050 for (i = min; i <= max; i++)
2052 *d++ = NATIVE_TO_NEED(has_utf8,i);
2057 for (i = min; i <= max; i++)
2060 const U8 ch = (U8)NATIVE_TO_UTF(i);
2061 if (UNI_IS_INVARIANT(ch))
2064 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2065 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2074 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2076 *d++ = (char)UTF_TO_NATIVE(0xff);
2078 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2082 /* mark the range as done, and continue */
2086 literal_endpoint = 0;
2091 /* range begins (ignore - as first or last char) */
2092 else if (*s == '-' && s+1 < send && s != start) {
2094 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2101 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2111 literal_endpoint = 0;
2112 native_range = TRUE;
2117 /* if we get here, we're not doing a transliteration */
2119 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2120 except for the last char, which will be done separately. */
2121 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2123 while (s+1 < send && *s != ')')
2124 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2126 else if (s[2] == '{' /* This should match regcomp.c */
2127 || (s[2] == '?' && s[3] == '{'))
2130 char *regparse = s + (s[2] == '{' ? 3 : 4);
2133 while (count && (c = *regparse)) {
2134 if (c == '\\' && regparse[1])
2142 if (*regparse != ')')
2143 regparse--; /* Leave one char for continuation. */
2144 while (s < regparse)
2145 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2149 /* likewise skip #-initiated comments in //x patterns */
2150 else if (*s == '#' && PL_lex_inpat &&
2151 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2152 while (s+1 < send && *s != '\n')
2153 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2156 /* check for embedded arrays
2157 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2159 else if (*s == '@' && s[1]) {
2160 if (isALNUM_lazy_if(s+1,UTF))
2162 if (strchr(":'{$", s[1]))
2164 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2165 break; /* in regexp, neither @+ nor @- are interpolated */
2168 /* check for embedded scalars. only stop if we're sure it's a
2171 else if (*s == '$') {
2172 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2174 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2176 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2177 "Possible unintended interpolation of $\\ in regex");
2179 break; /* in regexp, $ might be tail anchor */
2183 /* End of else if chain - OP_TRANS rejoin rest */
2186 if (*s == '\\' && s+1 < send) {
2189 /* deprecate \1 in strings and substitution replacements */
2190 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2191 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2193 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2198 /* string-change backslash escapes */
2199 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2203 /* skip any other backslash escapes in a pattern */
2204 else if (PL_lex_inpat) {
2205 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2206 goto default_action;
2209 /* if we get here, it's either a quoted -, or a digit */
2212 /* quoted - in transliterations */
2214 if (PL_lex_inwhat == OP_TRANS) {
2221 if ((isALPHA(*s) || isDIGIT(*s)))
2222 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2223 "Unrecognized escape \\%c passed through",
2225 /* default action is to copy the quoted character */
2226 goto default_action;
2229 /* eg. \132 indicates the octal constant 0x132 */
2230 case '0': case '1': case '2': case '3':
2231 case '4': case '5': case '6': case '7':
2235 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2238 goto NUM_ESCAPE_INSERT;
2240 /* eg. \x24 indicates the hex constant 0x24 */
2244 char* const e = strchr(s, '}');
2245 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2246 PERL_SCAN_DISALLOW_PREFIX;
2251 yyerror("Missing right brace on \\x{}");
2255 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2261 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2262 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2268 /* Insert oct, hex, or \N{U+...} escaped character. There will
2269 * always be enough room in sv since such escapes will be
2270 * longer than any UTF-8 sequence they can end up as, except if
2271 * they force us to recode the rest of the string into utf8 */
2273 /* Here uv is the ordinal of the next character being added in
2274 * unicode (converted from native). (It has to be done before
2275 * here because \N is interpreted as unicode, and oct and hex
2277 if (!UNI_IS_INVARIANT(uv)) {
2278 if (!has_utf8 && uv > 255) {
2279 /* Might need to recode whatever we have accumulated so
2280 * far if it contains any chars variant in utf8 or
2283 SvCUR_set(sv, d - SvPVX_const(sv));
2286 /* See Note on sizing above. */
2287 sv_utf8_upgrade_flags_grow(sv,
2288 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2289 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2290 d = SvPVX(sv) + SvCUR(sv);
2295 d = (char*)uvuni_to_utf8((U8*)d, uv);
2296 if (PL_lex_inwhat == OP_TRANS &&
2297 PL_sublex_info.sub_op) {
2298 PL_sublex_info.sub_op->op_private |=
2299 (PL_lex_repl ? OPpTRANS_FROM_UTF
2303 if (uv > 255 && !dorange)
2304 native_range = FALSE;
2316 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2321 char* e = strchr(s, '}');
2327 yyerror("Missing right brace on \\N{}");
2331 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2332 /* \N{U+...} The ... is a unicode value even on EBCDIC
2334 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2335 PERL_SCAN_DISALLOW_PREFIX;
2338 uv = grok_hex(s, &len, &flags, NULL);
2339 if ( e > s && len != (STRLEN)(e - s) ) {
2343 goto NUM_ESCAPE_INSERT;
2345 res = newSVpvn(s + 1, e - s - 1);
2346 res = new_constant( NULL, 0, "charnames",
2347 res, NULL, s - 2, e - s + 3 );
2349 sv_utf8_upgrade(res);
2350 str = SvPV_const(res,len);
2351 #ifdef EBCDIC_NEVER_MIND
2352 /* charnames uses pack U and that has been
2353 * recently changed to do the below uni->native
2354 * mapping, so this would be redundant (and wrong,
2355 * the code point would be doubly converted).
2356 * But leave this in just in case the pack U change
2357 * gets revoked, but the semantics is still
2358 * desireable for charnames. --jhi */
2360 UV uv = utf8_to_uvchr((const U8*)str, 0);
2363 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2365 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2366 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2367 str = SvPV_const(res, len);
2371 /* If destination is not in utf8 but this new character is,
2372 * recode the dest to utf8 */
2373 if (!has_utf8 && SvUTF8(res)) {
2374 SvCUR_set(sv, d - SvPVX_const(sv));
2377 /* See Note on sizing above. */
2378 sv_utf8_upgrade_flags_grow(sv,
2379 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2380 len + (STRLEN)(send - s) + 1);
2381 d = SvPVX(sv) + SvCUR(sv);
2383 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2385 /* See Note on sizing above. (NOTE: SvCUR() is not set
2386 * correctly here). */
2387 const STRLEN off = d - SvPVX_const(sv);
2388 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2392 native_range = FALSE; /* \N{} is guessed to be Unicode */
2394 Copy(str, d, len, char);
2401 yyerror("Missing braces on \\N{}");
2404 /* \c is a control character */
2413 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2416 yyerror("Missing control char name in \\c");
2420 /* printf-style backslashes, formfeeds, newlines, etc */
2422 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2425 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2428 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2431 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2434 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2437 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2440 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2446 } /* end if (backslash) */
2453 /* If we started with encoded form, or already know we want it,
2454 then encode the next character */
2455 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2459 /* One might think that it is wasted effort in the case of the
2460 * source being utf8 (this_utf8 == TRUE) to take the next character
2461 * in the source, convert it to an unsigned value, and then convert
2462 * it back again. But the source has not been validated here. The
2463 * routine that does the conversion checks for errors like
2466 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2467 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2469 SvCUR_set(sv, d - SvPVX_const(sv));
2472 /* See Note on sizing above. */
2473 sv_utf8_upgrade_flags_grow(sv,
2474 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2475 need + (STRLEN)(send - s) + 1);
2476 d = SvPVX(sv) + SvCUR(sv);
2478 } else if (need > len) {
2479 /* encoded value larger than old, may need extra space (NOTE:
2480 * SvCUR() is not set correctly here). See Note on sizing
2482 const STRLEN off = d - SvPVX_const(sv);
2483 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2487 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2489 if (uv > 255 && !dorange)
2490 native_range = FALSE;
2494 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2496 } /* while loop to process each character */
2498 /* terminate the string and set up the sv */
2500 SvCUR_set(sv, d - SvPVX_const(sv));
2501 if (SvCUR(sv) >= SvLEN(sv))
2502 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2505 if (PL_encoding && !has_utf8) {
2506 sv_recode_to_utf8(sv, PL_encoding);
2512 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2513 PL_sublex_info.sub_op->op_private |=
2514 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2518 /* shrink the sv if we allocated more than we used */
2519 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2520 SvPV_shrink_to_cur(sv);
2523 /* return the substring (via pl_yylval) only if we parsed anything */
2524 if (s > PL_bufptr) {
2525 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2526 const char *const key = PL_lex_inpat ? "qr" : "q";
2527 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2531 if (PL_lex_inwhat == OP_TRANS) {
2534 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2542 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2545 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2552 * Returns TRUE if there's more to the expression (e.g., a subscript),
2555 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2557 * ->[ and ->{ return TRUE
2558 * { and [ outside a pattern are always subscripts, so return TRUE
2559 * if we're outside a pattern and it's not { or [, then return FALSE
2560 * if we're in a pattern and the first char is a {
2561 * {4,5} (any digits around the comma) returns FALSE
2562 * if we're in a pattern and the first char is a [
2564 * [SOMETHING] has a funky algorithm to decide whether it's a
2565 * character class or not. It has to deal with things like
2566 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2567 * anything else returns TRUE
2570 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2573 S_intuit_more(pTHX_ register char *s)
2577 PERL_ARGS_ASSERT_INTUIT_MORE;
2579 if (PL_lex_brackets)
2581 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2583 if (*s != '{' && *s != '[')
2588 /* In a pattern, so maybe we have {n,m}. */
2605 /* On the other hand, maybe we have a character class */
2608 if (*s == ']' || *s == '^')
2611 /* this is terrifying, and it works */
2612 int weight = 2; /* let's weigh the evidence */
2614 unsigned char un_char = 255, last_un_char;
2615 const char * const send = strchr(s,']');
2616 char tmpbuf[sizeof PL_tokenbuf * 4];
2618 if (!send) /* has to be an expression */
2621 Zero(seen,256,char);
2624 else if (isDIGIT(*s)) {
2626 if (isDIGIT(s[1]) && s[2] == ']')
2632 for (; s < send; s++) {
2633 last_un_char = un_char;
2634 un_char = (unsigned char)*s;
2639 weight -= seen[un_char] * 10;
2640 if (isALNUM_lazy_if(s+1,UTF)) {
2642 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2643 len = (int)strlen(tmpbuf);
2644 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2649 else if (*s == '$' && s[1] &&
2650 strchr("[#!%*<>()-=",s[1])) {
2651 if (/*{*/ strchr("])} =",s[2]))
2660 if (strchr("wds]",s[1]))
2662 else if (seen[(U8)'\''] || seen[(U8)'"'])
2664 else if (strchr("rnftbxcav",s[1]))
2666 else if (isDIGIT(s[1])) {
2668 while (s[1] && isDIGIT(s[1]))
2678 if (strchr("aA01! ",last_un_char))
2680 if (strchr("zZ79~",s[1]))
2682 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2683 weight -= 5; /* cope with negative subscript */
2686 if (!isALNUM(last_un_char)
2687 && !(last_un_char == '$' || last_un_char == '@'
2688 || last_un_char == '&')
2689 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2694 if (keyword(tmpbuf, d - tmpbuf, 0))
2697 if (un_char == last_un_char + 1)
2699 weight -= seen[un_char];
2704 if (weight >= 0) /* probably a character class */
2714 * Does all the checking to disambiguate
2716 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2717 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2719 * First argument is the stuff after the first token, e.g. "bar".
2721 * Not a method if bar is a filehandle.
2722 * Not a method if foo is a subroutine prototyped to take a filehandle.
2723 * Not a method if it's really "Foo $bar"
2724 * Method if it's "foo $bar"
2725 * Not a method if it's really "print foo $bar"
2726 * Method if it's really "foo package::" (interpreted as package->foo)
2727 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2728 * Not a method if bar is a filehandle or package, but is quoted with
2733 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2736 char *s = start + (*start == '$');
2737 char tmpbuf[sizeof PL_tokenbuf];
2744 PERL_ARGS_ASSERT_INTUIT_METHOD;
2747 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2751 const char *proto = SvPVX_const(cv);
2762 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2763 /* start is the beginning of the possible filehandle/object,
2764 * and s is the end of it
2765 * tmpbuf is a copy of it
2768 if (*start == '$') {
2769 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2770 isUPPER(*PL_tokenbuf))
2773 len = start - SvPVX(PL_linestr);
2777 start = SvPVX(PL_linestr) + len;
2781 return *s == '(' ? FUNCMETH : METHOD;
2783 if (!keyword(tmpbuf, len, 0)) {
2784 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2788 soff = s - SvPVX(PL_linestr);
2792 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2793 if (indirgv && GvCVu(indirgv))
2795 /* filehandle or package name makes it a method */
2796 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2798 soff = s - SvPVX(PL_linestr);
2801 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2802 return 0; /* no assumptions -- "=>" quotes bearword */
2804 start_force(PL_curforce);
2805 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2806 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
2807 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2809 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2814 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2816 return *s == '(' ? FUNCMETH : METHOD;
2822 /* Encoded script support. filter_add() effectively inserts a
2823 * 'pre-processing' function into the current source input stream.
2824 * Note that the filter function only applies to the current source file
2825 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2827 * The datasv parameter (which may be NULL) can be used to pass
2828 * private data to this instance of the filter. The filter function
2829 * can recover the SV using the FILTER_DATA macro and use it to
2830 * store private buffers and state information.
2832 * The supplied datasv parameter is upgraded to a PVIO type
2833 * and the IoDIRP/IoANY field is used to store the function pointer,
2834 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2835 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2836 * private use must be set using malloc'd pointers.
2840 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2849 if (!PL_rsfp_filters)
2850 PL_rsfp_filters = newAV();
2853 SvUPGRADE(datasv, SVt_PVIO);
2854 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2855 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2856 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2857 FPTR2DPTR(void *, IoANY(datasv)),
2858 SvPV_nolen(datasv)));
2859 av_unshift(PL_rsfp_filters, 1);
2860 av_store(PL_rsfp_filters, 0, datasv) ;
2865 /* Delete most recently added instance of this filter function. */
2867 Perl_filter_del(pTHX_ filter_t funcp)
2872 PERL_ARGS_ASSERT_FILTER_DEL;
2875 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2876 FPTR2DPTR(void*, funcp)));
2878 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2880 /* if filter is on top of stack (usual case) just pop it off */
2881 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2882 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2883 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2884 IoANY(datasv) = (void *)NULL;
2885 sv_free(av_pop(PL_rsfp_filters));
2889 /* we need to search for the correct entry and clear it */
2890 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2894 /* Invoke the idxth filter function for the current rsfp. */
2895 /* maxlen 0 = read one text line */
2897 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2902 /* This API is bad. It should have been using unsigned int for maxlen.
2903 Not sure if we want to change the API, but if not we should sanity
2904 check the value here. */
2905 const unsigned int correct_length
2914 PERL_ARGS_ASSERT_FILTER_READ;
2916 if (!PL_parser || !PL_rsfp_filters)
2918 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2919 /* Provide a default input filter to make life easy. */
2920 /* Note that we append to the line. This is handy. */
2921 DEBUG_P(PerlIO_printf(Perl_debug_log,
2922 "filter_read %d: from rsfp\n", idx));
2923 if (correct_length) {
2926 const int old_len = SvCUR(buf_sv);
2928 /* ensure buf_sv is large enough */
2929 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2930 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2931 correct_length)) <= 0) {
2932 if (PerlIO_error(PL_rsfp))
2933 return -1; /* error */
2935 return 0 ; /* end of file */
2937 SvCUR_set(buf_sv, old_len + len) ;
2940 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2941 if (PerlIO_error(PL_rsfp))
2942 return -1; /* error */
2944 return 0 ; /* end of file */
2947 return SvCUR(buf_sv);
2949 /* Skip this filter slot if filter has been deleted */
2950 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2951 DEBUG_P(PerlIO_printf(Perl_debug_log,
2952 "filter_read %d: skipped (filter deleted)\n",
2954 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2956 /* Get function pointer hidden within datasv */
2957 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2958 DEBUG_P(PerlIO_printf(Perl_debug_log,
2959 "filter_read %d: via function %p (%s)\n",
2960 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2961 /* Call function. The function is expected to */
2962 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2963 /* Return: <0:error, =0:eof, >0:not eof */
2964 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2968 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2972 PERL_ARGS_ASSERT_FILTER_GETS;
2974 #ifdef PERL_CR_FILTER
2975 if (!PL_rsfp_filters) {
2976 filter_add(S_cr_textfilter,NULL);
2979 if (PL_rsfp_filters) {
2981 SvCUR_set(sv, 0); /* start with empty line */
2982 if (FILTER_READ(0, sv, 0) > 0)
2983 return ( SvPVX(sv) ) ;
2988 return (sv_gets(sv, fp, append));
2992 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
2997 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2999 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3003 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3004 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3006 return GvHV(gv); /* Foo:: */
3009 /* use constant CLASS => 'MyClass' */
3010 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3011 if (gv && GvCV(gv)) {
3012 SV * const sv = cv_const_sv(GvCV(gv));
3014 pkgname = SvPV_const(sv, len);
3017 return gv_stashpvn(pkgname, len, 0);
3021 * S_readpipe_override
3022 * Check whether readpipe() is overriden, and generates the appropriate
3023 * optree, provided sublex_start() is called afterwards.
3026 S_readpipe_override(pTHX)
3029 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3030 pl_yylval.ival = OP_BACKTICK;
3032 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3034 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3035 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3036 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3038 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3039 append_elem(OP_LIST,
3040 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3041 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3048 * The intent of this yylex wrapper is to minimize the changes to the
3049 * tokener when we aren't interested in collecting madprops. It remains
3050 * to be seen how successful this strategy will be...
3057 char *s = PL_bufptr;
3059 /* make sure PL_thiswhite is initialized */
3063 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3064 if (PL_pending_ident)
3065 return S_pending_ident(aTHX);
3067 /* previous token ate up our whitespace? */
3068 if (!PL_lasttoke && PL_nextwhite) {
3069 PL_thiswhite = PL_nextwhite;
3073 /* isolate the token, and figure out where it is without whitespace */
3074 PL_realtokenstart = -1;
3078 assert(PL_curforce < 0);
3080 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3081 if (!PL_thistoken) {
3082 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3083 PL_thistoken = newSVpvs("");
3085 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3086 PL_thistoken = newSVpvn(tstart, s - tstart);
3089 if (PL_thismad) /* install head */
3090 CURMAD('X', PL_thistoken);
3093 /* last whitespace of a sublex? */
3094 if (optype == ')' && PL_endwhite) {
3095 CURMAD('X', PL_endwhite);
3100 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3101 if (!PL_thiswhite && !PL_endwhite && !optype) {
3102 sv_free(PL_thistoken);
3107 /* put off final whitespace till peg */
3108 if (optype == ';' && !PL_rsfp) {
3109 PL_nextwhite = PL_thiswhite;
3112 else if (PL_thisopen) {
3113 CURMAD('q', PL_thisopen);
3115 sv_free(PL_thistoken);
3119 /* Store actual token text as madprop X */
3120 CURMAD('X', PL_thistoken);
3124 /* add preceding whitespace as madprop _ */
3125 CURMAD('_', PL_thiswhite);
3129 /* add quoted material as madprop = */
3130 CURMAD('=', PL_thisstuff);
3134 /* add terminating quote as madprop Q */
3135 CURMAD('Q', PL_thisclose);
3139 /* special processing based on optype */
3143 /* opval doesn't need a TOKEN since it can already store mp */
3153 if (pl_yylval.opval)
3154 append_madprops(PL_thismad, pl_yylval.opval, 0);
3162 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3171 /* remember any fake bracket that lexer is about to discard */
3172 if (PL_lex_brackets == 1 &&
3173 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3176 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3179 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3180 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3183 break; /* don't bother looking for trailing comment */
3192 /* attach a trailing comment to its statement instead of next token */
3196 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3198 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3200 if (*s == '\n' || *s == '#') {
3201 while (s < PL_bufend && *s != '\n')
3205 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3206 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3223 /* Create new token struct. Note: opvals return early above. */
3224 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3231 S_tokenize_use(pTHX_ int is_use, char *s) {
3234 PERL_ARGS_ASSERT_TOKENIZE_USE;
3236 if (PL_expect != XSTATE)
3237 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3238 is_use ? "use" : "no"));
3240 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3241 s = force_version(s, TRUE);
3242 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3243 start_force(PL_curforce);
3244 NEXTVAL_NEXTTOKE.opval = NULL;
3247 else if (*s == 'v') {
3248 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3249 s = force_version(s, FALSE);
3253 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3254 s = force_version(s, FALSE);
3256 pl_yylval.ival = is_use;
3260 static const char* const exp_name[] =
3261 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3262 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3269 Works out what to call the token just pulled out of the input
3270 stream. The yacc parser takes care of taking the ops we return and
3271 stitching them into a tree.
3277 if read an identifier
3278 if we're in a my declaration
3279 croak if they tried to say my($foo::bar)
3280 build the ops for a my() declaration
3281 if it's an access to a my() variable
3282 are we in a sort block?
3283 croak if my($a); $a <=> $b
3284 build ops for access to a my() variable
3285 if in a dq string, and they've said @foo and we can't find @foo
3287 build ops for a bareword
3288 if we already built the token before, use it.
3293 #pragma segment Perl_yylex
3299 register char *s = PL_bufptr;
3304 /* orig_keyword, gvp, and gv are initialized here because
3305 * jump to the label just_a_word_zero can bypass their
3306 * initialization later. */
3307 I32 orig_keyword = 0;
3312 SV* tmp = newSVpvs("");
3313 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3314 (IV)CopLINE(PL_curcop),
3315 lex_state_names[PL_lex_state],
3316 exp_name[PL_expect],
3317 pv_display(tmp, s, strlen(s), 0, 60));
3320 /* check if there's an identifier for us to look at */
3321 if (PL_pending_ident)
3322 return REPORT(S_pending_ident(aTHX));
3324 /* no identifier pending identification */
3326 switch (PL_lex_state) {
3328 case LEX_NORMAL: /* Some compilers will produce faster */
3329 case LEX_INTERPNORMAL: /* code if we comment these out. */
3333 /* when we've already built the next token, just pull it out of the queue */
3337 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3339 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3340 PL_nexttoke[PL_lasttoke].next_mad = 0;
3341 if (PL_thismad && PL_thismad->mad_key == '_') {
3342 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3343 PL_thismad->mad_val = 0;
3344 mad_free(PL_thismad);
3349 PL_lex_state = PL_lex_defer;
3350 PL_expect = PL_lex_expect;
3351 PL_lex_defer = LEX_NORMAL;
3352 if (!PL_nexttoke[PL_lasttoke].next_type)
3357 pl_yylval = PL_nextval[PL_nexttoke];
3359 PL_lex_state = PL_lex_defer;
3360 PL_expect = PL_lex_expect;
3361 PL_lex_defer = LEX_NORMAL;
3365 /* FIXME - can these be merged? */
3366 return(PL_nexttoke[PL_lasttoke].next_type);
3368 return REPORT(PL_nexttype[PL_nexttoke]);
3371 /* interpolated case modifiers like \L \U, including \Q and \E.
3372 when we get here, PL_bufptr is at the \
3374 case LEX_INTERPCASEMOD:
3376 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3377 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3379 /* handle \E or end of string */
3380 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3382 if (PL_lex_casemods) {
3383 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3384 PL_lex_casestack[PL_lex_casemods] = '\0';
3386 if (PL_bufptr != PL_bufend
3387 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3389 PL_lex_state = LEX_INTERPCONCAT;
3392 PL_thistoken = newSVpvs("\\E");
3398 while (PL_bufptr != PL_bufend &&
3399 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3401 PL_thiswhite = newSVpvs("");
3402 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3406 if (PL_bufptr != PL_bufend)
3409 PL_lex_state = LEX_INTERPCONCAT;
3413 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3414 "### Saw case modifier\n"); });
3416 if (s[1] == '\\' && s[2] == 'E') {
3419 PL_thiswhite = newSVpvs("");
3420 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3423 PL_lex_state = LEX_INTERPCONCAT;
3428 if (!PL_madskills) /* when just compiling don't need correct */
3429 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3430 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3431 if ((*s == 'L' || *s == 'U') &&
3432 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3433 PL_lex_casestack[--PL_lex_casemods] = '\0';
3436 if (PL_lex_casemods > 10)
3437 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3438 PL_lex_casestack[PL_lex_casemods++] = *s;
3439 PL_lex_casestack[PL_lex_casemods] = '\0';
3440 PL_lex_state = LEX_INTERPCONCAT;
3441 start_force(PL_curforce);
3442 NEXTVAL_NEXTTOKE.ival = 0;
3444 start_force(PL_curforce);
3446 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3448 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3450 NEXTVAL_NEXTTOKE.ival = OP_LC;
3452 NEXTVAL_NEXTTOKE.ival = OP_UC;
3454 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3456 Perl_croak(aTHX_ "panic: yylex");
3458 SV* const tmpsv = newSVpvs("\\ ");
3459 /* replace the space with the character we want to escape
3461 SvPVX(tmpsv)[1] = *s;
3467 if (PL_lex_starts) {
3473 sv_free(PL_thistoken);
3474 PL_thistoken = newSVpvs("");
3477 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3478 if (PL_lex_casemods == 1 && PL_lex_inpat)
3487 case LEX_INTERPPUSH:
3488 return REPORT(sublex_push());
3490 case LEX_INTERPSTART:
3491 if (PL_bufptr == PL_bufend)
3492 return REPORT(sublex_done());
3493 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3494 "### Interpolated variable\n"); });
3496 PL_lex_dojoin = (*PL_bufptr == '@');
3497 PL_lex_state = LEX_INTERPNORMAL;
3498 if (PL_lex_dojoin) {
3499 start_force(PL_curforce);
3500 NEXTVAL_NEXTTOKE.ival = 0;
3502 start_force(PL_curforce);
3503 force_ident("\"", '$');
3504 start_force(PL_curforce);
3505 NEXTVAL_NEXTTOKE.ival = 0;
3507 start_force(PL_curforce);
3508 NEXTVAL_NEXTTOKE.ival = 0;
3510 start_force(PL_curforce);
3511 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3514 if (PL_lex_starts++) {
3519 sv_free(PL_thistoken);
3520 PL_thistoken = newSVpvs("");
3523 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3524 if (!PL_lex_casemods && PL_lex_inpat)
3531 case LEX_INTERPENDMAYBE:
3532 if (intuit_more(PL_bufptr)) {
3533 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3539 if (PL_lex_dojoin) {
3540 PL_lex_dojoin = FALSE;
3541 PL_lex_state = LEX_INTERPCONCAT;
3545 sv_free(PL_thistoken);
3546 PL_thistoken = newSVpvs("");
3551 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3552 && SvEVALED(PL_lex_repl))
3554 if (PL_bufptr != PL_bufend)
3555 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3559 case LEX_INTERPCONCAT:
3561 if (PL_lex_brackets)
3562 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3564 if (PL_bufptr == PL_bufend)
3565 return REPORT(sublex_done());
3567 if (SvIVX(PL_linestr) == '\'') {
3568 SV *sv = newSVsv(PL_linestr);
3571 else if ( PL_hints & HINT_NEW_RE )
3572 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3573 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3577 s = scan_const(PL_bufptr);
3579 PL_lex_state = LEX_INTERPCASEMOD;
3581 PL_lex_state = LEX_INTERPSTART;
3584 if (s != PL_bufptr) {
3585 start_force(PL_curforce);
3587 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3589 NEXTVAL_NEXTTOKE = pl_yylval;
3592 if (PL_lex_starts++) {
3596 sv_free(PL_thistoken);
3597 PL_thistoken = newSVpvs("");
3600 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3601 if (!PL_lex_casemods && PL_lex_inpat)
3614 PL_lex_state = LEX_NORMAL;
3615 s = scan_formline(PL_bufptr);
3616 if (!PL_lex_formbrack)
3622 PL_oldoldbufptr = PL_oldbufptr;
3628 sv_free(PL_thistoken);
3631 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3635 if (isIDFIRST_lazy_if(s,UTF))
3638 unsigned char c = *s;
3639 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3640 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
3641 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
3646 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
3650 goto fake_eof; /* emulate EOF on ^D or ^Z */
3659 if (PL_lex_brackets) {
3660 yyerror((const char *)
3662 ? "Format not terminated"
3663 : "Missing right curly or square bracket"));
3665 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3666 "### Tokener got EOF\n");
3670 if (s++ < PL_bufend)
3671 goto retry; /* ignore stray nulls */
3674 if (!PL_in_eval && !PL_preambled) {
3675 PL_preambled = TRUE;
3681 /* Generate a string of Perl code to load the debugger.
3682 * If PERL5DB is set, it will return the contents of that,
3683 * otherwise a compile-time require of perl5db.pl. */
3685 const char * const pdb = PerlEnv_getenv("PERL5DB");
3688 sv_setpv(PL_linestr, pdb);
3689 sv_catpvs(PL_linestr,";");
3691 SETERRNO(0,SS_NORMAL);
3692 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3695 sv_setpvs(PL_linestr,"");
3696 if (PL_preambleav) {
3697 SV **svp = AvARRAY(PL_preambleav);
3698 SV **const end = svp + AvFILLp(PL_preambleav);
3700 sv_catsv(PL_linestr, *svp);
3702 sv_catpvs(PL_linestr, ";");
3704 sv_free(MUTABLE_SV(PL_preambleav));
3705 PL_preambleav = NULL;
3708 sv_catpvs(PL_linestr,
3709 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3710 if (PL_minus_n || PL_minus_p) {
3711 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3713 sv_catpvs(PL_linestr,"chomp;");
3716 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3717 || *PL_splitstr == '"')
3718 && strchr(PL_splitstr + 1, *PL_splitstr))
3719 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3721 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3722 bytes can be used as quoting characters. :-) */
3723 const char *splits = PL_splitstr;
3724 sv_catpvs(PL_linestr, "our @F=split(q\0");
3727 if (*splits == '\\')
3728 sv_catpvn(PL_linestr, splits, 1);
3729 sv_catpvn(PL_linestr, splits, 1);
3730 } while (*splits++);
3731 /* This loop will embed the trailing NUL of
3732 PL_linestr as the last thing it does before
3734 sv_catpvs(PL_linestr, ");");
3738 sv_catpvs(PL_linestr,"our @F=split(' ');");
3741 sv_catpvs(PL_linestr, "\n");
3742 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3743 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3744 PL_last_lop = PL_last_uni = NULL;
3745 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3746 update_debugger_info(PL_linestr, NULL, 0);
3750 bof = PL_rsfp ? TRUE : FALSE;
3751 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3754 PL_realtokenstart = -1;
3757 if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3758 PerlIO_clearerr(PL_rsfp);
3760 (void)PerlIO_close(PL_rsfp);
3762 PL_doextract = FALSE;
3764 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3770 sv_setpvs(PL_linestr, ";}continue{print;}");
3772 sv_setpvs(PL_linestr, ";}");
3773 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3774 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3775 PL_last_lop = PL_last_uni = NULL;
3776 PL_minus_n = PL_minus_p = 0;
3779 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3780 PL_last_lop = PL_last_uni = NULL;
3781 sv_setpvs(PL_linestr,"");
3782 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3784 /* If it looks like the start of a BOM or raw UTF-16,
3785 * check if it in fact is. */
3791 #ifdef PERLIO_IS_STDIO
3792 # ifdef __GNU_LIBRARY__
3793 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3794 # define FTELL_FOR_PIPE_IS_BROKEN
3798 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3799 # define FTELL_FOR_PIPE_IS_BROKEN
3804 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3806 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3807 s = swallow_bom((U8*)s);
3811 /* Incest with pod. */
3814 sv_catsv(PL_thiswhite, PL_linestr);
3816 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3817 sv_setpvs(PL_linestr, "");
3818 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3819 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3820 PL_last_lop = PL_last_uni = NULL;
3821 PL_doextract = FALSE;
3825 } while (PL_doextract);
3826 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3827 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3828 update_debugger_info(PL_linestr, NULL, 0);
3829 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3830 PL_last_lop = PL_last_uni = NULL;
3831 if (CopLINE(PL_curcop) == 1) {
3832 while (s < PL_bufend && isSPACE(*s))
3834 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3838 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3842 if (*s == '#' && *(s+1) == '!')
3844 #ifdef ALTERNATE_SHEBANG
3846 static char const as[] = ALTERNATE_SHEBANG;
3847 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3848 d = s + (sizeof(as) - 1);
3850 #endif /* ALTERNATE_SHEBANG */
3859 while (*d && !isSPACE(*d))
3863 #ifdef ARG_ZERO_IS_SCRIPT
3864 if (ipathend > ipath) {
3866 * HP-UX (at least) sets argv[0] to the script name,
3867 * which makes $^X incorrect. And Digital UNIX and Linux,
3868 * at least, set argv[0] to the basename of the Perl
3869 * interpreter. So, having found "#!", we'll set it right.
3871 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3873 assert(SvPOK(x) || SvGMAGICAL(x));
3874 if (sv_eq(x, CopFILESV(PL_curcop))) {
3875 sv_setpvn(x, ipath, ipathend - ipath);
3881 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3882 const char * const lstart = SvPV_const(x,llen);
3884 bstart += blen - llen;
3885 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3886 sv_setpvn(x, ipath, ipathend - ipath);
3891 TAINT_NOT; /* $^X is always tainted, but that's OK */
3893 #endif /* ARG_ZERO_IS_SCRIPT */
3898 d = instr(s,"perl -");
3900 d = instr(s,"perl");
3902 /* avoid getting into infinite loops when shebang
3903 * line contains "Perl" rather than "perl" */
3905 for (d = ipathend-4; d >= ipath; --d) {
3906 if ((*d == 'p' || *d == 'P')
3907 && !ibcmp(d, "perl", 4))
3917 #ifdef ALTERNATE_SHEBANG
3919 * If the ALTERNATE_SHEBANG on this system starts with a
3920 * character that can be part of a Perl expression, then if
3921 * we see it but not "perl", we're probably looking at the
3922 * start of Perl code, not a request to hand off to some
3923 * other interpreter. Similarly, if "perl" is there, but
3924 * not in the first 'word' of the line, we assume the line
3925 * contains the start of the Perl program.
3927 if (d && *s != '#') {
3928 const char *c = ipath;
3929 while (*c && !strchr("; \t\r\n\f\v#", *c))
3932 d = NULL; /* "perl" not in first word; ignore */
3934 *s = '#'; /* Don't try to parse shebang line */
3936 #endif /* ALTERNATE_SHEBANG */
3941 !instr(s,"indir") &&
3942 instr(PL_origargv[0],"perl"))
3949 while (s < PL_bufend && isSPACE(*s))
3951 if (s < PL_bufend) {
3952 Newx(newargv,PL_origargc+3,char*);
3954 while (s < PL_bufend && !isSPACE(*s))
3957 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3960 newargv = PL_origargv;
3963 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3965 Perl_croak(aTHX_ "Can't exec %s", ipath);
3968 while (*d && !isSPACE(*d))
3970 while (SPACE_OR_TAB(*d))
3974 const bool switches_done = PL_doswitches;
3975 const U32 oldpdb = PL_perldb;
3976 const bool oldn = PL_minus_n;
3977 const bool oldp = PL_minus_p;
3981 bool baduni = FALSE;
3983 const char *d2 = d1 + 1;
3984 if (parse_unicode_opts((const char **)&d2)
3988 if (baduni || *d1 == 'M' || *d1 == 'm') {
3989 const char * const m = d1;
3990 while (*d1 && !isSPACE(*d1))
3992 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3995 d1 = moreswitches(d1);
3997 if (PL_doswitches && !switches_done) {
3998 int argc = PL_origargc;
3999 char **argv = PL_origargv;
4002 } while (argc && argv[0][0] == '-' && argv[0][1]);
4003 init_argv_symbols(argc,argv);
4005 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4006 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4007 /* if we have already added "LINE: while (<>) {",
4008 we must not do it again */
4010 sv_setpvs(PL_linestr, "");
4011 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4012 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4013 PL_last_lop = PL_last_uni = NULL;
4014 PL_preambled = FALSE;
4015 if (PERLDB_LINE || PERLDB_SAVESRC)
4016 (void)gv_fetchfile(PL_origfilename);
4023 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4025 PL_lex_state = LEX_FORMLINE;
4030 #ifdef PERL_STRICT_CR
4031 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4033 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4035 case ' ': case '\t': case '\f': case 013:
4037 PL_realtokenstart = -1;
4039 PL_thiswhite = newSVpvs("");
4040 sv_catpvn(PL_thiswhite, s, 1);
4047 PL_realtokenstart = -1;
4051 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4052 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4053 /* handle eval qq[#line 1 "foo"\n ...] */
4054 CopLINE_dec(PL_curcop);
4057 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4059 if (!PL_in_eval || PL_rsfp)
4064 while (d < PL_bufend && *d != '\n')
4068 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4069 Perl_croak(aTHX_ "panic: input overflow");
4072 PL_thiswhite = newSVpvn(s, d - s);
4077 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4079 PL_lex_state = LEX_FORMLINE;
4085 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4086 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4089 TOKEN(PEG); /* make sure any #! line is accessible */
4094 /* if (PL_madskills && PL_lex_formbrack) { */
4096 while (d < PL_bufend && *d != '\n')
4100 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4101 Perl_croak(aTHX_ "panic: input overflow");
4102 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4104 PL_thiswhite = newSVpvs("");
4105 if (CopLINE(PL_curcop) == 1) {
4106 sv_setpvs(PL_thiswhite, "");
4109 sv_catpvn(PL_thiswhite, s, d - s);
4123 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4131 while (s < PL_bufend && SPACE_OR_TAB(*s))
4134 if (strnEQ(s,"=>",2)) {
4135 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4136 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4137 OPERATOR('-'); /* unary minus */
4139 PL_last_uni = PL_oldbufptr;
4141 case 'r': ftst = OP_FTEREAD; break;
4142 case 'w': ftst = OP_FTEWRITE; break;
4143 case 'x': ftst = OP_FTEEXEC; break;
4144 case 'o': ftst = OP_FTEOWNED; break;
4145 case 'R': ftst = OP_FTRREAD; break;
4146 case 'W': ftst = OP_FTRWRITE; break;
4147 case 'X': ftst = OP_FTREXEC; break;
4148 case 'O': ftst = OP_FTROWNED; break;
4149 case 'e': ftst = OP_FTIS; break;
4150 case 'z': ftst = OP_FTZERO; break;
4151 case 's': ftst = OP_FTSIZE; break;
4152 case 'f': ftst = OP_FTFILE; break;
4153 case 'd': ftst = OP_FTDIR; break;
4154 case 'l': ftst = OP_FTLINK; break;
4155 case 'p': ftst = OP_FTPIPE; break;
4156 case 'S': ftst = OP_FTSOCK; break;
4157 case 'u': ftst = OP_FTSUID; break;
4158 case 'g': ftst = OP_FTSGID; break;
4159 case 'k': ftst = OP_FTSVTX; break;
4160 case 'b': ftst = OP_FTBLK; break;
4161 case 'c': ftst = OP_FTCHR; break;
4162 case 't': ftst = OP_FTTTY; break;
4163 case 'T': ftst = OP_FTTEXT; break;
4164 case 'B': ftst = OP_FTBINARY; break;
4165 case 'M': case 'A': case 'C':
4166 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4168 case 'M': ftst = OP_FTMTIME; break;
4169 case 'A': ftst = OP_FTATIME; break;
4170 case 'C': ftst = OP_FTCTIME; break;
4178 PL_last_lop_op = (OPCODE)ftst;
4179 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4180 "### Saw file test %c\n", (int)tmp);
4185 /* Assume it was a minus followed by a one-letter named
4186 * subroutine call (or a -bareword), then. */
4187 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4188 "### '-%c' looked like a file test but was not\n",
4195 const char tmp = *s++;
4198 if (PL_expect == XOPERATOR)
4203 else if (*s == '>') {
4206 if (isIDFIRST_lazy_if(s,UTF)) {
4207 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4215 if (PL_expect == XOPERATOR)
4218 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4220 OPERATOR('-'); /* unary minus */
4226 const char tmp = *s++;
4229 if (PL_expect == XOPERATOR)
4234 if (PL_expect == XOPERATOR)
4237 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4244 if (PL_expect != XOPERATOR) {
4245 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4246 PL_expect = XOPERATOR;
4247 force_ident(PL_tokenbuf, '*');
4260 if (PL_expect == XOPERATOR) {
4264 PL_tokenbuf[0] = '%';
4265 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4266 sizeof PL_tokenbuf - 1, FALSE);
4267 if (!PL_tokenbuf[1]) {
4270 PL_pending_ident = '%';
4279 const char tmp = *s++;
4284 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4291 const char tmp = *s++;
4297 goto just_a_word_zero_gv;
4300 switch (PL_expect) {
4306 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4308 PL_bufptr = s; /* update in case we back off */
4314 PL_expect = XTERMBLOCK;
4317 stuffstart = s - SvPVX(PL_linestr) - 1;
4321 while (isIDFIRST_lazy_if(s,UTF)) {
4324 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4325 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4326 if (tmp < 0) tmp = -tmp;
4341 sv = newSVpvn(s, len);
4343 d = scan_str(d,TRUE,TRUE);
4345 /* MUST advance bufptr here to avoid bogus
4346 "at end of line" context messages from yyerror().
4348 PL_bufptr = s + len;
4349 yyerror("Unterminated attribute parameter in attribute list");
4353 return REPORT(0); /* EOF indicator */
4357 sv_catsv(sv, PL_lex_stuff);
4358 attrs = append_elem(OP_LIST, attrs,
4359 newSVOP(OP_CONST, 0, sv));
4360 SvREFCNT_dec(PL_lex_stuff);
4361 PL_lex_stuff = NULL;
4364 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4366 if (PL_in_my == KEY_our) {
4367 deprecate(":unique");
4370 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4373 /* NOTE: any CV attrs applied here need to be part of
4374 the CVf_BUILTIN_ATTRS define in cv.h! */
4375 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4377 CvLVALUE_on(PL_compcv);
4379 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4381 deprecate(":locked");
4383 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4385 CvMETHOD_on(PL_compcv);
4387 /* After we've set the flags, it could be argued that
4388 we don't need to do the attributes.pm-based setting
4389 process, and shouldn't bother appending recognized
4390 flags. To experiment with that, uncomment the
4391 following "else". (Note that's already been
4392 uncommented. That keeps the above-applied built-in
4393 attributes from being intercepted (and possibly
4394 rejected) by a package's attribute routines, but is
4395 justified by the performance win for the common case
4396 of applying only built-in attributes.) */
4398 attrs = append_elem(OP_LIST, attrs,
4399 newSVOP(OP_CONST, 0,
4403 if (*s == ':' && s[1] != ':')
4406 break; /* require real whitespace or :'s */
4407 /* XXX losing whitespace on sequential attributes here */
4411 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4412 if (*s != ';' && *s != '}' && *s != tmp
4413 && (tmp != '=' || *s != ')')) {
4414 const char q = ((*s == '\'') ? '"' : '\'');
4415 /* If here for an expression, and parsed no attrs, back
4417 if (tmp == '=' && !attrs) {
4421 /* MUST advance bufptr here to avoid bogus "at end of line"
4422 context messages from yyerror().
4425 yyerror( (const char *)
4427 ? Perl_form(aTHX_ "Invalid separator character "
4428 "%c%c%c in attribute list", q, *s, q)
4429 : "Unterminated attribute list" ) );
4437 start_force(PL_curforce);
4438 NEXTVAL_NEXTTOKE.opval = attrs;
4439 CURMAD('_', PL_nextwhite);
4444 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4445 (s - SvPVX(PL_linestr)) - stuffstart);
4453 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4454 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4462 const char tmp = *s++;
4467 const char tmp = *s++;
4475 if (PL_lex_brackets <= 0)
4476 yyerror("Unmatched right square bracket");
4479 if (PL_lex_state == LEX_INTERPNORMAL) {
4480 if (PL_lex_brackets == 0) {
4481 if (*s == '-' && s[1] == '>')
4482 PL_lex_state = LEX_INTERPENDMAYBE;
4483 else if (*s != '[' && *s != '{')
4484 PL_lex_state = LEX_INTERPEND;
4491 if (PL_lex_brackets > 100) {
4492 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4494 switch (PL_expect) {
4496 if (PL_lex_formbrack) {
4500 if (PL_oldoldbufptr == PL_last_lop)
4501 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4503 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4504 OPERATOR(HASHBRACK);
4506 while (s < PL_bufend && SPACE_OR_TAB(*s))
4509 PL_tokenbuf[0] = '\0';
4510 if (d < PL_bufend && *d == '-') {
4511 PL_tokenbuf[0] = '-';
4513 while (d < PL_bufend && SPACE_OR_TAB(*d))
4516 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4517 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4519 while (d < PL_bufend && SPACE_OR_TAB(*d))
4522 const char minus = (PL_tokenbuf[0] == '-');
4523 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4531 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4536 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4541 if (PL_oldoldbufptr == PL_last_lop)
4542 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4544 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4547 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4549 /* This hack is to get the ${} in the message. */
4551 yyerror("syntax error");
4554 OPERATOR(HASHBRACK);
4556 /* This hack serves to disambiguate a pair of curlies
4557 * as being a block or an anon hash. Normally, expectation
4558 * determines that, but in cases where we're not in a
4559 * position to expect anything in particular (like inside
4560 * eval"") we have to resolve the ambiguity. This code
4561 * covers the case where the first term in the curlies is a
4562 * quoted string. Most other cases need to be explicitly
4563 * disambiguated by prepending a "+" before the opening
4564 * curly in order to force resolution as an anon hash.
4566 * XXX should probably propagate the outer expectation
4567 * into eval"" to rely less on this hack, but that could
4568 * potentially break current behavior of eval"".
4572 if (*s == '\'' || *s == '"' || *s == '`') {
4573 /* common case: get past first string, handling escapes */
4574 for (t++; t < PL_bufend && *t != *s;)
4575 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4579 else if (*s == 'q') {
4582 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4585 /* skip q//-like construct */
4587 char open, close, term;
4590 while (t < PL_bufend && isSPACE(*t))
4592 /* check for q => */
4593 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4594 OPERATOR(HASHBRACK);
4598 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4602 for (t++; t < PL_bufend; t++) {
4603 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4605 else if (*t == open)
4609 for (t++; t < PL_bufend; t++) {
4610 if (*t == '\\' && t+1 < PL_bufend)
4612 else if (*t == close && --brackets <= 0)
4614 else if (*t == open)
4621 /* skip plain q word */
4622 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4625 else if (isALNUM_lazy_if(t,UTF)) {
4627 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4630 while (t < PL_bufend && isSPACE(*t))
4632 /* if comma follows first term, call it an anon hash */
4633 /* XXX it could be a comma expression with loop modifiers */
4634 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4635 || (*t == '=' && t[1] == '>')))
4636 OPERATOR(HASHBRACK);
4637 if (PL_expect == XREF)
4640 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4646 pl_yylval.ival = CopLINE(PL_curcop);
4647 if (isSPACE(*s) || *s == '#')
4648 PL_copline = NOLINE; /* invalidate current command line number */
4653 if (PL_lex_brackets <= 0)
4654 yyerror("Unmatched right curly bracket");
4656 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4657 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4658 PL_lex_formbrack = 0;
4659 if (PL_lex_state == LEX_INTERPNORMAL) {
4660 if (PL_lex_brackets == 0) {
4661 if (PL_expect & XFAKEBRACK) {
4662 PL_expect &= XENUMMASK;
4663 PL_lex_state = LEX_INTERPEND;
4668 PL_thiswhite = newSVpvs("");
4669 sv_catpvs(PL_thiswhite,"}");
4672 return yylex(); /* ignore fake brackets */
4674 if (*s == '-' && s[1] == '>')
4675 PL_lex_state = LEX_INTERPENDMAYBE;
4676 else if (*s != '[' && *s != '{')
4677 PL_lex_state = LEX_INTERPEND;
4680 if (PL_expect & XFAKEBRACK) {
4681 PL_expect &= XENUMMASK;
4683 return yylex(); /* ignore fake brackets */
4685 start_force(PL_curforce);
4687 curmad('X', newSVpvn(s-1,1));
4688 CURMAD('_', PL_thiswhite);
4693 PL_thistoken = newSVpvs("");
4701 if (PL_expect == XOPERATOR) {
4702 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4703 && isIDFIRST_lazy_if(s,UTF))
4705 CopLINE_dec(PL_curcop);
4706 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
4707 CopLINE_inc(PL_curcop);
4712 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4714 PL_expect = XOPERATOR;
4715 force_ident(PL_tokenbuf, '&');
4719 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
4731 const char tmp = *s++;
4738 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4739 && strchr("+-*/%.^&|<",tmp))
4740 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4741 "Reversed %c= operator",(int)tmp);
4743 if (PL_expect == XSTATE && isALPHA(tmp) &&
4744 (s == PL_linestart+1 || s[-2] == '\n') )
4746 if (PL_in_eval && !PL_rsfp) {
4751 if (strnEQ(s,"=cut",4)) {
4767 PL_thiswhite = newSVpvs("");
4768 sv_catpvn(PL_thiswhite, PL_linestart,
4769 PL_bufend - PL_linestart);
4773 PL_doextract = TRUE;
4777 if (PL_lex_brackets < PL_lex_formbrack) {
4779 #ifdef PERL_STRICT_CR
4780 while (SPACE_OR_TAB(*t))
4782 while (SPACE_OR_TAB(*t) || *t == '\r')
4785 if (*t == '\n' || *t == '#') {
4796 const char tmp = *s++;
4798 /* was this !=~ where !~ was meant?
4799 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4801 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4802 const char *t = s+1;
4804 while (t < PL_bufend && isSPACE(*t))
4807 if (*t == '/' || *t == '?' ||
4808 ((*t == 'm' || *t == 's' || *t == 'y')
4809 && !isALNUM(t[1])) ||
4810 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4811 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4812 "!=~ should be !~");
4822 if (PL_expect != XOPERATOR) {
4823 if (s[1] != '<' && !strchr(s,'>'))
4826 s = scan_heredoc(s);
4828 s = scan_inputsymbol(s);
4829 TERM(sublex_start());
4835 SHop(OP_LEFT_SHIFT);
4849 const char tmp = *s++;
4851 SHop(OP_RIGHT_SHIFT);
4852 else if (tmp == '=')
4861 if (PL_expect == XOPERATOR) {
4862 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4864 deprecate(commaless_variable_list);
4865 return REPORT(','); /* grandfather non-comma-format format */
4869 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4870 PL_tokenbuf[0] = '@';
4871 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4872 sizeof PL_tokenbuf - 1, FALSE);
4873 if (PL_expect == XOPERATOR)
4874 no_op("Array length", s);
4875 if (!PL_tokenbuf[1])
4877 PL_expect = XOPERATOR;
4878 PL_pending_ident = '#';
4882 PL_tokenbuf[0] = '$';
4883 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4884 sizeof PL_tokenbuf - 1, FALSE);
4885 if (PL_expect == XOPERATOR)
4887 if (!PL_tokenbuf[1]) {
4889 yyerror("Final $ should be \\$ or $name");
4893 /* This kludge not intended to be bulletproof. */
4894 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4895 pl_yylval.opval = newSVOP(OP_CONST, 0,
4896 newSViv(CopARYBASE_get(&PL_compiling)));
4897 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
4903 const char tmp = *s;
4904 if (PL_lex_state == LEX_NORMAL)
4907 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4908 && intuit_more(s)) {
4910 PL_tokenbuf[0] = '@';
4911 if (ckWARN(WARN_SYNTAX)) {
4914 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4917 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4918 while (t < PL_bufend && *t != ']')
4920 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4921 "Multidimensional syntax %.*s not supported",
4922 (int)((t - PL_bufptr) + 1), PL_bufptr);
4926 else if (*s == '{') {
4928 PL_tokenbuf[0] = '%';
4929 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4930 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4932 char tmpbuf[sizeof PL_tokenbuf];
4935 } while (isSPACE(*t));
4936 if (isIDFIRST_lazy_if(t,UTF)) {
4938 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4942 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4943 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4944 "You need to quote \"%s\"",
4951 PL_expect = XOPERATOR;
4952 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4953 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4954 if (!islop || PL_last_lop_op == OP_GREPSTART)
4955 PL_expect = XOPERATOR;
4956 else if (strchr("$@\"'`q", *s))
4957 PL_expect = XTERM; /* e.g. print $fh "foo" */
4958 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4959 PL_expect = XTERM; /* e.g. print $fh &sub */
4960 else if (isIDFIRST_lazy_if(s,UTF)) {
4961 char tmpbuf[sizeof PL_tokenbuf];
4963 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4964 if ((t2 = keyword(tmpbuf, len, 0))) {
4965 /* binary operators exclude handle interpretations */
4977 PL_expect = XTERM; /* e.g. print $fh length() */
4982 PL_expect = XTERM; /* e.g. print $fh subr() */
4985 else if (isDIGIT(*s))
4986 PL_expect = XTERM; /* e.g. print $fh 3 */
4987 else if (*s == '.' && isDIGIT(s[1]))
4988 PL_expect = XTERM; /* e.g. print $fh .3 */
4989 else if ((*s == '?' || *s == '-' || *s == '+')
4990 && !isSPACE(s[1]) && s[1] != '=')
4991 PL_expect = XTERM; /* e.g. print $fh -1 */
4992 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4994 PL_expect = XTERM; /* e.g. print $fh /.../
4995 XXX except DORDOR operator
4997 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4999 PL_expect = XTERM; /* print $fh <<"EOF" */
5002 PL_pending_ident = '$';
5006 if (PL_expect == XOPERATOR)
5008 PL_tokenbuf[0] = '@';
5009 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5010 if (!PL_tokenbuf[1]) {
5013 if (PL_lex_state == LEX_NORMAL)
5015 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5017 PL_tokenbuf[0] = '%';
5019 /* Warn about @ where they meant $. */
5020 if (*s == '[' || *s == '{') {
5021 if (ckWARN(WARN_SYNTAX)) {
5022 const char *t = s + 1;
5023 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5025 if (*t == '}' || *t == ']') {
5027 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5028 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5029 "Scalar value %.*s better written as $%.*s",
5030 (int)(t-PL_bufptr), PL_bufptr,
5031 (int)(t-PL_bufptr-1), PL_bufptr+1);
5036 PL_pending_ident = '@';
5039 case '/': /* may be division, defined-or, or pattern */
5040 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5044 case '?': /* may either be conditional or pattern */
5045 if (PL_expect == XOPERATOR) {
5053 /* A // operator. */
5063 /* Disable warning on "study /blah/" */
5064 if (PL_oldoldbufptr == PL_last_uni
5065 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5066 || memNE(PL_last_uni, "study", 5)
5067 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5070 s = scan_pat(s,OP_MATCH);
5071 TERM(sublex_start());
5075 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5076 #ifdef PERL_STRICT_CR
5079 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5081 && (s == PL_linestart || s[-1] == '\n') )
5083 PL_lex_formbrack = 0;
5087 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5091 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5097 pl_yylval.ival = OPf_SPECIAL;
5103 if (PL_expect != XOPERATOR)
5108 case '0': case '1': case '2': case '3': case '4':
5109 case '5': case '6': case '7': case '8': case '9':
5110 s = scan_num(s, &pl_yylval);
5111 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5112 if (PL_expect == XOPERATOR)
5117 s = scan_str(s,!!PL_madskills,FALSE);
5118 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5119 if (PL_expect == XOPERATOR) {
5120 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5122 deprecate(commaless_variable_list);
5123 return REPORT(','); /* grandfather non-comma-format format */
5130 pl_yylval.ival = OP_CONST;
5131 TERM(sublex_start());
5134 s = scan_str(s,!!PL_madskills,FALSE);
5135 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5136 if (PL_expect == XOPERATOR) {
5137 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5139 deprecate(commaless_variable_list);
5140 return REPORT(','); /* grandfather non-comma-format format */
5147 pl_yylval.ival = OP_CONST;
5148 /* FIXME. I think that this can be const if char *d is replaced by
5149 more localised variables. */
5150 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5151 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5152 pl_yylval.ival = OP_STRINGIFY;
5156 TERM(sublex_start());
5159 s = scan_str(s,!!PL_madskills,FALSE);
5160 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5161 if (PL_expect == XOPERATOR)
5162 no_op("Backticks",s);
5165 readpipe_override();
5166 TERM(sublex_start());
5170 if (PL_lex_inwhat && isDIGIT(*s))
5171 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5173 if (PL_expect == XOPERATOR)
5174 no_op("Backslash",s);
5178 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5179 char *start = s + 2;
5180 while (isDIGIT(*start) || *start == '_')
5182 if (*start == '.' && isDIGIT(start[1])) {
5183 s = scan_num(s, &pl_yylval);
5186 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5187 else if (!isALPHA(*start) && (PL_expect == XTERM
5188 || PL_expect == XREF || PL_expect == XSTATE
5189 || PL_expect == XTERMORDORDOR)) {
5190 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5192 s = scan_num(s, &pl_yylval);
5199 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5241 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5243 /* Some keywords can be followed by any delimiter, including ':' */
5244 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5245 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5246 (PL_tokenbuf[0] == 'q' &&
5247 strchr("qwxr", PL_tokenbuf[1])))));
5249 /* x::* is just a word, unless x is "CORE" */
5250 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5254 while (d < PL_bufend && isSPACE(*d))
5255 d++; /* no comments skipped here, or s### is misparsed */
5257 /* Is this a label? */
5258 if (!tmp && PL_expect == XSTATE
5259 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5260 tmp = keyword(PL_tokenbuf, len, 0);
5262 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5264 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5269 /* Check for keywords */
5270 tmp = keyword(PL_tokenbuf, len, 0);
5272 /* Is this a word before a => operator? */
5273 if (*d == '=' && d[1] == '>') {
5276 = (OP*)newSVOP(OP_CONST, 0,
5277 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5278 pl_yylval.opval->op_private = OPpCONST_BARE;
5282 if (tmp < 0) { /* second-class keyword? */
5283 GV *ogv = NULL; /* override (winner) */
5284 GV *hgv = NULL; /* hidden (loser) */
5285 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5287 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5290 if (GvIMPORTED_CV(gv))
5292 else if (! CvMETHOD(cv))
5296 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5297 (gv = *gvp) && isGV_with_GP(gv) &&
5298 GvCVu(gv) && GvIMPORTED_CV(gv))
5305 tmp = 0; /* overridden by import or by GLOBAL */
5308 && -tmp==KEY_lock /* XXX generalizable kludge */
5311 tmp = 0; /* any sub overrides "weak" keyword */
5313 else { /* no override */
5315 if (tmp == KEY_dump) {
5316 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5317 "dump() better written as CORE::dump()");
5321 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
5322 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5323 "Ambiguous call resolved as CORE::%s(), %s",
5324 GvENAME(hgv), "qualify as such or use &");
5331 default: /* not a keyword */
5332 /* Trade off - by using this evil construction we can pull the
5333 variable gv into the block labelled keylookup. If not, then
5334 we have to give it function scope so that the goto from the
5335 earlier ':' case doesn't bypass the initialisation. */
5337 just_a_word_zero_gv:
5345 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5348 SV *nextPL_nextwhite = 0;
5352 /* Get the rest if it looks like a package qualifier */
5354 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5356 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5359 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5360 *s == '\'' ? "'" : "::");
5365 if (PL_expect == XOPERATOR) {
5366 if (PL_bufptr == PL_linestart) {
5367 CopLINE_dec(PL_curcop);
5368 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5369 CopLINE_inc(PL_curcop);
5372 no_op("Bareword",s);
5375 /* Look for a subroutine with this name in current package,
5376 unless name is "Foo::", in which case Foo is a bearword
5377 (and a package name). */
5379 if (len > 2 && !PL_madskills &&
5380 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5382 if (ckWARN(WARN_BAREWORD)
5383 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5384 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5385 "Bareword \"%s\" refers to nonexistent package",
5388 PL_tokenbuf[len] = '\0';
5394 /* Mustn't actually add anything to a symbol table.
5395 But also don't want to "initialise" any placeholder
5396 constants that might already be there into full
5397 blown PVGVs with attached PVCV. */
5398 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5399 GV_NOADD_NOINIT, SVt_PVCV);
5404 /* if we saw a global override before, get the right name */
5407 sv = newSVpvs("CORE::GLOBAL::");
5408 sv_catpv(sv,PL_tokenbuf);
5411 /* If len is 0, newSVpv does strlen(), which is correct.
5412 If len is non-zero, then it will be the true length,
5413 and so the scalar will be created correctly. */
5414 sv = newSVpv(PL_tokenbuf,len);
5417 if (PL_madskills && !PL_thistoken) {
5418 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5419 PL_thistoken = newSVpvn(start,s - start);
5420 PL_realtokenstart = s - SvPVX(PL_linestr);
5424 /* Presume this is going to be a bareword of some sort. */
5427 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5428 pl_yylval.opval->op_private = OPpCONST_BARE;
5429 /* UTF-8 package name? */
5430 if (UTF && !IN_BYTES &&
5431 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5434 /* And if "Foo::", then that's what it certainly is. */
5439 /* Do the explicit type check so that we don't need to force
5440 the initialisation of the symbol table to have a real GV.
5441 Beware - gv may not really be a PVGV, cv may not really be
5442 a PVCV, (because of the space optimisations that gv_init
5443 understands) But they're true if for this symbol there is
5444 respectively a typeglob and a subroutine.
5446 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5447 /* Real typeglob, so get the real subroutine: */
5449 /* A proxy for a subroutine in this package? */
5450 : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
5453 /* See if it's the indirect object for a list operator. */
5455 if (PL_oldoldbufptr &&
5456 PL_oldoldbufptr < PL_bufptr &&
5457 (PL_oldoldbufptr == PL_last_lop
5458 || PL_oldoldbufptr == PL_last_uni) &&
5459 /* NO SKIPSPACE BEFORE HERE! */
5460 (PL_expect == XREF ||
5461 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5463 bool immediate_paren = *s == '(';
5465 /* (Now we can afford to cross potential line boundary.) */
5466 s = SKIPSPACE2(s,nextPL_nextwhite);
5468 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5471 /* Two barewords in a row may indicate method call. */
5473 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5474 (tmp = intuit_method(s, gv, cv)))
5477 /* If not a declared subroutine, it's an indirect object. */
5478 /* (But it's an indir obj regardless for sort.) */
5479 /* Also, if "_" follows a filetest operator, it's a bareword */
5482 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5484 (PL_last_lop_op != OP_MAPSTART &&
5485 PL_last_lop_op != OP_GREPSTART))))
5486 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5487 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5490 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5495 PL_expect = XOPERATOR;
5498 s = SKIPSPACE2(s,nextPL_nextwhite);
5499 PL_nextwhite = nextPL_nextwhite;
5504 /* Is this a word before a => operator? */
5505 if (*s == '=' && s[1] == '>' && !pkgname) {
5507 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
5508 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5509 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
5513 /* If followed by a paren, it's certainly a subroutine. */
5518 while (SPACE_OR_TAB(*d))
5520 if (*d == ')' && (sv = gv_const_sv(gv))) {
5527 PL_nextwhite = PL_thiswhite;
5530 start_force(PL_curforce);
5532 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5533 PL_expect = XOPERATOR;
5536 PL_nextwhite = nextPL_nextwhite;
5537 curmad('X', PL_thistoken);
5538 PL_thistoken = newSVpvs("");
5546 /* If followed by var or block, call it a method (unless sub) */
5548 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5549 PL_last_lop = PL_oldbufptr;
5550 PL_last_lop_op = OP_METHOD;
5554 /* If followed by a bareword, see if it looks like indir obj. */
5557 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5558 && (tmp = intuit_method(s, gv, cv)))
5561 /* Not a method, so call it a subroutine (if defined) */
5564 if (lastchar == '-')
5565 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5566 "Ambiguous use of -%s resolved as -&%s()",
5567 PL_tokenbuf, PL_tokenbuf);
5568 /* Check for a constant sub */
5569 if ((sv = gv_const_sv(gv))) {
5571 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
5572 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5573 pl_yylval.opval->op_private = 0;
5577 /* Resolve to GV now. */
5578 if (SvTYPE(gv) != SVt_PVGV) {
5579 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5580 assert (SvTYPE(gv) == SVt_PVGV);
5581 /* cv must have been some sort of placeholder, so
5582 now needs replacing with a real code reference. */
5586 op_free(pl_yylval.opval);
5587 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5588 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5589 PL_last_lop = PL_oldbufptr;
5590 PL_last_lop_op = OP_ENTERSUB;
5591 /* Is there a prototype? */
5599 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5602 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5604 while (*proto == ';')
5606 if (*proto == '&' && *s == '{') {
5608 sv_setpvs(PL_subname, "__ANON__");
5610 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5617 PL_nextwhite = PL_thiswhite;
5620 start_force(PL_curforce);
5621 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5624 PL_nextwhite = nextPL_nextwhite;
5625 curmad('X', PL_thistoken);
5626 PL_thistoken = newSVpvs("");
5633 /* Guess harder when madskills require "best effort". */
5634 if (PL_madskills && (!gv || !GvCVu(gv))) {
5635 int probable_sub = 0;
5636 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5638 else if (isALPHA(*s)) {
5642 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5643 if (!keyword(tmpbuf, tmplen, 0))
5646 while (d < PL_bufend && isSPACE(*d))
5648 if (*d == '=' && d[1] == '>')
5653 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5654 op_free(pl_yylval.opval);
5655 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5656 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5657 PL_last_lop = PL_oldbufptr;
5658 PL_last_lop_op = OP_ENTERSUB;
5659 PL_nextwhite = PL_thiswhite;
5661 start_force(PL_curforce);
5662 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5664 PL_nextwhite = nextPL_nextwhite;
5665 curmad('X', PL_thistoken);
5666 PL_thistoken = newSVpvs("");
5671 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5678 /* Call it a bare word */
5680 if (PL_hints & HINT_STRICT_SUBS)
5681 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5684 /* after "print" and similar functions (corresponding to
5685 * "F? L" in opcode.pl), whatever wasn't already parsed as
5686 * a filehandle should be subject to "strict subs".
5687 * Likewise for the optional indirect-object argument to system
5688 * or exec, which can't be a bareword */
5689 if ((PL_last_lop_op == OP_PRINT
5690 || PL_last_lop_op == OP_PRTF
5691 || PL_last_lop_op == OP_SAY
5692 || PL_last_lop_op == OP_SYSTEM
5693 || PL_last_lop_op == OP_EXEC)
5694 && (PL_hints & HINT_STRICT_SUBS))
5695 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5696 if (lastchar != '-') {
5697 if (ckWARN(WARN_RESERVED)) {
5701 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5702 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5709 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
5710 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5711 "Operator or semicolon missing before %c%s",
5712 lastchar, PL_tokenbuf);
5713 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5714 "Ambiguous use of %c resolved as operator %c",
5715 lastchar, lastchar);
5721 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5722 newSVpv(CopFILE(PL_curcop),0));
5726 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5727 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5730 case KEY___PACKAGE__:
5731 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5733 ? newSVhek(HvNAME_HEK(PL_curstash))
5740 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5741 const char *pname = "main";
5742 if (PL_tokenbuf[2] == 'D')
5743 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5744 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5748 GvIOp(gv) = newIO();
5749 IoIFP(GvIOp(gv)) = PL_rsfp;
5750 #if defined(HAS_FCNTL) && defined(F_SETFD)
5752 const int fd = PerlIO_fileno(PL_rsfp);
5753 fcntl(fd,F_SETFD,fd >= 3);
5756 /* Mark this internal pseudo-handle as clean */
5757 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5758 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5759 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5761 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5762 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5763 /* if the script was opened in binmode, we need to revert
5764 * it to text mode for compatibility; but only iff it has CRs
5765 * XXX this is a questionable hack at best. */
5766 if (PL_bufend-PL_bufptr > 2
5767 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5770 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5771 loc = PerlIO_tell(PL_rsfp);
5772 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5775 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5777 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5778 #endif /* NETWARE */
5779 #ifdef PERLIO_IS_STDIO /* really? */
5780 # if defined(__BORLANDC__)
5781 /* XXX see note in do_binmode() */
5782 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5786 PerlIO_seek(PL_rsfp, loc, 0);
5790 #ifdef PERLIO_LAYERS
5793 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5794 else if (PL_encoding) {
5801 XPUSHs(PL_encoding);
5803 call_method("name", G_SCALAR);
5807 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5808 Perl_form(aTHX_ ":encoding(%"SVf")",
5817 if (PL_realtokenstart >= 0) {
5818 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5820 PL_endwhite = newSVpvs("");
5821 sv_catsv(PL_endwhite, PL_thiswhite);
5823 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5824 PL_realtokenstart = -1;
5826 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5827 SvCUR(PL_endwhite))) != NULL) ;
5842 if (PL_expect == XSTATE) {
5849 if (*s == ':' && s[1] == ':') {
5852 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5853 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5854 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5857 else if (tmp == KEY_require || tmp == KEY_do)
5858 /* that's a way to remember we saw "CORE::" */
5871 LOP(OP_ACCEPT,XTERM);
5877 LOP(OP_ATAN2,XTERM);
5883 LOP(OP_BINMODE,XTERM);
5886 LOP(OP_BLESS,XTERM);
5895 /* When 'use switch' is in effect, continue has a dual
5896 life as a control operator. */
5898 if (!FEATURE_IS_ENABLED("switch"))
5901 /* We have to disambiguate the two senses of
5902 "continue". If the next token is a '{' then
5903 treat it as the start of a continue block;
5904 otherwise treat it as a control operator.
5916 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5933 if (!PL_cryptseen) {
5934 PL_cryptseen = TRUE;
5938 LOP(OP_CRYPT,XTERM);
5941 LOP(OP_CHMOD,XTERM);
5944 LOP(OP_CHOWN,XTERM);
5947 LOP(OP_CONNECT,XTERM);
5966 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5967 if (orig_keyword == KEY_do) {
5976 PL_hints |= HINT_BLOCK_SCOPE;
5986 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5987 LOP(OP_DBMOPEN,XTERM);
5993 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6000 pl_yylval.ival = CopLINE(PL_curcop);
6016 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6017 UNIBRACK(OP_ENTEREVAL);
6031 case KEY_endhostent:
6037 case KEY_endservent:
6040 case KEY_endprotoent:
6051 pl_yylval.ival = CopLINE(PL_curcop);
6053 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6056 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6059 if ((PL_bufend - p) >= 3 &&
6060 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6062 else if ((PL_bufend - p) >= 4 &&
6063 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6066 if (isIDFIRST_lazy_if(p,UTF)) {
6067 p = scan_ident(p, PL_bufend,
6068 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6072 Perl_croak(aTHX_ "Missing $ on loop variable");
6074 s = SvPVX(PL_linestr) + soff;
6080 LOP(OP_FORMLINE,XTERM);
6086 LOP(OP_FCNTL,XTERM);
6092 LOP(OP_FLOCK,XTERM);
6101 LOP(OP_GREPSTART, XREF);
6104 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6119 case KEY_getpriority:
6120 LOP(OP_GETPRIORITY,XTERM);
6122 case KEY_getprotobyname:
6125 case KEY_getprotobynumber:
6126 LOP(OP_GPBYNUMBER,XTERM);
6128 case KEY_getprotoent:
6140 case KEY_getpeername:
6141 UNI(OP_GETPEERNAME);
6143 case KEY_gethostbyname:
6146 case KEY_gethostbyaddr:
6147 LOP(OP_GHBYADDR,XTERM);
6149 case KEY_gethostent:
6152 case KEY_getnetbyname:
6155 case KEY_getnetbyaddr:
6156 LOP(OP_GNBYADDR,XTERM);
6161 case KEY_getservbyname:
6162 LOP(OP_GSBYNAME,XTERM);
6164 case KEY_getservbyport:
6165 LOP(OP_GSBYPORT,XTERM);
6167 case KEY_getservent:
6170 case KEY_getsockname:
6171 UNI(OP_GETSOCKNAME);
6173 case KEY_getsockopt:
6174 LOP(OP_GSOCKOPT,XTERM);
6189 pl_yylval.ival = CopLINE(PL_curcop);
6199 pl_yylval.ival = CopLINE(PL_curcop);
6203 LOP(OP_INDEX,XTERM);
6209 LOP(OP_IOCTL,XTERM);
6221 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6253 LOP(OP_LISTEN,XTERM);
6262 s = scan_pat(s,OP_MATCH);
6263 TERM(sublex_start());
6266 LOP(OP_MAPSTART, XREF);
6269 LOP(OP_MKDIR,XTERM);
6272 LOP(OP_MSGCTL,XTERM);
6275 LOP(OP_MSGGET,XTERM);
6278 LOP(OP_MSGRCV,XTERM);
6281 LOP(OP_MSGSND,XTERM);
6286 PL_in_my = (U16)tmp;
6288 if (isIDFIRST_lazy_if(s,UTF)) {
6292 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6293 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6295 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6296 if (!PL_in_my_stash) {
6299 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6303 if (PL_madskills) { /* just add type to declarator token */
6304 sv_catsv(PL_thistoken, PL_nextwhite);
6306 sv_catpvn(PL_thistoken, start, s - start);
6314 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6321 s = tokenize_use(0, s);
6325 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6332 if (isIDFIRST_lazy_if(s,UTF)) {
6334 for (d = s; isALNUM_lazy_if(d,UTF);)
6336 for (t=d; isSPACE(*t);)
6338 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6340 && !(t[0] == '=' && t[1] == '>')
6342 int parms_len = (int)(d-s);
6343 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6344 "Precedence problem: open %.*s should be open(%.*s)",
6345 parms_len, s, parms_len, s);
6351 pl_yylval.ival = OP_OR;
6361 LOP(OP_OPEN_DIR,XTERM);
6364 checkcomma(s,PL_tokenbuf,"filehandle");
6368 checkcomma(s,PL_tokenbuf,"filehandle");
6387 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6388 s = force_version(s, FALSE);
6392 LOP(OP_PIPE_OP,XTERM);
6395 s = scan_str(s,!!PL_madskills,FALSE);
6398 pl_yylval.ival = OP_CONST;
6399 TERM(sublex_start());
6405 s = scan_str(s,!!PL_madskills,FALSE);
6408 PL_expect = XOPERATOR;
6410 if (SvCUR(PL_lex_stuff)) {
6413 d = SvPV_force(PL_lex_stuff, len);
6415 for (; isSPACE(*d) && len; --len, ++d)
6420 if (!warned && ckWARN(WARN_QW)) {
6421 for (; !isSPACE(*d) && len; --len, ++d) {
6423 Perl_warner(aTHX_ packWARN(WARN_QW),
6424 "Possible attempt to separate words with commas");
6427 else if (*d == '#') {
6428 Perl_warner(aTHX_ packWARN(WARN_QW),
6429 "Possible attempt to put comments in qw() list");
6435 for (; !isSPACE(*d) && len; --len, ++d)
6438 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
6439 words = append_elem(OP_LIST, words,
6440 newSVOP(OP_CONST, 0, tokeq(sv)));
6444 start_force(PL_curforce);
6445 NEXTVAL_NEXTTOKE.opval = words;
6450 SvREFCNT_dec(PL_lex_stuff);
6451 PL_lex_stuff = NULL;
6457 s = scan_str(s,!!PL_madskills,FALSE);
6460 pl_yylval.ival = OP_STRINGIFY;
6461 if (SvIVX(PL_lex_stuff) == '\'')
6462 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6463 TERM(sublex_start());
6466 s = scan_pat(s,OP_QR);
6467 TERM(sublex_start());
6470 s = scan_str(s,!!PL_madskills,FALSE);
6473 readpipe_override();
6474 TERM(sublex_start());
6482 s = force_version(s, FALSE);
6484 else if (*s != 'v' || !isDIGIT(s[1])
6485 || (s = force_version(s, TRUE), *s == 'v'))
6487 *PL_tokenbuf = '\0';
6488 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6489 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6490 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6492 yyerror("<> should be quotes");
6494 if (orig_keyword == KEY_require) {
6502 PL_last_uni = PL_oldbufptr;
6503 PL_last_lop_op = OP_REQUIRE;
6505 return REPORT( (int)REQUIRE );
6511 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6515 LOP(OP_RENAME,XTERM);
6524 LOP(OP_RINDEX,XTERM);
6533 UNIDOR(OP_READLINE);
6536 UNIDOR(OP_BACKTICK);
6545 LOP(OP_REVERSE,XTERM);
6548 UNIDOR(OP_READLINK);
6555 if (pl_yylval.opval)
6556 TERM(sublex_start());
6558 TOKEN(1); /* force error */
6561 checkcomma(s,PL_tokenbuf,"filehandle");
6571 LOP(OP_SELECT,XTERM);
6577 LOP(OP_SEMCTL,XTERM);
6580 LOP(OP_SEMGET,XTERM);
6583 LOP(OP_SEMOP,XTERM);
6589 LOP(OP_SETPGRP,XTERM);
6591 case KEY_setpriority:
6592 LOP(OP_SETPRIORITY,XTERM);
6594 case KEY_sethostent:
6600 case KEY_setservent:
6603 case KEY_setprotoent:
6613 LOP(OP_SEEKDIR,XTERM);
6615 case KEY_setsockopt:
6616 LOP(OP_SSOCKOPT,XTERM);
6622 LOP(OP_SHMCTL,XTERM);
6625 LOP(OP_SHMGET,XTERM);
6628 LOP(OP_SHMREAD,XTERM);
6631 LOP(OP_SHMWRITE,XTERM);
6634 LOP(OP_SHUTDOWN,XTERM);
6643 LOP(OP_SOCKET,XTERM);
6645 case KEY_socketpair:
6646 LOP(OP_SOCKPAIR,XTERM);
6649 checkcomma(s,PL_tokenbuf,"subroutine name");
6651 if (*s == ';' || *s == ')') /* probably a close */
6652 Perl_croak(aTHX_ "sort is now a reserved word");
6654 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6658 LOP(OP_SPLIT,XTERM);
6661 LOP(OP_SPRINTF,XTERM);
6664 LOP(OP_SPLICE,XTERM);
6679 LOP(OP_SUBSTR,XTERM);
6685 char tmpbuf[sizeof PL_tokenbuf];
6686 SSize_t tboffset = 0;
6687 expectation attrful;
6688 bool have_name, have_proto;
6689 const int key = tmp;
6694 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6695 SV *subtoken = newSVpvn(tstart, s - tstart);
6699 s = SKIPSPACE2(s,tmpwhite);
6704 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6705 (*s == ':' && s[1] == ':'))
6708 SV *nametoke = NULL;
6712 attrful = XATTRBLOCK;
6713 /* remember buffer pos'n for later force_word */
6714 tboffset = s - PL_oldbufptr;
6715 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6718 nametoke = newSVpvn(s, d - s);
6720 if (memchr(tmpbuf, ':', len))
6721 sv_setpvn(PL_subname, tmpbuf, len);
6723 sv_setsv(PL_subname,PL_curstname);
6724 sv_catpvs(PL_subname,"::");
6725 sv_catpvn(PL_subname,tmpbuf,len);
6732 CURMAD('X', nametoke);
6733 CURMAD('_', tmpwhite);
6734 (void) force_word(PL_oldbufptr + tboffset, WORD,
6737 s = SKIPSPACE2(d,tmpwhite);
6744 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6745 PL_expect = XTERMBLOCK;
6746 attrful = XATTRTERM;
6747 sv_setpvs(PL_subname,"?");
6751 if (key == KEY_format) {
6753 PL_lex_formbrack = PL_lex_brackets + 1;
6755 PL_thistoken = subtoken;
6759 (void) force_word(PL_oldbufptr + tboffset, WORD,
6765 /* Look for a prototype */
6768 bool bad_proto = FALSE;
6769 bool in_brackets = FALSE;
6770 char greedy_proto = ' ';
6771 bool proto_after_greedy_proto = FALSE;
6772 bool must_be_last = FALSE;
6773 bool underscore = FALSE;
6774 bool seen_underscore = FALSE;
6775 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6777 s = scan_str(s,!!PL_madskills,FALSE);
6779 Perl_croak(aTHX_ "Prototype not terminated");
6780 /* strip spaces and check for bad characters */
6781 d = SvPVX(PL_lex_stuff);
6783 for (p = d; *p; ++p) {
6789 proto_after_greedy_proto = TRUE;
6790 if (!strchr("$@%*;[]&\\_", *p)) {
6802 else if ( *p == ']' ) {
6803 in_brackets = FALSE;
6805 else if ( (*p == '@' || *p == '%') &&
6806 ( tmp < 2 || d[tmp-2] != '\\' ) &&
6808 must_be_last = TRUE;
6811 else if ( *p == '_' ) {
6812 underscore = seen_underscore = TRUE;
6819 if (proto_after_greedy_proto)
6820 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6821 "Prototype after '%c' for %"SVf" : %s",
6822 greedy_proto, SVfARG(PL_subname), d);
6824 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6825 "Illegal character %sin prototype for %"SVf" : %s",
6826 seen_underscore ? "after '_' " : "",
6827 SVfARG(PL_subname), d);
6828 SvCUR_set(PL_lex_stuff, tmp);
6833 CURMAD('q', PL_thisopen);
6834 CURMAD('_', tmpwhite);
6835 CURMAD('=', PL_thisstuff);
6836 CURMAD('Q', PL_thisclose);
6837 NEXTVAL_NEXTTOKE.opval =
6838 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6839 PL_lex_stuff = NULL;
6842 s = SKIPSPACE2(s,tmpwhite);
6850 if (*s == ':' && s[1] != ':')
6851 PL_expect = attrful;
6852 else if (*s != '{' && key == KEY_sub) {
6854 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6856 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6863 curmad('^', newSVpvs(""));
6864 CURMAD('_', tmpwhite);
6868 PL_thistoken = subtoken;
6871 NEXTVAL_NEXTTOKE.opval =
6872 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6873 PL_lex_stuff = NULL;
6879 sv_setpvs(PL_subname, "__ANON__");
6881 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6885 (void) force_word(PL_oldbufptr + tboffset, WORD,
6894 LOP(OP_SYSTEM,XREF);
6897 LOP(OP_SYMLINK,XTERM);
6900 LOP(OP_SYSCALL,XTERM);
6903 LOP(OP_SYSOPEN,XTERM);
6906 LOP(OP_SYSSEEK,XTERM);
6909 LOP(OP_SYSREAD,XTERM);
6912 LOP(OP_SYSWRITE,XTERM);
6916 TERM(sublex_start());
6937 LOP(OP_TRUNCATE,XTERM);
6949 pl_yylval.ival = CopLINE(PL_curcop);
6953 pl_yylval.ival = CopLINE(PL_curcop);
6957 LOP(OP_UNLINK,XTERM);
6963 LOP(OP_UNPACK,XTERM);
6966 LOP(OP_UTIME,XTERM);
6972 LOP(OP_UNSHIFT,XTERM);
6975 s = tokenize_use(1, s);
6985 pl_yylval.ival = CopLINE(PL_curcop);
6989 pl_yylval.ival = CopLINE(PL_curcop);
6993 PL_hints |= HINT_BLOCK_SCOPE;
7000 LOP(OP_WAITPID,XTERM);
7009 ctl_l[0] = toCTRL('L');
7011 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7014 /* Make sure $^L is defined */
7015 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7020 if (PL_expect == XOPERATOR)
7026 pl_yylval.ival = OP_XOR;
7031 TERM(sublex_start());
7036 #pragma segment Main
7040 S_pending_ident(pTHX)
7045 /* pit holds the identifier we read and pending_ident is reset */
7046 char pit = PL_pending_ident;
7047 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7048 /* All routes through this function want to know if there is a colon. */
7049 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7050 PL_pending_ident = 0;
7052 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7053 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7054 "### Pending identifier '%s'\n", PL_tokenbuf); });
7056 /* if we're in a my(), we can't allow dynamics here.
7057 $foo'bar has already been turned into $foo::bar, so
7058 just check for colons.
7060 if it's a legal name, the OP is a PADANY.
7063 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
7065 yyerror(Perl_form(aTHX_ "No package name allowed for "
7066 "variable %s in \"our\"",
7068 tmp = allocmy(PL_tokenbuf);
7072 yyerror(Perl_form(aTHX_ PL_no_myglob,
7073 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7075 pl_yylval.opval = newOP(OP_PADANY, 0);
7076 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf);
7082 build the ops for accesses to a my() variable.
7084 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7085 then used in a comparison. This catches most, but not
7086 all cases. For instance, it catches
7087 sort { my($a); $a <=> $b }
7089 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7090 (although why you'd do that is anyone's guess).
7095 tmp = pad_findmy(PL_tokenbuf);
7096 if (tmp != NOT_IN_PAD) {
7097 /* might be an "our" variable" */
7098 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7099 /* build ops for a bareword */
7100 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7101 HEK * const stashname = HvNAME_HEK(stash);
7102 SV * const sym = newSVhek(stashname);
7103 sv_catpvs(sym, "::");
7104 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7105 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7106 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7109 ? (GV_ADDMULTI | GV_ADDINEVAL)
7112 ((PL_tokenbuf[0] == '$') ? SVt_PV
7113 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7118 /* if it's a sort block and they're naming $a or $b */
7119 if (PL_last_lop_op == OP_SORT &&
7120 PL_tokenbuf[0] == '$' &&
7121 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7124 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7125 d < PL_bufend && *d != '\n';
7128 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7129 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7135 pl_yylval.opval = newOP(OP_PADANY, 0);
7136 pl_yylval.opval->op_targ = tmp;
7142 Whine if they've said @foo in a doublequoted string,
7143 and @foo isn't a variable we can find in the symbol
7146 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7147 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7149 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7150 /* DO NOT warn for @- and @+ */
7151 && !( PL_tokenbuf[2] == '\0' &&
7152 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7155 /* Downgraded from fatal to warning 20000522 mjd */
7156 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7157 "Possible unintended interpolation of %s in string",
7162 /* build ops for a bareword */
7163 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7165 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7167 PL_tokenbuf + 1, tokenbuf_len - 1,
7168 /* If the identifier refers to a stash, don't autovivify it.
7169 * Change 24660 had the side effect of causing symbol table
7170 * hashes to always be defined, even if they were freshly
7171 * created and the only reference in the entire program was
7172 * the single statement with the defined %foo::bar:: test.
7173 * It appears that all code in the wild doing this actually
7174 * wants to know whether sub-packages have been loaded, so
7175 * by avoiding auto-vivifying symbol tables, we ensure that
7176 * defined %foo::bar:: continues to be false, and the existing
7177 * tests still give the expected answers, even though what
7178 * they're actually testing has now changed subtly.
7180 (*PL_tokenbuf == '%'
7181 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7184 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7185 ((PL_tokenbuf[0] == '$') ? SVt_PV
7186 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7192 * The following code was generated by perl_keyword.pl.
7196 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7200 PERL_ARGS_ASSERT_KEYWORD;
7204 case 1: /* 5 tokens of length 1 */
7236 case 2: /* 18 tokens of length 2 */
7382 case 3: /* 29 tokens of length 3 */
7386 if (name[1] == 'N' &&
7449 if (name[1] == 'i' &&
7481 if (name[1] == 'o' &&
7490 if (name[1] == 'e' &&
7499 if (name[1] == 'n' &&
7508 if (name[1] == 'o' &&
7517 if (name[1] == 'a' &&
7526 if (name[1] == 'o' &&
7588 if (name[1] == 'e' &&
7602 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7628 if (name[1] == 'i' &&
7637 if (name[1] == 's' &&
7646 if (name[1] == 'e' &&
7655 if (name[1] == 'o' &&
7667 case 4: /* 41 tokens of length 4 */
7671 if (name[1] == 'O' &&
7681 if (name[1] == 'N' &&
7691 if (name[1] == 'i' &&
7701 if (name[1] == 'h' &&
7711 if (name[1] == 'u' &&
7724 if (name[2] == 'c' &&
7733 if (name[2] == 's' &&
7742 if (name[2] == 'a' &&
7778 if (name[1] == 'o' &&
7791 if (name[2] == 't' &&
7800 if (name[2] == 'o' &&
7809 if (name[2] == 't' &&
7818 if (name[2] == 'e' &&
7831 if (name[1] == 'o' &&
7844 if (name[2] == 'y' &&
7853 if (name[2] == 'l' &&
7869 if (name[2] == 's' &&
7878 if (name[2] == 'n' &&
7887 if (name[2] == 'c' &&
7900 if (name[1] == 'e' &&
7910 if (name[1] == 'p' &&
7923 if (name[2] == 'c' &&
7932 if (name[2] == 'p' &&
7941 if (name[2] == 's' &&
7957 if (name[2] == 'n' &&
8027 if (name[2] == 'r' &&
8036 if (name[2] == 'r' &&
8045 if (name[2] == 'a' &&
8061 if (name[2] == 'l' &&
8123 if (name[2] == 'e' &&
8126 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8139 case 5: /* 39 tokens of length 5 */
8143 if (name[1] == 'E' &&
8154 if (name[1] == 'H' &&
8168 if (name[2] == 'a' &&
8178 if (name[2] == 'a' &&
8195 if (name[2] == 'e' &&
8205 if (name[2] == 'e' &&
8209 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8225 if (name[3] == 'i' &&
8234 if (name[3] == 'o' &&
8270 if (name[2] == 'o' &&
8280 if (name[2] == 'y' &&
8294 if (name[1] == 'l' &&
8308 if (name[2] == 'n' &&
8318 if (name[2] == 'o' &&
8332 if (name[1] == 'i' &&
8337 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8346 if (name[2] == 'd' &&
8356 if (name[2] == 'c' &&
8373 if (name[2] == 'c' &&
8383 if (name[2] == 't' &&
8397 if (name[1] == 'k' &&
8408 if (name[1] == 'r' &&
8422 if (name[2] == 's' &&
8432 if (name[2] == 'd' &&
8449 if (name[2] == 'm' &&
8459 if (name[2] == 'i' &&
8469 if (name[2] == 'e' &&
8479 if (name[2] == 'l' &&
8489 if (name[2] == 'a' &&
8502 if (name[3] == 't' &&
8505 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8511 if (name[3] == 'd' &&
8528 if (name[1] == 'i' &&
8542 if (name[2] == 'a' &&
8555 if (name[3] == 'e' &&
8590 if (name[2] == 'i' &&
8607 if (name[2] == 'i' &&
8617 if (name[2] == 'i' &&
8634 case 6: /* 33 tokens of length 6 */
8638 if (name[1] == 'c' &&
8653 if (name[2] == 'l' &&
8664 if (name[2] == 'r' &&
8679 if (name[1] == 'e' &&
8694 if (name[2] == 's' &&
8699 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8705 if (name[2] == 'i' &&
8723 if (name[2] == 'l' &&
8734 if (name[2] == 'r' &&
8749 if (name[1] == 'm' &&
8764 if (name[2] == 'n' &&
8775 if (name[2] == 's' &&
8790 if (name[1] == 's' &&
8796 if (name[4] == 't' &&
8805 if (name[4] == 'e' &&
8814 if (name[4] == 'c' &&
8823 if (name[4] == 'n' &&
8839 if (name[1] == 'r' &&
8857 if (name[3] == 'a' &&
8867 if (name[3] == 'u' &&
8881 if (name[2] == 'n' &&
8899 if (name[2] == 'a' &&
8913 if (name[3] == 'e' &&
8926 if (name[4] == 't' &&
8935 if (name[4] == 'e' &&
8957 if (name[4] == 't' &&
8966 if (name[4] == 'e' &&
8982 if (name[2] == 'c' &&
8993 if (name[2] == 'l' &&
9004 if (name[2] == 'b' &&
9015 if (name[2] == 's' &&
9038 if (name[4] == 's' &&
9047 if (name[4] == 'n' &&
9060 if (name[3] == 'a' &&
9077 if (name[1] == 'a' &&
9092 case 7: /* 29 tokens of length 7 */
9096 if (name[1] == 'E' &&
9109 if (name[1] == '_' &&
9122 if (name[1] == 'i' &&
9129 return -KEY_binmode;
9135 if (name[1] == 'o' &&
9142 return -KEY_connect;
9151 if (name[2] == 'm' &&
9157 return -KEY_dbmopen;
9168 if (name[4] == 'u' &&
9172 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9178 if (name[4] == 'n' &&
9199 if (name[1] == 'o' &&
9212 if (name[1] == 'e' &&
9219 if (name[5] == 'r' &&
9222 return -KEY_getpgrp;
9228 if (name[5] == 'i' &&
9231 return -KEY_getppid;
9244 if (name[1] == 'c' &&
9251 return -KEY_lcfirst;
9257 if (name[1] == 'p' &&
9264 return -KEY_opendir;
9270 if (name[1] == 'a' &&
9288 if (name[3] == 'd' &&
9293 return -KEY_readdir;
9299 if (name[3] == 'u' &&
9310 if (name[3] == 'e' &&
9315 return -KEY_reverse;
9334 if (name[3] == 'k' &&
9339 return -KEY_seekdir;
9345 if (name[3] == 'p' &&
9350 return -KEY_setpgrp;
9360 if (name[2] == 'm' &&
9366 return -KEY_shmread;
9372 if (name[2] == 'r' &&
9378 return -KEY_sprintf;
9387 if (name[3] == 'l' &&
9392 return -KEY_symlink;
9401 if (name[4] == 'a' &&
9405 return -KEY_syscall;
9411 if (name[4] == 'p' &&
9415 return -KEY_sysopen;
9421 if (name[4] == 'e' &&
9425 return -KEY_sysread;
9431 if (name[4] == 'e' &&
9435 return -KEY_sysseek;
9453 if (name[1] == 'e' &&
9460 return -KEY_telldir;
9469 if (name[2] == 'f' &&
9475 return -KEY_ucfirst;
9481 if (name[2] == 's' &&
9487 return -KEY_unshift;
9497 if (name[1] == 'a' &&
9504 return -KEY_waitpid;
9513 case 8: /* 26 tokens of length 8 */
9517 if (name[1] == 'U' &&
9525 return KEY_AUTOLOAD;
9536 if (name[3] == 'A' &&
9542 return KEY___DATA__;
9548 if (name[3] == 'I' &&
9554 return -KEY___FILE__;
9560 if (name[3] == 'I' &&
9566 return -KEY___LINE__;
9582 if (name[2] == 'o' &&
9589 return -KEY_closedir;
9595 if (name[2] == 'n' &&
9602 return -KEY_continue;
9612 if (name[1] == 'b' &&
9620 return -KEY_dbmclose;
9626 if (name[1] == 'n' &&
9632 if (name[4] == 'r' &&
9637 return -KEY_endgrent;
9643 if (name[4] == 'w' &&
9648 return -KEY_endpwent;
9661 if (name[1] == 'o' &&
9669 return -KEY_formline;
9675 if (name[1] == 'e' &&
9686 if (name[6] == 'n' &&
9689 return -KEY_getgrent;
9695 if (name[6] == 'i' &&
9698 return -KEY_getgrgid;
9704 if (name[6] == 'a' &&
9707 return -KEY_getgrnam;
9720 if (name[4] == 'o' &&
9725 return -KEY_getlogin;
9736 if (name[6] == 'n' &&
9739 return -KEY_getpwent;
9745 if (name[6] == 'a' &&
9748 return -KEY_getpwnam;
9754 if (name[6] == 'i' &&
9757 return -KEY_getpwuid;
9777 if (name[1] == 'e' &&
9784 if (name[5] == 'i' &&
9791 return -KEY_readline;
9796 return -KEY_readlink;
9807 if (name[5] == 'i' &&
9811 return -KEY_readpipe;
9832 if (name[4] == 'r' &&
9837 return -KEY_setgrent;
9843 if (name[4] == 'w' &&
9848 return -KEY_setpwent;
9864 if (name[3] == 'w' &&
9870 return -KEY_shmwrite;
9876 if (name[3] == 't' &&
9882 return -KEY_shutdown;
9892 if (name[2] == 's' &&
9899 return -KEY_syswrite;
9909 if (name[1] == 'r' &&
9917 return -KEY_truncate;
9926 case 9: /* 9 tokens of length 9 */
9930 if (name[1] == 'N' &&
9939 return KEY_UNITCHECK;
9945 if (name[1] == 'n' &&
9954 return -KEY_endnetent;
9960 if (name[1] == 'e' &&
9969 return -KEY_getnetent;
9975 if (name[1] == 'o' &&
9984 return -KEY_localtime;
9990 if (name[1] == 'r' &&
9999 return KEY_prototype;
10005 if (name[1] == 'u' &&
10014 return -KEY_quotemeta;
10020 if (name[1] == 'e' &&
10029 return -KEY_rewinddir;
10035 if (name[1] == 'e' &&
10044 return -KEY_setnetent;
10050 if (name[1] == 'a' &&
10059 return -KEY_wantarray;
10068 case 10: /* 9 tokens of length 10 */
10072 if (name[1] == 'n' &&
10078 if (name[4] == 'o' &&
10085 return -KEY_endhostent;
10091 if (name[4] == 'e' &&
10098 return -KEY_endservent;
10111 if (name[1] == 'e' &&
10117 if (name[4] == 'o' &&
10124 return -KEY_gethostent;
10133 if (name[5] == 'r' &&
10139 return -KEY_getservent;
10145 if (name[5] == 'c' &&
10151 return -KEY_getsockopt;
10171 if (name[2] == 't')
10176 if (name[4] == 'o' &&
10183 return -KEY_sethostent;
10192 if (name[5] == 'r' &&
10198 return -KEY_setservent;
10204 if (name[5] == 'c' &&
10210 return -KEY_setsockopt;
10227 if (name[2] == 'c' &&
10236 return -KEY_socketpair;
10249 case 11: /* 8 tokens of length 11 */
10253 if (name[1] == '_' &&
10263 { /* __PACKAGE__ */
10264 return -KEY___PACKAGE__;
10270 if (name[1] == 'n' &&
10280 { /* endprotoent */
10281 return -KEY_endprotoent;
10287 if (name[1] == 'e' &&
10296 if (name[5] == 'e' &&
10302 { /* getpeername */
10303 return -KEY_getpeername;
10312 if (name[6] == 'o' &&
10317 { /* getpriority */
10318 return -KEY_getpriority;
10324 if (name[6] == 't' &&
10329 { /* getprotoent */
10330 return -KEY_getprotoent;
10344 if (name[4] == 'o' &&
10351 { /* getsockname */
10352 return -KEY_getsockname;
10365 if (name[1] == 'e' &&
10373 if (name[6] == 'o' &&
10378 { /* setpriority */
10379 return -KEY_setpriority;
10385 if (name[6] == 't' &&
10390 { /* setprotoent */
10391 return -KEY_setprotoent;
10407 case 12: /* 2 tokens of length 12 */
10408 if (name[0] == 'g' &&
10420 if (name[9] == 'd' &&
10423 { /* getnetbyaddr */
10424 return -KEY_getnetbyaddr;
10430 if (name[9] == 'a' &&
10433 { /* getnetbyname */
10434 return -KEY_getnetbyname;
10446 case 13: /* 4 tokens of length 13 */
10447 if (name[0] == 'g' &&
10454 if (name[4] == 'o' &&
10463 if (name[10] == 'd' &&
10466 { /* gethostbyaddr */
10467 return -KEY_gethostbyaddr;
10473 if (name[10] == 'a' &&
10476 { /* gethostbyname */
10477 return -KEY_gethostbyname;
10490 if (name[4] == 'e' &&
10499 if (name[10] == 'a' &&
10502 { /* getservbyname */
10503 return -KEY_getservbyname;
10509 if (name[10] == 'o' &&
10512 { /* getservbyport */
10513 return -KEY_getservbyport;
10532 case 14: /* 1 tokens of length 14 */
10533 if (name[0] == 'g' &&
10547 { /* getprotobyname */
10548 return -KEY_getprotobyname;
10553 case 16: /* 1 tokens of length 16 */
10554 if (name[0] == 'g' &&
10570 { /* getprotobynumber */
10571 return -KEY_getprotobynumber;
10585 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10589 PERL_ARGS_ASSERT_CHECKCOMMA;
10591 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10592 if (ckWARN(WARN_SYNTAX)) {
10595 for (w = s+2; *w && level; w++) {
10598 else if (*w == ')')
10601 while (isSPACE(*w))
10603 /* the list of chars below is for end of statements or
10604 * block / parens, boolean operators (&&, ||, //) and branch
10605 * constructs (or, and, if, until, unless, while, err, for).
10606 * Not a very solid hack... */
10607 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10608 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10609 "%s (...) interpreted as function",name);
10612 while (s < PL_bufend && isSPACE(*s))
10616 while (s < PL_bufend && isSPACE(*s))
10618 if (isIDFIRST_lazy_if(s,UTF)) {
10619 const char * const w = s++;
10620 while (isALNUM_lazy_if(s,UTF))
10622 while (s < PL_bufend && isSPACE(*s))
10626 if (keyword(w, s - w, 0))
10629 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10630 if (gv && GvCVu(gv))
10632 Perl_croak(aTHX_ "No comma allowed after %s", what);
10637 /* Either returns sv, or mortalizes sv and returns a new SV*.
10638 Best used as sv=new_constant(..., sv, ...).
10639 If s, pv are NULL, calls subroutine with one argument,
10640 and type is used with error messages only. */
10643 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10644 SV *sv, SV *pv, const char *type, STRLEN typelen)
10647 HV * const table = GvHV(PL_hintgv); /* ^H */
10651 const char *why1 = "", *why2 = "", *why3 = "";
10653 PERL_ARGS_ASSERT_NEW_CONSTANT;
10655 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10658 why2 = (const char *)
10659 (strEQ(key,"charnames")
10660 ? "(possibly a missing \"use charnames ...\")"
10662 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10663 (type ? type: "undef"), why2);
10665 /* This is convoluted and evil ("goto considered harmful")
10666 * but I do not understand the intricacies of all the different
10667 * failure modes of %^H in here. The goal here is to make
10668 * the most probable error message user-friendly. --jhi */
10673 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10674 (type ? type: "undef"), why1, why2, why3);
10676 yyerror(SvPVX_const(msg));
10680 cvp = hv_fetch(table, key, keylen, FALSE);
10681 if (!cvp || !SvOK(*cvp)) {
10684 why3 = "} is not defined";
10687 sv_2mortal(sv); /* Parent created it permanently */
10690 pv = newSVpvn_flags(s, len, SVs_TEMP);
10692 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10694 typesv = &PL_sv_undef;
10696 PUSHSTACKi(PERLSI_OVERLOAD);
10708 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10712 /* Check the eval first */
10713 if (!PL_in_eval && SvTRUE(ERRSV)) {
10714 sv_catpvs(ERRSV, "Propagated");
10715 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10717 res = SvREFCNT_inc_simple(sv);
10721 SvREFCNT_inc_simple_void(res);
10730 why1 = "Call to &{$^H{";
10732 why3 = "}} did not return a defined value";
10740 /* Returns a NUL terminated string, with the length of the string written to
10744 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10747 register char *d = dest;
10748 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10750 PERL_ARGS_ASSERT_SCAN_WORD;
10754 Perl_croak(aTHX_ ident_too_long);
10755 if (isALNUM(*s)) /* UTF handled below */
10757 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10762 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10766 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10767 char *t = s + UTF8SKIP(s);
10769 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10773 Perl_croak(aTHX_ ident_too_long);
10774 Copy(s, d, len, char);
10787 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10790 char *bracket = NULL;
10792 register char *d = dest;
10793 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10795 PERL_ARGS_ASSERT_SCAN_IDENT;
10800 while (isDIGIT(*s)) {
10802 Perl_croak(aTHX_ ident_too_long);
10809 Perl_croak(aTHX_ ident_too_long);
10810 if (isALNUM(*s)) /* UTF handled below */
10812 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10817 else if (*s == ':' && s[1] == ':') {
10821 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10822 char *t = s + UTF8SKIP(s);
10823 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10825 if (d + (t - s) > e)
10826 Perl_croak(aTHX_ ident_too_long);
10827 Copy(s, d, t - s, char);
10838 if (PL_lex_state != LEX_NORMAL)
10839 PL_lex_state = LEX_INTERPENDMAYBE;
10842 if (*s == '$' && s[1] &&
10843 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10856 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10861 if (isSPACE(s[-1])) {
10863 const char ch = *s++;
10864 if (!SPACE_OR_TAB(ch)) {
10870 if (isIDFIRST_lazy_if(d,UTF)) {
10874 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10875 end += UTF8SKIP(end);
10876 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10877 end += UTF8SKIP(end);
10879 Copy(s, d, end - s, char);
10884 while ((isALNUM(*s) || *s == ':') && d < e)
10887 Perl_croak(aTHX_ ident_too_long);
10890 while (s < send && SPACE_OR_TAB(*s))
10892 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10893 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10894 const char * const brack =
10896 ((*s == '[') ? "[...]" : "{...}");
10897 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10898 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10899 funny, dest, brack, funny, dest, brack);
10902 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10906 /* Handle extended ${^Foo} variables
10907 * 1999-02-27 mjd-perl-patch@plover.com */
10908 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10912 while (isALNUM(*s) && d < e) {
10916 Perl_croak(aTHX_ ident_too_long);
10921 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10922 PL_lex_state = LEX_INTERPEND;
10925 if (PL_lex_state == LEX_NORMAL) {
10926 if (ckWARN(WARN_AMBIGUOUS) &&
10927 (keyword(dest, d - dest, 0)
10928 || get_cvn_flags(dest, d - dest, 0)))
10932 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10933 "Ambiguous use of %c{%s} resolved to %c%s",
10934 funny, dest, funny, dest);
10939 s = bracket; /* let the parser handle it */
10943 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10944 PL_lex_state = LEX_INTERPEND;
10949 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10951 PERL_ARGS_ASSERT_PMFLAG;
10953 PERL_UNUSED_CONTEXT;
10955 const char c = (char)ch;
10957 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10958 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10959 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10960 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10961 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10967 S_scan_pat(pTHX_ char *start, I32 type)
10971 char *s = scan_str(start,!!PL_madskills,FALSE);
10972 const char * const valid_flags =
10973 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10978 PERL_ARGS_ASSERT_SCAN_PAT;
10981 const char * const delimiter = skipspace(start);
10985 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10986 : "Search pattern not terminated" ));
10989 pm = (PMOP*)newPMOP(type, 0);
10990 if (PL_multi_open == '?') {
10991 /* This is the only point in the code that sets PMf_ONCE: */
10992 pm->op_pmflags |= PMf_ONCE;
10994 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10995 allows us to restrict the list needed by reset to just the ??
10997 assert(type != OP_TRANS);
10999 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11002 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11005 elements = mg->mg_len / sizeof(PMOP**);
11006 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11007 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11008 mg->mg_len = elements * sizeof(PMOP**);
11009 PmopSTASH_set(pm,PL_curstash);
11015 while (*s && strchr(valid_flags, *s))
11016 pmflag(&pm->op_pmflags,*s++);
11018 if (PL_madskills && modstart != s) {
11019 SV* tmptoken = newSVpvn(modstart, s - modstart);
11020 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11023 /* issue a warning if /c is specified,but /g is not */
11024 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11026 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11027 "Use of /c modifier is meaningless without /g" );
11030 PL_lex_op = (OP*)pm;
11031 pl_yylval.ival = OP_MATCH;
11036 S_scan_subst(pTHX_ char *start)
11047 PERL_ARGS_ASSERT_SCAN_SUBST;
11049 pl_yylval.ival = OP_NULL;
11051 s = scan_str(start,!!PL_madskills,FALSE);
11054 Perl_croak(aTHX_ "Substitution pattern not terminated");
11056 if (s[-1] == PL_multi_open)
11059 if (PL_madskills) {
11060 CURMAD('q', PL_thisopen);
11061 CURMAD('_', PL_thiswhite);
11062 CURMAD('E', PL_thisstuff);
11063 CURMAD('Q', PL_thisclose);
11064 PL_realtokenstart = s - SvPVX(PL_linestr);
11068 first_start = PL_multi_start;
11069 s = scan_str(s,!!PL_madskills,FALSE);
11071 if (PL_lex_stuff) {
11072 SvREFCNT_dec(PL_lex_stuff);
11073 PL_lex_stuff = NULL;
11075 Perl_croak(aTHX_ "Substitution replacement not terminated");
11077 PL_multi_start = first_start; /* so whole substitution is taken together */
11079 pm = (PMOP*)newPMOP(OP_SUBST, 0);
11082 if (PL_madskills) {
11083 CURMAD('z', PL_thisopen);
11084 CURMAD('R', PL_thisstuff);
11085 CURMAD('Z', PL_thisclose);
11091 if (*s == EXEC_PAT_MOD) {
11095 else if (strchr(S_PAT_MODS, *s))
11096 pmflag(&pm->op_pmflags,*s++);
11102 if (PL_madskills) {
11104 curmad('m', newSVpvn(modstart, s - modstart));
11105 append_madprops(PL_thismad, (OP*)pm, 0);
11109 if ((pm->op_pmflags & PMf_CONTINUE)) {
11110 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11114 SV * const repl = newSVpvs("");
11116 PL_sublex_info.super_bufptr = s;
11117 PL_sublex_info.super_bufend = PL_bufend;
11119 pm->op_pmflags |= PMf_EVAL;
11122 sv_catpvs(repl, "eval ");
11124 sv_catpvs(repl, "do ");
11126 sv_catpvs(repl, "{");
11127 sv_catsv(repl, PL_lex_repl);
11128 if (strchr(SvPVX(PL_lex_repl), '#'))
11129 sv_catpvs(repl, "\n");
11130 sv_catpvs(repl, "}");
11132 SvREFCNT_dec(PL_lex_repl);
11133 PL_lex_repl = repl;
11136 PL_lex_op = (OP*)pm;
11137 pl_yylval.ival = OP_SUBST;
11142 S_scan_trans(pTHX_ char *start)
11155 PERL_ARGS_ASSERT_SCAN_TRANS;
11157 pl_yylval.ival = OP_NULL;
11159 s = scan_str(start,!!PL_madskills,FALSE);
11161 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11163 if (s[-1] == PL_multi_open)
11166 if (PL_madskills) {
11167 CURMAD('q', PL_thisopen);
11168 CURMAD('_', PL_thiswhite);
11169 CURMAD('E', PL_thisstuff);
11170 CURMAD('Q', PL_thisclose);
11171 PL_realtokenstart = s - SvPVX(PL_linestr);
11175 s = scan_str(s,!!PL_madskills,FALSE);
11177 if (PL_lex_stuff) {
11178 SvREFCNT_dec(PL_lex_stuff);
11179 PL_lex_stuff = NULL;
11181 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11183 if (PL_madskills) {
11184 CURMAD('z', PL_thisopen);
11185 CURMAD('R', PL_thisstuff);
11186 CURMAD('Z', PL_thisclose);
11189 complement = del = squash = 0;
11196 complement = OPpTRANS_COMPLEMENT;
11199 del = OPpTRANS_DELETE;
11202 squash = OPpTRANS_SQUASH;
11211 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11212 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11213 o->op_private &= ~OPpTRANS_ALL;
11214 o->op_private |= del|squash|complement|
11215 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11216 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11219 pl_yylval.ival = OP_TRANS;
11222 if (PL_madskills) {
11224 curmad('m', newSVpvn(modstart, s - modstart));
11225 append_madprops(PL_thismad, o, 0);
11234 S_scan_heredoc(pTHX_ register char *s)
11238 I32 op_type = OP_SCALAR;
11242 const char *found_newline;
11246 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11248 I32 stuffstart = s - SvPVX(PL_linestr);
11251 PL_realtokenstart = -1;
11254 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11258 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11262 while (SPACE_OR_TAB(*peek))
11264 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11267 s = delimcpy(d, e, s, PL_bufend, term, &len);
11277 if (!isALNUM_lazy_if(s,UTF))
11278 deprecate("bare << to mean <<\"\"");
11279 for (; isALNUM_lazy_if(s,UTF); s++) {
11284 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11285 Perl_croak(aTHX_ "Delimiter for here document is too long");
11288 len = d - PL_tokenbuf;
11291 if (PL_madskills) {
11292 tstart = PL_tokenbuf + !outer;
11293 PL_thisclose = newSVpvn(tstart, len - !outer);
11294 tstart = SvPVX(PL_linestr) + stuffstart;
11295 PL_thisopen = newSVpvn(tstart, s - tstart);
11296 stuffstart = s - SvPVX(PL_linestr);
11299 #ifndef PERL_STRICT_CR
11300 d = strchr(s, '\r');
11302 char * const olds = s;
11304 while (s < PL_bufend) {
11310 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11319 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11326 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11327 herewas = newSVpvn(s,PL_bufend-s);
11331 herewas = newSVpvn(s-1,found_newline-s+1);
11334 herewas = newSVpvn(s,found_newline-s);
11338 if (PL_madskills) {
11339 tstart = SvPVX(PL_linestr) + stuffstart;
11341 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11343 PL_thisstuff = newSVpvn(tstart, s - tstart);
11346 s += SvCUR(herewas);
11349 stuffstart = s - SvPVX(PL_linestr);
11355 tmpstr = newSV_type(SVt_PVIV);
11356 SvGROW(tmpstr, 80);
11357 if (term == '\'') {
11358 op_type = OP_CONST;
11359 SvIV_set(tmpstr, -1);
11361 else if (term == '`') {
11362 op_type = OP_BACKTICK;
11363 SvIV_set(tmpstr, '\\');
11367 PL_multi_start = CopLINE(PL_curcop);
11368 PL_multi_open = PL_multi_close = '<';
11369 term = *PL_tokenbuf;
11370 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11371 char * const bufptr = PL_sublex_info.super_bufptr;
11372 char * const bufend = PL_sublex_info.super_bufend;
11373 char * const olds = s - SvCUR(herewas);
11374 s = strchr(bufptr, '\n');
11378 while (s < bufend &&
11379 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11381 CopLINE_inc(PL_curcop);
11384 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11385 missingterm(PL_tokenbuf);
11387 sv_setpvn(herewas,bufptr,d-bufptr+1);
11388 sv_setpvn(tmpstr,d+1,s-d);
11390 sv_catpvn(herewas,s,bufend-s);
11391 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11398 while (s < PL_bufend &&
11399 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11401 CopLINE_inc(PL_curcop);
11403 if (s >= PL_bufend) {
11404 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11405 missingterm(PL_tokenbuf);
11407 sv_setpvn(tmpstr,d+1,s-d);
11409 if (PL_madskills) {
11411 sv_catpvn(PL_thisstuff, d + 1, s - d);
11413 PL_thisstuff = newSVpvn(d + 1, s - d);
11414 stuffstart = s - SvPVX(PL_linestr);
11418 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11420 sv_catpvn(herewas,s,PL_bufend-s);
11421 sv_setsv(PL_linestr,herewas);
11422 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11423 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11424 PL_last_lop = PL_last_uni = NULL;
11427 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
11428 while (s >= PL_bufend) { /* multiple line string? */
11430 if (PL_madskills) {
11431 tstart = SvPVX(PL_linestr) + stuffstart;
11433 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11435 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11439 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11440 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11441 missingterm(PL_tokenbuf);
11444 stuffstart = s - SvPVX(PL_linestr);
11446 CopLINE_inc(PL_curcop);
11447 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11448 PL_last_lop = PL_last_uni = NULL;
11449 #ifndef PERL_STRICT_CR
11450 if (PL_bufend - PL_linestart >= 2) {
11451 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11452 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11454 PL_bufend[-2] = '\n';
11456 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11458 else if (PL_bufend[-1] == '\r')
11459 PL_bufend[-1] = '\n';
11461 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11462 PL_bufend[-1] = '\n';
11464 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11465 update_debugger_info(PL_linestr, NULL, 0);
11466 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11467 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11468 *(SvPVX(PL_linestr) + off ) = ' ';
11469 sv_catsv(PL_linestr,herewas);
11470 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11471 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11475 sv_catsv(tmpstr,PL_linestr);
11480 PL_multi_end = CopLINE(PL_curcop);
11481 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11482 SvPV_shrink_to_cur(tmpstr);
11484 SvREFCNT_dec(herewas);
11486 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11488 else if (PL_encoding)
11489 sv_recode_to_utf8(tmpstr, PL_encoding);
11491 PL_lex_stuff = tmpstr;
11492 pl_yylval.ival = op_type;
11496 /* scan_inputsymbol
11497 takes: current position in input buffer
11498 returns: new position in input buffer
11499 side-effects: pl_yylval and lex_op are set.
11504 <FH> read from filehandle
11505 <pkg::FH> read from package qualified filehandle
11506 <pkg'FH> read from package qualified filehandle
11507 <$fh> read from filehandle in $fh
11508 <*.h> filename glob
11513 S_scan_inputsymbol(pTHX_ char *start)
11516 register char *s = start; /* current position in buffer */
11519 char *d = PL_tokenbuf; /* start of temp holding space */
11520 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11522 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11524 end = strchr(s, '\n');
11527 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11529 /* die if we didn't have space for the contents of the <>,
11530 or if it didn't end, or if we see a newline
11533 if (len >= (I32)sizeof PL_tokenbuf)
11534 Perl_croak(aTHX_ "Excessively long <> operator");
11536 Perl_croak(aTHX_ "Unterminated <> operator");
11541 Remember, only scalar variables are interpreted as filehandles by
11542 this code. Anything more complex (e.g., <$fh{$num}>) will be
11543 treated as a glob() call.
11544 This code makes use of the fact that except for the $ at the front,
11545 a scalar variable and a filehandle look the same.
11547 if (*d == '$' && d[1]) d++;
11549 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11550 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11553 /* If we've tried to read what we allow filehandles to look like, and
11554 there's still text left, then it must be a glob() and not a getline.
11555 Use scan_str to pull out the stuff between the <> and treat it
11556 as nothing more than a string.
11559 if (d - PL_tokenbuf != len) {
11560 pl_yylval.ival = OP_GLOB;
11561 s = scan_str(start,!!PL_madskills,FALSE);
11563 Perl_croak(aTHX_ "Glob not terminated");
11567 bool readline_overriden = FALSE;
11570 /* we're in a filehandle read situation */
11573 /* turn <> into <ARGV> */
11575 Copy("ARGV",d,5,char);
11577 /* Check whether readline() is overriden */
11578 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11580 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11582 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11583 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11584 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11585 readline_overriden = TRUE;
11587 /* if <$fh>, create the ops to turn the variable into a
11591 /* try to find it in the pad for this block, otherwise find
11592 add symbol table ops
11594 const PADOFFSET tmp = pad_findmy(d);
11595 if (tmp != NOT_IN_PAD) {
11596 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11597 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11598 HEK * const stashname = HvNAME_HEK(stash);
11599 SV * const sym = sv_2mortal(newSVhek(stashname));
11600 sv_catpvs(sym, "::");
11601 sv_catpv(sym, d+1);
11606 OP * const o = newOP(OP_PADSV, 0);
11608 PL_lex_op = readline_overriden
11609 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11610 append_elem(OP_LIST, o,
11611 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11612 : (OP*)newUNOP(OP_READLINE, 0, o);
11621 ? (GV_ADDMULTI | GV_ADDINEVAL)
11624 PL_lex_op = readline_overriden
11625 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11626 append_elem(OP_LIST,
11627 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11628 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11629 : (OP*)newUNOP(OP_READLINE, 0,
11630 newUNOP(OP_RV2SV, 0,
11631 newGVOP(OP_GV, 0, gv)));
11633 if (!readline_overriden)
11634 PL_lex_op->op_flags |= OPf_SPECIAL;
11635 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11636 pl_yylval.ival = OP_NULL;
11639 /* If it's none of the above, it must be a literal filehandle
11640 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11642 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11643 PL_lex_op = readline_overriden
11644 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11645 append_elem(OP_LIST,
11646 newGVOP(OP_GV, 0, gv),
11647 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11648 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11649 pl_yylval.ival = OP_NULL;
11658 takes: start position in buffer
11659 keep_quoted preserve \ on the embedded delimiter(s)
11660 keep_delims preserve the delimiters around the string
11661 returns: position to continue reading from buffer
11662 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11663 updates the read buffer.
11665 This subroutine pulls a string out of the input. It is called for:
11666 q single quotes q(literal text)
11667 ' single quotes 'literal text'
11668 qq double quotes qq(interpolate $here please)
11669 " double quotes "interpolate $here please"
11670 qx backticks qx(/bin/ls -l)
11671 ` backticks `/bin/ls -l`
11672 qw quote words @EXPORT_OK = qw( func() $spam )
11673 m// regexp match m/this/
11674 s/// regexp substitute s/this/that/
11675 tr/// string transliterate tr/this/that/
11676 y/// string transliterate y/this/that/
11677 ($*@) sub prototypes sub foo ($)
11678 (stuff) sub attr parameters sub foo : attr(stuff)
11679 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11681 In most of these cases (all but <>, patterns and transliterate)
11682 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11683 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11684 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11687 It skips whitespace before the string starts, and treats the first
11688 character as the delimiter. If the delimiter is one of ([{< then
11689 the corresponding "close" character )]}> is used as the closing
11690 delimiter. It allows quoting of delimiters, and if the string has
11691 balanced delimiters ([{<>}]) it allows nesting.
11693 On success, the SV with the resulting string is put into lex_stuff or,
11694 if that is already non-NULL, into lex_repl. The second case occurs only
11695 when parsing the RHS of the special constructs s/// and tr/// (y///).
11696 For convenience, the terminating delimiter character is stuffed into
11701 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11704 SV *sv; /* scalar value: string */
11705 const char *tmps; /* temp string, used for delimiter matching */
11706 register char *s = start; /* current position in the buffer */
11707 register char term; /* terminating character */
11708 register char *to; /* current position in the sv's data */
11709 I32 brackets = 1; /* bracket nesting level */
11710 bool has_utf8 = FALSE; /* is there any utf8 content? */
11711 I32 termcode; /* terminating char. code */
11712 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11713 STRLEN termlen; /* length of terminating string */
11714 int last_off = 0; /* last position for nesting bracket */
11720 PERL_ARGS_ASSERT_SCAN_STR;
11722 /* skip space before the delimiter */
11728 if (PL_realtokenstart >= 0) {
11729 stuffstart = PL_realtokenstart;
11730 PL_realtokenstart = -1;
11733 stuffstart = start - SvPVX(PL_linestr);
11735 /* mark where we are, in case we need to report errors */
11738 /* after skipping whitespace, the next character is the terminator */
11741 termcode = termstr[0] = term;
11745 termcode = utf8_to_uvchr((U8*)s, &termlen);
11746 Copy(s, termstr, termlen, U8);
11747 if (!UTF8_IS_INVARIANT(term))
11751 /* mark where we are */
11752 PL_multi_start = CopLINE(PL_curcop);
11753 PL_multi_open = term;
11755 /* find corresponding closing delimiter */
11756 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11757 termcode = termstr[0] = term = tmps[5];
11759 PL_multi_close = term;
11761 /* create a new SV to hold the contents. 79 is the SV's initial length.
11762 What a random number. */
11763 sv = newSV_type(SVt_PVIV);
11765 SvIV_set(sv, termcode);
11766 (void)SvPOK_only(sv); /* validate pointer */
11768 /* move past delimiter and try to read a complete string */
11770 sv_catpvn(sv, s, termlen);
11773 tstart = SvPVX(PL_linestr) + stuffstart;
11774 if (!PL_thisopen && !keep_delims) {
11775 PL_thisopen = newSVpvn(tstart, s - tstart);
11776 stuffstart = s - SvPVX(PL_linestr);
11780 if (PL_encoding && !UTF) {
11784 int offset = s - SvPVX_const(PL_linestr);
11785 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11786 &offset, (char*)termstr, termlen);
11787 const char * const ns = SvPVX_const(PL_linestr) + offset;
11788 char * const svlast = SvEND(sv) - 1;
11790 for (; s < ns; s++) {
11791 if (*s == '\n' && !PL_rsfp)
11792 CopLINE_inc(PL_curcop);
11795 goto read_more_line;
11797 /* handle quoted delimiters */
11798 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11800 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11802 if ((svlast-1 - t) % 2) {
11803 if (!keep_quoted) {
11804 *(svlast-1) = term;
11806 SvCUR_set(sv, SvCUR(sv) - 1);
11811 if (PL_multi_open == PL_multi_close) {
11817 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11818 /* At here, all closes are "was quoted" one,
11819 so we don't check PL_multi_close. */
11821 if (!keep_quoted && *(t+1) == PL_multi_open)
11826 else if (*t == PL_multi_open)
11834 SvCUR_set(sv, w - SvPVX_const(sv));
11836 last_off = w - SvPVX(sv);
11837 if (--brackets <= 0)
11842 if (!keep_delims) {
11843 SvCUR_set(sv, SvCUR(sv) - 1);
11849 /* extend sv if need be */
11850 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11851 /* set 'to' to the next character in the sv's string */
11852 to = SvPVX(sv)+SvCUR(sv);
11854 /* if open delimiter is the close delimiter read unbridle */
11855 if (PL_multi_open == PL_multi_close) {
11856 for (; s < PL_bufend; s++,to++) {
11857 /* embedded newlines increment the current line number */
11858 if (*s == '\n' && !PL_rsfp)
11859 CopLINE_inc(PL_curcop);
11860 /* handle quoted delimiters */
11861 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11862 if (!keep_quoted && s[1] == term)
11864 /* any other quotes are simply copied straight through */
11868 /* terminate when run out of buffer (the for() condition), or
11869 have found the terminator */
11870 else if (*s == term) {
11873 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11876 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11882 /* if the terminator isn't the same as the start character (e.g.,
11883 matched brackets), we have to allow more in the quoting, and
11884 be prepared for nested brackets.
11887 /* read until we run out of string, or we find the terminator */
11888 for (; s < PL_bufend; s++,to++) {
11889 /* embedded newlines increment the line count */
11890 if (*s == '\n' && !PL_rsfp)
11891 CopLINE_inc(PL_curcop);
11892 /* backslashes can escape the open or closing characters */
11893 if (*s == '\\' && s+1 < PL_bufend) {
11894 if (!keep_quoted &&
11895 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11900 /* allow nested opens and closes */
11901 else if (*s == PL_multi_close && --brackets <= 0)
11903 else if (*s == PL_multi_open)
11905 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11910 /* terminate the copied string and update the sv's end-of-string */
11912 SvCUR_set(sv, to - SvPVX_const(sv));
11915 * this next chunk reads more into the buffer if we're not done yet
11919 break; /* handle case where we are done yet :-) */
11921 #ifndef PERL_STRICT_CR
11922 if (to - SvPVX_const(sv) >= 2) {
11923 if ((to[-2] == '\r' && to[-1] == '\n') ||
11924 (to[-2] == '\n' && to[-1] == '\r'))
11928 SvCUR_set(sv, to - SvPVX_const(sv));
11930 else if (to[-1] == '\r')
11933 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11938 /* if we're out of file, or a read fails, bail and reset the current
11939 line marker so we can report where the unterminated string began
11942 if (PL_madskills) {
11943 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11945 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11947 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11951 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11953 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11959 /* we read a line, so increment our line counter */
11960 CopLINE_inc(PL_curcop);
11962 /* update debugger info */
11963 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11964 update_debugger_info(PL_linestr, NULL, 0);
11966 /* having changed the buffer, we must update PL_bufend */
11967 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11968 PL_last_lop = PL_last_uni = NULL;
11971 /* at this point, we have successfully read the delimited string */
11973 if (!PL_encoding || UTF) {
11975 if (PL_madskills) {
11976 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11977 const int len = s - tstart;
11979 sv_catpvn(PL_thisstuff, tstart, len);
11981 PL_thisstuff = newSVpvn(tstart, len);
11982 if (!PL_thisclose && !keep_delims)
11983 PL_thisclose = newSVpvn(s,termlen);
11988 sv_catpvn(sv, s, termlen);
11993 if (PL_madskills) {
11994 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11995 const int len = s - tstart - termlen;
11997 sv_catpvn(PL_thisstuff, tstart, len);
11999 PL_thisstuff = newSVpvn(tstart, len);
12000 if (!PL_thisclose && !keep_delims)
12001 PL_thisclose = newSVpvn(s - termlen,termlen);
12005 if (has_utf8 || PL_encoding)
12008 PL_multi_end = CopLINE(PL_curcop);
12010 /* if we allocated too much space, give some back */
12011 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12012 SvLEN_set(sv, SvCUR(sv) + 1);
12013 SvPV_renew(sv, SvLEN(sv));
12016 /* decide whether this is the first or second quoted string we've read
12029 takes: pointer to position in buffer
12030 returns: pointer to new position in buffer
12031 side-effects: builds ops for the constant in pl_yylval.op
12033 Read a number in any of the formats that Perl accepts:
12035 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12036 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12039 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12041 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12044 If it reads a number without a decimal point or an exponent, it will
12045 try converting the number to an integer and see if it can do so
12046 without loss of precision.
12050 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12053 register const char *s = start; /* current position in buffer */
12054 register char *d; /* destination in temp buffer */
12055 register char *e; /* end of temp buffer */
12056 NV nv; /* number read, as a double */
12057 SV *sv = NULL; /* place to put the converted number */
12058 bool floatit; /* boolean: int or float? */
12059 const char *lastub = NULL; /* position of last underbar */
12060 static char const number_too_long[] = "Number too long";
12062 PERL_ARGS_ASSERT_SCAN_NUM;
12064 /* We use the first character to decide what type of number this is */
12068 Perl_croak(aTHX_ "panic: scan_num");
12070 /* if it starts with a 0, it could be an octal number, a decimal in
12071 0.13 disguise, or a hexadecimal number, or a binary number. */
12075 u holds the "number so far"
12076 shift the power of 2 of the base
12077 (hex == 4, octal == 3, binary == 1)
12078 overflowed was the number more than we can hold?
12080 Shift is used when we add a digit. It also serves as an "are
12081 we in octal/hex/binary?" indicator to disallow hex characters
12082 when in octal mode.
12087 bool overflowed = FALSE;
12088 bool just_zero = TRUE; /* just plain 0 or binary number? */
12089 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12090 static const char* const bases[5] =
12091 { "", "binary", "", "octal", "hexadecimal" };
12092 static const char* const Bases[5] =
12093 { "", "Binary", "", "Octal", "Hexadecimal" };
12094 static const char* const maxima[5] =
12096 "0b11111111111111111111111111111111",
12100 const char *base, *Base, *max;
12102 /* check for hex */
12107 } else if (s[1] == 'b') {
12112 /* check for a decimal in disguise */
12113 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12115 /* so it must be octal */
12122 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12123 "Misplaced _ in number");
12127 base = bases[shift];
12128 Base = Bases[shift];
12129 max = maxima[shift];
12131 /* read the rest of the number */
12133 /* x is used in the overflow test,
12134 b is the digit we're adding on. */
12139 /* if we don't mention it, we're done */
12143 /* _ are ignored -- but warned about if consecutive */
12145 if (lastub && s == lastub + 1)
12146 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12147 "Misplaced _ in number");
12151 /* 8 and 9 are not octal */
12152 case '8': case '9':
12154 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12158 case '2': case '3': case '4':
12159 case '5': case '6': case '7':
12161 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12164 case '0': case '1':
12165 b = *s++ & 15; /* ASCII digit -> value of digit */
12169 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12170 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12171 /* make sure they said 0x */
12174 b = (*s++ & 7) + 9;
12176 /* Prepare to put the digit we have onto the end
12177 of the number so far. We check for overflows.
12183 x = u << shift; /* make room for the digit */
12185 if ((x >> shift) != u
12186 && !(PL_hints & HINT_NEW_BINARY)) {
12189 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12190 "Integer overflow in %s number",
12193 u = x | b; /* add the digit to the end */
12196 n *= nvshift[shift];
12197 /* If an NV has not enough bits in its
12198 * mantissa to represent an UV this summing of
12199 * small low-order numbers is a waste of time
12200 * (because the NV cannot preserve the
12201 * low-order bits anyway): we could just
12202 * remember when did we overflow and in the
12203 * end just multiply n by the right
12211 /* if we get here, we had success: make a scalar value from
12216 /* final misplaced underbar check */
12217 if (s[-1] == '_') {
12218 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12223 if (n > 4294967295.0)
12224 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12225 "%s number > %s non-portable",
12231 if (u > 0xffffffff)
12232 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12233 "%s number > %s non-portable",
12238 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12239 sv = new_constant(start, s - start, "integer",
12240 sv, NULL, NULL, 0);
12241 else if (PL_hints & HINT_NEW_BINARY)
12242 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12247 handle decimal numbers.
12248 we're also sent here when we read a 0 as the first digit
12250 case '1': case '2': case '3': case '4': case '5':
12251 case '6': case '7': case '8': case '9': case '.':
12254 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12257 /* read next group of digits and _ and copy into d */
12258 while (isDIGIT(*s) || *s == '_') {
12259 /* skip underscores, checking for misplaced ones
12263 if (lastub && s == lastub + 1)
12264 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12265 "Misplaced _ in number");
12269 /* check for end of fixed-length buffer */
12271 Perl_croak(aTHX_ number_too_long);
12272 /* if we're ok, copy the character */
12277 /* final misplaced underbar check */
12278 if (lastub && s == lastub + 1) {
12279 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12282 /* read a decimal portion if there is one. avoid
12283 3..5 being interpreted as the number 3. followed
12286 if (*s == '.' && s[1] != '.') {
12291 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12292 "Misplaced _ in number");
12296 /* copy, ignoring underbars, until we run out of digits.
12298 for (; isDIGIT(*s) || *s == '_'; s++) {
12299 /* fixed length buffer check */
12301 Perl_croak(aTHX_ number_too_long);
12303 if (lastub && s == lastub + 1)
12304 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12305 "Misplaced _ in number");
12311 /* fractional part ending in underbar? */
12312 if (s[-1] == '_') {
12313 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12314 "Misplaced _ in number");
12316 if (*s == '.' && isDIGIT(s[1])) {
12317 /* oops, it's really a v-string, but without the "v" */
12323 /* read exponent part, if present */
12324 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12328 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12329 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12331 /* stray preinitial _ */
12333 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12334 "Misplaced _ in number");
12338 /* allow positive or negative exponent */
12339 if (*s == '+' || *s == '-')
12342 /* stray initial _ */
12344 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12345 "Misplaced _ in number");
12349 /* read digits of exponent */
12350 while (isDIGIT(*s) || *s == '_') {
12353 Perl_croak(aTHX_ number_too_long);
12357 if (((lastub && s == lastub + 1) ||
12358 (!isDIGIT(s[1]) && s[1] != '_')))
12359 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12360 "Misplaced _ in number");
12367 /* make an sv from the string */
12371 We try to do an integer conversion first if no characters
12372 indicating "float" have been found.
12377 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12379 if (flags == IS_NUMBER_IN_UV) {
12381 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12384 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12385 if (uv <= (UV) IV_MIN)
12386 sv_setiv(sv, -(IV)uv);
12393 /* terminate the string */
12395 nv = Atof(PL_tokenbuf);
12400 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12401 const char *const key = floatit ? "float" : "integer";
12402 const STRLEN keylen = floatit ? 5 : 7;
12403 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12404 key, keylen, sv, NULL, NULL, 0);
12408 /* if it starts with a v, it could be a v-string */
12411 sv = newSV(5); /* preallocate storage space */
12412 s = scan_vstring(s, PL_bufend, sv);
12416 /* make the op for the constant and return */
12419 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12421 lvalp->opval = NULL;
12427 S_scan_formline(pTHX_ register char *s)
12430 register char *eol;
12432 SV * const stuff = newSVpvs("");
12433 bool needargs = FALSE;
12434 bool eofmt = FALSE;
12436 char *tokenstart = s;
12437 SV* savewhite = NULL;
12439 if (PL_madskills) {
12440 savewhite = PL_thiswhite;
12445 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12447 while (!needargs) {
12450 #ifdef PERL_STRICT_CR
12451 while (SPACE_OR_TAB(*t))
12454 while (SPACE_OR_TAB(*t) || *t == '\r')
12457 if (*t == '\n' || t == PL_bufend) {
12462 if (PL_in_eval && !PL_rsfp) {
12463 eol = (char *) memchr(s,'\n',PL_bufend-s);
12468 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12470 for (t = s; t < eol; t++) {
12471 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12473 goto enough; /* ~~ must be first line in formline */
12475 if (*t == '@' || *t == '^')
12479 sv_catpvn(stuff, s, eol-s);
12480 #ifndef PERL_STRICT_CR
12481 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12482 char *end = SvPVX(stuff) + SvCUR(stuff);
12485 SvCUR_set(stuff, SvCUR(stuff) - 1);
12495 if (PL_madskills) {
12497 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12499 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12502 s = filter_gets(PL_linestr, PL_rsfp, 0);
12504 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12506 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12508 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12509 PL_last_lop = PL_last_uni = NULL;
12518 if (SvCUR(stuff)) {
12521 PL_lex_state = LEX_NORMAL;
12522 start_force(PL_curforce);
12523 NEXTVAL_NEXTTOKE.ival = 0;
12527 PL_lex_state = LEX_FORMLINE;
12529 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12531 else if (PL_encoding)
12532 sv_recode_to_utf8(stuff, PL_encoding);
12534 start_force(PL_curforce);
12535 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12537 start_force(PL_curforce);
12538 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12542 SvREFCNT_dec(stuff);
12544 PL_lex_formbrack = 0;
12548 if (PL_madskills) {
12550 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12552 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12553 PL_thiswhite = savewhite;
12560 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12563 const I32 oldsavestack_ix = PL_savestack_ix;
12564 CV* const outsidecv = PL_compcv;
12567 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12569 SAVEI32(PL_subline);
12570 save_item(PL_subname);
12571 SAVESPTR(PL_compcv);
12573 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12574 CvFLAGS(PL_compcv) |= flags;
12576 PL_subline = CopLINE(PL_curcop);
12577 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12578 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12579 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12581 return oldsavestack_ix;
12585 #pragma segment Perl_yylex
12588 S_yywarn(pTHX_ const char *const s)
12592 PERL_ARGS_ASSERT_YYWARN;
12594 PL_in_eval |= EVAL_WARNONLY;
12596 PL_in_eval &= ~EVAL_WARNONLY;
12601 Perl_yyerror(pTHX_ const char *const s)
12604 const char *where = NULL;
12605 const char *context = NULL;
12608 int yychar = PL_parser->yychar;
12610 PERL_ARGS_ASSERT_YYERROR;
12612 if (!yychar || (yychar == ';' && !PL_rsfp))
12614 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12615 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12616 PL_oldbufptr != PL_bufptr) {
12619 The code below is removed for NetWare because it abends/crashes on NetWare
12620 when the script has error such as not having the closing quotes like:
12621 if ($var eq "value)
12622 Checking of white spaces is anyway done in NetWare code.
12625 while (isSPACE(*PL_oldoldbufptr))
12628 context = PL_oldoldbufptr;
12629 contlen = PL_bufptr - PL_oldoldbufptr;
12631 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12632 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12635 The code below is removed for NetWare because it abends/crashes on NetWare
12636 when the script has error such as not having the closing quotes like:
12637 if ($var eq "value)
12638 Checking of white spaces is anyway done in NetWare code.
12641 while (isSPACE(*PL_oldbufptr))
12644 context = PL_oldbufptr;
12645 contlen = PL_bufptr - PL_oldbufptr;
12647 else if (yychar > 255)
12648 where = "next token ???";
12649 else if (yychar == -2) { /* YYEMPTY */
12650 if (PL_lex_state == LEX_NORMAL ||
12651 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12652 where = "at end of line";
12653 else if (PL_lex_inpat)
12654 where = "within pattern";
12656 where = "within string";
12659 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
12661 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12662 else if (isPRINT_LC(yychar)) {
12663 const char string = yychar;
12664 sv_catpvn(where_sv, &string, 1);
12667 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12668 where = SvPVX_const(where_sv);
12670 msg = sv_2mortal(newSVpv(s, 0));
12671 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12672 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12674 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12676 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12677 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12678 Perl_sv_catpvf(aTHX_ msg,
12679 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12680 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12683 if (PL_in_eval & EVAL_WARNONLY) {
12684 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12688 if (PL_error_count >= 10) {
12689 if (PL_in_eval && SvCUR(ERRSV))
12690 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12691 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12693 Perl_croak(aTHX_ "%s has too many errors.\n",
12694 OutCopFILE(PL_curcop));
12697 PL_in_my_stash = NULL;
12701 #pragma segment Main
12705 S_swallow_bom(pTHX_ U8 *s)
12708 const STRLEN slen = SvCUR(PL_linestr);
12710 PERL_ARGS_ASSERT_SWALLOW_BOM;
12714 if (s[1] == 0xFE) {
12715 /* UTF-16 little-endian? (or UTF32-LE?) */
12716 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12717 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12718 #ifndef PERL_NO_UTF16_FILTER
12719 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12722 if (PL_bufend > (char*)s) {
12726 filter_add(utf16rev_textfilter, NULL);
12727 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12728 utf16_to_utf8_reversed(s, news,
12729 PL_bufend - (char*)s - 1,
12731 sv_setpvn(PL_linestr, (const char*)news, newlen);
12733 s = (U8*)SvPVX(PL_linestr);
12734 Copy(news, s, newlen, U8);
12738 SvUTF8_on(PL_linestr);
12739 s = (U8*)SvPVX(PL_linestr);
12741 /* FIXME - is this a general bug fix? */
12744 PL_bufend = SvPVX(PL_linestr) + newlen;
12747 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12752 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12753 #ifndef PERL_NO_UTF16_FILTER
12754 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12757 if (PL_bufend > (char *)s) {
12761 filter_add(utf16_textfilter, NULL);
12762 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12763 utf16_to_utf8(s, news,
12764 PL_bufend - (char*)s,
12766 sv_setpvn(PL_linestr, (const char*)news, newlen);
12768 SvUTF8_on(PL_linestr);
12769 s = (U8*)SvPVX(PL_linestr);
12770 PL_bufend = SvPVX(PL_linestr) + newlen;
12773 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12778 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12779 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12780 s += 3; /* UTF-8 */
12786 if (s[2] == 0xFE && s[3] == 0xFF) {
12787 /* UTF-32 big-endian */
12788 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12791 else if (s[2] == 0 && s[3] != 0) {
12794 * are a good indicator of UTF-16BE. */
12795 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12801 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12802 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12803 s += 4; /* UTF-8 */
12809 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12812 * are a good indicator of UTF-16LE. */
12813 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12821 #ifndef PERL_NO_UTF16_FILTER
12823 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12826 const STRLEN old = SvCUR(sv);
12827 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12828 DEBUG_P(PerlIO_printf(Perl_debug_log,
12829 "utf16_textfilter(%p): %d %d (%d)\n",
12830 FPTR2DPTR(void *, utf16_textfilter),
12831 idx, maxlen, (int) count));
12835 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12836 Copy(SvPVX_const(sv), tmps, old, char);
12837 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12838 SvCUR(sv) - old, &newlen);
12839 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12841 DEBUG_P({sv_dump(sv);});
12846 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12849 const STRLEN old = SvCUR(sv);
12850 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12851 DEBUG_P(PerlIO_printf(Perl_debug_log,
12852 "utf16rev_textfilter(%p): %d %d (%d)\n",
12853 FPTR2DPTR(void *, utf16rev_textfilter),
12854 idx, maxlen, (int) count));
12858 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12859 Copy(SvPVX_const(sv), tmps, old, char);
12860 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12861 SvCUR(sv) - old, &newlen);
12862 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12864 DEBUG_P({ sv_dump(sv); });
12870 Returns a pointer to the next character after the parsed
12871 vstring, as well as updating the passed in sv.
12873 Function must be called like
12876 s = scan_vstring(s,e,sv);
12878 where s and e are the start and end of the string.
12879 The sv should already be large enough to store the vstring
12880 passed in, for performance reasons.
12885 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12888 const char *pos = s;
12889 const char *start = s;
12891 PERL_ARGS_ASSERT_SCAN_VSTRING;
12893 if (*pos == 'v') pos++; /* get past 'v' */
12894 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12896 if ( *pos != '.') {
12897 /* this may not be a v-string if followed by => */
12898 const char *next = pos;
12899 while (next < e && isSPACE(*next))
12901 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12902 /* return string not v-string */
12903 sv_setpvn(sv,(char *)s,pos-s);
12904 return (char *)pos;
12908 if (!isALPHA(*pos)) {
12909 U8 tmpbuf[UTF8_MAXBYTES+1];
12912 s++; /* get past 'v' */
12917 /* this is atoi() that tolerates underscores */
12920 const char *end = pos;
12922 while (--end >= s) {
12924 const UV orev = rev;
12925 rev += (*end - '0') * mult;
12928 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12929 "Integer overflow in decimal number");
12933 if (rev > 0x7FFFFFFF)
12934 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12936 /* Append native character for the rev point */
12937 tmpend = uvchr_to_utf8(tmpbuf, rev);
12938 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12939 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12941 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12947 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12951 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12959 * c-indentation-style: bsd
12960 * c-basic-offset: 4
12961 * indent-tabs-mode: t
12964 * ex: set ts=8 sts=4 sw=4 noet: